// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package hierarchy

import (
	"bytes"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/registry"
)

// Name is the name under which this tokenizer is registered with bleve.
const Name = "path_hierarchy"

// PathHierarchyTokenizer splits a file path into all of its parent hierarchies.
type PathHierarchyTokenizer struct{}

// Similar to elastic's path_hierarchy tokenizer.
// This tokenizes a given path into all the possible hierarchies.
// For example,
// modules/indexer/code/search.go =>
//
//	modules
//	modules/indexer
//	modules/indexer/code
//	modules/indexer/code/search.go
func (t *PathHierarchyTokenizer) Tokenize(input []byte) analysis.TokenStream {
	// trim any extra slashes
	input = bytes.Trim(input, "/")

	// zero allocations until the nested directories exceed a depth of 8 (which is unlikely)
	rv := make(analysis.TokenStream, 0, 8)
	count, off := 1, 0

	// iterate over all directory separators
	for i := bytes.IndexRune(input[off:], '/'); i != -1; i = bytes.IndexRune(input[off:], '/') {
		// the index is relative to input[offset...];
		// add it to the accumulated offset to get the index of the current separator in input[0...]
		off += i
		rv = append(rv, &analysis.Token{
			Term:     input[:off], // the slice input[0...index of separator]
			Start:    0,
			End:      off,
			Position: count,
			Type:     analysis.AlphaNumeric,
		})
		// step the offset past the separator
		off++
		count++
	}

	// the entire file path is always the last token
	rv = append(rv, &analysis.Token{
		Term:     input,
		Start:    0,
		End:      len(input),
		Position: count,
		Type:     analysis.AlphaNumeric,
	})

	return rv
}

// TokenizerConstructor builds the tokenizer for the registry; the tokenizer
// is stateless, so the config map and cache are ignored.
func TokenizerConstructor(config map[string]any, cache *registry.Cache) (analysis.Tokenizer, error) {
	return &PathHierarchyTokenizer{}, nil
}

func init() {
	registry.RegisterTokenizer(Name, TokenizerConstructor)
}
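
// A usage sketch (illustrative; the analyzer name "code_path" is an
// assumption, not something this file defines): since init registers the
// tokenizer under Name, a bleve index mapping can refer to it when declaring
// a custom analyzer:
//
//	import (
//		"github.com/blevesearch/bleve/v2"
//		"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
//	)
//
//	mapping := bleve.NewIndexMapping()
//	err := mapping.AddCustomAnalyzer("code_path", map[string]any{
//		"type":      custom.Name,
//		"tokenizer": Name,
//	})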