From baac15f316db558dc855626960ed3d38d04940f9 Mon Sep 17 00:00:00 2001 From: Shiny Nematoda <snematoda.751k2@aleeas.com> Date: Sat, 6 Apr 2024 13:25:39 +0000 Subject: [PATCH] [FEAT] Support Include/Exclude Filters for Grep (#3058) fixes `TestRepoSearch` failing occasionally Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3058 Reviewed-by: Gusted <gusted@noreply.codeberg.org> Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com> Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com> --- modules/git/grep.go | 12 ++++++- modules/setting/indexer.go | 23 ++++++++++--- tests/integration/repo_search_test.go | 48 +++++++++++++++++++++------ 3 files changed, 67 insertions(+), 16 deletions(-) diff --git a/modules/git/grep.go b/modules/git/grep.go index 5a51c1a20c..ee6a858f74 100644 --- a/modules/git/grep.go +++ b/modules/git/grep.go @@ -13,6 +13,8 @@ import ( "os" "strconv" "strings" + + "code.gitea.io/gitea/modules/setting" ) type GrepResult struct { @@ -58,7 +60,15 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO } else { cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) } - cmd.AddDynamicArguments(cmp.Or(opts.RefName, "HEAD")) + // pathspec + files := make([]string, 0, len(setting.Indexer.IncludePatterns)+len(setting.Indexer.ExcludePatterns)) + for _, expr := range setting.Indexer.IncludePatterns { + files = append(files, expr.Pattern()) + } + for _, expr := range setting.Indexer.ExcludePatterns { + files = append(files, ":^"+expr.Pattern()) + } + cmd.AddDynamicArguments(cmp.Or(opts.RefName, "HEAD")).AddDashesAndList(files...) opts.MaxResultLimit = cmp.Or(opts.MaxResultLimit, 50) stderr := bytes.Buffer{} err = cmd.Run(&RunOpts{ diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 16f3d80168..3c96b58740 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -30,8 +30,8 @@ var Indexer = struct { RepoConnStr string RepoIndexerName string MaxIndexerFileSize int64 - IncludePatterns []glob.Glob - ExcludePatterns []glob.Glob + IncludePatterns []Glob + ExcludePatterns []Glob ExcludeVendored bool }{ IssueType: "bleve", @@ -50,6 +50,19 @@ var Indexer = struct { ExcludeVendored: true, } +type Glob struct { + glob glob.Glob + pattern string +} + +func (g *Glob) Match(s string) bool { + return g.glob.Match(s) +} + +func (g *Glob) Pattern() string { + return g.pattern +} + func loadIndexerFrom(rootCfg ConfigProvider) { sec := rootCfg.Section("indexer") Indexer.IssueType = sec.Key("ISSUE_INDEXER_TYPE").MustString("bleve") @@ -90,15 +103,15 @@ func loadIndexerFrom(rootCfg ConfigProvider) { } // IndexerGlobFromString parses a comma separated list of patterns and returns a glob.Glob slice suited for repo indexing -func IndexerGlobFromString(globstr string) []glob.Glob { - extarr := make([]glob.Glob, 0, 10) +func IndexerGlobFromString(globstr string) []Glob { + extarr := make([]Glob, 0, 10) for _, expr := range strings.Split(strings.ToLower(globstr), ",") { expr = strings.TrimSpace(expr) if expr != "" { if g, err := glob.Compile(expr, '.', '/'); err != nil { log.Info("Invalid glob expression '%s' (skipped): %v", expr, err) } else { - extarr = append(extarr, g) + extarr = append(extarr, Glob{glob: g, pattern: expr}) } } } diff --git a/tests/integration/repo_search_test.go b/tests/integration/repo_search_test.go index 56cc45d901..b6d2e24df6 100644 --- a/tests/integration/repo_search_test.go +++ b/tests/integration/repo_search_test.go @@ -11,6 +11,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" "code.gitea.io/gitea/tests" "github.com/PuerkitoBio/goquery" @@ -26,30 +27,57 @@ func resultFilenames(t testing.TB, doc *HTMLDoc) []string { return result } -func TestSearchRepo(t *testing.T) { +func TestSearchRepoIndexer(t *testing.T) { + testSearchRepo(t, true) +} + +func TestSearchRepoNoIndexer(t *testing.T) { + testSearchRepo(t, false) +} + +func testSearchRepo(t *testing.T, indexer bool) { defer tests.PrepareTestEnv(t)() + defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnabled, indexer)() repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1") assert.NoError(t, err) - code_indexer.UpdateRepoIndexer(repo) + if indexer { + code_indexer.UpdateRepoIndexer(repo) + } testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"}) - setting.Indexer.IncludePatterns = setting.IndexerGlobFromString("**.txt") - setting.Indexer.ExcludePatterns = setting.IndexerGlobFromString("**/y/**") + defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("**.txt"))() + defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("**/y/**"))() repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob") assert.NoError(t, err) - code_indexer.UpdateRepoIndexer(repo) + if indexer { + code_indexer.UpdateRepoIndexer(repo) + } testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"}) - testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"}) - testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"}) - testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"}) - testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"}) - testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"}) + testSearch(t, "/user2/glob/search?q=loren&page=1&fuzzy=false", []string{"a.txt"}) + + if indexer { + // fuzzy search: matches both file3 (x/b.txt) and file1 (a.txt) + // when indexer is enabled + testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"}) + testSearch(t, "/user2/glob/search?q=file4&page=1", []string{"x/b.txt", "a.txt"}) + testSearch(t, "/user2/glob/search?q=file5&page=1", []string{"x/b.txt", "a.txt"}) + } else { + // fuzzy search: OR of all the keywords + // when indexer is disabled + testSearch(t, "/user2/glob/search?q=file3+file1&page=1", []string{"a.txt", "x/b.txt"}) + testSearch(t, "/user2/glob/search?q=file4&page=1", []string{}) + testSearch(t, "/user2/glob/search?q=file5&page=1", []string{}) + } + + testSearch(t, "/user2/glob/search?q=file3&page=1&fuzzy=false", []string{"x/b.txt"}) + testSearch(t, "/user2/glob/search?q=file4&page=1&fuzzy=false", []string{}) + testSearch(t, "/user2/glob/search?q=file5&page=1&fuzzy=false", []string{}) } func testSearch(t *testing.T, url string, expected []string) {