From baac15f316db558dc855626960ed3d38d04940f9 Mon Sep 17 00:00:00 2001
From: Shiny Nematoda <snematoda.751k2@aleeas.com>
Date: Sat, 6 Apr 2024 13:25:39 +0000
Subject: [PATCH] [FEAT] Support Include/Exclude Filters for Grep (#3058)

fixes `TestSearchRepo` failing occasionally

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3058
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
---
 modules/git/grep.go                   | 12 ++++++-
 modules/setting/indexer.go            | 23 ++++++++++---
 tests/integration/repo_search_test.go | 48 +++++++++++++++++++++------
 3 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/modules/git/grep.go b/modules/git/grep.go
index 5a51c1a20c..ee6a858f74 100644
--- a/modules/git/grep.go
+++ b/modules/git/grep.go
@@ -13,6 +13,8 @@ import (
 	"os"
 	"strconv"
 	"strings"
+
+	"code.gitea.io/gitea/modules/setting"
 )
 
 type GrepResult struct {
@@ -58,7 +60,15 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
 	} else {
 		cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
 	}
-	cmd.AddDynamicArguments(cmp.Or(opts.RefName, "HEAD"))
+	// pathspec: limit the search to the include patterns and skip the ":^"-prefixed exclude patterns
+	files := make([]string, 0, len(setting.Indexer.IncludePatterns)+len(setting.Indexer.ExcludePatterns))
+	for _, expr := range setting.Indexer.IncludePatterns {
+		files = append(files, expr.Pattern())
+	}
+	for _, expr := range setting.Indexer.ExcludePatterns {
+		files = append(files, ":^"+expr.Pattern())
+	}
+	cmd.AddDynamicArguments(cmp.Or(opts.RefName, "HEAD")).AddDashesAndList(files...)
 	opts.MaxResultLimit = cmp.Or(opts.MaxResultLimit, 50)
 	stderr := bytes.Buffer{}
 	err = cmd.Run(&RunOpts{
diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go
index 16f3d80168..3c96b58740 100644
--- a/modules/setting/indexer.go
+++ b/modules/setting/indexer.go
@@ -30,8 +30,8 @@ var Indexer = struct {
 	RepoConnStr          string
 	RepoIndexerName      string
 	MaxIndexerFileSize   int64
-	IncludePatterns      []glob.Glob
-	ExcludePatterns      []glob.Glob
+	IncludePatterns      []Glob
+	ExcludePatterns      []Glob
 	ExcludeVendored      bool
 }{
 	IssueType:        "bleve",
@@ -50,6 +50,19 @@ var Indexer = struct {
 	ExcludeVendored:      true,
 }
 
+type Glob struct {
+	glob    glob.Glob
+	pattern string
+}
+
+func (g *Glob) Match(s string) bool {
+	return g.glob.Match(s)
+}
+
+func (g *Glob) Pattern() string {
+	return g.pattern
+}
+
 func loadIndexerFrom(rootCfg ConfigProvider) {
 	sec := rootCfg.Section("indexer")
 	Indexer.IssueType = sec.Key("ISSUE_INDEXER_TYPE").MustString("bleve")
@@ -90,15 +103,15 @@ func loadIndexerFrom(rootCfg ConfigProvider) {
 }
 
 // IndexerGlobFromString parses a comma separated list of patterns and returns a glob.Glob slice suited for repo indexing
-func IndexerGlobFromString(globstr string) []glob.Glob {
-	extarr := make([]glob.Glob, 0, 10)
+func IndexerGlobFromString(globstr string) []Glob {
+	extarr := make([]Glob, 0, 10)
 	for _, expr := range strings.Split(strings.ToLower(globstr), ",") {
 		expr = strings.TrimSpace(expr)
 		if expr != "" {
 			if g, err := glob.Compile(expr, '.', '/'); err != nil {
 				log.Info("Invalid glob expression '%s' (skipped): %v", expr, err)
 			} else {
-				extarr = append(extarr, g)
+				extarr = append(extarr, Glob{glob: g, pattern: expr})
 			}
 		}
 	}
diff --git a/tests/integration/repo_search_test.go b/tests/integration/repo_search_test.go
index 56cc45d901..b6d2e24df6 100644
--- a/tests/integration/repo_search_test.go
+++ b/tests/integration/repo_search_test.go
@@ -11,6 +11,7 @@ import (
 	repo_model "code.gitea.io/gitea/models/repo"
 	code_indexer "code.gitea.io/gitea/modules/indexer/code"
 	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/test"
 	"code.gitea.io/gitea/tests"
 
 	"github.com/PuerkitoBio/goquery"
@@ -26,30 +27,57 @@ func resultFilenames(t testing.TB, doc *HTMLDoc) []string {
 	return result
 }
 
-func TestSearchRepo(t *testing.T) {
+func TestSearchRepoIndexer(t *testing.T) {
+	testSearchRepo(t, true)
+}
+
+func TestSearchRepoNoIndexer(t *testing.T) {
+	testSearchRepo(t, false)
+}
+
+func testSearchRepo(t *testing.T, indexer bool) {
 	defer tests.PrepareTestEnv(t)()
+	defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnabled, indexer)()
 
 	repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
 	assert.NoError(t, err)
 
-	code_indexer.UpdateRepoIndexer(repo)
+	if indexer {
+		code_indexer.UpdateRepoIndexer(repo)
+	}
 
 	testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
 
-	setting.Indexer.IncludePatterns = setting.IndexerGlobFromString("**.txt")
-	setting.Indexer.ExcludePatterns = setting.IndexerGlobFromString("**/y/**")
+	defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("**.txt"))()
+	defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("**/y/**"))()
 
 	repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
 	assert.NoError(t, err)
 
-	code_indexer.UpdateRepoIndexer(repo)
+	if indexer {
+		code_indexer.UpdateRepoIndexer(repo)
+	}
 
 	testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
-	testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"})
-	testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
-	testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"})
-	testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"})
-	testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"})
+	testSearch(t, "/user2/glob/search?q=loren&page=1&fuzzy=false", []string{"a.txt"})
+
+	if indexer {
+		// fuzzy search: matches both file3 (x/b.txt) and file1 (a.txt)
+		// when indexer is enabled
+		testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
+		testSearch(t, "/user2/glob/search?q=file4&page=1", []string{"x/b.txt", "a.txt"})
+		testSearch(t, "/user2/glob/search?q=file5&page=1", []string{"x/b.txt", "a.txt"})
+	} else {
+		// fuzzy search: OR of all the keywords
+		// when indexer is disabled
+		testSearch(t, "/user2/glob/search?q=file3+file1&page=1", []string{"a.txt", "x/b.txt"})
+		testSearch(t, "/user2/glob/search?q=file4&page=1", []string{})
+		testSearch(t, "/user2/glob/search?q=file5&page=1", []string{})
+	}
+
+	testSearch(t, "/user2/glob/search?q=file3&page=1&fuzzy=false", []string{"x/b.txt"})
+	testSearch(t, "/user2/glob/search?q=file4&page=1&fuzzy=false", []string{})
+	testSearch(t, "/user2/glob/search?q=file5&page=1&fuzzy=false", []string{})
 }
 
 func testSearch(t *testing.T, url string, expected []string) {