From 72a650c8e42f4abf59d5df7cd5dc27b451494cc6 Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Wed, 7 Aug 2019 01:26:12 -0300 Subject: [PATCH] Add wildcard support to REPO_INDEXER_EXTENSIONS --- .../doc/advanced/config-cheat-sheet.en-us.md | 4 +- go.mod | 1 + go.sum | 2 + models/repo_indexer.go | 16 ++++---- modules/setting/indexer.go | 39 +++++++++---------- vendor/modules.txt | 9 +++++ 6 files changed, 42 insertions(+), 29 deletions(-) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index c12c05c43e86..b8d7bb056b24 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -177,8 +177,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. -- `REPO_INDEXER_EXTENSIONS`: **empty**: A comma separated list of file extensions to exclude from the index; a \`.' matches files with no extension. An empty list means do not exclude any files. -- `REPO_EXTENSIONS_LIST_INCLUDE`: **false**: If true, `REPO_INDEXER_EXTENSIONS` are the file extensions to include rather than exclude from the index. +- `REPO_INDEXER_PATTERNS`: **empty**: A comma separated list of file name patterns (see https://github.com/gobwas/glob) to **exclude** from the index. An empty list means do not exclude any files. Use `**.txt` to match any files with .txt extension. +- `REPO_PATTERNS_INCLUDE`: **false**: If true, `REPO_INDEXER_PATTERNS` are the file name patterns to **include** rather than exclude from the index. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. 
diff --git a/go.mod b/go.mod index 804573e0d7bf..ebe44f80e027 100644 --- a/go.mod +++ b/go.mod @@ -54,6 +54,7 @@ require ( github.com/go-redis/redis v6.15.2+incompatible github.com/go-sql-driver/mysql v1.4.1 github.com/go-xorm/xorm v0.7.4 + github.com/gobwas/glob v0.2.3 github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14 github.com/google/go-github/v24 v24.0.1 diff --git a/go.sum b/go.sum index 89a2362e9657..9183c3d74f38 100644 --- a/go.sum +++ b/go.sum @@ -145,6 +145,8 @@ github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a h1:9wScpmSP5A3Bk github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:56xuuqnHyryaerycW3BfssRdxQstACi0Epw/yC5E2xM= github.com/go-xorm/xorm v0.7.4 h1:g/NgC590SzqV5VKmdRDNe/K3Holw3YJUCXX28r+rFGw= github.com/go-xorm/xorm v0.7.4/go.mod h1:vpza5fydeRgt+stvo9qgMhSNohYqmNt0I1/D6hkCekA= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 h1:deE7ritpK04PgtpyVOS2TYcQEld9qLCD5b5EbVNOuLA= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:YgYOrVn3Nj9Tq0EvjmFbphRytDj7JNRoWSStJZWDJTQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= diff --git a/models/repo_indexer.go b/models/repo_indexer.go index dd36cf931246..dabcce7aee40 100644 --- a/models/repo_indexer.go +++ b/models/repo_indexer.go @@ -232,14 +232,16 @@ func addDelete(filename string, repo *Repository, batch rupture.FlushingBatch) e } func isIndexable(entry *git.TreeEntry) bool { - if setting.Indexer.FileExtensions != nil { - var ext string - parts := strings.Split(entry.Name(), ".") - cnt := len(parts) - if cnt > 1 { - ext = strings.ToLower(parts[cnt-1]) + if setting.Indexer.FilePatterns != nil { + var found bool + name := strings.ToLower(entry.Name()) + for _, g := range 
setting.Indexer.FilePatterns { + if g.Match(name) { + found = true + break + } } - if setting.Indexer.FileExtensions[ext] != setting.Indexer.IncludeExtensions { + if found != setting.Indexer.IncludePatterns { return false } } diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 51b822194a1d..3ee3b2e68058 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -8,6 +8,10 @@ import ( "path" "path/filepath" "strings" + + "code.gitea.io/gitea/modules/log" + + "github.com/gobwas/glob" ) // enumerates all the indexer queue types @@ -30,8 +34,8 @@ var ( IssueQueueDir string IssueQueueConnStr string IssueQueueBatchNumber int - FileExtensions map[string]bool - IncludeExtensions bool + FilePatterns []glob.Glob + IncludePatterns bool }{ IssueType: "bleve", IssuePath: "indexers/issues.bleve", @@ -54,8 +58,8 @@ func newIndexerService() { if !filepath.IsAbs(Indexer.RepoPath) { Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath) } - Indexer.FileExtensions = extensionsFromString(sec.Key("REPO_INDEXER_EXTENSIONS").MustString("")) - Indexer.IncludeExtensions = sec.Key("REPO_EXTENSIONS_LIST_INCLUDE").MustBool(false) + Indexer.FilePatterns = extensionsFromString(sec.Key("REPO_INDEXER_PATTERNS").MustString("")) + Indexer.IncludePatterns = sec.Key("REPO_PATTERNS_INCLUDE").MustBool(false) Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024) @@ -65,25 +69,20 @@ func newIndexerService() { Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20) } -func extensionsFromString(from string) map[string]bool { - extmap := make(map[string]bool) - for _, ext := range strings.Split(strings.ToLower(from), ",") { - ext = strings.TrimSpace(ext) - // Accept *.txt, .txt and txt. Also use . to mean no ext - if strings.HasPrefix(ext, "*.") { - ext = ext[1:] - } - if ext == "." 
{ - extmap[""] = true - } else { - ext = strings.TrimPrefix(ext, ".") - if ext != "" { - extmap[ext] = true +func extensionsFromString(from string) []glob.Glob { + extarr := make([]glob.Glob, 0, 10) + for _, expr := range strings.Split(strings.ToLower(from), ",") { + expr = strings.TrimSpace(expr) + if expr != "" { + if g, err := glob.Compile(expr, '.', '/'); err != nil { + log.Trace("Index file extensions: '%s': bad pattern: %v", expr, err) + } else { + extarr = append(extarr, g) } } } - if len(extmap) == 0 { + if len(extarr) == 0 { return nil } - return extmap + return extarr } diff --git a/vendor/modules.txt b/vendor/modules.txt index 703f161a8aab..6db4d2664e79 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -158,6 +158,15 @@ github.com/go-redis/redis/internal/util github.com/go-sql-driver/mysql # github.com/go-xorm/xorm v0.7.4 github.com/go-xorm/xorm +# github.com/gobwas/glob v0.2.3 +github.com/gobwas/glob +github.com/gobwas/glob/compiler +github.com/gobwas/glob/syntax +github.com/gobwas/glob/match +github.com/gobwas/glob/syntax/ast +github.com/gobwas/glob/util/runes +github.com/gobwas/glob/syntax/lexer +github.com/gobwas/glob/util/strings # github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 github.com/gogits/chardet # github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14