forked from gitea/gitea
Code/repo search (#2582)
Indexed search of repository contents (for default branch only)
This commit is contained in:
parent
762f1d7237
commit
5866eb2321
|
@ -192,7 +192,11 @@ ITERATE_BUFFER_SIZE = 50
|
|||
|
||||
[indexer]
|
||||
ISSUE_INDEXER_PATH = indexers/issues.bleve
|
||||
; repo indexer by default disabled, since it uses a lot of disk space
|
||||
REPO_INDEXER_ENABLED = false
|
||||
REPO_INDEXER_PATH = indexers/repos.bleve
|
||||
UPDATE_BUFFER_LEN = 20
|
||||
MAX_FILE_SIZE = 1048576
|
||||
|
||||
[admin]
|
||||
; Disable regular (non-admin) users to create organizations
|
||||
|
|
|
@ -63,6 +63,10 @@ func TestMain(m *testing.M) {
|
|||
fmt.Printf("os.RemoveAll: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err = os.RemoveAll(setting.Indexer.RepoPath); err != nil {
|
||||
fmt.Printf("Unable to remove repo indexer: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
|
|
|
@ -12,6 +12,8 @@ PATH = data/gitea.db
|
|||
|
||||
[indexer]
|
||||
ISSUE_INDEXER_PATH = integrations/indexers-mysql/issues.bleve
|
||||
REPO_INDEXER_ENABLED = true
|
||||
REPO_INDEXER_PATH = integrations/indexers-mysql/repos.bleve
|
||||
|
||||
[repository]
|
||||
ROOT = integrations/gitea-integration-mysql/gitea-repositories
|
||||
|
|
|
@ -12,6 +12,8 @@ PATH = data/gitea.db
|
|||
|
||||
[indexer]
|
||||
ISSUE_INDEXER_PATH = integrations/indexers-pgsql/issues.bleve
|
||||
REPO_INDEXER_ENABLED = true
|
||||
REPO_INDEXER_PATH = integrations/indexers-pgsql/repos.bleve
|
||||
|
||||
[repository]
|
||||
ROOT = integrations/gitea-integration-pgsql/gitea-repositories
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package integrations
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func resultFilenames(t testing.TB, doc *HTMLDoc) []string {
|
||||
resultsSelection := doc.doc.Find(".repository.search")
|
||||
assert.EqualValues(t, 1, resultsSelection.Length(),
|
||||
"Invalid template (repo search template has changed?)")
|
||||
filenameSelections := resultsSelection.Find(".repo-search-result").Find(".header").Find("span.file")
|
||||
result := make([]string, filenameSelections.Length())
|
||||
filenameSelections.Each(func(i int, selection *goquery.Selection) {
|
||||
result[i] = selection.Text()
|
||||
})
|
||||
return result
|
||||
}
|
||||
|
||||
func TestSearchRepo(t *testing.T) {
|
||||
prepareTestEnv(t)
|
||||
|
||||
req := NewRequestf(t, "GET", "/user2/repo1/search?q=Description&page=1")
|
||||
resp := MakeRequest(t, req, http.StatusOK)
|
||||
|
||||
filenames := resultFilenames(t, NewHTMLParser(t, resp.Body))
|
||||
assert.EqualValues(t, []string{"README.md"}, filenames)
|
||||
}
|
|
@ -7,6 +7,8 @@ PATH = :memory:
|
|||
|
||||
[indexer]
|
||||
ISSUE_INDEXER_PATH = integrations/indexers-sqlite/issues.bleve
|
||||
REPO_INDEXER_ENABLED = true
|
||||
REPO_INDEXER_PATH = integrations/indexers-sqlite/repos.bleve
|
||||
|
||||
[repository]
|
||||
ROOT = integrations/gitea-integration-sqlite/gitea-repositories
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
[] # empty
|
|
@ -144,6 +144,8 @@ var migrations = []Migration{
|
|||
NewMigration("remove organization watch repositories", removeOrganizationWatchRepo),
|
||||
// v47 -> v48
|
||||
NewMigration("add deleted branches", addDeletedBranch),
|
||||
// v48 -> v49
|
||||
NewMigration("add repo indexer status", addRepoIndexerStatus),
|
||||
}
|
||||
|
||||
// Migrate database to current version
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package migrations
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-xorm/xorm"
|
||||
)
|
||||
|
||||
func addRepoIndexerStatus(x *xorm.Engine) error {
|
||||
// RepoIndexerStatus see models/repo_indexer.go
|
||||
type RepoIndexerStatus struct {
|
||||
ID int64 `xorm:"pk autoincr"`
|
||||
RepoID int64 `xorm:"INDEX NOT NULL"`
|
||||
CommitSha string `xorm:"VARCHAR(40)"`
|
||||
}
|
||||
|
||||
if err := x.Sync2(new(RepoIndexerStatus)); err != nil {
|
||||
return fmt.Errorf("Sync2: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -13,6 +13,10 @@ import (
|
|||
"path"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
// Needed for the MySQL driver
|
||||
_ "github.com/go-sql-driver/mysql"
|
||||
"github.com/go-xorm/core"
|
||||
|
@ -23,9 +27,6 @@ import (
|
|||
|
||||
// Needed for the MSSQL driver
|
||||
_ "github.com/denisenkom/go-mssqldb"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
)
|
||||
|
||||
// Engine represents a xorm engine or session.
|
||||
|
@ -115,6 +116,7 @@ func init() {
|
|||
new(Stopwatch),
|
||||
new(TrackedTime),
|
||||
new(DeletedBranch),
|
||||
new(RepoIndexerStatus),
|
||||
)
|
||||
|
||||
gonicNames := []string{"SSL", "UID"}
|
||||
|
@ -150,8 +152,13 @@ func LoadConfigs() {
|
|||
DbCfg.Timeout = sec.Key("SQLITE_TIMEOUT").MustInt(500)
|
||||
|
||||
sec = setting.Cfg.Section("indexer")
|
||||
setting.Indexer.IssuePath = sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve")
|
||||
setting.Indexer.IssuePath = absolutePath(
|
||||
sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve"))
|
||||
setting.Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false)
|
||||
setting.Indexer.RepoPath = absolutePath(
|
||||
sec.Key("REPO_INDEXER_PATH").MustString("indexers/repos.bleve"))
|
||||
setting.Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
|
||||
setting.Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(512 * 1024 * 1024)
|
||||
}
|
||||
|
||||
// parsePostgreSQLHostPort parses given input in various forms defined in
|
||||
|
@ -336,3 +343,12 @@ func DumpDatabase(filePath string, dbType string) error {
|
|||
}
|
||||
return x.DumpTablesToFile(tbs, filePath)
|
||||
}
|
||||
|
||||
// absolutePath make path absolute if it is relative
|
||||
func absolutePath(path string) string {
|
||||
workDir, err := setting.WorkDir()
|
||||
if err != nil {
|
||||
log.Fatal(4, "Failed to get work directory: %v", err)
|
||||
}
|
||||
return util.EnsureAbsolutePath(path, workDir)
|
||||
}
|
||||
|
|
|
@ -209,6 +209,7 @@ type Repository struct {
|
|||
ForkID int64 `xorm:"INDEX"`
|
||||
BaseRepo *Repository `xorm:"-"`
|
||||
Size int64 `xorm:"NOT NULL DEFAULT 0"`
|
||||
IndexerStatus *RepoIndexerStatus `xorm:"-"`
|
||||
|
||||
Created time.Time `xorm:"-"`
|
||||
CreatedUnix int64 `xorm:"INDEX created"`
|
||||
|
@ -782,10 +783,12 @@ func UpdateLocalCopyBranch(repoPath, localPath, branch string) error {
|
|||
if err != nil {
|
||||
return fmt.Errorf("git fetch origin: %v", err)
|
||||
}
|
||||
if len(branch) > 0 {
|
||||
if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil {
|
||||
return fmt.Errorf("git reset --hard origin/%s: %v", branch, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -989,6 +992,7 @@ func MigrateRepository(doer, u *User, opts MigrateRepoOptions) (*Repository, err
|
|||
if err = SyncReleasesWithTags(repo, gitRepo); err != nil {
|
||||
log.Error(4, "Failed to synchronize tags to releases for repository: %v", err)
|
||||
}
|
||||
UpdateRepoIndexer(repo)
|
||||
}
|
||||
|
||||
if err = repo.UpdateSize(); err != nil {
|
||||
|
@ -1883,6 +1887,7 @@ func DeleteRepository(doer *User, uid, repoID int64) error {
|
|||
go HookQueue.Add(repo.ID)
|
||||
}
|
||||
|
||||
DeleteRepoFromIndexer(repo)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -178,6 +178,8 @@ func (repo *Repository) UpdateRepoFile(doer *User, opts UpdateRepoFileOptions) (
|
|||
if err != nil {
|
||||
return fmt.Errorf("PushUpdate: %v", err)
|
||||
}
|
||||
UpdateRepoIndexer(repo)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,302 @@
|
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package models
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/git"
|
||||
"code.gitea.io/gitea/modules/base"
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
|
||||
"github.com/Unknwon/com"
|
||||
)
|
||||
|
||||
// RepoIndexerStatus records how far a repository has been indexed by the
// repo indexer. For now it implicitly refers to the default branch.
type RepoIndexerStatus struct {
	// ID is the auto-incremented primary key.
	ID int64 `xorm:"pk autoincr"`
	// RepoID is the ID of the repository this status belongs to.
	RepoID int64 `xorm:"INDEX"`
	// CommitSha is the commit the index was last synced to; empty means the
	// repository has not been indexed yet.
	CommitSha string `xorm:"VARCHAR(40)"`
}
|
||||
|
||||
func (repo *Repository) getIndexerStatus() error {
|
||||
if repo.IndexerStatus != nil {
|
||||
return nil
|
||||
}
|
||||
status := &RepoIndexerStatus{RepoID: repo.ID}
|
||||
has, err := x.Get(status)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if !has {
|
||||
status.CommitSha = ""
|
||||
}
|
||||
repo.IndexerStatus = status
|
||||
return nil
|
||||
}
|
||||
|
||||
func (repo *Repository) updateIndexerStatus(sha string) error {
|
||||
if err := repo.getIndexerStatus(); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(repo.IndexerStatus.CommitSha) == 0 {
|
||||
repo.IndexerStatus.CommitSha = sha
|
||||
_, err := x.Insert(repo.IndexerStatus)
|
||||
return err
|
||||
}
|
||||
repo.IndexerStatus.CommitSha = sha
|
||||
_, err := x.ID(repo.IndexerStatus.ID).Cols("commit_sha").
|
||||
Update(repo.IndexerStatus)
|
||||
return err
|
||||
}
|
||||
|
||||
type repoIndexerOperation struct {
|
||||
repo *Repository
|
||||
deleted bool
|
||||
}
|
||||
|
||||
var repoIndexerOperationQueue chan repoIndexerOperation
|
||||
|
||||
// InitRepoIndexer initialize the repo indexer
|
||||
func InitRepoIndexer() {
|
||||
if !setting.Indexer.RepoIndexerEnabled {
|
||||
return
|
||||
}
|
||||
indexer.InitRepoIndexer(populateRepoIndexer)
|
||||
repoIndexerOperationQueue = make(chan repoIndexerOperation, setting.Indexer.UpdateQueueLength)
|
||||
go processRepoIndexerOperationQueue()
|
||||
}
|
||||
|
||||
// populateRepoIndexer populate the repo indexer with data
|
||||
func populateRepoIndexer() error {
|
||||
log.Info("Populating repository indexer (this may take a while)")
|
||||
for page := 1; ; page++ {
|
||||
repos, _, err := SearchRepositoryByName(&SearchRepoOptions{
|
||||
Page: page,
|
||||
PageSize: 10,
|
||||
OrderBy: SearchOrderByID,
|
||||
Private: true,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
} else if len(repos) == 0 {
|
||||
return nil
|
||||
}
|
||||
for _, repo := range repos {
|
||||
if err = updateRepoIndexer(repo); err != nil {
|
||||
// only log error, since this should not prevent
|
||||
// gitea from starting up
|
||||
log.Error(4, "updateRepoIndexer: repoID=%d, %v", repo.ID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type updateBatch struct {
|
||||
updates []indexer.RepoIndexerUpdate
|
||||
}
|
||||
|
||||
func updateRepoIndexer(repo *Repository) error {
|
||||
changes, err := getRepoChanges(repo)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if changes == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
batch := indexer.RepoIndexerBatch()
|
||||
for _, filename := range changes.UpdatedFiles {
|
||||
if err := addUpdate(filename, repo, batch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, filename := range changes.RemovedFiles {
|
||||
if err := addDelete(filename, repo, batch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err = batch.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
return updateLastIndexSync(repo)
|
||||
}
|
||||
|
||||
// repoChanges describes the file-level changes to apply to the index for a
// repository.
type repoChanges struct {
	// UpdatedFiles lists files that were added or modified.
	UpdatedFiles []string
	// RemovedFiles lists files that were deleted.
	RemovedFiles []string
}
|
||||
|
||||
// getRepoChanges returns changes to repo since last indexer update
|
||||
func getRepoChanges(repo *Repository) (*repoChanges, error) {
|
||||
repoWorkingPool.CheckIn(com.ToStr(repo.ID))
|
||||
defer repoWorkingPool.CheckOut(com.ToStr(repo.ID))
|
||||
|
||||
if err := repo.UpdateLocalCopyBranch(""); err != nil {
|
||||
return nil, err
|
||||
} else if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) {
|
||||
// repo does not have any commits yet, so nothing to update
|
||||
return nil, nil
|
||||
} else if err = repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil {
|
||||
return nil, err
|
||||
} else if err = repo.getIndexerStatus(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(repo.IndexerStatus.CommitSha) == 0 {
|
||||
return genesisChanges(repo)
|
||||
}
|
||||
return nonGenesisChanges(repo)
|
||||
}
|
||||
|
||||
func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error {
|
||||
filepath := path.Join(repo.LocalCopyPath(), filename)
|
||||
if stat, err := os.Stat(filepath); err != nil {
|
||||
return err
|
||||
} else if stat.Size() > setting.Indexer.MaxIndexerFileSize {
|
||||
return nil
|
||||
}
|
||||
fileContents, err := ioutil.ReadFile(filepath)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if !base.IsTextFile(fileContents) {
|
||||
return nil
|
||||
}
|
||||
return batch.Add(indexer.RepoIndexerUpdate{
|
||||
Filepath: filename,
|
||||
Op: indexer.RepoIndexerOpUpdate,
|
||||
Data: &indexer.RepoIndexerData{
|
||||
RepoID: repo.ID,
|
||||
Content: string(fileContents),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func addDelete(filename string, repo *Repository, batch *indexer.Batch) error {
|
||||
return batch.Add(indexer.RepoIndexerUpdate{
|
||||
Filepath: filename,
|
||||
Op: indexer.RepoIndexerOpDelete,
|
||||
Data: &indexer.RepoIndexerData{
|
||||
RepoID: repo.ID,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// genesisChanges get changes to add repo to the indexer for the first time
|
||||
func genesisChanges(repo *Repository) (*repoChanges, error) {
|
||||
var changes repoChanges
|
||||
stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, line := range strings.Split(stdout, "\n") {
|
||||
filename := strings.TrimSpace(line)
|
||||
if len(filename) == 0 {
|
||||
continue
|
||||
} else if filename[0] == '"' {
|
||||
filename, err = strconv.Unquote(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
|
||||
}
|
||||
return &changes, nil
|
||||
}
|
||||
|
||||
// nonGenesisChanges get changes since the previous indexer update
|
||||
func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
|
||||
diffCmd := git.NewCommand("diff", "--name-status",
|
||||
repo.IndexerStatus.CommitSha, "HEAD")
|
||||
stdout, err := diffCmd.RunInDir(repo.LocalCopyPath())
|
||||
if err != nil {
|
||||
// previous commit sha may have been removed by a force push, so
|
||||
// try rebuilding from scratch
|
||||
if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return genesisChanges(repo)
|
||||
}
|
||||
var changes repoChanges
|
||||
for _, line := range strings.Split(stdout, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
filename := strings.TrimSpace(line[1:])
|
||||
if len(filename) == 0 {
|
||||
continue
|
||||
} else if filename[0] == '"' {
|
||||
filename, err = strconv.Unquote(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
switch status := line[0]; status {
|
||||
case 'M', 'A':
|
||||
changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
|
||||
case 'D':
|
||||
changes.RemovedFiles = append(changes.RemovedFiles, filename)
|
||||
default:
|
||||
log.Warn("Unrecognized status: %c (line=%s)", status, line)
|
||||
}
|
||||
}
|
||||
return &changes, nil
|
||||
}
|
||||
|
||||
func updateLastIndexSync(repo *Repository) error {
|
||||
stdout, err := git.NewCommand("rev-parse", "HEAD").RunInDir(repo.LocalCopyPath())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sha := strings.TrimSpace(stdout)
|
||||
return repo.updateIndexerStatus(sha)
|
||||
}
|
||||
|
||||
func processRepoIndexerOperationQueue() {
|
||||
for {
|
||||
op := <-repoIndexerOperationQueue
|
||||
if op.deleted {
|
||||
if err := indexer.DeleteRepoFromIndexer(op.repo.ID); err != nil {
|
||||
log.Error(4, "DeleteRepoFromIndexer: %v", err)
|
||||
}
|
||||
} else {
|
||||
if err := updateRepoIndexer(op.repo); err != nil {
|
||||
log.Error(4, "updateRepoIndexer: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteRepoFromIndexer remove all of a repository's entries from the indexer
|
||||
func DeleteRepoFromIndexer(repo *Repository) {
|
||||
addOperationToQueue(repoIndexerOperation{repo: repo, deleted: true})
|
||||
}
|
||||
|
||||
// UpdateRepoIndexer update a repository's entries in the indexer
|
||||
func UpdateRepoIndexer(repo *Repository) {
|
||||
addOperationToQueue(repoIndexerOperation{repo: repo, deleted: false})
|
||||
}
|
||||
|
||||
func addOperationToQueue(op repoIndexerOperation) {
|
||||
if !setting.Indexer.RepoIndexerEnabled {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case repoIndexerOperationQueue <- op:
|
||||
break
|
||||
default:
|
||||
go func() {
|
||||
repoIndexerOperationQueue <- op
|
||||
}()
|
||||
}
|
||||
}
|
|
@ -263,6 +263,10 @@ func pushUpdate(opts PushUpdateOptions) (repo *Repository, err error) {
|
|||
commits = ListToPushCommits(l)
|
||||
}
|
||||
|
||||
if opts.RefFullName == git.BranchPrefix+repo.DefaultBranch {
|
||||
UpdateRepoIndexer(repo)
|
||||
}
|
||||
|
||||
if err := CommitRepoAction(CommitRepoActionOptions{
|
||||
PusherName: opts.PusherName,
|
||||
RepoOwnerID: owner.ID,
|
||||
|
|
|
@ -367,6 +367,7 @@ func RepoAssignment() macaron.Handler {
|
|||
ctx.Data["DisableSSH"] = setting.SSH.Disabled
|
||||
ctx.Data["ExposeAnonSSH"] = setting.SSH.ExposeAnonymous
|
||||
ctx.Data["DisableHTTP"] = setting.Repository.DisableHTTPGit
|
||||
ctx.Data["RepoSearchEnabled"] = setting.Indexer.RepoIndexerEnabled
|
||||
ctx.Data["CloneLink"] = repo.CloneLink()
|
||||
ctx.Data["WikiCloneLink"] = repo.WikiCloneLink()
|
||||
|
||||
|
|
|
@ -0,0 +1,199 @@
|
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package indexer
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
|
||||
"github.com/blevesearch/bleve"
|
||||
"github.com/blevesearch/bleve/analysis/analyzer/custom"
|
||||
"github.com/blevesearch/bleve/analysis/token/camelcase"
|
||||
"github.com/blevesearch/bleve/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
|
||||
)
|
||||
|
||||
const repoIndexerAnalyzer = "repoIndexerAnalyzer"
|
||||
|
||||
// repoIndexer (thread-safe) index for repository contents
|
||||
var repoIndexer bleve.Index
|
||||
|
||||
// RepoIndexerOp is the type of operation to perform on the repo indexer.
type RepoIndexerOp int

// The constants are explicitly typed as RepoIndexerOp so they cannot be mixed
// up with plain integers.
const (
	// RepoIndexerOpUpdate add/update a file's contents
	RepoIndexerOpUpdate RepoIndexerOp = iota

	// RepoIndexerOpDelete delete a file
	RepoIndexerOpDelete
)
|
||||
|
||||
// RepoIndexerData is the document stored in the repo indexer for one file.
type RepoIndexerData struct {
	// RepoID is the ID of the repository the file belongs to.
	RepoID int64
	// Content is the text content of the file.
	Content string
}
|
||||
|
||||
// RepoIndexerUpdate an update to the repo indexer
|
||||
type RepoIndexerUpdate struct {
|
||||
Filepath string
|
||||
Op RepoIndexerOp
|
||||
Data *RepoIndexerData
|
||||
}
|
||||
|
||||
func (update RepoIndexerUpdate) addToBatch(batch *bleve.Batch) error {
|
||||
id := filenameIndexerID(update.Data.RepoID, update.Filepath)
|
||||
switch update.Op {
|
||||
case RepoIndexerOpUpdate:
|
||||
return batch.Index(id, update.Data)
|
||||
case RepoIndexerOpDelete:
|
||||
batch.Delete(id)
|
||||
default:
|
||||
log.Error(4, "Unrecognized repo indexer op: %d", update.Op)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// InitRepoIndexer initialize repo indexer
|
||||
func InitRepoIndexer(populateIndexer func() error) {
|
||||
_, err := os.Stat(setting.Indexer.RepoPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if err = createRepoIndexer(); err != nil {
|
||||
log.Fatal(4, "CreateRepoIndexer: %v", err)
|
||||
}
|
||||
if err = populateIndexer(); err != nil {
|
||||
log.Fatal(4, "PopulateRepoIndex: %v", err)
|
||||
}
|
||||
} else {
|
||||
log.Fatal(4, "InitRepoIndexer: %v", err)
|
||||
}
|
||||
} else {
|
||||
repoIndexer, err = bleve.Open(setting.Indexer.RepoPath)
|
||||
if err != nil {
|
||||
log.Fatal(4, "InitRepoIndexer, open index: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// createRepoIndexer create a repo indexer if one does not already exist
|
||||
func createRepoIndexer() error {
|
||||
docMapping := bleve.NewDocumentMapping()
|
||||
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
|
||||
|
||||
textFieldMapping := bleve.NewTextFieldMapping()
|
||||
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
|
||||
|
||||
mapping := bleve.NewIndexMapping()
|
||||
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
|
||||
return err
|
||||
} else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
|
||||
"type": custom.Name,
|
||||
"char_filters": []string{},
|
||||
"tokenizer": unicode.Name,
|
||||
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
mapping.DefaultAnalyzer = repoIndexerAnalyzer
|
||||
mapping.AddDocumentMapping("repo", docMapping)
|
||||
var err error
|
||||
repoIndexer, err = bleve.New(setting.Indexer.RepoPath, mapping)
|
||||
return err
|
||||
}
|
||||
|
||||
func filenameIndexerID(repoID int64, filename string) string {
|
||||
return indexerID(repoID) + "_" + filename
|
||||
}
|
||||
|
||||
func filenameOfIndexerID(indexerID string) string {
|
||||
index := strings.IndexByte(indexerID, '_')
|
||||
if index == -1 {
|
||||
log.Error(4, "Unexpected ID in repo indexer: %s", indexerID)
|
||||
}
|
||||
return indexerID[index+1:]
|
||||
}
|
||||
|
||||
// RepoIndexerBatch batch to add updates to
|
||||
func RepoIndexerBatch() *Batch {
|
||||
return &Batch{
|
||||
batch: repoIndexer.NewBatch(),
|
||||
index: repoIndexer,
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteRepoFromIndexer delete all of a repo's files from indexer
|
||||
func DeleteRepoFromIndexer(repoID int64) error {
|
||||
query := numericEqualityQuery(repoID, "RepoID")
|
||||
searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false)
|
||||
result, err := repoIndexer.Search(searchRequest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
batch := RepoIndexerBatch()
|
||||
for _, hit := range result.Hits {
|
||||
batch.batch.Delete(hit.ID)
|
||||
if err = batch.flushIfFull(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return batch.Flush()
|
||||
}
|
||||
|
||||
// RepoSearchResult is one matching file returned by a search in a repo.
type RepoSearchResult struct {
	// StartIndex is the smallest start offset among the match locations
	// within Content.
	StartIndex int
	// EndIndex is the largest end offset among the match locations within
	// Content.
	EndIndex int
	// Filename is the file name taken from the hit's document ID.
	Filename string
	// Content is the stored content of the file.
	Content string
}
|
||||
|
||||
// SearchRepoByKeyword searches for files in the specified repo.
|
||||
// Returns the matching file-paths
|
||||
func SearchRepoByKeyword(repoID int64, keyword string, page, pageSize int) (int64, []*RepoSearchResult, error) {
|
||||
phraseQuery := bleve.NewMatchPhraseQuery(keyword)
|
||||
phraseQuery.FieldVal = "Content"
|
||||
phraseQuery.Analyzer = repoIndexerAnalyzer
|
||||
indexerQuery := bleve.NewConjunctionQuery(
|
||||
numericEqualityQuery(repoID, "RepoID"),
|
||||
phraseQuery,
|
||||
)
|
||||
from := (page - 1) * pageSize
|
||||
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
|
||||
searchRequest.Fields = []string{"Content"}
|
||||
searchRequest.IncludeLocations = true
|
||||
|
||||
result, err := repoIndexer.Search(searchRequest)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
searchResults := make([]*RepoSearchResult, len(result.Hits))
|
||||
for i, hit := range result.Hits {
|
||||
var startIndex, endIndex int = -1, -1
|
||||
for _, locations := range hit.Locations["Content"] {
|
||||
location := locations[0]
|
||||
locationStart := int(location.Start)
|
||||
locationEnd := int(location.End)
|
||||
if startIndex < 0 || locationStart < startIndex {
|
||||
startIndex = locationStart
|
||||
}
|
||||
if endIndex < 0 || locationEnd > endIndex {
|
||||
endIndex = locationEnd
|
||||
}
|
||||
}
|
||||
searchResults[i] = &RepoSearchResult{
|
||||
StartIndex: startIndex,
|
||||
EndIndex: endIndex,
|
||||
Filename: filenameOfIndexerID(hit.ID),
|
||||
Content: hit.Fields["Content"].(string),
|
||||
}
|
||||
}
|
||||
return int64(result.Total), searchResults, nil
|
||||
}
|
|
@ -0,0 +1,128 @@
|
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package search
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
gotemplate "html/template"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/highlight"
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
)
|
||||
|
||||
// Result is a search result prepared for display.
type Result struct {
	// Filename is the name of the matching file.
	Filename string
	// HighlightClass is the highlight class derived from Filename.
	HighlightClass string
	// LineNumbers holds the line number of each displayed line.
	LineNumbers []int
	// FormattedLines is the HTML for the displayed lines, one <li> per line.
	FormattedLines gotemplate.HTML
}
|
||||
|
||||
// indices widens the selection [selectionStartIndex, selectionEndIndex) of
// content to include one line of context on each side. The returned start is
// the beginning of the line before the selection (or 0), and the returned end
// is the position of the newline that terminates the line after the
// selection (or len(content)).
//
// Selection indices outside content (for example -1 when no match location
// was found) are clamped into [0, len(content)], and an end before the start
// is raised to the start, so the result is always safe for slicing content.
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
	startIndex := selectionStartIndex
	if startIndex < 0 {
		startIndex = 0
	} else if startIndex > len(content) {
		startIndex = len(content)
	}
	endIndex := selectionEndIndex
	if endIndex > len(content) {
		endIndex = len(content)
	}
	if endIndex < startIndex {
		endIndex = startIndex
	}

	// Walk back past one newline; stop right after the second one.
	numLinesBefore := 0
	for ; startIndex > 0; startIndex-- {
		if content[startIndex-1] == '\n' {
			if numLinesBefore == 1 {
				break
			}
			numLinesBefore++
		}
	}

	// Walk forward past one newline; stop on the second one.
	numLinesAfter := 0
	for ; endIndex < len(content); endIndex++ {
		if content[endIndex] == '\n' {
			if numLinesAfter == 1 {
				break
			}
			numLinesAfter++
		}
	}

	return startIndex, endIndex
}
|
||||
|
||||
// writeStrings appends each of strs to buf in order, stopping at and
// returning the first write error.
func writeStrings(buf *bytes.Buffer, strs ...string) error {
	for i := range strs {
		if _, err := buf.WriteString(strs[i]); err != nil {
			return err
		}
	}
	return nil
}
|
||||
|
||||
func searchResult(result *indexer.RepoSearchResult, startIndex, endIndex int) (*Result, error) {
|
||||
startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")
|
||||
|
||||
var formattedLinesBuffer bytes.Buffer
|
||||
|
||||
contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
|
||||
lineNumbers := make([]int, len(contentLines))
|
||||
index := startIndex
|
||||
for i, line := range contentLines {
|
||||
var err error
|
||||
if index < result.EndIndex &&
|
||||
result.StartIndex < index+len(line) &&
|
||||
result.StartIndex < result.EndIndex {
|
||||
openActiveIndex := util.Max(result.StartIndex-index, 0)
|
||||
closeActiveIndex := util.Min(result.EndIndex-index, len(line))
|
||||
err = writeStrings(&formattedLinesBuffer,
|
||||
`<li>`,
|
||||
line[:openActiveIndex],
|
||||
`<span class='active'>`,
|
||||
line[openActiveIndex:closeActiveIndex],
|
||||
`</span>`,
|
||||
line[closeActiveIndex:],
|
||||
`</li>`,
|
||||
)
|
||||
} else {
|
||||
err = writeStrings(&formattedLinesBuffer,
|
||||
`<li>`,
|
||||
line,
|
||||
`</li>`,
|
||||
)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
lineNumbers[i] = startLineNum + i
|
||||
index += len(line)
|
||||
}
|
||||
return &Result{
|
||||
Filename: result.Filename,
|
||||
HighlightClass: highlight.FileNameToHighlightClass(result.Filename),
|
||||
LineNumbers: lineNumbers,
|
||||
FormattedLines: gotemplate.HTML(formattedLinesBuffer.String()),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// PerformSearch perform a search on a repository
|
||||
func PerformSearch(repoID int64, keyword string, page, pageSize int) (int, []*Result, error) {
|
||||
if len(keyword) == 0 {
|
||||
return 0, nil, nil
|
||||
}
|
||||
|
||||
total, results, err := indexer.SearchRepoByKeyword(repoID, keyword, page, pageSize)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
displayResults := make([]*Result, len(results))
|
||||
|
||||
for i, result := range results {
|
||||
startIndex, endIndex := indices(result.Content, result.StartIndex, result.EndIndex)
|
||||
displayResults[i], err = searchResult(result, startIndex, endIndex)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
return int(total), displayResults, nil
|
||||
}
|
|
@ -141,7 +141,10 @@ var (
|
|||
// Indexer settings
|
||||
Indexer struct {
|
||||
IssuePath string
|
||||
RepoIndexerEnabled bool
|
||||
RepoPath string
|
||||
UpdateQueueLength int
|
||||
MaxIndexerFileSize int64
|
||||
}
|
||||
|
||||
// Webhook settings
|
||||
|
@ -236,6 +239,7 @@ var (
|
|||
UI = struct {
|
||||
ExplorePagingNum int
|
||||
IssuePagingNum int
|
||||
RepoSearchPagingNum int
|
||||
FeedMaxCommitNum int
|
||||
ThemeColorMetaTag string
|
||||
MaxDisplayFileSize int64
|
||||
|
@ -258,6 +262,7 @@ var (
|
|||
}{
|
||||
ExplorePagingNum: 20,
|
||||
IssuePagingNum: 10,
|
||||
RepoSearchPagingNum: 10,
|
||||
FeedMaxCommitNum: 5,
|
||||
ThemeColorMetaTag: `#6cc644`,
|
||||
MaxDisplayFileSize: 8388608,
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package util
|
||||
|
||||
import "path/filepath"
|
||||
|
||||
// EnsureAbsolutePath returns path unchanged when it is already absolute;
// otherwise it returns path joined onto absoluteBase.
func EnsureAbsolutePath(path string, absoluteBase string) string {
	if !filepath.IsAbs(path) {
		path = filepath.Join(absoluteBase, path)
	}
	return path
}
|
|
@ -38,3 +38,19 @@ func OptionalBoolOf(b bool) OptionalBool {
|
|||
}
|
||||
return OptionalBoolFalse
|
||||
}
|
||||
|
||||
// Max returns the larger of two ints.
func Max(a, b int) int {
	larger := a
	if b > larger {
		larger = b
	}
	return larger
}
|
||||
|
||||
// Min returns the smaller of two ints.
func Min(a, b int) int {
	smaller := a
	if b < smaller {
		smaller = b
	}
	return smaller
}
|
||||
|
|
|
@ -848,6 +848,10 @@ activity.title.releases_n = %d Releases
|
|||
activity.title.releases_published_by = %s published by %s
|
||||
activity.published_release_label = Published
|
||||
|
||||
search = Search
|
||||
search.search_repo = Search repository
|
||||
search.results = Search results for "%s" in <a href="%s">%s</a>
|
||||
|
||||
settings = Settings
|
||||
settings.desc = Settings is where you can manage the settings for the repository
|
||||
settings.options = Options
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -158,6 +158,11 @@
|
|||
}
|
||||
|
||||
&.file.list {
|
||||
.repo-description {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
#repo-desc {
|
||||
font-size: 1.2em;
|
||||
}
|
||||
|
@ -226,7 +231,7 @@
|
|||
}
|
||||
}
|
||||
|
||||
#file-content {
|
||||
.non-diff-file-content {
|
||||
.header {
|
||||
.icon {
|
||||
font-size: 1em;
|
||||
|
@ -323,6 +328,9 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
.active {
|
||||
background: #ffffdd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1038,6 +1046,13 @@
|
|||
overflow-x: auto;
|
||||
overflow-y: hidden;
|
||||
}
|
||||
.repo-search-result {
|
||||
padding-top: 10px;
|
||||
padding-bottom: 10px;
|
||||
.lines-num a {
|
||||
color: inherit;
|
||||
}
|
||||
}
|
||||
|
||||
&.quickstart {
|
||||
.guide {
|
||||
|
|
|
@ -66,6 +66,7 @@ func GlobalInit() {
|
|||
// Booting long running goroutines.
|
||||
cron.NewContext()
|
||||
models.InitIssueIndexer()
|
||||
models.InitRepoIndexer()
|
||||
models.InitSyncMirrors()
|
||||
models.InitDeliverHooks()
|
||||
models.InitTestPullRequests()
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package repo
|
||||
|
||||
import (
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/base"
|
||||
"code.gitea.io/gitea/modules/context"
|
||||
"code.gitea.io/gitea/modules/search"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
|
||||
"github.com/Unknwon/paginater"
|
||||
)
|
||||
|
||||
const tplSearch base.TplName = "repo/search"
|
||||
|
||||
// Search render repository search page
|
||||
func Search(ctx *context.Context) {
|
||||
if !setting.Indexer.RepoIndexerEnabled {
|
||||
ctx.Redirect(ctx.Repo.RepoLink, 302)
|
||||
return
|
||||
}
|
||||
keyword := strings.TrimSpace(ctx.Query("q"))
|
||||
page := ctx.QueryInt("page")
|
||||
if page <= 0 {
|
||||
page = 1
|
||||
}
|
||||
total, searchResults, err := search.PerformSearch(ctx.Repo.Repository.ID, keyword, page, setting.UI.RepoSearchPagingNum)
|
||||
if err != nil {
|
||||
ctx.Handle(500, "SearchResults", err)
|
||||
return
|
||||
}
|
||||
ctx.Data["Keyword"] = keyword
|
||||
pager := paginater.New(total, setting.UI.RepoSearchPagingNum, page, 5)
|
||||
ctx.Data["Page"] = pager
|
||||
ctx.Data["SourcePath"] = setting.AppSubURL + "/" +
|
||||
path.Join(ctx.Repo.Repository.Owner.Name, ctx.Repo.Repository.Name, "src", ctx.Repo.Repository.DefaultBranch)
|
||||
ctx.Data["SearchResults"] = searchResults
|
||||
ctx.Data["RequireHighlightJS"] = true
|
||||
ctx.Data["PageIsViewCode"] = true
|
||||
ctx.HTML(200, tplSearch)
|
||||
}
|
|
@ -649,6 +649,7 @@ func RegisterRoutes(m *macaron.Macaron) {
|
|||
m.Group("/:username/:reponame", func() {
|
||||
m.Get("/stars", repo.Stars)
|
||||
m.Get("/watchers", repo.Watchers)
|
||||
m.Get("/search", context.CheckUnit(models.UnitTypeCode), repo.Search)
|
||||
}, ignSignIn, context.RepoAssignment(), context.RepoRef(), context.UnitTypes(), context.LoadRepoUnits())
|
||||
|
||||
m.Group("/:username", func() {
|
||||
|
|
|
@ -3,10 +3,26 @@
|
|||
{{template "repo/header" .}}
|
||||
<div class="ui container">
|
||||
{{template "base/alert" .}}
|
||||
<p id="repo-desc">
|
||||
<div class="ui repo-description">
|
||||
<div id="repo-desc">
|
||||
{{if .Repository.DescriptionHTML}}<span class="description has-emoji">{{.Repository.DescriptionHTML}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{.i18n.Tr "repo.no_desc"}}</span>{{end}}
|
||||
<a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a>
|
||||
</p>
|
||||
</div>
|
||||
{{if .RepoSearchEnabled}}
|
||||
<div class="ui repo-search">
|
||||
<form class="ui form" action="{{.RepoLink}}/search" method="get">
|
||||
<div class="field">
|
||||
<div class="ui action input">
|
||||
<input name="q" value="{{.Keyword}}" placeholder="{{.i18n.Tr "repo.search.search_repo"}}">
|
||||
<button class="ui icon button" type="submit">
|
||||
<i class="search icon"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{template "repo/sub_menu" .}}
|
||||
<div class="ui secondary menu">
|
||||
{{if .PullRequestCtx.Allowed}}
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
{{template "base/head" .}}
|
||||
<div class="repository file list">
|
||||
{{template "repo/header" .}}
|
||||
<div class="ui container">
|
||||
<div class="ui repo-search">
|
||||
<form class="ui form" method="get">
|
||||
<div class="ui fluid action input">
|
||||
<input name="q" value="{{.Keyword}}" placeholder="{{.i18n.Tr "repo.search.search_repo"}}">
|
||||
<button class="ui button" type="submit">
|
||||
<i class="search icon"></i>
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
{{if .Keyword}}
|
||||
<h3>
|
||||
{{.i18n.Tr "repo.search.results" .Keyword .RepoLink .RepoName | Str2html}}
|
||||
</h3>
|
||||
<div class="repository search">
|
||||
{{range $result := .SearchResults}}
|
||||
<div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result">
|
||||
<h4 class="ui top attached normal header">
|
||||
<span class="file">{{.Filename}}</span>
|
||||
<a class="ui basic grey tiny button" rel="nofollow" href="{{EscapePound $.SourcePath}}/{{EscapePound .Filename}}">{{$.i18n.Tr "repo.diff.view_file"}}</a>
|
||||
</h4>
|
||||
<div class="ui attached table segment">
|
||||
<div class="file-body file-code code-view">
|
||||
<table>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="lines-num">
|
||||
{{range .LineNumbers}}
|
||||
<a href="{{EscapePound $.SourcePath}}/{{EscapePound $result.Filename}}#L{{.}}"><span>{{.}}</span></a>
|
||||
{{end}}
|
||||
</td>
|
||||
<td class="lines-code"><pre><code class="{{.HighlightClass}}"><ol class="linenums">{{.FormattedLines}}</ol></code></pre></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{template "base/paginate" .}}
|
||||
{{end}}
|
||||
</div>
|
||||
</div>
|
||||
{{template "base/footer" .}}
|
|
@ -1,4 +1,4 @@
|
|||
<div id="file-content" class="{{TabSizeClass .Editorconfig .FileName}}">
|
||||
<div class="{{TabSizeClass .Editorconfig .FileName}} non-diff-file-content">
|
||||
<h4 class="ui top attached header" id="{{if .ReadmeExist}}repo-readme{{else}}repo-read-file{{end}}">
|
||||
{{if .ReadmeExist}}
|
||||
<i class="book icon ui left"></i>
|
||||
|
|
78
vendor/github.com/blevesearch/bleve/analysis/token/camelcase/camelcase.go
generated
vendored
Normal file
78
vendor/github.com/blevesearch/bleve/analysis/token/camelcase/camelcase.go
generated
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package camelcase
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
// Name is the identifier under which the camel case token filter is
// registered.
const Name = "camelCase"

// CamelCaseFilter splits each incoming token into a sequence of tokens, each
// of which belongs to exactly one of the following classes:
//
//  1. An upper case letter followed by lower case letters.
//     Ends at a number, an upper case letter, or a non alpha-numeric symbol.
//  2. An upper case letter followed by upper case letters.
//     Ends at a number, an upper case letter that is followed by a lower case
//     letter, or a non alpha-numeric symbol.
//  3. A lower case letter followed by lower case letters.
//     Ends at a number, an upper case letter, or a non alpha-numeric symbol.
//  4. A number followed by numbers.
//     Ends at a letter or a non alpha-numeric symbol.
//  5. A non alpha-numeric symbol followed by non alpha-numeric symbols.
//     Ends at a number or a letter.
//
// Every input token is scanned once, left to right. The scan is greedy: it
// produces the longest substring that fits one of the classes.
//
// See the test file for examples of classes and their parsings.
type CamelCaseFilter struct{}
|
||||
|
||||
func NewCamelCaseFilter() *CamelCaseFilter {
|
||||
return &CamelCaseFilter{}
|
||||
}
|
||||
|
||||
func (f *CamelCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
rv := make(analysis.TokenStream, 0, len(input))
|
||||
|
||||
nextPosition := 1
|
||||
for _, token := range input {
|
||||
runeCount := utf8.RuneCount(token.Term)
|
||||
runes := bytes.Runes(token.Term)
|
||||
|
||||
p := NewParser(runeCount, nextPosition, token.Start)
|
||||
for i := 0; i < runeCount; i++ {
|
||||
if i+1 >= runeCount {
|
||||
p.Push(runes[i], nil)
|
||||
} else {
|
||||
p.Push(runes[i], &runes[i+1])
|
||||
}
|
||||
}
|
||||
rv = append(rv, p.FlushTokens()...)
|
||||
nextPosition = p.NextPosition()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func CamelCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewCamelCaseFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(Name, CamelCaseFilterConstructor)
|
||||
}
|
109
vendor/github.com/blevesearch/bleve/analysis/token/camelcase/parser.go
generated
vendored
Normal file
109
vendor/github.com/blevesearch/bleve/analysis/token/camelcase/parser.go
generated
vendored
Normal file
|
@ -0,0 +1,109 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package camelcase
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
)
|
||||
|
||||
func (p *Parser) buildTokenFromTerm(buffer []rune) *analysis.Token {
|
||||
term := analysis.BuildTermFromRunes(buffer)
|
||||
token := &analysis.Token{
|
||||
Term: term,
|
||||
Position: p.position,
|
||||
Start: p.index,
|
||||
End: p.index + len(term),
|
||||
}
|
||||
p.position++
|
||||
p.index += len(term)
|
||||
return token
|
||||
}
|
||||
|
||||
// Parser accepts a symbol and passes it to the current state (representing a class).
|
||||
// The state can accept it (and accumulate it). Otherwise, the parser creates a new state that
|
||||
// starts with the pushed symbol.
|
||||
//
|
||||
// Parser accumulates a new resulting token every time it switches state.
|
||||
// Use FlushTokens() to get the results after the last symbol was pushed.
|
||||
type Parser struct {
|
||||
bufferLen int
|
||||
buffer []rune
|
||||
current State
|
||||
tokens []*analysis.Token
|
||||
position int
|
||||
index int
|
||||
}
|
||||
|
||||
func NewParser(len, position, index int) *Parser {
|
||||
return &Parser{
|
||||
bufferLen: len,
|
||||
buffer: make([]rune, 0, len),
|
||||
tokens: make([]*analysis.Token, 0, len),
|
||||
position: position,
|
||||
index: index,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Parser) Push(sym rune, peek *rune) {
|
||||
if p.current == nil {
|
||||
// the start of parsing
|
||||
p.current = p.NewState(sym)
|
||||
p.buffer = append(p.buffer, sym)
|
||||
|
||||
} else if p.current.Member(sym, peek) {
|
||||
// same state, just accumulate
|
||||
p.buffer = append(p.buffer, sym)
|
||||
|
||||
} else {
|
||||
// the old state is no more, thus convert the buffer
|
||||
p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer))
|
||||
|
||||
// let the new state begin
|
||||
p.current = p.NewState(sym)
|
||||
p.buffer = make([]rune, 0, p.bufferLen)
|
||||
p.buffer = append(p.buffer, sym)
|
||||
}
|
||||
}
|
||||
|
||||
// Note. States have to have different starting symbols.
|
||||
func (p *Parser) NewState(sym rune) State {
|
||||
var found State
|
||||
|
||||
found = &LowerCaseState{}
|
||||
if found.StartSym(sym) {
|
||||
return found
|
||||
}
|
||||
|
||||
found = &UpperCaseState{}
|
||||
if found.StartSym(sym) {
|
||||
return found
|
||||
}
|
||||
|
||||
found = &NumberCaseState{}
|
||||
if found.StartSym(sym) {
|
||||
return found
|
||||
}
|
||||
|
||||
return &NonAlphaNumericCaseState{}
|
||||
}
|
||||
|
||||
func (p *Parser) FlushTokens() []*analysis.Token {
|
||||
p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer))
|
||||
return p.tokens
|
||||
}
|
||||
|
||||
func (p *Parser) NextPosition() int {
|
||||
return p.position
|
||||
}
|
87
vendor/github.com/blevesearch/bleve/analysis/token/camelcase/states.go
generated
vendored
Normal file
87
vendor/github.com/blevesearch/bleve/analysis/token/camelcase/states.go
generated
vendored
Normal file
|
@ -0,0 +1,87 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package camelcase
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// State describes one of the classes the parser recognizes.
type State interface {
	// StartSym reports whether sym can be the first character of the class.
	StartSym(sym rune) bool

	// Member reports whether sym belongs to the class. peek is the next
	// symbol on the tape (nil when there is none) and may also be consulted
	// to decide membership.
	Member(sym rune, peek *rune) bool
}
|
||||
|
||||
// LowerCaseState is the class of lower case letters.
type LowerCaseState struct{}

// Member reports whether sym is a lower case letter. peek is not consulted.
func (s *LowerCaseState) Member(sym rune, peek *rune) bool {
	isLower := unicode.IsLower(sym)
	return isLower
}

// StartSym reports whether sym can start the class; the rule is the same as
// for membership.
func (s *LowerCaseState) StartSym(sym rune) bool {
	return unicode.IsLower(sym)
}
|
||||
|
||||
// UpperCaseState is the class of tokens that begin with an upper case letter.
// The continuation is either all lower case or all upper case; which of the
// two is decided by the first symbol Member accepts.
type UpperCaseState struct {
	startedCollecting bool // set once the first continuation symbol has been accepted
	collectingUpper   bool // true when the continuation consists of upper case letters
}

// Member reports whether sym continues the current token.
//
// Anything that is neither a lower nor an upper case letter is rejected. So
// is an upper case letter directly followed by a lower case one (per peek).
// The first accepted symbol fixes the case of the continuation; afterwards
// only symbols of that same case are accepted.
func (s *UpperCaseState) Member(sym rune, peek *rune) bool {
	symIsUpper := unicode.IsUpper(sym)
	if !symIsUpper && !unicode.IsLower(sym) {
		return false
	}

	if symIsUpper && peek != nil && unicode.IsLower(*peek) {
		return false
	}

	if s.startedCollecting {
		return s.collectingUpper == symIsUpper
	}

	// First continuation symbol: remember which case is being collected.
	s.startedCollecting = true
	s.collectingUpper = symIsUpper
	return true
}

// StartSym reports whether sym is an upper case letter.
func (s *UpperCaseState) StartSym(sym rune) bool {
	isUpper := unicode.IsUpper(sym)
	return isUpper
}
|
||||
|
||||
// NumberCaseState is the class of numbers.
type NumberCaseState struct{}

// Member reports whether sym is a number according to unicode.IsNumber. peek
// is not consulted.
func (s *NumberCaseState) Member(sym rune, peek *rune) bool {
	isNumber := unicode.IsNumber(sym)
	return isNumber
}

// StartSym reports whether sym can start the class; the rule is the same as
// for membership.
func (s *NumberCaseState) StartSym(sym rune) bool {
	return unicode.IsNumber(sym)
}
|
||||
|
||||
// NonAlphaNumericCaseState is the class of symbols that are neither lower
// case letters, upper case letters nor numbers.
type NonAlphaNumericCaseState struct{}

// Member reports whether sym is none of lower case, upper case or a number.
// peek is not consulted.
func (s *NonAlphaNumericCaseState) Member(sym rune, peek *rune) bool {
	alphaNumeric := unicode.IsLower(sym) || unicode.IsUpper(sym) || unicode.IsNumber(sym)
	return !alphaNumeric
}

// StartSym reports whether sym can start the class; the rule is the same as
// for membership.
func (s *NonAlphaNumericCaseState) StartSym(sym rune) bool {
	return !(unicode.IsLower(sym) || unicode.IsUpper(sym) || unicode.IsNumber(sym))
}
|
|
@ -98,6 +98,12 @@
|
|||
"revision": "011b168f7b84ffef05aed6716d73d21b1a33e971",
|
||||
"revisionTime": "2017-06-14T16:31:07Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "xj8o/nQj59yt+o+RZSa0n9V3vKY=",
|
||||
"path": "github.com/blevesearch/bleve/analysis/token/camelcase",
|
||||
"revision": "174f8ed44a0bf65e7c8fb228b60b58de62654cd2",
|
||||
"revisionTime": "2017-06-28T17:18:15Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "3VIPkl12t1ko4y6DkbPcz+MtQjY=",
|
||||
"path": "github.com/blevesearch/bleve/analysis/token/lowercase",
|
||||
|
|
Loading…
Reference in New Issue