fix #4479: add fuzzy keyword search to title

This commit is contained in:
silkentrance 2024-07-01 19:50:00 +02:00
parent e82f3caa6b
commit aa8cc1ff5a
5 changed files with 24 additions and 13 deletions

View File

@ -28,6 +28,14 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query
return q
}
// FuzzyQuery generates a fuzzy query for the given phrase, field, and fuzziness
func FuzzyQuery(matchPhrase, field string, fuzziness int) *query.FuzzyQuery {
q := bleve.NewFuzzyQuery(matchPhrase)
q.FieldVal = field
q.Fuzziness = fuzziness
return q
}
// BoolFieldQuery generates a bool field query for the given value and field
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
q := bleve.NewBoolFieldQuery(value)

View File

@ -50,12 +50,14 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
func GuessFuzzinessByKeyword(s string) int {
// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot
// which we need to live with, as we need to support not just ASCII
// in case of code points >= 128 we will increase the fuzziness to 2
// the standard is 1
for _, r := range s {
if r >= 128 {
return 0
return 2
}
}
return min(2, len(s)/4)
return 1
}

View File

@ -162,7 +162,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
}
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
inner_bleve.FuzzyQuery(options.Keyword, "title", fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
}...))

View File

@ -209,13 +209,13 @@ func searchIssueIsPull(t *testing.T) {
SearchOptions{
IsPull: optional.Some(false),
},
[]int64{17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
[]int64{25, 24, 23, 17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
},
{
SearchOptions{
IsPull: optional.Some(true),
},
[]int64{22, 21, 12, 11, 20, 19, 9, 8, 3, 2},
[]int64{22, 21, 28, 27, 26, 12, 11, 20, 19, 9, 8, 3, 2},
},
}
for _, test := range tests {
@ -236,7 +236,7 @@ func searchIssueIsClosed(t *testing.T) {
SearchOptions{
IsClosed: optional.Some(false),
},
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
},
{
SearchOptions{
@ -302,7 +302,7 @@ func searchIssueByLabelID(t *testing.T) {
SearchOptions{
ExcludedLabelIDs: []int64{1},
},
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
},
}
for _, test := range tests {
@ -323,7 +323,7 @@ func searchIssueByTime(t *testing.T) {
SearchOptions{
UpdatedAfterUnix: optional.Some(int64(0)),
},
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
},
}
for _, test := range tests {
@ -344,7 +344,7 @@ func searchIssueWithOrder(t *testing.T) {
SearchOptions{
SortBy: internal.SortByCreatedAsc,
},
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 21, 22},
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 26, 27, 28, 21, 22, 23, 24, 25},
},
}
for _, test := range tests {
@ -401,8 +401,8 @@ func searchIssueWithPaginator(t *testing.T) {
PageSize: 5,
},
},
[]int64{22, 21, 17, 16, 15},
22,
[]int64{25, 24, 23, 22, 21},
28,
},
}
for _, test := range tests {

View File

@ -2677,6 +2677,7 @@ func SearchIssues(ctx *context.Context) {
MilestoneIDs: includedMilestones,
ProjectID: projectID,
SortBy: issue_indexer.SortByCreatedDesc,
IsFuzzyKeyword: true,
}
if since != 0 {