Improve issue & code search (#33860)
Some checks are pending
release-nightly / nightly-binary (push) Waiting to run
release-nightly / nightly-docker-rootful (push) Waiting to run
release-nightly / nightly-docker-rootless (push) Waiting to run

Each "indexer" should itself declare the "search modes" it supports.
The "fuzzy" search for code also needs to be removed.
This commit is contained in:
wxiaoguang
2025-03-13 11:07:48 +08:00
committed by GitHub
parent cd10456664
commit 403775e74e
31 changed files with 317 additions and 172 deletions

View File

@ -16,6 +16,7 @@ import (
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
@ -24,7 +25,6 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"
"github.com/go-enry/go-enry/v2"
"github.com/olivere/elastic/v7"
@ -46,6 +46,10 @@ type Indexer struct {
indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
}
// SupportedSearchModes reports the search modes offered by this
// elasticsearch code indexer. The list is taken verbatim from
// indexer.SearchModesExactWords; no fuzzy mode is added here.
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
	modes := indexer.SearchModesExactWords()
	return modes
}
// NewIndexer creates a new elasticsearch indexer
func NewIndexer(url, indexerName string) *Indexer {
inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping)
@ -361,15 +365,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
// Search searches for codes and language stats by given conditions.
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
var contentQuery elastic.Query
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
if isPhrase {
contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase)
} else {
// TODO: this is the old logic, but not really using "fuzziness"
// * IsKeywordFuzzy=true: "best_fields"
// * IsKeywordFuzzy=false: "phrase_prefix"
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).
Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix))
if opts.SearchMode == indexer.SearchModeExact {
contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword)
} else /* words */ {
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and")
}
kwQuery := elastic.NewBoolQuery().Should(
contentQuery,