diff --git a/.changeset/four-bees-look.md b/.changeset/four-bees-look.md new file mode 100644 index 0000000000..23531de32a --- /dev/null +++ b/.changeset/four-bees-look.md @@ -0,0 +1,5 @@ +--- +'@backstage/plugin-search-backend-module-elasticsearch': minor +--- + +Add the option to configure the fuzziness of the Elasticsearch results by defining the `fuzziness` and `prefixLength` properties. diff --git a/.github/vale/config/vocabularies/Backstage/accept.txt b/.github/vale/config/vocabularies/Backstage/accept.txt index 1091913e8a..ad6a9f5fde 100644 --- a/.github/vale/config/vocabularies/Backstage/accept.txt +++ b/.github/vale/config/vocabularies/Backstage/accept.txt @@ -223,6 +223,7 @@ learnings Leasot lerna Lerna +Levenshtein lightbox Lightsail limitranges diff --git a/docs/features/search/search-engines.md b/docs/features/search/search-engines.md index 55cab198a0..b61be1e0c8 100644 --- a/docs/features/search/search-engines.md +++ b/docs/features/search/search-engines.md @@ -255,3 +255,19 @@ search: ``` After applying this setting, an index name would look like this: `custom-prefix-software-catalog-index__20250219` + +### Elasticsearch query config + +By default, Elasticsearch's default query settings are used. If you need to tweak the fuzziness of the query results, you can do so with two parameters: `fuzziness` and `prefixLength`. + +`fuzziness` allows you to define the maximum Levenshtein distance; `AUTO` is the default and the widely accepted standard. +`prefixLength` allows you to control the minimum number of characters that must match exactly at the beginning of the query term. 
This defaults to 0. +[More info](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html) + +```yaml +search: + elasticsearch: + queryOptions: + fuzziness: AUTO + prefixLength: 3 +``` diff --git a/plugins/search-backend-module-elasticsearch/config.d.ts b/plugins/search-backend-module-elasticsearch/config.d.ts index e0717e33cc..731ad1c77e 100644 --- a/plugins/search-backend-module-elasticsearch/config.d.ts +++ b/plugins/search-backend-module-elasticsearch/config.d.ts @@ -47,6 +47,26 @@ export interface Config { */ fragmentDelimiter?: string; }; + queryOptions?: { + /** + * Fuzziness allows you to define the maximum Levenshtein distance for fuzzy queries, + * which determines how many single-character edits (insertions, deletions, substitutions) + * are allowed for a term to be considered a match. + * + * - 'AUTO': Automatically determines the fuzziness level based on the length of the term. + * This is the default and widely accepted standard. + * - number: Specifies a fixed fuzziness level. For example, a value of 1 allows for one edit. + * + * Example: + * - For a term "apple" with fuzziness set to 1, queries like "aple" or "apply" would match. + */ + + fuzziness?: 'AUTO' | number; + /** + * Minimum number of characters that must match exactly at the beginning of the query. Defaults to 0. 
+ */ + prefixLength?: number; + }; /** Elasticsearch specific index template bodies */ indexTemplates?: Array<{ diff --git a/plugins/search-backend-module-elasticsearch/report.api.md b/plugins/search-backend-module-elasticsearch/report.api.md index cf7615541d..9250cfda31 100644 --- a/plugins/search-backend-module-elasticsearch/report.api.md +++ b/plugins/search-backend-module-elasticsearch/report.api.md @@ -317,6 +317,12 @@ export type ElasticSearchOptions = { translator?: ElasticSearchQueryTranslator; }; +// @public (undocumented) +export type ElasticSearchQueryConfig = { + fuzziness?: string | number; + prefixLength?: number; +}; + // @public export type ElasticSearchQueryTranslator = ( query: SearchQuery, @@ -332,6 +338,7 @@ export interface ElasticSearchQueryTranslatorExtensionPoint { // @public export type ElasticSearchQueryTranslatorOptions = { highlightOptions?: ElasticSearchHighlightConfig; + queryOptions?: ElasticSearchQueryConfig; }; // @public (undocumented) @@ -343,6 +350,7 @@ export class ElasticSearchSearchEngine implements SearchEngine { logger: LoggerService, batchSize: number, highlightOptions?: ElasticSearchHighlightOptions, + queryOptions?: ElasticSearchQueryConfig, ); // (undocumented) static fromConfig( diff --git a/plugins/search-backend-module-elasticsearch/src/engines/ElasticSearchSearchEngine.test.ts b/plugins/search-backend-module-elasticsearch/src/engines/ElasticSearchSearchEngine.test.ts index 379b712469..9b1f32c0ba 100644 --- a/plugins/search-backend-module-elasticsearch/src/engines/ElasticSearchSearchEngine.test.ts +++ b/plugins/search-backend-module-elasticsearch/src/engines/ElasticSearchSearchEngine.test.ts @@ -190,6 +190,7 @@ describe('ElasticSearchSearchEngine', () => { query: 'testTerm', fields: ['*'], fuzziness: 'auto', + prefix_length: 0, }, }, filter: { @@ -243,6 +244,7 @@ describe('ElasticSearchSearchEngine', () => { query: 'anotherTerm', fields: ['*'], fuzziness: 'auto', + prefix_length: 0, }, }, ], @@ -279,6 +281,7 @@ 
describe('ElasticSearchSearchEngine', () => { query: 'testTerm', fields: ['*'], fuzziness: 'auto', + prefix_length: 0, }, }, }, @@ -317,6 +320,7 @@ describe('ElasticSearchSearchEngine', () => { query: 'testTerm', fields: ['*'], fuzziness: 'auto', + prefix_length: 0, }, }, filter: [ @@ -372,6 +376,7 @@ describe('ElasticSearchSearchEngine', () => { query: 'testTerm', fields: ['*'], fuzziness: 'auto', + prefix_length: 0, }, }, filter: { @@ -433,6 +438,7 @@ describe('ElasticSearchSearchEngine', () => { query: 'testTerm', fields: ['*'], fuzziness: 'auto', + prefix_length: 0, }, }, }, @@ -758,6 +764,7 @@ describe('ElasticSearchSearchEngine', () => { query: 'testTerm', fields: ['*'], fuzziness: 'auto', + prefix_length: 0, }, }, filter: [], diff --git a/plugins/search-backend-module-elasticsearch/src/engines/ElasticSearchSearchEngine.ts b/plugins/search-backend-module-elasticsearch/src/engines/ElasticSearchSearchEngine.ts index 16233a5e78..8d950cf85f 100644 --- a/plugins/search-backend-module-elasticsearch/src/engines/ElasticSearchSearchEngine.ts +++ b/plugins/search-backend-module-elasticsearch/src/engines/ElasticSearchSearchEngine.ts @@ -60,6 +60,7 @@ export type ElasticSearchConcreteQuery = { */ export type ElasticSearchQueryTranslatorOptions = { highlightOptions?: ElasticSearchHighlightConfig; + queryOptions?: ElasticSearchQueryConfig; }; /** @@ -92,6 +93,14 @@ export type ElasticSearchHighlightOptions = { numFragments?: number; }; +/** + * @public + */ +export type ElasticSearchQueryConfig = { + fuzziness?: string | number; + prefixLength?: number; +}; + /** * @public */ @@ -125,6 +134,7 @@ const DEFAULT_INDEXER_BATCH_SIZE = 1000; export class ElasticSearchSearchEngine implements SearchEngine { private readonly elasticSearchClientWrapper: ElasticSearchClientWrapper; private readonly highlightOptions: ElasticSearchHighlightConfig; + private readonly queryOptions?: ElasticSearchQueryConfig; constructor( private readonly elasticSearchClientOptions: 
ElasticSearchClientOptions, @@ -133,6 +143,7 @@ export class ElasticSearchSearchEngine implements SearchEngine { private readonly logger: LoggerService, private readonly batchSize: number, highlightOptions?: ElasticSearchHighlightOptions, + queryOptions?: ElasticSearchQueryConfig, ) { this.elasticSearchClientWrapper = ElasticSearchClientWrapper.fromClientOptions(elasticSearchClientOptions); @@ -145,6 +156,7 @@ export class ElasticSearchSearchEngine implements SearchEngine { fragmentDelimiter: ' ... ', ...highlightOptions, }; + this.queryOptions = queryOptions; } static async fromConfig(options: ElasticSearchOptions) { @@ -266,11 +278,15 @@ export class ElasticSearchSearchEngine implements SearchEngine { if (restTerm?.length > 0) { const esbRestQuery = esb .multiMatchQuery(['*'], restTerm.trim()) - .fuzziness('auto'); + .fuzziness(options?.queryOptions?.fuzziness ?? 'auto') + .prefixLength(options?.queryOptions?.prefixLength ?? 0); esbQueries.push(esbRestQuery); } } else { - const esbQuery = esb.multiMatchQuery(['*'], term).fuzziness('auto'); + const esbQuery = esb + .multiMatchQuery(['*'], term) + .fuzziness(options?.queryOptions?.fuzziness ?? 'auto') + .prefixLength(options?.queryOptions?.prefixLength ?? 0); esbQueries.push(esbQuery); } @@ -386,7 +402,10 @@ export class ElasticSearchSearchEngine implements SearchEngine { async query(query: SearchQuery): Promise { const { elasticSearchQuery, documentTypes, pageSize } = this.translator( query, - { highlightOptions: this.highlightOptions }, + { + highlightOptions: this.highlightOptions, + queryOptions: this.queryOptions, + }, ); const queryIndices = documentTypes ? 
documentTypes.map(it => this.constructSearchAlias(it)) diff --git a/plugins/search-backend-module-elasticsearch/src/engines/index.ts b/plugins/search-backend-module-elasticsearch/src/engines/index.ts index cb24bdc56a..d0bf74d692 100644 --- a/plugins/search-backend-module-elasticsearch/src/engines/index.ts +++ b/plugins/search-backend-module-elasticsearch/src/engines/index.ts @@ -40,6 +40,7 @@ export type { export type { ElasticSearchConcreteQuery, ElasticSearchClientOptions, + ElasticSearchQueryConfig, ElasticSearchHighlightConfig, ElasticSearchHighlightOptions, ElasticSearchQueryTranslator, diff --git a/plugins/search-backend-module-elasticsearch/src/index.ts b/plugins/search-backend-module-elasticsearch/src/index.ts index 4a9f3474b1..a6acf8cdc8 100644 --- a/plugins/search-backend-module-elasticsearch/src/index.ts +++ b/plugins/search-backend-module-elasticsearch/src/index.ts @@ -36,6 +36,7 @@ export type { ElasticSearchClientOptions, ElasticSearchElasticSearchClientOptions, ElasticSearchHighlightConfig, + ElasticSearchQueryConfig, ElasticSearchHighlightOptions, ElasticSearchIndexAction, ElasticSearchQueryTranslator,