ElasticSearch backend: Add the option to configure the fuzziness. (#28661)

* feature: add option to finetune the fuzziness for elasticsearch

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* feature: add option to finetune the fuzziness for elasticsearch

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* feature: Missed 2 exports

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* feature: Add the api report

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* feature: clean up unwanted line

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* feature: fix

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* feature: readding the api-reports

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* Update .changeset/four-bees-look.md

Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com>
Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com>

* fix: added remarks

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* Update docs/features/search/search-engines.md

Co-authored-by: John Philip <johnphilip283@gmail.com>
Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com>

* Update docs/features/search/search-engines.md

Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com>
Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com>

* Update docs/features/search/search-engines.md

Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com>
Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com>

* Update comment

Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com>
Signed-off-by: Lars Van Steenbergen
<45992455+lvstb@users.noreply.github.com>
Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* Add vale accepted words and extra documentation

Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com>
Signed-off-by: Lars Van Steenbergen
<45992455+lvstb@users.noreply.github.com>
Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

* Unneeded parameter removal + replace Distance by distance

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>

---------

Signed-off-by: Lars van Steenbergen <lars@wingu.dev>
Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com>
Signed-off-by: Lars Van Steenbergen
Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com>
Co-authored-by: John Philip <johnphilip283@gmail.com>
Co-authored-by: Benjamin Janssens <benji.janssens@gmail.com>
This commit is contained in:
Lars Van Steenbergen
2025-03-17 20:59:17 +01:00
committed by GitHub
parent c55be5c7de
commit 5f66007d58
9 changed files with 81 additions and 3 deletions
+5
View File
@@ -0,0 +1,5 @@
---
'@backstage/plugin-search-backend-module-elasticsearch': minor
---
Add the option to configure the fuzziness of the Elasticsearch results by defining the `fuzziness` and `prefixLength` properties.
@@ -223,6 +223,7 @@ learnings
Leasot
lerna
Lerna
Levenshtein
lightbox
Lightsail
limitranges
+16
View File
@@ -255,3 +255,19 @@ search:
```
After applying this setting, an index name would look like this: `custom-prefix-software-catalog-index__20250219`
### Elasticsearch query config
By default, Elasticsearch's default query settings are used. If you need to tweak the fuzziness of the query results, you can do this with two parameters: `fuzziness` and `prefixLength`.
`fuzziness` allows you to define the maximum Levenshtein distance allowed for a match; `AUTO` is the default and the widely accepted standard.
`prefixLength` allows you to control the minimum number of characters that must match exactly at the beginning of the query term. It defaults to `0`.
[More info](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html)
```yaml
search:
elasticsearch:
queryOptions:
fuzziness: AUTO
prefixLength: 3
```
+20
View File
@@ -47,6 +47,26 @@ export interface Config {
*/
fragmentDelimiter?: string;
};
/**
* Options for tuning how fuzzily Elasticsearch queries match the search term.
*/
queryOptions?: {
/**
* Fuzziness allows you to define the maximum Levenshtein distance for fuzzy queries,
* which determines how many single-character edits (insertions, deletions, substitutions)
* are allowed for a term to be considered a match.
*
* - 'AUTO': Automatically determines the fuzziness level based on the length of the term.
* This is the default and widely accepted standard.
* - number: Specifies a fixed fuzziness level. For example, a value of 1 allows for one edit.
*
* Example:
* - For a term "apple" with fuzziness set to 1, queries like "aple" or "apply" would match.
*/
fuzziness?: 'AUTO' | number;
/**
* Minimum number of characters that must match exactly at the beginning of the query. Defaults to 0.
*/
prefixLength?: number;
};
/** Elasticsearch specific index template bodies */
indexTemplates?: Array<{
@@ -317,6 +317,12 @@ export type ElasticSearchOptions = {
translator?: ElasticSearchQueryTranslator;
};
// @public (undocumented)
export type ElasticSearchQueryConfig = {
fuzziness?: string | number;
prefixLength?: number;
};
// @public
export type ElasticSearchQueryTranslator = (
query: SearchQuery,
@@ -332,6 +338,7 @@ export interface ElasticSearchQueryTranslatorExtensionPoint {
// @public
export type ElasticSearchQueryTranslatorOptions = {
highlightOptions?: ElasticSearchHighlightConfig;
queryOptions?: ElasticSearchQueryConfig;
};
// @public (undocumented)
@@ -343,6 +350,7 @@ export class ElasticSearchSearchEngine implements SearchEngine {
logger: LoggerService,
batchSize: number,
highlightOptions?: ElasticSearchHighlightOptions,
queryOptions?: ElasticSearchQueryConfig,
);
// (undocumented)
static fromConfig(
@@ -190,6 +190,7 @@ describe('ElasticSearchSearchEngine', () => {
query: 'testTerm',
fields: ['*'],
fuzziness: 'auto',
prefix_length: 0,
},
},
filter: {
@@ -243,6 +244,7 @@ describe('ElasticSearchSearchEngine', () => {
query: 'anotherTerm',
fields: ['*'],
fuzziness: 'auto',
prefix_length: 0,
},
},
],
@@ -279,6 +281,7 @@ describe('ElasticSearchSearchEngine', () => {
query: 'testTerm',
fields: ['*'],
fuzziness: 'auto',
prefix_length: 0,
},
},
},
@@ -317,6 +320,7 @@ describe('ElasticSearchSearchEngine', () => {
query: 'testTerm',
fields: ['*'],
fuzziness: 'auto',
prefix_length: 0,
},
},
filter: [
@@ -372,6 +376,7 @@ describe('ElasticSearchSearchEngine', () => {
query: 'testTerm',
fields: ['*'],
fuzziness: 'auto',
prefix_length: 0,
},
},
filter: {
@@ -433,6 +438,7 @@ describe('ElasticSearchSearchEngine', () => {
query: 'testTerm',
fields: ['*'],
fuzziness: 'auto',
prefix_length: 0,
},
},
},
@@ -758,6 +764,7 @@ describe('ElasticSearchSearchEngine', () => {
query: 'testTerm',
fields: ['*'],
fuzziness: 'auto',
prefix_length: 0,
},
},
filter: [],
@@ -60,6 +60,7 @@ export type ElasticSearchConcreteQuery = {
*/
export type ElasticSearchQueryTranslatorOptions = {
highlightOptions?: ElasticSearchHighlightConfig;
queryOptions?: ElasticSearchQueryConfig;
};
/**
@@ -92,6 +93,14 @@ export type ElasticSearchHighlightOptions = {
numFragments?: number;
};
/**
* Options controlling how fuzzily search terms are matched.
*
* @remarks
* Both values are handed to the multi-match query built by the engine's
* query translation; a missing `fuzziness` falls back to `'auto'` and a
* missing `prefixLength` falls back to `0`.
*
* @public
*/
export type ElasticSearchQueryConfig = {
/** Fuzziness passed to the multi-match query, e.g. `'AUTO'` or a fixed number of allowed edits. */
fuzziness?: string | number;
/** Number of characters at the beginning of the term that must match exactly. */
prefixLength?: number;
};
/**
* @public
*/
@@ -125,6 +134,7 @@ const DEFAULT_INDEXER_BATCH_SIZE = 1000;
export class ElasticSearchSearchEngine implements SearchEngine {
private readonly elasticSearchClientWrapper: ElasticSearchClientWrapper;
private readonly highlightOptions: ElasticSearchHighlightConfig;
private readonly queryOptions?: ElasticSearchQueryConfig;
constructor(
private readonly elasticSearchClientOptions: ElasticSearchClientOptions,
@@ -133,6 +143,7 @@ export class ElasticSearchSearchEngine implements SearchEngine {
private readonly logger: LoggerService,
private readonly batchSize: number,
highlightOptions?: ElasticSearchHighlightOptions,
queryOptions?: ElasticSearchQueryConfig,
) {
this.elasticSearchClientWrapper =
ElasticSearchClientWrapper.fromClientOptions(elasticSearchClientOptions);
@@ -145,6 +156,7 @@ export class ElasticSearchSearchEngine implements SearchEngine {
fragmentDelimiter: ' ... ',
...highlightOptions,
};
this.queryOptions = queryOptions;
}
static async fromConfig(options: ElasticSearchOptions) {
@@ -266,11 +278,15 @@ export class ElasticSearchSearchEngine implements SearchEngine {
if (restTerm?.length > 0) {
const esbRestQuery = esb
.multiMatchQuery(['*'], restTerm.trim())
.fuzziness('auto');
.fuzziness(options?.queryOptions?.fuzziness ?? 'auto')
.prefixLength(options?.queryOptions?.prefixLength ?? 0);
esbQueries.push(esbRestQuery);
}
} else {
const esbQuery = esb.multiMatchQuery(['*'], term).fuzziness('auto');
const esbQuery = esb
.multiMatchQuery(['*'], term)
.fuzziness(options?.queryOptions?.fuzziness ?? 'auto')
.prefixLength(options?.queryOptions?.prefixLength ?? 0);
esbQueries.push(esbQuery);
}
@@ -386,7 +402,10 @@ export class ElasticSearchSearchEngine implements SearchEngine {
async query(query: SearchQuery): Promise<IndexableResultSet> {
const { elasticSearchQuery, documentTypes, pageSize } = this.translator(
query,
{ highlightOptions: this.highlightOptions },
{
highlightOptions: this.highlightOptions,
queryOptions: this.queryOptions,
},
);
const queryIndices = documentTypes
? documentTypes.map(it => this.constructSearchAlias(it))
@@ -40,6 +40,7 @@ export type {
export type {
ElasticSearchConcreteQuery,
ElasticSearchClientOptions,
ElasticSearchQueryConfig,
ElasticSearchHighlightConfig,
ElasticSearchHighlightOptions,
ElasticSearchQueryTranslator,
@@ -36,6 +36,7 @@ export type {
ElasticSearchClientOptions,
ElasticSearchElasticSearchClientOptions,
ElasticSearchHighlightConfig,
ElasticSearchQueryConfig,
ElasticSearchHighlightOptions,
ElasticSearchIndexAction,
ElasticSearchQueryTranslator,