ElasticSearch backend: Add the option to configure the fuzziness. (#28661)
* feature: add option to finetune the fuzziness for elasticsearch Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * feature: add option to finetune the fuzziness for elasticsearch Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * feature: Missed 2 exports Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * feature: Add the api report Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * feature: clean up unwanted line Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * feature: fix Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * feature: readding the api-reports Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * Update .changeset/four-bees-look.md Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com> Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com> * fix: added remarks Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * Update docs/features/search/search-engines.md Co-authored-by: John Philip <johnphilip283@gmail.com> Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com> * Update docs/features/search/search-engines.md Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com> Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com> * Update docs/features/search/search-engines.md Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com> Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com> * Update comment Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com> Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com> Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * Add vale accepted words and extra documentation Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com> Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com> Signed-off-by: Lars van Steenbergen <lars@wingu.dev> * Unneeded parameter removal + 
replace Distance by distance Signed-off-by: Lars van Steenbergen <lars@wingu.dev> --------- Signed-off-by: Lars van Steenbergen <lars@wingu.dev> Signed-off-by: Lars Van Steenbergen <45992455+lvstb@users.noreply.github.com> Signed-off-by: Lars Van Steenbergen Co-authored-by: Andre Wanlin <67169551+awanlin@users.noreply.github.com> Co-authored-by: John Philip <johnphilip283@gmail.com> Co-authored-by: Benjamin Janssens <benji.janssens@gmail.com>
This commit is contained in:
committed by
GitHub
parent
c55be5c7de
commit
5f66007d58
@@ -0,0 +1,5 @@
|
||||
---
|
||||
'@backstage/plugin-search-backend-module-elasticsearch': minor
|
||||
---
|
||||
|
||||
Add the option to configure the fuzziness of the Elasticsearch results by defining the `fuzziness` and `prefixLength` properties.
|
||||
@@ -223,6 +223,7 @@ learnings
|
||||
Leasot
|
||||
lerna
|
||||
Lerna
|
||||
Levenshtein
|
||||
lightbox
|
||||
Lightsail
|
||||
limitranges
|
||||
|
||||
@@ -255,3 +255,19 @@ search:
|
||||
```
|
||||
|
||||
After applying this setting, an index name would look like this: `custom-prefix-software-catalog-index__20250219`
|
||||
|
||||
### Elasticsearch query config
|
||||
|
||||
By default, Elasticsearch's standard query settings are used. If you need to tweak the fuzziness of the query results, you can do so with two parameters: `fuzziness` and `prefixLength`.
|
||||
|
||||
`fuzziness` allows you to define the maximum Levenshtein distance. `AUTO` is the default and the widely accepted standard.
|
||||
`prefixLength` allows you to control the minimum number of characters that must match exactly at the beginning of the query term. This defaults to 0.
|
||||
[More info](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html)
|
||||
|
||||
```yaml
|
||||
search:
|
||||
elasticsearch:
|
||||
queryOptions:
|
||||
fuzziness: AUTO
|
||||
prefixLength: 3
|
||||
```
|
||||
|
||||
@@ -47,6 +47,26 @@ export interface Config {
|
||||
*/
|
||||
fragmentDelimiter?: string;
|
||||
};
|
||||
queryOptions?: {
|
||||
/**
|
||||
* Fuzziness allows you to define the maximum Levenshtein distance for fuzzy queries,
|
||||
* which determines how many single-character edits (insertions, deletions, substitutions)
|
||||
* are allowed for a term to be considered a match.
|
||||
*
|
||||
* - 'AUTO': Automatically determines the fuzziness level based on the length of the term.
|
||||
* This is the default and widely accepted standard.
|
||||
* - number: Specifies a fixed fuzziness level. For example, a value of 1 allows for one edit.
|
||||
*
|
||||
* Example:
|
||||
* - For a term "apple" with fuzziness set to 1, queries like "aple" or "apply" would match.
|
||||
*/
|
||||
|
||||
fuzziness?: 'AUTO' | number;
|
||||
/**
|
||||
* Minimum number of characters that must match exactly at the beginning of the query. Defaults to 0.
|
||||
*/
|
||||
prefixLength?: number;
|
||||
};
|
||||
|
||||
/** Elasticsearch specific index template bodies */
|
||||
indexTemplates?: Array<{
|
||||
|
||||
@@ -317,6 +317,12 @@ export type ElasticSearchOptions = {
|
||||
translator?: ElasticSearchQueryTranslator;
|
||||
};
|
||||
|
||||
// @public (undocumented)
|
||||
export type ElasticSearchQueryConfig = {
|
||||
fuzziness?: string | number;
|
||||
prefixLength?: number;
|
||||
};
|
||||
|
||||
// @public
|
||||
export type ElasticSearchQueryTranslator = (
|
||||
query: SearchQuery,
|
||||
@@ -332,6 +338,7 @@ export interface ElasticSearchQueryTranslatorExtensionPoint {
|
||||
// @public
|
||||
export type ElasticSearchQueryTranslatorOptions = {
|
||||
highlightOptions?: ElasticSearchHighlightConfig;
|
||||
queryOptions?: ElasticSearchQueryConfig;
|
||||
};
|
||||
|
||||
// @public (undocumented)
|
||||
@@ -343,6 +350,7 @@ export class ElasticSearchSearchEngine implements SearchEngine {
|
||||
logger: LoggerService,
|
||||
batchSize: number,
|
||||
highlightOptions?: ElasticSearchHighlightOptions,
|
||||
queryOptions?: ElasticSearchQueryConfig,
|
||||
);
|
||||
// (undocumented)
|
||||
static fromConfig(
|
||||
|
||||
+7
@@ -190,6 +190,7 @@ describe('ElasticSearchSearchEngine', () => {
|
||||
query: 'testTerm',
|
||||
fields: ['*'],
|
||||
fuzziness: 'auto',
|
||||
prefix_length: 0,
|
||||
},
|
||||
},
|
||||
filter: {
|
||||
@@ -243,6 +244,7 @@ describe('ElasticSearchSearchEngine', () => {
|
||||
query: 'anotherTerm',
|
||||
fields: ['*'],
|
||||
fuzziness: 'auto',
|
||||
prefix_length: 0,
|
||||
},
|
||||
},
|
||||
],
|
||||
@@ -279,6 +281,7 @@ describe('ElasticSearchSearchEngine', () => {
|
||||
query: 'testTerm',
|
||||
fields: ['*'],
|
||||
fuzziness: 'auto',
|
||||
prefix_length: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -317,6 +320,7 @@ describe('ElasticSearchSearchEngine', () => {
|
||||
query: 'testTerm',
|
||||
fields: ['*'],
|
||||
fuzziness: 'auto',
|
||||
prefix_length: 0,
|
||||
},
|
||||
},
|
||||
filter: [
|
||||
@@ -372,6 +376,7 @@ describe('ElasticSearchSearchEngine', () => {
|
||||
query: 'testTerm',
|
||||
fields: ['*'],
|
||||
fuzziness: 'auto',
|
||||
prefix_length: 0,
|
||||
},
|
||||
},
|
||||
filter: {
|
||||
@@ -433,6 +438,7 @@ describe('ElasticSearchSearchEngine', () => {
|
||||
query: 'testTerm',
|
||||
fields: ['*'],
|
||||
fuzziness: 'auto',
|
||||
prefix_length: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -758,6 +764,7 @@ describe('ElasticSearchSearchEngine', () => {
|
||||
query: 'testTerm',
|
||||
fields: ['*'],
|
||||
fuzziness: 'auto',
|
||||
prefix_length: 0,
|
||||
},
|
||||
},
|
||||
filter: [],
|
||||
|
||||
+22
-3
@@ -60,6 +60,7 @@ export type ElasticSearchConcreteQuery = {
|
||||
*/
|
||||
export type ElasticSearchQueryTranslatorOptions = {
|
||||
highlightOptions?: ElasticSearchHighlightConfig;
|
||||
queryOptions?: ElasticSearchQueryConfig;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -92,6 +93,14 @@ export type ElasticSearchHighlightOptions = {
|
||||
numFragments?: number;
|
||||
};
|
||||
|
||||
/**
|
||||
* @public
|
||||
*/
|
||||
export type ElasticSearchQueryConfig = {
|
||||
fuzziness?: string | number;
|
||||
prefixLength?: number;
|
||||
};
|
||||
|
||||
/**
|
||||
* @public
|
||||
*/
|
||||
@@ -125,6 +134,7 @@ const DEFAULT_INDEXER_BATCH_SIZE = 1000;
|
||||
export class ElasticSearchSearchEngine implements SearchEngine {
|
||||
private readonly elasticSearchClientWrapper: ElasticSearchClientWrapper;
|
||||
private readonly highlightOptions: ElasticSearchHighlightConfig;
|
||||
private readonly queryOptions?: ElasticSearchQueryConfig;
|
||||
|
||||
constructor(
|
||||
private readonly elasticSearchClientOptions: ElasticSearchClientOptions,
|
||||
@@ -133,6 +143,7 @@ export class ElasticSearchSearchEngine implements SearchEngine {
|
||||
private readonly logger: LoggerService,
|
||||
private readonly batchSize: number,
|
||||
highlightOptions?: ElasticSearchHighlightOptions,
|
||||
queryOptions?: ElasticSearchQueryConfig,
|
||||
) {
|
||||
this.elasticSearchClientWrapper =
|
||||
ElasticSearchClientWrapper.fromClientOptions(elasticSearchClientOptions);
|
||||
@@ -145,6 +156,7 @@ export class ElasticSearchSearchEngine implements SearchEngine {
|
||||
fragmentDelimiter: ' ... ',
|
||||
...highlightOptions,
|
||||
};
|
||||
this.queryOptions = queryOptions;
|
||||
}
|
||||
|
||||
static async fromConfig(options: ElasticSearchOptions) {
|
||||
@@ -266,11 +278,15 @@ export class ElasticSearchSearchEngine implements SearchEngine {
|
||||
if (restTerm?.length > 0) {
|
||||
const esbRestQuery = esb
|
||||
.multiMatchQuery(['*'], restTerm.trim())
|
||||
.fuzziness('auto');
|
||||
.fuzziness(options?.queryOptions?.fuzziness ?? 'auto')
|
||||
.prefixLength(options?.queryOptions?.prefixLength ?? 0);
|
||||
esbQueries.push(esbRestQuery);
|
||||
}
|
||||
} else {
|
||||
const esbQuery = esb.multiMatchQuery(['*'], term).fuzziness('auto');
|
||||
const esbQuery = esb
|
||||
.multiMatchQuery(['*'], term)
|
||||
.fuzziness(options?.queryOptions?.fuzziness ?? 'auto')
|
||||
.prefixLength(options?.queryOptions?.prefixLength ?? 0);
|
||||
esbQueries.push(esbQuery);
|
||||
}
|
||||
|
||||
@@ -386,7 +402,10 @@ export class ElasticSearchSearchEngine implements SearchEngine {
|
||||
async query(query: SearchQuery): Promise<IndexableResultSet> {
|
||||
const { elasticSearchQuery, documentTypes, pageSize } = this.translator(
|
||||
query,
|
||||
{ highlightOptions: this.highlightOptions },
|
||||
{
|
||||
highlightOptions: this.highlightOptions,
|
||||
queryOptions: this.queryOptions,
|
||||
},
|
||||
);
|
||||
const queryIndices = documentTypes
|
||||
? documentTypes.map(it => this.constructSearchAlias(it))
|
||||
|
||||
@@ -40,6 +40,7 @@ export type {
|
||||
export type {
|
||||
ElasticSearchConcreteQuery,
|
||||
ElasticSearchClientOptions,
|
||||
ElasticSearchQueryConfig,
|
||||
ElasticSearchHighlightConfig,
|
||||
ElasticSearchHighlightOptions,
|
||||
ElasticSearchQueryTranslator,
|
||||
|
||||
@@ -36,6 +36,7 @@ export type {
|
||||
ElasticSearchClientOptions,
|
||||
ElasticSearchElasticSearchClientOptions,
|
||||
ElasticSearchHighlightConfig,
|
||||
ElasticSearchQueryConfig,
|
||||
ElasticSearchHighlightOptions,
|
||||
ElasticSearchIndexAction,
|
||||
ElasticSearchQueryTranslator,
|
||||
|
||||
Reference in New Issue
Block a user