diff --git a/.changeset/search-olive-cars-talk.md b/.changeset/search-olive-cars-talk.md new file mode 100644 index 0000000000..64328d89b1 --- /dev/null +++ b/.changeset/search-olive-cars-talk.md @@ -0,0 +1,7 @@ +--- +'@backstage/plugin-search-backend-node': patch +--- + +Enhance the search results of `LunrSearchEngine` to support a more natural +search experience. This is done by allowing typos (by using fuzzy search) and +supporting typeahead search (using wildcard queries to match incomplete words). diff --git a/.changeset/search-slimy-taxis-live.md b/.changeset/search-slimy-taxis-live.md new file mode 100644 index 0000000000..c4615ef84c --- /dev/null +++ b/.changeset/search-slimy-taxis-live.md @@ -0,0 +1,10 @@ +--- +'@backstage/plugin-search-backend-node': minor +--- + +Build search queries using the query builder in `LunrSearchEngine`. This removes +the support for specifying custom queries with the lunr query syntax, but makes +sure that inputs are properly escaped. Supporting the full lunr syntax is still +possible by setting a custom query translator. +The interface of `LunrSearchEngine.setTranslator()` is changed to support +building lunr queries. diff --git a/.github/styles/vocab.txt b/.github/styles/vocab.txt index f6826cbefe..c095717def 100644 --- a/.github/styles/vocab.txt +++ b/.github/styles/vocab.txt @@ -274,6 +274,7 @@ touchpoints transpilation transpiled truthy +typeahead ui unbreak unmanaged diff --git a/plugins/search-backend-node/src/engines/LunrSearchEngine.test.ts b/plugins/search-backend-node/src/engines/LunrSearchEngine.test.ts index 22c775da8e..3c7945cbc3 100644 --- a/plugins/search-backend-node/src/engines/LunrSearchEngine.test.ts +++ b/plugins/search-backend-node/src/engines/LunrSearchEngine.test.ts @@ -15,8 +15,9 @@ */ import { getVoidLogger } from '@backstage/backend-common'; -import { LunrSearchEngine } from './LunrSearchEngine'; +import { ConcreteLunrQuery, LunrSearchEngine } from './LunrSearchEngine'; import { SearchEngine } from '../types'; +import lunr from 'lunr'; /** * Just used to test the default translator shipped with LunrSearchEngine. @@ -68,11 +69,35 @@ describe('LunrSearchEngine', () => { term: 'testTerm', filters: {}, pageCursor: '', - }); + }) as ConcreteLunrQuery; expect(actualTranslatedQuery).toMatchObject({ - documentTypes: ['*'], - lunrQueryString: '+testTerm', + documentTypes: undefined, + lunrQueryBuilder: expect.any(Function), + }); + + const query: jest.Mocked = { + allFields: [], + clauses: [], + term: jest.fn(), + clause: jest.fn(), + }; + + actualTranslatedQuery.lunrQueryBuilder.bind(query)(query); + + expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), { + boost: 100, + usePipeline: true, + }); + expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), { + boost: 10, + usePipeline: false, + wildcard: lunr.Query.wildcard.TRAILING, + }); + expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), { + boost: 1, + usePipeline: false, + editDistance: 2, }); }); @@ -86,11 +111,39 @@ describe('LunrSearchEngine', () => { term: 'testTerm', filters: { kind: 'testKind' }, pageCursor: '', - }); + }) as ConcreteLunrQuery; expect(actualTranslatedQuery).toMatchObject({ - documentTypes: ['*'], - lunrQueryString: '+testTerm +kind:testKind', + documentTypes: undefined, + lunrQueryBuilder: expect.any(Function), + }); + + const query: jest.Mocked = { + allFields: ['kind'], + clauses: [], + term: jest.fn(), + clause: jest.fn(), + }; + + actualTranslatedQuery.lunrQueryBuilder.bind(query)(query); + + expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), { + boost: 100, + usePipeline: true, + }); + expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), { + boost: 10, + usePipeline: false, + wildcard: lunr.Query.wildcard.TRAILING, + }); + expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), { + boost: 1, + usePipeline: false, + editDistance: 2, + }); + expect(query.term).toBeCalledWith(lunr.tokenizer('testKind'), { + fields: ['kind'], + presence: lunr.Query.presence.REQUIRED, }); }); @@ -104,17 +157,78 @@ describe('LunrSearchEngine', () => { term: 'testTerm', filters: { kind: 'testKind', namespace: 'testNameSpace' }, pageCursor: '', - }); + }) as ConcreteLunrQuery; expect(actualTranslatedQuery).toMatchObject({ - documentTypes: ['*'], - lunrQueryString: '+testTerm +kind:testKind +namespace:testNameSpace', + documentTypes: undefined, + lunrQueryBuilder: expect.any(Function), }); + + const query: jest.Mocked = { + allFields: ['kind', 'namespace'], + clauses: [], + term: jest.fn(), + clause: jest.fn(), + }; + + actualTranslatedQuery.lunrQueryBuilder.bind(query)(query); + + expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), { + boost: 100, + usePipeline: true, + }); + expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), { + boost: 10, + usePipeline: false, + wildcard: lunr.Query.wildcard.TRAILING, + }); + expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), { + boost: 1, + usePipeline: false, + editDistance: 2, + }); + expect(query.term).toBeCalledWith(lunr.tokenizer('testKind'), { + fields: ['kind'], + presence: lunr.Query.presence.REQUIRED, + }); + expect(query.term).toBeCalledWith(lunr.tokenizer('testNameSpace'), { + fields: ['namespace'], + presence: lunr.Query.presence.REQUIRED, + }); + }); + + it('should throw if translated query references missing field', async () => { + const inspectableSearchEngine = new LunrSearchEngineForTranslatorTests({ + logger: getVoidLogger(), + }); + const translatorUnderTest = inspectableSearchEngine.getTranslator(); + + const actualTranslatedQuery = translatorUnderTest({ + term: 'testTerm', + filters: { kind: 'testKind' }, + pageCursor: '', + }) as ConcreteLunrQuery; + + expect(actualTranslatedQuery).toMatchObject({ + documentTypes: undefined, + lunrQueryBuilder: expect.any(Function), + }); + + const query: jest.Mocked = { + allFields: [], + clauses: [], + term: jest.fn(), + clause: jest.fn(), + }; + + expect(() => + actualTranslatedQuery.lunrQueryBuilder.bind(query)(query), + ).toThrow(); }); }); describe('query', () => { - it('should perform search query', async () => { + it('should perform search query and return 0 results on empty index', async () => { const querySpy = jest.spyOn(testLunrSearchEngine, 'query'); // Perform search query and ensure the query func was invoked. @@ -149,7 +263,7 @@ describe('LunrSearchEngine', () => { // Perform search query const mockedSearchResult = await testLunrSearchEngine.query({ - term: 'testTerm', + term: 'unknown', filters: {}, pageCursor: '', }); @@ -158,6 +272,39 @@ describe('LunrSearchEngine', () => { expect(mockedSearchResult).toMatchObject({ results: [] }); }); + it('should perform search query and return all results on empty term', async () => { + const mockDocuments = [ + { + title: 'testTitle', + text: 'testText', + location: 'test/location', + }, + ]; + + // Mock indexing of 1 document + testLunrSearchEngine.index('test-index', mockDocuments); + + // Perform search query + const mockedSearchResult = await testLunrSearchEngine.query({ + term: '', + filters: {}, + pageCursor: '', + }); + + expect(mockedSearchResult).toMatchObject({ + results: [ + { + document: { + title: 'testTitle', + text: 'testText', + location: 'test/location', + }, + type: 'test-index', + }, + ], + }); + }); + it('should perform search query and return search results on match', async () => { const mockDocuments = [ { @@ -177,6 +324,70 @@ describe('LunrSearchEngine', () => { pageCursor: '', }); + expect(mockedSearchResult).toMatchObject({ + results: [ + { + document: { + title: 'testTitle', + text: 'testText', + location: 'test/location', + }, + }, + ], + }); + }); + + it('should perform search query and return search results on partial match', async () => { + const mockDocuments = [ + { + title: 'testTitle', + text: 'testText', + location: 'test/location', + }, + ]; + + // Mock indexing of 1 document + testLunrSearchEngine.index('test-index', mockDocuments); + + // Perform search query + const mockedSearchResult = await testLunrSearchEngine.query({ + term: 'testTitle', + filters: {}, + pageCursor: '', + }); + + expect(mockedSearchResult).toMatchObject({ + results: [ + { + document: { + title: 'testTitle', + text: 'testText', + location: 'test/location', + }, + }, + ], + }); + }); + + it('should perform search query and return search results on fuzzy match', async () => { + const mockDocuments = [ + { + title: 'testTitle', + text: 'testText', + location: 'test/location', + }, + ]; + + // Mock indexing of 1 document + testLunrSearchEngine.index('test-index', mockDocuments); + + // Perform search query + const mockedSearchResult = await testLunrSearchEngine.query({ + term: 'testTitel', // Intentional typo + filters: {}, + pageCursor: '', + }); + // Should return 1 result as we are mocking the indexing of 1 document with match on the title field expect(mockedSearchResult).toMatchObject({ results: [ diff --git a/plugins/search-backend-node/src/engines/LunrSearchEngine.ts b/plugins/search-backend-node/src/engines/LunrSearchEngine.ts index a1c12f019d..0979fa4c90 100644 --- a/plugins/search-backend-node/src/engines/LunrSearchEngine.ts +++ b/plugins/search-backend-node/src/engines/LunrSearchEngine.ts @@ -23,9 +23,9 @@ import lunr from 'lunr'; import { Logger } from 'winston'; import { SearchEngine, QueryTranslator } from '../types'; -type ConcreteLunrQuery = { - lunrQueryString: string; - documentTypes: string[]; +export type ConcreteLunrQuery = { + lunrQueryBuilder: lunr.Index.QueryBuilder; + documentTypes?: string[]; }; type LunrResultEnvelope = { @@ -50,40 +50,62 @@ export class LunrSearchEngine implements SearchEngine { filters, types, }: SearchQuery): ConcreteLunrQuery => { - let lunrQueryFilters; - const lunrTerm = term ? `+${term}` : ''; - if (filters) { - lunrQueryFilters = Object.entries(filters) - .map(([field, value]) => { - // Require that the given field has the given value (with +). - if (['string', 'number', 'boolean'].includes(typeof value)) { - if (typeof value === 'string') { - return ` +${field}:${value.replace(':', '\\:')}`; - } - return ` +${field}:${value}`; - } - - // Illustrate how multi-value filters could work. - if (Array.isArray(value)) { - // But warn that Lurn supports this poorly. - this.logger.warn( - `Non-scalar filter value used for field ${field}. Consider using a different Search Engine for better results.`, - ); - return ` ${value.map(v => { - return `${field}:${v}`; - })}`; - } - - // Log a warning or something about unknown filter value - this.logger.warn(`Unknown filter type used on field ${field}`); - return ''; - }) - .join(''); - } - return { - lunrQueryString: `${lunrTerm}${lunrQueryFilters || ''}`, - documentTypes: types || ['*'], + lunrQueryBuilder: q => { + const termToken = lunr.tokenizer(term); + + // Support for typeahead seach is based on https://github.com/olivernn/lunr.js/issues/256#issuecomment-295407852 + // look for an exact match and apply a large positive boost + q.term(termToken, { + usePipeline: true, + boost: 100, + }); + // look for terms that match the beginning of this term and apply a + // medium boost + q.term(termToken, { + usePipeline: false, + boost: 10, + wildcard: lunr.Query.wildcard.TRAILING, + }); + // look for terms that match with an edit distance of 2 and apply a + // small boost + q.term(termToken, { + usePipeline: false, + editDistance: 2, + boost: 1, + }); + + if (filters) { + Object.entries(filters).forEach(([field, value]) => { + if (!q.allFields.includes(field)) { + // Throw for unknown field, as this will be a non match + throw new Error(`unrecognised field ${field}`); + } + + // Require that the given field has the given value + if (['string', 'number', 'boolean'].includes(typeof value)) { + q.term(lunr.tokenizer(value?.toString()), { + presence: lunr.Query.presence.REQUIRED, + fields: [field], + }); + } else if (Array.isArray(value)) { + // Illustrate how multi-value filters could work. + // But warn that Lurn supports this poorly. + this.logger.warn( + `Non-scalar filter value used for field ${field}. Consider using a different Search Engine for better results.`, + ); + q.term(lunr.tokenizer(value), { + presence: lunr.Query.presence.OPTIONAL, + fields: [field], + }); + } else { + // Log a warning or something about unknown filter value + this.logger.warn(`Unknown filter type used on field ${field}`); + } + }); + } + }, + documentTypes: types, }; }; @@ -118,18 +140,19 @@ export class LunrSearchEngine implements SearchEngine { } query(query: SearchQuery): Promise { - const { lunrQueryString, documentTypes } = this.translator( + const { lunrQueryBuilder, documentTypes } = this.translator( query, ) as ConcreteLunrQuery; const results: LunrResultEnvelope[] = []; - if (documentTypes.length === 1 && documentTypes[0] === '*') { - // Iterate over all this.lunrIndex values. - Object.keys(this.lunrIndices).forEach(type => { + // Iterate over the filtered list of this.lunrIndex keys. + Object.keys(this.lunrIndices) + .filter(type => !documentTypes || documentTypes.includes(type)) + .forEach(type => { try { results.push( - ...this.lunrIndices[type].search(lunrQueryString).map(result => { + ...this.lunrIndices[type].query(lunrQueryBuilder).map(result => { return { result: result, type: type, @@ -139,36 +162,14 @@ export class LunrSearchEngine implements SearchEngine { } catch (err) { // if a field does not exist on a index, we can see that as a no-match if ( - err instanceof lunr.QueryParseError && + err instanceof Error && err.message.startsWith('unrecognised field') - ) + ) { return; + } + throw err; } }); - } else { - // Iterate over the filtered list of this.lunrIndex keys. - Object.keys(this.lunrIndices) - .filter(type => documentTypes.includes(type)) - .forEach(type => { - try { - results.push( - ...this.lunrIndices[type].search(lunrQueryString).map(result => { - return { - result: result, - type: type, - }; - }), - ); - } catch (err) { - // if a field does not exist on a index, we can see that as a no-match - if ( - err instanceof lunr.QueryParseError && - err.message.startsWith('unrecognised field') - ) - return; - } - }); - } // Sort results. results.sort((doc1, doc2) => { diff --git a/plugins/search-backend-node/src/engines/index.ts b/plugins/search-backend-node/src/engines/index.ts index 16516bf66e..7e6fb86bd4 100644 --- a/plugins/search-backend-node/src/engines/index.ts +++ b/plugins/search-backend-node/src/engines/index.ts @@ -15,3 +15,4 @@ */ export { LunrSearchEngine } from './LunrSearchEngine'; +export type { ConcreteLunrQuery } from './LunrSearchEngine';