Build search queries using the query build in LunrSearchEngine

Signed-off-by: Oliver Sand <oliver.sand@sda-se.com>
This commit is contained in:
Oliver Sand
2021-07-07 15:30:36 +02:00
parent 5cbce346bb
commit 9f3ecb555a
6 changed files with 310 additions and 79 deletions
+7
View File
@@ -0,0 +1,7 @@
---
'@backstage/plugin-search-backend-node': patch
---
Enhance the search results of `LunrSearchEngine` to support a more natural
search experience. This is done by allowing typos (by using fuzzy search) and
supporting typeahead search (using wildcard queries to match incomplete words).
+10
View File
@@ -0,0 +1,10 @@
---
'@backstage/plugin-search-backend-node': minor
---
Build search queries using the query builder in `LunrSearchEngine`. This removes
the support for specifying custom queries with the lunr query syntax, but makes
sure that inputs are properly escaped. Supporting the full lunr syntax is still
possible by setting a custom query translator.
The interface of `LunrSearchEngine.setTranslator()` is changed to support
building lunr queries.
+1
View File
@@ -274,6 +274,7 @@ touchpoints
transpilation
transpiled
truthy
typeahead
ui
unbreak
unmanaged
@@ -15,8 +15,9 @@
*/
import { getVoidLogger } from '@backstage/backend-common';
import { LunrSearchEngine } from './LunrSearchEngine';
import { ConcreteLunrQuery, LunrSearchEngine } from './LunrSearchEngine';
import { SearchEngine } from '../types';
import lunr from 'lunr';
/**
* Just used to test the default translator shipped with LunrSearchEngine.
@@ -68,11 +69,35 @@ describe('LunrSearchEngine', () => {
term: 'testTerm',
filters: {},
pageCursor: '',
});
}) as ConcreteLunrQuery;
expect(actualTranslatedQuery).toMatchObject({
documentTypes: ['*'],
lunrQueryString: '+testTerm',
documentTypes: undefined,
lunrQueryBuilder: expect.any(Function),
});
const query: jest.Mocked<lunr.Query> = {
allFields: [],
clauses: [],
term: jest.fn(),
clause: jest.fn(),
};
actualTranslatedQuery.lunrQueryBuilder.bind(query)(query);
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
boost: 100,
usePipeline: true,
});
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
boost: 10,
usePipeline: false,
wildcard: lunr.Query.wildcard.TRAILING,
});
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
boost: 1,
usePipeline: false,
editDistance: 2,
});
});
@@ -86,11 +111,39 @@ describe('LunrSearchEngine', () => {
term: 'testTerm',
filters: { kind: 'testKind' },
pageCursor: '',
});
}) as ConcreteLunrQuery;
expect(actualTranslatedQuery).toMatchObject({
documentTypes: ['*'],
lunrQueryString: '+testTerm +kind:testKind',
documentTypes: undefined,
lunrQueryBuilder: expect.any(Function),
});
const query: jest.Mocked<lunr.Query> = {
allFields: ['kind'],
clauses: [],
term: jest.fn(),
clause: jest.fn(),
};
actualTranslatedQuery.lunrQueryBuilder.bind(query)(query);
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
boost: 100,
usePipeline: true,
});
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
boost: 10,
usePipeline: false,
wildcard: lunr.Query.wildcard.TRAILING,
});
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
boost: 1,
usePipeline: false,
editDistance: 2,
});
expect(query.term).toBeCalledWith(lunr.tokenizer('testKind'), {
fields: ['kind'],
presence: lunr.Query.presence.REQUIRED,
});
});
@@ -104,17 +157,78 @@ describe('LunrSearchEngine', () => {
term: 'testTerm',
filters: { kind: 'testKind', namespace: 'testNameSpace' },
pageCursor: '',
});
}) as ConcreteLunrQuery;
expect(actualTranslatedQuery).toMatchObject({
documentTypes: ['*'],
lunrQueryString: '+testTerm +kind:testKind +namespace:testNameSpace',
documentTypes: undefined,
lunrQueryBuilder: expect.any(Function),
});
const query: jest.Mocked<lunr.Query> = {
allFields: ['kind', 'namespace'],
clauses: [],
term: jest.fn(),
clause: jest.fn(),
};
actualTranslatedQuery.lunrQueryBuilder.bind(query)(query);
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
boost: 100,
usePipeline: true,
});
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
boost: 10,
usePipeline: false,
wildcard: lunr.Query.wildcard.TRAILING,
});
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
boost: 1,
usePipeline: false,
editDistance: 2,
});
expect(query.term).toBeCalledWith(lunr.tokenizer('testKind'), {
fields: ['kind'],
presence: lunr.Query.presence.REQUIRED,
});
expect(query.term).toBeCalledWith(lunr.tokenizer('testNameSpace'), {
fields: ['namespace'],
presence: lunr.Query.presence.REQUIRED,
});
});
it('should throw if translated query references missing field', async () => {
const inspectableSearchEngine = new LunrSearchEngineForTranslatorTests({
logger: getVoidLogger(),
});
const translatorUnderTest = inspectableSearchEngine.getTranslator();
const actualTranslatedQuery = translatorUnderTest({
term: 'testTerm',
filters: { kind: 'testKind' },
pageCursor: '',
}) as ConcreteLunrQuery;
expect(actualTranslatedQuery).toMatchObject({
documentTypes: undefined,
lunrQueryBuilder: expect.any(Function),
});
const query: jest.Mocked<lunr.Query> = {
allFields: [],
clauses: [],
term: jest.fn(),
clause: jest.fn(),
};
expect(() =>
actualTranslatedQuery.lunrQueryBuilder.bind(query)(query),
).toThrow();
});
});
describe('query', () => {
it('should perform search query', async () => {
it('should perform search query and return 0 results on empty index', async () => {
const querySpy = jest.spyOn(testLunrSearchEngine, 'query');
// Perform search query and ensure the query func was invoked.
@@ -149,7 +263,7 @@ describe('LunrSearchEngine', () => {
// Perform search query
const mockedSearchResult = await testLunrSearchEngine.query({
term: 'testTerm',
term: 'unknown',
filters: {},
pageCursor: '',
});
@@ -158,6 +272,39 @@ describe('LunrSearchEngine', () => {
expect(mockedSearchResult).toMatchObject({ results: [] });
});
it('should perform search query and return all results on empty term', async () => {
const mockDocuments = [
{
title: 'testTitle',
text: 'testText',
location: 'test/location',
},
];
// Mock indexing of 1 document
testLunrSearchEngine.index('test-index', mockDocuments);
// Perform search query
const mockedSearchResult = await testLunrSearchEngine.query({
term: '',
filters: {},
pageCursor: '',
});
expect(mockedSearchResult).toMatchObject({
results: [
{
document: {
title: 'testTitle',
text: 'testText',
location: 'test/location',
},
type: 'test-index',
},
],
});
});
it('should perform search query and return search results on match', async () => {
const mockDocuments = [
{
@@ -177,6 +324,70 @@ describe('LunrSearchEngine', () => {
pageCursor: '',
});
expect(mockedSearchResult).toMatchObject({
results: [
{
document: {
title: 'testTitle',
text: 'testText',
location: 'test/location',
},
},
],
});
});
it('should perform search query and return search results on partial match', async () => {
const mockDocuments = [
{
title: 'testTitle',
text: 'testText',
location: 'test/location',
},
];
// Mock indexing of 1 document
testLunrSearchEngine.index('test-index', mockDocuments);
// Perform search query
const mockedSearchResult = await testLunrSearchEngine.query({
term: 'testTitle',
filters: {},
pageCursor: '',
});
expect(mockedSearchResult).toMatchObject({
results: [
{
document: {
title: 'testTitle',
text: 'testText',
location: 'test/location',
},
},
],
});
});
it('should perform search query and return search results on fuzzy match', async () => {
const mockDocuments = [
{
title: 'testTitle',
text: 'testText',
location: 'test/location',
},
];
// Mock indexing of 1 document
testLunrSearchEngine.index('test-index', mockDocuments);
// Perform search query
const mockedSearchResult = await testLunrSearchEngine.query({
term: 'testTitel', // Intentional typo
filters: {},
pageCursor: '',
});
// Should return 1 result as we are mocking the indexing of 1 document with match on the title field
expect(mockedSearchResult).toMatchObject({
results: [
@@ -23,9 +23,9 @@ import lunr from 'lunr';
import { Logger } from 'winston';
import { SearchEngine, QueryTranslator } from '../types';
type ConcreteLunrQuery = {
lunrQueryString: string;
documentTypes: string[];
export type ConcreteLunrQuery = {
lunrQueryBuilder: lunr.Index.QueryBuilder;
documentTypes?: string[];
};
type LunrResultEnvelope = {
@@ -50,40 +50,62 @@ export class LunrSearchEngine implements SearchEngine {
filters,
types,
}: SearchQuery): ConcreteLunrQuery => {
let lunrQueryFilters;
const lunrTerm = term ? `+${term}` : '';
if (filters) {
lunrQueryFilters = Object.entries(filters)
.map(([field, value]) => {
// Require that the given field has the given value (with +).
if (['string', 'number', 'boolean'].includes(typeof value)) {
if (typeof value === 'string') {
return ` +${field}:${value.replace(':', '\\:')}`;
}
return ` +${field}:${value}`;
}
// Illustrate how multi-value filters could work.
if (Array.isArray(value)) {
// But warn that Lurn supports this poorly.
this.logger.warn(
`Non-scalar filter value used for field ${field}. Consider using a different Search Engine for better results.`,
);
return ` ${value.map(v => {
return `${field}:${v}`;
})}`;
}
// Log a warning or something about unknown filter value
this.logger.warn(`Unknown filter type used on field ${field}`);
return '';
})
.join('');
}
return {
lunrQueryString: `${lunrTerm}${lunrQueryFilters || ''}`,
documentTypes: types || ['*'],
lunrQueryBuilder: q => {
const termToken = lunr.tokenizer(term);
// Support for typeahead seach is based on https://github.com/olivernn/lunr.js/issues/256#issuecomment-295407852
// look for an exact match and apply a large positive boost
q.term(termToken, {
usePipeline: true,
boost: 100,
});
// look for terms that match the beginning of this term and apply a
// medium boost
q.term(termToken, {
usePipeline: false,
boost: 10,
wildcard: lunr.Query.wildcard.TRAILING,
});
// look for terms that match with an edit distance of 2 and apply a
// small boost
q.term(termToken, {
usePipeline: false,
editDistance: 2,
boost: 1,
});
if (filters) {
Object.entries(filters).forEach(([field, value]) => {
if (!q.allFields.includes(field)) {
// Throw for unknown field, as this will be a non match
throw new Error(`unrecognised field ${field}`);
}
// Require that the given field has the given value
if (['string', 'number', 'boolean'].includes(typeof value)) {
q.term(lunr.tokenizer(value?.toString()), {
presence: lunr.Query.presence.REQUIRED,
fields: [field],
});
} else if (Array.isArray(value)) {
// Illustrate how multi-value filters could work.
// But warn that Lurn supports this poorly.
this.logger.warn(
`Non-scalar filter value used for field ${field}. Consider using a different Search Engine for better results.`,
);
q.term(lunr.tokenizer(value), {
presence: lunr.Query.presence.OPTIONAL,
fields: [field],
});
} else {
// Log a warning or something about unknown filter value
this.logger.warn(`Unknown filter type used on field ${field}`);
}
});
}
},
documentTypes: types,
};
};
@@ -118,18 +140,19 @@ export class LunrSearchEngine implements SearchEngine {
}
query(query: SearchQuery): Promise<SearchResultSet> {
const { lunrQueryString, documentTypes } = this.translator(
const { lunrQueryBuilder, documentTypes } = this.translator(
query,
) as ConcreteLunrQuery;
const results: LunrResultEnvelope[] = [];
if (documentTypes.length === 1 && documentTypes[0] === '*') {
// Iterate over all this.lunrIndex values.
Object.keys(this.lunrIndices).forEach(type => {
// Iterate over the filtered list of this.lunrIndex keys.
Object.keys(this.lunrIndices)
.filter(type => !documentTypes || documentTypes.includes(type))
.forEach(type => {
try {
results.push(
...this.lunrIndices[type].search(lunrQueryString).map(result => {
...this.lunrIndices[type].query(lunrQueryBuilder).map(result => {
return {
result: result,
type: type,
@@ -139,36 +162,14 @@ export class LunrSearchEngine implements SearchEngine {
} catch (err) {
// if a field does not exist on a index, we can see that as a no-match
if (
err instanceof lunr.QueryParseError &&
err instanceof Error &&
err.message.startsWith('unrecognised field')
)
) {
return;
}
throw err;
}
});
} else {
// Iterate over the filtered list of this.lunrIndex keys.
Object.keys(this.lunrIndices)
.filter(type => documentTypes.includes(type))
.forEach(type => {
try {
results.push(
...this.lunrIndices[type].search(lunrQueryString).map(result => {
return {
result: result,
type: type,
};
}),
);
} catch (err) {
// if a field does not exist on a index, we can see that as a no-match
if (
err instanceof lunr.QueryParseError &&
err.message.startsWith('unrecognised field')
)
return;
}
});
}
// Sort results.
results.sort((doc1, doc2) => {
@@ -15,3 +15,4 @@
*/
export { LunrSearchEngine } from './LunrSearchEngine';
export type { ConcreteLunrQuery } from './LunrSearchEngine';