Build search queries using the query build in LunrSearchEngine
Signed-off-by: Oliver Sand <oliver.sand@sda-se.com>
This commit is contained in:
@@ -0,0 +1,7 @@
|
||||
---
|
||||
'@backstage/plugin-search-backend-node': patch
|
||||
---
|
||||
|
||||
Enhance the search results of `LunrSearchEngine` to support a more natural
|
||||
search experience. This is done by allowing typos (by using fuzzy search) and
|
||||
supporting typeahead search (using wildcard queries to match incomplete words).
|
||||
@@ -0,0 +1,10 @@
|
||||
---
|
||||
'@backstage/plugin-search-backend-node': minor
|
||||
---
|
||||
|
||||
Build search queries using the query builder in `LunrSearchEngine`. This removes
|
||||
the support for specifying custom queries with the lunr query syntax, but makes
|
||||
sure that inputs are properly escaped. Supporting the full lunr syntax is still
|
||||
possible by setting a custom query translator.
|
||||
The interface of `LunrSearchEngine.setTranslator()` is changed to support
|
||||
building lunr queries.
|
||||
@@ -274,6 +274,7 @@ touchpoints
|
||||
transpilation
|
||||
transpiled
|
||||
truthy
|
||||
typeahead
|
||||
ui
|
||||
unbreak
|
||||
unmanaged
|
||||
|
||||
@@ -15,8 +15,9 @@
|
||||
*/
|
||||
|
||||
import { getVoidLogger } from '@backstage/backend-common';
|
||||
import { LunrSearchEngine } from './LunrSearchEngine';
|
||||
import { ConcreteLunrQuery, LunrSearchEngine } from './LunrSearchEngine';
|
||||
import { SearchEngine } from '../types';
|
||||
import lunr from 'lunr';
|
||||
|
||||
/**
|
||||
* Just used to test the default translator shipped with LunrSearchEngine.
|
||||
@@ -68,11 +69,35 @@ describe('LunrSearchEngine', () => {
|
||||
term: 'testTerm',
|
||||
filters: {},
|
||||
pageCursor: '',
|
||||
});
|
||||
}) as ConcreteLunrQuery;
|
||||
|
||||
expect(actualTranslatedQuery).toMatchObject({
|
||||
documentTypes: ['*'],
|
||||
lunrQueryString: '+testTerm',
|
||||
documentTypes: undefined,
|
||||
lunrQueryBuilder: expect.any(Function),
|
||||
});
|
||||
|
||||
const query: jest.Mocked<lunr.Query> = {
|
||||
allFields: [],
|
||||
clauses: [],
|
||||
term: jest.fn(),
|
||||
clause: jest.fn(),
|
||||
};
|
||||
|
||||
actualTranslatedQuery.lunrQueryBuilder.bind(query)(query);
|
||||
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
|
||||
boost: 100,
|
||||
usePipeline: true,
|
||||
});
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
|
||||
boost: 10,
|
||||
usePipeline: false,
|
||||
wildcard: lunr.Query.wildcard.TRAILING,
|
||||
});
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
|
||||
boost: 1,
|
||||
usePipeline: false,
|
||||
editDistance: 2,
|
||||
});
|
||||
});
|
||||
|
||||
@@ -86,11 +111,39 @@ describe('LunrSearchEngine', () => {
|
||||
term: 'testTerm',
|
||||
filters: { kind: 'testKind' },
|
||||
pageCursor: '',
|
||||
});
|
||||
}) as ConcreteLunrQuery;
|
||||
|
||||
expect(actualTranslatedQuery).toMatchObject({
|
||||
documentTypes: ['*'],
|
||||
lunrQueryString: '+testTerm +kind:testKind',
|
||||
documentTypes: undefined,
|
||||
lunrQueryBuilder: expect.any(Function),
|
||||
});
|
||||
|
||||
const query: jest.Mocked<lunr.Query> = {
|
||||
allFields: ['kind'],
|
||||
clauses: [],
|
||||
term: jest.fn(),
|
||||
clause: jest.fn(),
|
||||
};
|
||||
|
||||
actualTranslatedQuery.lunrQueryBuilder.bind(query)(query);
|
||||
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
|
||||
boost: 100,
|
||||
usePipeline: true,
|
||||
});
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
|
||||
boost: 10,
|
||||
usePipeline: false,
|
||||
wildcard: lunr.Query.wildcard.TRAILING,
|
||||
});
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
|
||||
boost: 1,
|
||||
usePipeline: false,
|
||||
editDistance: 2,
|
||||
});
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testKind'), {
|
||||
fields: ['kind'],
|
||||
presence: lunr.Query.presence.REQUIRED,
|
||||
});
|
||||
});
|
||||
|
||||
@@ -104,17 +157,78 @@ describe('LunrSearchEngine', () => {
|
||||
term: 'testTerm',
|
||||
filters: { kind: 'testKind', namespace: 'testNameSpace' },
|
||||
pageCursor: '',
|
||||
});
|
||||
}) as ConcreteLunrQuery;
|
||||
|
||||
expect(actualTranslatedQuery).toMatchObject({
|
||||
documentTypes: ['*'],
|
||||
lunrQueryString: '+testTerm +kind:testKind +namespace:testNameSpace',
|
||||
documentTypes: undefined,
|
||||
lunrQueryBuilder: expect.any(Function),
|
||||
});
|
||||
|
||||
const query: jest.Mocked<lunr.Query> = {
|
||||
allFields: ['kind', 'namespace'],
|
||||
clauses: [],
|
||||
term: jest.fn(),
|
||||
clause: jest.fn(),
|
||||
};
|
||||
|
||||
actualTranslatedQuery.lunrQueryBuilder.bind(query)(query);
|
||||
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
|
||||
boost: 100,
|
||||
usePipeline: true,
|
||||
});
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
|
||||
boost: 10,
|
||||
usePipeline: false,
|
||||
wildcard: lunr.Query.wildcard.TRAILING,
|
||||
});
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testTerm'), {
|
||||
boost: 1,
|
||||
usePipeline: false,
|
||||
editDistance: 2,
|
||||
});
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testKind'), {
|
||||
fields: ['kind'],
|
||||
presence: lunr.Query.presence.REQUIRED,
|
||||
});
|
||||
expect(query.term).toBeCalledWith(lunr.tokenizer('testNameSpace'), {
|
||||
fields: ['namespace'],
|
||||
presence: lunr.Query.presence.REQUIRED,
|
||||
});
|
||||
});
|
||||
|
||||
it('should throw if translated query references missing field', async () => {
|
||||
const inspectableSearchEngine = new LunrSearchEngineForTranslatorTests({
|
||||
logger: getVoidLogger(),
|
||||
});
|
||||
const translatorUnderTest = inspectableSearchEngine.getTranslator();
|
||||
|
||||
const actualTranslatedQuery = translatorUnderTest({
|
||||
term: 'testTerm',
|
||||
filters: { kind: 'testKind' },
|
||||
pageCursor: '',
|
||||
}) as ConcreteLunrQuery;
|
||||
|
||||
expect(actualTranslatedQuery).toMatchObject({
|
||||
documentTypes: undefined,
|
||||
lunrQueryBuilder: expect.any(Function),
|
||||
});
|
||||
|
||||
const query: jest.Mocked<lunr.Query> = {
|
||||
allFields: [],
|
||||
clauses: [],
|
||||
term: jest.fn(),
|
||||
clause: jest.fn(),
|
||||
};
|
||||
|
||||
expect(() =>
|
||||
actualTranslatedQuery.lunrQueryBuilder.bind(query)(query),
|
||||
).toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe('query', () => {
|
||||
it('should perform search query', async () => {
|
||||
it('should perform search query and return 0 results on empty index', async () => {
|
||||
const querySpy = jest.spyOn(testLunrSearchEngine, 'query');
|
||||
|
||||
// Perform search query and ensure the query func was invoked.
|
||||
@@ -149,7 +263,7 @@ describe('LunrSearchEngine', () => {
|
||||
|
||||
// Perform search query
|
||||
const mockedSearchResult = await testLunrSearchEngine.query({
|
||||
term: 'testTerm',
|
||||
term: 'unknown',
|
||||
filters: {},
|
||||
pageCursor: '',
|
||||
});
|
||||
@@ -158,6 +272,39 @@ describe('LunrSearchEngine', () => {
|
||||
expect(mockedSearchResult).toMatchObject({ results: [] });
|
||||
});
|
||||
|
||||
it('should perform search query and return all results on empty term', async () => {
|
||||
const mockDocuments = [
|
||||
{
|
||||
title: 'testTitle',
|
||||
text: 'testText',
|
||||
location: 'test/location',
|
||||
},
|
||||
];
|
||||
|
||||
// Mock indexing of 1 document
|
||||
testLunrSearchEngine.index('test-index', mockDocuments);
|
||||
|
||||
// Perform search query
|
||||
const mockedSearchResult = await testLunrSearchEngine.query({
|
||||
term: '',
|
||||
filters: {},
|
||||
pageCursor: '',
|
||||
});
|
||||
|
||||
expect(mockedSearchResult).toMatchObject({
|
||||
results: [
|
||||
{
|
||||
document: {
|
||||
title: 'testTitle',
|
||||
text: 'testText',
|
||||
location: 'test/location',
|
||||
},
|
||||
type: 'test-index',
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('should perform search query and return search results on match', async () => {
|
||||
const mockDocuments = [
|
||||
{
|
||||
@@ -177,6 +324,70 @@ describe('LunrSearchEngine', () => {
|
||||
pageCursor: '',
|
||||
});
|
||||
|
||||
expect(mockedSearchResult).toMatchObject({
|
||||
results: [
|
||||
{
|
||||
document: {
|
||||
title: 'testTitle',
|
||||
text: 'testText',
|
||||
location: 'test/location',
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('should perform search query and return search results on partial match', async () => {
|
||||
const mockDocuments = [
|
||||
{
|
||||
title: 'testTitle',
|
||||
text: 'testText',
|
||||
location: 'test/location',
|
||||
},
|
||||
];
|
||||
|
||||
// Mock indexing of 1 document
|
||||
testLunrSearchEngine.index('test-index', mockDocuments);
|
||||
|
||||
// Perform search query
|
||||
const mockedSearchResult = await testLunrSearchEngine.query({
|
||||
term: 'testTitle',
|
||||
filters: {},
|
||||
pageCursor: '',
|
||||
});
|
||||
|
||||
expect(mockedSearchResult).toMatchObject({
|
||||
results: [
|
||||
{
|
||||
document: {
|
||||
title: 'testTitle',
|
||||
text: 'testText',
|
||||
location: 'test/location',
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it('should perform search query and return search results on fuzzy match', async () => {
|
||||
const mockDocuments = [
|
||||
{
|
||||
title: 'testTitle',
|
||||
text: 'testText',
|
||||
location: 'test/location',
|
||||
},
|
||||
];
|
||||
|
||||
// Mock indexing of 1 document
|
||||
testLunrSearchEngine.index('test-index', mockDocuments);
|
||||
|
||||
// Perform search query
|
||||
const mockedSearchResult = await testLunrSearchEngine.query({
|
||||
term: 'testTitel', // Intentional typo
|
||||
filters: {},
|
||||
pageCursor: '',
|
||||
});
|
||||
|
||||
// Should return 1 result as we are mocking the indexing of 1 document with match on the title field
|
||||
expect(mockedSearchResult).toMatchObject({
|
||||
results: [
|
||||
|
||||
@@ -23,9 +23,9 @@ import lunr from 'lunr';
|
||||
import { Logger } from 'winston';
|
||||
import { SearchEngine, QueryTranslator } from '../types';
|
||||
|
||||
type ConcreteLunrQuery = {
|
||||
lunrQueryString: string;
|
||||
documentTypes: string[];
|
||||
export type ConcreteLunrQuery = {
|
||||
lunrQueryBuilder: lunr.Index.QueryBuilder;
|
||||
documentTypes?: string[];
|
||||
};
|
||||
|
||||
type LunrResultEnvelope = {
|
||||
@@ -50,40 +50,62 @@ export class LunrSearchEngine implements SearchEngine {
|
||||
filters,
|
||||
types,
|
||||
}: SearchQuery): ConcreteLunrQuery => {
|
||||
let lunrQueryFilters;
|
||||
const lunrTerm = term ? `+${term}` : '';
|
||||
if (filters) {
|
||||
lunrQueryFilters = Object.entries(filters)
|
||||
.map(([field, value]) => {
|
||||
// Require that the given field has the given value (with +).
|
||||
if (['string', 'number', 'boolean'].includes(typeof value)) {
|
||||
if (typeof value === 'string') {
|
||||
return ` +${field}:${value.replace(':', '\\:')}`;
|
||||
}
|
||||
return ` +${field}:${value}`;
|
||||
}
|
||||
|
||||
// Illustrate how multi-value filters could work.
|
||||
if (Array.isArray(value)) {
|
||||
// But warn that Lurn supports this poorly.
|
||||
this.logger.warn(
|
||||
`Non-scalar filter value used for field ${field}. Consider using a different Search Engine for better results.`,
|
||||
);
|
||||
return ` ${value.map(v => {
|
||||
return `${field}:${v}`;
|
||||
})}`;
|
||||
}
|
||||
|
||||
// Log a warning or something about unknown filter value
|
||||
this.logger.warn(`Unknown filter type used on field ${field}`);
|
||||
return '';
|
||||
})
|
||||
.join('');
|
||||
}
|
||||
|
||||
return {
|
||||
lunrQueryString: `${lunrTerm}${lunrQueryFilters || ''}`,
|
||||
documentTypes: types || ['*'],
|
||||
lunrQueryBuilder: q => {
|
||||
const termToken = lunr.tokenizer(term);
|
||||
|
||||
// Support for typeahead seach is based on https://github.com/olivernn/lunr.js/issues/256#issuecomment-295407852
|
||||
// look for an exact match and apply a large positive boost
|
||||
q.term(termToken, {
|
||||
usePipeline: true,
|
||||
boost: 100,
|
||||
});
|
||||
// look for terms that match the beginning of this term and apply a
|
||||
// medium boost
|
||||
q.term(termToken, {
|
||||
usePipeline: false,
|
||||
boost: 10,
|
||||
wildcard: lunr.Query.wildcard.TRAILING,
|
||||
});
|
||||
// look for terms that match with an edit distance of 2 and apply a
|
||||
// small boost
|
||||
q.term(termToken, {
|
||||
usePipeline: false,
|
||||
editDistance: 2,
|
||||
boost: 1,
|
||||
});
|
||||
|
||||
if (filters) {
|
||||
Object.entries(filters).forEach(([field, value]) => {
|
||||
if (!q.allFields.includes(field)) {
|
||||
// Throw for unknown field, as this will be a non match
|
||||
throw new Error(`unrecognised field ${field}`);
|
||||
}
|
||||
|
||||
// Require that the given field has the given value
|
||||
if (['string', 'number', 'boolean'].includes(typeof value)) {
|
||||
q.term(lunr.tokenizer(value?.toString()), {
|
||||
presence: lunr.Query.presence.REQUIRED,
|
||||
fields: [field],
|
||||
});
|
||||
} else if (Array.isArray(value)) {
|
||||
// Illustrate how multi-value filters could work.
|
||||
// But warn that Lurn supports this poorly.
|
||||
this.logger.warn(
|
||||
`Non-scalar filter value used for field ${field}. Consider using a different Search Engine for better results.`,
|
||||
);
|
||||
q.term(lunr.tokenizer(value), {
|
||||
presence: lunr.Query.presence.OPTIONAL,
|
||||
fields: [field],
|
||||
});
|
||||
} else {
|
||||
// Log a warning or something about unknown filter value
|
||||
this.logger.warn(`Unknown filter type used on field ${field}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
},
|
||||
documentTypes: types,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -118,18 +140,19 @@ export class LunrSearchEngine implements SearchEngine {
|
||||
}
|
||||
|
||||
query(query: SearchQuery): Promise<SearchResultSet> {
|
||||
const { lunrQueryString, documentTypes } = this.translator(
|
||||
const { lunrQueryBuilder, documentTypes } = this.translator(
|
||||
query,
|
||||
) as ConcreteLunrQuery;
|
||||
|
||||
const results: LunrResultEnvelope[] = [];
|
||||
|
||||
if (documentTypes.length === 1 && documentTypes[0] === '*') {
|
||||
// Iterate over all this.lunrIndex values.
|
||||
Object.keys(this.lunrIndices).forEach(type => {
|
||||
// Iterate over the filtered list of this.lunrIndex keys.
|
||||
Object.keys(this.lunrIndices)
|
||||
.filter(type => !documentTypes || documentTypes.includes(type))
|
||||
.forEach(type => {
|
||||
try {
|
||||
results.push(
|
||||
...this.lunrIndices[type].search(lunrQueryString).map(result => {
|
||||
...this.lunrIndices[type].query(lunrQueryBuilder).map(result => {
|
||||
return {
|
||||
result: result,
|
||||
type: type,
|
||||
@@ -139,36 +162,14 @@ export class LunrSearchEngine implements SearchEngine {
|
||||
} catch (err) {
|
||||
// if a field does not exist on a index, we can see that as a no-match
|
||||
if (
|
||||
err instanceof lunr.QueryParseError &&
|
||||
err instanceof Error &&
|
||||
err.message.startsWith('unrecognised field')
|
||||
)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
});
|
||||
} else {
|
||||
// Iterate over the filtered list of this.lunrIndex keys.
|
||||
Object.keys(this.lunrIndices)
|
||||
.filter(type => documentTypes.includes(type))
|
||||
.forEach(type => {
|
||||
try {
|
||||
results.push(
|
||||
...this.lunrIndices[type].search(lunrQueryString).map(result => {
|
||||
return {
|
||||
result: result,
|
||||
type: type,
|
||||
};
|
||||
}),
|
||||
);
|
||||
} catch (err) {
|
||||
// if a field does not exist on a index, we can see that as a no-match
|
||||
if (
|
||||
err instanceof lunr.QueryParseError &&
|
||||
err.message.startsWith('unrecognised field')
|
||||
)
|
||||
return;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Sort results.
|
||||
results.sort((doc1, doc2) => {
|
||||
|
||||
@@ -15,3 +15,4 @@
|
||||
*/
|
||||
|
||||
export { LunrSearchEngine } from './LunrSearchEngine';
|
||||
export type { ConcreteLunrQuery } from './LunrSearchEngine';
|
||||
|
||||
Reference in New Issue
Block a user