backend-common: implement UrlReader.search that does glob matching

This commit is contained in:
Fredrik Adelöw
2021-02-04 15:54:54 +01:00
parent 965e200c61
commit 5a51635197
20 changed files with 696 additions and 47 deletions
+5
View File
@@ -0,0 +1,5 @@
---
'@backstage/backend-common': patch
---
Implement `UrlReader.search` which implements glob matching.
+1
View File
@@ -50,6 +50,7 @@
"knex": "^0.21.6",
"lodash": "^4.17.15",
"logform": "^2.1.1",
"minimatch": "^3.0.4",
"minimist": "^1.2.5",
"morgan": "^1.10.0",
"selfsigned": "^1.10.7",
@@ -29,6 +29,7 @@ import {
ReaderFactory,
ReadTreeOptions,
ReadTreeResponse,
SearchResponse,
UrlReader,
} from './types';
import { ReadTreeResponseFactory } from './tree';
@@ -116,6 +117,10 @@ export class AzureUrlReader implements UrlReader {
});
}
/**
 * Search is not implemented for this reader; the returned promise always
 * rejects with an Error.
 */
async search(): Promise<SearchResponse> {
  const message = 'AzureUrlReader does not implement search';
  throw new Error(message);
}
toString() {
const { host, token } = this.options;
return `azure{host=${host},authed=${Boolean(token)}}`;
@@ -31,6 +31,7 @@ import {
ReaderFactory,
ReadTreeOptions,
ReadTreeResponse,
SearchResponse,
UrlReader,
} from './types';
@@ -129,6 +130,10 @@ export class BitbucketUrlReader implements UrlReader {
});
}
/**
 * Search is not implemented for this reader; the returned promise always
 * rejects with an Error.
 */
async search(): Promise<SearchResponse> {
  const message = 'BitbucketUrlReader does not implement search';
  throw new Error(message);
}
toString() {
const { host, token, username, appPassword } = this.config;
let authed = Boolean(token);
@@ -16,7 +16,12 @@
import fetch from 'cross-fetch';
import { NotFoundError } from '../errors';
import { ReaderFactory, ReadTreeResponse, UrlReader } from './types';
import {
ReaderFactory,
ReadTreeResponse,
SearchResponse,
UrlReader,
} from './types';
/**
* A UrlReader that does a plain fetch of the URL.
@@ -68,10 +73,14 @@ export class FetchUrlReader implements UrlReader {
throw new Error(message);
}
readTree(): Promise<ReadTreeResponse> {
async readTree(): Promise<ReadTreeResponse> {
throw new Error('FetchUrlReader does not implement readTree');
}
/**
 * Search is not implemented for this reader; the returned promise always
 * rejects with an Error.
 */
async search(): Promise<SearchResponse> {
  const message = 'FetchUrlReader does not implement search';
  throw new Error(message);
}
toString() {
return 'fetch{}';
}
@@ -23,7 +23,13 @@ import { rest } from 'msw';
import { setupServer } from 'msw/node';
import path from 'path';
import { NotFoundError, NotModifiedError } from '../errors';
import { GithubUrlReader } from './GithubUrlReader';
import {
GhBlobResponse,
GhBranchResponse,
GhRepoResponse,
GhTreeResponse,
GithubUrlReader,
} from './GithubUrlReader';
import { ReadTreeResponseFactory } from './tree';
const treeResponseFactory = ReadTreeResponseFactory.create({
@@ -69,6 +75,10 @@ describe('GithubUrlReader', () => {
});
});
/*
* read
*/
describe('read', () => {
it('should use the headers from the credentials provider to the fetch request when doing read', async () => {
expect.assertions(2);
@@ -107,6 +117,10 @@ describe('GithubUrlReader', () => {
});
});
/*
* readTree
*/
describe('readTree', () => {
beforeEach(() => {
mockFs({
@@ -128,29 +142,31 @@ describe('GithubUrlReader', () => {
);
const reposGithubApiResponse = {
id: '123',
id: 123,
full_name: 'backstage/mock',
default_branch: 'main',
branches_url:
'https://api.github.com/repos/backstage/mock/branches{/branch}',
archive_url:
'https://api.github.com/repos/backstage/mock/{archive_format}{/ref}',
};
} as Partial<GhRepoResponse>;
const reposGheApiResponse = {
...reposGithubApiResponse,
id: 123,
full_name: 'backstage/mock',
default_branch: 'main',
branches_url:
'https://ghe.github.com/api/v3/repos/backstage/mock/branches{/branch}',
archive_url:
'https://ghe.github.com/api/v3/repos/backstage/mock/{archive_format}{/ref}',
};
} as Partial<GhRepoResponse>;
const branchesApiResponse = {
name: 'main',
commit: {
sha: 'etag123abc',
},
};
} as Partial<GhBranchResponse>;
beforeEach(() => {
worker.use(
@@ -392,4 +408,359 @@ describe('GithubUrlReader', () => {
}).toThrowError('must configure an explicit apiBaseUrl');
});
});
/*
* search
*/
describe('search', () => {
beforeEach(() => {
mockFs({ '/tmp': mockFs.directory() });
});
afterEach(() => {
mockFs.restore();
});
const repoBuffer = fs.readFileSync(
path.resolve(
'src',
'reading',
'__fixtures__',
'backstage-mock-etag123.tar.gz',
),
);
const githubTreeContents: GhTreeResponse['tree'] = [
{
path: 'mkdocs.yml',
type: 'blob',
url: 'https://api.github.com/repos/backstage/mock/git/blobs/1',
},
{
path: 'docs',
type: 'tree',
url: 'https://api.github.com/repos/backstage/mock/git/trees/2',
},
{
path: 'docs/index.md',
type: 'blob',
url: 'https://api.github.com/repos/backstage/mock/git/blobs/3',
},
];
const gheTreeContents: GhTreeResponse['tree'] = [
{
path: 'mkdocs.yml',
type: 'blob',
url: 'https://ghe.github.com/api/v3/repos/backstage/mock/git/blobs/1',
},
{
path: 'docs',
type: 'tree',
url: 'https://ghe.github.com/api/v3/repos/backstage/mock/git/trees/2',
},
{
path: 'docs/index.md',
type: 'blob',
url: 'https://ghe.github.com/api/v3/repos/backstage/mock/git/blobs/3',
},
];
// Tarballs
beforeEach(() => {
worker.use(
rest.get(
'https://api.github.com/repos/backstage/mock/tarball/etag123abc',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/x-gzip'),
ctx.set(
'content-disposition',
'attachment; filename=backstage-mock-etag123.tar.gz',
),
ctx.body(repoBuffer),
),
),
rest.get(
'https://ghe.github.com/api/v3/repos/backstage/mock/tarball/etag123abc',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/x-gzip'),
ctx.set(
'content-disposition',
'attachment; filename=backstage-mock-etag123.tar.gz',
),
ctx.body(repoBuffer),
),
),
);
});
// Repo details
beforeEach(() => {
const githubResponse = {
id: 123,
full_name: 'backstage/mock',
default_branch: 'main',
branches_url:
'https://api.github.com/repos/backstage/mock/branches{/branch}',
archive_url:
'https://api.github.com/repos/backstage/mock/{archive_format}{/ref}',
trees_url:
'https://api.github.com/repos/backstage/mock/git/trees{/sha}',
} as Partial<GhRepoResponse>;
const gheResponse = {
id: 123,
full_name: 'backstage/mock',
default_branch: 'main',
branches_url:
'https://ghe.github.com/api/v3/repos/backstage/mock/branches{/branch}',
archive_url:
'https://ghe.github.com/api/v3/repos/backstage/mock/{archive_format}{/ref}',
trees_url:
'https://ghe.github.com/api/v3/repos/backstage/mock/git/trees{/sha}',
} as Partial<GhRepoResponse>;
worker.use(
rest.get('https://api.github.com/repos/backstage/mock', (_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json(githubResponse),
),
),
rest.get(
'https://ghe.github.com/api/v3/repos/backstage/mock',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json(gheResponse),
),
),
);
});
// Branch details
beforeEach(() => {
const response = {
name: 'main',
commit: {
sha: 'etag123abc',
},
} as Partial<GhBranchResponse>;
worker.use(
rest.get(
'https://api.github.com/repos/backstage/mock/branches/main',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json(response),
),
),
rest.get(
'https://ghe.github.com/api/v3/repos/backstage/mock/branches/main',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json(response),
),
),
rest.get(
'https://api.github.com/repos/backstage/mock/branches/branchDoesNotExist',
(_, res, ctx) => res(ctx.status(404)),
),
);
});
// Blobs
beforeEach(() => {
const blob1Response = {
content: Buffer.from('site_name: Test\n').toString('base64'),
} as Partial<GhBlobResponse>;
const blob3Response = {
content: Buffer.from('# Test\n').toString('base64'),
} as Partial<GhBlobResponse>;
worker.use(
rest.get(
'https://api.github.com/repos/backstage/mock/git/blobs/1',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json(blob1Response),
),
),
rest.get(
'https://api.github.com/repos/backstage/mock/git/blobs/3',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json(blob3Response),
),
),
rest.get(
'https://ghe.github.com/api/v3/repos/backstage/mock/git/blobs/1',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json(blob1Response),
),
),
rest.get(
'https://ghe.github.com/api/v3/repos/backstage/mock/git/blobs/3',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json(blob3Response),
),
),
);
});
/**
 * Shared expectations for the search tests. Runs a series of glob searches
 * through the given reader against the mocked backstage/mock repository and
 * verifies the etag, the number of matches, the resulting file URLs and the
 * file contents.
 *
 * @param reader - The reader under test
 * @param baseUrl - The origin that the searched URLs are built from
 */
async function runTests(reader: GithubUrlReader, baseUrl: string) {
  // A catch-all glob returns both files, with the commit sha as the etag
  const everything = await reader.search(
    `${baseUrl}/backstage/mock/tree/main/**/*`,
  );
  expect(everything.etag).toBe('etag123abc');
  expect(everything.files.length).toBe(2);

  // A non-matching etag does not short-circuit the search
  const withStaleEtag = await reader.search(
    `${baseUrl}/backstage/mock/tree/main/**/*`,
    { etag: 'somethingElse' },
  );
  expect(withStaleEtag.etag).toBe('etag123abc');
  expect(withStaleEtag.files.length).toBe(2);

  // A pattern that matches nothing gives an empty result
  const nothing = await reader.search(`${baseUrl}/backstage/mock/tree/main/o`);
  expect(nothing.files.length).toBe(0);

  // A single-level wildcard only matches the file in the root
  const rootLevel = await reader.search(
    `${baseUrl}/backstage/mock/tree/main/*docs*`,
  );
  expect(rootLevel.files.length).toBe(1);
  const [mkdocs] = rootLevel.files;
  expect(mkdocs.url).toBe(`${baseUrl}/backstage/mock/tree/main/mkdocs.yml`);
  await expect(mkdocs.content()).resolves.toEqual(
    Buffer.from('site_name: Test\n'),
  );

  // A wildcard directory segment matches the nested file
  const nested = await reader.search(
    `${baseUrl}/backstage/mock/tree/main/*/index.*`,
  );
  expect(nested.files.length).toBe(1);
  const [index] = nested.files;
  expect(index.url).toBe(`${baseUrl}/backstage/mock/tree/main/docs/index.md`);
  await expect(index.content()).resolves.toEqual(Buffer.from('# Test\n'));
}
// The mocked trees endpoint reports `truncated: false`, so search is expected
// to take its matches from the recursive tree listing and load file contents
// through the mocked blob endpoints. The assertions live in runTests.
// eslint-disable-next-line jest/expect-expect
it('succeeds on github when going via repo listing', async () => {
worker.use(
rest.get(
'https://api.github.com/repos/backstage/mock/git/trees/etag123abc',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json({
truncated: false,
tree: githubTreeContents,
} as Partial<GhTreeResponse>),
),
),
);
await runTests(githubProcessor, 'https://github.com');
});
// The mocked trees endpoint reports `truncated: true` with an empty tree, so
// search cannot rely on the listing and has to fall back to reading the
// mocked tarball instead. The assertions live in runTests.
// eslint-disable-next-line jest/expect-expect
it('succeeds on github when going via readTree', async () => {
worker.use(
rest.get(
'https://api.github.com/repos/backstage/mock/git/trees/etag123abc',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json({
truncated: true,
tree: [],
} as Partial<GhTreeResponse>),
),
),
);
await runTests(githubProcessor, 'https://github.com');
});
// Same as the github.com repo listing case, but against the mocked GitHub
// Enterprise API: `truncated: false` means the matches are expected to come
// from the recursive tree listing. The assertions live in runTests.
// eslint-disable-next-line jest/expect-expect
it('succeeds on ghe when going via repo listing', async () => {
worker.use(
rest.get(
'https://ghe.github.com/api/v3/repos/backstage/mock/git/trees/etag123abc',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json({
truncated: false,
tree: gheTreeContents,
} as Partial<GhTreeResponse>),
),
),
);
await runTests(gheProcessor, 'https://ghe.github.com');
});
// Same as the github.com readTree case, but against the mocked GitHub
// Enterprise API: `truncated: true` with an empty tree forces the fallback to
// reading the mocked tarball. The assertions live in runTests.
// eslint-disable-next-line jest/expect-expect
it('succeeds on ghe when going via readTree', async () => {
worker.use(
rest.get(
'https://ghe.github.com/api/v3/repos/backstage/mock/git/trees/etag123abc',
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.json({
truncated: true,
tree: [],
} as Partial<GhTreeResponse>),
),
),
);
await runTests(gheProcessor, 'https://ghe.github.com');
});
// When the caller passes the etag that equals the current commit sha of the
// branch, search must reject with NotModifiedError instead of returning files.
it('throws NotModifiedError when same etag', async () => {
  await expect(
    githubProcessor.search(
      // Host was previously misspelled as "githib.com"
      'https://github.com/backstage/mock/tree/main/**/*',
      { etag: 'etag123abc' },
    ),
  ).rejects.toThrow(NotModifiedError);
});
// The mocked branches endpoint answers 404 for "branchDoesNotExist", which
// search is expected to surface as a NotFoundError.
it('throws NotFoundError when missing branch', async () => {
  await expect(
    githubProcessor.search(
      // Host was previously misspelled as "githib.com"
      'https://github.com/backstage/mock/tree/branchDoesNotExist/**/*',
    ),
  ).rejects.toThrow(NotFoundError);
});
});
});
@@ -23,6 +23,7 @@ import {
import { RestEndpointMethodTypes } from '@octokit/rest';
import fetch from 'cross-fetch';
import parseGitUrl from 'git-url-parse';
import { Minimatch } from 'minimatch';
import { Readable } from 'stream';
import { NotFoundError, NotModifiedError } from '../errors';
import { ReadTreeResponseFactory } from './tree';
@@ -30,9 +31,17 @@ import {
ReaderFactory,
ReadTreeOptions,
ReadTreeResponse,
SearchOptions,
SearchResponse,
SearchResponseFile,
UrlReader,
} from './types';
export type GhRepoResponse = RestEndpointMethodTypes['repos']['get']['response']['data'];
export type GhBranchResponse = RestEndpointMethodTypes['repos']['getBranch']['response']['data'];
export type GhTreeResponse = RestEndpointMethodTypes['git']['getTree']['response']['data'];
export type GhBlobResponse = RestEndpointMethodTypes['git']['getBlob']['response']['data'];
/**
* A processor that adds the ability to read files from GitHub v3 APIs, such as
* the one exposed by GitHub itself.
@@ -106,27 +115,43 @@ export class GithubUrlReader implements UrlReader {
throw new NotModifiedError();
}
const { filepath } = parseGitUrl(url);
const { headers } = await this.deps.credentialsProvider.getCredentials({
url,
});
// archive_url looks like "https://api.github.com/repos/owner/repo/{archive_format}{/ref}"
const archive = await this.fetchResponse(
repoDetails.repo.archive_url
.replace('{archive_format}', 'tarball')
.replace('{/ref}', `/${commitSha}`),
return this.doReadTree(
repoDetails.repo.archive_url,
commitSha,
filepath,
{ headers },
options,
);
}
async search(url: string, options?: SearchOptions): Promise<SearchResponse> {
const repoDetails = await this.getRepoDetails(url);
const commitSha = repoDetails.branch.commit.sha!;
if (options?.etag && options.etag === commitSha) {
throw new NotModifiedError();
}
const { filepath } = parseGitUrl(url);
const { headers } = await this.deps.credentialsProvider.getCredentials({
url,
});
const files = await this.doSearch(
url,
repoDetails.repo.trees_url,
repoDetails.repo.archive_url,
commitSha,
filepath,
{ headers },
);
const { filepath } = parseGitUrl(url);
return await this.deps.treeResponseFactory.fromTarArchive({
// TODO(Rugvip): Underlying implementation of fetch will be node-fetch, we probably want
// to stick to using that in exclusively backend code.
stream: (archive.body as unknown) as Readable,
subpath: filepath,
etag: commitSha,
filter: options?.filter,
});
return { files, etag: commitSha };
}
toString() {
@@ -134,11 +159,92 @@ export class GithubUrlReader implements UrlReader {
return `github{host=${host},authed=${Boolean(token)}}`;
}
/**
 * Fetches the tarball of the repository at a given commit and turns it into
 * a tree response.
 *
 * @param archiveUrl - The archive URL template from the repo details
 * @param sha - The commit to download; also used as the etag of the response
 * @param subpath - Passed on to the tree response factory as the subpath
 * @param init - Request settings (e.g. headers) for the archive download
 * @param options - Optional read options; only the filter is used
 */
private async doReadTree(
  archiveUrl: string,
  sha: string,
  subpath: string,
  init: RequestInit,
  options?: ReadTreeOptions,
): Promise<ReadTreeResponse> {
  // archive_url looks like "https://api.github.com/repos/owner/repo/{archive_format}{/ref}"
  const tarballUrl = archiveUrl
    .replace('{archive_format}', 'tarball')
    .replace('{/ref}', `/${sha}`);
  const response = await this.fetchResponse(tarballUrl, init);

  // TODO(Rugvip): Underlying implementation of fetch will be node-fetch, we probably want
  //               to stick to using that in exclusively backend code.
  const stream = (response.body as unknown) as Readable;

  return await this.deps.treeResponseFactory.fromTarArchive({
    stream,
    subpath,
    etag: sha,
    filter: options?.filter,
  });
}
/**
 * Finds all files at the given commit whose repository-relative path matches
 * the glob in `query`.
 *
 * The recursive tree listing is requested first. If it is complete, the
 * matching blobs are returned with content loaders that fetch each blob on
 * demand. If the listing was truncated, the tarball is read through
 * doReadTree and filtered with the same matcher.
 *
 * @param url - The original search URL, used as the template for file URLs
 * @param treesUrl - The trees URL template from the repo details
 * @param archiveUrl - The archive URL template from the repo details
 * @param sha - The commit to search in
 * @param query - The glob pattern; leading slashes are ignored
 * @param init - Request settings (e.g. headers) for all API calls
 */
private async doSearch(
  url: string,
  treesUrl: string,
  archiveUrl: string,
  sha: string,
  query: string,
  init: RequestInit,
): Promise<SearchResponseFile[]> {
  // TODO(freben): Use the integration package facility for this instead
  const toFileUrl = (path: string): string => {
    // pathname starts as /backstage/backstage/blob/master/<path>, so keep
    // the first four segments and swap out the rest for the matched path
    const target = new URL(url);
    const segments = target.pathname.split('/');
    const prefix = segments.slice(1, 5).join('/');
    target.pathname = `${prefix}/${path}`;
    return target.toString();
  };

  const pattern = query.replace(/^\/+/, '');
  const matcher = new Minimatch(pattern);

  // trees_url looks like "https://api.github.com/repos/octocat/Hello-World/git/trees{/sha}"
  const listingUrl = treesUrl.replace('{/sha}', `/${sha}?recursive=true`);
  const listing: GhTreeResponse = await this.fetchJson(listingUrl, init);

  // For larger repos, we leverage readTree and filter through that instead
  if (listing.truncated) {
    const tree = await this.doReadTree(archiveUrl, sha, '', init, {
      filter: path => matcher.match(path),
    });
    const entries = await tree.files();
    return entries.map(entry => ({
      url: toFileUrl(entry.path),
      content: entry.content,
    }));
  }

  // The simple case is that we got the entire tree in a single operation.
  const results: SearchResponseFile[] = [];
  for (const item of listing.tree) {
    if (item.type !== 'blob' || !item.path || !item.url) {
      continue;
    }
    if (!matcher.match(item.path)) {
      continue;
    }
    const blobUrl = item.url;
    results.push({
      url: toFileUrl(item.path),
      content: async () => {
        const blob: GhBlobResponse = await this.fetchJson(blobUrl, init);
        return Buffer.from(blob.content, 'base64');
      },
    });
  }
  return results;
}
private async getRepoDetails(
url: string,
): Promise<{
repo: RestEndpointMethodTypes['repos']['get']['response']['data'];
branch: RestEndpointMethodTypes['repos']['getBranch']['response']['data'];
repo: GhRepoResponse;
branch: GhBranchResponse;
}> {
const parsed = parseGitUrl(url);
const { ref, full_name } = parsed;
@@ -151,13 +257,13 @@ export class GithubUrlReader implements UrlReader {
url,
});
const repo: RestEndpointMethodTypes['repos']['get']['response']['data'] = await this.fetchJson(
const repo: GhRepoResponse = await this.fetchJson(
`${this.config.apiBaseUrl}/repos/${full_name}`,
{ headers },
);
// branches_url looks like "https://api.github.com/repos/owner/repo/branches{/branch}"
const branch: RestEndpointMethodTypes['repos']['getBranch']['response']['data'] = await this.fetchJson(
const branch: GhBranchResponse = await this.fetchJson(
repo.branches_url.replace('{/branch}', `/${ref || repo.default_branch}`),
{ headers },
);
@@ -21,16 +21,17 @@ import {
readGitLabIntegrationConfigs,
} from '@backstage/integration';
import fetch from 'cross-fetch';
import parseGitUrl from 'git-url-parse';
import { Readable } from 'stream';
import { NotFoundError, NotModifiedError } from '../errors';
import { ReadTreeResponseFactory } from './tree';
import {
ReaderFactory,
ReadTreeOptions,
ReadTreeResponse,
SearchResponse,
UrlReader,
} from './types';
import parseGitUrl from 'git-url-parse';
import { Readable } from 'stream';
export class GitlabUrlReader implements UrlReader {
private readonly treeResponseFactory: ReadTreeResponseFactory;
@@ -154,6 +155,10 @@ export class GitlabUrlReader implements UrlReader {
});
}
/**
 * Search is not implemented for this reader; the returned promise always
 * rejects with an Error.
 */
async search(): Promise<SearchResponse> {
  const message = 'GitlabUrlReader does not implement search';
  throw new Error(message);
}
toString() {
const { host, token } = this.config;
return `gitlab{host=${host},authed=${Boolean(token)}}`;
@@ -18,6 +18,8 @@ import { NotAllowedError } from '../errors';
import {
ReadTreeOptions,
ReadTreeResponse,
SearchOptions,
SearchResponse,
UrlReader,
UrlReaderPredicateTuple,
} from './types';
@@ -60,6 +62,18 @@ export class UrlReaderPredicateMux implements UrlReader {
throw new NotAllowedError(`Reading from '${url}' is not allowed`);
}
/**
 * Delegates the search to the first registered reader whose predicate
 * accepts the URL.
 *
 * @param url - The URL to search; it must be parseable by `new URL`
 * @param options - Passed through unchanged to the selected reader
 * @throws NotAllowedError if no registered reader accepts the URL
 */
async search(url: string, options?: SearchOptions): Promise<SearchResponse> {
  const parsed = new URL(url);

  const candidate = this.readers.find(({ predicate }) => predicate(parsed));
  if (!candidate) {
    throw new NotAllowedError(`Reading from '${url}' is not allowed`);
  }

  return await candidate.reader.search(url, options);
}
toString() {
return `predicateMux{readers=${this.readers.map(t => t.reader).join(',')}`;
}
+1 -1
View File
@@ -14,7 +14,7 @@
* limitations under the License.
*/
export type { UrlReader, ReadTreeResponse } from './types';
export type { UrlReader, ReadTreeResponse, SearchResponse } from './types';
export { UrlReaders } from './UrlReaders';
export { AzureUrlReader } from './AzureUrlReader';
export { BitbucketUrlReader } from './BitbucketUrlReader';
@@ -24,6 +24,7 @@ import { ReadTreeResponseFactory } from './tree';
export type UrlReader = {
read(url: string): Promise<Buffer>;
readTree(url: string, options?: ReadTreeOptions): Promise<ReadTreeResponse>;
search(url: string, options?: SearchOptions): Promise<SearchResponse>;
};
export type UrlReaderPredicateTuple = {
@@ -103,3 +104,51 @@ export type ReadTreeResponseFile = {
path: string;
content(): Promise<Buffer>;
};
/**
* An options object for search operations.
*/
export type SearchOptions = {
/**
* An etag can be provided to check whether the search response has changed from a previous execution.
*
* In the search() response, an etag is returned along with the files. The etag is a unique identifier
* of the current tree, usually the commit SHA or etag from the target.
*
* When an etag is given in SearchOptions, search will first compare the etag against the etag
* on the target branch. If they match, search will throw a NotModifiedError indicating that the search
* response will not differ from the previous response which included this particular etag. If they mismatch,
* search will return the rest of SearchResponse along with a new etag.
*/
etag?: string;
};
/**
* The output of a search operation.
*/
export type SearchResponse = {
/**
* The files that matched the search query.
*/
files: SearchResponseFile[];
/**
* A unique identifier of the current remote tree, usually the commit SHA or etag from the target.
*/
etag: string;
};
/**
* Represents a single file in a search response.
*/
export type SearchResponseFile = {
/**
* The full URL to the file.
*/
url: string;
/**
* Resolves to the binary contents of the file. This is an async function
* rather than a plain value, so callers must invoke and await it.
*/
content(): Promise<Buffer>;
};
+13 -2
View File
@@ -14,14 +14,18 @@
* limitations under the License.
*/
import {
ReadTreeResponse,
SearchResponse,
UrlReader,
} from '@backstage/backend-common';
import { Entity } from '@backstage/catalog-model';
import { Readable } from 'stream';
import {
getDocFilesFromRepository,
getLocationForEntity,
parseReferenceAnnotation,
} from './helpers';
import { UrlReader, ReadTreeResponse } from '@backstage/backend-common';
import { Entity } from '@backstage/catalog-model';
const entityBase: Entity = {
metadata: {
@@ -138,6 +142,13 @@ describe('getDocFilesFromRepository', () => {
etag: '',
};
}
// Search is not exercised by this test; respond with an empty result.
async search(): Promise<SearchResponse> {
  const empty: SearchResponse = { files: [], etag: '' };
  return empty;
}
}
const output = await getDocFilesFromRepository(
@@ -49,6 +49,7 @@ const mockConfig = new ConfigReader({});
const mockUrlReader: jest.Mocked<UrlReader> = {
read: jest.fn(),
readTree: jest.fn(),
search: jest.fn(),
};
describe('directory preparer', () => {
@@ -139,6 +139,16 @@ export class LocationReaders implements LocationReader {
if (emitResult.type === 'relation') {
throw new Error('readLocation may not emit entity relations');
}
if (
emitResult.type === 'location' &&
emitResult.location.type === item.location.type &&
emitResult.location.target === item.location.target
) {
// Ignore self-referential locations silently (this can happen for
// example if you use a glob target like "**/*.yaml" in a Location
// entity)
return;
}
emit(emitResult);
};
@@ -160,7 +160,7 @@ describe('CodeOwnersProcessor', () => {
const read = jest
.fn()
.mockResolvedValue(mockReadResult({ data: ownersText }));
const reader = { read, readTree: jest.fn() };
const reader = { read, readTree: jest.fn(), search: jest.fn() };
const result = await findRawCodeOwners(mockLocation(), {
reader,
logger,
@@ -170,7 +170,7 @@ describe('CodeOwnersProcessor', () => {
it('should return undefined when no codeowner', async () => {
const read = jest.fn().mockRejectedValue(mockReadResult());
const reader = { read, readTree: jest.fn() };
const reader = { read, readTree: jest.fn(), search: jest.fn() };
await expect(
findRawCodeOwners(mockLocation(), { reader, logger }),
@@ -184,7 +184,7 @@ describe('CodeOwnersProcessor', () => {
.mockImplementationOnce(() => mockReadResult({ error: 'foo' }))
.mockImplementationOnce(() => mockReadResult({ error: 'bar' }))
.mockResolvedValue(mockReadResult({ data: ownersText }));
const reader = { read, readTree: jest.fn() };
const reader = { read, readTree: jest.fn(), search: jest.fn() };
const result = await findRawCodeOwners(mockLocation(), {
reader,
@@ -206,7 +206,7 @@ describe('CodeOwnersProcessor', () => {
const read = jest
.fn()
.mockResolvedValue(mockReadResult({ data: mockCodeOwnersText() }));
const reader = { read, readTree: jest.fn() };
const reader = { read, readTree: jest.fn(), search: jest.fn() };
const owner = await resolveCodeOwner(mockLocation(), { reader, logger });
expect(owner).toBe('backstage-core');
@@ -216,7 +216,7 @@ describe('CodeOwnersProcessor', () => {
const read = jest
.fn()
.mockImplementation(() => mockReadResult({ error: 'error: foo' }));
const reader = { read, readTree: jest.fn() };
const reader = { read, readTree: jest.fn(), search: jest.fn() };
await expect(
resolveCodeOwner(mockLocation(), { reader, logger }),
@@ -230,7 +230,7 @@ describe('CodeOwnersProcessor', () => {
const read = jest
.fn()
.mockResolvedValue(mockReadResult({ data: mockCodeOwnersText() }));
const reader = { read, readTree: jest.fn() };
const reader = { read, readTree: jest.fn(), search: jest.fn() };
const processor = new CodeOwnersProcessor({ reader, logger });
return { entity, processor, read };
@@ -27,7 +27,7 @@ import {
describe('PlaceholderProcessor', () => {
const read: jest.MockedFunction<ResolverRead> = jest.fn();
const reader: UrlReader = { read, readTree: jest.fn() };
const reader: UrlReader = { read, readTree: jest.fn(), search: jest.fn() };
beforeEach(() => {
jest.resetAllMocks();
@@ -14,24 +14,29 @@
* limitations under the License.
*/
import { UrlReaderProcessor } from './UrlReaderProcessor';
import { getVoidLogger, UrlReaders } from '@backstage/backend-common';
import {
getVoidLogger,
UrlReader,
UrlReaders,
} from '@backstage/backend-common';
import { ConfigReader } from '@backstage/config';
import { msw } from '@backstage/test-utils';
import { rest } from 'msw';
import { setupServer } from 'msw/node';
import { msw } from '@backstage/test-utils';
import {
CatalogProcessorEntityResult,
CatalogProcessorErrorResult,
CatalogProcessorResult,
} from './types';
import { UrlReaderProcessor } from './UrlReaderProcessor';
import { defaultEntityDataParser } from './util/parse';
describe('UrlReaderProcessor', () => {
const mockApiOrigin = 'http://localhost';
const server = setupServer();
const server = setupServer();
msw.setupDefaultHandlers(server);
it('should load from url', async () => {
const logger = getVoidLogger();
const reader = UrlReaders.default({
@@ -57,7 +62,7 @@ describe('UrlReaderProcessor', () => {
)) as CatalogProcessorEntityResult;
expect(generated.type).toBe('entity');
expect(generated.location).toBe(spec);
expect(generated.location).toEqual(spec);
expect(generated.entity).toEqual({ mock: 'entity' });
});
@@ -92,4 +97,27 @@ describe('UrlReaderProcessor', () => {
`Unable to read url, NotFoundError: could not read ${mockApiOrigin}/component-notfound.yaml, 404 Not Found`,
);
});
// A target containing glob characters must be routed through reader.search
// and not through reader.read.
it('uses search when there are globs', async () => {
  const logger = getVoidLogger();

  const reader: jest.Mocked<UrlReader> = {
    read: jest.fn(),
    readTree: jest.fn(),
    // Resolve to a well-formed, empty SearchResponse. A bare array here makes
    // the processor fail on `response.files` and emit an error result, which
    // a call-count assertion alone does not notice.
    search: jest.fn().mockImplementation(async () => ({ files: [], etag: '' })),
  };

  const processor = new UrlReaderProcessor({ reader, logger });

  const emit = jest.fn();

  await processor.readLocation(
    { type: 'url', target: 'https://github.com/a/b/blob/x/**/b.yaml' },
    false,
    emit,
    defaultEntityDataParser,
  );

  expect(reader.search).toBeCalledTimes(1);
  expect(reader.read).not.toBeCalled();
  // No files matched and nothing failed, so there is nothing to emit
  expect(emit).not.toBeCalled();
});
});
@@ -16,6 +16,8 @@
import { UrlReader } from '@backstage/backend-common';
import { LocationSpec } from '@backstage/catalog-model';
import parseGitUrl from 'git-url-parse';
import limiterFactory from 'p-limit';
import { Logger } from 'winston';
import * as result from './results';
import {
@@ -59,10 +61,14 @@ export class UrlReaderProcessor implements CatalogProcessor {
}
try {
const data = await this.options.reader.read(location.target);
for await (const parseResult of parser({ data, location })) {
emit(parseResult);
const output = await this.doRead(location.target);
for (const item of output) {
for await (const parseResult of parser({
data: item.data,
location: { type: location.type, target: item.url },
})) {
emit(parseResult);
}
}
} catch (error) {
const message = `Unable to read ${location.type}, ${error}`;
@@ -78,4 +84,25 @@ export class UrlReaderProcessor implements CatalogProcessor {
return true;
}
/**
 * Reads the data behind a location target.
 *
 * If the file path part of the target contains glob characters (asterisks or
 * question marks), the target is passed to the reader's search and the
 * contents of every matching file are loaded, at most five at a time.
 * Otherwise the target is read as a single file.
 *
 * @param location - The location target URL, possibly containing globs
 * @returns One entry per file read, pairing the file URL with its contents
 */
private async doRead(
  location: string,
): Promise<{ data: Buffer; url: string }[]> {
  // Does it contain globs? I.e. does it contain asterisks or question marks
  // (no curly braces for now)
  const { filepath } = parseGitUrl(location);
  if (filepath?.match(/[*?]/)) {
    const limiter = limiterFactory(5);
    const response = await this.options.reader.search(location);
    const output = response.files.map(async file => ({
      url: file.url,
      // Call content() through the file object instead of handing over the
      // bare method reference, so that implementations which rely on `this`
      // keep working when invoked by the limiter.
      data: await limiter(() => file.content()),
    }));
    return Promise.all(output);
  }

  // Otherwise do a plain read
  const data = await this.options.reader.read(location);
  return [{ url: location, data }];
}
}
@@ -44,6 +44,7 @@ describe('CatalogBuilder', () => {
const reader: jest.Mocked<UrlReader> = {
read: jest.fn(),
readTree: jest.fn(),
search: jest.fn(),
};
const env: CatalogEnvironment = {
logger: getVoidLogger(),
@@ -53,6 +53,7 @@ export async function startStandaloneServer(
const mockUrlReader: jest.Mocked<UrlReader> = {
read: jest.fn(),
readTree: jest.fn(),
search: jest.fn(),
};
logger.debug('Creating application...');