feat(gerrit): support glob discovery

Signed-off-by: Thomas Cardonne <thomas.cardonne@adevinta.com>
This commit is contained in:
Thomas Cardonne
2025-02-03 18:46:42 +01:00
parent b5a82087a7
commit 89db8b8230
19 changed files with 520 additions and 60 deletions
+5
View File
@@ -0,0 +1,5 @@
---
'@backstage/integration': patch
---
Gerrit integration now exports `getGitilesAuthenticationUrl`. This enables its usage by the `GerritUrlReader`.
+22
View File
@@ -0,0 +1,22 @@
---
'@backstage/plugin-catalog-backend-module-gerrit': minor
---
**BREAKING** The optional `branch` configuration parameter now defaults to the default branch of the project (where `HEAD` points to).
This parameter was previously using `master` as the default value. In most cases this change should be transparent as Gerrit defaults to using `master`.
This change also allows specifying a custom `catalogPath` in the `catalog.providers.gerrit` configuration.
If not set, it defaults to a `catalog-info.yaml` file at the root of each repository, which matches the behavior before this change.
Thanks to the changes made in the `GerritUrlReader`, `catalogPath` also accepts glob patterns supported by `minimatch`.
```diff
catalog:
providers:
gerrit:
all: # identifies your dataset / provider independent of config changes
host: gerrit.company.com
query: 'state=ACTIVE&type=CODE'
+ # This will search for catalog manifests anywhere in the repositories
+ catalogPath: '**/catalog-info.{yml,yaml}'
```
+7
View File
@@ -0,0 +1,7 @@
---
'@backstage/backend-defaults': patch
---
`GerritUrlReader` is now able to `search` files matching a given pattern URL (using `minimatch` glob patterns).
This allows the Gerrit Discovery to find all Backstage manifests inside a repository using the `**/catalog-info.yaml` pattern.
+7 -3
View File
@@ -45,8 +45,9 @@ catalog:
gerrit:
yourProviderId: # identifies your dataset / provider independent of config changes
host: gerrit-your-company.com
branch: master # Optional
branch: master # Optional, defaults to the repository's default branch
query: 'state=ACTIVE&prefix=webapps'
catalogPath: 'catalog-info.yaml' # Optional, defaults to catalog-info.yaml
schedule:
# supports cron, ISO duration, "human duration" as used in code
frequency: { minutes: 30 }
@@ -56,12 +57,15 @@ catalog:
host: gerrit-your-company.com
branch: master # Optional
query: 'state=ACTIVE&prefix=backend'
# catalogPath can be a glob-pattern supported by the minimatch library
catalogPath: '{**/catalog-info.{yml,yaml},**/.catalog-info/*.{yml,yaml}}'
```
The provider configuration is composed of three parts:
The provider configuration consists of the following parts:
- **`host`**: the host of the Gerrit integration to use.
- **`branch`** _(optional)_: the branch where we will look for catalog entities (defaults to "master").
- **`branch`** _(optional)_: the branch where we will look for catalog entities (defaults to the repository's default branch).
- **`query`**: this string is directly used as the argument to the "List Project" API.
Typically, you will want to have some filter here to exclude projects that will
never contain any catalog files.
- **`catalogPath`** _(optional)_: path, relative to the root of the repository, where the Backstage manifests are stored (defaults to `catalog-info.yaml`). It can also be a glob pattern supported by [`minimatch`](https://github.com/isaacs/minimatch) to load multiple files.
@@ -423,6 +423,58 @@ describe.skip('GerritUrlReader', () => {
describe('search', () => {
const responseBuffer = Buffer.from('Apache License');
const branchAPIUrl =
'https://gerrit.com/projects/app%2Fweb/branches/master';
const branchAPIresponse = fs.readFileSync(
path.resolve(__dirname, '__fixtures__/gerrit/branch-info-response.txt'),
);
const searchUrl =
'https://gerrit.com/gitiles/app/web/+/refs/heads/master/**/catalog-info.yaml';
const etag = '52432507a70b677b5674b019c9a46b2e9f29d0a1';
const treeRecursiveResponse = fs.readFileSync(
path.resolve(
__dirname,
'__fixtures__/gerrit/tree-recursive-response.txt',
),
);
beforeEach(async () => {
worker.use(
rest.get(
'https://gerrit.com/projects/app%2Fweb/branches/master/files/catalog-info.yaml/content',
(_, res, ctx) => {
return res(
ctx.status(200),
ctx.body(Buffer.from('Backstage manifest').toString('base64')),
);
},
),
);
worker.use(
rest.get(
'https://gerrit.com/gitiles/app/web/\\+/refs/heads/master/',
(req, res, ctx) => {
if (
req.url.searchParams.has('format', 'JSON') &&
req.url.searchParams.has('recursive')
) {
return res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.set('content-disposition', 'attachment'),
ctx.body(treeRecursiveResponse),
);
}
return res(ctx.status(404));
},
),
);
});
afterEach(() => {
jest.clearAllMocks();
});
it('should return a single file when given an exact URL', async () => {
worker.use(
@@ -467,12 +519,53 @@ describe.skip('GerritUrlReader', () => {
expect(data.files.length).toBe(0);
});
it('throws if given URL with wildcard', async () => {
await expect(
gerritProcessor.search(
'https://gerrit.com/web/project/+/refs/heads/master/*.yaml',
),
).rejects.toThrow('Unsupported search pattern URL');
it('reads the wanted files correctly using gitiles.', async () => {
worker.use(
rest.get(branchAPIUrl, (_, res, ctx) => {
return res(ctx.status(200), ctx.body(branchAPIresponse));
}),
);
const response = await gerritProcessor.search(searchUrl);
expect(response.etag).toBe(etag);
expect(response.files.length).toBe(3);
expect(response.files[0].url).toEqual(
'https://gerrit.com/gitiles/app/web/+/refs/heads/master/catalog-info.yaml',
);
expect(response.files[1].url).toEqual(
'https://gerrit.com/gitiles/app/web/+/refs/heads/master/microservices/petstore-api/catalog-info.yaml',
);
expect(response.files[2].url).toEqual(
'https://gerrit.com/gitiles/app/web/+/refs/heads/master/microservices/petstore-consumer/catalog-info.yaml',
);
const docsYaml = await response.files[0].content();
expect(docsYaml.toString()).toBe('Backstage manifest');
});
it('throws NotModifiedError for matching etags.', async () => {
worker.use(
rest.get(branchAPIUrl, (_, res, ctx) => {
return res(ctx.status(200), ctx.body(branchAPIresponse));
}),
);
await expect(gerritProcessor.search(searchUrl, { etag })).rejects.toThrow(
NotModifiedError,
);
});
it('should throw on failures while getting branch info.', async () => {
worker.use(
rest.get(branchAPIUrl, (_, res, ctx) => {
return res(ctx.status(500, 'Error'));
}),
);
await expect(gerritProcessor.search(searchUrl)).rejects.toThrow(Error);
});
});
});
@@ -43,6 +43,8 @@ import {
assertError,
} from '@backstage/errors';
import { ReadTreeResponseFactory, ReaderFactory } from './types';
import { Minimatch } from 'minimatch';
import { getGitilesAuthenticationUrl } from '@backstage/integration';
/**
* Implements a {@link @backstage/backend-plugin-api#UrlReaderService} for files in Gerrit.
@@ -150,35 +152,39 @@ export class GerritUrlReader implements UrlReaderService {
url: string,
options?: UrlReaderServiceSearchOptions,
): Promise<UrlReaderServiceSearchResponse> {
const { pathname } = new URL(url);
const { path } = parseGitilesUrlRef(this.integration.config, url);
if (pathname.match(/[*?]/)) {
throw new Error('Unsupported search pattern URL');
}
if (!path.match(/[*?]/)) {
try {
const data = await this.readUrl(url, options);
try {
const data = await this.readUrl(url, options);
return {
files: [
{
url: url,
content: data.buffer,
lastModifiedAt: data.lastModifiedAt,
},
],
etag: data.etag ?? '',
};
} catch (error) {
assertError(error);
if (error.name === 'NotFoundError') {
return {
files: [],
etag: '',
files: [
{
url: url,
content: data.buffer,
lastModifiedAt: data.lastModifiedAt,
},
],
etag: data.etag ?? '',
};
} catch (error) {
assertError(error);
if (error.name === 'NotFoundError') {
return {
files: [],
etag: '',
};
}
throw error;
}
throw error;
}
const urlRevision = await this.getRevisionForUrl(url, options);
const files = await this.searchFilesFromGitiles(url, options);
return { files, etag: urlRevision };
}
toString() {
@@ -260,4 +266,66 @@ export class GerritUrlReader implements UrlReaderService {
}
return branchInfo.revision;
}
/**
 * Lists the files of a Gitiles tree that match the glob pattern found in the
 * path part of `url`.
 *
 * The tree is requested recursively from Gitiles (`?format=JSON&recursive`)
 * and its `blob` entries are filtered with `minimatch`. File contents are not
 * downloaded up front: each returned entry exposes a `content()` function
 * that fetches the file from the Gerrit file contents API when called.
 *
 * @param url - Gitiles URL whose path part is a `minimatch` glob pattern.
 * @param options - Optional settings; only `signal` is read here and is
 *   forwarded to every request.
 * @returns One entry per matching blob, with its Gitiles URL and a lazy
 *   content loader.
 * @throws ResponseError if the tree listing or a file content request
 *   responds with a non-OK status.
 */
private async searchFilesFromGitiles(
  url: string,
  options?: UrlReaderServiceReadTreeOptions,
): Promise<UrlReaderServiceSearchResponse['files']> {
  const { path, basePath } = parseGitilesUrlRef(this.integration.config, url);
  // The tree listing is requested through the URL returned by
  // getGitilesAuthenticationUrl instead of the plain Gitiles base URL.
  const treeUrl = `${basePath}/?format=JSON&recursive`.replace(
    this.integration.config.gitilesBaseUrl,
    getGitilesAuthenticationUrl(this.integration.config),
  );
  const treeResponse = await fetch(treeUrl, {
    ...getGerritRequestOptions(this.integration.config),
    // TODO(freben): The signal cast is there because pre-3.x versions of
    // node-fetch have a very slightly deviating AbortSignal type signature.
    // The difference does not affect us in practice however. The cast can
    // be removed after we support ESM for CLI dependencies and migrate to
    // version 3 of node-fetch.
    // https://github.com/backstage/backstage/issues/8242
    signal: options?.signal as any,
  });
  if (!treeResponse.ok) {
    throw await ResponseError.fromResponse(treeResponse);
  }
  const res = (await parseGerritJsonResponse(treeResponse as any)) as {
    id: string;
    entries: { mode: number; type: string; id: string; name: string }[];
  };
  // Entry names in the tree listing carry no leading slash, so strip it from
  // the pattern as well before matching.
  const matcher = new Minimatch(decodeURIComponent(path).replace(/^\/+/, ''));
  const matching = res.entries.filter(
    item => item.type === 'blob' && item.name && matcher.match(item.name),
  );
  return matching.map(item => {
    const fileUrl = `${basePath}/${item.name}`;
    return {
      url: fileUrl,
      content: async () => {
        const apiUrl = getGerritFileContentsApiUrl(
          this.integration.config,
          fileUrl,
        );
        const response = await fetch(apiUrl, {
          method: 'GET',
          ...getGerritRequestOptions(this.integration.config),
          // TODO(freben): The signal cast is there because pre-3.x versions of
          // node-fetch have a very slightly deviating AbortSignal type signature.
          // The difference does not affect us in practice however. The cast can
          // be removed after we support ESM for CLI dependencies and migrate to
          // version 3 of node-fetch.
          // https://github.com/backstage/backstage/issues/8242
          signal: options?.signal as any,
        });
        // Without this check an error body would be base64-decoded and
        // handed back as if it were the file's content.
        if (!response.ok) {
          throw await ResponseError.fromResponse(response);
        }
        const responseBody = await response.text();
        return Buffer.from(responseBody, 'base64');
      },
    };
  });
}
}
@@ -0,0 +1,30 @@
)]}'
{
"id": "d84874ac8e27a8592ece6eb3e61e1fc3cd668348",
"entries": [
{
"mode": 40960,
"type": "blob",
"id": "9abe6322fdb76e3c2d972f12aad46088fa785fbc",
"name": "catalog-info.yaml"
},
{
"mode": 33188,
"type": "blob",
"id": "7a472379c27f165a1472f0c3d455fa3b811585b9",
"name": "README.md"
},
{
"mode": 33199,
"type": "blob",
"id": "1a472379c27f165a1472f0c3d455fa3b811585b9",
"name": "microservices/petstore-api/catalog-info.yaml"
},
{
"mode": 32199,
"type": "blob",
"id": "2a472379c27f165a1472f0c3d455fa3b811585b9",
"name": "microservices/petstore-consumer/catalog-info.yaml"
}
]
}
+5
View File
@@ -561,6 +561,11 @@ export function getGitHubRequestOptions(
headers: Record<string, string>;
};
// @public
export function getGitilesAuthenticationUrl(
config: GerritIntegrationConfig,
): string;
// @public
export function getGitLabFileFetchUrl(
url: string,
+1
View File
@@ -292,6 +292,7 @@ export function getAuthenticationPrefix(
* be used.
*
* @param config - A Gerrit provider config.
* @public
*/
export function getGitilesAuthenticationUrl(
config: GerritIntegrationConfig,
+1
View File
@@ -21,6 +21,7 @@ export {
export {
buildGerritGitilesArchiveUrl,
buildGerritGitilesArchiveUrlFromLocation,
getGitilesAuthenticationUrl,
getGerritBranchApiUrl,
getGerritCloneRepoUrl,
getGerritFileContentsApiUrl,
+7 -1
View File
@@ -40,9 +40,15 @@ export interface Config {
query: string;
/**
* (Optional) Branch.
* The branch where the provider will try to find entities. Defaults to "master".
* The branch where the provider will try to find entities. Defaults to the project's default branch (the one HEAD points to).
*/
branch?: string;
/**
* (Optional) Path where the catalog YAML manifest file is expected in the repository.
* Can contain glob patterns supported by minimatch.
* Defaults to "catalog-info.yaml".
*/
catalogPath?: string;
/**
* (Optional) TaskScheduleDefinition for the discovery.
*/
@@ -51,8 +51,10 @@
"@backstage/config": "workspace:^",
"@backstage/errors": "workspace:^",
"@backstage/integration": "workspace:^",
"@backstage/plugin-catalog-common": "workspace:^",
"@backstage/plugin-catalog-node": "workspace:^",
"fs-extra": "^11.2.0",
"p-limit": "^3.1.0",
"uuid": "^11.0.0"
},
"devDependencies": {
@@ -57,35 +57,52 @@ class PersistingTaskRunner implements SchedulerServiceTaskRunner {
const logger = mockServices.logger.mock();
describe('GerritEntityProvider', () => {
let schedule: PersistingTaskRunner;
registerMswTestHooks(server);
afterEach(() => {
jest.clearAllMocks();
});
const config = new ConfigReader({
catalog: {
providers: {
gerrit: {
'active-training': {
host: 'g.com',
query: 'state=ACTIVE&prefix=training',
branch: 'main',
const config = mockServices.rootConfig({
data: {
catalog: {
providers: {
gerrit: {
'active-training': {
host: 'g.com',
query: 'state=ACTIVE&prefix=training',
branch: 'main',
},
'custom-catalog-file': {
host: 'g.com',
query: 'state=ACTIVE&prefix=training',
catalogPath: 'catalog-*.yaml',
branch: 'main',
},
'without-branch': {
host: 'g.com',
query: 'state=ACTIVE&prefix=training',
},
},
},
},
},
integrations: {
gerrit: [
{
host: 'g.com',
baseUrl: 'https://g.com/gerrit',
gitilesBaseUrl: 'https:/g.com/gitiles',
},
],
integrations: {
gerrit: [
{
host: 'g.com',
baseUrl: 'https://g.com/gerrit',
gitilesBaseUrl: 'https:/g.com/gitiles',
},
],
},
},
});
const schedule = new PersistingTaskRunner();
beforeEach(() => {
schedule = new PersistingTaskRunner();
});
const entityProviderConnection: EntityProviderConnection = {
applyMutation: jest.fn(),
@@ -127,6 +144,107 @@ describe('GerritEntityProvider', () => {
);
});
it('discovers projects from the api with a custom catalog file path.', async () => {
const repoBuffer = fs.readFileSync(
path.resolve(__dirname, '__fixtures__/listProjectsBody.txt'),
);
const expected = getJsonFixture(
'expectedProviderEntitiesCustomCatalogFile.json',
);
server.use(
rest.get('https://g.com/gerrit/projects/', (_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.body(repoBuffer),
),
),
);
const provider = GerritEntityProvider.fromConfig(config, {
logger,
schedule,
})[1];
expect(provider.getProviderName()).toEqual(
'gerrit-provider:custom-catalog-file',
);
await provider.connect(entityProviderConnection);
const taskDef = schedule.getTasks()[0];
expect(taskDef.id).toEqual('gerrit-provider:custom-catalog-file:refresh');
await (taskDef.fn as () => Promise<void>)();
expect(entityProviderConnection.applyMutation).toHaveBeenCalledWith(
expected,
);
});
it('discovers the default branch when not explicitly configured.', async () => {
const repoBuffer = fs.readFileSync(
path.resolve(__dirname, '__fixtures__/listProjectsBody.txt'),
);
const expected = getJsonFixture('expectedProviderEntities.json');
server.use(
rest.get('https://g.com/gerrit/projects/', (_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.body(repoBuffer),
),
),
rest.get('https://g.com/gerrit/projects/:project/HEAD', (_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/json'),
ctx.body(`)]}'\n"refs/heads/main"`),
),
),
);
const configWithoutBranch = new ConfigReader({
catalog: {
providers: {
gerrit: {
'active-training': {
host: 'g.com',
query: 'state=ACTIVE&prefix=training',
},
},
},
},
integrations: {
gerrit: [
{
host: 'g.com',
baseUrl: 'https://g.com/gerrit',
gitilesBaseUrl: 'https:/g.com/gitiles',
},
],
},
});
const provider = GerritEntityProvider.fromConfig(configWithoutBranch, {
logger,
schedule,
})[0];
expect(provider.getProviderName()).toEqual(
'gerrit-provider:active-training',
);
await provider.connect(entityProviderConnection);
const taskDef = schedule.getTasks()[0];
expect(taskDef.id).toEqual('gerrit-provider:active-training:refresh');
await (taskDef.fn as () => Promise<void>)();
expect(entityProviderConnection.applyMutation).toHaveBeenCalledWith(
expected,
);
});
it('handles api errors.', async () => {
const provider = GerritEntityProvider.fromConfig(config, {
logger,
@@ -15,13 +15,13 @@
*/
import { Config } from '@backstage/config';
import { InputError } from '@backstage/errors';
import { InputError, ResponseError } from '@backstage/errors';
import {
EntityProvider,
EntityProviderConnection,
LocationSpec,
locationSpecToLocationEntity,
} from '@backstage/plugin-catalog-node';
import { LocationSpec } from '@backstage/plugin-catalog-common';
import {
GerritIntegration,
getGerritProjectsApiUrl,
@@ -30,6 +30,7 @@ import {
ScmIntegrations,
} from '@backstage/integration';
import * as uuid from 'uuid';
import pLimit from 'p-limit';
import { readGerritConfigs } from './config';
import { GerritProjectQueryResult, GerritProviderConfig } from './types';
@@ -167,7 +168,11 @@ export class GerritEntityProvider implements EntityProvider {
)) as GerritProjectQueryResult;
const projects = Object.keys(gerritProjectsResponse);
const locations = projects.map(project => this.createLocationSpec(project));
const limit = pLimit(5);
const locations = await Promise.all(
projects.map(project => limit(() => this.createLocationSpec(project))),
);
await this.connection.applyMutation({
type: 'full',
entities: locations.map(location => ({
@@ -178,10 +183,44 @@ export class GerritEntityProvider implements EntityProvider {
logger.info(`Found ${locations.length} locations.`);
}
private createLocationSpec(project: string): LocationSpec {
private async createLocationSpec(project: string): Promise<LocationSpec> {
// If a branch has been configured, we can use it directly
if (this.config.branch) {
return {
type: 'url',
target: `${this.integration.config.gitilesBaseUrl}/${project}/+/refs/heads/${this.config.branch}/${this.config.catalogPath}`,
presence: 'optional',
};
}
// Else we call Gerrit API to know on which branch HEAD is pointing to
let response: Response;
const baseProjectApiUrl = getGerritProjectsApiUrl(this.integration.config);
const projectGetHeadUrl = `${baseProjectApiUrl}${encodeURIComponent(
project,
)}/HEAD`;
try {
response = await fetch(projectGetHeadUrl, {
method: 'GET',
...getGerritRequestOptions(this.integration.config),
});
} catch (e) {
throw new Error(`Failed to get project's HEAD for ${project}, ${e}`);
}
if (!response.ok) {
throw await ResponseError.fromResponse(response);
}
// Gerrit responds with something like `refs/heads/master`
const projectHeadResponse = (await parseGerritJsonResponse(
response as any,
)) as string;
return {
type: 'url',
target: `${this.integration.config.gitilesBaseUrl}/${project}/+/refs/heads/${this.config.branch}/catalog-info.yaml`,
target: `${this.integration.config.gitilesBaseUrl}/${project}/+/${projectHeadResponse}/${this.config.catalogPath}`,
presence: 'optional',
};
}
@@ -0,0 +1,43 @@
{
"entities": [
{
"entity": {
"apiVersion": "backstage.io/v1alpha1",
"kind": "Location",
"metadata": {
"annotations": {
"backstage.io/managed-by-location": "url:https:/g.com/gitiles/training/gerrit/+/refs/heads/main/catalog-*.yaml",
"backstage.io/managed-by-origin-location": "url:https:/g.com/gitiles/training/gerrit/+/refs/heads/main/catalog-*.yaml"
},
"name": "generated-d508f0837d33559852169b17417968df8fd0b1dc"
},
"spec": {
"presence": "optional",
"target": "https:/g.com/gitiles/training/gerrit/+/refs/heads/main/catalog-*.yaml",
"type": "url"
}
},
"locationKey": "gerrit-provider:custom-catalog-file"
},
{
"entity": {
"apiVersion": "backstage.io/v1alpha1",
"kind": "Location",
"metadata": {
"annotations": {
"backstage.io/managed-by-location": "url:https:/g.com/gitiles/training/sample/+/refs/heads/main/catalog-*.yaml",
"backstage.io/managed-by-origin-location": "url:https:/g.com/gitiles/training/sample/+/refs/heads/main/catalog-*.yaml"
},
"name": "generated-d3f234d9ff42610b6a8b1030795de466dbd0ee55"
},
"spec": {
"presence": "optional",
"target": "https:/g.com/gitiles/training/sample/+/refs/heads/main/catalog-*.yaml",
"type": "url"
}
},
"locationKey": "gerrit-provider:custom-catalog-file"
}
],
"type": "full"
}
@@ -28,6 +28,7 @@ describe('readGerritConfigs', () => {
host: 'gerrit2.com',
query: 'state=ACTIVE',
branch: 'main',
catalogPath: 'catalog-*.yaml',
};
const provider3 = {
host: 'gerrit1.com',
@@ -55,11 +56,20 @@ describe('readGerritConfigs', () => {
const actual = readGerritConfigs(new ConfigReader(config));
expect(actual).toHaveLength(3);
expect(actual[0]).toEqual({ ...provider1, id: 'active-g1' });
expect(actual[1]).toEqual({ ...provider2, id: 'active-g2' });
expect(actual[0]).toEqual({
...provider1,
id: 'active-g1',
catalogPath: 'catalog-info.yaml',
});
expect(actual[1]).toEqual({
...provider2,
id: 'active-g2',
catalogPath: 'catalog-*.yaml',
});
expect(actual[2]).toEqual({
...provider3,
id: 'active-g3',
catalogPath: 'catalog-info.yaml',
schedule: {
...provider3.schedule,
frequency: { minutes: 30 },
@@ -84,7 +94,7 @@ describe('readGerritConfigs', () => {
const actual = readGerritConfigs(new ConfigReader(config));
expect(actual).toHaveLength(1);
expect(actual[0]).toEqual({
branch: 'master',
catalogPath: 'catalog-info.yaml',
id: 'active-g1',
...provider,
});
@@ -19,7 +19,9 @@ import { Config } from '@backstage/config';
import { GerritProviderConfig } from './types';
function readGerritConfig(id: string, config: Config): GerritProviderConfig {
const branch = config.getOptionalString('branch') ?? 'master';
const branch = config.getOptionalString('branch');
const catalogPath =
config.getOptionalString('catalogPath') ?? 'catalog-info.yaml';
const host = config.getString('host');
const query = config.getString('query');
@@ -31,6 +33,7 @@ function readGerritConfig(id: string, config: Config): GerritProviderConfig {
return {
branch,
catalogPath,
host,
id,
query,
@@ -30,5 +30,6 @@ export type GerritProviderConfig = {
query: string;
id: string;
branch?: string;
catalogPath?: string;
schedule?: SchedulerServiceTaskScheduleDefinition;
};
+2
View File
@@ -5591,11 +5591,13 @@ __metadata:
"@backstage/config": "workspace:^"
"@backstage/errors": "workspace:^"
"@backstage/integration": "workspace:^"
"@backstage/plugin-catalog-common": "workspace:^"
"@backstage/plugin-catalog-node": "workspace:^"
"@types/fs-extra": ^11.0.0
fs-extra: ^11.2.0
luxon: ^3.0.0
msw: ^1.0.0
p-limit: ^3.1.0
uuid: ^11.0.0
languageName: unknown
linkType: soft