feat(gerrit-integration): use gitiles archive for readTree if configured

If Gitiles is configured, leverage it to fetch an archive instead
of cloning the repository.

Co-Authored-By: Andy Ladjadj <andy.ladjadj@adevinta.com>
Signed-off-by: Thomas Cardonne <thomas.cardonne@adevinta.com>
This commit is contained in:
Thomas Cardonne
2023-04-24 14:42:22 +02:00
parent b3021ef892
commit 443afcf7f5
14 changed files with 339 additions and 23 deletions
+9
View File
@@ -0,0 +1,9 @@
---
'@backstage/backend-common': patch
---
To improve performance, `GerritUrlReader.readTree()` now uses Gitiles to fetch an archive instead of cloning the repository.
If `gitilesBaseUrl` is not configured, `readTree` still uses Git to clone the repository.
Added `tarStripFirstDirectory` to `ReadTreeResponseFactoryOptions`, which allows disabling the stripping of the first directory
for `tar` archives.
+6
View File
@@ -0,0 +1,6 @@
---
'@backstage/integration': minor
---
Added `buildGerritGitilesArchiveUrl()` to construct a Gitiles URL to download an archive.
Gitiles URLs that use the authenticated prefix (`/a/`) can now be parsed by the integration.
+1
View File
@@ -629,6 +629,7 @@ export type ReadTreeResponseFactoryOptions = {
size: number;
},
) => boolean;
tarStripFirstDirectory?: boolean;
};
export { ReadTreeResponseFile };
@@ -37,14 +37,16 @@ const treeResponseFactory = DefaultReadTreeResponseFactory.create({
config: new ConfigReader({}),
});
const cloneMock = jest.fn(() => Promise.resolve());
jest.mock('../scm', () => ({
Git: {
fromAuth: () => ({
clone: jest.fn(() => Promise.resolve({})),
clone: cloneMock,
}),
},
}));
// Gerrit processor without a gitilesBaseUrl configured
const gerritProcessor = new GerritUrlReader(
new GerritIntegration(
readGerritIntegrationConfig(
@@ -57,6 +59,21 @@ const gerritProcessor = new GerritUrlReader(
'/tmp',
);
// Gerrit processor with a gitilesBaseUrl configured.
// Used to test readTree with Gitiles archive download.
const gerritProcessorWithGitiles = new GerritUrlReader(
new GerritIntegration(
readGerritIntegrationConfig(
new ConfigReader({
host: 'gerrit.com',
gitilesBaseUrl: 'https://gerrit.com/gitiles',
}),
),
),
{ treeResponseFactory },
'/tmp',
);
const createReader = (config: JsonObject): UrlReaderPredicateTuple[] => {
return GerritUrlReader.factory({
config: new ConfigReader(config),
@@ -217,9 +234,17 @@ describe('GerritUrlReader', () => {
path.resolve(__dirname, '__fixtures__/gerrit/branch-info-response.txt'),
);
const treeUrl = 'https://gerrit.com/app/web/+/refs/heads/master/';
const treeUrlGitiles =
'https://gerrit.com/gitiles/app/web/+/refs/heads/master/';
const etag = '52432507a70b677b5674b019c9a46b2e9f29d0a1';
const mkdocsContent = 'great content';
const mkdocsContent = 'a repo fetched using git clone';
const mdContent = 'doc';
const repoArchiveBuffer = fs.readFileSync(
path.resolve(__dirname, '__fixtures__/gerrit/gerrit-master.tar.gz'),
);
const repoArchiveDocsBuffer = fs.readFileSync(
path.resolve(__dirname, '__fixtures__/gerrit/gerrit-master-docs.tar.gz'),
);
beforeEach(() => {
mockFs({
@@ -229,6 +254,39 @@ describe('GerritUrlReader', () => {
});
const spy = jest.spyOn(fs, 'mkdtemp');
spy.mockImplementation(() => '/tmp/gerrit-clone-123abc');
worker.use(
rest.get(
new RegExp(
'https://gerrit.com/gitiles/app/web/\\+archive/refs/heads/master.tar.gz',
),
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/x-gzip'),
ctx.set(
'content-disposition',
'attachment; filename=web-refs/heads/master.tar.gz',
),
ctx.body(repoArchiveBuffer),
),
),
rest.get(
new RegExp(
'https://gerrit.com/gitiles/app/web/\\+archive/refs/heads/master/docs.tar.gz',
),
(_, res, ctx) =>
res(
ctx.status(200),
ctx.set('Content-Type', 'application/x-gzip'),
ctx.set(
'content-disposition',
'attachment; filename=web-refs/heads/master-docs.tar.gz',
),
ctx.body(repoArchiveDocsBuffer),
),
),
);
});
afterEach(() => {
@@ -236,7 +294,32 @@ describe('GerritUrlReader', () => {
jest.clearAllMocks();
});
it('reads the wanted files correctly.', async () => {
it('reads the wanted files correctly using gitiles.', async () => {
  worker.use(
    rest.get(branchAPIUrl, (_, res, ctx) => {
      return res(ctx.status(200), ctx.body(branchAPIresponse));
    }),
  );

  // This processor has a gitilesBaseUrl configured, so readTree is expected
  // to download the Gitiles archive mocked in beforeEach.
  const response = await gerritProcessorWithGitiles.readTree(
    treeUrlGitiles,
  );

  expect(response.etag).toBe(etag);

  const files = await response.files();
  expect(files.length).toBe(2);

  // files[0] is expected to be the Markdown document ...
  const mdFile = await files[0].content();
  expect(mdFile.toString()).toBe('# Test\n');

  // ... and files[1] the mkdocs configuration.
  const mkdocsYaml = await files[1].content();
  expect(mkdocsYaml.toString()).toBe('site_name: Test\n');

  // The archive path must not fall back to cloning the repository.
  expect(cloneMock).not.toHaveBeenCalled();
});
it('reads the wanted files correctly using git clone.', async () => {
worker.use(
rest.get(branchAPIUrl, (_, res, ctx) => {
return res(ctx.status(200), ctx.body(branchAPIresponse));
@@ -255,6 +338,8 @@ describe('GerritUrlReader', () => {
const mdFile = await files[1].content();
expect(mdFile.toString()).toBe(mdContent);
expect(cloneMock).toHaveBeenCalled();
});
it('throws NotModifiedError for matching etags.', async () => {
@@ -291,7 +376,29 @@ describe('GerritUrlReader', () => {
await expect(gerritProcessor.readTree(treeUrl)).rejects.toThrow(Error);
});
it('should returns wanted files with a subpath', async () => {
it('should returns wanted files with a subpath using gitiles', async () => {
worker.use(
rest.get(branchAPIUrl, (_, res, ctx) => {
return res(ctx.status(200), ctx.body(branchAPIresponse));
}),
);
const response = await gerritProcessorWithGitiles.readTree(
`${treeUrlGitiles}/docs`,
);
expect(response.etag).toBe(etag);
const files = await response.files();
expect(files.length).toBe(1);
const mdFile = await files[0].content();
expect(mdFile.toString()).toBe('# Test\n');
expect(cloneMock).not.toHaveBeenCalled();
});
it('should returns wanted files with a subpath using git clone', async () => {
worker.use(
rest.get(branchAPIUrl, (_, res, ctx) => {
return res(ctx.status(200), ctx.body(branchAPIresponse));
@@ -307,6 +414,8 @@ describe('GerritUrlReader', () => {
const mdFile = await files[0].content();
expect(mdFile.toString()).toBe(mdContent);
expect(cloneMock).toHaveBeenCalled();
});
});
});
@@ -14,16 +14,17 @@
* limitations under the License.
*/
import { Git } from '../scm';
import { NotFoundError, NotModifiedError } from '@backstage/errors';
import {
GerritIntegration,
getGerritCloneRepoUrl,
ScmIntegrations,
buildGerritGitilesArchiveUrl,
getGerritBranchApiUrl,
getGerritCloneRepoUrl,
getGerritFileContentsApiUrl,
getGerritRequestOptions,
parseGerritJsonResponse,
parseGerritGitilesUrl,
parseGerritJsonResponse,
} from '@backstage/integration';
import { Base64Decode } from 'base64-stream';
import concatStream from 'concat-stream';
@@ -31,20 +32,20 @@ import fs from 'fs-extra';
import fetch, { Response } from 'node-fetch';
import os from 'os';
import { join as joinPath } from 'path';
import { Readable, pipeline as pipelineCb } from 'stream';
import tar from 'tar';
import { pipeline as pipelineCb, Readable } from 'stream';
import { promisify } from 'util';
import { Git } from '../scm';
import {
ReaderFactory,
ReadTreeOptions,
ReadTreeResponse,
ReadTreeResponseFactory,
ReadUrlOptions,
ReadUrlResponse,
ReaderFactory,
SearchResponse,
UrlReader,
} from './types';
import { ScmIntegrations } from '@backstage/integration';
const pipeline = promisify(pipelineCb);
@@ -59,6 +60,8 @@ const createTemporaryDirectory = async (workDir: string): Promise<string> =>
* way we are depending on that there is a Gitiles installation somewhere
* that we can link to. It is perfectly possible to integrate Gerrit with
* Backstage without Gitiles since all API calls go directly to Gerrit.
* However if Gitiles is configured, readTree will use it to fetch
* an archive instead of cloning the repository.
*
* The "host" variable in the config is the Gerrit host. The address where
* Gitiles is installed may be on the same host but it could be on a
@@ -125,6 +128,7 @@ export class GerritUrlReader implements UrlReader {
} catch (e) {
throw new Error(`Unable to read gerrit file ${url}, ${e}`);
}
if (response.ok) {
let responseBody: string;
return {
@@ -152,7 +156,6 @@ export class GerritUrlReader implements UrlReader {
url: string,
options?: ReadTreeOptions,
): Promise<ReadTreeResponse> {
const { filePath } = parseGerritGitilesUrl(this.integration.config, url);
const apiUrl = getGerritBranchApiUrl(this.integration.config, url);
let response: Response;
try {
@@ -180,6 +183,30 @@ export class GerritUrlReader implements UrlReader {
throw new NotModifiedError();
}
if (
this.integration.config.gitilesBaseUrl !== this.integration.config.baseUrl
) {
return this.readTreeFromGitiles(url, branchInfo.revision, options);
}
return this.readTreeFromGitClone(url, branchInfo.revision, options);
}
/**
 * Search is not supported by the Gerrit reader.
 *
 * Always rejects with an Error stating that search is not implemented.
 */
async search(): Promise<SearchResponse> {
  const message = 'GerritReader does not implement search';
  throw new Error(message);
}
/**
 * Describes this reader by its configured host and by whether a password
 * is set in the integration config.
 */
toString() {
  const { config } = this.integration;
  const authed = Boolean(config.password);
  return `gerrit{host=${config.host},authed=${authed}}`;
}
private async readTreeFromGitClone(
url: string,
revision: string,
options?: ReadTreeOptions,
) {
const { filePath } = parseGerritGitilesUrl(this.integration.config, url);
const git = Git.fromAuth({
username: this.integration.config.username,
password: this.integration.config.password,
@@ -192,7 +219,7 @@ export class GerritUrlReader implements UrlReader {
await git.clone({
url: cloneUrl,
dir: joinPath(tempDir, 'repo'),
ref: branchInfo.revision,
ref: revision,
depth: 1,
});
@@ -206,7 +233,7 @@ export class GerritUrlReader implements UrlReader {
return await this.deps.treeResponseFactory.fromTarArchive({
stream: tarArchive,
subpath: filePath === '/' ? undefined : filePath,
etag: branchInfo.revision,
etag: revision,
filter: options?.filter,
});
} catch (error) {
@@ -216,12 +243,47 @@ export class GerritUrlReader implements UrlReader {
}
}
async search(): Promise<SearchResponse> {
throw new Error('GerritReader does not implement search');
}
private async readTreeFromGitiles(
url: string,
revision: string,
options?: ReadTreeOptions,
) {
const { branch, filePath, project } = parseGerritGitilesUrl(
this.integration.config,
url,
);
const archiveUrl = buildGerritGitilesArchiveUrl(
this.integration.config,
project,
branch,
filePath,
);
const archiveResponse = await fetch(archiveUrl, {
...getGerritRequestOptions(this.integration.config),
// TODO(freben): The signal cast is there because pre-3.x versions of
// node-fetch have a very slightly deviating AbortSignal type signature.
// The difference does not affect us in practice however. The cast can
// be removed after we support ESM for CLI dependencies and migrate to
// version 3 of node-fetch.
// https://github.com/backstage/backstage/issues/8242
signal: options?.signal as any,
});
toString() {
const { host, password } = this.integration.config;
return `gerrit{host=${host},authed=${Boolean(password)}}`;
if (archiveResponse.status === 404) {
throw new NotFoundError(`Not found: ${archiveUrl}`);
}
if (!archiveResponse.ok) {
throw new Error(
`${url} could not be read as ${archiveUrl}, ${archiveResponse.status} ${archiveResponse.statusText}`,
);
}
return await this.deps.treeResponseFactory.fromTarArchive({
stream: archiveResponse.body as unknown as Readable,
etag: revision,
filter: options?.filter,
tarStripFirstDirectory: false,
});
}
}
@@ -45,6 +45,7 @@ export class DefaultReadTreeResponseFactory implements ReadTreeResponseFactory {
this.workDir,
options.etag,
options.filter,
options.tarStripFirstDirectory ?? true,
);
}
@@ -44,6 +44,7 @@ export class TarArchiveResponse implements ReadTreeResponse {
private readonly workDir: string,
public readonly etag: string,
private readonly filter?: (path: string, info: { size: number }) => boolean,
private readonly stripFirstDirectory: boolean = true,
) {
if (subPath) {
if (!subPath.endsWith('/')) {
@@ -81,7 +82,9 @@ export class TarArchiveResponse implements ReadTreeResponse {
// File path relative to the root extracted directory. Will remove the
// top level dir name from the path since its name is hard to predetermine.
const relativePath = stripFirstDirectoryFromPath(entry.path);
const relativePath = this.stripFirstDirectory
? stripFirstDirectoryFromPath(entry.path)
: entry.path;
if (this.subPath) {
if (!relativePath.startsWith(this.subPath)) {
@@ -148,7 +151,10 @@ export class TarArchiveResponse implements ReadTreeResponse {
// Equivalent of tar --strip-components=N
// When no subPath is given, remove just 1 top level directory
const strip = this.subPath ? this.subPath.split('/').length : 1;
let strip = this.subPath ? this.subPath.split('/').length : 1;
if (!this.stripFirstDirectory) {
strip--;
}
let filterError: Error | undefined = undefined;
await pipeline(
@@ -164,7 +170,9 @@ export class TarArchiveResponse implements ReadTreeResponse {
// File path relative to the root extracted directory. Will remove the
// top level dir name from the path since its name is hard to predetermine.
const relativePath = stripFirstDirectoryFromPath(path);
const relativePath = this.stripFirstDirectory
? stripFirstDirectoryFromPath(path)
: path;
if (this.subPath && !relativePath.startsWith(this.subPath)) {
return false;
}
@@ -83,6 +83,8 @@ export type ReadTreeResponseFactoryOptions = {
etag: string;
// Filter passed on from the ReadTreeOptions
filter?: (path: string, info?: { size: number }) => boolean;
// First directory of a tar archive is stripped when set to true
tarStripFirstDirectory?: boolean;
};
/**
+8
View File
@@ -172,6 +172,14 @@ export type BitbucketServerIntegrationConfig = {
password?: string;
};
// @public
export function buildGerritGitilesArchiveUrl(
config: GerritIntegrationConfig,
project: string,
branch: string,
filePath: string,
): string;
// @public
export class DefaultGithubCredentialsProvider
implements GithubCredentialsProvider
@@ -20,6 +20,7 @@ import fetch from 'cross-fetch';
import { setupRequestMockHandlers } from '../helpers';
import { GerritIntegrationConfig } from './config';
import {
buildGerritGitilesArchiveUrl,
buildGerritGitilesUrl,
getGerritBranchApiUrl,
getGerritCloneRepoUrl,
@@ -33,6 +34,41 @@ describe('gerrit core', () => {
const worker = setupServer();
setupRequestMockHandlers(worker);
describe('buildGerritGitilesArchiveUrl', () => {
const config: GerritIntegrationConfig = {
host: 'gerrit.com',
gitilesBaseUrl: 'https://gerrit.com/gitiles',
};
it('can create an archive url for a branch', () => {
expect(buildGerritGitilesArchiveUrl(config, 'repo', 'dev', '')).toEqual(
'https://gerrit.com/gitiles/repo/+archive/refs/heads/dev.tar.gz',
);
expect(buildGerritGitilesArchiveUrl(config, 'repo', 'dev', '/')).toEqual(
'https://gerrit.com/gitiles/repo/+archive/refs/heads/dev.tar.gz',
);
});
it('can create an archive url for a specific directory', () => {
expect(
buildGerritGitilesArchiveUrl(config, 'repo', 'dev', 'docs'),
).toEqual(
'https://gerrit.com/gitiles/repo/+archive/refs/heads/dev/docs.tar.gz',
);
});
it('can create an authenticated url when auth is enabled', () => {
const authConfig = {
...config,
username: 'username',
password: 'password',
};
expect(
buildGerritGitilesArchiveUrl(authConfig, 'repo', 'dev', 'docs'),
).toEqual(
'https://gerrit.com/a/gitiles/repo/+archive/refs/heads/dev/docs.tar.gz',
);
});
});
describe('buildGerritGitilesUrl', () => {
it('can create an url from arguments', () => {
const config: GerritIntegrationConfig = {
@@ -86,6 +122,25 @@ describe('gerrit core', () => {
);
expect(rootPath).toEqual('/');
});
it('can parse a valid authenticated gitiles url.', () => {
  const config: GerritIntegrationConfig = {
    host: 'gerrit.com',
    gitilesBaseUrl: 'https://gerrit.com/gitiles',
  };

  // The url carries the '/a/' authentication prefix in front of the
  // gitiles path; it must be ignored when extracting the parts.
  const { branch, filePath, project } = parseGerritGitilesUrl(
    config,
    'https://gerrit.com/a/gitiles/web/project/+/refs/heads/master/README.md',
  );
  expect(project).toEqual('web/project');
  expect(branch).toEqual('master');
  expect(filePath).toEqual('README.md');

  // Same check for an authenticated url that points at the repository root.
  const { filePath: rootPath } = parseGerritGitilesUrl(
    config,
    'https://gerrit.com/a/gitiles/web/project/+/refs/heads/master',
  );
  expect(rootPath).toEqual('/');
});
it('throws on incorrect gitiles urls.', () => {
const config: GerritIntegrationConfig = {
host: 'gerrit.com',
+55 -1
View File
@@ -37,6 +37,7 @@ const GERRIT_BODY_PREFIX = ")]}'";
*
* Gitiles url:
* https://g.com/optional_path/\{project\}/+/refs/heads/\{branch\}/\{filePath\}
* https://g.com/a/optional_path/\{project\}/+/refs/heads/\{branch\}/\{filePath\}
*
*
* @param url - An URL pointing to a file stored in git.
@@ -47,7 +48,17 @@ export function parseGerritGitilesUrl(
config: GerritIntegrationConfig,
url: string,
): { branch: string; filePath: string; project: string } {
const urlPath = url.replace(config.gitilesBaseUrl!, '');
const baseUrlParse = new URL(config.gitilesBaseUrl!);
const urlParse = new URL(url);
// Remove the Gerrit authentication prefix '/a/' from the url, but only when
// the pathname starts with '/a/'. For example, if the gitilesBaseUrl is
// https://review.gerrit.com/plugins/gitiles and the provided url is
// https://review.gerrit.com/a/plugins/gitiles/..., the leading '/a' is dropped.
const urlPath = urlParse.pathname
.substring(urlParse.pathname.startsWith('/a/') ? 2 : 0)
.replace(baseUrlParse.pathname, '');
const parts = urlPath.split('/').filter(p => !!p);
const projectEndIndex = parts.indexOf('+');
@@ -91,6 +102,28 @@ export function buildGerritGitilesUrl(
}/${project}/+/refs/heads/${branch}/${trimStart(filePath, '/')}`;
}
/**
 * Build a Gerrit Gitiles archive url that targets a specific branch and path
 *
 * @remarks
 *
 * The resulting url has the form
 * `<gitiles url>/<project>/+archive/refs/heads/<branch>[/<path>].tar.gz`.
 * Leading and trailing slashes in `filePath` are ignored, so `docs`, `/docs`
 * and `docs/` all produce the same url. An empty path or `/` targets the
 * root of the repository.
 *
 * @param config - A Gerrit provider config.
 * @param project - The name of the git project
 * @param branch - The branch we will target.
 * @param filePath - The path inside the repository to archive.
 * @returns The url of the `.tar.gz` archive.
 * @public
 */
export function buildGerritGitilesArchiveUrl(
  config: GerritIntegrationConfig,
  project: string,
  branch: string,
  filePath: string,
): string {
  // Strip surrounding slashes so the path never introduces an empty
  // segment ("//docs.tar.gz") or a dangling one ("/docs/.tar.gz").
  const directory = filePath.replace(/^\/+|\/+$/g, '');
  const archiveName = directory === '' ? '.tar.gz' : `/${directory}.tar.gz`;
  return `${getGitilesAuthenticationUrl(
    config,
  )}/${project}/+archive/refs/heads/${branch}${archiveName}`;
}
/**
* Return the authentication prefix.
*
@@ -109,6 +142,27 @@ export function getAuthenticationPrefix(
return config.password ? '/a/' : '/';
}
/**
 * Return the Gitiles base url with the authentication prefix applied.
 *
 * @remarks
 *
 * The prefix returned by `getAuthenticationPrefix` is inserted between the
 * host and the path of `gitilesBaseUrl`. With a password configured that
 * prefix is "/a/"; without one it is "/", which leaves the url path as it
 * was.
 *
 * @param config - A Gerrit provider config.
 * @returns The Gitiles url built from protocol, host, prefix and path.
 * @public
 */
export function getGitilesAuthenticationUrl(
  config: GerritIntegrationConfig,
): string {
  const { protocol, host, pathname } = new URL(config.gitilesBaseUrl!);
  const prefix = getAuthenticationPrefix(config);
  // The prefix already ends with '/', so drop the slash that starts pathname.
  const pathWithoutLeadingSlash = pathname.slice(1);
  return `${protocol}//${host}${prefix}${pathWithoutLeadingSlash}`;
}
/**
* Return the url to get branch info from the Gerrit API.
*
+1
View File
@@ -19,6 +19,7 @@ export {
readGerritIntegrationConfigs,
} from './config';
export {
buildGerritGitilesArchiveUrl,
getGerritBranchApiUrl,
getGerritCloneRepoUrl,
getGerritFileContentsApiUrl,