Initial identity-awareness implementation for GA.

Signed-off-by: Eric Peterson <ericpeterson@spotify.com>
This commit is contained in:
Eric Peterson
2022-01-16 18:34:40 +01:00
committed by Eric Peterson
parent 2b06ccadb3
commit b40a0ccc4d
13 changed files with 602 additions and 36 deletions
+6
View File
@@ -0,0 +1,6 @@
---
'@backstage/plugin-analytics-module-ga': patch
---
Added the ability to capture and set user IDs from Backstage's `identityApi`. For full instructions on how to
set this up, see [the User ID section of its README](https://github.com/backstage/backstage/tree/master/plugins/analytics-module-ga#user-ids)
+1
View File
@@ -223,6 +223,7 @@ productional
Protobuf
proxying
Proxying
pseudonymized
pubsub
pygments
pymdownx
+71 -3
View File
@@ -14,15 +14,22 @@ This plugin contains no other functionality.
```tsx
// packages/app/src/apis.ts
import { analyticsApiRef, configApiRef } from '@backstage/core-plugin-api';
import {
analyticsApiRef,
configApiRef,
identityApiRef,
} from '@backstage/core-plugin-api';
import { GoogleAnalytics } from '@backstage/plugin-analytics-module-ga';
export const apis: AnyApiFactory[] = [
// Instantiate and register the GA Analytics API Implementation.
createApiFactory({
api: analyticsApiRef,
deps: { configApi: configApiRef },
factory: ({ configApi }) => GoogleAnalytics.fromConfig(configApi),
deps: { configApi: configApiRef, identityApi: identityApiRef },
factory: ({ configApi, identityApi }) =>
GoogleAnalytics.fromConfig(configApi, {
identityApi,
}),
}),
];
```
@@ -92,6 +99,66 @@ app:
key: someEventContextAttr
```
### User IDs
This plugin supports accurately deriving user-oriented metrics (like monthly
active users) using Google Analytics' [user ID views][ga-user-id-view]. To
enable this...
1. Be sure you've gone through the process of setting up a user ID view in your
Backstage instance's Google Analytics property (see docs linked above).
2. Make sure you instantiate `GoogleAnalytics` with an `identityApi` instance
passed to it, as shown in the installation section above.
3. Set `app.analytics.ga.identity` to either `required` or `optional` in your
`app-config.yaml`, like this:
```yaml
app:
analytics:
ga:
trackingId: UA-0000000-0
identity: optional
```
Set `identity` to `optional` if you need accurate session counts, including
cases where users do not sign in at all. Use `required` if you need all hits
to be associated with a user ID without exception (and don't mind if some
sessions are not captured, such as those where no sign-in occurs).
Note that, to comply with GA policies, the value of the User ID is
pseudonymized before being sent to GA. By default, it is a `sha256` hash of the
current user's `userEntityRef` as returned by the `identityApi`. To set a
different value, provide a custom implementation of the `identityApi` that
resolves a `userEntityRef` of the form `PrivateUser:namespace/YOUR-VALUE`. For
example:
```typescript
export const apis: AnyApiFactory[] = [
  createApiFactory({
    api: analyticsApiRef,
    deps: { config: configApiRef, identityApi: identityApiRef },
    factory: ({ identityApi, config }) => {
      // Wrap the real identity API so GA only ever sees the custom value.
      return GoogleAnalytics.fromConfig(config, {
        identityApi: new PseudonymizedIdentity(identityApi),
      });
    },
  }),
];
class PseudonymizedIdentity implements IdentityApi {
  constructor(private actualApi: IdentityApi) {}
  async getBackstageIdentity(): Promise<BackstageUserIdentity> {
    const { email = 'someone' } = await this.actualApi.getProfileInfo();
    const hashedEmail = customHashingFunction(email);
    return {
      type: 'user',
      userEntityRef: `PrivateUser:default/${hashedEmail}`,
      ownershipEntityRefs: [],
    };
  }
  // ...
}
```
### Debugging and Testing
In pre-production environments, you may wish to set additional configurations
@@ -147,3 +214,4 @@ app:
[what-is-a-custom-dimension]: https://support.google.com/analytics/answer/2709828
[configure-custom-dimension]: https://support.google.com/analytics/answer/2709828#configuration
[ga-user-id-view]: https://support.google.com/analytics/answer/3123669
+7 -5
View File
@@ -7,18 +7,20 @@ import { AnalyticsApi } from '@backstage/core-plugin-api';
import { AnalyticsEvent } from '@backstage/core-plugin-api';
import { BackstagePlugin } from '@backstage/core-plugin-api';
import { Config } from '@backstage/config';
import { IdentityApi } from '@backstage/core-plugin-api';
// Warning: (ae-missing-release-tag) "analyticsModuleGA" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
//
// @public (undocumented)
export const analyticsModuleGA: BackstagePlugin<{}, {}>;
// Warning: (ae-missing-release-tag) "GoogleAnalytics" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
//
// @public
export class GoogleAnalytics implements AnalyticsApi {
captureEvent(event: AnalyticsEvent): void;
static fromConfig(config: Config): GoogleAnalytics;
static fromConfig(
config: Config,
options?: {
identityApi?: IdentityApi;
},
): GoogleAnalytics;
}
// (No @packageDocumentation comment for this package)
+19
View File
@@ -34,6 +34,25 @@ export interface Config {
*/
scriptSrc?: string;
/**
* Controls how the identityApi is used when sending data to GA:
*
* - `disabled`: (Default) Explicitly prevents a user's identity from
* being used when capturing events in GA.
* - `optional`: Pageviews and hits are forwarded to GA as they happen
* and only include user identity metadata once known. Guarantees
* that hits are captured for all sessions, even if no sign in
* occurs, but may result in dropped hits in User ID views.
* - `required`: All pageviews and hits are deferred until an identity
* is known. Guarantees that all data sent to GA correlates to a user
* identity, but prevents GA from receiving events for sessions in
* which a user does not sign in. An `identityApi` instance must be
* passed during instantiation when set to this value.
*
* @visibility frontend
*/
identity?: 'disabled' | 'optional' | 'required';
/**
* Whether or not to log analytics debug statements to the console.
* Defaults to false.
+1
View File
@@ -21,6 +21,7 @@
"clean": "backstage-cli clean"
},
"dependencies": {
"@backstage/catalog-model": "^0.9.10",
"@backstage/config": "^0.1.13",
"@backstage/core-components": "^0.8.6",
"@backstage/core-plugin-api": "^0.6.0",
@@ -15,10 +15,17 @@
*/
import { ConfigReader } from '@backstage/config';
import { IdentityApi } from '@backstage/core-plugin-api';
import ReactGA from 'react-ga';
import { GoogleAnalytics } from './GoogleAnalytics';
describe('GoogleAnalytics', () => {
const context = {
extension: 'App',
pluginId: 'some-plugin',
routeRef: 'unknown',
releaseNum: 1337,
};
const trackingId = 'UA-000000-0';
const basicValidConfig = new ConfigReader({
app: { analytics: { ga: { trackingId, testMode: true } } },
@@ -50,12 +57,6 @@ describe('GoogleAnalytics', () => {
});
describe('integration', () => {
const context = {
extension: 'App',
pluginId: 'some-plugin',
routeRef: 'unknown',
releaseNum: 1337,
};
const advancedConfig = new ConfigReader({
app: {
analytics: {
@@ -141,20 +142,13 @@ describe('GoogleAnalytics', () => {
context,
});
// Expect a set command first.
const [setCommand, setData] = ReactGA.testModeAPI.calls[1];
expect(setCommand).toBe('set');
expect(setData).toMatchObject({
dimension1: context.pluginId,
metric1: context.releaseNum,
});
// Followed by a send command.
const [sendCommand, sendData] = ReactGA.testModeAPI.calls[2];
expect(sendCommand).toBe('send');
expect(sendData).toMatchObject({
const [command, data] = ReactGA.testModeAPI.calls[1];
expect(command).toBe('send');
expect(data).toMatchObject({
hitType: 'pageview',
page: '/a-page',
dimension1: context.pluginId,
metric1: context.releaseNum,
});
});
@@ -208,4 +202,212 @@ describe('GoogleAnalytics', () => {
});
});
});
describe('identityApi', () => {
const identityApi = {
getBackstageIdentity: jest.fn().mockResolvedValue({
userEntityRef: 'User:default/someone',
}),
} as unknown as IdentityApi;
it('does not set userId unless explicitly configured', async () => {
// Instantiate with identityApi and default configs.
const api = GoogleAnalytics.fromConfig(basicValidConfig, { identityApi });
api.captureEvent({
action: 'navigate',
subject: '/',
context,
});
// Wait for any/all promises involved to settle.
await new Promise(resolve => setImmediate(resolve));
// A pageview should have been fired immediately.
const [command, data] = ReactGA.testModeAPI.calls[1];
expect(command).toBe('send');
expect(data).toMatchObject({
hitType: 'pageview',
page: '/',
});
// There should not have been a UserID set.
expect(ReactGA.testModeAPI.calls).toHaveLength(2);
});
it('sets hashed userId when identityApi is provided', async () => {
// Instantiate with identityApi and identity set to optional
const optionalConfig = new ConfigReader({
app: {
analytics: {
ga: { trackingId, testMode: true, identity: 'optional' },
},
},
});
const api = GoogleAnalytics.fromConfig(optionalConfig, { identityApi });
api.captureEvent({
action: 'navigate',
subject: '/',
context,
});
// Wait for any/all promises involved to settle.
await new Promise(resolve => setImmediate(resolve));
// A pageview should have been fired immediately.
const [command, data] = ReactGA.testModeAPI.calls[1];
expect(command).toBe('send');
expect(data).toMatchObject({
hitType: 'pageview',
page: '/',
});
// User ID should have been set after the pageview.
const [setCommand, setData] = ReactGA.testModeAPI.calls[2];
expect(setCommand).toBe('set');
expect(setData).toMatchObject({
// String indicating userEntityRef went through expected hashing.
userId: '557365723a64656661756c742f736f6d656f6e65',
});
});
it('sets pre-hashed userId when PrivateUser entity ref is provided', async () => {
(identityApi.getBackstageIdentity as jest.Mock).mockResolvedValueOnce({
userEntityRef: 'PrivateUser:hashed/s0m3hash3dvalu3',
});
const optionalConfig = new ConfigReader({
app: {
analytics: {
ga: { trackingId, testMode: true, identity: 'optional' },
},
},
});
const api = GoogleAnalytics.fromConfig(optionalConfig, { identityApi });
api.captureEvent({
action: 'navigate',
subject: '/',
context,
});
// Wait for any/all promises involved to settle.
await new Promise(resolve => setImmediate(resolve));
// User ID should have been set after the pageview.
const [setCommand, setData] = ReactGA.testModeAPI.calls[2];
expect(setCommand).toBe('set');
expect(setData).toMatchObject({
userId: 's0m3hash3dvalu3',
});
});
it('does not set userId when identityApi is provided and ga.identity is explicitly disabled', async () => {
// Instantiate with identityApi and identity explicitly disabled.
const disabledConfig = new ConfigReader({
app: {
analytics: {
ga: { trackingId, testMode: true, identity: 'disabled' },
},
},
});
const api = GoogleAnalytics.fromConfig(disabledConfig, { identityApi });
api.captureEvent({
action: 'navigate',
subject: '/',
context,
});
// Wait for any/all promises involved to settle.
await new Promise(resolve => setImmediate(resolve));
// A pageview should have been fired immediately.
const [command, data] = ReactGA.testModeAPI.calls[1];
expect(command).toBe('send');
expect(data).toMatchObject({
hitType: 'pageview',
page: '/',
});
// There should not have been a UserID set.
expect(ReactGA.testModeAPI.calls).toHaveLength(2);
});
it('throws error when ga.identity is required but no identityApi is provided', async () => {
// Instantiate without identityApi and identity explicitly required.
const requiredConfig = new ConfigReader({
app: {
analytics: {
ga: { trackingId, testMode: true, identity: 'required' },
},
},
});
// No options are passed, so there is no identityApi to satisfy `required`.
expect(() => GoogleAnalytics.fromConfig(requiredConfig)).toThrow();
});
it('defers event capture when ga.identity is required', async () => {
// Instantiate with identityApi and identity explicitly required.
const requiredConfig = new ConfigReader({
app: {
analytics: {
ga: { trackingId, testMode: true, identity: 'required' },
},
},
});
const api = GoogleAnalytics.fromConfig(requiredConfig, { identityApi });
// Fire a pageview and an event.
api.captureEvent({
action: 'navigate',
subject: '/',
context,
});
api.captureEvent({
action: 'test',
subject: 'some label',
context,
});
// Wait for any/all promises involved to settle.
await new Promise(resolve => setImmediate(resolve));
// User ID should have been set first.
const [setCommand, setData] = ReactGA.testModeAPI.calls[1];
expect(setCommand).toBe('set');
expect(setData).toMatchObject({
// String indicating userEntityRef went through expected hashing.
userId: '557365723a64656661756c742f736f6d656f6e65',
});
// Then a pageview should have been fired with a queue time.
const [pageCommand, pageData] = ReactGA.testModeAPI.calls[2];
expect(pageCommand).toBe('send');
expect(pageData).toMatchObject({
hitType: 'pageview',
page: '/',
queueTime: expect.any(Number),
});
// Then an event should have been fired with a queue time.
const [eventCommand, eventData] = ReactGA.testModeAPI.calls[3];
expect(eventCommand).toBe('send');
expect(eventData).toMatchObject({
hitType: 'event',
queueTime: expect.any(Number),
});
// And subsequent hits should not have a queue time.
api.captureEvent({
action: 'navigate',
subject: '/page-2',
context,
});
const [lastCommand, lastData] = ReactGA.testModeAPI.calls[4];
expect(lastCommand).toBe('send');
expect(lastData).toMatchObject({
hitType: 'pageview',
page: '/page-2',
});
expect(lastData.queueTime).toBeUndefined();
});
});
});
@@ -15,13 +15,16 @@
*/
import ReactGA from 'react-ga';
import { parseEntityRef } from '@backstage/catalog-model';
import {
AnalyticsApi,
AnalyticsContextValue,
AnalyticsEventAttributes,
AnalyticsEvent,
IdentityApi,
} from '@backstage/core-plugin-api';
import { Config } from '@backstage/config';
import { DeferredCapture } from '../../../util';
type CustomDimensionOrMetricConfig = {
type: 'dimension' | 'metric';
@@ -32,21 +35,33 @@ type CustomDimensionOrMetricConfig = {
/**
* Google Analytics API provider for the Backstage Analytics API.
* @public
*/
export class GoogleAnalytics implements AnalyticsApi {
private readonly cdmConfig: CustomDimensionOrMetricConfig[];
private readonly capture: DeferredCapture;
/**
* Instantiate the implementation and initialize ReactGA.
*/
private constructor(options: {
identityApi?: IdentityApi;
cdmConfig: CustomDimensionOrMetricConfig[];
identity: string;
trackingId: string;
scriptSrc?: string;
testMode: boolean;
debug: boolean;
}) {
const { cdmConfig, trackingId, scriptSrc, testMode, debug } = options;
const {
cdmConfig,
identity,
trackingId,
identityApi,
scriptSrc,
testMode,
debug,
} = options;
this.cdmConfig = cdmConfig;
@@ -57,15 +72,28 @@ export class GoogleAnalytics implements AnalyticsApi {
gaAddress: scriptSrc,
titleCase: false,
});
// If identity is required, defer event capture until identity is known.
this.capture = new DeferredCapture({ defer: identity === 'required' });
// Capture user only when explicitly enabled and provided.
if (identity !== 'disabled' && identityApi) {
this.setUserFrom(identityApi);
}
}
/**
* Instantiate a fully configured GA Analytics API implementation.
*/
static fromConfig(config: Config) {
static fromConfig(
config: Config,
options: { identityApi?: IdentityApi } = {},
) {
// Get all necessary configuration.
const trackingId = config.getString('app.analytics.ga.trackingId');
const scriptSrc = config.getOptionalString('app.analytics.ga.scriptSrc');
const identity =
config.getOptionalString('app.analytics.ga.identity') || 'disabled';
const debug = config.getOptionalBoolean('app.analytics.ga.debug') ?? false;
const testMode =
config.getOptionalBoolean('app.analytics.ga.testMode') ?? false;
@@ -83,8 +111,16 @@ export class GoogleAnalytics implements AnalyticsApi {
};
}) ?? [];
if (identity === 'required' && !options.identityApi) {
throw new Error(
'Invalid config: identity API must be provided to deps when ga.identity is required',
);
}
// Return an implementation instance.
return new GoogleAnalytics({
...options,
identity,
trackingId,
scriptSrc,
cdmConfig,
@@ -103,16 +139,11 @@ export class GoogleAnalytics implements AnalyticsApi {
const customMetadata = this.getCustomDimensionMetrics(context, attributes);
if (action === 'navigate' && context.extension === 'App') {
// Set any/all custom dimensions.
if (Object.keys(customMetadata).length) {
ReactGA.set(customMetadata);
}
ReactGA.pageview(subject);
this.capture.pageview(subject, customMetadata);
return;
}
ReactGA.event({
this.capture.event({
category: context.extension || 'App',
action,
label: subject,
@@ -151,4 +182,63 @@ export class GoogleAnalytics implements AnalyticsApi {
return customDimensionsMetrics;
}
/**
 * Resolves the current user through the given Identity API and registers a
 * PII-free identifier as the GA `userId`.
 *
 * Google does not permit PII in the `userId` field, so by default the whole
 * `userEntityRef` is hashed on behalf of integrators before it is sent:
 *
 * - `User:default/name` is reported as `sha256(User:default/name)`
 *
 * Integrators who want a different hashing scheme (or a different value
 * altogether) can pass an Identity API implementation whose `userEntityRef`
 * has the literal kind `PrivateUser`. In that case the namespace is ignored
 * and the name is used verbatim as the user ID:
 *
 * - `PrivateUser:default/a0n3b4n3` is reported as `a0n3b4n3`
 * - `PrivateUser:xyz/a0n3b4n3` is reported as `a0n3b4n3`
 *
 * Once the user ID has been set, the deferred capture mechanism is told that
 * it may proceed.
 *
 * Note: this feature requires that a Google Analytics User ID view has been
 * set up in the property used to track Backstage.
 */
private async setUserFrom(identityApi: IdentityApi) {
  const backstageIdentity = await identityApi.getBackstageIdentity();

  // Never hand the raw entity ref to GA; convert it to a private ID first.
  const userId = await this.getPrivateUserId(backstageIdentity.userEntityRef);
  ReactGA.set({ userId });

  // The user is now known, so any held-back hits may be released.
  this.capture.setReady();
}
/**
 * Produces the identifier reported to Google Analytics for a given
 * `userEntityRef`, without exposing PII.
 *
 * Refs of kind `PrivateUser` are treated as already pseudonymized by the
 * integrator, so their name is returned untouched; every other ref is hashed
 * in full.
 */
private getPrivateUserId(userEntityRef: string): Promise<string> {
  const { kind, name } = parseEntityRef(userEntityRef);

  return kind === 'PrivateUser'
    ? Promise.resolve(name)
    : this.hash(userEntityRef);
}
/**
 * Hashes a string with the web cryptography `sha-256` digest and returns the
 * result as a lowercase hex string (two characters per byte).
 */
private async hash(value: string): Promise<string> {
  const digest = await crypto.subtle.digest(
    'sha-256',
    new TextEncoder().encode(value),
  );
  const bytes = new Uint8Array(digest);

  // Append each byte as a zero-padded, two-character hex pair.
  return bytes.reduce(
    (hex, byte) => hex + byte.toString(16).padStart(2, '0'),
    '',
  );
}
}
+1 -1
View File
@@ -15,4 +15,4 @@
*/
export { analyticsModuleGA } from './plugin';
export { GoogleAnalytics } from './apis/implementations/AnalyticsApi';
export * from './apis/implementations/AnalyticsApi';
@@ -15,6 +15,9 @@
*/
import { createPlugin } from '@backstage/core-plugin-api';
/**
 * The Backstage plugin instance for this module, created with the plugin id
 * `analytics-provider-ga`.
 *
 * @public
 */
export const analyticsModuleGA = createPlugin({
id: 'analytics-provider-ga',
});
@@ -15,3 +15,20 @@
*/
import '@testing-library/jest-dom';
import 'cross-fetch/polyfill';
// eslint-disable-next-line no-restricted-imports
import { TextEncoder } from 'util';
// Mock browser crypto.subtle.digest method for sha-256 hashing.
//
// The mock performs no hashing: it ignores the requested algorithm and hands
// back the input's underlying buffer unchanged (synchronously, not wrapped in
// a Promise). Code under test that hex-encodes the "digest" therefore yields
// the hex encoding of the original input bytes, which keeps expected values
// in tests deterministic and easy to derive by hand.
Object.defineProperty(global.self, 'crypto', {
value: {
subtle: {
digest: (_algo: string, data: Uint8Array): ArrayBuffer => data.buffer,
},
},
});
// Also used in browser-based APIs for hashing. The implementation exposed
// here is the one imported from Node's `util` module above.
Object.defineProperty(global.self, 'TextEncoder', {
value: TextEncoder,
});
@@ -0,0 +1,140 @@
/*
* Copyright 2022 The Backstage Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import ReactGA from 'react-ga';
type Hit = {
timestamp: number;
data: {
hitType: 'pageview' | 'event';
[x: string]: any;
};
};
/**
 * A wrapper around ReactGA that can optionally handle latent capture logic.
 *
 * - When defer is `false`, event data is sent directly to GA.
 * - When defer is `true`, event data is queued (with a timestamp) until
 *   `setReady()` is called. The queue is then flushed in capture order, each
 *   hit decorated with the `qt` / `queueTime` parameter of the Measurement
 *   Protocol. After that, hits are sent directly.
 *
 * @see https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#qt
 */
export class DeferredCapture {
  /**
   * Queue of deferred hits to be processed when ready.
   */
  private queue: Hit[] = [];

  /**
   * Marker indicating when it's okay to revert to synchronous capture.
   */
  private doneDeferring = false;

  /**
   * Whether or not deferred capture is desired.
   */
  private readonly defer: boolean;

  /**
   * @param options - `defer: true` holds hits back until `setReady()` is
   *   called; when omitted or `false`, hits are sent as they are captured.
   */
  constructor({ defer = false }: { defer?: boolean } = {}) {
    this.defer = defer;
  }

  /**
   * Indicates that deferred capture may now proceed. Queued hits are sent
   * immediately, in the order they were captured. Calling this more than once
   * has no further effect.
   */
  setReady() {
    if (this.doneDeferring) {
      return;
    }
    this.doneDeferring = true;

    // Flush synchronously so that hits captured right after this call cannot
    // overtake the queued ones, and detach the queue so the flushed hits are
    // not retained for the lifetime of the instance.
    const pending = this.queue;
    this.queue = [];
    pending.forEach(hit => this.sendDeferred(hit));
  }

  /**
   * Either forwards the pageview directly to GA, or (if configured) enqueues
   * the pageview hit to be captured when ready.
   */
  pageview(path: string, metadata: ReactGA.FieldsObject = {}) {
    const data: Hit['data'] = {
      hitType: 'pageview',
      page: path,
      ...metadata,
    };

    if (this.shouldDefer()) {
      this.queue.push({ timestamp: Date.now(), data });
      return;
    }

    ReactGA.send(data);
  }

  /**
   * Either forwards the event directly to GA, or (if configured) enqueues the
   * event hit to be captured when ready.
   */
  event(eventDetails: ReactGA.EventArgs) {
    if (this.shouldDefer()) {
      this.queue.push({
        timestamp: Date.now(),
        data: this.toEventHit(eventDetails),
      });
      return;
    }

    ReactGA.event(eventDetails);
  }

  /**
   * Only defer if configured and if we are still not ready.
   */
  private shouldDefer() {
    return this.defer && !this.doneDeferring;
  }

  /**
   * Translates react-ga style event arguments into the field names expected
   * by a raw `send` command. Deferred hits are sent through `ReactGA.send`
   * rather than `ReactGA.event`, so `category` / `action` / `label` / `value`
   * must be renamed to `eventCategory` / `eventAction` / `eventLabel` /
   * `eventValue`; otherwise the event would lack its required fields.
   *
   * NOTE(review): any normalisation `ReactGA.event` applies to these values
   * before sending is not applied to deferred hits - confirm whether that
   * matters for your data.
   */
  private toEventHit(eventDetails: ReactGA.EventArgs): Hit['data'] {
    const {
      category,
      action,
      label,
      value,
      nonInteraction,
      transport,
      ...rest
    } = eventDetails;

    const data: Hit['data'] = {
      // Remaining keys (e.g. custom dimensions and metrics) pass through.
      ...rest,
      hitType: 'event',
      eventCategory: category,
      eventAction: action,
    };

    // Optional fields are only included when actually provided.
    if (label) {
      data.eventLabel = label;
    }
    if (value !== undefined) {
      data.eventValue = value;
    }
    if (nonInteraction !== undefined) {
      data.nonInteraction = nonInteraction;
    }
    if (transport !== undefined) {
      data.transport = transport;
    }

    return data;
  }

  /**
   * Sends a given hit to GA, decorated with the correct queue time.
   */
  private sendDeferred(hit: Hit) {
    // Send the hit with the appropriate queue time (`qt`): the number of
    // milliseconds elapsed between capture and now.
    ReactGA.send({
      ...hit.data,
      queueTime: Date.now() - hit.timestamp,
    });
  }
}
@@ -0,0 +1,17 @@
/*
* Copyright 2022 The Backstage Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
export { DeferredCapture } from './DeferredCapture';