Skip to content

Commit

Permalink
scaleDown lambda tries to send metrics 10s before timing out (#830)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeanschmidt authored Oct 3, 2022
1 parent a00488d commit c191dd8
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ describe('Config', () => {
process.env.GITHUB_APP_CLIENT_SECRET = 'GITHUB_APP_CLIENT_SECRET';
process.env.GITHUB_APP_ID = 'GITHUB_APP_ID';
process.env.KMS_KEY_ID = 'KMS_KEY_ID';
process.env.LAMBDA_TIMEOUT = '113';
process.env.LAUNCH_TEMPLATE_NAME_LINUX = 'LAUNCH_TEMPLATE_NAME_LINUX';
process.env.LAUNCH_TEMPLATE_NAME_WINDOWS = 'LAUNCH_TEMPLATE_NAME_WINDOWS';
process.env.LAUNCH_TEMPLATE_VERSION_LINUX = 'LAUNCH_TEMPLATE_VERSION_LINUX';
Expand Down Expand Up @@ -52,6 +53,7 @@ describe('Config', () => {
expect(Config.Instance.githubAppClientSecret).toBe('GITHUB_APP_CLIENT_SECRET');
expect(Config.Instance.githubAppId).toBe('GITHUB_APP_ID');
expect(Config.Instance.kmsKeyId).toBe('KMS_KEY_ID');
expect(Config.Instance.lambdaTimeout).toBe(113);
expect(Config.Instance.launchTemplateNameLinux).toBe('LAUNCH_TEMPLATE_NAME_LINUX');
expect(Config.Instance.launchTemplateNameWindows).toBe('LAUNCH_TEMPLATE_NAME_WINDOWS');
expect(Config.Instance.launchTemplateVersionLinux).toBe('LAUNCH_TEMPLATE_VERSION_LINUX');
Expand Down Expand Up @@ -88,6 +90,7 @@ describe('Config', () => {
delete process.env.GITHUB_APP_CLIENT_SECRET;
delete process.env.GITHUB_APP_ID;
delete process.env.KMS_KEY_ID;
delete process.env.LAMBDA_TIMEOUT;
process.env.LAUNCH_TEMPLATE_NAME_LINUX = 'LAUNCH_TEMPLATE_NAME_LINUX';
process.env.LAUNCH_TEMPLATE_NAME_WINDOWS = 'LAUNCH_TEMPLATE_NAME_WINDOWS';
process.env.LAUNCH_TEMPLATE_VERSION_LINUX = 'LAUNCH_TEMPLATE_VERSION_LINUX';
Expand Down Expand Up @@ -115,6 +118,7 @@ describe('Config', () => {
expect(Config.Instance.githubAppClientSecret).toBeUndefined();
expect(Config.Instance.githubAppId).toBeUndefined();
expect(Config.Instance.kmsKeyId).toBeUndefined();
expect(Config.Instance.lambdaTimeout).toEqual(600);
expect(Config.Instance.launchTemplateNameLinux).toBe('LAUNCH_TEMPLATE_NAME_LINUX');
expect(Config.Instance.launchTemplateNameWindows).toBe('LAUNCH_TEMPLATE_NAME_WINDOWS');
expect(Config.Instance.launchTemplateVersionLinux).toBe('LAUNCH_TEMPLATE_VERSION_LINUX');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export class Config {
readonly githubAppClientSecret: string | undefined;
readonly githubAppId: string | undefined;
readonly kmsKeyId: string | undefined;
readonly lambdaTimeout: number;
readonly launchTemplateNameLinux: string | undefined;
readonly launchTemplateNameWindows: string | undefined;
readonly launchTemplateVersionLinux: string | undefined;
Expand Down Expand Up @@ -43,6 +44,7 @@ export class Config {
this.githubAppClientSecret = process.env.GITHUB_APP_CLIENT_SECRET;
this.githubAppId = process.env.GITHUB_APP_ID;
this.kmsKeyId = process.env.KMS_KEY_ID;
this.lambdaTimeout = Number(process.env.LAMBDA_TIMEOUT || '600');
this.launchTemplateNameLinux = process.env.LAUNCH_TEMPLATE_NAME_LINUX;
this.launchTemplateNameWindows = process.env.LAUNCH_TEMPLATE_NAME_WINDOWS;
this.launchTemplateVersionLinux = process.env.LAUNCH_TEMPLATE_VERSION_LINUX;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,11 @@ export class Metrics {
getRunnerTypesFailure() {
this.countEntry(`run.getRunnerTypes.failure`, 1);
}

/**
 * Records one occurrence under the `run.timeout` key via `countEntry`.
 */
/* istanbul ignore next */
lambdaTimeout() {
  this.countEntry('run.timeout', 1);
}
}

export class ScaleUpMetrics extends Metrics {
Expand Down Expand Up @@ -709,3 +714,23 @@ export class ScaleDownMetrics extends Metrics {
}
}
}

/**
 * Mutable holder shared between the code that schedules a timer and the
 * callback produced by `sendMetricsAtTimeout`. Both fields are optional and
 * are reset to `undefined` once they have been consumed.
 */
export interface sendMetricsTimeoutVars {
// Metrics object on which the timeout callback calls `lambdaTimeout()` and `sendMetrics()`.
metrics?: Metrics;
// Handle returned by `setTimeout`, kept so the pending timer can be passed to `clearTimeout`.
setTimeout?: ReturnType<typeof setTimeout>;
}

/**
 * Builds the callback to run when the timer held in `metricsTimeouts` fires.
 *
 * When invoked, the callback:
 *  1. clears the timer handle stored in `metricsTimeouts.setTimeout` (if any)
 *     and resets that field to `undefined`;
 *  2. if a metrics object is still attached, calls `lambdaTimeout()` and then
 *     `sendMetrics()` on it, and detaches it by setting the field to `undefined`.
 *
 * Because both fields are cleared after use, calling the callback again (or
 * after the caller has cleared the fields itself) does nothing.
 *
 * @param metricsTimeouts - shared holder with the metrics object and timer handle
 * @returns a zero-argument function suitable for passing to `setTimeout`
 */
/* istanbul ignore next */
export function sendMetricsAtTimeout(metricsTimeouts: sendMetricsTimeoutVars) {
  return function onLambdaTimeout(): void {
    // Drop the timer handle first so nothing keeps a reference to it.
    const pendingTimer = metricsTimeouts.setTimeout;
    if (pendingTimer) {
      clearTimeout(pendingTimer);
      metricsTimeouts.setTimeout = undefined;
    }

    // Nothing left to report: the metrics object was already detached.
    const pendingMetrics = metricsTimeouts.metrics;
    if (!pendingMetrics) {
      return;
    }

    pendingMetrics.lambdaTimeout();
    pendingMetrics.sendMetrics();
    metricsTimeouts.metrics = undefined;
  };
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import { RunnerInfo, getRepo } from './utils';
import { Config } from './config';
import moment from 'moment';
import { resetSecretCache } from './gh-auth';
import { ScaleDownMetrics } from './metrics';
import { ScaleDownMetrics, sendMetricsTimeoutVars, sendMetricsAtTimeout } from './metrics';

function runnerMinimumTimeExceeded(runner: RunnerInfo): boolean {
const launchTimePlusMinimum = moment(runner.launchTime)
Expand All @@ -27,6 +27,13 @@ function runnerMinimumTimeExceeded(runner: RunnerInfo): boolean {
export default async function scaleDown(): Promise<void> {
// list and sort runners, newest first. This ensure we keep the newest runners longer.
const metrics = new ScaleDownMetrics();
const sndMetricsTimout: sendMetricsTimeoutVars = {
metrics: metrics,
};
sndMetricsTimout.setTimeout = setTimeout(
sendMetricsAtTimeout(sndMetricsTimout),
(Config.Instance.lambdaTimeout - 10) * 1000,
);

try {
// Ensure a clean cache before attempting each scale down event
Expand Down Expand Up @@ -88,6 +95,9 @@ export default async function scaleDown(): Promise<void> {
}
}
} finally {
clearTimeout(sndMetricsTimout.setTimeout);
sndMetricsTimout.metrics = undefined;
sndMetricsTimout.setTimeout = undefined;
metrics.sendMetrics();
}
}
Expand Down
3 changes: 2 additions & 1 deletion terraform-aws-github-runner/modules/runners/scale-down.tf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ resource "aws_lambda_function" "scale_down" {

environment {
variables = {
AWS_REGION_INSTANCES = join(",", var.aws_region_instances)
ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners
ENVIRONMENT = var.environment
GHES_URL = var.ghes_url
Expand All @@ -37,9 +36,11 @@ resource "aws_lambda_function" "scale_down" {
GITHUB_APP_ID = var.github_app.id
GITHUB_APP_KEY_BASE64 = local.github_app_key_base64
KMS_KEY_ID = var.encryption.kms_key_id
LAMBDA_TIMEOUT = var.lambda_timeout_scale_down
MINIMUM_RUNNING_TIME_IN_MINUTES = var.minimum_running_time_in_minutes
SCALE_DOWN_CONFIG = jsonencode(var.idle_config)
SECRETSMANAGER_SECRETS_ID = var.secretsmanager_secrets_id
AWS_REGION_INSTANCES = join(",", var.aws_region_instances)
}
}

Expand Down
1 change: 1 addition & 0 deletions terraform-aws-github-runner/modules/runners/scale-up.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ resource "aws_lambda_function" "scale_up" {
GITHUB_APP_ID = var.github_app.id
GITHUB_APP_KEY_BASE64 = local.github_app_key_base64
KMS_KEY_ID = var.encryption.kms_key_id
LAMBDA_TIMEOUT = var.lambda_timeout_scale_up
LAUNCH_TEMPLATE_NAME_LINUX = aws_launch_template.linux_runner.name
LAUNCH_TEMPLATE_NAME_WINDOWS = aws_launch_template.windows_runner.name
LAUNCH_TEMPLATE_VERSION_LINUX = aws_launch_template.linux_runner.latest_version
Expand Down

0 comments on commit c191dd8

Please sign in to comment.