-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for launching engine images
- Loading branch information
1 parent
141aa41
commit f80e55b
Showing
5 changed files
with
323 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
import {isAbortError} from 'abort-controller-x' | ||
import {execa} from 'execa' | ||
import * as fsp from 'fs/promises' | ||
import {onShutdown, onShutdownError} from 'node-graceful-shutdown' | ||
import {RegisterMachineResponse, RegisterMachineResponse_EngineTask} from '../gen/ts/depot/cloud/v3/machine_pb' | ||
import {ensureMounted, unmapBlockDevice, unmountDevice} from '../utils/mounts' | ||
import {reportEngineHealth} from './engineHealth' | ||
|
||
/**
 * Launches the engine container for this machine and supervises it until it exits.
 *
 * In order: validates that the task carries TLS material, mounts every volume listed in the
 * task, writes the certificates under /etc/engine, starts the engine image with `docker run`,
 * registers graceful-shutdown hooks that stop the container and unmount the volumes, and then
 * waits on the engine process together with the health reporter.
 *
 * @param message - Registration response; supplies the machine ID and the bearer token that is
 *   sent with health reports.
 * @param task - Engine task describing the image, mounts, TLS certificates and cache size.
 * @throws If the task has no TLS material, if mounting or writing a file fails, or if the engine
 *   process or the health reporter fails with an error that is not treated as an expected
 *   shutdown.
 */
export async function startEngine(message: RegisterMachineResponse, task: RegisterMachineResponse_EngineTask) {
  console.log('Starting engine')

  // Fail before touching any mounts if the certificates needed below are absent.
  const {cert, caCert} = task
  if (!cert || !caCert) {
    throw new Error('Engine task is missing TLS certificate material')
  }

  for (const mount of task.mounts) {
    await ensureMounted(mount.device, mount.path, mount.fsType, mount.cephVolume, mount.options)
  }

  const {machineId, token} = message
  const headers = {Authorization: `Bearer ${token}`}

  await fsp.writeFile('/etc/engine/tls.crt', cert.cert, {mode: 0o644})
  // The private key must not be world-readable. `mode` is only honoured when the file is
  // created, so chmod as well to tighten a key file left over from a previous run.
  await fsp.writeFile('/etc/engine/tls.key', cert.key, {mode: 0o600})
  await fsp.chmod('/etc/engine/tls.key', 0o600)
  await fsp.writeFile('/etc/engine/tlsca.crt', caCert.cert, {mode: 0o644})

  // NOTE(review): the name says MB but the multiplier is 1,000,000 — confirm the unit of
  // task.cacheSize against the unit --oci-worker-gc-keepstorage expects.
  const cacheSizeMB = task.cacheSize * 1000000

  // Everything before task.image is an option to `docker run`; everything after it is passed
  // to the engine process inside the container.
  const args = [
    'run',
    '--rm',
    '--privileged',
    '--name',
    'engine',
    '-v',
    '/etc/engine:/etc/engine:ro',
    '-v',
    '/var/lib/engine:/var/lib/engine',
    '-p',
    '443:443',
    task.image,
    '--addr',
    'tcp://0.0.0.0:443',
    '--root',
    '/var/lib/engine',
    '--tlscert',
    '/etc/engine/tls.crt',
    '--tlskey',
    '/etc/engine/tls.key',
    '--tlscacert',
    '/etc/engine/tlsca.crt',
    '--oci-worker-gc',
    '--oci-worker-gc-keepstorage',
    `${cacheSizeMB}`,
    '--oci-max-parallelism',
    'num-cpu',
  ]

  // One controller ties the engine process and the health reporter together: aborting it
  // stops both.
  const controller = new AbortController()
  const signal = controller.signal

  // Runs the container to completion. Errors that look like a deliberate stop are swallowed;
  // anything else is rethrown. The controller is always aborted afterwards so the health
  // reporter stops once the engine is gone.
  async function runEngine() {
    try {
      console.log('Execing engine')
      await execa('/usr/bin/docker', args, {stdio: 'inherit', signal})
    } catch (error) {
      // Expected when the process is killed.
      // NOTE(review): this is a substring match, so it also covers exit codes that merely
      // start with 1 (for example 125, 137, 143) — confirm that is intended.
      const exitedWithCodeOne = error instanceof Error && error.message.includes('Command failed with exit code 1')
      if (!exitedWithCodeOne && !isAbortError(error)) {
        throw error
      }
    } finally {
      controller.abort()
    }
  }

  const engine = runEngine()

  onShutdownError(async (error) => {
    console.error('Error shutting down:', error)
  })

  onShutdown(async () => {
    // Hard deadline for the whole shutdown sequence; unref() keeps the timer from holding
    // the process open once shutdown finishes normally.
    setTimeout(() => {
      console.log('Shutdown timed out, killing process')
      process.exit(1)
    }, 1000 * 60).unref()

    controller.abort()
    try {
      await engine
      console.log('Engine exited')
    } catch (error) {
      console.log(`Engine exited with error: ${error}`)
    }

    // Attempt cleanup for every mount even if one fails, then surface the first failure so
    // the shutdown error handler still sees it.
    let cleanupFailed = false
    let firstCleanupError: unknown
    for (const mount of task.mounts) {
      try {
        await unmountDevice(mount.path)
        if (mount.cephVolume) {
          await unmapBlockDevice(mount.cephVolume.volumeName)
        }
      } catch (error) {
        console.error(`Failed to clean up mount ${mount.path}:`, error)
        if (!cleanupFailed) {
          cleanupFailed = true
          firstCleanupError = error
        }
      }
    }
    if (cleanupFailed) {
      throw firstCleanupError
    }
  })

  try {
    await Promise.all([
      engine,
      reportEngineHealth({machineId, signal, headers, mounts: task.mounts}),
      // reportUsage({machineId, signal, headers}),
    ])
  } finally {
    // Whichever task finished or failed first, make sure the other one is stopped too.
    controller.abort()
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import {PlainMessage} from '@bufbuild/protobuf' | ||
import {execa} from 'execa' | ||
import {DiskSpace} from '../gen/ts/depot/cloud/v3/machine_pb' | ||
import {sleep} from '../utils/common' | ||
import {DiskStats, stats} from '../utils/disk' | ||
import {client} from '../utils/grpc' | ||
|
||
/** Arguments accepted by `reportEngineHealth`. */
export interface ReportHealthParams {
  /** Sent as `machineId` with every health ping. */
  machineId: string
  /** Aborting this signal stops the reporting loop. */
  signal: AbortSignal
  /** Request headers passed along with every health ping. */
  headers: HeadersInit
  /** Mounts whose disk statistics are gathered for every ping. */
  mounts: Array<Mount>
}
|
||
/** The pair of values handed to `stats` when disk usage is collected for a mount. */
export interface Mount {
  /** Device identifier, passed as the first argument to `stats`. */
  device: string
  /** Mount path, passed as the second argument to `stats`. */
  path: string
}
|
||
/**
 * Reports machine health in a loop until `signal` is aborted.
 *
 * Each cycle first waits for the engine to list at least one worker, then repeatedly gathers
 * disk statistics for every mount and sends them with `pingMachineHealth`, pausing one second
 * between pings. If gathering or sending fails, the error is logged and the cycle restarts
 * after a one second pause. A failure that happens because the signal was aborted ends the
 * loop quietly instead of being logged as an error.
 *
 * @param machineId - Identifier sent with every ping.
 * @param signal - Aborting it stops the loop; it is also passed to each ping request.
 * @param headers - Request headers sent with every ping.
 * @param mounts - Mounts whose disk statistics are reported.
 */
export async function reportEngineHealth({machineId, signal, headers, mounts}: ReportHealthParams) {
  while (!signal.aborted) {
    await waitForWorkers(signal)

    try {
      while (!signal.aborted) {
        const diskStats = await Promise.all(mounts.map(({device, path}) => stats(device, path)))
        const disks: PlainMessage<DiskSpace>[] = diskStats
          // Mounts for which `stats` produced nothing are left out of the report.
          .filter((item: DiskStats | undefined): item is DiskStats => item !== undefined)
          // Copy only the fields that belong in the ping payload.
          .map(({device, path, freeMb, totalMb, freeInodes, totalInodes}) => ({
            device,
            path,
            freeMb,
            totalMb,
            freeInodes,
            totalInodes,
          }))

        await client.pingMachineHealth({machineId, disks}, {headers, signal})
        await sleep(1000)
      }
      return
    } catch (error) {
      // An aborted signal rejects the in-flight ping; that is a normal stop, not a failure.
      if (signal.aborted) return
      console.log('Error reporting health:', error)
    }
    await sleep(1000)
  }
}
|
||
/**
 * Polls the engine until it lists at least one worker or `signal` is aborted.
 *
 * The worker list is always fetched at least once. After each unsuccessful poll the function
 * logs a message and pauses for 250 ms before asking again.
 *
 * @param signal - Aborting it ends the wait after the poll that is currently in progress.
 */
export async function waitForWorkers(signal: AbortSignal) {
  for (;;) {
    const current: EngineWorker[] = await listEngineWorkers()
    if (signal.aborted || current.length !== 0) {
      return
    }
    console.log('Waiting for engine workers to start')
    await sleep(250)
  }
}
|
||
/** Shape expected for one entry of the JSON worker list parsed in `listEngineWorkers`. */
interface EngineWorker {
  /** Worker identifier. */
  id: string
  /** Free-form string labels attached to the worker. */
  labels: Record<string, string>
  /** Platforms listed for the worker. */
  platforms: Array<{architecture: string; os: string; variant?: string}>
  /** Garbage-collection policy entries listed for the worker. */
  gcPolicy: Array<{
    filter: string[] | null
    all: boolean
    keepDuration: number
    keepBytes: number
  }>
  /** Version details reported by the worker. */
  buildkitVersion: {
    package: string
    version: string
    revision: string
  }
}
|
||
/**
 * Asks the local engine for its workers by running `buildctl debug workers` against
 * tcp://localhost:443 with the TLS material stored in /etc/engine.
 *
 * This never throws: every failure is reported as an empty list so callers can simply poll.
 *
 * @returns The parsed worker list, or an empty array when buildctl exits non-zero, cannot be
 *   run, prints invalid JSON, or prints JSON that is not an array.
 */
async function listEngineWorkers(): Promise<EngineWorker[]> {
  try {
    const res = await execa(
      'buildctl',
      [
        '--tlsservername',
        'localhost',
        '--tlscert',
        '/etc/engine/tls.crt',
        '--tlskey',
        '/etc/engine/tls.key',
        '--tlscacert',
        '/etc/engine/tlsca.crt',
        '--timeout',
        '1',
        '--addr',
        'tcp://localhost:443',
        'debug',
        'workers',
        '--format',
        '{{json .}}',
      ],
      // A non-zero exit is reported through exitCode instead of a rejected promise.
      {reject: false},
    )
    if (res.exitCode !== 0) return []

    const parsed: unknown = JSON.parse(res.stdout)
    // Callers read `.length` on the result, so anything that is not an array (for example a
    // JSON `null`, which buildctl may print for an empty list — TODO confirm) counts as
    // "no workers".
    return Array.isArray(parsed) ? parsed : []
  } catch (err) {
    console.log('Error listing engine workers', err)
    return []
  }
}