Skip to content

Commit

Permalink
Detect ffmpeg OOM errors, added manual OutOfMemoryError (#1694)
Browse files Browse the repository at this point in the history
* Detect ffmpeg OOM errors, added manual OutOfMemoryError

* Create eighty-spies-knock.md
  • Loading branch information
matt-aitken authored Feb 12, 2025
1 parent 6017c52 commit efd2d21
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 13 deletions.
5 changes: 5 additions & 0 deletions .changeset/eighty-spies-knock.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@trigger.dev/sdk": patch
---

Detect ffmpeg OOM errors, added manual OutOfMemoryError
43 changes: 31 additions & 12 deletions apps/webapp/app/v3/services/completeAttempt.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
exceptionEventEnhancer,
flattenAttributes,
internalErrorFromUnexpectedExit,
isManualOutOfMemoryError,
sanitizeError,
shouldRetryError,
taskRunErrorEnhancer,
Expand Down Expand Up @@ -691,20 +692,38 @@ async function findAttempt(prismaClient: PrismaClientOrTransaction, friendlyId:
}

function isOOMError(error: TaskRunError) {
if (error.type !== "INTERNAL_ERROR") return false;
if (error.code === "TASK_PROCESS_OOM_KILLED" || error.code === "TASK_PROCESS_MAYBE_OOM_KILLED") {
return true;
if (error.type === "INTERNAL_ERROR") {
if (
error.code === "TASK_PROCESS_OOM_KILLED" ||
error.code === "TASK_PROCESS_MAYBE_OOM_KILLED"
) {
return true;
}

// For the purposes of retrying on a larger machine, we're going to treat this as an OOM error.
// This is what they look like if we're executing using k8s. They then get corrected later, but it's too late.
// {"code": "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE", "type": "INTERNAL_ERROR", "message": "Process exited with code -1 after signal SIGKILL."}
if (
error.code === "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE" &&
error.message &&
error.message.includes("SIGKILL") &&
error.message.includes("-1")
) {
return true;
}
}

if (error.type === "BUILT_IN_ERROR") {
// ffmpeg also does weird stuff
// { "name": "Error", "type": "BUILT_IN_ERROR", "message": "ffmpeg was killed with signal SIGKILL" }
if (error.message && error.message.includes("ffmpeg was killed with signal SIGKILL")) {
return true;
}
}

// For the purposes of retrying on a larger machine, we're going to treat this as an OOM error.
// This is what they look like if we're executing using k8s. They then get corrected later, but it's too late.
// {"code": "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE", "type": "INTERNAL_ERROR", "message": "Process exited with code -1 after signal SIGKILL."}
if (
error.code === "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE" &&
error.message &&
error.message.includes("SIGKILL") &&
error.message.includes("-1")
) {
// Special `OutOfMemoryError` for doing a manual OOM kill.
// Useful if a native library runs out of memory but doesn't actually crash the run, and you want to manually flag it as an OOM.
if (isManualOutOfMemoryError(error)) {
return true;
}

Expand Down
29 changes: 29 additions & 0 deletions packages/core/src/v3/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,28 @@ export class AbortTaskRunError extends Error {
}
}

/**
 * Sentinel message carried by every `OutOfMemoryError`.
 * `isManualOutOfMemoryError` recognises a manual OOM by comparing against this exact string.
 */
const MANUAL_OOM_KILL_ERROR_MESSAGE = "MANUAL_OOM_KILL_ERROR";

/**
 * Throw this to make the run fail with an Out Of Memory error (as long as nothing catches it).
 *
 * Handy when a native package notices that it has run out of memory but does not
 * take the Node.js process down with it.
 *
 * @example
 * throw new OutOfMemoryError();
 */
export class OutOfMemoryError extends Error {
  constructor() {
    // The message is a fixed sentinel rather than human-readable text.
    super(MANUAL_OOM_KILL_ERROR_MESSAGE);
    this.name = "OutOfMemoryError";
  }
}

/**
 * Reports whether a task run error is a manually raised out-of-memory error.
 *
 * @param error - The task run error to inspect.
 * @returns `true` only when the error is a `BUILT_IN_ERROR` whose message equals
 *   `MANUAL_OOM_KILL_ERROR_MESSAGE`; `false` for every other error.
 */
export function isManualOutOfMemoryError(error: TaskRunError) {
  // Only built-in errors are checked for the sentinel message.
  if (error.type !== "BUILT_IN_ERROR") {
    return false;
  }

  return error.message === MANUAL_OOM_KILL_ERROR_MESSAGE;
}

export class TaskPayloadParsedError extends Error {
public readonly cause: unknown;

Expand Down Expand Up @@ -562,6 +584,13 @@ export function taskRunErrorEnhancer(error: TaskRunError): EnhanceError<TaskRunE
};
}
}

if (isManualOutOfMemoryError(error)) {
return {
...getPrettyTaskRunError("TASK_PROCESS_OOM_KILLED"),
};
}

break;
}
case "STRING_ERROR": {
Expand Down
1 change: 1 addition & 0 deletions packages/trigger-sdk/src/v3/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export {
RateLimitError,
UnprocessableEntityError,
AbortTaskRunError,
OutOfMemoryError,
logger,
type LogLevel,
} from "@trigger.dev/core/v3";
Expand Down
18 changes: 17 additions & 1 deletion references/hello-world/src/trigger/oom.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { OutOfMemoryError } from "@trigger.dev/sdk/v3";
import { logger, task } from "@trigger.dev/sdk/v3";
import { setTimeout } from "timers/promises";

Expand All @@ -9,7 +10,14 @@ export const oomTask = task({
machine: "small-1x",
},
},
run: async ({ succeedOnLargerMachine }: { succeedOnLargerMachine: boolean }, { ctx }) => {
run: async (
{
succeedOnLargerMachine = false,
ffmpeg = false,
manual = false,
}: { succeedOnLargerMachine?: boolean; ffmpeg?: boolean; manual?: boolean },
{ ctx }
) => {
logger.info("running out of memory below this line");

logger.info(`Running on ${ctx.machine?.name}`);
Expand All @@ -23,6 +31,14 @@ export const oomTask = task({
};
}

if (manual) {
throw new OutOfMemoryError();
}

if (ffmpeg) {
throw new Error("ffmpeg was killed with signal SIGKILL");
}

let a = "a";

try {
Expand Down

0 comments on commit efd2d21

Please sign in to comment.