Skip to content

Commit 1839c05

Browse files
committed
Handle CPU limit exceeded in Python workers
If we call `TerminateExecution()`, it will exit Python execution without unwinding the stack or cleaning up the runtime state. This leaves the Python runtime in a permanently broken state, and all further requests will fail. This commit adds a new `cpuLimitNearlyExceededCallback` to the limit enforcer and hooks it up so that it triggers a `SIGXCPU` signal inside of Python. The signal handler is used to raise a `CpuLimitExceeded` Python error into the runtime. If this error is ignored, then we'll hit the hard limit and be terminated. If we ever do call `TerminateExecution()` on a Python isolate, we should condemn the isolate, but that is left as a TODO. To trigger the signal inside of Python, we have to set two addresses: 1. we set `emscripten_signal_clock` to `0` to make the Python eval breaker check for a signal on the next tick. 2. we set `_Py_EMSCRIPTEN_SIGNAL_HANDLING` to 1 to make Python check the signal clock. We also have to set `Module.Py_EmscriptenSignalBuffer` to a buffer with the number of the signal we wish to trip in it (`SIGXCPU` aka 24). When we start a request we set `_Py_EMSCRIPTEN_SIGNAL_HANDLING` to 0 to avoid the ongoing cost of calling out to JavaScript to check the buffer when no signal is set, and we put a 24 into `Py_EmscriptenSignalBuffer`. The most annoying aspect of this is that the symbol `emscripten_signal_clock` is not exported. For Pyodide 0.28.2, I manually located the address of this symbol and hard-coded it. For the next Pyodide, we'll make sure to export it.
1 parent dfcde42 commit 1839c05

File tree

15 files changed

+190
-20
lines changed

15 files changed

+190
-20
lines changed

src/pyodide/internal/introspection.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import signal
12
from inspect import isawaitable, isclass
23
from types import FunctionType
34

@@ -86,3 +87,14 @@ async def wrapper_func(relaxed, inst, prop, *args, **kwargs):
8687
return python_to_rpc(await result)
8788
else:
8889
return python_to_rpc(result)
90+
91+
92+
class CpuLimitExceeded(BaseException):
93+
pass
94+
95+
96+
def raise_cpu_limit_exceeded(signum, frame):
97+
raise CpuLimitExceeded("Python Worker exceeded CPU time limit")
98+
99+
100+
signal.signal(signal.SIGXCPU, raise_cpu_limit_exceeded)

src/pyodide/internal/metadata.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,6 @@ export const LEGACY_GLOBAL_HANDLERS = !NO_GLOBAL_HANDLERS;
6363
export const LEGACY_VENDOR_PATH = !FORCE_NEW_VENDOR_PATH;
6464
export const LEGACY_INCLUDE_SDK = !EXTERNAL_SDK;
6565
export const CHECK_RNG_STATE = !!COMPATIBILITY_FLAGS.python_check_rng_state;
66+
67+
export const setCpuLimitNearlyExceededCallback =
68+
MetadataReader.setCpuLimitNearlyExceededCallback.bind(MetadataReader);

src/pyodide/internal/python.ts

Lines changed: 90 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ import {
1616
getRandomValues,
1717
entropyBeforeRequest,
1818
} from 'pyodide-internal:topLevelEntropy/lib';
19-
import { LEGACY_VENDOR_PATH } from 'pyodide-internal:metadata';
19+
import {
20+
LEGACY_VENDOR_PATH,
21+
setCpuLimitNearlyExceededCallback,
22+
} from 'pyodide-internal:metadata';
2023
import type { PyodideEntrypointHelper } from 'pyodide:python-entrypoint-helper';
2124

2225
/**
@@ -27,7 +30,11 @@ import type { PyodideEntrypointHelper } from 'pyodide:python-entrypoint-helper';
2730
import { default as SetupEmscripten } from 'internal:setup-emscripten';
2831

2932
import { default as UnsafeEval } from 'internal:unsafe-eval';
30-
import { PythonWorkersInternalError, reportError } from 'pyodide-internal:util';
33+
import {
34+
PythonWorkersInternalError,
35+
reportError,
36+
unreachable,
37+
} from 'pyodide-internal:util';
3138
import { loadPackages } from 'pyodide-internal:loadPackage';
3239
import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata';
3340
import { TRANSITIVE_REQUIREMENTS } from 'pyodide-internal:metadata';
@@ -116,20 +123,87 @@ function validatePyodideVersion(pyodide: Pyodide): void {
116123
}
117124

118125
const origSetTimeout = globalThis.setTimeout.bind(this);
119-
function setTimeoutTopLevelPatch(
120-
handler: () => void,
121-
timeout: number | undefined
122-
): number {
123-
// Redirect top level setTimeout(cb, 0) to queueMicrotask().
124-
// If we don't know how to handle it, call normal setTimeout() to force failure.
125-
if (typeof handler === 'string') {
126-
return origSetTimeout(handler, timeout);
126+
127+
function makeSetTimeout(Module: Module): typeof setTimeout {
128+
return function setTimeoutTopLevelPatch(
129+
handler: () => void,
130+
timeout: number | undefined
131+
): number {
132+
// Redirect top level setTimeout(cb, 0) to queueMicrotask().
133+
// If we don't know how to handle it, call normal setTimeout() to force failure.
134+
if (typeof handler === 'string') {
135+
return origSetTimeout(handler, timeout);
136+
}
137+
function wrappedHandler() {
138+
// In case a CPU limit signal was triggered just as Python was exiting, there may be one waiting that
139+
// will interrupt the wrong task. Clear signals before entering the task.
140+
// This is covered by cpu-limit-exceeded.ew-test "async_trip" test.
141+
clearSignals(Module);
142+
handler();
143+
}
144+
if (timeout) {
145+
return origSetTimeout(wrappedHandler, timeout);
146+
}
147+
queueMicrotask(wrappedHandler);
148+
return 0;
149+
} as typeof setTimeout;
150+
}
151+
152+
function getSignalClockAddr(Module: Module): number {
153+
if (Module.API.version !== '0.28.2') {
154+
throw new PythonWorkersInternalError(
155+
'getSignalClockAddr only supported in 0.28.2'
156+
);
127157
}
128-
if (timeout) {
129-
return origSetTimeout(handler, timeout);
158+
// This is the address here:
159+
// https://github.com/python/cpython/blob/main/Python/emscripten_signal.c#L42
160+
//
161+
// Since the symbol isn't exported, we can't access it directly. Instead, we used wasm-objdump and
162+
// searched for the call site to _Py_CheckEmscriptenSignals_Helper(), then read the offset out of
163+
// the assembly code.
164+
//
165+
// TODO: Export this symbol in the next Pyodide release so we can stop using the magic number.
166+
const emscripten_signal_clock_offset = 3171536;
167+
return Module.___memory_base.value + emscripten_signal_clock_offset;
168+
}
169+
170+
function setupRuntimeSignalHandling(Module: Module): void {
171+
Module.Py_EmscriptenSignalBuffer = new Uint8Array(1);
172+
const version = Module.API.version;
173+
if (version === '0.26.0a2') {
174+
return;
175+
}
176+
if (version === '0.28.2') {
177+
// The callback sets signal_clock to 0 and signal_handling to 1. It has to be in C++ because we
178+
// don't hold the isolate lock when we call it. JS code would be:
179+
//
180+
// function callback() { Module.HEAP8[getSignalClockAddr(Module)] = 0;
181+
// Module.HEAP8[Module._Py_EMSCRIPTEN_SIGNAL_HANDLING] = 1;
182+
// }
183+
setCpuLimitNearlyExceededCallback(
184+
Module.HEAP8,
185+
getSignalClockAddr(Module),
186+
Module._Py_EMSCRIPTEN_SIGNAL_HANDLING
187+
);
188+
return;
189+
}
190+
unreachable(version);
191+
}
192+
193+
const SIGXCPU = 24;
194+
195+
export function clearSignals(Module: Module): void {
196+
if (Module.API.version === '0.28.2') {
197+
// In case the previous request was aborted, make sure that:
198+
// 1. a SIGXCPU is waiting in the signal buffer
199+
// 2. signal handling is off
200+
//
201+
// We will turn signal handling on as part of triggering the interrupt, having it on otherwise
202+
// just wastes cycles.
203+
Module.Py_EmscriptenSignalBuffer[0] = SIGXCPU;
204+
Module.HEAPU32[getSignalClockAddr(Module) / 4] = 1;
205+
Module.HEAPU32[Module._Py_EMSCRIPTEN_SIGNAL_HANDLING / 4] = 0;
130206
}
131-
queueMicrotask(handler);
132-
return 0;
133207
}
134208

135209
export function loadPyodide(
@@ -150,7 +224,7 @@ export function loadPyodide(
150224
Module.setUnsafeEval(UnsafeEval);
151225
Module.setGetRandomValues(getRandomValues);
152226
Module.setSetTimeout(
153-
setTimeoutTopLevelPatch as typeof setTimeout,
227+
makeSetTimeout(Module),
154228
clearTimeout,
155229
setInterval,
156230
clearInterval
@@ -193,6 +267,7 @@ export function loadPyodide(
193267
}
194268
);
195269
setupPythonSearchPath(pyodide);
270+
setupRuntimeSignalHandling(Module);
196271
return pyodide;
197272
} catch (e) {
198273
// In edgeworker test suite, without this we get the file name and line number of the exception

src/pyodide/internal/util.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,7 @@ export function invalidateCaches(Module: Module): void {
7878
`from importlib import invalidate_caches; invalidate_caches(); del invalidate_caches`
7979
);
8080
}
81+
82+
export function unreachable(msg: never): never {
83+
throw new PythonWorkersInternalError(`Unreachable: ${msg}`);
84+
}

src/pyodide/python-entrypoint-helper.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22
// This file is a BUILTIN module that provides the actual implementation for the
33
// python-entrypoint.js USER module.
44

5-
import { beforeRequest, loadPyodide } from 'pyodide-internal:python';
5+
import {
6+
beforeRequest,
7+
loadPyodide,
8+
clearSignals,
9+
} from 'pyodide-internal:python';
610
import { enterJaegerSpan } from 'pyodide-internal:jaeger';
711
import { patchLoadPackage } from 'pyodide-internal:setupPackages';
812
import {
@@ -292,6 +296,8 @@ async function doPyCallHelper(
292296
pyfunc: PyCallable,
293297
args: any[]
294298
): Promise<any> {
299+
const pyodide = await getPyodide();
300+
clearSignals(pyodide._module);
295301
try {
296302
if (pyfunc.callWithOptions) {
297303
return await pyfunc.callWithOptions(

src/pyodide/types/emscripten.d.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ interface API {
3838
};
3939
serializeHiwireState(serializer: (obj: any) => any): SnapshotConfig;
4040
pyVersionTuple: [number, number, number];
41+
scheduleCallback: (callback: () => void, timeout: number) => void;
4142
}
4243

4344
interface LDSO {
@@ -74,9 +75,7 @@ interface EmscriptenSettings {
7475
) => WebAssembly.Exports;
7576
reportUndefinedSymbolsNoOp: () => void;
7677
noInitialRun?: boolean;
77-
API: {
78-
config: API['config'];
79-
};
78+
API: Pick<API, 'config'>;
8079
readyPromise: Promise<Module>;
8180
rejectReadyPromise: (e: any) => void;
8281
}
@@ -132,4 +131,7 @@ interface Module {
132131
getEmptyTableSlot(): number;
133132
freeTableIndexes: number[];
134133
LD_LIBRARY_PATH: string;
134+
Py_EmscriptenSignalBuffer: Uint8Array;
135+
_Py_EMSCRIPTEN_SIGNAL_HANDLING: number;
136+
___memory_base: WebAssembly.Global<'i32'>;
135137
}

src/pyodide/types/runtime-generated/metadata.d.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ declare namespace MetadataReader {
3030
const read: (index: number, position: number, buffer: Uint8Array) => number;
3131
const getTransitiveRequirements: () => Set<string>;
3232
const getCompatibilityFlags: () => CompatibilityFlags;
33+
const setCpuLimitNearlyExceededCallback: (
34+
buf: Uint8Array,
35+
sig_clock: number,
36+
sig_flag: number
37+
) => void;
3338
const constructor: {
3439
getBaselineSnapshotImports(): string[];
3540
};

src/workerd/api/pyodide/pyodide.c++

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,19 @@ kj::Array<kj::StringPtr> PyodideMetadataReader::getNames(
9191
return builder.releaseAsArray();
9292
}
9393

94+
void PyodideMetadataReader::setCpuLimitNearlyExceededCallback(
95+
jsg::Lock& js, kj::Array<kj::byte> wasm_memory, int sig_clock, int sig_flag) {
96+
// This callback has to be implemented in C++ because we don't hold the isolate lock when we call
97+
// it. It also has to be signal safe since we call it from the cpu time limiter.
98+
Worker::Isolate::from(js).setCpuLimitNearlyExceededCallback(
99+
[wasm_memory = kj::mv(wasm_memory), sig_clock, sig_flag]() mutable {
100+
// Set signal handling clock to fire on the next check.
101+
wasm_memory[sig_clock] = 0;
102+
// Set signal handling to on
103+
wasm_memory[sig_flag] = 1;
104+
});
105+
}
106+
94107
kj::Array<kj::String> PythonModuleInfo::getPythonFileContents() {
95108
auto builder = kj::Vector<kj::String>(names.size());
96109
for (auto i: kj::zeroTo(names.size())) {

src/workerd/api/pyodide/pyodide.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,12 @@ class PyodideMetadataReader: public jsg::Object {
238238

239239
static kj::Array<kj::StringPtr> getBaselineSnapshotImports();
240240

241+
// We call this during Python setup with the wasm memory and the addresses of the signal clock and
242+
// the flag to indicate whether signal handling is on or off. It sets up the isolate
243+
// CpuLimitNearlyExceeded callback to trigger a signal in Python.
244+
void setCpuLimitNearlyExceededCallback(
245+
jsg::Lock& js, kj::Array<kj::byte> wasm_memory, int sig_clock, int sig_flag);
246+
241247
// Similar to Cloudflare::::getCompatibilityFlags in global-scope.c++, but the key difference is
242248
// that it returns experimental flags even if `experimental` is not enabled. This avoids a gotcha
243249
// where an experimental compat flag is enabled in our C++ code, but not in our JS code.
@@ -266,6 +272,7 @@ class PyodideMetadataReader: public jsg::Object {
266272
JSG_METHOD(getTransitiveRequirements);
267273
JSG_METHOD(getCompatibilityFlags);
268274
JSG_STATIC_METHOD(getBaselineSnapshotImports);
275+
JSG_METHOD(setCpuLimitNearlyExceededCallback);
269276
}
270277

271278
void visitForMemoryInfo(jsg::MemoryTracker& tracker) const {

src/workerd/io/io-context.c++

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ IoContext::IoContext(ThreadContext& thread,
192192

193193
return promise;
194194
};
195+
KJ_IF_SOME(cb, this->worker->getIsolate().getCpuLimitNearlyExceededCallback()) {
196+
limitEnforcer->setCpuLimitNearlyExceededCallback(kj::mv(cb));
197+
}
195198

196199
// Arrange to abort when limits expire.
197200
abortWhen(makeLimitsPromise());

0 commit comments

Comments
 (0)