I’m diving deep into the internals of dynamo and found a confusing snippet of code:
static int active_dynamo_threads = 0;

static PyObject* increment_working_threads(PyThreadState* tstate) {
  active_dynamo_threads = active_dynamo_threads + 1;
  if (active_dynamo_threads > 0) {
    enable_eval_frame_shim(tstate);
  }
  Py_RETURN_NONE;
}

static PyObject* decrement_working_threads(PyThreadState* tstate) {
  if (active_dynamo_threads > 0) {
    active_dynamo_threads = active_dynamo_threads - 1;
    if (active_dynamo_threads == 0) {
      enable_eval_frame_default(tstate);
    }
  }
  Py_RETURN_NONE;
}
static PyObject* set_eval_frame(PyObject* new_callback, PyThreadState* tstate) {
  // Change the eval frame callback and return the old one
  //  - None: disables TorchDynamo
  //  - False: run-only mode (reuse existing compiles)
  //  - Python callable(): enables TorchDynamo
  PyObject* old_callback = eval_frame_callback_get();

  // owned by caller
  Py_INCREF(old_callback);
  if (old_callback != Py_None && new_callback == Py_None) {
    decrement_working_threads(tstate);
  } else if (old_callback == Py_None && new_callback != Py_None) {
    increment_working_threads(tstate);
  }

  Py_INCREF(new_callback);
  Py_DECREF(old_callback);

  // Set thread local callback. This will drive behavior of our shim, if/when it
  // is installed.
  eval_frame_callback_set(new_callback);
  is_dynamo_compiling = !(new_callback == Py_None);
  return old_callback;
}
Per my understanding, both the callback and the eval_frame function are per-thread (thread-local) variables, so each thread should turn its own eval_frame on or off. Why is this controlled by the per-process variable active_dynamo_threads? And since that counter is shared across threads, shouldn't it be protected by a lock to avoid a race condition?
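To make the race I'm worried about concrete: if active_dynamo_threads can really be updated by two threads at once, I would expect at least something like the following. This is a hypothetical, self-contained sketch using C11 atomics; the stub functions and the _atomic names are mine, not from the dynamo source:

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical stand-ins for the per-thread toggles in eval_frame.c. */
static void enable_eval_frame_shim_stub(void) {
  puts("shim enabled for this thread");
}
static void enable_eval_frame_default_stub(void) {
  puts("default eval frame restored for this thread");
}

/* Process-wide counter, made atomic so concurrent updates cannot be lost. */
static atomic_int active_dynamo_threads = 0;

static void increment_working_threads_atomic(void) {
  /* Atomic read-modify-write instead of a plain `x = x + 1`. */
  atomic_fetch_add(&active_dynamo_threads, 1);
  enable_eval_frame_shim_stub(); /* the count is necessarily > 0 here */
}

static void decrement_working_threads_atomic(void) {
  /* fetch_sub returns the previous value: 1 -> 0 means this was the last
     thread using dynamo (the underflow guard from the original is omitted). */
  if (atomic_fetch_sub(&active_dynamo_threads, 1) == 1) {
    enable_eval_frame_default_stub();
  }
}

int main(void) {
  increment_working_threads_atomic();
  decrement_working_threads_atomic();
  return 0;
}

Or is the assumption that set_eval_frame is only ever called while some interpreter-level lock is already held, so the plain int is safe in practice?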