X Tutup
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 62 additions & 64 deletions crates/vm/src/builtins/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::{
bytecode,
class::PyClassImpl,
common::wtf8::{Wtf8Buf, wtf8_concat},
frame::Frame,
frame::{Frame, FrameRef},
function::{FuncArgs, OptionalArg, PyComparisonValue, PySetterValue},
scope::Scope,
types::{
Expand Down Expand Up @@ -673,27 +673,14 @@ impl Py<PyFunction> {
/// Returns `None` for generator/coroutine code paths that do not push a
/// regular datastack-backed frame in the fast call path.
pub(crate) fn datastack_frame_size_bytes(&self) -> Option<usize> {
let code: &Py<PyCode> = &self.code;
if code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
{
return None;
}
let nlocalsplus = code
.varnames
.len()
.checked_add(code.cellvars.len())?
.checked_add(code.freevars.len())?;
let capacity = nlocalsplus.checked_add(code.max_stackdepth as usize)?;
capacity.checked_mul(core::mem::size_of::<usize>())
datastack_frame_size_bytes_for_code(&self.code)
}

/// Fast path for calling a simple function with exact positional args.
/// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args.
/// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs,
/// and nargs == co_argcount.
pub fn invoke_exact_args(&self, mut args: Vec<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
pub(crate) fn prepare_exact_args_frame(
&self,
mut args: Vec<PyObjectRef>,
vm: &VirtualMachine,
) -> FrameRef {
let code: PyRef<PyCode> = (*self.code).to_owned();

debug_assert_eq!(args.len(), code.arg_count as usize);
Expand All @@ -704,16 +691,11 @@ impl Py<PyFunction> {
.intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS)
);
debug_assert_eq!(code.kwonlyarg_count, 0);

// Generator/coroutine code objects are SIMPLE_FUNCTION in call
// specialization classification, but their call path must still
// go through invoke() to produce generator/coroutine objects.
if code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
{
return self.invoke(FuncArgs::from(args), vm);
}
debug_assert!(
!code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
);

let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
None
Expand All @@ -727,20 +709,18 @@ impl Py<PyFunction> {
self.builtins.clone(),
self.closure.as_ref().map_or(&[], |c| c.as_slice()),
Some(self.to_owned().into()),
true, // Always use datastack (invoke_exact_args is never gen/coro)
true, // Exact-args fast path is only used for non-gen/coro functions.
vm,
)
.into_ref(&vm.ctx);

// Move args directly into fastlocals (no clone/refcount needed)
{
let fastlocals = unsafe { frame.fastlocals_mut() };
for (slot, arg) in fastlocals.iter_mut().zip(args.drain(..)) {
*slot = Some(arg);
}
}

// Handle cell2arg
if let Some(cell2arg) = code.cell2arg.as_deref() {
let fastlocals = unsafe { frame.fastlocals_mut() };
for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) {
Expand All @@ -749,6 +729,36 @@ impl Py<PyFunction> {
}
}

frame
}

/// Fast path for calling a simple function with exact positional args.
/// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args.
/// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs,
/// and nargs == co_argcount.
pub fn invoke_exact_args(&self, args: Vec<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
let code: PyRef<PyCode> = (*self.code).to_owned();

debug_assert_eq!(args.len(), code.arg_count as usize);
debug_assert!(code.flags.contains(bytecode::CodeFlags::OPTIMIZED));
debug_assert!(
!code
.flags
.intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS)
);
debug_assert_eq!(code.kwonlyarg_count, 0);

// Generator/coroutine code objects are SIMPLE_FUNCTION in call
// specialization classification, but their call path must still
// go through invoke() to produce generator/coroutine objects.
if code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
{
return self.invoke(FuncArgs::from(args), vm);
}
let frame = self.prepare_exact_args_frame(args, vm);

let result = vm.run_frame(frame.clone());
unsafe {
if let Some(base) = frame.materialize_localsplus() {
Expand All @@ -759,6 +769,22 @@ impl Py<PyFunction> {
}
}

/// Computes the byte size of the datastack-backed frame layout for `code`.
///
/// Returns `None` for generator/coroutine code objects (those never take the
/// datastack fast path) and on arithmetic overflow of the layout computation.
///
/// NOTE(review): the free-variable region is sized from `code.freevars.len()`;
/// if the runtime closure length attached to a function can differ from this
/// (e.g. after a `__code__` reassignment), callers reserving space through this
/// helper may undersize relative to `Frame::new` — TODO confirm.
pub(crate) fn datastack_frame_size_bytes_for_code(code: &Py<PyCode>) -> Option<usize> {
    // Generator/coroutine frames are never allocated on the plain datastack.
    let gen_or_coro = bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE;
    if code.flags.intersects(gen_or_coro) {
        return None;
    }
    // Slot count: plain locals + cell vars + free vars, then the evaluation
    // stack on top. All arithmetic is checked so overflow maps to `None`.
    let nlocalsplus = code
        .varnames
        .len()
        .checked_add(code.cellvars.len())?
        .checked_add(code.freevars.len())?;
    nlocalsplus
        .checked_add(code.max_stackdepth as usize)?
        .checked_mul(core::mem::size_of::<usize>())
}
Comment on lines +772 to +786
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Use the runtime closure length when sizing datastack frames.

Frame::new() sizes localsplus with closure.len(), but this helper hard-codes code.freevars.len(). Since set___code__ still allows swapping in a code object without revalidating the stored closure, callers that reserve datastack space via this helper can compute a smaller layout than prepare_exact_args_frame() will actually allocate.

Possible direction
 pub(crate) fn datastack_frame_size_bytes(&self) -> Option<usize> {
-    datastack_frame_size_bytes_for_code(&self.code)
+    datastack_frame_size_bytes_for_layout(
+        &self.code,
+        self.closure.as_ref().map_or(0, |c| c.len()),
+    )
 }
 
-pub(crate) fn datastack_frame_size_bytes_for_code(code: &Py<PyCode>) -> Option<usize> {
+pub(crate) fn datastack_frame_size_bytes_for_layout(
+    code: &Py<PyCode>,
+    nfrees: usize,
+) -> Option<usize> {
     if code
         .flags
         .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
     {
         return None;
     }
     let nlocalsplus = code
         .varnames
         .len()
         .checked_add(code.cellvars.len())?
-        .checked_add(code.freevars.len())?;
+        .checked_add(nfrees)?;
     let capacity = nlocalsplus.checked_add(code.max_stackdepth as usize)?;
     capacity.checked_mul(core::mem::size_of::<usize>())
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/builtins/function.rs` around lines 772 - 786, The helper
datastack_frame_size_bytes_for_code currently uses code.freevars.len() which can
differ from the actual runtime closure length used by Frame::new(), leading to
undersized reservations; change the function to take the runtime closure length
(e.g., add a closure_len: usize parameter or accept the closure slice) and use
that value instead of code.freevars.len(), keep the generator/coroutine
early-return behavior, and update all callers (including
prepare_exact_args_frame and any code that reserves datastack space) to pass the
actual closure.len() so sizing matches Frame::new().


impl PyPayload for PyFunction {
#[inline]
fn class(ctx: &Context) -> &'static Py<PyType> {
Expand Down Expand Up @@ -1351,6 +1377,7 @@ pub(crate) fn vectorcall_function(

let has_kwargs = kwnames.is_some_and(|kw| !kw.is_empty());
let is_simple = !has_kwargs
&& code.flags.contains(bytecode::CodeFlags::OPTIMIZED)
&& !code.flags.contains(bytecode::CodeFlags::VARARGS)
&& !code.flags.contains(bytecode::CodeFlags::VARKEYWORDS)
&& code.kwonlyarg_count == 0
Expand All @@ -1361,37 +1388,8 @@ pub(crate) fn vectorcall_function(
if is_simple && nargs == code.arg_count as usize {
// FAST PATH: simple positional-only call, exact arg count.
// Move owned args directly into fastlocals — no clone needed.
let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
None // lazy allocation — most frames never access locals dict
} else {
Some(ArgMapping::from_dict_exact(zelf.globals.clone()))
};

let frame = Frame::new(
code.to_owned(),
Scope::new(locals, zelf.globals.clone()),
zelf.builtins.clone(),
zelf.closure.as_ref().map_or(&[], |c| c.as_slice()),
Some(zelf.to_owned().into()),
true, // Always use datastack (is_simple excludes gen/coro)
vm,
)
.into_ref(&vm.ctx);

{
let fastlocals = unsafe { frame.fastlocals_mut() };
for (slot, arg) in fastlocals.iter_mut().zip(args.drain(..nargs)) {
*slot = Some(arg);
}
}

if let Some(cell2arg) = code.cell2arg.as_deref() {
let fastlocals = unsafe { frame.fastlocals_mut() };
for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) {
let x = fastlocals[*arg_idx as usize].take();
frame.set_cell_contents(cell_idx, x);
}
}
args.truncate(nargs);
let frame = zelf.prepare_exact_args_frame(args, vm);

let result = vm.run_frame(frame.clone());
unsafe {
Expand Down
6 changes: 3 additions & 3 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1691,7 +1691,7 @@ impl ToPyObject for char {
fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
let cp = self as u32;
if cp <= u8::MAX as u32 {
vm.ctx.latin1_char_cache[cp as usize].clone().into()
vm.ctx.latin1_char(cp as u8).into()
} else {
vm.ctx.new_str(self).into()
}
Expand All @@ -1702,7 +1702,7 @@ impl ToPyObject for CodePoint {
fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
let cp = self.to_u32();
if cp <= u8::MAX as u32 {
vm.ctx.latin1_char_cache[cp as usize].clone().into()
vm.ctx.latin1_char(cp as u8).into()
} else {
vm.ctx.new_str(self).into()
}
Expand Down Expand Up @@ -1747,7 +1747,7 @@ impl ToPyObject for AsciiString {

impl ToPyObject for AsciiChar {
fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
vm.ctx.new_str(self).into()
vm.ctx.latin1_char(u8::from(self)).into()
}
}

Expand Down
Loading
Loading
X Tutup