@@ -13,7 +13,7 @@ use crate::{
1313 bytecode,
1414 class:: PyClassImpl ,
1515 common:: wtf8:: { Wtf8Buf , wtf8_concat} ,
16- frame:: Frame ,
16+ frame:: { Frame , FrameRef } ,
1717 function:: { FuncArgs , OptionalArg , PyComparisonValue , PySetterValue } ,
1818 scope:: Scope ,
1919 types:: {
@@ -673,27 +673,14 @@ impl Py<PyFunction> {
673673 /// Returns `None` for generator/coroutine code paths that do not push a
674674 /// regular datastack-backed frame in the fast call path.
675675 pub ( crate ) fn datastack_frame_size_bytes ( & self ) -> Option < usize > {
// The `-` lines below are the previous inline computation: bail out with
// `None` for GENERATOR/COROUTINE code, otherwise sum the varnames, cellvars
// and freevars counts plus `max_stackdepth` with checked arithmetic and
// multiply by `size_of::<usize>()`.
676- let code: & Py < PyCode > = & self . code ;
677- if code
678- . flags
679- . intersects ( bytecode:: CodeFlags :: GENERATOR | bytecode:: CodeFlags :: COROUTINE )
680- {
681- return None ;
682- }
683- let nlocalsplus = code
684- . varnames
685- . len ( )
686- . checked_add ( code. cellvars . len ( ) ) ?
687- . checked_add ( code. freevars . len ( ) ) ?;
688- let capacity = nlocalsplus. checked_add ( code. max_stackdepth as usize ) ?;
689- capacity. checked_mul ( core:: mem:: size_of :: < usize > ( ) )
// The `+` line replaces that body with a call to the free function
// `datastack_frame_size_bytes_for_code`, passing this function's code object,
// so the same size computation can be used with only a code object in hand.
676+ datastack_frame_size_bytes_for_code ( & self . code )
690677 }
691678
692- /// Fast path for calling a simple function with exact positional args.
693- /// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args.
694- /// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs ,
695- /// and nargs == co_argcount.
696- pub fn invoke_exact_args ( & self , mut args : Vec < PyObjectRef > , vm : & VirtualMachine ) -> PyResult {
679+ pub ( crate ) fn prepare_exact_args_frame (
680+ & self ,
681+ mut args : Vec < PyObjectRef > ,
682+ vm : & VirtualMachine ,
683+ ) -> FrameRef {
697684 let code: PyRef < PyCode > = ( * self . code ) . to_owned ( ) ;
698685
699686 debug_assert_eq ! ( args. len( ) , code. arg_count as usize ) ;
@@ -704,16 +691,11 @@ impl Py<PyFunction> {
704691 . intersects( bytecode:: CodeFlags :: VARARGS | bytecode:: CodeFlags :: VARKEYWORDS )
705692 ) ;
706693 debug_assert_eq ! ( code. kwonlyarg_count, 0 ) ;
707-
708- // Generator/coroutine code objects are SIMPLE_FUNCTION in call
709- // specialization classification, but their call path must still
710- // go through invoke() to produce generator/coroutine objects.
711- if code
712- . flags
713- . intersects ( bytecode:: CodeFlags :: GENERATOR | bytecode:: CodeFlags :: COROUTINE )
714- {
715- return self . invoke ( FuncArgs :: from ( args) , vm) ;
716- }
694+ debug_assert ! (
695+ !code
696+ . flags
697+ . intersects( bytecode:: CodeFlags :: GENERATOR | bytecode:: CodeFlags :: COROUTINE )
698+ ) ;
717699
718700 let locals = if code. flags . contains ( bytecode:: CodeFlags :: NEWLOCALS ) {
719701 None
@@ -727,20 +709,18 @@ impl Py<PyFunction> {
727709 self . builtins . clone ( ) ,
728710 self . closure . as_ref ( ) . map_or ( & [ ] , |c| c. as_slice ( ) ) ,
729711 Some ( self . to_owned ( ) . into ( ) ) ,
730- true , // Always use datastack (invoke_exact_args is never gen/coro)
712+ true , // Exact-args fast path is only used for non- gen/coro functions.
731713 vm,
732714 )
733715 . into_ref ( & vm. ctx ) ;
734716
735- // Move args directly into fastlocals (no clone/refcount needed)
736717 {
737718 let fastlocals = unsafe { frame. fastlocals_mut ( ) } ;
738719 for ( slot, arg) in fastlocals. iter_mut ( ) . zip ( args. drain ( ..) ) {
739720 * slot = Some ( arg) ;
740721 }
741722 }
742723
743- // Handle cell2arg
744724 if let Some ( cell2arg) = code. cell2arg . as_deref ( ) {
745725 let fastlocals = unsafe { frame. fastlocals_mut ( ) } ;
746726 for ( cell_idx, arg_idx) in cell2arg. iter ( ) . enumerate ( ) . filter ( |( _, i) | * * i != -1 ) {
@@ -749,6 +729,36 @@ impl Py<PyFunction> {
749729 }
750730 }
751731
732+ frame
733+ }
734+
735+ /// Fast path for calling a simple function with exact positional args.
736+ /// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args.
737+ /// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs,
738+ /// and nargs == co_argcount.
739+ pub fn invoke_exact_args ( & self , args : Vec < PyObjectRef > , vm : & VirtualMachine ) -> PyResult {
740+ let code: PyRef < PyCode > = ( * self . code ) . to_owned ( ) ;
741+
742+ debug_assert_eq ! ( args. len( ) , code. arg_count as usize ) ;
743+ debug_assert ! ( code. flags. contains( bytecode:: CodeFlags :: OPTIMIZED ) ) ;
744+ debug_assert ! (
745+ !code
746+ . flags
747+ . intersects( bytecode:: CodeFlags :: VARARGS | bytecode:: CodeFlags :: VARKEYWORDS )
748+ ) ;
749+ debug_assert_eq ! ( code. kwonlyarg_count, 0 ) ;
750+
751+ // Generator/coroutine code objects are SIMPLE_FUNCTION in call
752+ // specialization classification, but their call path must still
753+ // go through invoke() to produce generator/coroutine objects.
754+ if code
755+ . flags
756+ . intersects ( bytecode:: CodeFlags :: GENERATOR | bytecode:: CodeFlags :: COROUTINE )
757+ {
758+ return self . invoke ( FuncArgs :: from ( args) , vm) ;
759+ }
760+ let frame = self . prepare_exact_args_frame ( args, vm) ;
761+
752762 let result = vm. run_frame ( frame. clone ( ) ) ;
753763 unsafe {
754764 if let Some ( base) = frame. materialize_localsplus ( ) {
@@ -759,6 +769,22 @@ impl Py<PyFunction> {
759769 }
760770}
761771
772+ pub ( crate ) fn datastack_frame_size_bytes_for_code ( code : & Py < PyCode > ) -> Option < usize > {
773+ if code
774+ . flags
775+ . intersects ( bytecode:: CodeFlags :: GENERATOR | bytecode:: CodeFlags :: COROUTINE )
776+ {
777+ return None ;
778+ }
779+ let nlocalsplus = code
780+ . varnames
781+ . len ( )
782+ . checked_add ( code. cellvars . len ( ) ) ?
783+ . checked_add ( code. freevars . len ( ) ) ?;
784+ let capacity = nlocalsplus. checked_add ( code. max_stackdepth as usize ) ?;
785+ capacity. checked_mul ( core:: mem:: size_of :: < usize > ( ) )
786+ }
787+
762788impl PyPayload for PyFunction {
763789 #[ inline]
764790 fn class ( ctx : & Context ) -> & ' static Py < PyType > {
@@ -1351,6 +1377,7 @@ pub(crate) fn vectorcall_function(
13511377
13521378 let has_kwargs = kwnames. is_some_and ( |kw| !kw. is_empty ( ) ) ;
13531379 let is_simple = !has_kwargs
1380+ && code. flags . contains ( bytecode:: CodeFlags :: OPTIMIZED )
13541381 && !code. flags . contains ( bytecode:: CodeFlags :: VARARGS )
13551382 && !code. flags . contains ( bytecode:: CodeFlags :: VARKEYWORDS )
13561383 && code. kwonlyarg_count == 0
@@ -1361,37 +1388,8 @@ pub(crate) fn vectorcall_function(
13611388 if is_simple && nargs == code. arg_count as usize {
13621389 // FAST PATH: simple positional-only call, exact arg count.
13631390 // Move owned args directly into fastlocals — no clone needed.
1364- let locals = if code. flags . contains ( bytecode:: CodeFlags :: NEWLOCALS ) {
1365- None // lazy allocation — most frames never access locals dict
1366- } else {
1367- Some ( ArgMapping :: from_dict_exact ( zelf. globals . clone ( ) ) )
1368- } ;
1369-
1370- let frame = Frame :: new (
1371- code. to_owned ( ) ,
1372- Scope :: new ( locals, zelf. globals . clone ( ) ) ,
1373- zelf. builtins . clone ( ) ,
1374- zelf. closure . as_ref ( ) . map_or ( & [ ] , |c| c. as_slice ( ) ) ,
1375- Some ( zelf. to_owned ( ) . into ( ) ) ,
1376- true , // Always use datastack (is_simple excludes gen/coro)
1377- vm,
1378- )
1379- . into_ref ( & vm. ctx ) ;
1380-
1381- {
1382- let fastlocals = unsafe { frame. fastlocals_mut ( ) } ;
1383- for ( slot, arg) in fastlocals. iter_mut ( ) . zip ( args. drain ( ..nargs) ) {
1384- * slot = Some ( arg) ;
1385- }
1386- }
1387-
1388- if let Some ( cell2arg) = code. cell2arg . as_deref ( ) {
1389- let fastlocals = unsafe { frame. fastlocals_mut ( ) } ;
1390- for ( cell_idx, arg_idx) in cell2arg. iter ( ) . enumerate ( ) . filter ( |( _, i) | * * i != -1 ) {
1391- let x = fastlocals[ * arg_idx as usize ] . take ( ) ;
1392- frame. set_cell_contents ( cell_idx, x) ;
1393- }
1394- }
1391+ args. truncate ( nargs) ;
1392+ let frame = zelf. prepare_exact_args_frame ( args, vm) ;
13951393
13961394 let result = vm. run_frame ( frame. clone ( ) ) ;
13971395 unsafe {
0 commit comments