Major fixes and new features
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-09-25 15:51:48 +09:00
parent dd7349bb4c
commit ddce9f5125
5586 changed files with 1470941 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
"""Encode valid C string literals from Python strings.
If a character is not allowed in C string literals, it is either emitted
as a simple escape sequence (e.g. '\\n'), or an octal escape sequence
with exactly three digits ('\\XXX', each X an octal digit). Question marks are escaped to
prevent trigraphs in the string literal from being interpreted. Note
that '\\?' is an invalid escape sequence in Python.
Consider the string literal "AB\\xCDEF". As one would expect, Python
parses it as ['A', 'B', 0xCD, 'E', 'F']. However, the C standard
specifies that all hexadecimal digits immediately following '\\x' will
be interpreted as part of the escape sequence. Therefore, it is
unexpectedly parsed as ['A', 'B', 0xCDEF].
Emitting ("AB\\xCD" "EF") would avoid this behaviour. However, we opt
for simplicity and use octal escape sequences instead. They do not
suffer from the same issue as they are defined to parse at most three
octal digits.
"""
from __future__ import annotations
import string
from typing import Final
# Table indexed by byte value giving the text to emit for that byte inside a
# C string literal.  Every entry starts out as a three-digit octal escape.
CHAR_MAP: Final = ["\\%03o" % i for i in range(256)]

# Printable characters stand for themselves.
# It is safe to use string.printable as it always uses the C locale.
for c in string.printable:
    CHAR_MAP[ord(c)] = c

# These assignments must come last because we prioritize simple escape
# sequences over any other representation.
for c in "'\"\\abfnrtv":
    escaped = "\\" + c
    # Let Python decode the escape to find out which byte it stands for.
    decoded = escaped.encode("ascii").decode("unicode_escape")
    CHAR_MAP[ord(decoded)] = escaped

# This escape sequence is invalid in Python, so it is assigned directly
# instead of being derived by decoding.
CHAR_MAP[ord("?")] = "\\?"
def encode_bytes_as_c_string(b: bytes) -> str:
    """Return the contents of a C string literal for ``b``, without quotes.

    Every byte is replaced by its entry in ``CHAR_MAP`` and the pieces are
    concatenated in order.
    """
    return "".join(CHAR_MAP[byte] for byte in b)
def c_string_initializer(value: bytes) -> str:
    """Create initializer for a C char[]/ char * variable from a byte string.

    The encoded contents are wrapped in double quotes.  For example, if
    value is b'foo', the result would be '"foo"'.
    """
    contents = encode_bytes_as_c_string(value)
    return f'"{contents}"'

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,841 @@
"""Code generation for native function bodies."""
from __future__ import annotations
from typing import Final
from mypyc.analysis.blockfreq import frequently_executed_blocks
from mypyc.codegen.emit import DEBUG_ERRORS, Emitter, TracebackAndGotoHandler, c_array_initializer
from mypyc.common import MODULE_PREFIX, NATIVE_PREFIX, REG_PREFIX, STATIC_PREFIX, TYPE_PREFIX
from mypyc.ir.class_ir import ClassIR
from mypyc.ir.func_ir import FUNC_CLASSMETHOD, FUNC_STATICMETHOD, FuncDecl, FuncIR, all_values
from mypyc.ir.ops import (
ERR_FALSE,
NAMESPACE_MODULE,
NAMESPACE_STATIC,
NAMESPACE_TYPE,
Assign,
AssignMulti,
BasicBlock,
Box,
Branch,
Call,
CallC,
Cast,
ComparisonOp,
ControlOp,
DecRef,
Extend,
Float,
FloatComparisonOp,
FloatNeg,
FloatOp,
GetAttr,
GetElementPtr,
Goto,
IncRef,
InitStatic,
Integer,
IntOp,
KeepAlive,
LoadAddress,
LoadErrorValue,
LoadGlobal,
LoadLiteral,
LoadMem,
LoadStatic,
MethodCall,
Op,
OpVisitor,
RaiseStandardError,
Register,
Return,
SetAttr,
SetMem,
Truncate,
TupleGet,
TupleSet,
Unbox,
Unreachable,
Value,
)
from mypyc.ir.pprint import generate_names_for_ir
from mypyc.ir.rtypes import (
RArray,
RStruct,
RTuple,
RType,
is_int32_rprimitive,
is_int64_rprimitive,
is_int_rprimitive,
is_pointer_rprimitive,
is_tagged,
)
def native_function_type(fn: FuncIR, emitter: Emitter) -> str:
    """Return the C function pointer type ``ret (*)(args)`` for ``fn``.

    Argument and return types are rendered with ``emitter.ctype``; an empty
    argument list is spelled ``void``.
    """
    params = ", ".join([emitter.ctype(arg.type) for arg in fn.args])
    if not params:
        params = "void"
    return_ctype = emitter.ctype(fn.ret_type)
    return "{} (*)({})".format(return_ctype, params)
def native_function_header(fn: FuncDecl, emitter: Emitter) -> str:
    """Return the C header (return type, name, parameter list) of a native function.

    Each parameter is rendered as its spaced C type followed by
    ``REG_PREFIX`` and the argument name; an empty parameter list is
    spelled ``void``.
    """
    params = ", ".join(
        f"{emitter.ctype_spaced(arg.type)}{REG_PREFIX}{arg.name}" for arg in fn.sig.args
    )
    ret_type = emitter.ctype_spaced(fn.sig.ret_type)
    name = emitter.native_function_name(fn)
    return f"{ret_type}{name}({params or 'void'})"
def generate_native_function(
    fn: FuncIR, emitter: Emitter, source_path: str, module_name: str
) -> None:
    """Emit the C definition of the native function ``fn`` into ``emitter``.

    The function header and local declarations go to one temporary emitter
    and the body statements to another; both are appended to ``emitter`` at
    the end, declarations first.
    """
    declarations = Emitter(emitter.context)
    names = generate_names_for_ir(fn.arg_regs, fn.blocks)
    body = Emitter(emitter.context, names)
    visitor = FunctionEmitterVisitor(body, declarations, source_path, module_name)

    declarations.emit_line(f"{native_function_header(fn.decl, emitter)} {{")
    body.indent()

    # Declare a C local for every value except arguments and arrays.
    for value in all_values(fn.arg_regs, fn.blocks):
        value_type = value.type
        if isinstance(value_type, RTuple):
            emitter.declare_tuple_struct(value_type)
        if isinstance(value_type, RArray):
            # Special: declared on first assignment
            continue
        if value in fn.arg_regs:
            # Skip the arguments
            continue
        declarations.emit_line(
            f"{emitter.ctype_spaced(value_type)}{REG_PREFIX}{names[value]};"
        )

    # Before we emit the blocks, give them all labels
    blocks = fn.blocks
    for index, block in enumerate(blocks):
        block.label = index

    # Find blocks that are never jumped to or are only jumped to from the
    # block directly above it. This allows for more labels and gotos to be
    # eliminated during code generation.
    for block in fn.blocks:
        terminator = block.terminator
        assert isinstance(terminator, ControlOp)

        for target in terminator.targets():
            falls_through = target.label == block.label + 1

            # Always emit labels for GetAttr error checks since the emit code that
            # generates them will add instructions between the branch and the
            # next label, causing the label to be wrongly removed. A better
            # solution would be to change the IR so that it adds a basic block
            # in between the calls.
            branches_on_get_attr = isinstance(terminator, Branch) and any(
                isinstance(source, GetAttr) for source in terminator.sources()
            )

            if branches_on_get_attr or not falls_through:
                fn.blocks[target.label].referenced = True

    common = frequently_executed_blocks(fn.blocks[0])

    block_count = len(blocks)
    for index, block in enumerate(blocks):
        visitor.rare = block not in common
        following = blocks[index + 1] if index + 1 < block_count else None
        body.emit_label(block)
        visitor.next_block = following

        ops = block.ops
        visitor.ops = ops
        visitor.op_index = 0
        # Visit methods may advance op_index themselves to merge the next op,
        # so the index is re-read from the visitor on every iteration.
        while visitor.op_index < len(ops):
            ops[visitor.op_index].accept(visitor)
            visitor.op_index += 1

    body.emit_line("}")

    emitter.emit_from_emitter(declarations)
    emitter.emit_from_emitter(body)
class FunctionEmitterVisitor(OpVisitor[None]):
    """Op visitor that emits C code for the ops of a native function body.

    Statements are written to ``emitter``; declarations that must precede
    the body are written to ``declarations``.  The caller sets ``rare``,
    ``next_block``, ``ops`` and ``op_index`` before visiting each op.
    """

    def __init__(
        self, emitter: Emitter, declarations: Emitter, source_path: str, module_name: str
    ) -> None:
        """Store the output emitters and the per-function context.

        Args:
            emitter: Receives the statements of the function body.
            declarations: Receives declarations emitted ahead of the body.
            source_path: Source file path used when emitting traceback entries.
            module_name: Module name used when emitting traceback entries.
        """
        self.emitter = emitter
        self.names = emitter.names
        self.declarations = declarations
        self.source_path = source_path
        self.module_name = module_name
        self.literals = emitter.context.literals
        # Passed as the 'rare' flag when emitting inc/dec refs, set by caller
        self.rare = False
        # Next basic block to be processed after the current one (if any), set by caller
        self.next_block: BasicBlock | None = None
        # Ops in the basic block currently being processed, set by caller
        self.ops: list[Op] = []
        # Current index within ops; visit methods can increment this to skip/merge ops
        self.op_index = 0

    def temp_name(self) -> str:
        """Return a fresh temporary name from the body emitter."""
        return self.emitter.temp_name()

    def visit_goto(self, op: Goto) -> None:
        """Emit a goto unless the target is the block that follows directly."""
        if op.label is not self.next_block:
            self.emit_line("goto %s;" % self.label(op.label))

    def visit_branch(self, op: Branch) -> None:
        """Emit a conditional jump, avoiding gotos to the directly following block."""
        true, false = op.true, op.false
        negated = op.negated
        negated_rare = False
        if true is self.next_block and op.traceback_entry is None:
            # Switch true/false since it avoids an else block.
            true, false = false, true
            negated = not negated
            negated_rare = True

        neg = "!" if negated else ""
        cond = ""
        if op.op == Branch.BOOL:
            expr_result = self.reg(op.value)
            cond = f"{neg}{expr_result}"
        elif op.op == Branch.IS_ERROR:
            typ = op.value.type
            compare = "!=" if negated else "=="
            if isinstance(typ, RTuple):
                # TODO: What about empty tuple?
                cond = self.emitter.tuple_undefined_check_cond(
                    typ, self.reg(op.value), self.c_error_value, compare
                )
            else:
                cond = f"{self.reg(op.value)} {compare} {self.c_error_value(typ)}"
        else:
            assert False, "Invalid branch"

        # For error checks, tell the compiler the branch is unlikely
        if op.traceback_entry is not None or op.rare:
            if not negated_rare:
                cond = f"unlikely({cond})"
            else:
                cond = f"likely({cond})"

        if false is self.next_block:
            if op.traceback_entry is None:
                if true is not self.next_block:
                    self.emit_line(f"if ({cond}) goto {self.label(true)};")
            else:
                self.emit_line(f"if ({cond}) {{")
                self.emit_traceback(op)
                self.emit_lines("goto %s;" % self.label(true), "}")
        else:
            self.emit_line(f"if ({cond}) {{")
            self.emit_traceback(op)

            if true is not self.next_block:
                self.emit_line("goto %s;" % self.label(true))

            self.emit_lines("} else", " goto %s;" % self.label(false))

    def visit_return(self, op: Return) -> None:
        """Emit a return of the op's value."""
        value_str = self.reg(op.value)
        self.emit_line("return %s;" % value_str)

    def visit_tuple_set(self, op: TupleSet) -> None:
        """Emit field-by-field initialization of a tuple struct."""
        dest = self.reg(op)
        tuple_type = op.tuple_type
        self.emitter.declare_tuple_struct(tuple_type)
        if len(op.items) == 0:  # empty tuple
            self.emit_line(f"{dest}.empty_struct_error_flag = 0;")
        else:
            for i, item in enumerate(op.items):
                self.emit_line(f"{dest}.f{i} = {self.reg(item)};")
        self.emit_inc_ref(dest, tuple_type)

    def visit_assign(self, op: Assign) -> None:
        """Emit an assignment, skipping self-assignments."""
        dest = self.reg(op.dest)
        src = self.reg(op.src)
        # clang whines about self assignment (which we might generate
        # for some casts), so don't emit it.
        if dest != src:
            # We sometimes assign from an integer representation of a pointer
            # to a real pointer, and C compilers insist on a cast.
            if op.src.type.is_unboxed and not op.dest.type.is_unboxed:
                src = f"(void *){src}"
            self.emit_line(f"{dest} = {src};")

    def visit_assign_multi(self, op: AssignMulti) -> None:
        """Emit the declaration and initializer of a C array value."""
        typ = op.dest.type
        assert isinstance(typ, RArray)
        dest = self.reg(op.dest)
        # RArray values can only be assigned to once, so we can always
        # declare them on initialization.
        self.emit_line(
            "%s%s[%d] = %s;"
            % (
                self.emitter.ctype_spaced(typ.item_type),
                dest,
                len(op.src),
                c_array_initializer([self.reg(s) for s in op.src], indented=True),
            )
        )

    def visit_load_error_value(self, op: LoadErrorValue) -> None:
        """Emit code that stores the error value of the op's type in its register."""
        if isinstance(op.type, RTuple):
            # Tuples are initialized through a temporary struct built from
            # the undefined value of every item type.
            values = [self.c_undefined_value(item) for item in op.type.types]
            tmp = self.temp_name()
            self.emit_line("{} {} = {{ {} }};".format(self.ctype(op.type), tmp, ", ".join(values)))
            self.emit_line(f"{self.reg(op)} = {tmp};")
        else:
            self.emit_line(f"{self.reg(op)} = {self.c_error_value(op.type)};")

    def visit_load_literal(self, op: LoadLiteral) -> None:
        """Emit a load of a literal from the CPyStatics array."""
        index = self.literals.literal_index(op.value)
        if not is_int_rprimitive(op.type):
            self.emit_line("%s = CPyStatics[%d];" % (self.reg(op), index), ann=op.value)
        else:
            self.emit_line(
                "%s = (CPyTagged)CPyStatics[%d] | 1;" % (self.reg(op), index), ann=op.value
            )

    def get_attr_expr(self, obj: str, op: GetAttr | SetAttr, decl_cl: ClassIR) -> str:
        """Generate attribute accessor for normal (non-property) access.

        This either has a form like obj->attr_name for attributes defined in non-trait
        classes, and *(obj + attr_offset) for attributes defined by traits. We also
        insert all necessary C casts here.
        """
        cast = f"({op.class_type.struct_name(self.emitter.names)} *)"
        if decl_cl.is_trait and op.class_type.class_ir.is_trait:
            # For pure trait access find the offset first, offsets
            # are ordered by attribute position in the cl.attributes dict.
            # TODO: pre-calculate the mapping to make this faster.
            trait_attr_index = list(decl_cl.attributes).index(op.attr)
            # TODO: reuse these names somehow?
            offset = self.emitter.temp_name()
            self.declarations.emit_line(f"size_t {offset};")
            self.emitter.emit_line(
                "{} = {};".format(
                    offset,
                    "CPy_FindAttrOffset({}, {}, {})".format(
                        self.emitter.type_struct_name(decl_cl),
                        f"({cast}{obj})->vtable",
                        trait_attr_index,
                    ),
                )
            )
            attr_cast = f"({self.ctype(op.class_type.attr_type(op.attr))} *)"
            return f"*{attr_cast}((char *){obj} + {offset})"
        else:
            # Cast to something non-trait. Note: for this to work, all struct
            # members for non-trait classes must obey monotonic linear growth.
            if op.class_type.class_ir.is_trait:
                assert not decl_cl.is_trait
                cast = f"({decl_cl.struct_name(self.emitter.names)} *)"
            return f"({cast}{obj})->{self.emitter.attr(op.attr)}"

    def visit_get_attr(self, op: GetAttr) -> None:
        """Emit an attribute read, merging a directly following error branch if possible."""
        dest = self.reg(op)
        obj = self.reg(op.obj)
        rtype = op.class_type
        cl = rtype.class_ir
        attr_rtype, decl_cl = cl.attr_details(op.attr)
        prefer_method = cl.is_trait and attr_rtype.error_overlap
        if cl.get_method(op.attr, prefer_method=prefer_method):
            # Properties are essentially methods, so use vtable access for them.
            version = "_TRAIT" if cl.is_trait else ""
            self.emit_line(
                "%s = CPY_GET_ATTR%s(%s, %s, %d, %s, %s); /* %s */"
                % (
                    dest,
                    version,
                    obj,
                    self.emitter.type_struct_name(rtype.class_ir),
                    rtype.getter_index(op.attr),
                    rtype.struct_name(self.names),
                    self.ctype(rtype.attr_type(op.attr)),
                    op.attr,
                )
            )
        else:
            # Otherwise, use direct or offset struct access.
            attr_expr = self.get_attr_expr(obj, op, decl_cl)
            self.emitter.emit_line(f"{dest} = {attr_expr};")
            always_defined = cl.is_always_defined(op.attr)
            merged_branch = None
            if not always_defined:
                self.emitter.emit_undefined_attr_check(
                    attr_rtype, dest, "==", obj, op.attr, cl, unlikely=True
                )
                branch = self.next_branch()
                if branch is not None:
                    if (
                        branch.value is op
                        and branch.op == Branch.IS_ERROR
                        and branch.traceback_entry is not None
                        and not branch.negated
                    ):
                        # Generate code for the following branch here to avoid
                        # redundant branches in the generated code.
                        self.emit_attribute_error(branch, cl.name, op.attr)
                        self.emit_line("goto %s;" % self.label(branch.true))
                        merged_branch = branch
                        self.emitter.emit_line("}")
                if not merged_branch:
                    exc_class = "PyExc_AttributeError"
                    self.emitter.emit_line(
                        'PyErr_SetString({}, "attribute {} of {} undefined");'.format(
                            exc_class, repr(op.attr), repr(cl.name)
                        )
                    )

            if attr_rtype.is_refcounted and not op.is_borrowed:
                if not merged_branch and not always_defined:
                    self.emitter.emit_line("} else {")
                self.emitter.emit_inc_ref(dest, attr_rtype)
            if merged_branch:
                if merged_branch.false is not self.next_block:
                    self.emit_line("goto %s;" % self.label(merged_branch.false))
                # The branch was emitted here, so skip it in the caller's loop.
                self.op_index += 1
            elif not always_defined:
                self.emitter.emit_line("}")

    def next_branch(self) -> Branch | None:
        """Return the op following the current one if it is a Branch, else None."""
        if self.op_index + 1 < len(self.ops):
            next_op = self.ops[self.op_index + 1]
            if isinstance(next_op, Branch):
                return next_op
        return None

    def visit_set_attr(self, op: SetAttr) -> None:
        """Emit an attribute write through the vtable setter or a struct field."""
        if op.error_kind == ERR_FALSE:
            dest = self.reg(op)
        obj = self.reg(op.obj)
        src = self.reg(op.src)
        rtype = op.class_type
        cl = rtype.class_ir
        attr_rtype, decl_cl = cl.attr_details(op.attr)
        if cl.get_method(op.attr):
            # Again, use vtable access for properties...
            assert not op.is_init and op.error_kind == ERR_FALSE, "%s %d %d %s" % (
                op.attr,
                op.is_init,
                op.error_kind,
                rtype,
            )
            version = "_TRAIT" if cl.is_trait else ""
            self.emit_line(
                "%s = CPY_SET_ATTR%s(%s, %s, %d, %s, %s, %s); /* %s */"
                % (
                    dest,
                    version,
                    obj,
                    self.emitter.type_struct_name(rtype.class_ir),
                    rtype.setter_index(op.attr),
                    src,
                    rtype.struct_name(self.names),
                    self.ctype(rtype.attr_type(op.attr)),
                    op.attr,
                )
            )
        else:
            # ...and struct access for normal attributes.
            attr_expr = self.get_attr_expr(obj, op, decl_cl)
            if not op.is_init and attr_rtype.is_refcounted:
                # This is not an initialization (where we know that the attribute was
                # previously undefined), so decref the old value.
                always_defined = cl.is_always_defined(op.attr)
                if not always_defined:
                    self.emitter.emit_undefined_attr_check(
                        attr_rtype, attr_expr, "!=", obj, op.attr, cl
                    )
                self.emitter.emit_dec_ref(attr_expr, attr_rtype)
                if not always_defined:
                    self.emitter.emit_line("}")
            elif attr_rtype.error_overlap and not cl.is_always_defined(op.attr):
                # If there is overlap with the error value, update bitmap to mark
                # attribute as defined.
                self.emitter.emit_attr_bitmap_set(src, obj, attr_rtype, cl, op.attr)

            # This steals the reference to src, so we don't need to increment the arg
            self.emitter.emit_line(f"{attr_expr} = {src};")
            if op.error_kind == ERR_FALSE:
                self.emitter.emit_line(f"{dest} = 1;")

    # Maps the namespace of a static to the prefix used to build its C name.
    PREFIX_MAP: Final = {
        NAMESPACE_STATIC: STATIC_PREFIX,
        NAMESPACE_TYPE: TYPE_PREFIX,
        NAMESPACE_MODULE: MODULE_PREFIX,
    }

    def visit_load_static(self, op: LoadStatic) -> None:
        """Emit a load of a static; type statics are cast to ``PyObject *``."""
        dest = self.reg(op)
        prefix = self.PREFIX_MAP[op.namespace]
        name = self.emitter.static_name(op.identifier, op.module_name, prefix)
        if op.namespace == NAMESPACE_TYPE:
            name = "(PyObject *)%s" % name
        self.emit_line(f"{dest} = {name};", ann=op.ann)

    def visit_init_static(self, op: InitStatic) -> None:
        """Emit a store to a static followed by an incref of the stored value."""
        value = self.reg(op.value)
        prefix = self.PREFIX_MAP[op.namespace]
        name = self.emitter.static_name(op.identifier, op.module_name, prefix)
        if op.namespace == NAMESPACE_TYPE:
            value = "(PyTypeObject *)%s" % value
        self.emit_line(f"{name} = {value};")
        self.emit_inc_ref(name, op.value.type)

    def visit_tuple_get(self, op: TupleGet) -> None:
        """Emit a read of one tuple struct field followed by an incref."""
        dest = self.reg(op)
        src = self.reg(op.src)
        self.emit_line(f"{dest} = {src}.f{op.index};")
        self.emit_inc_ref(dest, op.type)

    def get_dest_assign(self, dest: Value) -> str:
        """Return ``"<reg> = "`` for a non-void value and ``""`` for a void one."""
        if not dest.is_void:
            return self.reg(dest) + " = "
        else:
            return ""

    def visit_call(self, op: Call) -> None:
        """Call native function."""
        dest = self.get_dest_assign(op)
        args = ", ".join(self.reg(arg) for arg in op.args)
        lib = self.emitter.get_group_prefix(op.fn)
        cname = op.fn.cname(self.names)
        self.emit_line(f"{dest}{lib}{NATIVE_PREFIX}{cname}({args});")

    def visit_method_call(self, op: MethodCall) -> None:
        """Call native method."""
        dest = self.get_dest_assign(op)
        obj = self.reg(op.obj)

        rtype = op.receiver_type
        class_ir = rtype.class_ir
        name = op.method
        method = rtype.class_ir.get_method(name)
        assert method is not None

        # Can we call the method directly, bypassing vtable?
        is_direct = class_ir.is_method_final(name)

        # The first argument gets omitted for static methods and
        # turned into the class for class methods
        obj_args = (
            []
            if method.decl.kind == FUNC_STATICMETHOD
            else [f"(PyObject *)Py_TYPE({obj})"]
            if method.decl.kind == FUNC_CLASSMETHOD
            else [obj]
        )
        args = ", ".join(obj_args + [self.reg(arg) for arg in op.args])
        mtype = native_function_type(method, self.emitter)
        version = "_TRAIT" if rtype.class_ir.is_trait else ""
        if is_direct:
            # Directly call method, without going through the vtable.
            lib = self.emitter.get_group_prefix(method.decl)
            self.emit_line(f"{dest}{lib}{NATIVE_PREFIX}{method.cname(self.names)}({args});")
        else:
            # Call using vtable.
            method_idx = rtype.method_index(name)
            self.emit_line(
                "{}CPY_GET_METHOD{}({}, {}, {}, {}, {})({}); /* {} */".format(
                    dest,
                    version,
                    obj,
                    self.emitter.type_struct_name(rtype.class_ir),
                    method_idx,
                    rtype.struct_name(self.names),
                    mtype,
                    args,
                    op.method,
                )
            )

    def visit_inc_ref(self, op: IncRef) -> None:
        """Emit an incref of the op's source value."""
        src = self.reg(op.src)
        self.emit_inc_ref(src, op.src.type)

    def visit_dec_ref(self, op: DecRef) -> None:
        """Emit a decref of the op's source value."""
        src = self.reg(op.src)
        self.emit_dec_ref(src, op.src.type, is_xdec=op.is_xdec)

    def visit_box(self, op: Box) -> None:
        """Emit boxing of the source value into the op's register."""
        self.emitter.emit_box(self.reg(op.src), self.reg(op), op.src.type, can_borrow=True)

    def visit_cast(self, op: Cast) -> None:
        """Emit a cast, merging a directly following error branch if possible."""
        branch = self.next_branch()
        handler = None
        if branch is not None:
            if (
                branch.value is op
                and branch.op == Branch.IS_ERROR
                and branch.traceback_entry is not None
                and not branch.negated
                and branch.false is self.next_block
            ):
                # Generate code also for the following branch here to avoid
                # redundant branches in the generated code.
                handler = TracebackAndGotoHandler(
                    self.label(branch.true),
                    self.source_path,
                    self.module_name,
                    branch.traceback_entry,
                )
                # The branch is handled by the cast's error handler, so skip it.
                self.op_index += 1

        self.emitter.emit_cast(
            self.reg(op.src), self.reg(op), op.type, src_type=op.src.type, error=handler
        )

    def visit_unbox(self, op: Unbox) -> None:
        """Emit unboxing of the source value into the op's register."""
        self.emitter.emit_unbox(self.reg(op.src), self.reg(op), op.type)

    def visit_unreachable(self, op: Unreachable) -> None:
        """Emit a call to CPy_Unreachable()."""
        self.emitter.emit_line("CPy_Unreachable();")

    def visit_raise_standard_error(self, op: RaiseStandardError) -> None:
        """Emit code that sets a standard exception and stores 0 in the op's register.

        A str value is embedded in the generated code as a C string literal,
        so backslashes, double quotes, newlines and carriage returns are
        escaped to keep the literal well-formed.
        """
        if op.value is not None:
            if isinstance(op.value, str):
                # Backslashes are escaped first so that the backslashes added
                # by the later replacements are not escaped a second time.
                message = (
                    op.value.replace("\\", "\\\\")
                    .replace('"', '\\"')
                    .replace("\n", "\\n")
                    .replace("\r", "\\r")
                )
                self.emitter.emit_line(f'PyErr_SetString(PyExc_{op.class_name}, "{message}");')
            elif isinstance(op.value, Value):
                self.emitter.emit_line(
                    "PyErr_SetObject(PyExc_{}, {});".format(
                        op.class_name, self.emitter.reg(op.value)
                    )
                )
            else:
                assert False, "op value type must be either str or Value"
        else:
            self.emitter.emit_line(f"PyErr_SetNone(PyExc_{op.class_name});")
        self.emitter.emit_line(f"{self.reg(op)} = 0;")

    def visit_call_c(self, op: CallC) -> None:
        """Emit a call to the C function named by the op."""
        # get_dest_assign() already returns "" for void ops.
        dest = self.get_dest_assign(op)
        args = ", ".join(self.reg(arg) for arg in op.args)
        self.emitter.emit_line(f"{dest}{op.function_name}({args});")

    def visit_truncate(self, op: Truncate) -> None:
        """Emit a truncation as a plain assignment."""
        dest = self.reg(op)
        value = self.reg(op.src)
        # for C backend the generated code are straight assignments
        self.emit_line(f"{dest} = {value};")

    def visit_extend(self, op: Extend) -> None:
        """Emit an extension, casting the source as signed or unsigned first."""
        dest = self.reg(op)
        value = self.reg(op.src)
        if op.signed:
            src_cast = self.emit_signed_int_cast(op.src.type)
        else:
            src_cast = self.emit_unsigned_int_cast(op.src.type)
        self.emit_line(f"{dest} = {src_cast}{value};")

    def visit_load_global(self, op: LoadGlobal) -> None:
        """Emit a load of the C identifier named by the op."""
        dest = self.reg(op)
        self.emit_line(f"{dest} = {op.identifier};", ann=op.ann)

    def visit_int_op(self, op: IntOp) -> None:
        """Emit a binary integer operation."""
        dest = self.reg(op)
        lhs = self.reg(op.lhs)
        rhs = self.reg(op.rhs)
        if op.op == IntOp.RIGHT_SHIFT:
            # Signed right shift
            lhs = self.emit_signed_int_cast(op.lhs.type) + lhs
            rhs = self.emit_signed_int_cast(op.rhs.type) + rhs
        self.emit_line(f"{dest} = {lhs} {op.op_str[op.op]} {rhs};")

    def visit_comparison_op(self, op: ComparisonOp) -> None:
        """Emit an integer comparison with the casts its signedness requires."""
        dest = self.reg(op)
        lhs = self.reg(op.lhs)
        rhs = self.reg(op.rhs)
        lhs_cast = ""
        rhs_cast = ""
        if op.op in (ComparisonOp.SLT, ComparisonOp.SGT, ComparisonOp.SLE, ComparisonOp.SGE):
            # Always signed comparison op
            lhs_cast = self.emit_signed_int_cast(op.lhs.type)
            rhs_cast = self.emit_signed_int_cast(op.rhs.type)
        elif op.op in (ComparisonOp.ULT, ComparisonOp.UGT, ComparisonOp.ULE, ComparisonOp.UGE):
            # Always unsigned comparison op
            lhs_cast = self.emit_unsigned_int_cast(op.lhs.type)
            rhs_cast = self.emit_unsigned_int_cast(op.rhs.type)
        elif isinstance(op.lhs, Integer) and op.lhs.value < 0:
            # Force signed ==/!= with negative operand
            rhs_cast = self.emit_signed_int_cast(op.rhs.type)
        elif isinstance(op.rhs, Integer) and op.rhs.value < 0:
            # Force signed ==/!= with negative operand
            lhs_cast = self.emit_signed_int_cast(op.lhs.type)
        self.emit_line(f"{dest} = {lhs_cast}{lhs} {op.op_str[op.op]} {rhs_cast}{rhs};")

    def visit_float_op(self, op: FloatOp) -> None:
        """Emit a binary float operation; modulo is emitted as a call to fmod()."""
        dest = self.reg(op)
        lhs = self.reg(op.lhs)
        rhs = self.reg(op.rhs)
        if op.op != FloatOp.MOD:
            self.emit_line(f"{dest} = {lhs} {op.op_str[op.op]} {rhs};")
        else:
            # TODO: This may set errno as a side effect, that is a little sketchy.
            self.emit_line(f"{dest} = fmod({lhs}, {rhs});")

    def visit_float_neg(self, op: FloatNeg) -> None:
        """Emit a float negation."""
        dest = self.reg(op)
        src = self.reg(op.src)
        self.emit_line(f"{dest} = -{src};")

    def visit_float_comparison_op(self, op: FloatComparisonOp) -> None:
        """Emit a float comparison."""
        dest = self.reg(op)
        lhs = self.reg(op.lhs)
        rhs = self.reg(op.rhs)
        self.emit_line(f"{dest} = {lhs} {op.op_str[op.op]} {rhs};")

    def visit_load_mem(self, op: LoadMem) -> None:
        """Emit a read through a pointer cast to the op's C type."""
        dest = self.reg(op)
        src = self.reg(op.src)
        # TODO: we shouldn't dereference to type that are pointer type so far
        c_type = self.ctype(op.type)
        self.emit_line(f"{dest} = *({c_type} *){src};")

    def visit_set_mem(self, op: SetMem) -> None:
        """Emit a write through a pointer cast to the destination C type."""
        dest = self.reg(op.dest)
        src = self.reg(op.src)
        dest_type = self.ctype(op.dest_type)
        # clang whines about self assignment (which we might generate
        # for some casts), so don't emit it.
        if dest != src:
            self.emit_line(f"*({dest_type} *){dest} = {src};")

    def visit_get_element_ptr(self, op: GetElementPtr) -> None:
        """Emit code that takes the address of a struct field."""
        dest = self.reg(op)
        src = self.reg(op.src)
        # TODO: support tuple type
        assert isinstance(op.src_type, RStruct)
        assert op.field in op.src_type.names, "Invalid field name."
        self.emit_line(
            "{} = ({})&(({} *){})->{};".format(
                dest, op.type._ctype, op.src_type.name, src, op.field
            )
        )

    def visit_load_address(self, op: LoadAddress) -> None:
        """Emit code that takes the address of a register, static or C name."""
        typ = op.type
        dest = self.reg(op)
        if isinstance(op.src, Register):
            src = self.reg(op.src)
        elif isinstance(op.src, LoadStatic):
            prefix = self.PREFIX_MAP[op.src.namespace]
            src = self.emitter.static_name(op.src.identifier, op.src.module_name, prefix)
        else:
            src = op.src
        self.emit_line(f"{dest} = ({typ._ctype})&{src};")

    def visit_keep_alive(self, op: KeepAlive) -> None:
        """Emit nothing for a KeepAlive op."""
        # This is a no-op.
        pass

    # Helpers

    def label(self, label: BasicBlock) -> str:
        """Return the C label of a basic block."""
        return self.emitter.label(label)

    def reg(self, reg: Value) -> str:
        """Return the C expression for a value.

        Integer and Float values are rendered as C literals; every other
        value is looked up through the emitter.
        """
        if isinstance(reg, Integer):
            val = reg.value
            if val == 0 and is_pointer_rprimitive(reg.type):
                return "NULL"
            s = str(val)
            if val >= (1 << 31):
                # Avoid overflowing signed 32-bit int
                if val >= (1 << 63):
                    s += "ULL"
                else:
                    s += "LL"
            elif val == -(1 << 63):
                # Avoid overflowing C integer literal
                s = "(-9223372036854775807LL - 1)"
            elif val <= -(1 << 31):
                s += "LL"
            return s
        elif isinstance(reg, Float):
            r = repr(reg.value)
            if r == "inf":
                return "INFINITY"
            elif r == "-inf":
                return "-INFINITY"
            elif r == "nan":
                return "NAN"
            return r
        else:
            return self.emitter.reg(reg)

    def ctype(self, rtype: RType) -> str:
        """Return the C type of ``rtype`` as given by the emitter."""
        return self.emitter.ctype(rtype)

    def c_error_value(self, rtype: RType) -> str:
        """Return the C error value of ``rtype`` as given by the emitter."""
        return self.emitter.c_error_value(rtype)

    def c_undefined_value(self, rtype: RType) -> str:
        """Return the C undefined value of ``rtype`` as given by the emitter."""
        return self.emitter.c_undefined_value(rtype)

    def emit_line(self, line: str, *, ann: object = None) -> None:
        """Emit one line to the body emitter, forwarding the annotation."""
        self.emitter.emit_line(line, ann=ann)

    def emit_lines(self, *lines: str) -> None:
        """Emit several lines to the body emitter."""
        self.emitter.emit_lines(*lines)

    def emit_inc_ref(self, dest: str, rtype: RType) -> None:
        """Emit an incref, passing the current ``rare`` flag to the emitter."""
        self.emitter.emit_inc_ref(dest, rtype, rare=self.rare)

    def emit_dec_ref(self, dest: str, rtype: RType, is_xdec: bool) -> None:
        """Emit a decref, passing the current ``rare`` flag to the emitter."""
        self.emitter.emit_dec_ref(dest, rtype, is_xdec=is_xdec, rare=self.rare)

    def emit_declaration(self, line: str) -> None:
        """Emit one line to the declarations emitter."""
        self.declarations.emit_line(line)

    def emit_traceback(self, op: Branch) -> None:
        """Emit a traceback entry for the branch if it has one."""
        if op.traceback_entry is not None:
            self.emitter.emit_traceback(self.source_path, self.module_name, op.traceback_entry)

    def emit_attribute_error(self, op: Branch, class_name: str, attr: str) -> None:
        """Emit a CPy_AttributeError() call using the branch's traceback entry."""
        assert op.traceback_entry is not None
        globals_static = self.emitter.static_name("globals", self.module_name)
        self.emit_line(
            'CPy_AttributeError("%s", "%s", "%s", "%s", %d, %s);'
            % (
                # Backslashes in the path must be escaped inside the C literal.
                self.source_path.replace("\\", "\\\\"),
                op.traceback_entry[0],
                class_name,
                attr,
                op.traceback_entry[1],
                globals_static,
            )
        )
        if DEBUG_ERRORS:
            self.emit_line('assert(PyErr_Occurred() != NULL && "failure w/o err!");')

    def emit_signed_int_cast(self, type: RType) -> str:
        """Return the cast prefix forcing a signed interpretation, or ``""``."""
        if is_tagged(type):
            return "(Py_ssize_t)"
        else:
            return ""

    def emit_unsigned_int_cast(self, type: RType) -> str:
        """Return the cast prefix forcing an unsigned interpretation, or ``""``."""
        if is_int32_rprimitive(type):
            return "(uint32_t)"
        elif is_int64_rprimitive(type):
            return "(uint64_t)"
        else:
            return ""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,979 @@
"""Generate CPython API wrapper functions for native functions.
The wrapper functions are used by the CPython runtime when calling
native functions from interpreted code, and when the called function
can't be determined statically in compiled code. They validate, match,
unbox and type check function arguments, and box return values as
needed. All wrappers accept and return 'PyObject *' (boxed) values.
The wrappers aren't used for most calls between two native functions
or methods in a single compilation unit.
"""
from __future__ import annotations
from typing import Sequence
from mypy.nodes import ARG_NAMED, ARG_NAMED_OPT, ARG_OPT, ARG_POS, ARG_STAR, ARG_STAR2, ArgKind
from mypy.operators import op_methods_to_symbols, reverse_op_method_names, reverse_op_methods
from mypyc.codegen.emit import AssignHandler, Emitter, ErrorHandler, GotoHandler, ReturnHandler
from mypyc.common import (
BITMAP_BITS,
BITMAP_TYPE,
DUNDER_PREFIX,
NATIVE_PREFIX,
PREFIX,
bitmap_name,
use_vectorcall,
)
from mypyc.ir.class_ir import ClassIR
from mypyc.ir.func_ir import FUNC_STATICMETHOD, FuncIR, RuntimeArg
from mypyc.ir.rtypes import (
RInstance,
RType,
is_bool_rprimitive,
is_int_rprimitive,
is_object_rprimitive,
object_rprimitive,
)
from mypyc.namegen import NameGenerator
# Generic vectorcall wrapper functions (Python 3.7+)
#
# A wrapper function has a signature like this:
#
# PyObject *fn(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames)
#
# The function takes a self object, pointer to an array of arguments,
# the number of positional arguments, and a tuple of keyword argument
# names (that are stored starting in args[nargs]).
#
# It returns the returned object, or NULL on an exception.
#
# These are more efficient than legacy wrapper functions, since
# usually no tuple or dict objects need to be created for the
# arguments. Vectorcalls also use pre-constructed str objects for
# keyword argument names and other pre-computed information, instead
# of processing the argument format string on each call.
def wrapper_function_header(fn: FuncIR, names: NameGenerator) -> str:
    """Return header of a vectorcall wrapper function.

    The wrapper is named ``PREFIX`` followed by the C name of ``fn`` and
    takes the parameters ``self``, ``args``, ``nargs`` and ``kwnames``.
    """
    wrapper_name = fn.cname(names)
    return (
        f"PyObject *{PREFIX}{wrapper_name}("
        "PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames)"
    )
def generate_traceback_code(
    fn: FuncIR, emitter: Emitter, source_path: str, module_name: str
) -> str:
    """Return a C statement that adds a traceback frame for ``fn``.

    If we hit an error while processing arguments, then we emit a
    traceback frame to make it possible to debug where it happened.
    Unlike traceback frames added for exceptions seen in IR, we do this
    even if there is no `traceback_name`. This is because the error will
    have originated here and so we need it in the traceback.
    """
    globals_static = emitter.static_name("globals", module_name)
    # Backslashes in the path must be escaped inside the C string literal.
    escaped_path = source_path.replace("\\", "\\\\")
    frame_name = fn.traceback_name or fn.name
    return 'CPy_AddTraceback("%s", "%s", %d, %s);' % (
        escaped_path,
        frame_name,
        fn.line,
        globals_static,
    )
def make_arg_groups(args: list[RuntimeArg]) -> dict[ArgKind, list[RuntimeArg]]:
    """Group arguments by kind.

    The result has one entry for every member of ``ArgKind``; kinds that
    no argument uses map to an empty list.
    """
    groups: dict[ArgKind, list[RuntimeArg]] = {}
    for kind in ArgKind:
        groups[kind] = [arg for arg in args if arg.kind == kind]
    return groups
def reorder_arg_groups(groups: dict[ArgKind, list[RuntimeArg]]) -> list[RuntimeArg]:
    """Reorder argument groups to match their order in a format string.

    The order is: positional, optional, optional keyword-only, required
    keyword-only.
    """
    reordered: list[RuntimeArg] = []
    for kind in (ARG_POS, ARG_OPT, ARG_NAMED_OPT, ARG_NAMED):
        reordered.extend(groups[kind])
    return reordered
def make_static_kwlist(args: list[RuntimeArg]) -> str:
    """Return the C definition of the 0-terminated ``kwlist`` array of argument names."""
    quoted_names = [f'"{arg.name}", ' for arg in args]
    return "static const char * const kwlist[] = {" + "".join(quoted_names) + "0};"
def make_format_string(func_name: str | None, groups: dict[ArgKind, list[RuntimeArg]]) -> str:
    """Return a format string that specifies the accepted arguments.

    The format string is an extended subset of what is supported by
    PyArg_ParseTupleAndKeywords(). Only the type 'O' is used, and we
    also support some extensions:

    - Required keyword-only arguments are introduced after '@'
    - If the function receives *args or **kwargs, we add a '%' prefix

    Each group requires the previous groups' delimiters to be present
    first.

    These are used by both vectorcall and legacy wrapper functions.
    """
    pieces: list[str] = []
    if groups[ARG_STAR] or groups[ARG_STAR2]:
        pieces.append("%")
    pieces.append("O" * len(groups[ARG_POS]))
    # Each delimiter is emitted whenever its own group or any later group
    # is non-empty.
    if groups[ARG_OPT] or groups[ARG_NAMED_OPT] or groups[ARG_NAMED]:
        pieces.append("|" + "O" * len(groups[ARG_OPT]))
    if groups[ARG_NAMED_OPT] or groups[ARG_NAMED]:
        pieces.append("$" + "O" * len(groups[ARG_NAMED_OPT]))
    if groups[ARG_NAMED]:
        pieces.append("@" + "O" * len(groups[ARG_NAMED]))
    if func_name is not None:
        pieces.append(f":{func_name}")
    return "".join(pieces)
def generate_wrapper_function(
    fn: FuncIR, emitter: Emitter, source_path: str, module_name: str
) -> None:
    """Emit a CPython-compatible vectorcall wrapper for a native function.

    The emitted C function parses the incoming arguments, unboxes them, calls
    the native function and then boxes the return value.
    """
    emitter.emit_line(f"{wrapper_function_header(fn, emitter.names)} {{")

    # Trailing bitmap arguments are not parsed from the call; they are declared
    # as zero-initialized locals further down.
    params = list(fn.args)
    if fn.sig.num_bitmap_args:
        params = params[: -fn.sig.num_bitmap_args]
    # If fn is a method (other than a static method), the first argument is self.
    if fn.class_name and fn.decl.kind != FUNC_STATICMETHOD:
        self_arg = params.pop(0)
        emitter.emit_line(f"PyObject *obj_{self_arg.name} = self;")

    # The CPyArg_ParseStackAndKeywords format string requires arguments to be
    # ordered as: required, optional, kwonly optional, kwonly required.
    groups = make_arg_groups(params)
    ordered = reorder_arg_groups(groups)
    star_args = groups[ARG_STAR]
    star2_args = groups[ARG_STAR2]

    emitter.emit_line(make_static_kwlist(ordered))
    # Define the arguments the function accepts (but no types yet)
    parser_format = make_format_string(fn.name, groups)
    emitter.emit_line(f'static CPyArg_Parser parser = {{"{parser_format}", kwlist, 0}};')

    for param in params:
        initializer = " = NULL" if param.optional else ""
        emitter.emit_line(f"PyObject *obj_{param.name}{initializer};")

    cleanups = [f"CPy_DECREF(obj_{param.name});" for param in star_args + star2_args]

    # When *args or **kwargs is accepted, both destinations are passed first
    # (NULL standing in for the one that is missing).
    arg_ptrs: list[str] = []
    if star_args or star2_args:
        arg_ptrs.append(f"&obj_{star_args[0].name}" if star_args else "NULL")
        arg_ptrs.append(f"&obj_{star2_args[0].name}" if star2_args else "NULL")
    arg_ptrs.extend(f"&obj_{param.name}" for param in ordered)

    if fn.name == "__call__" and use_vectorcall(emitter.capi_version):
        nargs = "PyVectorcall_NARGS(nargs)"
    else:
        nargs = "nargs"

    # Special case some common signatures
    if not params:
        # No args
        parse_fn = "CPyArg_ParseStackAndKeywordsNoArgs"
    elif len(params) == 1 and len(groups[ARG_POS]) == 1:
        # Single positional arg
        parse_fn = "CPyArg_ParseStackAndKeywordsOneArg"
    elif len(params) == len(groups[ARG_POS]) + len(groups[ARG_OPT]):
        # No keyword-only args, *args or **kwargs
        parse_fn = "CPyArg_ParseStackAndKeywordsSimple"
    else:
        parse_fn = "CPyArg_ParseStackAndKeywords"

    ptr_suffix = "".join(f", {ptr}" for ptr in arg_ptrs)
    emitter.emit_lines(
        f"if (!{parse_fn}(args, {nargs}, kwnames, &parser{ptr_suffix})) {{",
        "return NULL;",
        "}",
    )

    for index in range(fn.sig.num_bitmap_args):
        emitter.emit_line(f"{BITMAP_TYPE} {bitmap_name(index)} = 0;")

    traceback_code = generate_traceback_code(fn, emitter, source_path, module_name)
    generate_wrapper_core(
        fn,
        emitter,
        groups[ARG_OPT] + groups[ARG_NAMED_OPT],
        cleanups=cleanups,
        traceback_code=traceback_code,
    )

    emitter.emit_line("}")
# Legacy generic wrapper functions
#
# These take a self object, a Python tuple of positional arguments,
# and a dict of keyword arguments. These are a lot slower than
# vectorcall wrappers, especially in calls involving keyword
# arguments.
def legacy_wrapper_function_header(fn: FuncIR, names: NameGenerator) -> str:
    """Return the C signature (without body) of the legacy wrapper for fn."""
    return f"PyObject *{PREFIX}{fn.cname(names)}(PyObject *self, PyObject *args, PyObject *kw)"
def generate_legacy_wrapper_function(
    fn: FuncIR, emitter: Emitter, source_path: str, module_name: str
) -> None:
    """Emit a CPython-compatible legacy wrapper for a native function.

    The emitted C function receives a self object, a tuple of positional
    arguments and a dict of keyword arguments. It unboxes the arguments, calls
    the native function and then boxes the return value.
    """
    emitter.emit_line(f"{legacy_wrapper_function_header(fn, emitter.names)} {{")

    # Trailing bitmap arguments are not parsed from the call; they are declared
    # as zero-initialized locals further down.
    params = list(fn.args)
    if fn.sig.num_bitmap_args:
        params = params[: -fn.sig.num_bitmap_args]
    # If fn is a method (other than a static method), the first argument is self.
    if fn.class_name and fn.decl.kind != FUNC_STATICMETHOD:
        self_arg = params.pop(0)
        emitter.emit_line(f"PyObject *obj_{self_arg.name} = self;")

    # The CPyArg_ParseTupleAndKeywords format string requires arguments to be
    # ordered as: required, optional, kwonly optional, kwonly required.
    groups = make_arg_groups(params)
    ordered = reorder_arg_groups(groups)
    star_args = groups[ARG_STAR]
    star2_args = groups[ARG_STAR2]

    emitter.emit_line(make_static_kwlist(ordered))
    for param in params:
        initializer = " = NULL" if param.optional else ""
        emitter.emit_line(f"PyObject *obj_{param.name}{initializer};")

    cleanups = [f"CPy_DECREF(obj_{param.name});" for param in star_args + star2_args]

    # When *args or **kwargs is accepted, both destinations are passed first
    # (NULL standing in for the one that is missing).
    arg_ptrs: list[str] = []
    if star_args or star2_args:
        arg_ptrs.append(f"&obj_{star_args[0].name}" if star_args else "NULL")
        arg_ptrs.append(f"&obj_{star2_args[0].name}" if star2_args else "NULL")
    arg_ptrs.extend(f"&obj_{param.name}" for param in ordered)

    parser_format = make_format_string(None, groups)
    ptr_suffix = "".join(f", {ptr}" for ptr in arg_ptrs)
    emitter.emit_lines(
        f'if (!CPyArg_ParseTupleAndKeywords(args, kw, "{parser_format}", "{fn.name}", '
        f"kwlist{ptr_suffix})) {{",
        "return NULL;",
        "}",
    )

    for index in range(fn.sig.num_bitmap_args):
        emitter.emit_line(f"{BITMAP_TYPE} {bitmap_name(index)} = 0;")

    traceback_code = generate_traceback_code(fn, emitter, source_path, module_name)
    generate_wrapper_core(
        fn,
        emitter,
        groups[ARG_OPT] + groups[ARG_NAMED_OPT],
        cleanups=cleanups,
        traceback_code=traceback_code,
    )

    emitter.emit_line("}")
# Specialized wrapper functions
def generate_dunder_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper that lets a native __dunder__ method fill a protocol slot.

    The wrapper takes all of its arguments as PyObject * and returns a
    PyObject *. Returns the name of the emitted wrapper function.
    """
    wrapper = WrapperGenerator(cl, emitter)
    wrapper.set_target(fn)
    wrapper.emit_header()
    wrapper.emit_arg_processing()
    wrapper.emit_call()
    wrapper.finish()
    return wrapper.wrapper_name()
def generate_ipow_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper for a native __ipow__ method and return its name.

    __ipow__ fills a ternary slot, but almost no one defines __ipow__ to take
    three arguments, so the wrapper needs to be tweaked to always accept three
    arguments. If the native method only takes two, a non-None third argument
    makes the wrapper raise TypeError.
    """
    wrapper = WrapperGenerator(cl, emitter)
    wrapper.set_target(fn)
    assert len(fn.args) in (2, 3), "__ipow__ should only take 2 or 3 arguments"
    # The emitted C function always has the ternary signature.
    wrapper.arg_names = ["self", "exp", "mod"]
    wrapper.emit_header()
    wrapper.emit_arg_processing()
    unsupported_mod = [
        'PyErr_SetString(PyExc_TypeError, "__ipow__ takes 2 positional arguments but 3 were given");',
        "return NULL;",
    ]
    handle_third_pow_argument(fn, emitter, wrapper, if_unsupported=unsupported_mod)
    wrapper.emit_call()
    wrapper.finish()
    return wrapper.wrapper_name()
def generate_bin_op_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper for a native binary dunder method and return its name.

    The same wrapper that handles the forward method (e.g. __add__) also handles
    the corresponding reverse method (e.g. __radd__), if defined.

    Both arguments and the return value are PyObject *.
    """
    gen = WrapperGenerator(cl, emitter)
    gen.set_target(fn)
    is_pow = fn.name in ("__pow__", "__rpow__")
    gen.arg_names = ["left", "right", "mod"] if is_pow else ["left", "right"]
    # Capture the name now: generating both wrappers retargets the generator
    # to the reverse method.
    wrapper_name = gen.wrapper_name()

    gen.emit_header()
    if fn.name not in reverse_op_methods and fn.name in reverse_op_method_names:
        # There's only a reverse operator method.
        generate_bin_op_reverse_only_wrapper(fn, emitter, gen)
        return wrapper_name

    fn_rev = cl.get_method(reverse_op_methods[fn.name])
    if fn_rev is None:
        # There's only a forward operator method.
        generate_bin_op_forward_only_wrapper(fn, emitter, gen)
    else:
        # There's both a forward and a reverse operator method.
        generate_bin_op_both_wrappers(cl, fn, fn_rev, emitter, gen)
    return wrapper_name
def generate_bin_op_forward_only_wrapper(
    fn: FuncIR, emitter: Emitter, gen: WrapperGenerator
) -> None:
    """Emit the wrapper body for a class that only defines the forward method."""
    type_failure = GotoHandler("typefail")
    gen.emit_arg_processing(error=type_failure, raise_exception=False)
    handle_third_pow_argument(fn, emitter, gen, if_unsupported=["goto typefail;"])
    gen.emit_call(not_implemented_handler="goto typefail;")
    gen.emit_error_handling()
    emitter.emit_label("typefail")
    # If some argument has an incompatible type, treat this the same as
    # returning NotImplemented, and try to call the reverse operator method.
    #
    # Note that in normal Python you'd instead use an explicit return of
    # NotImplemented, but that doesn't generally work here, since the body
    # won't be executed at all if there is an argument type check failure.
    #
    # The recommended way is to still use a type check in the
    # body. This will only be used in interpreted mode:
    #
    #    def __add__(self, other: int) -> Foo:
    #        if not isinstance(other, int):
    #            return NotImplemented
    #        ...
    reverse_name = reverse_op_methods[fn.name]
    generate_bin_op_reverse_dunder_call(fn, emitter, reverse_name)
    gen.finish()
def generate_bin_op_reverse_only_wrapper(
    fn: FuncIR, emitter: Emitter, gen: WrapperGenerator
) -> None:
    """Emit the wrapper body for a class that only defines the reverse method.

    Any argument type check failure makes the emitted code return
    NotImplemented.
    """
    # The reverse method receives its operands swapped.
    gen.arg_names = ["right", "left"]
    gen.emit_arg_processing(error=GotoHandler("typefail"), raise_exception=False)
    handle_third_pow_argument(fn, emitter, gen, if_unsupported=["goto typefail;"])
    gen.emit_call()
    gen.emit_error_handling()
    emitter.emit_label("typefail")
    for line in ("Py_INCREF(Py_NotImplemented);", "return Py_NotImplemented;"):
        emitter.emit_line(line)
    gen.finish()
def generate_bin_op_both_wrappers(
    cl: ClassIR, fn: FuncIR, fn_rev: FuncIR, emitter: Emitter, gen: WrapperGenerator
) -> None:
    """Emit the wrapper body for a class defining both forward and reverse methods.

    Note that this retargets ``gen`` to the reverse method.
    """
    # There's both a forward and a reverse operator method. First
    # check if we should try calling the forward one. If the
    # argument type check fails, fall back to the reverse method.
    #
    # Similar to the forward-only case, we can't perfectly match Python
    # semantics. In regular Python code you'd return NotImplemented if the
    # operand has the wrong type, but in compiled code we'll never
    # get to execute the type check.
    emitter.emit_line(
        f"if (PyObject_IsInstance(obj_left, (PyObject *){emitter.type_struct_name(cl)})) {{"
    )
    gen.emit_arg_processing(error=GotoHandler("typefail"), raise_exception=False)
    handle_third_pow_argument(fn, emitter, gen, if_unsupported=["goto typefail2;"])
    # Ternary __rpow__ calls aren't a thing so immediately bail
    # if ternary __pow__ returns NotImplemented.
    ternary_pow = fn.name == "__pow__" and len(fn.args) == 3
    fwd_not_implemented_handler = "goto typefail2;" if ternary_pow else "goto typefail;"
    gen.emit_call(not_implemented_handler=fwd_not_implemented_handler)
    gen.emit_error_handling()
    emitter.emit_line("}")

    # Forward attempt not possible or not implemented: try the reverse method.
    emitter.emit_label("typefail")
    emitter.emit_line(
        f"if (PyObject_IsInstance(obj_right, (PyObject *){emitter.type_struct_name(cl)})) {{"
    )
    gen.set_target(fn_rev)
    gen.arg_names = ["right", "left"]
    gen.emit_arg_processing(error=GotoHandler("typefail2"), raise_exception=False)
    handle_third_pow_argument(fn_rev, emitter, gen, if_unsupported=["goto typefail2;"])
    gen.emit_call()
    gen.emit_error_handling()
    emitter.emit_line("} else {")
    generate_bin_op_reverse_dunder_call(fn, emitter, fn_rev.name)
    emitter.emit_line("}")

    # Nothing could handle the operands.
    emitter.emit_label("typefail2")
    emitter.emit_line("Py_INCREF(Py_NotImplemented);")
    emitter.emit_line("return Py_NotImplemented;")
    gen.finish()
def generate_bin_op_reverse_dunder_call(fn: FuncIR, emitter: Emitter, rmethod: str) -> None:
    """Emit C code that calls the reverse dunder `rmethod` via CPy_CallReverseOpMethod.

    For the power operators the call is guarded so that it only happens when
    obj_mod is None; otherwise the emitted code returns NotImplemented.
    """
    is_pow = fn.name in ("__pow__", "__rpow__")
    if is_pow:
        # Ternary pow() will never call the reverse dunder.
        emitter.emit_line("if (obj_mod == Py_None) {")
    emitter.emit_line(f"_Py_IDENTIFIER({rmethod});")
    op_symbol = op_methods_to_symbols[fn.name]
    emitter.emit_line(
        f'return CPy_CallReverseOpMethod(obj_left, obj_right, "{op_symbol}", &PyId_{rmethod});'
    )
    if is_pow:
        emitter.emit_line("} else {")
        emitter.emit_line("Py_INCREF(Py_NotImplemented);")
        emitter.emit_line("return Py_NotImplemented;")
        emitter.emit_line("}")
def handle_third_pow_argument(
    fn: FuncIR, emitter: Emitter, gen: WrapperGenerator, *, if_unsupported: list[str]
) -> None:
    """Emit handling of the third (mod) argument of a power dunder wrapper.

    Does nothing unless fn is __pow__, __rpow__ or __ipow__. For __rpow__, and
    for __pow__/__ipow__ taking only two arguments, this emits a check that
    runs the `if_unsupported` C lines when obj_mod is not None, and drops the
    third name from gen.arg_names so that it isn't passed to the native call.
    """
    method = fn.name
    if method not in ("__pow__", "__rpow__", "__ipow__"):
        return

    accepts_mod = method != "__rpow__" and len(fn.args) != 2
    if accepts_mod:
        return

    # If the power dunder only supports two arguments and the third
    # argument (AKA mod) is set to a non-default value, simply bail.
    #
    # Importantly, this prevents any ternary __rpow__ calls from
    # happening (as per the language specification).
    emitter.emit_line("if (obj_mod != Py_None) {")
    for c_line in if_unsupported:
        emitter.emit_line(c_line)
    emitter.emit_line("}")
    # The slot wrapper will receive three arguments, but the call only
    # supports two so make sure that the third argument isn't passed
    # along. This is needed as two-argument __(i)pow__ is allowed and
    # rather common.
    if len(gen.arg_names) == 3:
        gen.arg_names.pop()
# Maps each rich comparison dunder method name to the C constant that is used
# as the corresponding `case` label in the generated richcompare wrapper.
RICHCOMPARE_OPS = {
    "__lt__": "Py_LT",
    "__gt__": "Py_GT",
    "__le__": "Py_LE",
    "__ge__": "Py_GE",
    "__eq__": "Py_EQ",
    "__ne__": "Py_NE",
}
def generate_richcompare_wrapper(cl: ClassIR, emitter: Emitter) -> str | None:
    """Emit a single wrapper dispatching to the rich comparison methods of a class.

    Returns the name of the emitted wrapper, or None (emitting nothing) if the
    class has none of the methods listed in RICHCOMPARE_OPS.
    """
    # Sort so that the order of the emitted cases is deterministic.
    defined_ops = sorted(op for op in RICHCOMPARE_OPS if cl.has_method(op))
    if not defined_ops:
        return None

    name = f"{DUNDER_PREFIX}_RichCompare_{cl.name_prefix(emitter.names)}"
    header = f"static PyObject *{name}(PyObject *obj_lhs, PyObject *obj_rhs, int op) {{"
    emitter.emit_line(header)
    emitter.emit_line("switch (op) {")
    for op in defined_ops:
        emitter.emit_line(f"case {RICHCOMPARE_OPS[op]}: {{")
        method = cl.get_method(op)
        assert method is not None
        generate_wrapper_core(method, emitter, arg_names=["lhs", "rhs"])
        emitter.emit_line("}")
    emitter.emit_line("}")

    # Reached when `op` has no matching case above.
    emitter.emit_line("Py_INCREF(Py_NotImplemented);")
    emitter.emit_line("return Py_NotImplemented;")

    emitter.emit_line("}")

    return name
def generate_get_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper for a native __get__ method and return its name.

    The emitted code replaces a NULL `instance` with Py_None before calling
    the native method.
    """
    name = f"{DUNDER_PREFIX}{fn.name}{cl.name_prefix(emitter.names)}"
    signature = f"static PyObject *{name}(PyObject *self, PyObject *instance, PyObject *owner)"
    emitter.emit_line(f"{signature} {{")
    emitter.emit_line("instance = instance ? instance : Py_None;")
    native_name = f"{NATIVE_PREFIX}{fn.cname(emitter.names)}"
    emitter.emit_line(f"return {native_name}(self, instance, owner);")
    emitter.emit_line("}")

    return name
def generate_hash_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper for a native __hash__ method and return its name.

    The emitted function converts the native result to Py_ssize_t, returns -1
    on error, and maps a computed value of -1 to -2.
    """
    name = f"{DUNDER_PREFIX}{fn.name}{cl.name_prefix(emitter.names)}"
    emitter.emit_line(f"static Py_ssize_t {name}(PyObject *self) {{")

    ret_ctype = emitter.ctype_spaced(fn.ret_type)
    callee = f"{emitter.get_group_prefix(fn.decl)}{NATIVE_PREFIX}{fn.cname(emitter.names)}"
    emitter.emit_line(f"{ret_ctype}retval = {callee}(self);")
    emitter.emit_error_check("retval", fn.ret_type, "return -1;")

    converter = "CPyTagged_AsSsize_t" if is_int_rprimitive(fn.ret_type) else "PyLong_AsSsize_t"
    emitter.emit_line(f"Py_ssize_t val = {converter}(retval);")
    emitter.emit_dec_ref("retval", fn.ret_type)
    emitter.emit_line("if (PyErr_Occurred()) return -1;")
    # We can't return -1 from a hash function..
    emitter.emit_line("if (val == -1) return -2;")
    emitter.emit_line("return val;")
    emitter.emit_line("}")

    return name
def generate_len_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper for a native __len__ method and return its name.

    The emitted function converts the native result to Py_ssize_t and returns
    -1 on error.
    """
    name = f"{DUNDER_PREFIX}{fn.name}{cl.name_prefix(emitter.names)}"
    emitter.emit_line(f"static Py_ssize_t {name}(PyObject *self) {{")

    ret_ctype = emitter.ctype_spaced(fn.ret_type)
    callee = f"{emitter.get_group_prefix(fn.decl)}{NATIVE_PREFIX}{fn.cname(emitter.names)}"
    emitter.emit_line(f"{ret_ctype}retval = {callee}(self);")
    emitter.emit_error_check("retval", fn.ret_type, "return -1;")

    converter = "CPyTagged_AsSsize_t" if is_int_rprimitive(fn.ret_type) else "PyLong_AsSsize_t"
    emitter.emit_line(f"Py_ssize_t val = {converter}(retval);")
    emitter.emit_dec_ref("retval", fn.ret_type)
    emitter.emit_line("if (PyErr_Occurred()) return -1;")
    emitter.emit_line("return val;")
    emitter.emit_line("}")

    return name
def generate_bool_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper for a native __bool__ method and return its name.

    Only native methods with a bool return type are supported.
    """
    name = f"{DUNDER_PREFIX}{fn.name}{cl.name_prefix(emitter.names)}"
    emitter.emit_line(f"static int {name}(PyObject *self) {{")
    ret_ctype = emitter.ctype_spaced(fn.ret_type)
    native_name = f"{NATIVE_PREFIX}{fn.cname(emitter.names)}"
    emitter.emit_line(f"{ret_ctype}val = {native_name}(self);")
    emitter.emit_error_check("val", fn.ret_type, "return -1;")
    # Supporting other return types wouldn't be that hard, but it seems
    # unimportant, and getting error handling and unboxing right would be
    # fiddly. (And way easier to do in IR!)
    assert is_bool_rprimitive(fn.ret_type), "Only bool return supported for __bool__"
    emitter.emit_line("return val;")
    emitter.emit_line("}")

    return name
def generate_del_item_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper for a native __delitem__ method and return its name.

    This is only called from a combined __delitem__/__setitem__ wrapper.
    """
    name = f"{DUNDER_PREFIX}__delitem__{cl.name_prefix(emitter.names)}"
    params = ", ".join(f"PyObject *obj_{arg.name}" for arg in fn.args)
    emitter.emit_line(f"static int {name}({params}) {{")
    # The inner helper emits the body, including the closing brace.
    generate_set_del_item_wrapper_inner(fn, emitter, fn.args)
    return name
def generate_set_del_item_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper for a native __setitem__ method (also works for __delitem__).

    This is used with the mapping protocol slot. Arguments are taken as
    PyObject * and the emitted function returns a negative C int on error.

    A separate wrapper function is created for __delitem__ as needed, and the
    __setitem__ wrapper calls it if the value is NULL. Returns the name of the
    outer (__setitem__) wrapper.
    """
    del_name = None
    del_info = cl.get_method_and_class("__delitem__")
    if del_info and del_info[1] == cl:
        # Generate a separate wrapper for __delitem__
        del_name = generate_del_item_wrapper(cl, del_info[0], emitter)

    args = fn.args
    if fn.name == "__delitem__":
        # Add an extra argument for value that we expect to be NULL.
        args = list(args) + [RuntimeArg("___value", object_rprimitive, ARG_POS)]

    name = f"{DUNDER_PREFIX}__setitem__{cl.name_prefix(emitter.names)}"
    params = ", ".join(f"PyObject *obj_{arg.name}" for arg in args)
    emitter.emit_line(f"static int {name}({params}) {{")

    obj_self = f"obj_{args[0].name}"
    obj_key = f"obj_{args[1].name}"
    obj_value = f"obj_{args[2].name}"

    # First check if this is __delitem__
    emitter.emit_line(f"if ({obj_value} == NULL) {{")
    if del_name is None:
        # Try to call superclass method instead
        emitter.emit_line(f"PyObject *super = CPy_Super(CPyModule_builtins, {obj_self});")
        emitter.emit_line("if (super == NULL) return -1;")
        emitter.emit_line(
            f'PyObject *result = PyObject_CallMethod(super, "__delitem__", "O", {obj_key});'
        )
        emitter.emit_line("Py_DECREF(super);")
        emitter.emit_line("Py_XDECREF(result);")
        emitter.emit_line("return result == NULL ? -1 : 0;")
    else:
        # We have a native implementation, so call it
        emitter.emit_line(f"return {del_name}({obj_self}, {obj_key});")
    emitter.emit_line("}")

    set_info = cl.get_method_and_class("__setitem__")
    if set_info and set_info[1] == cl:
        # Native __setitem__; the inner helper also emits the closing brace.
        generate_set_del_item_wrapper_inner(fn, emitter, args)
        return name

    emitter.emit_line(f"PyObject *super = CPy_Super(CPyModule_builtins, {obj_self});")
    emitter.emit_line("if (super == NULL) return -1;")
    emitter.emit_line("PyObject *result;")

    if set_info is None and cl.builtin_base is None:
        msg = f"'{cl.name}' object does not support item assignment"
        emitter.emit_line(f'PyErr_SetString(PyExc_TypeError, "{msg}");')
        emitter.emit_line("result = NULL;")
    else:
        # A base class may have __setitem__
        emitter.emit_line(
            f'result = PyObject_CallMethod(super, "__setitem__", "OO", {obj_key}, {obj_value});'
        )
    emitter.emit_line("Py_DECREF(super);")
    emitter.emit_line("Py_XDECREF(result);")
    emitter.emit_line("return result == NULL ? -1 : 0;")
    emitter.emit_line("}")
    return name
def generate_set_del_item_wrapper_inner(
    fn: FuncIR, emitter: Emitter, args: Sequence[RuntimeArg]
) -> None:
    """Emit the body of a __setitem__/__delitem__ wrapper.

    The emitted code checks/unboxes each argument, calls the native function,
    and returns 0 on success or -1 on failure. The closing brace of the C
    function is emitted here too; the header is emitted by the caller.
    """
    for arg in args:
        generate_arg_check(arg.name, arg.type, emitter, GotoHandler("fail"))
    call_args = ", ".join(f"arg_{arg.name}" for arg in args)
    ret_ctype = emitter.ctype_spaced(fn.ret_type)
    native_name = f"{NATIVE_PREFIX}{fn.cname(emitter.names)}"
    emitter.emit_line(f"{ret_ctype}val = {native_name}({call_args});")
    emitter.emit_error_check("val", fn.ret_type, "goto fail;")
    emitter.emit_dec_ref("val", fn.ret_type)
    emitter.emit_line("return 0;")
    emitter.emit_label("fail")
    emitter.emit_line("return -1;")
    emitter.emit_line("}")
def generate_contains_wrapper(cl: ClassIR, fn: FuncIR, emitter: Emitter) -> str:
    """Emit a wrapper for a native __contains__ method and return its name.

    The emitted function returns a C int: -1 on error, otherwise the truth
    value of the native result.
    """
    name = f"{DUNDER_PREFIX}{fn.name}{cl.name_prefix(emitter.names)}"
    emitter.emit_line(f"static int {name}(PyObject *self, PyObject *obj_item) {{")
    generate_arg_check("item", fn.args[1].type, emitter, ReturnHandler("-1"))
    ret_ctype = emitter.ctype_spaced(fn.ret_type)
    native_name = f"{NATIVE_PREFIX}{fn.cname(emitter.names)}"
    emitter.emit_line(f"{ret_ctype}val = {native_name}(self, arg_item);")
    emitter.emit_error_check("val", fn.ret_type, "return -1;")
    if not is_bool_rprimitive(fn.ret_type):
        # Non-bool result: convert using the object's truth value.
        emitter.emit_line("int boolval = PyObject_IsTrue(val);")
        emitter.emit_dec_ref("val", fn.ret_type)
        emitter.emit_line("return boolval;")
    else:
        emitter.emit_line("return val;")
    emitter.emit_line("}")

    return name
# Helpers
def generate_wrapper_core(
    fn: FuncIR,
    emitter: Emitter,
    optional_args: list[RuntimeArg] | None = None,
    arg_names: list[str] | None = None,
    cleanups: list[str] | None = None,
    traceback_code: str | None = None,
) -> None:
    """Emit the core part of a wrapper function for a native function.

    This expects each argument as a PyObject * named obj_{arg} as a precondition.
    It converts the PyObject *s to the necessary types, checking and unboxing if
    necessary, makes the call, then boxes the result if necessary and returns it.

    If arg_names is given (and non-empty), it overrides the argument names
    taken from fn. cleanups and traceback_code are C code emitted for cleanup
    and on the failure path, respectively.
    """
    wrapper = WrapperGenerator(None, emitter)
    wrapper.set_target(fn)
    if arg_names:
        wrapper.arg_names = arg_names
    wrapper.cleanups = cleanups or []
    wrapper.optional_args = optional_args or []
    wrapper.traceback_code = traceback_code or ""

    # A "fail" label is only emitted when there is something to do on failure.
    error: ErrorHandler
    if wrapper.use_goto():
        error = GotoHandler("fail")
    else:
        error = ReturnHandler("NULL")
    wrapper.emit_arg_processing(error=error)
    wrapper.emit_call()
    wrapper.emit_error_handling()
def generate_arg_check(
    name: str,
    typ: RType,
    emitter: Emitter,
    error: ErrorHandler | None = None,
    *,
    optional: bool = False,
    raise_exception: bool = True,
    bitmap_arg_index: int = 0,
) -> None:
    """Insert a runtime check for argument and unbox if necessary.

    The object is named PyObject *obj_{}. This is expected to generate
    a value of name arg_{} (unboxed if necessary). For each primitive a runtime
    check ensures the correct type.

    Args:
        name: Argument name; used to form the obj_{name} and arg_{name} C names.
        typ: The expected type of the argument.
        emitter: Receives the generated C code.
        error: How to handle a failed check (defaults to an AssignHandler).
        optional: If True, obj_{name} may be NULL (the argument wasn't given).
        raise_exception: Passed through to the unbox/cast emitters.
        bitmap_arg_index: Index of the "argument was provided" bit to set; only
            used for optional arguments of unboxed types with error overlap.
    """
    error = error or AssignHandler()
    if typ.is_unboxed:
        if typ.error_overlap and optional:
            # Update bitmap if value is provided.
            init = emitter.c_undefined_value(typ)
            emitter.emit_line(f"{emitter.ctype(typ)} arg_{name} = {init};")
            emitter.emit_line(f"if (obj_{name} != NULL) {{")
            bitmap = bitmap_name(bitmap_arg_index // BITMAP_BITS)
            bit = bitmap_arg_index & (BITMAP_BITS - 1)
            # Perform the shift in the bitmap's own C type. A plain `1 << n`
            # shifts a signed int, which is undefined behavior in C once the
            # bit index reaches the sign bit of int.
            emitter.emit_line(f"{bitmap} |= ({BITMAP_TYPE})1 << {bit};")
            emitter.emit_unbox(
                f"obj_{name}",
                f"arg_{name}",
                typ,
                declare_dest=False,
                raise_exception=raise_exception,
                error=error,
                borrow=True,
            )
            emitter.emit_line("}")
        else:
            # Borrow when unboxing to avoid reference count manipulation.
            emitter.emit_unbox(
                f"obj_{name}",
                f"arg_{name}",
                typ,
                declare_dest=True,
                raise_exception=raise_exception,
                error=error,
                borrow=True,
                optional=optional,
            )
    elif is_object_rprimitive(typ):
        # Object is trivial since any object is valid
        if optional:
            emitter.emit_line(f"PyObject *arg_{name};")
            emitter.emit_line(f"if (obj_{name} == NULL) {{")
            emitter.emit_line(f"arg_{name} = {emitter.c_error_value(typ)};")
            emitter.emit_lines("} else {", f"arg_{name} = obj_{name}; ", "}")
        else:
            emitter.emit_line(f"PyObject *arg_{name} = obj_{name};")
    else:
        emitter.emit_cast(
            f"obj_{name}",
            f"arg_{name}",
            typ,
            declare_dest=True,
            raise_exception=raise_exception,
            error=error,
            optional=optional,
        )
class WrapperGenerator:
    """Helper that simplifies the generation of wrapper functions."""

    # TODO: Use this for more wrappers

    def __init__(self, cl: ClassIR | None, emitter: Emitter) -> None:
        self.cl = cl
        self.emitter = emitter
        # C statements emitted after the native call and on the failure path.
        self.cleanups: list[str] = []
        # Arguments that may be omitted by the caller.
        self.optional_args: list[RuntimeArg] = []
        # C statement emitted on the failure path (empty for none).
        self.traceback_code = ""

    def set_target(self, fn: FuncIR) -> None:
        """Set the wrapped function.

        It's fine to modify the attributes initialized here later to customize
        the wrapper function.
        """
        self.target_name = fn.name
        self.target_cname = fn.cname(self.emitter.names)
        bitmap_count = fn.sig.num_bitmap_args
        self.num_bitmap_args = bitmap_count
        # Trailing bitmap arguments are not regular wrapper arguments.
        self.args = fn.args[:-bitmap_count] if bitmap_count else fn.args
        self.arg_names = [arg.name for arg in self.args]
        self.ret_type = fn.ret_type

    def wrapper_name(self) -> str:
        """Return the name of the wrapper function."""
        class_part = self.cl.name_prefix(self.emitter.names) if self.cl else ""
        return f"{DUNDER_PREFIX}{self.target_name}{class_part}"

    def use_goto(self) -> bool:
        """Do we use a goto for error handling (instead of straight return)?"""
        return bool(self.cleanups or self.traceback_code)

    def emit_header(self) -> None:
        """Emit the function header of the wrapper implementation."""
        params = ", ".join(f"PyObject *obj_{arg}" for arg in self.arg_names)
        self.emitter.emit_line(f"static PyObject *{self.wrapper_name()}({params}) {{")

    def emit_arg_processing(
        self, error: ErrorHandler | None = None, raise_exception: bool = True
    ) -> None:
        """Emit validation and unboxing of arguments."""
        error = error or self.error()
        next_bitmap_index = 0
        for arg_name, arg in zip(self.arg_names, self.args):
            if arg.kind in (ARG_STAR, ARG_STAR2):
                # Suppress the argument check for *args/**kwargs, since we know
                # it must be right.
                typ = object_rprimitive
            else:
                typ = arg.type
            is_optional = arg in self.optional_args
            generate_arg_check(
                arg_name,
                typ,
                self.emitter,
                error,
                raise_exception=raise_exception,
                optional=is_optional,
                bitmap_arg_index=next_bitmap_index,
            )
            # Only optional arguments with error overlap consume a bitmap bit.
            if is_optional and typ.error_overlap:
                next_bitmap_index += 1

    def emit_call(self, not_implemented_handler: str = "") -> None:
        """Emit the call to the wrapped native function and the return statement.

        If not_implemented_handler is non-empty, use this C code to handle
        a NotImplemented return value (if it's possible based on the return type).
        """
        call_args = ", ".join(f"arg_{arg}" for arg in self.arg_names)
        if self.num_bitmap_args:
            bitmaps = ", ".join(
                [bitmap_name(i) for i in reversed(range(self.num_bitmap_args))]
            )
            call_args = f"{call_args}, {bitmaps}"
        native_call = f"{NATIVE_PREFIX}{self.target_cname}({call_args})"

        ret_type = self.ret_type
        emitter = self.emitter
        if ret_type.is_unboxed or self.use_goto():
            # TODO: The Py_RETURN macros return the correct PyObject * with reference count
            #       handling. Are they relevant?
            emitter.emit_line(f"{emitter.ctype_spaced(ret_type)}retval = {native_call};")
            emitter.emit_lines(*self.cleanups)
            if ret_type.is_unboxed:
                emitter.emit_error_check("retval", ret_type, "return NULL;")
                emitter.emit_box("retval", "retbox", ret_type, declare_dest=True)
                emitter.emit_line("return retbox;")
            else:
                emitter.emit_line("return retval;")
            return

        if not_implemented_handler and not isinstance(ret_type, RInstance):
            # The return value type may overlap with NotImplemented.
            emitter.emit_line(f"PyObject *retbox = {native_call};")
            emitter.emit_lines(
                "if (retbox == Py_NotImplemented) {",
                not_implemented_handler,
                "}",
                "return retbox;",
            )
        else:
            emitter.emit_line(f"return {native_call};")
        # TODO: Tracebacks?

    def error(self) -> ErrorHandler:
        """Figure out how to deal with errors in the wrapper."""
        if self.use_goto():
            # We'll have a label at the end with error handling code.
            return GotoHandler("fail")
        # Nothing special needs to be done to handle errors, so just return.
        return ReturnHandler("NULL")

    def emit_error_handling(self) -> None:
        """Emit error handling block at the end of the wrapper, if needed."""
        if not self.use_goto():
            return
        emitter = self.emitter
        emitter.emit_label("fail")
        emitter.emit_lines(*self.cleanups)
        if self.traceback_code:
            emitter.emit_line(self.traceback_code)
        emitter.emit_line("return NULL;")

    def finish(self) -> None:
        """Emit the closing brace of the wrapper function."""
        self.emitter.emit_line("}")

View File

@@ -0,0 +1,302 @@
from __future__ import annotations
from typing import Final, FrozenSet, Tuple, Union
from typing_extensions import TypeGuard
# Supported Python literal types. All tuple / frozenset items must have supported
# literal types as well, but we can't represent the type precisely, so the item
# type is spelled as `object` here.
LiteralValue = Union[
    str, bytes, int, bool, float, complex, Tuple[object, ...], FrozenSet[object], None
]
def _is_literal_value(obj: object) -> TypeGuard[LiteralValue]:
return isinstance(obj, (str, bytes, int, float, complex, tuple, frozenset, type(None)))
# Some literals are singletons and handled specially (None, False and True).
# They occupy literal indexes 0, 1 and 2, ahead of all recorded literals.
NUM_SINGLETONS: Final = 3
class Literals:
"""Collection of literal values used in a compilation group and related helpers."""
def __init__(self) -> None:
# Each dict maps value to literal index (0, 1, ...)
self.str_literals: dict[str, int] = {}
self.bytes_literals: dict[bytes, int] = {}
self.int_literals: dict[int, int] = {}
self.float_literals: dict[float, int] = {}
self.complex_literals: dict[complex, int] = {}
self.tuple_literals: dict[tuple[object, ...], int] = {}
self.frozenset_literals: dict[frozenset[object], int] = {}
def record_literal(self, value: LiteralValue) -> None:
"""Ensure that the literal value is available in generated code."""
if value is None or value is True or value is False:
# These are special cased and always present
return
if isinstance(value, str):
str_literals = self.str_literals
if value not in str_literals:
str_literals[value] = len(str_literals)
elif isinstance(value, bytes):
bytes_literals = self.bytes_literals
if value not in bytes_literals:
bytes_literals[value] = len(bytes_literals)
elif isinstance(value, int):
int_literals = self.int_literals
if value not in int_literals:
int_literals[value] = len(int_literals)
elif isinstance(value, float):
float_literals = self.float_literals
if value not in float_literals:
float_literals[value] = len(float_literals)
elif isinstance(value, complex):
complex_literals = self.complex_literals
if value not in complex_literals:
complex_literals[value] = len(complex_literals)
elif isinstance(value, tuple):
tuple_literals = self.tuple_literals
if value not in tuple_literals:
for item in value:
assert _is_literal_value(item)
self.record_literal(item)
tuple_literals[value] = len(tuple_literals)
elif isinstance(value, frozenset):
frozenset_literals = self.frozenset_literals
if value not in frozenset_literals:
for item in value:
assert _is_literal_value(item)
self.record_literal(item)
frozenset_literals[value] = len(frozenset_literals)
else:
assert False, "invalid literal: %r" % value
def literal_index(self, value: LiteralValue) -> int:
"""Return the index to the literals array for given value."""
# The array contains first None and booleans, followed by all str values,
# followed by bytes values, etc.
if value is None:
return 0
elif value is False:
return 1
elif value is True:
return 2
n = NUM_SINGLETONS
if isinstance(value, str):
return n + self.str_literals[value]
n += len(self.str_literals)
if isinstance(value, bytes):
return n + self.bytes_literals[value]
n += len(self.bytes_literals)
if isinstance(value, int):
return n + self.int_literals[value]
n += len(self.int_literals)
if isinstance(value, float):
return n + self.float_literals[value]
n += len(self.float_literals)
if isinstance(value, complex):
return n + self.complex_literals[value]
n += len(self.complex_literals)
if isinstance(value, tuple):
return n + self.tuple_literals[value]
n += len(self.tuple_literals)
if isinstance(value, frozenset):
return n + self.frozenset_literals[value]
assert False, "invalid literal: %r" % value
def num_literals(self) -> int:
# The first three are for None, True and False
return (
NUM_SINGLETONS
+ len(self.str_literals)
+ len(self.bytes_literals)
+ len(self.int_literals)
+ len(self.float_literals)
+ len(self.complex_literals)
+ len(self.tuple_literals)
+ len(self.frozenset_literals)
)
# The following methods return the C encodings of literal values
# of different types
def encoded_str_values(self) -> list[bytes]:
return _encode_str_values(self.str_literals)
def encoded_int_values(self) -> list[bytes]:
    """Return the recorded int literals in their encoded form."""
    table = self.int_literals
    return _encode_int_values(table)
def encoded_bytes_values(self) -> list[bytes]:
    """Return the recorded bytes literals in their encoded form."""
    table = self.bytes_literals
    return _encode_bytes_values(table)
def encoded_float_values(self) -> list[str]:
    """Return the recorded float literals in their encoded form."""
    table = self.float_literals
    return _encode_float_values(table)
def encoded_complex_values(self) -> list[str]:
    """Return the recorded complex literals in their encoded form."""
    table = self.complex_literals
    return _encode_complex_values(table)
def encoded_tuple_values(self) -> list[str]:
    """Return the recorded tuple literals in their encoded form."""
    table = self.tuple_literals
    return self._encode_collection_values(table)
def encoded_frozenset_values(self) -> list[str]:
    """Return the recorded frozenset literals in their encoded form."""
    table = self.frozenset_literals
    return self._encode_collection_values(table)
def _encode_collection_values(
    self, values: dict[tuple[object, ...], int] | dict[frozenset[object], int]
) -> list[str]:
    """Flatten tuple/frozenset literals into a list of decimal strings.

    ``values`` maps each collection to its index; collections are emitted
    in index order. The layout of the result is:

        <number of collections>
        <length of the first collection>
        <literal index of first item>
        ...
        <literal index of last item>
        <length of the second collection>
        ...

    Items are referenced by their position in the literals array (see
    ``literal_index``), not stored inline.
    """
    by_position = {position: collection for collection, position in values.items()}
    total = len(values)
    encoded = [str(total)]
    for position in range(total):
        collection = by_position[position]
        encoded.append(str(len(collection)))
        # Items are visited in the collection's own iteration order.
        for member in collection:
            assert _is_literal_value(member)
            encoded.append(str(self.literal_index(member)))
    return encoded
def _encode_str_values(values: dict[str, int]) -> list[bytes]:
    """Pack str literals into a list of length-prefixed groups.

    ``values`` maps each string to its index; strings are emitted in index
    order. Every group starts with the number of strings it holds (encoded
    with ``format_int``), followed by the strings themselves as produced by
    ``format_str_literal``. The list always ends with an empty bytes object.
    """
    by_position = {position: text for text, position in values.items()}
    groups: list[bytes] = []
    pending: list[bytes] = []
    pending_size = 0

    def flush() -> None:
        # Emit the pending items as "<count><item><item>..." and start over.
        groups.append(format_int(len(pending)) + b"".join(pending))
        pending.clear()

    for position in range(len(values)):
        item = format_str_literal(by_position[position])
        item_size = len(item)
        # 70 bytes is a soft cap per group: an item that is too big on its
        # own still gets emitted, alone in its group.
        if pending_size > 0 and pending_size + item_size > 70:
            flush()
            pending_size = 0
        pending.append(item)
        pending_size += item_size
    if pending:
        flush()
    # Empty entry terminates the list.
    groups.append(b"")
    return groups
def _encode_bytes_values(values: dict[bytes, int]) -> list[bytes]:
    """Pack bytes literals into a list of length-prefixed groups.

    ``values`` maps each bytes object to its index; they are emitted in
    index order. Every group starts with the number of items it holds
    (encoded with ``format_int``); each item is its own length (also via
    ``format_int``) followed by the raw bytes. The list always ends with an
    empty bytes object.
    """
    by_position = {position: data for data, position in values.items()}
    groups: list[bytes] = []
    pending: list[bytes] = []
    pending_size = 0

    def flush() -> None:
        # Emit the pending items as "<count><item><item>..." and start over.
        groups.append(format_int(len(pending)) + b"".join(pending))
        pending.clear()

    for position in range(len(values)):
        data = by_position[position]
        prefix = format_int(len(data))
        item_size = len(prefix) + len(data)
        # 70 bytes is a soft cap per group: an item that is too big on its
        # own still gets emitted, alone in its group.
        if pending_size > 0 and pending_size + item_size > 70:
            flush()
            pending_size = 0
        pending.append(prefix + data)
        pending_size += item_size
    if pending:
        flush()
    # Empty entry terminates the list.
    groups.append(b"")
    return groups
def format_int(n: int) -> bytes:
    """Encode a non-negative integer as a variable-length byte string.

    The value is split into 7-bit digits, most significant digit first.
    Every byte except the last has its high bit set to signal that more
    digits follow, so values below 128 occupy exactly one byte.
    """
    if n < 128:
        return bytes([n])
    digits = []
    while n > 0:
        digits.append(n & 0x7F)
        n >>= 7
    # Digits were collected least significant first; flip to big-endian.
    digits.reverse()
    # Flag every digit but the final one as "more 7-bit digits follow".
    return bytes([digit | 0x80 for digit in digits[:-1]] + digits[-1:])
def format_str_literal(s: str) -> bytes:
    """Return the UTF-8 encoding of ``s`` prefixed by its byte length.

    The length is encoded with ``format_int`` and counts encoded bytes,
    not characters.
    """
    payload = s.encode("utf-8")
    prefix = format_int(len(payload))
    return prefix + payload
def _encode_int_values(values: dict[int, int]) -> list[bytes]:
    """Pack int literals into a list of counted groups.

    ``values`` maps each integer to its index; integers are emitted in
    index order. Every group starts with the number of integers it holds
    (encoded with ``format_int``), followed by the integers written in
    base 10 and separated by 0 bytes. The list always ends with an empty
    bytes object.
    """
    by_position = {position: number for number, position in values.items()}
    groups: list[bytes] = []
    pending: list[bytes] = []
    pending_size = 0

    def flush() -> None:
        # Emit the pending items as "<count><digits>\0<digits>..." and start over.
        groups.append(format_int(len(pending)) + b"\0".join(pending))
        pending.clear()

    for position in range(len(values)):
        digits = b"%d" % by_position[position]
        # 70 bytes is a soft cap per group (separators are not counted): an
        # item that is too big on its own still gets emitted, alone.
        if pending_size > 0 and pending_size + len(digits) > 70:
            flush()
            pending_size = 0
        pending.append(digits)
        pending_size += len(digits)
    if pending:
        flush()
    # Empty entry terminates the list.
    groups.append(b"")
    return groups
def float_to_c(x: float) -> str:
    """Return the text used to spell the float ``x`` in C source.

    Infinities and NaN are mapped to the ``INFINITY`` / ``NAN`` names;
    every other value is returned as Python's own ``str()`` rendering.
    """
    text = str(x)
    # Python's spellings of the non-finite values, keyed to their C names.
    special_names = {"inf": "INFINITY", "-inf": "-INFINITY", "nan": "NAN"}
    return special_names.get(text, text)
def _encode_float_values(values: dict[float, int]) -> list[str]:
    """Encode float literals as a list of C array entries.

    ``values`` maps each float to its index. The result holds the number
    of values first, then each value (in index order) converted with
    ``float_to_c``.
    """
    by_position = {position: number for number, position in values.items()}
    total = len(values)
    encoded = [str(total)]
    encoded.extend(float_to_c(by_position[position]) for position in range(total))
    return encoded
def _encode_complex_values(values: dict[complex, int]) -> list[str]:
    """Encode complex literals as a list of C array entries.

    ``values`` maps each complex number to its index. The result holds the
    number of values first, then for each value (in index order) a pair of
    entries: the real part followed by the imaginary part, both converted
    with ``float_to_c``.
    """
    by_position = {position: number for number, position in values.items()}
    total = len(values)
    encoded = [str(total)]
    for position in range(total):
        number = by_position[position]
        encoded.extend((float_to_c(number.real), float_to_c(number.imag)))
    return encoded