Major fixes and new features
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-09-25 15:51:48 +09:00
parent dd7349bb4c
commit ddce9f5125
5586 changed files with 1470941 additions and 0 deletions

View File

@@ -0,0 +1,499 @@
"""Intermediate representation of classes."""
from __future__ import annotations
from typing import List, NamedTuple
from mypyc.common import PROPSET_PREFIX, JsonDict
from mypyc.ir.func_ir import FuncDecl, FuncIR, FuncSignature
from mypyc.ir.ops import DeserMaps, Value
from mypyc.ir.rtypes import RInstance, RType, deserialize_type
from mypyc.namegen import NameGenerator, exported_name
# Some notes on the vtable layout: Each concrete class has a vtable
# that contains function pointers for its methods. So that subclasses
# may be efficiently used when their parent class is expected, the
# layout of child vtables must be an extension of their base class's
# vtable.
#
# This makes multiple inheritance tricky, since obviously we cannot be
# an extension of multiple parent classes. We solve this by requiring
# all but one parent to be "traits", which we can operate on in a
# somewhat less efficient way. For each trait implemented by a class,
# we generate a separate vtable for the methods in that trait.
# We then store an array of (trait type, trait vtable) pointers alongside
# a class's main vtable. When we want to call a trait method, we
# (at runtime!) search the array of trait vtables to find the correct one,
# then call through it.
# Trait vtables additionally need entries for attribute getters and setters,
# since they can't always be in the same location.
#
# To keep down the number of indirections necessary, we store the
# array of trait vtables in the memory *before* the class vtable, and
# search it backwards. (This is a trick we can only do once---there
# are only two directions to store data in---but I don't think we'll
# need it again.)
# There are some tricks we could try in the future to store the trait
# vtables inline in the trait table (which would cut down one indirection),
# but this seems good enough for now.
#
# As an example:
# Imagine that we have a class B that inherits from a concrete class A
# and traits T1 and T2, and that A has methods foo() and
# bar() and B overrides bar() with a more specific type.
# Then B's vtable will look something like:
#
#        T1 type object
#        ptr to B's T1 trait vtable
#        T2 type object
#        ptr to B's T2 trait vtable
# -> | A.foo
#    | Glue function that converts between A.bar's type and B.bar
#      B.bar
#      B.baz
#
# The arrow points to the "start" of the vtable (what vtable pointers
# point to) and the bars indicate which parts correspond to the parent
# class A's vtable layout.
#
# Classes that allow interpreted code to subclass them also have a
# "shadow vtable" that contains implementations that delegate to
# making a pycall, so that overridden methods in interpreted children
# will be called. (A better strategy could dynamically generate these
# vtables based on which methods are overridden in the children.)
# Descriptions of method and attribute entries in class vtables.
# The 'cls' field is the class that the method/attr was defined in,
# which might be a parent class.
# The 'shadow_method', if present, contains the method that should be
# placed in the class's shadow vtable (if it has one).
class VTableMethod(NamedTuple):
    """Description of a single method entry in a class vtable."""

    # Class the method was defined in (this might be a parent class).
    cls: "ClassIR"
    # Name of the method.
    name: str
    # IR of the method placed in the regular vtable.
    method: FuncIR
    # If present, the method that should be placed in the class's shadow
    # vtable (if the class has one).
    shadow_method: FuncIR | None
# A vtable is described as an ordered list of its method entries.
VTableEntries = List[VTableMethod]
class ClassIR:
    """IR-level description of a single class.

    Besides compile-time facts about the class, this also describes the
    runtime structure of native instances.
    """

    def __init__(
        self,
        name: str,
        module_name: str,
        is_trait: bool = False,
        is_generated: bool = False,
        is_abstract: bool = False,
        is_ext_class: bool = True,
    ) -> None:
        self.name = name
        self.module_name = module_name
        self.is_trait = is_trait
        self.is_generated = is_generated
        self.is_abstract = is_abstract
        self.is_ext_class = is_ext_class
        # Augmented classes carry extra methods beyond what mypyc generates
        # (currently only dataclasses).
        self.is_augmented = False
        # True if the class inherits from a Python class.
        self.inherits_python = False
        # True if instances have a __dict__.
        self.has_dict = False
        # True if interpreted subclasses are allowed (derived from a mypyc_attr).
        self.allow_interpreted_subclasses = False
        # True if getseters must be generated for the attributes (they are
        # also added whenever is_generated is False).
        self.needs_getseters = False
        # Whether the class was declared serializable (copy.copy and pickle
        # support) via @mypyc_attr(serializable=True).
        #
        # A class where this is False but whose __init__ can be called with
        # no arguments is *implicitly serializable*: __init__ is then called
        # without arguments during deserialization. When this is True we
        # match Python semantics and __init__ is not called during
        # deserialization.
        #
        # Subclasses are affected as well; is_serializable() also takes the
        # base classes into account.
        self._serializable = False
        # Name of the object for the built-in Python base class, if this is a
        # subclass of one. Only supported in a few ad-hoc cases.
        self.builtin_base: str | None = None
        # Default empty constructor
        self.ctor = FuncDecl(name, None, module_name, FuncSignature([], RInstance(self)))
        # Attributes defined directly in this class (inherited ones excluded)
        self.attributes: dict[str, RType] = {}
        # Attributes that can be deleted
        self.deletable: list[str] = []
        # Signatures of every method; populated before any method is
        # generated, and later stages rely on it being complete.
        self.method_decls: dict[str, FuncDecl] = {}
        # Methods that are actually present in an extension class
        self.methods: dict[str, FuncIR] = {}
        # Glue methods that box/unbox when an override changes a method's
        # type. Keyed by (parent class overridden, method name).
        self.glue_methods: dict[tuple[ClassIR, str], FuncIR] = {}
        # Properties look like attributes but behave like method calls. They
        # are kept out of `methods` so they are not exposed through Python's
        # method API, yet they go into our own vtable as methods so that they
        # are handled and overridden properly. Each value is a
        # (getter, optional setter) pair.
        self.properties: dict[str, tuple[FuncIR, FuncIR | None]] = {}
        # Generated in prepare_class_def so they are available when other
        # methods and properties that depend on these types are generated.
        self.property_types: dict[str, RType] = {}

        self.vtable: dict[str, int] | None = None
        self.vtable_entries: VTableEntries = []
        self.trait_vtables: dict[ClassIR, VTableEntries] = {}

        # N.B: not necessarily the direct base. It is the nearest concrete
        # base, but a trait is allowed in between.
        self.base: ClassIR | None = None
        self.traits: list[ClassIR] = []
        # A working mro for most generated classes. Real classes need to fix
        # it up.
        self.mro: list[ClassIR] = [self]
        # Chain of concrete (non-trait) ancestors
        self.base_mro: list[ClassIR] = [self]

        # Direct subclasses only (subclasses() also includes indirect ones).
        # None if separate compilation prevents this from working.
        #
        # has_no_subclasses() or subclasses() are usually better choices.
        self.children: list[ClassIR] | None = []

        # Instance attributes initialized in the class body.
        self.attrs_with_defaults: set[str] = set()

        # Attributes always initialized in __init__ or the class body
        # (inferred in mypyc.analysis.attrdefined using interprocedural analysis)
        self._always_initialized_attrs: set[str] = set()

        # Attributes that are only sometimes initialized in __init__
        self._sometimes_initialized_attrs: set[str] = set()

        # If True, __init__ can make 'self' visible to unanalyzed/arbitrary code
        self.init_self_leak = False

        # Attributes whose definedness is tracked in a bitmap; the list index
        # is the bit number. Inherited attributes are included. The bitmap is
        # needed for types such as native ints that have no dedicated error
        # value distinct from every valid value, and is consulted when an
        # attribute's value equals the error value.
        self.bitmap_attrs: list[str] = []

    def __repr__(self) -> str:
        return (
            f"ClassIR(name={self.name}, module_name={self.module_name}, "
            f"is_trait={self.is_trait}, is_generated={self.is_generated}, "
            f"is_abstract={self.is_abstract}, is_ext_class={self.is_ext_class})"
        )

    @property
    def fullname(self) -> str:
        """Module-qualified name of the class."""
        return "{}.{}".format(self.module_name, self.name)

    def real_base(self) -> ClassIR | None:
        """Return the actual concrete base class, if there is one."""
        mro = self.mro
        if len(mro) <= 1 or mro[1].is_trait:
            return None
        return mro[1]

    def vtable_entry(self, name: str) -> int:
        """Return the vtable index registered for `name`."""
        table = self.vtable
        assert table is not None, "vtable not computed yet"
        assert name in table, f"{self.name!r} has no attribute {name!r}"
        return table[name]

    def attr_details(self, name: str) -> tuple[RType, ClassIR]:
        """Return (type, defining class) for an attribute or property.

        The mro is searched in order; KeyError is raised if nothing matches.
        """
        for ancestor in self.mro:
            if name in ancestor.attributes:
                return ancestor.attributes[name], ancestor
            if name in ancestor.property_types:
                return ancestor.property_types[name], ancestor
        raise KeyError(f"{self.name!r} has no attribute {name!r}")

    def attr_type(self, name: str) -> RType:
        """Return the type of attribute `name` (KeyError if missing)."""
        rtype, _owner = self.attr_details(name)
        return rtype

    def method_decl(self, name: str) -> FuncDecl:
        """Return the first declaration of `name` along the mro."""
        for ancestor in self.mro:
            decls = ancestor.method_decls
            if name in decls:
                return decls[name]
        raise KeyError(f"{self.name!r} has no attribute {name!r}")

    def method_sig(self, name: str) -> FuncSignature:
        """Return the signature of method `name`."""
        decl = self.method_decl(name)
        return decl.sig

    def has_method(self, name: str) -> bool:
        """Tell whether method_decl() can resolve `name`."""
        try:
            self.method_decl(name)
        except KeyError:
            return False
        else:
            return True

    def is_method_final(self, name: str) -> bool:
        """Tell whether no known subclass resolves `name` differently."""
        subs = self.subclasses()
        if subs is None:
            # TODO: Look at the final attribute!
            return False

        if not self.has_method(name):
            return not any(sub.has_method(name) for sub in subs)

        own_decl = self.method_decl(name)
        return not any(sub.method_decl(name) != own_decl for sub in subs)

    def has_attr(self, name: str) -> bool:
        """Tell whether attr_type() can resolve `name`."""
        try:
            self.attr_type(name)
        except KeyError:
            return False
        else:
            return True

    def is_deletable(self, name: str) -> bool:
        """Tell whether any class in the mro lists `name` as deletable."""
        for ancestor in self.mro:
            if name in ancestor.deletable:
                return True
        return False

    def is_always_defined(self, name: str) -> bool:
        """Tell whether `name` is always initialized and cannot be deleted."""
        return not self.is_deletable(name) and name in self._always_initialized_attrs

    def name_prefix(self, names: NameGenerator) -> str:
        """Return the private name generated for this class."""
        return names.private_name(self.module_name, self.name)

    def struct_name(self, names: NameGenerator) -> str:
        """Return the exported class name with an "Object" suffix."""
        exported = exported_name(self.fullname)
        return f"{exported}Object"

    def get_method_and_class(
        self, name: str, *, prefer_method: bool = False
    ) -> tuple[FuncIR, ClassIR] | None:
        """Return (method IR, defining class) for `name`, or None."""
        for ancestor in self.mro:
            if name not in ancestor.methods:
                continue
            candidate = ancestor.methods[name]
            if not prefer_method and candidate.decl.implicit:
                # An implicit accessor means there is also an attribute
                # definition, which the caller prefers. This happens when an
                # attribute overrides a property.
                return None
            return candidate, ancestor
        return None

    def get_method(self, name: str, *, prefer_method: bool = False) -> FuncIR | None:
        """Like get_method_and_class(), but return only the method IR."""
        found = self.get_method_and_class(name, prefer_method=prefer_method)
        if not found:
            return None
        return found[0]

    def has_method_decl(self, name: str) -> bool:
        """Tell whether any class in the mro declares method `name`."""
        for ancestor in self.mro:
            if name in ancestor.method_decls:
                return True
        return False

    def has_no_subclasses(self) -> bool:
        """Tell whether the class has no children and forbids interpreted ones."""
        if self.allow_interpreted_subclasses:
            return False
        return self.children == []

    def subclasses(self) -> set[ClassIR] | None:
        """Return all subclasses of this class, both direct and indirect.

        Return None if it is impossible to identify all subclasses, for example
        because we are performing separate compilation.
        """
        if self.children is None or self.allow_interpreted_subclasses:
            return None
        collected = set(self.children)
        for child in self.children:
            if not child.children:
                continue
            nested = child.subclasses()
            if nested is None:
                return None
            collected |= nested
        return collected

    def concrete_subclasses(self) -> list[ClassIR] | None:
        """Return all concrete (i.e. non-trait and non-abstract) subclasses.

        Include both direct and indirect subclasses. Place classes with no children first.
        """
        subs = self.subclasses()
        if subs is None:
            return None
        concrete = {c for c in subs if not (c.is_trait or c.is_abstract)}
        # Childless classes go first because they are more likely to show up
        # in isinstance() checks; the name is the tie-breaker so that the
        # order is stable.
        ordered = list(concrete)
        ordered.sort(key=lambda c: (len(c.children or []), c.name))
        return ordered

    def is_serializable(self) -> bool:
        """Tell whether this class or any class in its mro is serializable."""
        for ancestor in self.mro:
            if ancestor._serializable:
                return True
        return False

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict describing this class.

        Other class IRs are referenced by full name and function IRs by id.
        """
        # NOTE(review): bitmap_attrs is not written here (and not restored by
        # deserialize) -- confirm that this is intentional.
        children = self.children
        return {
            "name": self.name,
            "module_name": self.module_name,
            "is_trait": self.is_trait,
            "is_ext_class": self.is_ext_class,
            "is_abstract": self.is_abstract,
            "is_generated": self.is_generated,
            "is_augmented": self.is_augmented,
            "inherits_python": self.inherits_python,
            "has_dict": self.has_dict,
            "allow_interpreted_subclasses": self.allow_interpreted_subclasses,
            "needs_getseters": self.needs_getseters,
            "_serializable": self._serializable,
            "builtin_base": self.builtin_base,
            "ctor": self.ctor.serialize(),
            # Dicts are written as lists of pairs so that order is preserved
            "attributes": [
                (attr, rtype.serialize()) for attr, rtype in self.attributes.items()
            ],
            # Prefer a name reference; if the decl has no entry in methods we
            # cannot be sure that works, so the whole decl is serialized.
            "method_decls": [
                (meth, decl.id if meth in self.methods else decl.serialize())
                for meth, decl in self.method_decls.items()
            ],
            # Only method ids are written; the methods live in a separate dict
            "methods": [(meth, func.id) for meth, func in self.methods.items()],
            "glue_methods": [
                ((owner.fullname, meth), func.id)
                for (owner, meth), func in self.glue_methods.items()
            ],
            # properties and property_types are written separately out of an
            # abundance of caution about preserving dict ordering...
            "property_types": [
                (prop, rtype.serialize()) for prop, rtype in self.property_types.items()
            ],
            "properties": list(self.properties),
            "vtable": self.vtable,
            "vtable_entries": serialize_vtable(self.vtable_entries),
            "trait_vtables": [
                (trait.fullname, serialize_vtable(entries))
                for trait, entries in self.trait_vtables.items()
            ],
            # References to class IRs are all just names
            "base": self.base.fullname if self.base else None,
            "traits": [trait.fullname for trait in self.traits],
            "mro": [ancestor.fullname for ancestor in self.mro],
            "base_mro": [ancestor.fullname for ancestor in self.base_mro],
            "children": None if children is None else [c.fullname for c in children],
            "deletable": self.deletable,
            "attrs_with_defaults": sorted(self.attrs_with_defaults),
            "_always_initialized_attrs": sorted(self._always_initialized_attrs),
            "_sometimes_initialized_attrs": sorted(self._sometimes_initialized_attrs),
            "init_self_leak": self.init_self_leak,
        }

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> ClassIR:
        """Fill in the ClassIR already registered in `ctx` from `data`.

        The instance is looked up in ctx.classes by full name, populated in
        place and returned.
        """
        fullname = data["module_name"] + "." + data["name"]
        assert fullname in ctx.classes, "Class %s not in deser class map" % fullname
        ir = ctx.classes[fullname]

        # Plain values whose attribute name equals the serialized key.
        for key in (
            "is_trait",
            "is_generated",
            "is_abstract",
            "is_ext_class",
            "is_augmented",
            "inherits_python",
            "has_dict",
            "allow_interpreted_subclasses",
            "needs_getseters",
            "_serializable",
            "builtin_base",
        ):
            setattr(ir, key, data[key])

        ir.ctor = FuncDecl.deserialize(data["ctor"], ctx)
        ir.attributes = {
            attr: deserialize_type(rtype, ctx) for attr, rtype in data["attributes"]
        }
        # A string value is a function id; anything else is a full decl.
        ir.method_decls = {
            meth: (
                ctx.functions[ref].decl
                if isinstance(ref, str)
                else FuncDecl.deserialize(ref, ctx)
            )
            for meth, ref in data["method_decls"]
        }
        ir.methods = {meth: ctx.functions[func_id] for meth, func_id in data["methods"]}
        ir.glue_methods = {
            (ctx.classes[owner], meth): ctx.functions[func_id]
            for (owner, meth), func_id in data["glue_methods"]
        }
        ir.property_types = {
            prop: deserialize_type(rtype, ctx) for prop, rtype in data["property_types"]
        }
        # Must come after ir.methods: getters and setters are looked up there.
        ir.properties = {
            prop: (ir.methods[prop], ir.methods.get(PROPSET_PREFIX + prop))
            for prop in data["properties"]
        }

        ir.vtable = data["vtable"]
        ir.vtable_entries = deserialize_vtable(data["vtable_entries"], ctx)
        ir.trait_vtables = {
            ctx.classes[trait]: deserialize_vtable(entries, ctx)
            for trait, entries in data["trait_vtables"]
        }

        base_name = data["base"]
        ir.base = ctx.classes[base_name] if base_name else None
        ir.traits = [ctx.classes[n] for n in data["traits"]]
        ir.mro = [ctx.classes[n] for n in data["mro"]]
        ir.base_mro = [ctx.classes[n] for n in data["base_mro"]]
        # A falsy serialized value (None or an empty list) is kept as is.
        child_names = data["children"]
        ir.children = child_names and [ctx.classes[n] for n in child_names]
        ir.deletable = data["deletable"]
        ir.attrs_with_defaults = set(data["attrs_with_defaults"])
        ir._always_initialized_attrs = set(data["_always_initialized_attrs"])
        ir._sometimes_initialized_attrs = set(data["_sometimes_initialized_attrs"])
        ir.init_self_leak = data["init_self_leak"]

        return ir
class NonExtClassInfo:
    """Values required to build a non-extension (plain Python) class.

    Bundles the class dictionary, the tuple of base classes, the class
    annotations dictionary and the metaclass.
    """

    def __init__(self, dict: Value, bases: Value, anns: Value, metaclass: Value) -> None:
        # Stored unchanged under the same names as the parameters.
        self.metaclass = metaclass
        self.anns = anns
        self.bases = bases
        self.dict = dict
def serialize_vtable_entry(entry: VTableMethod) -> JsonDict:
    """Return a JSON-compatible dict for one vtable entry.

    The class is written as its full name and the methods as the ids of
    their declarations; a missing shadow method is written as None.
    """
    shadow = entry.shadow_method
    shadow_id = shadow.decl.id if shadow else None
    serialized: JsonDict = {
        ".class": "VTableMethod",
        "cls": entry.cls.fullname,
        "name": entry.name,
        "method": entry.method.decl.id,
        "shadow_method": shadow_id,
    }
    return serialized
def serialize_vtable(vtable: VTableEntries) -> list[JsonDict]:
    """Serialize every entry of a vtable, keeping the entry order."""
    serialized: list[JsonDict] = []
    for entry in vtable:
        serialized.append(serialize_vtable_entry(entry))
    return serialized
def deserialize_vtable_entry(data: JsonDict, ctx: DeserMaps) -> VTableMethod:
    """Rebuild a VTableMethod from its serialized form.

    Args:
        data: Dict with a ".class" tag plus "cls", "name", "method" and
            "shadow_method" keys.
        ctx: Deserialization maps; the class is looked up in ctx.classes
            and the methods in ctx.functions.

    Raises:
        AssertionError: If the ".class" tag is not "VTableMethod".
    """
    kind = data[".class"]
    if kind != "VTableMethod":
        # Raised explicitly instead of `assert False`, which is stripped
        # under `python -O` and would let the function return None.
        raise AssertionError("Bogus vtable .class: %s" % kind)
    shadow_id = data["shadow_method"]
    return VTableMethod(
        ctx.classes[data["cls"]],
        data["name"],
        ctx.functions[data["method"]],
        ctx.functions[shadow_id] if shadow_id else None,
    )
def deserialize_vtable(data: list[JsonDict], ctx: DeserMaps) -> VTableEntries:
    """Deserialize every entry of a serialized vtable, keeping the order."""
    entries: VTableEntries = []
    for item in data:
        entries.append(deserialize_vtable_entry(item, ctx))
    return entries
def all_concrete_classes(class_ir: ClassIR) -> list[ClassIR] | None:
    """Return the concrete classes among `class_ir` and its subclasses.

    Returns None when the concrete subclasses cannot be determined. The
    class itself is appended last, and only if it is neither abstract nor a
    trait.
    """
    result = class_ir.concrete_subclasses()
    if result is None:
        return None
    if class_ir.is_abstract or class_ir.is_trait:
        return result
    result.append(class_ir)
    return result

View File

@@ -0,0 +1,370 @@
"""Intermediate representation of functions."""
from __future__ import annotations
from typing import Final, Sequence
from mypy.nodes import ARG_POS, ArgKind, Block, FuncDef
from mypyc.common import BITMAP_BITS, JsonDict, bitmap_name, get_id_from_name, short_id_from_name
from mypyc.ir.ops import (
Assign,
AssignMulti,
BasicBlock,
ControlOp,
DeserMaps,
LoadAddress,
Register,
Value,
)
from mypyc.ir.rtypes import RType, bitmap_rprimitive, deserialize_type
from mypyc.namegen import NameGenerator
class RuntimeArg:
    """Description of a function argument in IR.

    Argument kind is one of ARG_* constants defined in mypy.nodes.
    """

    def __init__(
        self, name: str, typ: RType, kind: ArgKind = ARG_POS, pos_only: bool = False
    ) -> None:
        self.name = name
        self.type = typ
        self.kind = kind
        self.pos_only = pos_only

    @property
    def optional(self) -> bool:
        """Whether the argument kind reports itself as optional."""
        kind = self.kind
        return kind.is_optional()

    def __repr__(self) -> str:
        return (
            f"RuntimeArg(name={self.name}, type={self.type}, "
            f"optional={self.optional!r}, pos_only={self.pos_only!r})"
        )

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict; the kind is stored as an int."""
        kind_value = int(self.kind.value)
        return {
            "name": self.name,
            "type": self.type.serialize(),
            "kind": kind_value,
            "pos_only": self.pos_only,
        }

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> RuntimeArg:
        """Rebuild a RuntimeArg from the output of serialize()."""
        name = data["name"]
        typ = deserialize_type(data["type"], ctx)
        kind = ArgKind(data["kind"])
        return RuntimeArg(name, typ, kind, data["pos_only"])
class FuncSignature:
    """Signature of a function in IR."""

    # TODO: Track if method?

    def __init__(self, args: Sequence[RuntimeArg], ret_type: RType) -> None:
        self.args = tuple(args)
        self.ret_type = ret_type
        # Bitmap arguments are used to mark default values for arguments
        # that have types with overlapping error values.
        self.num_bitmap_args = num_bitmap_args(self.args)
        if self.num_bitmap_args:
            # Synthetic bitmap arguments go at the end, highest index first.
            bitmap_args = tuple(
                RuntimeArg(bitmap_name(i), bitmap_rprimitive, pos_only=True)
                for i in reversed(range(self.num_bitmap_args))
            )
            self.args = self.args + bitmap_args

    def real_args(self) -> tuple[RuntimeArg, ...]:
        """Return arguments without any synthetic bitmap arguments."""
        count = self.num_bitmap_args
        return self.args[:-count] if count else self.args

    def bound_sig(self) -> FuncSignature:
        """Return a new signature without the first argument.

        Synthetic bitmap arguments are dropped before the new signature is
        built, so it computes its own.
        """
        remaining = self.real_args()[1:]
        return FuncSignature(remaining, self.ret_type)

    def __repr__(self) -> str:
        return "FuncSignature(args={!r}, ret={!r})".format(self.args, self.ret_type)

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict; bitmap arguments are left out."""
        serialized_args = [arg.serialize() for arg in self.real_args()]
        return {"args": serialized_args, "ret_type": self.ret_type.serialize()}

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> FuncSignature:
        """Rebuild a signature from the output of serialize()."""
        args = [RuntimeArg.deserialize(arg, ctx) for arg in data["args"]]
        ret_type = deserialize_type(data["ret_type"], ctx)
        return FuncSignature(args, ret_type)
def num_bitmap_args(args: tuple[RuntimeArg, ...]) -> int:
    """Return how many bitmap arguments are needed for `args`.

    One bit is needed for each optional argument whose type has an
    overlapping error value; the bit count is rounded up to whole bitmaps
    of BITMAP_BITS bits.
    """
    bits_needed = sum(
        1 for arg in args if arg.type.error_overlap and arg.kind.is_optional()
    )
    return (bits_needed + (BITMAP_BITS - 1)) // BITMAP_BITS
# Values for the 'kind' argument/attribute of FuncDecl.
FUNC_NORMAL: Final = 0
FUNC_STATICMETHOD: Final = 1
FUNC_CLASSMETHOD: Final = 2
class FuncDecl:
    """Declaration of a function in IR (without body or implementation).

    A function can be a regular module-level function, a method, a
    static method, a class method, or a property getter/setter.
    """

    def __init__(
        self,
        name: str,
        class_name: str | None,
        module_name: str,
        sig: FuncSignature,
        kind: int = FUNC_NORMAL,
        is_prop_setter: bool = False,
        is_prop_getter: bool = False,
        implicit: bool = False,
    ) -> None:
        self.name = name
        self.class_name = class_name
        self.module_name = module_name
        self.sig = sig
        self.kind = kind
        self.is_prop_setter = is_prop_setter
        self.is_prop_getter = is_prop_getter
        # Functions outside a class have no bound signature; static methods
        # keep the full signature; everything else uses sig.bound_sig().
        if class_name is None:
            self.bound_sig: FuncSignature | None = None
        elif kind == FUNC_STATICMETHOD:
            self.bound_sig = sig
        else:
            self.bound_sig = sig.bound_sig()

        # If True, not present in the mypy AST and must be synthesized during irbuild
        # Currently only supported for property getters/setters
        self.implicit = implicit

        # Optional because it only receives the line number once the
        # corresponding FuncIR is created
        self._line: int | None = None

    @property
    def line(self) -> int:
        """Line number of the function; asserts that it has been set."""
        current = self._line
        assert current is not None
        return current

    @line.setter
    def line(self, line: int) -> None:
        self._line = line

    @property
    def id(self) -> str:
        """Id computed from the name, full name and line number."""
        assert self.line is not None
        return get_id_from_name(self.name, self.fullname, self.line)

    @staticmethod
    def compute_shortname(class_name: str | None, name: str) -> str:
        """Return "Class.name" when a class name is given, else just `name`."""
        if class_name:
            return class_name + "." + name
        return name

    @property
    def shortname(self) -> str:
        """Name qualified by the class name, if there is one."""
        return FuncDecl.compute_shortname(self.class_name, self.name)

    @property
    def fullname(self) -> str:
        """Short name prefixed with the module name."""
        return self.module_name + "." + self.shortname

    def cname(self, names: NameGenerator) -> str:
        """Return the private name generated for this function."""
        partial_name = short_id_from_name(self.name, self.shortname, self._line)
        return names.private_name(self.module_name, partial_name)

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict (the line number is not included)."""
        serialized: JsonDict = {
            "name": self.name,
            "class_name": self.class_name,
            "module_name": self.module_name,
            "sig": self.sig.serialize(),
            "kind": self.kind,
            "is_prop_setter": self.is_prop_setter,
            "is_prop_getter": self.is_prop_getter,
            "implicit": self.implicit,
        }
        return serialized

    # TODO: move this to FuncIR?
    @staticmethod
    def get_id_from_json(func_ir: JsonDict) -> str:
        """Get the id from the serialized FuncIR associated with this FuncDecl"""
        decl = func_ir["decl"]
        name = decl["name"]
        shortname = FuncDecl.compute_shortname(decl["class_name"], name)
        fullname = decl["module_name"] + "." + shortname
        return get_id_from_name(name, fullname, func_ir["line"])

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> FuncDecl:
        """Rebuild a FuncDecl from the output of serialize()."""
        return FuncDecl(
            name=data["name"],
            class_name=data["class_name"],
            module_name=data["module_name"],
            sig=FuncSignature.deserialize(data["sig"], ctx),
            kind=data["kind"],
            is_prop_setter=data["is_prop_setter"],
            is_prop_getter=data["is_prop_getter"],
            implicit=data["implicit"],
        )
class FuncIR:
    """Intermediate representation of a function with contextual information.

    Unlike FuncDecl, this includes the IR of the body (basic blocks).
    """

    def __init__(
        self,
        decl: FuncDecl,
        arg_regs: list[Register],
        blocks: list[BasicBlock],
        line: int = -1,
        traceback_name: str | None = None,
    ) -> None:
        # Declaration of the function, including the signature
        self.decl = decl
        # Registers for all the arguments to the function
        self.arg_regs = arg_regs
        # Body of the function
        self.blocks = blocks
        # The line number is stored on the declaration, not on the FuncIR.
        self.decl.line = line
        # Name shown for this function in tracebacks. The function is
        # omitted from tracebacks if this is None.
        self.traceback_name = traceback_name

    # The properties below all forward to the declaration.

    @property
    def line(self) -> int:
        return self.decl.line

    @property
    def args(self) -> Sequence[RuntimeArg]:
        signature = self.decl.sig
        return signature.args

    @property
    def ret_type(self) -> RType:
        signature = self.decl.sig
        return signature.ret_type

    @property
    def class_name(self) -> str | None:
        return self.decl.class_name

    @property
    def sig(self) -> FuncSignature:
        return self.decl.sig

    @property
    def name(self) -> str:
        return self.decl.name

    @property
    def fullname(self) -> str:
        return self.decl.fullname

    @property
    def id(self) -> str:
        return self.decl.id

    def cname(self, names: NameGenerator) -> str:
        """Return the name produced by the declaration's cname()."""
        return self.decl.cname(names)

    def __repr__(self) -> str:
        owner = self.class_name
        label = f"{owner}.{self.name}" if owner else f"{self.name}"
        return f"<FuncIR {label}>"

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict; the blocks are not included."""
        serialized: JsonDict = {
            "decl": self.decl.serialize(),
            "line": self.line,
            "traceback_name": self.traceback_name,
        }
        return serialized

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> FuncIR:
        """Rebuild a FuncIR with empty argument registers and blocks."""
        decl = FuncDecl.deserialize(data["decl"], ctx)
        return FuncIR(decl, [], [], data["line"], data["traceback_name"])
# A FuncDef with no arguments and an empty body.
# NOTE(review): presumably a placeholder for places where no real FuncDef is
# available -- confirm against the users of this constant.
INVALID_FUNC_DEF: Final = FuncDef("<INVALID_FUNC_DEF>", [], Block([]))
def all_values(args: list[Register], blocks: list[BasicBlock]) -> list[Value]:
    """Return the values that may be initialized in the blocks.

    The result is a list without duplicate registers: first the arguments,
    then the remaining values in the order they are encountered. Registers
    that are only read are omitted.
    """
    ordered: list[Value] = list(args)
    known = set(args)

    def remember(reg: Register) -> None:
        # Record a register the first time it is seen.
        if reg not in known:
            ordered.append(reg)
            known.add(reg)

    for block in blocks:
        for op in block.ops:
            if isinstance(op, ControlOp):
                continue
            if isinstance(op, (Assign, AssignMulti)):
                remember(op.dest)
                continue
            if op.is_void:
                continue
            # If we take the address of a register, it might get initialized.
            if isinstance(op, LoadAddress) and isinstance(op.src, Register):
                remember(op.src)
            ordered.append(op)

    return ordered
def all_values_full(args: list[Register], blocks: list[BasicBlock]) -> list[Value]:
    """Return the values that are initialized or accessed.

    The result is a list without duplicate registers: first the arguments,
    then the remaining values in the order they are encountered.
    """
    ordered: list[Value] = list(args)
    known = set(args)

    def remember(reg: Register) -> None:
        # Record a register the first time it is seen.
        if reg not in known:
            ordered.append(reg)
            known.add(reg)

    for block in blocks:
        for op in block.ops:
            # Look for uninitialized registers that are accessed. Ignore
            # non-registers since we don't allow ops outside basic blocks.
            for source in op.sources():
                if isinstance(source, Register):
                    remember(source)
            if isinstance(op, ControlOp):
                continue
            if isinstance(op, (Assign, AssignMulti)):
                remember(op.dest)
            elif not op.is_void:
                ordered.append(op)

    return ordered

View File

@@ -0,0 +1,88 @@
"""Intermediate representation of modules."""
from __future__ import annotations
from typing import Dict
from mypyc.common import JsonDict
from mypyc.ir.class_ir import ClassIR
from mypyc.ir.func_ir import FuncDecl, FuncIR
from mypyc.ir.ops import DeserMaps
from mypyc.ir.rtypes import RType, deserialize_type
class ModuleIR:
    """Intermediate representation of a module."""

    def __init__(
        self,
        fullname: str,
        imports: list[str],
        functions: list[FuncIR],
        classes: list[ClassIR],
        final_names: list[tuple[str, RType]],
    ) -> None:
        self.fullname = fullname
        # Copied, so later changes to the caller's list are not visible here.
        self.imports = imports.copy()
        self.functions = functions
        self.classes = classes
        self.final_names = final_names

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict describing the module."""
        functions = [func.serialize() for func in self.functions]
        classes = [class_ir.serialize() for class_ir in self.classes]
        final_names = [(name, rtype.serialize()) for name, rtype in self.final_names]
        return {
            "fullname": self.fullname,
            "imports": self.imports,
            "functions": functions,
            "classes": classes,
            "final_names": final_names,
        }

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> ModuleIR:
        """Rebuild a ModuleIR from the output of serialize().

        Functions are looked up in ctx.functions by id rather than being
        deserialized again.
        """
        functions = [ctx.functions[FuncDecl.get_id_from_json(f)] for f in data["functions"]]
        classes = [ClassIR.deserialize(c, ctx) for c in data["classes"]]
        final_names = [
            (name, deserialize_type(rtype, ctx)) for name, rtype in data["final_names"]
        ]
        return ModuleIR(data["fullname"], data["imports"], functions, classes, final_names)
def deserialize_modules(data: dict[str, JsonDict], ctx: DeserMaps) -> dict[str, ModuleIR]:
    """Deserialize a collection of modules.

    The modules can contain dependencies on each other.

    Arguments:
        data: A dict containing the modules to deserialize.
        ctx: The deserialization maps to use and to populate.
             They are populated with information from the deserialized
             modules and as a precondition must have been populated by
             deserializing any dependencies of the modules being deserialized
             (outside of dependencies between the modules themselves).

    Returns a map containing the deserialized modules.
    """
    # Pass 1: register an empty ClassIR for every class so that types and
    # the like can be constructed.
    for module_data in data.values():
        for class_data in module_data["classes"]:
            placeholder = ClassIR(class_data["name"], class_data["module_name"])
            class_key = placeholder.fullname
            assert class_key not in ctx.classes, "Class %s already in map" % class_key
            ctx.classes[class_key] = placeholder

    # Pass 2: deserialize all functions so that methods are available to the
    # class deserialization.
    for module_data in data.values():
        for func_data in module_data["functions"]:
            func = FuncIR.deserialize(func_data, ctx)
            func_id = func.decl.id
            assert func_id not in ctx.functions, (
                "Method %s already in map" % func.decl.fullname
            )
            ctx.functions[func_id] = func

    # Pass 3: build the modules themselves.
    modules: dict[str, ModuleIR] = {}
    for module_name, module_data in data.items():
        modules[module_name] = ModuleIR.deserialize(module_data, ctx)
    return modules
# ModuleIRs should also always be an *OrderedDict*, but if we
# declared it that way we would need to put it in quotes everywhere...
ModuleIRs = Dict[str, ModuleIR]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,486 @@
"""Utilities for pretty-printing IR in a human-readable form."""
from __future__ import annotations
from collections import defaultdict
from typing import Any, Final, Sequence, Union
from mypyc.common import short_name
from mypyc.ir.func_ir import FuncIR, all_values_full
from mypyc.ir.module_ir import ModuleIRs
from mypyc.ir.ops import (
ERR_NEVER,
Assign,
AssignMulti,
BasicBlock,
Box,
Branch,
Call,
CallC,
Cast,
ComparisonOp,
ControlOp,
DecRef,
Extend,
Float,
FloatComparisonOp,
FloatNeg,
FloatOp,
GetAttr,
GetElementPtr,
Goto,
IncRef,
InitStatic,
Integer,
IntOp,
KeepAlive,
LoadAddress,
LoadErrorValue,
LoadGlobal,
LoadLiteral,
LoadMem,
LoadStatic,
MethodCall,
Op,
OpVisitor,
RaiseStandardError,
Register,
Return,
SetAttr,
SetMem,
Truncate,
TupleGet,
TupleSet,
Unbox,
Unreachable,
Value,
)
from mypyc.ir.rtypes import RType, is_bool_rprimitive, is_int_rprimitive
# An error source is either a whole basic block or a single op.
ErrorSource = Union[BasicBlock, Op]
class IRPrettyPrintVisitor(OpVisitor[str]):
    """Internal visitor that pretty-prints ops."""

    def __init__(self, names: dict[Value, str]) -> None:
        """Store the mapping from values to their display names."""
        # This should contain a name for all values that are shown as
        # registers in the output. This is not just for Register
        # instances -- all Ops that produce values need (generated) names.
        self.names = names
def visit_goto(self, op: Goto) -> str:
    """Format an unconditional jump to the op's label."""
    target = op.label
    return self.format("goto %l", target)
# Maps a branch kind to (format string for the condition, type suffix shown
# after "::"); an empty suffix means that none is printed.
branch_op_names: Final = {Branch.BOOL: ("%r", "bool"), Branch.IS_ERROR: ("is_error(%r)", "")}
def visit_branch(self, op: Branch) -> str:
fmt, typ = self.branch_op_names[op.op]
if op.negated:
fmt = f"not {fmt}"
cond = self.format(fmt, op.value)
tb = ""
if op.traceback_entry:
tb = " (error at %s:%d)" % op.traceback_entry
fmt = f"if {cond} goto %l{tb} else goto %l"
if typ:
fmt += f" :: {typ}"
return self.format(fmt, op.true, op.false)
def visit_return(self, op: Return) -> str:
return self.format("return %r", op.value)
def visit_unreachable(self, op: Unreachable) -> str:
return "unreachable"
def visit_assign(self, op: Assign) -> str:
return self.format("%r = %r", op.dest, op.src)
def visit_assign_multi(self, op: AssignMulti) -> str:
return self.format("%r = [%s]", op.dest, ", ".join(self.format("%r", v) for v in op.src))
def visit_load_error_value(self, op: LoadErrorValue) -> str:
return self.format("%r = <error> :: %s", op, op.type)
def visit_load_literal(self, op: LoadLiteral) -> str:
prefix = ""
# For values that have a potential unboxed representation, make
# it explicit that this is a Python object.
if isinstance(op.value, int):
prefix = "object "
rvalue = repr(op.value)
if isinstance(op.value, frozenset):
# We need to generate a string representation that won't vary
# run-to-run because sets are unordered, otherwise we may get
# spurious irbuild test failures.
#
# Sorting by the item's string representation is a bit of a
# hack, but it's stable and won't cause TypeErrors.
formatted_items = [repr(i) for i in sorted(op.value, key=str)]
rvalue = "frozenset({" + ", ".join(formatted_items) + "})"
return self.format("%r = %s%s", op, prefix, rvalue)
def visit_get_attr(self, op: GetAttr) -> str:
return self.format("%r = %s%r.%s", op, self.borrow_prefix(op), op.obj, op.attr)
def borrow_prefix(self, op: Op) -> str:
if op.is_borrowed:
return "borrow "
return ""
def visit_set_attr(self, op: SetAttr) -> str:
if op.is_init:
assert op.error_kind == ERR_NEVER
if op.error_kind == ERR_NEVER:
# Initialization and direct struct access can never fail
return self.format("%r.%s = %r", op.obj, op.attr, op.src)
else:
return self.format("%r.%s = %r; %r = is_error", op.obj, op.attr, op.src, op)
def visit_load_static(self, op: LoadStatic) -> str:
ann = f" ({repr(op.ann)})" if op.ann else ""
name = op.identifier
if op.module_name is not None:
name = f"{op.module_name}.{name}"
return self.format("%r = %s :: %s%s", op, name, op.namespace, ann)
def visit_init_static(self, op: InitStatic) -> str:
name = op.identifier
if op.module_name is not None:
name = f"{op.module_name}.{name}"
return self.format("%s = %r :: %s", name, op.value, op.namespace)
def visit_tuple_get(self, op: TupleGet) -> str:
return self.format("%r = %r[%d]", op, op.src, op.index)
def visit_tuple_set(self, op: TupleSet) -> str:
item_str = ", ".join(self.format("%r", item) for item in op.items)
return self.format("%r = (%s)", op, item_str)
def visit_inc_ref(self, op: IncRef) -> str:
s = self.format("inc_ref %r", op.src)
# TODO: Remove bool check (it's unboxed)
if is_bool_rprimitive(op.src.type) or is_int_rprimitive(op.src.type):
s += f" :: {short_name(op.src.type.name)}"
return s
def visit_dec_ref(self, op: DecRef) -> str:
s = self.format("%sdec_ref %r", "x" if op.is_xdec else "", op.src)
# TODO: Remove bool check (it's unboxed)
if is_bool_rprimitive(op.src.type) or is_int_rprimitive(op.src.type):
s += f" :: {short_name(op.src.type.name)}"
return s
def visit_call(self, op: Call) -> str:
args = ", ".join(self.format("%r", arg) for arg in op.args)
# TODO: Display long name?
short_name = op.fn.shortname
s = f"{short_name}({args})"
if not op.is_void:
s = self.format("%r = ", op) + s
return s
def visit_method_call(self, op: MethodCall) -> str:
args = ", ".join(self.format("%r", arg) for arg in op.args)
s = self.format("%r.%s(%s)", op.obj, op.method, args)
if not op.is_void:
s = self.format("%r = ", op) + s
return s
def visit_cast(self, op: Cast) -> str:
return self.format("%r = %scast(%s, %r)", op, self.borrow_prefix(op), op.type, op.src)
def visit_box(self, op: Box) -> str:
return self.format("%r = box(%s, %r)", op, op.src.type, op.src)
def visit_unbox(self, op: Unbox) -> str:
return self.format("%r = unbox(%s, %r)", op, op.type, op.src)
def visit_raise_standard_error(self, op: RaiseStandardError) -> str:
if op.value is not None:
if isinstance(op.value, str):
return self.format("%r = raise %s(%s)", op, op.class_name, repr(op.value))
elif isinstance(op.value, Value):
return self.format("%r = raise %s(%r)", op, op.class_name, op.value)
else:
assert False, "value type must be either str or Value"
else:
return self.format("%r = raise %s", op, op.class_name)
def visit_call_c(self, op: CallC) -> str:
args_str = ", ".join(self.format("%r", arg) for arg in op.args)
if op.is_void:
return self.format("%s(%s)", op.function_name, args_str)
else:
return self.format("%r = %s(%s)", op, op.function_name, args_str)
def visit_truncate(self, op: Truncate) -> str:
return self.format("%r = truncate %r: %t to %t", op, op.src, op.src_type, op.type)
def visit_extend(self, op: Extend) -> str:
if op.signed:
extra = " signed"
else:
extra = ""
return self.format("%r = extend%s %r: %t to %t", op, extra, op.src, op.src_type, op.type)
def visit_load_global(self, op: LoadGlobal) -> str:
ann = f" ({repr(op.ann)})" if op.ann else ""
return self.format("%r = load_global %s :: static%s", op, op.identifier, ann)
def visit_int_op(self, op: IntOp) -> str:
return self.format("%r = %r %s %r", op, op.lhs, IntOp.op_str[op.op], op.rhs)
def visit_comparison_op(self, op: ComparisonOp) -> str:
if op.op in (ComparisonOp.SLT, ComparisonOp.SGT, ComparisonOp.SLE, ComparisonOp.SGE):
sign_format = " :: signed"
elif op.op in (ComparisonOp.ULT, ComparisonOp.UGT, ComparisonOp.ULE, ComparisonOp.UGE):
sign_format = " :: unsigned"
else:
sign_format = ""
return self.format(
"%r = %r %s %r%s", op, op.lhs, ComparisonOp.op_str[op.op], op.rhs, sign_format
)
def visit_float_op(self, op: FloatOp) -> str:
return self.format("%r = %r %s %r", op, op.lhs, FloatOp.op_str[op.op], op.rhs)
def visit_float_neg(self, op: FloatNeg) -> str:
return self.format("%r = -%r", op, op.src)
def visit_float_comparison_op(self, op: FloatComparisonOp) -> str:
return self.format("%r = %r %s %r", op, op.lhs, op.op_str[op.op], op.rhs)
def visit_load_mem(self, op: LoadMem) -> str:
return self.format("%r = load_mem %r :: %t*", op, op.src, op.type)
def visit_set_mem(self, op: SetMem) -> str:
return self.format("set_mem %r, %r :: %t*", op.dest, op.src, op.dest_type)
def visit_get_element_ptr(self, op: GetElementPtr) -> str:
return self.format("%r = get_element_ptr %r %s :: %t", op, op.src, op.field, op.src_type)
def visit_load_address(self, op: LoadAddress) -> str:
if isinstance(op.src, Register):
return self.format("%r = load_address %r", op, op.src)
elif isinstance(op.src, LoadStatic):
name = op.src.identifier
if op.src.module_name is not None:
name = f"{op.src.module_name}.{name}"
return self.format("%r = load_address %s :: %s", op, name, op.src.namespace)
else:
return self.format("%r = load_address %s", op, op.src)
def visit_keep_alive(self, op: KeepAlive) -> str:
return self.format("keep_alive %s" % ", ".join(self.format("%r", v) for v in op.src))
# Helpers
def format(self, fmt: str, *args: Any) -> str:
"""Helper for formatting strings.
These format sequences are supported in fmt:
%s: arbitrary object converted to string using str()
%r: name of IR value/register
%d: int
%f: float
%l: BasicBlock (formatted as label 'Ln')
%t: RType
"""
result = []
i = 0
arglist = list(args)
while i < len(fmt):
n = fmt.find("%", i)
if n < 0:
n = len(fmt)
result.append(fmt[i:n])
if n < len(fmt):
typespec = fmt[n + 1]
arg = arglist.pop(0)
if typespec == "r":
# Register/value
assert isinstance(arg, Value)
if isinstance(arg, Integer):
result.append(str(arg.value))
elif isinstance(arg, Float):
result.append(repr(arg.value))
else:
result.append(self.names[arg])
elif typespec == "d":
# Integer
result.append("%d" % arg)
elif typespec == "f":
# Float
result.append("%f" % arg)
elif typespec == "l":
# Basic block (label)
assert isinstance(arg, BasicBlock)
result.append("L%s" % arg.label)
elif typespec == "t":
# RType
assert isinstance(arg, RType)
result.append(arg.name)
elif typespec == "s":
# String
result.append(str(arg))
else:
raise ValueError(f"Invalid format sequence %{typespec}")
i = n + 2
else:
i = n
return "".join(result)
def format_registers(func_ir: FuncIR, names: dict[Value, str]) -> list[str]:
    """Return one "name, name, ... :: type" line per run of same-typed values.

    The values come from all_values_full() for the function's argument
    registers and blocks. Only consecutive values whose types compare equal
    are merged into a single line; the order of the values is preserved.
    """
    regs = all_values_full(func_ir.arg_regs, func_ir.blocks)
    lines = []
    start = 0
    while start < len(regs):
        group_type = regs[start].type
        # Advance 'end' past every following value that shares the type of
        # the first value in this run.
        end = start + 1
        while end < len(regs) and regs[end].type == group_type:
            end += 1
        group = [names[reg] for reg in regs[start:end]]
        lines.append(f"{', '.join(group)} :: {group_type}")
        start = end
    return lines
def format_blocks(
    blocks: list[BasicBlock],
    names: dict[Value, str],
    source_to_error: dict[ErrorSource, list[str]],
) -> list[str]:
    """Format a list of IR basic blocks into a human-readable form.

    Args:
        blocks: The basic blocks to print. As a side effect each block's
            label is set to its index in this list.
        names: Display names for the values referenced by the ops.
        source_to_error: Error messages keyed by the block or op they refer
            to; each message is printed as an "ERR:" line after its source.

    Returns:
        The output lines: a label line per block followed by one line per
        op. A block that does not end in a control-flow exit op (including a
        block with no ops at all) gets a "[MISSING BLOCK EXIT OPCODE]" line.
    """
    # First label all of the blocks
    for i, block in enumerate(blocks):
        block.label = i

    # Map each error handler block to the blocks that it handles errors for.
    handler_map: dict[BasicBlock, list[BasicBlock]] = {}
    for b in blocks:
        if b.error_handler:
            handler_map.setdefault(b.error_handler, []).append(b)

    visitor = IRPrettyPrintVisitor(names)

    lines = []
    for i, block in enumerate(blocks):
        handler_msg = ""
        if block in handler_map:
            labels = sorted("L%d" % b.label for b in handler_map[block])
            handler_msg = " (handler for {})".format(", ".join(labels))

        lines.append("L%d:%s" % (block.label, handler_msg))
        if block in source_to_error:
            for error in source_to_error[block]:
                lines.append(f" ERR: {error}")
        ops = block.ops
        if (
            ops  # An empty block has no final op to inspect.
            and isinstance(ops[-1], Goto)
            and i + 1 < len(blocks)
            and ops[-1].label == blocks[i + 1]
            and not source_to_error.get(ops[-1], [])
        ):
            # Hide the last goto if it just goes to the next basic block,
            # and there are no associated errors with the op.
            ops = ops[:-1]
        for op in ops:
            line = " " + op.accept(visitor)
            lines.append(line)
            if op in source_to_error:
                for error in source_to_error[op]:
                    lines.append(f" ERR: {error}")

        if not block.ops or not isinstance(block.ops[-1], (Goto, Branch, Return, Unreachable)):
            # Each basic block needs to exit somewhere. An empty block is
            # reported the same way instead of crashing with an IndexError.
            lines.append(" [MISSING BLOCK EXIT OPCODE]")
    return lines
def format_func(fn: FuncIR, errors: Sequence[tuple[ErrorSource, str]] = ()) -> list[str]:
    """Pretty-print a single function as a list of lines.

    The output is a "def" header line, then the register declarations, then
    the formatted basic blocks.

    Args:
        fn: The function to print.
        errors: (source, message) pairs; the messages are grouped by source
            and handed to format_blocks() for display.
    """
    qualifier = fn.class_name + "." if fn.class_name else ""
    params = ", ".join(arg.name for arg in fn.args)
    lines = [f"def {qualifier}{fn.name}({params}):"]

    names = generate_names_for_ir(fn.arg_regs, fn.blocks)
    lines.extend(" " + line for line in format_registers(fn, names))

    # Group the error messages by the block/op they are attached to.
    source_to_error = defaultdict(list)
    for source, error in errors:
        source_to_error[source].append(error)

    lines.extend(format_blocks(fn.blocks, names, source_to_error))
    return lines
def format_modules(modules: ModuleIRs) -> list[str]:
    """Pretty-print every function of every module.

    Functions are emitted in module order, then in each module's function
    order; each function's lines are followed by one empty separator line.
    """
    lines: list[str] = []
    for module in modules.values():
        for fn in module.functions:
            lines += format_func(fn)
            lines.append("")
    return lines
def generate_names_for_ir(args: list[Register], blocks: list[BasicBlock]) -> dict[Value, str]:
    """Generate unique names for IR values.

    Give names such as 'r5' to temp values in IR which are useful when
    pretty-printing or generating C. Ensure generated names are unique.

    Argument registers keep their own names. Other values are named in the
    order they are first seen while walking the ops of each block.
    """
    names: dict[Value, str] = {}
    taken = set()
    next_temp = 0

    for arg in args:
        names[arg] = arg.name
        taken.add(arg.name)

    for block in blocks:
        for op in block.ops:
            # Candidates for naming: not-yet-named sources first, then the
            # value the op itself defines (if any).
            pending = [src for src in op.sources() if src not in names]
            if isinstance(op, (Assign, AssignMulti)):
                pending.append(op.dest)
            elif isinstance(op, ControlOp) or op.is_void:
                # Nothing is named for control ops and void ops.
                continue
            elif op not in names:
                pending.append(op)

            for value in pending:
                # Already-named values and numeric constants need no name.
                if value in names or isinstance(value, (Integer, Float)):
                    continue
                if isinstance(value, Register) and value.name:
                    name = value.name
                else:
                    name = "r%d" % next_temp
                    next_temp += 1

                # Append _2, _3, ... if needed to make the name unique.
                if name in taken:
                    suffix = 2
                    while f"{name}_{suffix}" in taken:
                        suffix += 1
                    name = f"{name}_{suffix}"

                names[value] = name
                taken.add(name)

    return names

File diff suppressed because it is too large Load Diff