Major fixes and new features
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
Binary file not shown.
Binary file not shown.
436
venv/lib/python3.12/site-packages/mypyc/analysis/attrdefined.py
Normal file
436
venv/lib/python3.12/site-packages/mypyc/analysis/attrdefined.py
Normal file
@@ -0,0 +1,436 @@
|
||||
"""Always defined attribute analysis.
|
||||
|
||||
An always defined attribute has some statements in __init__ or the
|
||||
class body that cause the attribute to be always initialized when an
|
||||
instance is constructed. It must also not be possible to read the
|
||||
attribute before initialization, and it can't be deletable.
|
||||
|
||||
We can assume that the value is always defined when reading an always
|
||||
defined attribute. Otherwise we'll need to raise AttributeError if the
|
||||
value is undefined (i.e. has the error value).
|
||||
|
||||
We use data flow analysis to figure out attributes that are always
|
||||
defined. Example:
|
||||
|
||||
class C:
|
||||
def __init__(self) -> None:
|
||||
self.x = 0
|
||||
if func():
|
||||
self.y = 1
|
||||
else:
|
||||
self.y = 2
|
||||
self.z = 3
|
||||
|
||||
In this example, the attributes 'x' and 'y' are always defined, but 'z'
|
||||
is not. The analysis assumes that we know that there won't be any subclasses.
|
||||
|
||||
The analysis also works if there is a known, closed set of subclasses.
|
||||
An attribute defined in a base class can only be always defined if it's
|
||||
also always defined in all subclasses.
|
||||
|
||||
As soon as __init__ contains an op that can 'leak' self to another
|
||||
function, we will stop inferring always defined attributes, since the
|
||||
analysis is mostly intra-procedural and only looks at __init__ methods.
|
||||
The called code could read an uninitialized attribute. Example:
|
||||
|
||||
class C:
|
||||
def __init__(self) -> None:
|
||||
self.x = self.foo()
|
||||
|
||||
def foo(self) -> int:
|
||||
...
|
||||
|
||||
Now we won't infer 'x' as always defined, since 'foo' might read 'x'
|
||||
before initialization.
|
||||
|
||||
As an exception to the above limitation, we perform inter-procedural
|
||||
analysis of super().__init__ calls, since these are very common.
|
||||
|
||||
Our analysis is somewhat optimistic. We assume that nobody calls a
|
||||
method of a partially uninitialized object through gc.get_objects(), in
|
||||
particular. Code like this could potentially cause a segfault with a null
|
||||
pointer dereference. This seems very unlikely to be an issue in practice,
|
||||
however.
|
||||
|
||||
Accessing an attribute via getattr always checks for undefined attributes
|
||||
and thus works if the object is partially uninitialized. This can be used
|
||||
as a workaround if somebody ever needs to inspect partially uninitialized
|
||||
objects via gc.get_objects().
|
||||
|
||||
The analysis runs after IR building as a separate pass. Since we only
|
||||
run this on __init__ methods, this analysis pass will be fairly quick.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Final, Set, Tuple
|
||||
|
||||
from mypyc.analysis.dataflow import (
|
||||
CFG,
|
||||
MAYBE_ANALYSIS,
|
||||
AnalysisResult,
|
||||
BaseAnalysisVisitor,
|
||||
get_cfg,
|
||||
run_analysis,
|
||||
)
|
||||
from mypyc.analysis.selfleaks import analyze_self_leaks
|
||||
from mypyc.ir.class_ir import ClassIR
|
||||
from mypyc.ir.ops import (
|
||||
Assign,
|
||||
AssignMulti,
|
||||
BasicBlock,
|
||||
Branch,
|
||||
Call,
|
||||
ControlOp,
|
||||
GetAttr,
|
||||
Register,
|
||||
RegisterOp,
|
||||
Return,
|
||||
SetAttr,
|
||||
SetMem,
|
||||
Unreachable,
|
||||
)
|
||||
from mypyc.ir.rtypes import RInstance
|
||||
|
||||
# If True, print out all always-defined attributes of native classes (to aid
|
||||
# debugging and testing)
|
||||
dump_always_defined: Final = False
|
||||
|
||||
|
||||
def analyze_always_defined_attrs(class_irs: list[ClassIR]) -> None:
    """Find always defined attributes in all classes of a compilation unit.

    Also tag attribute initialization ops to not decref the previous
    value (as this would read a NULL pointer and segfault).

    Update the _always_initialized_attrs, _sometimes_initialized_attrs
    and init_self_leak attributes in ClassIR instances.

    This is the main entry point.
    """
    passes = (
        # First pass: only look at target class and classes in MRO
        analyze_always_defined_attrs_in_class,
        # Second pass: look at all derived classes
        update_always_defined_attrs_using_subclasses,
        # Final pass: detect attributes that need to use a bitmap to track definedness
        detect_undefined_bitmap,
    )
    for run_pass in passes:
        # Each pass tracks the classes it has already handled in its own fresh set.
        visited: set[ClassIR] = set()
        for class_ir in class_irs:
            run_pass(class_ir, visited)
|
||||
|
||||
|
||||
def analyze_always_defined_attrs_in_class(cl: ClassIR, seen: set[ClassIR]) -> None:
    """Analyze the __init__ method of a single class (and, first, of its bases).

    Sets cl._always_initialized_attrs, cl._sometimes_initialized_attrs and
    cl.init_self_leak, and tags SetAttr ops in __init__ that act as
    initializers. Classes already in 'seen' are skipped; 'cl' is added to it.
    """
    if cl in seen:
        return
    seen.add(cl)

    cannot_enforce = (
        cl.is_trait
        or cl.inherits_python
        or cl.allow_interpreted_subclasses
        or cl.builtin_base is not None
        or cl.children is None
        or cl.is_serializable()
    )
    if cannot_enforce:
        # Give up -- we can't enforce that attributes are always defined.
        return

    # First analyze all base classes. Track seen classes to avoid duplicate work.
    for ancestor in cl.mro[1:]:
        analyze_always_defined_attrs_in_class(ancestor, seen)

    init = cl.get_method("__init__")
    if init is None:
        # Without an __init__, only attributes with defaults get initialized.
        cl._always_initialized_attrs = cl.attrs_with_defaults.copy()
        cl._sometimes_initialized_attrs = cl.attrs_with_defaults.copy()
        return

    blocks = init.blocks
    self_reg = init.arg_regs[0]
    cfg = get_cfg(blocks)
    dirty = analyze_self_leaks(blocks, self_reg, cfg)
    maybe_defined = analyze_maybe_defined_attrs_in_init(
        blocks, self_reg, cl.attrs_with_defaults, cfg
    )
    # Attribute names declared anywhere in the MRO (including this class).
    all_attrs: set[str] = {name for klass in cl.mro for name in klass.attributes}
    maybe_undefined = analyze_maybe_undefined_attrs_in_init(
        blocks, self_reg, initial_undefined=all_attrs - cl.attrs_with_defaults, cfg=cfg
    )

    candidates = find_always_defined_attributes(
        blocks, self_reg, all_attrs, maybe_defined, maybe_undefined, dirty
    )
    # Deletable attributes can never be treated as always defined.
    always_defined = {name for name in candidates if not cl.is_deletable(name)}

    cl._always_initialized_attrs = always_defined
    if dump_always_defined:
        print(cl.name, sorted(always_defined))
    cl._sometimes_initialized_attrs = find_sometimes_defined_attributes(
        blocks, self_reg, maybe_defined, dirty
    )

    mark_attr_initialiation_ops(blocks, self_reg, maybe_defined, dirty)

    # Check if __init__ can run unpredictable code (leak 'self').
    cl.init_self_leak = any(
        dirty.after[block, index] and not isinstance(op, Return)
        for block in blocks
        for index, op in enumerate(block.ops)
    )
|
||||
|
||||
|
||||
def find_always_defined_attributes(
    blocks: list[BasicBlock],
    self_reg: Register,
    all_attrs: set[str],
    maybe_defined: AnalysisResult[str],
    maybe_undefined: AnalysisResult[str],
    dirty: AnalysisResult[None],
) -> set[str]:
    """Find attributes that are always initialized in some basic blocks.

    The analysis results are expected to be up-to-date for the blocks.

    Start from all_attrs (which is not modified) and discard every
    attribute that cannot be proven to be defined.

    Return a set of always defined attributes.
    """
    remaining = set(all_attrs)
    for block in blocks:
        for index, op in enumerate(block.ops):
            here = (block, index)

            # If an attribute we *read* may be undefined, it isn't always defined.
            if isinstance(op, GetAttr) and op.obj is self_reg:
                if op.attr in maybe_undefined.before[here]:
                    remaining.discard(op.attr)

            # If an attribute we *set* may be sometimes undefined and
            # sometimes defined, don't consider it always defined. Unlike
            # the get case, it's fine for the attribute to be undefined.
            # The set operation will then be treated as initialization.
            if isinstance(op, SetAttr) and op.obj is self_reg:
                name = op.attr
                if name in maybe_undefined.before[here] and name in maybe_defined.before[here]:
                    remaining.discard(name)

            # Treat an op that might run arbitrary code as an "exit"
            # in terms of the analysis -- we can't do any inference
            # afterwards reliably.
            if dirty.after[here]:
                if not dirty.before[here]:
                    remaining &= maybe_defined.after[here] - maybe_undefined.after[here]
                break

            # dirty.after[here] is known to be falsy from this point on.
            if isinstance(op, ControlOp):
                for target in op.targets():
                    entry = (target, 0)
                    # Gotos/branches can also be "exits".
                    if dirty.before[entry]:
                        remaining &= maybe_defined.after[entry] - maybe_undefined.after[entry]
    return remaining
|
||||
|
||||
|
||||
def find_sometimes_defined_attributes(
    blocks: list[BasicBlock],
    self_reg: Register,
    maybe_defined: AnalysisResult[str],
    dirty: AnalysisResult[None],
) -> set[str]:
    """Find attributes that are sometimes initialized in some basic blocks.

    Collect the union of the possibly defined attributes at every "exit":
    the first op after which 'dirty' is set, or a control-flow edge into a
    block that is dirty on entry.
    """
    found: set[str] = set()
    for block in blocks:
        for index, op in enumerate(block.ops):
            here = (block, index)
            # Only look at possibly defined attributes at exits.
            if dirty.after[here]:
                if not dirty.before[here]:
                    found |= maybe_defined.after[here]
                break
            # dirty.after[here] is known to be falsy from this point on.
            if isinstance(op, ControlOp):
                for target in op.targets():
                    if dirty.before[target, 0]:
                        found |= maybe_defined.after[target, 0]
    return found
|
||||
|
||||
|
||||
def mark_attr_initialiation_ops(
    blocks: list[BasicBlock],
    self_reg: Register,
    maybe_defined: AnalysisResult[str],
    dirty: AnalysisResult[None],
) -> None:
    """Tag all SetAttr ops in the basic blocks that initialize attributes.

    Initialization ops assume that the previous attribute value is the error value,
    so there's no need to decref or check for definedness.
    """
    for block in blocks:
        for index, op in enumerate(block.ops):
            # Only assignments to attributes of 'self' are candidates.
            if not (isinstance(op, SetAttr) and op.obj is self_reg):
                continue
            # Skip if the attribute may already hold a value, or if 'dirty' is
            # set after this op.
            if op.attr in maybe_defined.before[block, index] or dirty.after[block, index]:
                continue
            op.mark_as_initializer()
|
||||
|
||||
|
||||
GenAndKill = Tuple[Set[str], Set[str]]
|
||||
|
||||
|
||||
def attributes_initialized_by_init_call(op: Call) -> set[str]:
    """Calculate attributes that are always initialized by a super().__init__ call.

    The class is taken from the type of the first argument in the signature
    of the called function; all classes in its MRO are considered.
    """
    self_type = op.fn.sig.args[0].type
    assert isinstance(self_type, RInstance)
    initialized: set[str] = set()
    for klass in self_type.class_ir.mro:
        initialized.update(name for name in klass.attributes if klass.is_always_defined(name))
    return initialized
|
||||
|
||||
|
||||
def attributes_maybe_initialized_by_init_call(op: Call) -> set[str]:
    """Calculate attributes that may be initialized by a super().__init__ call.

    This is the union of the always initialized attributes and the sometimes
    initialized attributes recorded on the class of the called __init__.
    """
    receiver_type = op.fn.sig.args[0].type
    assert isinstance(receiver_type, RInstance)
    sometimes_initialized = receiver_type.class_ir._sometimes_initialized_attrs
    return attributes_initialized_by_init_call(op) | sometimes_initialized
|
||||
|
||||
|
||||
class AttributeMaybeDefinedVisitor(BaseAnalysisVisitor[str]):
    """Find attributes that may have been defined via some code path.

    Consider initializations in class body and assignments to 'self.x'
    and calls to base class '__init__'.

    Each visit method returns a (gen, kill) pair of attribute name sets.
    This visitor never kills anything.
    """

    def __init__(self, self_reg: Register) -> None:
        # Register that holds 'self' in the analyzed method.
        self.self_reg = self_reg

    def visit_register_op(self, op: RegisterOp) -> tuple[set[str], set[str]]:
        if isinstance(op, SetAttr):
            # 'self.x = ...' makes 'x' possibly defined.
            if op.obj is self.self_reg:
                return {op.attr}, set()
        elif isinstance(op, Call):
            # A call to some class's __init__ may define that class's attributes.
            if op.fn.class_name and op.fn.name == "__init__":
                return attributes_maybe_initialized_by_init_call(op), set()
        return set(), set()

    # The remaining ops have no effect on attribute definedness.

    def visit_branch(self, op: Branch) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_return(self, op: Return) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_unreachable(self, op: Unreachable) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_assign(self, op: Assign) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_assign_multi(self, op: AssignMulti) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_set_mem(self, op: SetMem) -> tuple[set[str], set[str]]:
        return set(), set()
|
||||
|
||||
|
||||
def analyze_maybe_defined_attrs_in_init(
    blocks: list[BasicBlock], self_reg: Register, attrs_with_defaults: set[str], cfg: CFG
) -> AnalysisResult[str]:
    """Calculate attributes that may be defined at each location in the blocks.

    Runs a forward "maybe" analysis with AttributeMaybeDefinedVisitor, using
    attrs_with_defaults as the initial set.
    """
    visitor = AttributeMaybeDefinedVisitor(self_reg)
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=visitor,
        initial=attrs_with_defaults,
        backward=False,
        kind=MAYBE_ANALYSIS,
    )
|
||||
|
||||
|
||||
class AttributeMaybeUndefinedVisitor(BaseAnalysisVisitor[str]):
    """Find attributes that may be undefined via some code path.

    Consider initializations in class body, assignments to 'self.x'
    and calls to base class '__init__'.

    Each visit method returns a (gen, kill) pair of attribute name sets.
    This visitor never generates anything; it only kills attributes that
    become defined.
    """

    def __init__(self, self_reg: Register) -> None:
        # Register that holds 'self' in the analyzed method.
        self.self_reg = self_reg

    def visit_register_op(self, op: RegisterOp) -> tuple[set[str], set[str]]:
        if isinstance(op, SetAttr):
            # After 'self.x = ...', 'x' is no longer possibly undefined.
            if op.obj is self.self_reg:
                return set(), {op.attr}
        elif isinstance(op, Call):
            # Only attributes *always* initialized by the called __init__ are killed.
            if op.fn.class_name and op.fn.name == "__init__":
                return set(), attributes_initialized_by_init_call(op)
        return set(), set()

    # The remaining ops have no effect on attribute definedness.

    def visit_branch(self, op: Branch) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_return(self, op: Return) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_unreachable(self, op: Unreachable) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_assign(self, op: Assign) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_assign_multi(self, op: AssignMulti) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_set_mem(self, op: SetMem) -> tuple[set[str], set[str]]:
        return set(), set()
|
||||
|
||||
|
||||
def analyze_maybe_undefined_attrs_in_init(
    blocks: list[BasicBlock], self_reg: Register, initial_undefined: set[str], cfg: CFG
) -> AnalysisResult[str]:
    """Calculate attributes that may be undefined at each location in the blocks.

    Runs a forward "maybe" analysis with AttributeMaybeUndefinedVisitor, using
    initial_undefined as the initial set.
    """
    visitor = AttributeMaybeUndefinedVisitor(self_reg)
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=visitor,
        initial=initial_undefined,
        backward=False,
        kind=MAYBE_ANALYSIS,
    )
|
||||
|
||||
|
||||
def update_always_defined_attrs_using_subclasses(cl: ClassIR, seen: set[ClassIR]) -> None:
    """Remove attributes not defined in all subclasses from always defined attrs.

    Children are updated recursively before they are consulted. Classes
    whose subclasses are unknown (children is None) are left untouched and
    are not added to 'seen'.
    """
    if cl in seen or cl.children is None:
        # Already processed, or subclasses are unknown
        return
    not_in_every_child: set[str] = set()
    for name in cl._always_initialized_attrs:
        for subclass in cl.children:
            # No-op after the first visit of 'subclass' thanks to 'seen'.
            update_always_defined_attrs_using_subclasses(subclass, seen)
            if name not in subclass._always_initialized_attrs:
                not_in_every_child.add(name)
    cl._always_initialized_attrs -= not_in_every_child
    seen.add(cl)
|
||||
|
||||
|
||||
def detect_undefined_bitmap(cl: ClassIR, seen: set[ClassIR]) -> None:
    """Populate cl.bitmap_attrs with attributes that need a definedness bitmap.

    An attribute is added when its type has 'error_overlap' set and the
    attribute is not always defined in 'cl'. The list is built as:

    1. the bitmap attributes of the immediate base class (cl.base_mro[1]),
    2. qualifying attributes declared by 'cl' itself,
    3. qualifying attributes of traits in the MRO that aren't listed yet.

    Traits themselves are skipped. Classes already in 'seen' are skipped;
    'cl' is added to it.
    """
    if cl.is_trait:
        return

    if cl in seen:
        return
    seen.add(cl)

    # Process base classes first, so that the prefix copied from the immediate
    # base below is complete no matter in which order classes are visited.
    # (This must recurse on 'base'; recursing on 'cl' is a no-op since 'cl'
    # is already in 'seen'.)
    for base in cl.base_mro[1:]:
        detect_undefined_bitmap(base, seen)

    if len(cl.base_mro) > 1:
        cl.bitmap_attrs.extend(cl.base_mro[1].bitmap_attrs)
    for n, t in cl.attributes.items():
        if t.error_overlap and not cl.is_always_defined(n):
            cl.bitmap_attrs.append(n)

    # Attributes inherited from traits are appended after the class's own.
    for base in cl.mro[1:]:
        if base.is_trait:
            for n, t in base.attributes.items():
                if t.error_overlap and not cl.is_always_defined(n) and n not in cl.bitmap_attrs:
                    cl.bitmap_attrs.append(n)
|
||||
Binary file not shown.
@@ -0,0 +1,32 @@
|
||||
"""Find basic blocks that are likely to be executed frequently.
|
||||
|
||||
For example, this would not include blocks that have exception handlers.
|
||||
|
||||
We can use different optimization heuristics for common and rare code. For
|
||||
example, we can make IR fast to compile instead of fast to execute for rare
|
||||
code.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from mypyc.ir.ops import BasicBlock, Branch, Goto
|
||||
|
||||
|
||||
def frequently_executed_blocks(entry_point: BasicBlock) -> set[BasicBlock]:
    """Return the blocks reachable from entry_point without taking rare edges.

    A Goto is always followed. For a Branch, the false target is always
    followed; the true target is followed only if the branch is not marked
    rare and has no traceback entry.
    """
    visited: set[BasicBlock] = set()
    pending = [entry_point]
    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)
        term = current.terminator
        if isinstance(term, Goto):
            pending.append(term.label)
        elif isinstance(term, Branch):
            if not (term.rare or term.traceback_entry is not None):
                pending.append(term.true)
            pending.append(term.false)
    return visited
|
||||
Binary file not shown.
623
venv/lib/python3.12/site-packages/mypyc/analysis/dataflow.py
Normal file
623
venv/lib/python3.12/site-packages/mypyc/analysis/dataflow.py
Normal file
@@ -0,0 +1,623 @@
|
||||
"""Data-flow analyses."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import abstractmethod
|
||||
from typing import Dict, Generic, Iterable, Iterator, Set, Tuple, TypeVar
|
||||
|
||||
from mypyc.ir.func_ir import all_values
|
||||
from mypyc.ir.ops import (
|
||||
Assign,
|
||||
AssignMulti,
|
||||
BasicBlock,
|
||||
Box,
|
||||
Branch,
|
||||
Call,
|
||||
CallC,
|
||||
Cast,
|
||||
ComparisonOp,
|
||||
ControlOp,
|
||||
Extend,
|
||||
Float,
|
||||
FloatComparisonOp,
|
||||
FloatNeg,
|
||||
FloatOp,
|
||||
GetAttr,
|
||||
GetElementPtr,
|
||||
Goto,
|
||||
InitStatic,
|
||||
Integer,
|
||||
IntOp,
|
||||
KeepAlive,
|
||||
LoadAddress,
|
||||
LoadErrorValue,
|
||||
LoadGlobal,
|
||||
LoadLiteral,
|
||||
LoadMem,
|
||||
LoadStatic,
|
||||
MethodCall,
|
||||
Op,
|
||||
OpVisitor,
|
||||
RaiseStandardError,
|
||||
RegisterOp,
|
||||
Return,
|
||||
SetAttr,
|
||||
SetMem,
|
||||
Truncate,
|
||||
TupleGet,
|
||||
TupleSet,
|
||||
Unbox,
|
||||
Unreachable,
|
||||
Value,
|
||||
)
|
||||
|
||||
|
||||
class CFG:
    """Control-flow graph.

    Node 0 is always assumed to be the entry point. There must be a
    non-empty set of exits.
    """

    def __init__(
        self,
        succ: dict[BasicBlock, list[BasicBlock]],
        pred: dict[BasicBlock, list[BasicBlock]],
        exits: set[BasicBlock],
    ) -> None:
        assert exits
        # Maps each block to its successor / predecessor blocks.
        self.succ = succ
        self.pred = pred
        # Blocks that have no normal successors.
        self.exits = exits

    def __str__(self) -> str:
        # Sort the exits by numeric label.
        ordered_exits = sorted(self.exits, key=lambda e: int(e.label))
        return "\n".join(
            [f"exits: {ordered_exits}", f"succ: {self.succ}", f"pred: {self.pred}"]
        )
|
||||
|
||||
|
||||
def get_cfg(blocks: list[BasicBlock]) -> CFG:
    """Calculate basic block control-flow graph.

    The result is a CFG that maps each basic block to its successor and
    predecessor blocks, and records the set of exit blocks (blocks whose
    terminator has no targets).
    """
    successors = {}
    predecessors: dict[BasicBlock, list[BasicBlock]] = {}
    exit_blocks = set()
    for block in blocks:
        assert not any(
            isinstance(op, ControlOp) for op in block.ops[:-1]
        ), "Control-flow ops must be at the end of blocks"

        targets = list(block.terminator.targets())
        if not targets:
            exit_blocks.add(block)

        # Errors can occur anywhere inside a block, which means that
        # we can't assume that the entire block has executed before
        # jumping to the error handler. In our CFG construction, we
        # model this as saying that a block can jump to its error
        # handler or the error handlers of any of its normal
        # successors (to represent an error before that next block
        # completes). This works well for analyses like "must
        # defined", where it implies that registers assigned in a
        # block may be undefined in its error handler, but is in
        # general not a precise representation of reality; any
        # analyses that require more fidelity must wait until after
        # exception insertion.
        #
        # The handlers are collected from the block and its *normal*
        # successors only, before any of them is added to the targets.
        handlers = [
            point.error_handler for point in [block, *targets] if point.error_handler
        ]
        targets.extend(handlers)

        successors[block] = targets
        predecessors[block] = []

    for source, destinations in successors.items():
        for destination in destinations:
            predecessors[destination].append(source)
    return CFG(successors, predecessors, exit_blocks)
|
||||
|
||||
|
||||
def get_real_target(label: BasicBlock) -> BasicBlock:
    """Return the block that a jump to 'label' effectively leads to.

    If 'label' consists of nothing but a single Goto, return the target of
    that Goto (only one level is followed). Otherwise return 'label' itself.
    """
    ops = label.ops
    if len(ops) == 1:
        only_op = ops[-1]
        if isinstance(only_op, Goto):
            return only_op.label
    return label
|
||||
|
||||
|
||||
def cleanup_cfg(blocks: list[BasicBlock]) -> None:
    """Cleanup the control flow graph.

    This eliminates obviously dead basic blocks and eliminates blocks that contain
    nothing but a single jump.

    The list 'blocks' is modified in place; the first block is always kept.
    The process repeats until no more blocks are removed.

    There is a lot more that could be done.
    """
    changed = True
    while changed:
        # First collapse any jumps to basic block that only contain a goto
        for block in blocks:
            terminator = block.terminator
            for index, target in enumerate(terminator.targets()):
                terminator.set_target(index, get_real_target(target))

        # Then delete any blocks that have no predecessors
        cfg = get_cfg(blocks)
        survivors = [
            block for position, block in enumerate(blocks) if position == 0 or cfg.pred[block]
        ]
        changed = len(survivors) != len(blocks)
        blocks[:] = survivors
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
AnalysisDict = Dict[Tuple[BasicBlock, int], Set[T]]
|
||||
|
||||
|
||||
class AnalysisResult(Generic[T]):
    """Result of a data-flow analysis.

    'before' and 'after' map a (basic block, op index) location to the set
    of values that holds before and after the op at that location.
    """

    def __init__(self, before: AnalysisDict[T], after: AnalysisDict[T]) -> None:
        self.before = before
        self.after = after

    def __str__(self) -> str:
        return "before: {}\nafter: {}\n".format(self.before, self.after)
|
||||
|
||||
|
||||
GenAndKill = Tuple[Set[T], Set[T]]
|
||||
|
||||
|
||||
class BaseAnalysisVisitor(OpVisitor[GenAndKill[T]]):
    """Base class for visitors that compute a (gen, kill) pair of sets for each op.

    Subclasses implement the abstract methods below. Every other register op
    is routed to visit_register_op(), so subclasses only need to special-case
    the ops they care about there.
    """

    def visit_goto(self, op: Goto) -> GenAndKill[T]:
        # A goto neither generates nor kills anything.
        return set(), set()

    # Methods that subclasses must provide.

    @abstractmethod
    def visit_register_op(self, op: RegisterOp) -> GenAndKill[T]:
        raise NotImplementedError

    @abstractmethod
    def visit_assign(self, op: Assign) -> GenAndKill[T]:
        raise NotImplementedError

    @abstractmethod
    def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[T]:
        raise NotImplementedError

    @abstractmethod
    def visit_set_mem(self, op: SetMem) -> GenAndKill[T]:
        raise NotImplementedError

    # Everything below simply delegates to visit_register_op().

    def visit_call(self, op: Call) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_method_call(self, op: MethodCall) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_load_error_value(self, op: LoadErrorValue) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_load_literal(self, op: LoadLiteral) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_get_attr(self, op: GetAttr) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_set_attr(self, op: SetAttr) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_load_static(self, op: LoadStatic) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_init_static(self, op: InitStatic) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_tuple_get(self, op: TupleGet) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_tuple_set(self, op: TupleSet) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_box(self, op: Box) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_unbox(self, op: Unbox) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_cast(self, op: Cast) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_raise_standard_error(self, op: RaiseStandardError) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_call_c(self, op: CallC) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_truncate(self, op: Truncate) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_extend(self, op: Extend) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_load_global(self, op: LoadGlobal) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_int_op(self, op: IntOp) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_float_op(self, op: FloatOp) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_float_neg(self, op: FloatNeg) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_comparison_op(self, op: ComparisonOp) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_float_comparison_op(self, op: FloatComparisonOp) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_load_mem(self, op: LoadMem) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_get_element_ptr(self, op: GetElementPtr) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_load_address(self, op: LoadAddress) -> GenAndKill[T]:
        return self.visit_register_op(op)

    def visit_keep_alive(self, op: KeepAlive) -> GenAndKill[T]:
        return self.visit_register_op(op)
|
||||
|
||||
|
||||
class DefinedVisitor(BaseAnalysisVisitor[Value]):
    """Visitor for finding defined registers.

    Note that this only deals with registers and not temporaries, on
    the assumption that we never access temporaries when they might be
    undefined.

    If strict_errors is True, then we regard any use of LoadErrorValue
    as making a register undefined. Otherwise we only do if
    `undefines` is set on the error value.

    This lets us only consider the things we care about during
    uninitialized variable checking while capturing all possibly
    undefined things for refcounting.
    """

    def __init__(self, strict_errors: bool = False) -> None:
        self.strict_errors = strict_errors

    def visit_assign(self, op: Assign) -> GenAndKill[Value]:
        source = op.src
        # Loading an error value may undefine the register.
        if isinstance(source, LoadErrorValue) and (source.undefines or self.strict_errors):
            return set(), {op.dest}
        # Any other assignment defines the destination.
        return {op.dest}, set()

    def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
        # Array registers are special and we don't track the definedness of them.
        return set(), set()

    # The remaining ops neither define nor undefine a register.

    def visit_branch(self, op: Branch) -> GenAndKill[Value]:
        return set(), set()

    def visit_return(self, op: Return) -> GenAndKill[Value]:
        return set(), set()

    def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
        return set(), set()

    def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
        return set(), set()

    def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
        return set(), set()
|
||||
|
||||
|
||||
def analyze_maybe_defined_regs(
    blocks: list[BasicBlock], cfg: CFG, initial_defined: set[Value]
) -> AnalysisResult[Value]:
    """Calculate potentially defined registers at each CFG location.

    A register is defined if it has a value along some path from the initial location.
    """
    visitor = DefinedVisitor()
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=visitor,
        initial=initial_defined,
        backward=False,
        kind=MAYBE_ANALYSIS,
    )
|
||||
|
||||
|
||||
def analyze_must_defined_regs(
    blocks: list[BasicBlock],
    cfg: CFG,
    initial_defined: set[Value],
    regs: Iterable[Value],
    strict_errors: bool = False,
) -> AnalysisResult[Value]:
    """Calculate always defined registers at each CFG location.

    This analysis can work before exception insertion, since it is a
    sound assumption that registers defined in a block might not be
    initialized in its error handler.

    A register is defined if it has a value along all paths from the
    initial location.

    'regs' is the universe of registers considered by the analysis, and
    'strict_errors' is passed on to DefinedVisitor.
    """
    visitor = DefinedVisitor(strict_errors=strict_errors)
    universe = set(regs)
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=visitor,
        initial=initial_defined,
        backward=False,
        kind=MUST_ANALYSIS,
        universe=universe,
    )
|
||||
|
||||
|
||||
class BorrowedArgumentsVisitor(BaseAnalysisVisitor[Value]):
    """Visitor that kills a tracked argument once it is assigned to.

    Nothing is ever generated; only a plain assignment whose destination is
    one of 'args' has an effect.
    """

    def __init__(self, args: set[Value]) -> None:
        # The arguments being tracked.
        self.args = args

    def visit_assign(self, op: Assign) -> GenAndKill[Value]:
        destination = op.dest
        killed: set[Value] = {destination} if destination in self.args else set()
        return set(), killed

    # The remaining ops have no effect.

    def visit_branch(self, op: Branch) -> GenAndKill[Value]:
        return set(), set()

    def visit_return(self, op: Return) -> GenAndKill[Value]:
        return set(), set()

    def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
        return set(), set()

    def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
        return set(), set()

    def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
        return set(), set()

    def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
        return set(), set()
|
||||
|
||||
|
||||
def analyze_borrowed_arguments(
    blocks: list[BasicBlock], cfg: CFG, borrowed: set[Value]
) -> AnalysisResult[Value]:
    """Calculate arguments that can use references borrowed from the caller.

    When assigning to an argument, it no longer is borrowed.

    'borrowed' serves as the tracked arguments, the initial set and the
    universe of the analysis.
    """
    visitor = BorrowedArgumentsVisitor(borrowed)
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=visitor,
        initial=borrowed,
        backward=False,
        kind=MUST_ANALYSIS,
        universe=borrowed,
    )
|
||||
|
||||
|
||||
class UndefinedVisitor(BaseAnalysisVisitor[Value]):
    """Visitor for finding possibly undefined values.

    Nothing is ever generated; a value is killed (no longer possibly
    undefined) by the op that produces it or assigns to it.
    """

    def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
        # A void op produces no value, so there is nothing to kill.
        if op.is_void:
            return set(), set()
        return set(), {op}

    def visit_assign(self, op: Assign) -> GenAndKill[Value]:
        return set(), {op.dest}

    def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
        return set(), {op.dest}

    # The remaining ops have no effect.

    def visit_branch(self, op: Branch) -> GenAndKill[Value]:
        return set(), set()

    def visit_return(self, op: Return) -> GenAndKill[Value]:
        return set(), set()

    def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
        return set(), set()

    def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
        return set(), set()
|
||||
|
||||
|
||||
def analyze_undefined_regs(
    blocks: list[BasicBlock], cfg: CFG, initial_defined: set[Value]
) -> AnalysisResult[Value]:
    """Compute the potentially undefined registers at each CFG location.

    A register counts as undefined when some path from the initial block
    reaches the location with the register still holding an undefined value.
    Everything in ``initial_defined`` is treated as defined at entry
    (function arguments are assumed to always be defined).
    """
    every_value = set(all_values([], blocks))
    undefined_at_entry = every_value - initial_defined
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=UndefinedVisitor(),
        initial=undefined_at_entry,
        backward=False,
        kind=MAYBE_ANALYSIS,
    )
|
||||
|
||||
|
||||
def non_trivial_sources(op: Op) -> set[Value]:
    """Return the sources of ``op`` that are not Integer or Float values."""
    return {src for src in op.sources() if not isinstance(src, (Integer, Float))}
|
||||
|
||||
|
||||
class LivenessVisitor(BaseAnalysisVisitor[Value]):
    """Gen/kill rules for the live-register analysis.

    Reading a non-trivial source generates it; producing or assigning a
    value kills it.
    """

    def visit_branch(self, op: Branch) -> GenAndKill[Value]:
        used = non_trivial_sources(op)
        return used, set()

    def visit_return(self, op: Return) -> GenAndKill[Value]:
        # Integer and Float return values are not tracked.
        if isinstance(op.value, (Integer, Float)):
            return set(), set()
        return {op.value}, set()

    def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
        return set(), set()

    def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
        used = non_trivial_sources(op)
        # Only ops that produce a value define (and therefore kill) something.
        defined = set() if op.is_void else {op}
        return used, defined

    def visit_assign(self, op: Assign) -> GenAndKill[Value]:
        used = non_trivial_sources(op)
        return used, {op.dest}

    def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
        used = non_trivial_sources(op)
        return used, {op.dest}

    def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
        used = non_trivial_sources(op)
        return used, set()
|
||||
|
||||
|
||||
def analyze_live_regs(blocks: list[BasicBlock], cfg: CFG) -> AnalysisResult[Value]:
    """Compute the live registers at each CFG location.

    A register is live at a location when some CFG path starting there can
    read it. This is a backward maybe-analysis that starts from an empty set
    at the exit points.
    """
    nothing_live: set[Value] = set()
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=LivenessVisitor(),
        initial=nothing_live,
        backward=True,
        kind=MAYBE_ANALYSIS,
    )
|
||||
|
||||
|
||||
# Analysis kinds (passed as the `kind` argument of run_analysis).
# Must analysis: states of several predecessors are combined with set intersection.
MUST_ANALYSIS = 0
# Maybe analysis: states of several predecessors are combined with set union.
MAYBE_ANALYSIS = 1
|
||||
|
||||
|
||||
def run_analysis(
    blocks: list[BasicBlock],
    cfg: CFG,
    gen_and_kill: OpVisitor[GenAndKill[T]],
    initial: set[T],
    kind: int,
    backward: bool,
    universe: set[T] | None = None,
) -> AnalysisResult[T]:
    """Run a general set-based data flow analysis.

    Args:
        blocks: All basic blocks
        cfg: Control-flow graph for the code
        gen_and_kill: Implementation of gen and kill functions for each op
        initial: Value of analysis for the entry points (for a forward analysis) or the
            exit points (for a backward analysis)
        kind: MUST_ANALYSIS or MAYBE_ANALYSIS
        backward: If False, the analysis is a forward analysis; it's backward otherwise
        universe: For a must analysis, the set of all possible values. This is the starting
            value for the work list algorithm, which will narrow this down until reaching a
            fixed point. For a maybe analysis the iteration always starts from an empty set
            and this argument is ignored.

    Return analysis results: (before, after)
    """
    block_gen = {}
    block_kill = {}

    # Calculate kill and gen sets for entire basic blocks.
    for block in blocks:
        gen: set[T] = set()
        kill: set[T] = set()
        ops = block.ops
        if backward:
            # A backward analysis walks the ops of a block from last to first.
            ops = list(reversed(ops))
        for op in ops:
            opgen, opkill = op.accept(gen_and_kill)
            # Fold this op into the block summary: its kill cancels an earlier
            # gen, and its gen cancels an earlier kill.
            gen = (gen - opkill) | opgen
            kill = (kill - opgen) | opkill
        block_gen[block] = gen
        block_kill[block] = kill

    # Set up initial state for worklist algorithm.
    worklist = list(blocks)
    if not backward:
        worklist = worklist[::-1]  # Reverse for a small performance improvement
    # Mirror of the work list used for constant-time membership tests.
    workset = set(worklist)
    before: dict[BasicBlock, set[T]] = {}
    after: dict[BasicBlock, set[T]] = {}
    for block in blocks:
        if kind == MAYBE_ANALYSIS:
            before[block] = set()
            after[block] = set()
        else:
            assert universe is not None, "Universe must be defined for a must analysis"
            before[block] = set(universe)
            after[block] = set(universe)

    # For a backward analysis the roles of predecessors and successors are swapped.
    if backward:
        pred_map = cfg.succ
        succ_map = cfg.pred
    else:
        pred_map = cfg.pred
        succ_map = cfg.succ

    # Run work list algorithm to generate in and out sets for each basic block.
    while worklist:
        label = worklist.pop()
        workset.remove(label)
        if pred_map[label]:
            # Merge the states flowing in: union for a maybe analysis,
            # intersection for a must analysis.
            new_before: set[T] | None = None
            for pred in pred_map[label]:
                if new_before is None:
                    new_before = set(after[pred])
                elif kind == MAYBE_ANALYSIS:
                    new_before |= after[pred]
                else:
                    new_before &= after[pred]
            assert new_before is not None
        else:
            # No incoming edges in the direction of analysis: use the initial state.
            new_before = set(initial)
        before[label] = new_before
        new_after = (new_before - block_kill[label]) | block_gen[label]
        if new_after != after[label]:
            # The result for this block changed, so dependent blocks must be revisited.
            for succ in succ_map[label]:
                if succ not in workset:
                    worklist.append(succ)
                    workset.add(succ)
        after[label] = new_after

    # Run algorithm for each basic block to generate opcode-level sets.
    op_before: dict[tuple[BasicBlock, int], set[T]] = {}
    op_after: dict[tuple[BasicBlock, int], set[T]] = {}
    for block in blocks:
        label = block
        cur = before[label]
        ops_enum: Iterator[tuple[int, Op]] = enumerate(block.ops)
        if backward:
            ops_enum = reversed(list(ops_enum))
        for idx, op in ops_enum:
            # Keys are (block, index of the op within block.ops).
            op_before[label, idx] = cur
            opgen, opkill = op.accept(gen_and_kill)
            cur = (cur - opkill) | opgen
            op_after[label, idx] = cur
    if backward:
        # The loop above ran against program order, so what it recorded as
        # "before" an op is the state after it in program order, and vice versa.
        op_after, op_before = op_before, op_after

    return AnalysisResult(op_before, op_after)
|
||||
Binary file not shown.
424
venv/lib/python3.12/site-packages/mypyc/analysis/ircheck.py
Normal file
424
venv/lib/python3.12/site-packages/mypyc/analysis/ircheck.py
Normal file
@@ -0,0 +1,424 @@
|
||||
"""Utilities for checking that internal ir is valid and consistent."""
|
||||
from __future__ import annotations
|
||||
|
||||
from mypyc.ir.func_ir import FUNC_STATICMETHOD, FuncIR
|
||||
from mypyc.ir.ops import (
|
||||
Assign,
|
||||
AssignMulti,
|
||||
BaseAssign,
|
||||
BasicBlock,
|
||||
Box,
|
||||
Branch,
|
||||
Call,
|
||||
CallC,
|
||||
Cast,
|
||||
ComparisonOp,
|
||||
ControlOp,
|
||||
DecRef,
|
||||
Extend,
|
||||
FloatComparisonOp,
|
||||
FloatNeg,
|
||||
FloatOp,
|
||||
GetAttr,
|
||||
GetElementPtr,
|
||||
Goto,
|
||||
IncRef,
|
||||
InitStatic,
|
||||
Integer,
|
||||
IntOp,
|
||||
KeepAlive,
|
||||
LoadAddress,
|
||||
LoadErrorValue,
|
||||
LoadGlobal,
|
||||
LoadLiteral,
|
||||
LoadMem,
|
||||
LoadStatic,
|
||||
MethodCall,
|
||||
Op,
|
||||
OpVisitor,
|
||||
RaiseStandardError,
|
||||
Register,
|
||||
Return,
|
||||
SetAttr,
|
||||
SetMem,
|
||||
Truncate,
|
||||
TupleGet,
|
||||
TupleSet,
|
||||
Unbox,
|
||||
Unreachable,
|
||||
Value,
|
||||
)
|
||||
from mypyc.ir.pprint import format_func
|
||||
from mypyc.ir.rtypes import (
|
||||
RArray,
|
||||
RInstance,
|
||||
RPrimitive,
|
||||
RType,
|
||||
RUnion,
|
||||
bytes_rprimitive,
|
||||
dict_rprimitive,
|
||||
int_rprimitive,
|
||||
is_float_rprimitive,
|
||||
is_object_rprimitive,
|
||||
list_rprimitive,
|
||||
range_rprimitive,
|
||||
set_rprimitive,
|
||||
str_rprimitive,
|
||||
tuple_rprimitive,
|
||||
)
|
||||
|
||||
|
||||
class FnError:
    """A single IR validation error: the offending op or block plus a description."""

    def __init__(self, source: Op | BasicBlock, desc: str) -> None:
        self.source = source
        self.desc = desc

    def __eq__(self, other: object) -> bool:
        # Equal only to another FnError with the same source and description.
        if not isinstance(other, FnError):
            return False
        return self.source == other.source and self.desc == other.desc

    def __repr__(self) -> str:
        return f"FnError(source={self.source}, desc={self.desc})"
|
||||
|
||||
|
||||
def check_func_ir(fn: FuncIR) -> list[FnError]:
    """Validate the IR of one function and return the errors found.

    Structural checks (block termination, misplaced control ops, duplicate
    ops, invalid source references) run first. The per-op checks run only
    when the structural checks found nothing.
    """
    errors = []
    seen_ops = set()

    for block in fn.blocks:
        if not block.terminated:
            culprit = block.ops[-1] if block.ops else block
            errors.append(FnError(source=culprit, desc="Block not terminated"))

        # Every op except the last one in the block.
        for op in block.ops[:-1]:
            if isinstance(op, ControlOp):
                errors.append(FnError(source=op, desc="Block has operations after control op"))
            if op in seen_ops:
                errors.append(FnError(source=op, desc="Func has a duplicate op"))
            seen_ops.add(op)

    errors.extend(check_op_sources_valid(fn))
    if errors:
        return errors

    checker = OpChecker(fn)
    for block in fn.blocks:
        for op in block.ops:
            op.accept(checker)
    return checker.errors
|
||||
|
||||
|
||||
class IrCheckException(Exception):
    """Raised by assert_func_ir_valid when generated IR fails validation."""
|
||||
|
||||
|
||||
def assert_func_ir_valid(fn: FuncIR) -> None:
    """Raise IrCheckException if ``fn`` fails the IR checks.

    The exception message contains the formatted function with each error
    attached to its source.
    """
    errors = check_func_ir(fn)
    if not errors:
        return
    annotated = format_func(fn, [(e.source, e.desc) for e in errors])
    raise IrCheckException("Internal error: Generated invalid IR: \n" + "\n".join(annotated))
|
||||
|
||||
|
||||
def check_op_sources_valid(fn: FuncIR) -> list[FnError]:
    """Check that every op only reads ops and registers that belong to ``fn``.

    Valid ops are those appearing in any block of the function. Valid
    registers are the function's argument registers, the destination of any
    assignment, and any register whose address is taken by a LoadAddress.
    """
    errors = []
    known_ops: set[Op] = set()
    known_registers: set[Register] = set()

    for block in fn.blocks:
        known_ops.update(block.ops)
        for op in block.ops:
            if isinstance(op, BaseAssign):
                known_registers.add(op.dest)
            elif isinstance(op, LoadAddress) and isinstance(op.src, Register):
                known_registers.add(op.src)

    known_registers.update(fn.arg_regs)

    for block in fn.blocks:
        for op in block.ops:
            for source in op.sources():
                if isinstance(source, Integer):
                    # Integer sources are not checked.
                    continue
                if isinstance(source, Op):
                    if source not in known_ops:
                        message = f"Invalid op reference to op of type {type(source).__name__}"
                        errors.append(FnError(source=op, desc=message))
                elif isinstance(source, Register):
                    if source not in known_registers:
                        message = f"Invalid op reference to register {source.name!r}"
                        errors.append(FnError(source=op, desc=message))

    return errors
|
||||
|
||||
|
||||
# Names of primitive types that can_coerce_to treats as mutually exclusive:
# two of these are only compatible when their names are equal.
disjoint_types = {
    rprimitive.name
    for rprimitive in (
        int_rprimitive,
        bytes_rprimitive,
        str_rprimitive,
        dict_rprimitive,
        list_rprimitive,
        set_rprimitive,
        tuple_rprimitive,
        range_rprimitive,
    )
}
|
||||
|
||||
|
||||
def can_coerce_to(src: RType, dest: RType) -> bool:
    """Check whether a value of type ``src`` can be assigned to type ``dest``.

    The check is deliberately permissive; false positives are currently
    acceptable.
    """
    if isinstance(dest, RUnion):
        # It is enough for one member of the destination union to accept src.
        return any(can_coerce_to(src, item) for item in dest.items)

    if not isinstance(dest, RPrimitive):
        # Non-primitive, non-union destinations are not checked.
        return True

    if isinstance(src, RPrimitive):
        both_disjoint = src.name in disjoint_types and dest.name in disjoint_types
        if both_disjoint:
            # Two disjoint primitive types are compatible only if identical.
            return src.name == dest.name
        return src.size == dest.size

    if isinstance(src, RInstance):
        return is_object_rprimitive(dest)

    if isinstance(src, RUnion):
        # IR doesn't have the ability to narrow unions based on
        # control flow, so cannot be a strict all() here.
        return any(can_coerce_to(item, dest) for item in src.items)

    return False
|
||||
|
||||
|
||||
class OpChecker(OpVisitor[None]):
    """Op visitor that validates individual ops of one function.

    Problems are never raised; each one is recorded as an FnError in
    ``self.errors``.
    """

    def __init__(self, parent_fn: FuncIR) -> None:
        self.parent_fn = parent_fn
        self.errors: list[FnError] = []

    def fail(self, source: Op, desc: str) -> None:
        """Record an error for ``source`` with the given description."""
        self.errors.append(FnError(source=source, desc=desc))

    def check_control_op_targets(self, op: ControlOp) -> None:
        """Report any target of ``op`` that is not a block of the parent function."""
        for target in op.targets():
            if target not in self.parent_fn.blocks:
                self.fail(source=op, desc=f"Invalid control operation target: {target.label}")

    def check_type_coercion(self, op: Op, src: RType, dest: RType) -> None:
        """Report an error if ``src`` cannot be coerced to ``dest``."""
        if not can_coerce_to(src, dest):
            self.fail(
                source=op, desc=f"Cannot coerce source type {src.name} to dest type {dest.name}"
            )

    def check_compatibility(self, op: Op, t: RType, s: RType) -> None:
        """Report an error unless ``t`` and ``s`` can be coerced in both directions."""
        if not can_coerce_to(t, s) or not can_coerce_to(s, t):
            self.fail(source=op, desc=f"{t.name} and {s.name} are not compatible")

    def expect_float(self, op: Op, v: Value) -> None:
        """Report an error if ``v`` does not have the float primitive type."""
        if not is_float_rprimitive(v.type):
            self.fail(op, f"Float expected (actual type is {v.type})")

    def expect_non_float(self, op: Op, v: Value) -> None:
        """Report an error if ``v`` has the float primitive type."""
        if is_float_rprimitive(v.type):
            self.fail(op, "Float not expected")

    def visit_goto(self, op: Goto) -> None:
        self.check_control_op_targets(op)

    def visit_branch(self, op: Branch) -> None:
        self.check_control_op_targets(op)

    def visit_return(self, op: Return) -> None:
        self.check_type_coercion(op, op.value.type, self.parent_fn.decl.sig.ret_type)

    def visit_unreachable(self, op: Unreachable) -> None:
        # Unreachables are checked at a higher level since validation
        # requires access to the entire basic block.
        pass

    def visit_assign(self, op: Assign) -> None:
        self.check_type_coercion(op, op.src.type, op.dest.type)

    def visit_assign_multi(self, op: AssignMulti) -> None:
        for src in op.src:
            assert isinstance(op.dest.type, RArray)
            self.check_type_coercion(op, src.type, op.dest.type.item_type)

    def visit_load_error_value(self, op: LoadErrorValue) -> None:
        # Currently it is assumed that all types have an error value.
        # Once this is fixed we can validate that the rtype here actually
        # has an error value.
        pass

    def check_tuple_items_valid_literals(self, op: LoadLiteral, t: tuple[object, ...]) -> None:
        """Report items of a tuple literal (recursively) that are not valid literal values."""
        for x in t:
            if x is not None and not isinstance(x, (str, bytes, bool, int, float, complex, tuple)):
                self.fail(op, f"Invalid type for item of tuple literal: {type(x)})")
            if isinstance(x, tuple):
                self.check_tuple_items_valid_literals(op, x)

    def check_frozenset_items_valid_literals(self, op: LoadLiteral, s: frozenset[object]) -> None:
        """Report items of a frozenset literal that are not valid literal values."""
        for x in s:
            if x is None or isinstance(x, (str, bytes, bool, int, float, complex)):
                pass
            elif isinstance(x, tuple):
                self.check_tuple_items_valid_literals(op, x)
            else:
                self.fail(op, f"Invalid type for item of frozenset literal: {type(x)})")

    def visit_load_literal(self, op: LoadLiteral) -> None:
        """Check that the op's declared type matches the kind of its literal value.

        The declared type must be either the type expected for the value or
        ``builtins.object``.
        """
        expected_type = None
        if op.value is None:
            expected_type = "builtins.object"
        elif isinstance(op.value, bool):
            # bool is a subclass of int, so this test must precede the int
            # test; otherwise this branch can never be taken.
            expected_type = "builtins.object"
        elif isinstance(op.value, int):
            expected_type = "builtins.int"
        elif isinstance(op.value, str):
            expected_type = "builtins.str"
        elif isinstance(op.value, bytes):
            expected_type = "builtins.bytes"
        elif isinstance(op.value, float):
            expected_type = "builtins.float"
        elif isinstance(op.value, complex):
            expected_type = "builtins.object"
        elif isinstance(op.value, tuple):
            expected_type = "builtins.tuple"
            self.check_tuple_items_valid_literals(op, op.value)
        elif isinstance(op.value, frozenset):
            # There's no frozenset_rprimitive type since it'd be pretty useless so we just pretend
            # it's a set (when it's really a frozenset).
            expected_type = "builtins.set"
            self.check_frozenset_items_valid_literals(op, op.value)

        assert expected_type is not None, "Missed a case for LoadLiteral check"

        if op.type.name not in [expected_type, "builtins.object"]:
            self.fail(
                op,
                f"Invalid literal value for type: value has "
                f"type {expected_type}, but op has type {op.type.name}",
            )

    def visit_get_attr(self, op: GetAttr) -> None:
        # Nothing to do.
        pass

    def visit_set_attr(self, op: SetAttr) -> None:
        # Nothing to do.
        pass

    # Static operations cannot be checked at the function level.
    def visit_load_static(self, op: LoadStatic) -> None:
        pass

    def visit_init_static(self, op: InitStatic) -> None:
        pass

    def visit_tuple_get(self, op: TupleGet) -> None:
        # Nothing to do.
        pass

    def visit_tuple_set(self, op: TupleSet) -> None:
        # Nothing to do.
        pass

    def visit_inc_ref(self, op: IncRef) -> None:
        # Nothing to do.
        pass

    def visit_dec_ref(self, op: DecRef) -> None:
        # Nothing to do.
        pass

    def visit_call(self, op: Call) -> None:
        # Length is checked in constructor, and return type is set
        # in a way that can't be incorrect
        for arg_value, arg_runtime in zip(op.args, op.fn.sig.args):
            self.check_type_coercion(op, arg_value.type, arg_runtime.type)

    def visit_method_call(self, op: MethodCall) -> None:
        # Similar to above, but we must look up method first.
        method_decl = op.receiver_type.class_ir.method_decl(op.method)
        if method_decl.kind == FUNC_STATICMETHOD:
            decl_index = 0
        else:
            decl_index = 1

        if len(op.args) + decl_index != len(method_decl.sig.args):
            self.fail(op, "Incorrect number of args for method call.")

        # Skip the receiver argument (self)
        for arg_value, arg_runtime in zip(op.args, method_decl.sig.args[decl_index:]):
            self.check_type_coercion(op, arg_value.type, arg_runtime.type)

    def visit_cast(self, op: Cast) -> None:
        pass

    def visit_box(self, op: Box) -> None:
        pass

    def visit_unbox(self, op: Unbox) -> None:
        pass

    def visit_raise_standard_error(self, op: RaiseStandardError) -> None:
        pass

    def visit_call_c(self, op: CallC) -> None:
        pass

    def visit_truncate(self, op: Truncate) -> None:
        pass

    def visit_extend(self, op: Extend) -> None:
        pass

    def visit_load_global(self, op: LoadGlobal) -> None:
        pass

    def visit_int_op(self, op: IntOp) -> None:
        self.expect_non_float(op, op.lhs)
        self.expect_non_float(op, op.rhs)

    def visit_comparison_op(self, op: ComparisonOp) -> None:
        self.check_compatibility(op, op.lhs.type, op.rhs.type)
        self.expect_non_float(op, op.lhs)
        self.expect_non_float(op, op.rhs)

    def visit_float_op(self, op: FloatOp) -> None:
        self.expect_float(op, op.lhs)
        self.expect_float(op, op.rhs)

    def visit_float_neg(self, op: FloatNeg) -> None:
        self.expect_float(op, op.src)

    def visit_float_comparison_op(self, op: FloatComparisonOp) -> None:
        self.expect_float(op, op.lhs)
        self.expect_float(op, op.rhs)

    def visit_load_mem(self, op: LoadMem) -> None:
        pass

    def visit_set_mem(self, op: SetMem) -> None:
        pass

    def visit_get_element_ptr(self, op: GetElementPtr) -> None:
        pass

    def visit_load_address(self, op: LoadAddress) -> None:
        pass

    def visit_keep_alive(self, op: KeepAlive) -> None:
        pass
|
||||
Binary file not shown.
203
venv/lib/python3.12/site-packages/mypyc/analysis/selfleaks.py
Normal file
203
venv/lib/python3.12/site-packages/mypyc/analysis/selfleaks.py
Normal file
@@ -0,0 +1,203 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Set, Tuple
|
||||
|
||||
from mypyc.analysis.dataflow import CFG, MAYBE_ANALYSIS, AnalysisResult, run_analysis
|
||||
from mypyc.ir.ops import (
|
||||
Assign,
|
||||
AssignMulti,
|
||||
BasicBlock,
|
||||
Box,
|
||||
Branch,
|
||||
Call,
|
||||
CallC,
|
||||
Cast,
|
||||
ComparisonOp,
|
||||
Extend,
|
||||
FloatComparisonOp,
|
||||
FloatNeg,
|
||||
FloatOp,
|
||||
GetAttr,
|
||||
GetElementPtr,
|
||||
Goto,
|
||||
InitStatic,
|
||||
IntOp,
|
||||
KeepAlive,
|
||||
LoadAddress,
|
||||
LoadErrorValue,
|
||||
LoadGlobal,
|
||||
LoadLiteral,
|
||||
LoadMem,
|
||||
LoadStatic,
|
||||
MethodCall,
|
||||
OpVisitor,
|
||||
RaiseStandardError,
|
||||
Register,
|
||||
RegisterOp,
|
||||
Return,
|
||||
SetAttr,
|
||||
SetMem,
|
||||
Truncate,
|
||||
TupleGet,
|
||||
TupleSet,
|
||||
Unbox,
|
||||
Unreachable,
|
||||
)
|
||||
from mypyc.ir.rtypes import RInstance
|
||||
|
||||
# A (gen, kill) pair. The only element either set ever holds is None.
GenAndKill = Tuple[Set[None], Set[None]]

# CLEAN: the op generates nothing and kills nothing.
CLEAN: GenAndKill = (set(), set())
# DIRTY: the op both generates and kills the single None marker.
DIRTY: GenAndKill = ({None}, {None})
|
||||
|
||||
|
||||
class SelfLeakedVisitor(OpVisitor[GenAndKill]):
    """Determine whether arbitrary code may observe 'self' inside '__init__'.

    Stated more precisely: the resulting set is non-empty when, along some
    path from the IR entry point, arbitrary code with access to 'self' could
    already have run.

    (Access through 'gc.get_objects()' is not taken into account.)
    """

    def __init__(self, self_reg: Register) -> None:
        self.self_reg = self_reg

    def visit_goto(self, op: Goto) -> GenAndKill:
        return CLEAN

    def visit_branch(self, op: Branch) -> GenAndKill:
        return CLEAN

    def visit_return(self, op: Return) -> GenAndKill:
        # Every exit from the function counts as 'dirty', because returning
        # implicitly hands 'self' back to the caller.
        return DIRTY

    def visit_unreachable(self, op: Unreachable) -> GenAndKill:
        return CLEAN

    def visit_assign(self, op: Assign) -> GenAndKill:
        # Copying 'self' somewhere, or overwriting the 'self' register, is a leak.
        touches_self = op.src is self.self_reg or op.dest is self.self_reg
        return DIRTY if touches_self else CLEAN

    def visit_assign_multi(self, op: AssignMulti) -> GenAndKill:
        return CLEAN

    def visit_set_mem(self, op: SetMem) -> GenAndKill:
        return CLEAN

    def visit_call(self, op: Call) -> GenAndKill:
        callee = op.fn
        if callee.class_name and callee.name == "__init__":
            receiver_type = callee.sig.args[0].type
            assert isinstance(receiver_type, RInstance)
            # An '__init__' of a class not flagged as leaking self is safe to call.
            if not receiver_type.class_ir.init_self_leak:
                return CLEAN
        return self.check_register_op(op)

    def visit_method_call(self, op: MethodCall) -> GenAndKill:
        return self.check_register_op(op)

    def visit_load_error_value(self, op: LoadErrorValue) -> GenAndKill:
        return CLEAN

    def visit_load_literal(self, op: LoadLiteral) -> GenAndKill:
        return CLEAN

    def visit_get_attr(self, op: GetAttr) -> GenAndKill:
        owner = op.class_type.class_ir
        if not owner.get_method(op.attr):
            return CLEAN
        # The attribute is a property, so reading it calls a function.
        return self.check_register_op(op)

    def visit_set_attr(self, op: SetAttr) -> GenAndKill:
        owner = op.class_type.class_ir
        if not owner.get_method(op.attr):
            return CLEAN
        # The attribute is a property, so writing it calls a function.
        return self.check_register_op(op)

    def visit_load_static(self, op: LoadStatic) -> GenAndKill:
        return CLEAN

    def visit_init_static(self, op: InitStatic) -> GenAndKill:
        return self.check_register_op(op)

    def visit_tuple_get(self, op: TupleGet) -> GenAndKill:
        return CLEAN

    def visit_tuple_set(self, op: TupleSet) -> GenAndKill:
        return self.check_register_op(op)

    def visit_box(self, op: Box) -> GenAndKill:
        return self.check_register_op(op)

    def visit_unbox(self, op: Unbox) -> GenAndKill:
        return self.check_register_op(op)

    def visit_cast(self, op: Cast) -> GenAndKill:
        return self.check_register_op(op)

    def visit_raise_standard_error(self, op: RaiseStandardError) -> GenAndKill:
        return CLEAN

    def visit_call_c(self, op: CallC) -> GenAndKill:
        return self.check_register_op(op)

    def visit_truncate(self, op: Truncate) -> GenAndKill:
        return CLEAN

    def visit_extend(self, op: Extend) -> GenAndKill:
        return CLEAN

    def visit_load_global(self, op: LoadGlobal) -> GenAndKill:
        return CLEAN

    def visit_int_op(self, op: IntOp) -> GenAndKill:
        return CLEAN

    def visit_comparison_op(self, op: ComparisonOp) -> GenAndKill:
        return CLEAN

    def visit_float_op(self, op: FloatOp) -> GenAndKill:
        return CLEAN

    def visit_float_neg(self, op: FloatNeg) -> GenAndKill:
        return CLEAN

    def visit_float_comparison_op(self, op: FloatComparisonOp) -> GenAndKill:
        return CLEAN

    def visit_load_mem(self, op: LoadMem) -> GenAndKill:
        return CLEAN

    def visit_get_element_ptr(self, op: GetElementPtr) -> GenAndKill:
        return CLEAN

    def visit_load_address(self, op: LoadAddress) -> GenAndKill:
        return CLEAN

    def visit_keep_alive(self, op: KeepAlive) -> GenAndKill:
        return CLEAN

    def check_register_op(self, op: RegisterOp) -> GenAndKill:
        """Return DIRTY if the 'self' register is one of the op's sources, else CLEAN."""
        for src in op.sources():
            if src is self.self_reg:
                return DIRTY
        return CLEAN
|
||||
|
||||
|
||||
def analyze_self_leaks(
    blocks: list[BasicBlock], self_reg: Register, cfg: CFG
) -> AnalysisResult[None]:
    """Run the self-leak analysis for the register holding 'self'.

    This is a forward maybe-analysis that starts from an empty set at the
    entry point and uses SelfLeakedVisitor for the gen/kill rules.
    """
    visitor = SelfLeakedVisitor(self_reg)
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=visitor,
        initial=set(),
        backward=False,
        kind=MAYBE_ANALYSIS,
    )
|
||||
Reference in New Issue
Block a user