Major fixes and new features
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
436
venv/lib/python3.12/site-packages/mypyc/analysis/attrdefined.py
Normal file
436
venv/lib/python3.12/site-packages/mypyc/analysis/attrdefined.py
Normal file
@@ -0,0 +1,436 @@
|
||||
"""Always defined attribute analysis.
|
||||
|
||||
An always defined attribute has some statements in __init__ or the
|
||||
class body that cause the attribute to be always initialized when an
|
||||
instance is constructed. It must also not be possible to read the
|
||||
attribute before initialization, and it can't be deletable.
|
||||
|
||||
We can assume that the value is always defined when reading an always
|
||||
defined attribute. Otherwise we'll need to raise AttributeError if the
|
||||
value is undefined (i.e. has the error value).
|
||||
|
||||
We use data flow analysis to figure out attributes that are always
|
||||
defined. Example:
|
||||
|
||||
class C:
|
||||
def __init__(self) -> None:
|
||||
self.x = 0
|
||||
if func():
|
||||
self.y = 1
|
||||
else:
|
||||
self.y = 2
|
||||
self.z = 3
|
||||
|
||||
In this example, the attributes 'x' and 'y' are always defined, but 'z'
|
||||
is not. The analysis assumes that we know that there won't be any subclasses.
|
||||
|
||||
The analysis also works if there is a known, closed set of subclasses.
|
||||
An attribute defined in a base class can only be always defined if it's
|
||||
also always defined in all subclasses.
|
||||
|
||||
As soon as __init__ contains an op that can 'leak' self to another
|
||||
function, we will stop inferring always defined attributes, since the
|
||||
analysis is mostly intra-procedural and only looks at __init__ methods.
|
||||
The called code could read an uninitialized attribute. Example:
|
||||
|
||||
class C:
|
||||
def __init__(self) -> None:
|
||||
self.x = self.foo()
|
||||
|
||||
def foo(self) -> int:
|
||||
...
|
||||
|
||||
Now we won't infer 'x' as always defined, since 'foo' might read 'x'
|
||||
before initialization.
|
||||
|
||||
As an exception to the above limitation, we perform inter-procedural
|
||||
analysis of super().__init__ calls, since these are very common.
|
||||
|
||||
Our analysis is somewhat optimistic. We assume that nobody calls a
|
||||
method of a partially uninitialized object through gc.get_objects(), in
|
||||
particular. Code like this could potentially cause a segfault with a null
|
||||
pointer dereference. This seems very unlikely to be an issue in practice,
|
||||
however.
|
||||
|
||||
Accessing an attribute via getattr always checks for undefined attributes
|
||||
and thus works if the object is partially uninitialized. This can be used
|
||||
as a workaround if somebody ever needs to inspect partially uninitialized
|
||||
objects via gc.get_objects().
|
||||
|
||||
The analysis runs after IR building as a separate pass. Since we only
|
||||
run this on __init__ methods, this analysis pass will be fairly quick.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Final, Set, Tuple
|
||||
|
||||
from mypyc.analysis.dataflow import (
|
||||
CFG,
|
||||
MAYBE_ANALYSIS,
|
||||
AnalysisResult,
|
||||
BaseAnalysisVisitor,
|
||||
get_cfg,
|
||||
run_analysis,
|
||||
)
|
||||
from mypyc.analysis.selfleaks import analyze_self_leaks
|
||||
from mypyc.ir.class_ir import ClassIR
|
||||
from mypyc.ir.ops import (
|
||||
Assign,
|
||||
AssignMulti,
|
||||
BasicBlock,
|
||||
Branch,
|
||||
Call,
|
||||
ControlOp,
|
||||
GetAttr,
|
||||
Register,
|
||||
RegisterOp,
|
||||
Return,
|
||||
SetAttr,
|
||||
SetMem,
|
||||
Unreachable,
|
||||
)
|
||||
from mypyc.ir.rtypes import RInstance
|
||||
|
||||
# If True, print out all always-defined attributes of native classes (to aid
|
||||
# debugging and testing)
|
||||
dump_always_defined: Final = False
|
||||
|
||||
|
||||
def analyze_always_defined_attrs(class_irs: list[ClassIR]) -> None:
|
||||
"""Find always defined attributes all classes of a compilation unit.
|
||||
|
||||
Also tag attribute initialization ops to not decref the previous
|
||||
value (as this would read a NULL pointer and segfault).
|
||||
|
||||
Update the _always_initialized_attrs, _sometimes_initialized_attrs
|
||||
and init_self_leak attributes in ClassIR instances.
|
||||
|
||||
This is the main entry point.
|
||||
"""
|
||||
seen: set[ClassIR] = set()
|
||||
|
||||
# First pass: only look at target class and classes in MRO
|
||||
for cl in class_irs:
|
||||
analyze_always_defined_attrs_in_class(cl, seen)
|
||||
|
||||
# Second pass: look at all derived class
|
||||
seen = set()
|
||||
for cl in class_irs:
|
||||
update_always_defined_attrs_using_subclasses(cl, seen)
|
||||
|
||||
# Final pass: detect attributes that need to use a bitmap to track definedness
|
||||
seen = set()
|
||||
for cl in class_irs:
|
||||
detect_undefined_bitmap(cl, seen)
|
||||
|
||||
|
||||
def analyze_always_defined_attrs_in_class(cl: ClassIR, seen: set[ClassIR]) -> None:
|
||||
if cl in seen:
|
||||
return
|
||||
|
||||
seen.add(cl)
|
||||
|
||||
if (
|
||||
cl.is_trait
|
||||
or cl.inherits_python
|
||||
or cl.allow_interpreted_subclasses
|
||||
or cl.builtin_base is not None
|
||||
or cl.children is None
|
||||
or cl.is_serializable()
|
||||
):
|
||||
# Give up -- we can't enforce that attributes are always defined.
|
||||
return
|
||||
|
||||
# First analyze all base classes. Track seen classes to avoid duplicate work.
|
||||
for base in cl.mro[1:]:
|
||||
analyze_always_defined_attrs_in_class(base, seen)
|
||||
|
||||
m = cl.get_method("__init__")
|
||||
if m is None:
|
||||
cl._always_initialized_attrs = cl.attrs_with_defaults.copy()
|
||||
cl._sometimes_initialized_attrs = cl.attrs_with_defaults.copy()
|
||||
return
|
||||
self_reg = m.arg_regs[0]
|
||||
cfg = get_cfg(m.blocks)
|
||||
dirty = analyze_self_leaks(m.blocks, self_reg, cfg)
|
||||
maybe_defined = analyze_maybe_defined_attrs_in_init(
|
||||
m.blocks, self_reg, cl.attrs_with_defaults, cfg
|
||||
)
|
||||
all_attrs: set[str] = set()
|
||||
for base in cl.mro:
|
||||
all_attrs.update(base.attributes)
|
||||
maybe_undefined = analyze_maybe_undefined_attrs_in_init(
|
||||
m.blocks, self_reg, initial_undefined=all_attrs - cl.attrs_with_defaults, cfg=cfg
|
||||
)
|
||||
|
||||
always_defined = find_always_defined_attributes(
|
||||
m.blocks, self_reg, all_attrs, maybe_defined, maybe_undefined, dirty
|
||||
)
|
||||
always_defined = {a for a in always_defined if not cl.is_deletable(a)}
|
||||
|
||||
cl._always_initialized_attrs = always_defined
|
||||
if dump_always_defined:
|
||||
print(cl.name, sorted(always_defined))
|
||||
cl._sometimes_initialized_attrs = find_sometimes_defined_attributes(
|
||||
m.blocks, self_reg, maybe_defined, dirty
|
||||
)
|
||||
|
||||
mark_attr_initialiation_ops(m.blocks, self_reg, maybe_defined, dirty)
|
||||
|
||||
# Check if __init__ can run unpredictable code (leak 'self').
|
||||
any_dirty = False
|
||||
for b in m.blocks:
|
||||
for i, op in enumerate(b.ops):
|
||||
if dirty.after[b, i] and not isinstance(op, Return):
|
||||
any_dirty = True
|
||||
break
|
||||
cl.init_self_leak = any_dirty
|
||||
|
||||
|
||||
def find_always_defined_attributes(
|
||||
blocks: list[BasicBlock],
|
||||
self_reg: Register,
|
||||
all_attrs: set[str],
|
||||
maybe_defined: AnalysisResult[str],
|
||||
maybe_undefined: AnalysisResult[str],
|
||||
dirty: AnalysisResult[None],
|
||||
) -> set[str]:
|
||||
"""Find attributes that are always initialized in some basic blocks.
|
||||
|
||||
The analysis results are expected to be up-to-date for the blocks.
|
||||
|
||||
Return a set of always defined attributes.
|
||||
"""
|
||||
attrs = all_attrs.copy()
|
||||
for block in blocks:
|
||||
for i, op in enumerate(block.ops):
|
||||
# If an attribute we *read* may be undefined, it isn't always defined.
|
||||
if isinstance(op, GetAttr) and op.obj is self_reg:
|
||||
if op.attr in maybe_undefined.before[block, i]:
|
||||
attrs.discard(op.attr)
|
||||
# If an attribute we *set* may be sometimes undefined and
|
||||
# sometimes defined, don't consider it always defined. Unlike
|
||||
# the get case, it's fine for the attribute to be undefined.
|
||||
# The set operation will then be treated as initialization.
|
||||
if isinstance(op, SetAttr) and op.obj is self_reg:
|
||||
if (
|
||||
op.attr in maybe_undefined.before[block, i]
|
||||
and op.attr in maybe_defined.before[block, i]
|
||||
):
|
||||
attrs.discard(op.attr)
|
||||
# Treat an op that might run arbitrary code as an "exit"
|
||||
# in terms of the analysis -- we can't do any inference
|
||||
# afterwards reliably.
|
||||
if dirty.after[block, i]:
|
||||
if not dirty.before[block, i]:
|
||||
attrs = attrs & (
|
||||
maybe_defined.after[block, i] - maybe_undefined.after[block, i]
|
||||
)
|
||||
break
|
||||
if isinstance(op, ControlOp):
|
||||
for target in op.targets():
|
||||
# Gotos/branches can also be "exits".
|
||||
if not dirty.after[block, i] and dirty.before[target, 0]:
|
||||
attrs = attrs & (
|
||||
maybe_defined.after[target, 0] - maybe_undefined.after[target, 0]
|
||||
)
|
||||
return attrs
|
||||
|
||||
|
||||
def find_sometimes_defined_attributes(
|
||||
blocks: list[BasicBlock],
|
||||
self_reg: Register,
|
||||
maybe_defined: AnalysisResult[str],
|
||||
dirty: AnalysisResult[None],
|
||||
) -> set[str]:
|
||||
"""Find attributes that are sometimes initialized in some basic blocks."""
|
||||
attrs: set[str] = set()
|
||||
for block in blocks:
|
||||
for i, op in enumerate(block.ops):
|
||||
# Only look at possibly defined attributes at exits.
|
||||
if dirty.after[block, i]:
|
||||
if not dirty.before[block, i]:
|
||||
attrs = attrs | maybe_defined.after[block, i]
|
||||
break
|
||||
if isinstance(op, ControlOp):
|
||||
for target in op.targets():
|
||||
if not dirty.after[block, i] and dirty.before[target, 0]:
|
||||
attrs = attrs | maybe_defined.after[target, 0]
|
||||
return attrs
|
||||
|
||||
|
||||
def mark_attr_initialiation_ops(
|
||||
blocks: list[BasicBlock],
|
||||
self_reg: Register,
|
||||
maybe_defined: AnalysisResult[str],
|
||||
dirty: AnalysisResult[None],
|
||||
) -> None:
|
||||
"""Tag all SetAttr ops in the basic blocks that initialize attributes.
|
||||
|
||||
Initialization ops assume that the previous attribute value is the error value,
|
||||
so there's no need to decref or check for definedness.
|
||||
"""
|
||||
for block in blocks:
|
||||
for i, op in enumerate(block.ops):
|
||||
if isinstance(op, SetAttr) and op.obj is self_reg:
|
||||
attr = op.attr
|
||||
if attr not in maybe_defined.before[block, i] and not dirty.after[block, i]:
|
||||
op.mark_as_initializer()
|
||||
|
||||
|
||||
GenAndKill = Tuple[Set[str], Set[str]]
|
||||
|
||||
|
||||
def attributes_initialized_by_init_call(op: Call) -> set[str]:
|
||||
"""Calculate attributes that are always initialized by a super().__init__ call."""
|
||||
self_type = op.fn.sig.args[0].type
|
||||
assert isinstance(self_type, RInstance)
|
||||
cl = self_type.class_ir
|
||||
return {a for base in cl.mro for a in base.attributes if base.is_always_defined(a)}
|
||||
|
||||
|
||||
def attributes_maybe_initialized_by_init_call(op: Call) -> set[str]:
|
||||
"""Calculate attributes that may be initialized by a super().__init__ call."""
|
||||
self_type = op.fn.sig.args[0].type
|
||||
assert isinstance(self_type, RInstance)
|
||||
cl = self_type.class_ir
|
||||
return attributes_initialized_by_init_call(op) | cl._sometimes_initialized_attrs
|
||||
|
||||
|
||||
class AttributeMaybeDefinedVisitor(BaseAnalysisVisitor[str]):
|
||||
"""Find attributes that may have been defined via some code path.
|
||||
|
||||
Consider initializations in class body and assignments to 'self.x'
|
||||
and calls to base class '__init__'.
|
||||
"""
|
||||
|
||||
def __init__(self, self_reg: Register) -> None:
|
||||
self.self_reg = self_reg
|
||||
|
||||
def visit_branch(self, op: Branch) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_return(self, op: Return) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_unreachable(self, op: Unreachable) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_register_op(self, op: RegisterOp) -> tuple[set[str], set[str]]:
|
||||
if isinstance(op, SetAttr) and op.obj is self.self_reg:
|
||||
return {op.attr}, set()
|
||||
if isinstance(op, Call) and op.fn.class_name and op.fn.name == "__init__":
|
||||
return attributes_maybe_initialized_by_init_call(op), set()
|
||||
return set(), set()
|
||||
|
||||
def visit_assign(self, op: Assign) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_assign_multi(self, op: AssignMulti) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_set_mem(self, op: SetMem) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
|
||||
def analyze_maybe_defined_attrs_in_init(
|
||||
blocks: list[BasicBlock], self_reg: Register, attrs_with_defaults: set[str], cfg: CFG
|
||||
) -> AnalysisResult[str]:
|
||||
return run_analysis(
|
||||
blocks=blocks,
|
||||
cfg=cfg,
|
||||
gen_and_kill=AttributeMaybeDefinedVisitor(self_reg),
|
||||
initial=attrs_with_defaults,
|
||||
backward=False,
|
||||
kind=MAYBE_ANALYSIS,
|
||||
)
|
||||
|
||||
|
||||
class AttributeMaybeUndefinedVisitor(BaseAnalysisVisitor[str]):
|
||||
"""Find attributes that may be undefined via some code path.
|
||||
|
||||
Consider initializations in class body, assignments to 'self.x'
|
||||
and calls to base class '__init__'.
|
||||
"""
|
||||
|
||||
def __init__(self, self_reg: Register) -> None:
|
||||
self.self_reg = self_reg
|
||||
|
||||
def visit_branch(self, op: Branch) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_return(self, op: Return) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_unreachable(self, op: Unreachable) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_register_op(self, op: RegisterOp) -> tuple[set[str], set[str]]:
|
||||
if isinstance(op, SetAttr) and op.obj is self.self_reg:
|
||||
return set(), {op.attr}
|
||||
if isinstance(op, Call) and op.fn.class_name and op.fn.name == "__init__":
|
||||
return set(), attributes_initialized_by_init_call(op)
|
||||
return set(), set()
|
||||
|
||||
def visit_assign(self, op: Assign) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_assign_multi(self, op: AssignMulti) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
def visit_set_mem(self, op: SetMem) -> tuple[set[str], set[str]]:
|
||||
return set(), set()
|
||||
|
||||
|
||||
def analyze_maybe_undefined_attrs_in_init(
|
||||
blocks: list[BasicBlock], self_reg: Register, initial_undefined: set[str], cfg: CFG
|
||||
) -> AnalysisResult[str]:
|
||||
return run_analysis(
|
||||
blocks=blocks,
|
||||
cfg=cfg,
|
||||
gen_and_kill=AttributeMaybeUndefinedVisitor(self_reg),
|
||||
initial=initial_undefined,
|
||||
backward=False,
|
||||
kind=MAYBE_ANALYSIS,
|
||||
)
|
||||
|
||||
|
||||
def update_always_defined_attrs_using_subclasses(cl: ClassIR, seen: set[ClassIR]) -> None:
|
||||
"""Remove attributes not defined in all subclasses from always defined attrs."""
|
||||
if cl in seen:
|
||||
return
|
||||
if cl.children is None:
|
||||
# Subclasses are unknown
|
||||
return
|
||||
removed = set()
|
||||
for attr in cl._always_initialized_attrs:
|
||||
for child in cl.children:
|
||||
update_always_defined_attrs_using_subclasses(child, seen)
|
||||
if attr not in child._always_initialized_attrs:
|
||||
removed.add(attr)
|
||||
cl._always_initialized_attrs -= removed
|
||||
seen.add(cl)
|
||||
|
||||
|
||||
def detect_undefined_bitmap(cl: ClassIR, seen: set[ClassIR]) -> None:
|
||||
if cl.is_trait:
|
||||
return
|
||||
|
||||
if cl in seen:
|
||||
return
|
||||
seen.add(cl)
|
||||
for base in cl.base_mro[1:]:
|
||||
detect_undefined_bitmap(cl, seen)
|
||||
|
||||
if len(cl.base_mro) > 1:
|
||||
cl.bitmap_attrs.extend(cl.base_mro[1].bitmap_attrs)
|
||||
for n, t in cl.attributes.items():
|
||||
if t.error_overlap and not cl.is_always_defined(n):
|
||||
cl.bitmap_attrs.append(n)
|
||||
|
||||
for base in cl.mro[1:]:
|
||||
if base.is_trait:
|
||||
for n, t in base.attributes.items():
|
||||
if t.error_overlap and not cl.is_always_defined(n) and n not in cl.bitmap_attrs:
|
||||
cl.bitmap_attrs.append(n)
|
||||
Reference in New Issue
Block a user