Major fixes and new features
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-09-25 15:51:48 +09:00
parent dd7349bb4c
commit ddce9f5125
5586 changed files with 1470941 additions and 0 deletions

View File

@@ -0,0 +1,182 @@
"""Transform that inserts error checks after opcodes.
When initially building the IR, the code doesn't perform error checks
for exceptions. This module is used to insert all required error checks
afterwards. Each Op describes how it indicates an error condition (if
at all).
We need to split basic blocks on each error check since branches can
only be placed at the end of a basic block.
"""
from __future__ import annotations
from mypyc.ir.func_ir import FuncIR
from mypyc.ir.ops import (
ERR_ALWAYS,
ERR_FALSE,
ERR_MAGIC,
ERR_MAGIC_OVERLAPPING,
ERR_NEVER,
NO_TRACEBACK_LINE_NO,
BasicBlock,
Branch,
CallC,
ComparisonOp,
Float,
GetAttr,
Integer,
LoadErrorValue,
Op,
RegisterOp,
Return,
SetAttr,
TupleGet,
Value,
)
from mypyc.ir.rtypes import RTuple, bool_rprimitive, is_float_rprimitive
from mypyc.primitives.exc_ops import err_occurred_op
from mypyc.primitives.registry import CFunctionDescription
def insert_exception_handling(ir: FuncIR) -> None:
    """Insert explicit error checks after the ops of ``ir`` that can fail.

    Each block first gets its op error kinds refined by adjust_error_kinds.
    A default error handler block is created lazily, the first time a block
    containing an op whose ``can_raise()`` is true is encountered. Ops that
    fail without a block-specific handler branch to that default block, which
    just returns an error value.

    If a default handler was created, ``ir.blocks`` is replaced with the
    result of split_blocks_at_errors.
    """
    default_handler: BasicBlock | None = None
    for blk in ir.blocks:
        adjust_error_kinds(blk)
        if default_handler is not None:
            # The shared handler already exists; nothing more to decide.
            continue
        if any(op.can_raise() for op in blk.ops):
            default_handler = add_default_handler_block(ir)
    if default_handler:
        ir.blocks = split_blocks_at_errors(ir.blocks, default_handler, ir.traceback_name)
def add_default_handler_block(ir: FuncIR) -> BasicBlock:
    """Append a block to ``ir.blocks`` that returns the error value and return it.

    The new block contains two ops: a LoadErrorValue for the function's
    return type, followed by a Return of that value.
    """
    handler = BasicBlock()
    ir.blocks.append(handler)
    error_value = LoadErrorValue(ir.ret_type)
    handler.ops.append(error_value)
    handler.ops.append(Return(error_value))
    return handler
def split_blocks_at_errors(
    blocks: list[BasicBlock], default_error_handler: BasicBlock, func_name: str | None
) -> list[BasicBlock]:
    """Split blocks after every fallible op and branch to an error handler.

    Branches can only be placed at the end of a basic block, so each
    RegisterOp whose error kind is not ERR_NEVER ends its block with a Branch
    that jumps to the error handler on failure and to a fresh block otherwise.

    Args:
        blocks: Blocks to process. Their ``ops`` lists are rebuilt and their
            ``error_handler`` attributes are reset to None.
        default_error_handler: Target used for blocks that have no
            ``error_handler`` of their own.
        func_name: Name recorded in the traceback entry of inserted branches;
            no traceback entry is recorded when this is None.

    Returns:
        The new list of blocks, containing the original blocks interleaved
        with the blocks created by the splits.
    """
    result: list[BasicBlock] = []

    for block in blocks:
        pending = block.ops
        block.ops = []
        current = block
        result.append(current)

        # A handler set on the block wins over the default one. The attribute
        # is cleared because explicit branches are emitted below instead.
        handler = block.error_handler or default_error_handler
        block.error_handler = None

        for op in pending:
            checked: Value = op
            current.ops.append(op)
            if not isinstance(op, RegisterOp) or op.error_kind == ERR_NEVER:
                continue

            # Execution resumes in a fresh block when the op succeeded.
            continuation = BasicBlock()
            result.append(continuation)

            kind = op.error_kind
            if kind == ERR_MAGIC:
                # The op returns an error value that depends on the result RType.
                variant, negated = Branch.IS_ERROR, False
            elif kind == ERR_FALSE:
                # The op returns a C false value on error.
                variant, negated = Branch.BOOL, True
            elif kind == ERR_ALWAYS:
                variant, negated = Branch.BOOL, True
                # Hack: model "always fails" by testing a temporary bool
                # whose value is false.
                checked = Integer(0, bool_rprimitive)
            elif kind == ERR_MAGIC_OVERLAPPING:
                # First compare against the overlapping error value; only if
                # it matches do we go on to ask whether an error occurred.
                overlap_check = insert_overlapping_error_value_check(current.ops, checked)
                maybe_error = BasicBlock()
                result.append(maybe_error)
                current.ops.append(
                    Branch(
                        overlap_check,
                        true_label=maybe_error,
                        false_label=continuation,
                        op=Branch.BOOL,
                        rare=True,
                    )
                )
                current = maybe_error
                checked = primitive_call(err_occurred_op, [], checked.line)
                current.ops.append(checked)
                variant, negated = Branch.IS_ERROR, True
            else:
                assert False, "unknown error kind %d" % op.error_kind

            # Void ops can't generate errors since an error is always
            # indicated by a special value stored in a register.
            assert kind == ERR_ALWAYS or not op.is_void, "void op generating errors?"

            error_branch = Branch(
                checked, true_label=handler, false_label=continuation, op=variant, line=op.line
            )
            error_branch.negated = negated
            if func_name is not None and op.line != NO_TRACEBACK_LINE_NO:
                error_branch.traceback_entry = (func_name, op.line)
            current.ops.append(error_branch)
            current = continuation

    return result
def primitive_call(desc: CFunctionDescription, args: list[Value], line: int) -> CallC:
    """Build a CallC op for the C function described by ``desc``.

    Args:
        desc: Description supplying the C function name, return type,
            ``steals``, ``is_borrowed`` and ``error_kind`` of the call.
        args: Argument values passed to the call. These are forwarded to the
            CallC op (previously they were dropped and an empty list was
            always used).
        line: Line number attached to the op.

    Returns:
        The newly created CallC op. It is not appended to any block.
    """
    return CallC(
        desc.c_function_name,
        args,
        desc.return_type,
        desc.steals,
        desc.is_borrowed,
        desc.error_kind,
        line,
    )
def adjust_error_kinds(block: BasicBlock) -> None:
    """Refine the ``error_kind`` of attribute ops in ``block``.

    More information is available here than when the IR was initially built:
    a GetAttr or SetAttr whose attribute is reported as always defined by the
    class IR is marked ERR_NEVER.
    """
    for op in block.ops:
        if not isinstance(op, (GetAttr, SetAttr)):
            continue
        if op.class_type.class_ir.is_always_defined(op.attr):
            op.error_kind = ERR_NEVER
def insert_overlapping_error_value_check(ops: list[Op], target: Value) -> ComparisonOp:
    """Append ops to ``ops`` that compare ``target`` with its error value.

    For a tuple-typed value the first item is extracted (repeatedly, for
    nested tuples) and that item is checked instead. The comparison tests for
    equality with the type's ``c_undefined`` value, as a Float for float
    types and as an Integer otherwise.

    Returns:
        The ComparisonOp that was appended last.
    """
    # Drill down to the first non-tuple item.
    while isinstance(target.type, RTuple):
        first_item = TupleGet(target, 0)
        ops.append(first_item)
        target = first_item

    typ = target.type
    sentinel: Value
    if is_float_rprimitive(typ):
        sentinel = Float(float(typ.c_undefined))
    else:
        sentinel = Integer(int(typ.c_undefined), rtype=typ)
    check = ComparisonOp(target, sentinel, ComparisonOp.EQ)
    ops.append(check)
    return check

View File

@@ -0,0 +1,294 @@
"""Transformation for inserting refrecence count inc/dec opcodes.
This transformation happens towards the end of compilation. Before this
transformation, reference count management is not explicitly handled at all.
By postponing this pass, the previous passes are simpler as they don't have
to update reference count opcodes.
The approach is to decrement reference counts soon after a value is no
longer live, to quickly free memory (and call __del__ methods), though
there are no strict guarantees -- other than that local variables are
freed before return from a function.
Function arguments are a little special. They are initially considered
'borrowed' from the caller and their reference counts don't need to be
decremented before returning. An assignment to a borrowed value turns it
into a regular, owned reference that needs to be freed before return.
"""
from __future__ import annotations
from typing import Dict, Iterable, Tuple
from mypyc.analysis.dataflow import (
AnalysisDict,
analyze_borrowed_arguments,
analyze_live_regs,
analyze_must_defined_regs,
cleanup_cfg,
get_cfg,
)
from mypyc.ir.func_ir import FuncIR, all_values
from mypyc.ir.ops import (
Assign,
BasicBlock,
Branch,
ControlOp,
DecRef,
Goto,
IncRef,
Integer,
KeepAlive,
LoadAddress,
Op,
Register,
RegisterOp,
Value,
)
# Values to decref on a control-flow edge. Each entry pairs the value with the
# flag that add_block forwards to DecRef as is_xdec.
Decs = Tuple[Tuple[Value, bool], ...]
# Values to incref on a control-flow edge.
Incs = Tuple[Value, ...]
# A cache of basic blocks that decrement and increment specific values
# and then jump to some target block. This lets us cut down on how
# much code we generate in some circumstances.
# Keyed by (target block, decs, incs); the value is the block that performs
# those updates and then jumps to the target.
BlockCache = Dict[Tuple[BasicBlock, Decs, Incs], BasicBlock]
def insert_ref_count_opcodes(ir: FuncIR) -> None:
    """Insert reference count inc/dec opcodes into a function.

    This is the entry point to this module. It runs the liveness, borrowed
    argument and must-defined analyses over the function, rewrites every
    block accordingly and finally cleans up the CFG.
    """
    graph = get_cfg(ir.blocks)
    every_value = all_values(ir.arg_regs, ir.blocks)

    borrowed_values = {v for v in every_value if v.is_borrowed}
    arguments: set[Value] = set(ir.arg_regs)
    liveness = analyze_live_regs(ir.blocks, graph)
    borrowing = analyze_borrowed_arguments(ir.blocks, graph, borrowed_values)
    definedness = analyze_must_defined_regs(
        ir.blocks, graph, arguments, every_value, strict_errors=True
    )
    value_order = make_value_ordering(ir)
    edge_blocks: BlockCache = {}

    # Iterate over a snapshot: new blocks may be appended to ir.blocks while
    # branch targets are being rewritten.
    for block in list(ir.blocks):
        last_op = block.ops[-1]
        if isinstance(last_op, (Branch, Goto)):
            insert_branch_inc_and_decrefs(
                block,
                edge_blocks,
                ir.blocks,
                liveness.before,
                borrowing.before,
                borrowing.after,
                definedness.after,
                value_order,
            )
        transform_block(
            block, liveness.before, liveness.after, borrowing.before, definedness.after
        )

    cleanup_cfg(ir.blocks)
def is_maybe_undefined(post_must_defined: set[Value], src: Value) -> bool:
    """Return True if ``src`` is a Register that is not in ``post_must_defined``.

    Values that are not Registers are never reported as maybe undefined.
    """
    if not isinstance(src, Register):
        return False
    return src not in post_must_defined
def maybe_append_dec_ref(
    ops: list[Op], dest: Value, defined: AnalysisDict[Value], key: tuple[BasicBlock, int]
) -> None:
    """Append a DecRef of ``dest`` to ``ops`` if its type is reference counted.

    Integer values are skipped. The DecRef is created with ``is_xdec`` set
    when ``dest`` may be undefined according to ``defined[key]``.
    """
    if not dest.type.is_refcounted or isinstance(dest, Integer):
        return
    may_be_unset = is_maybe_undefined(defined[key], dest)
    ops.append(DecRef(dest, is_xdec=may_be_unset))
def maybe_append_inc_ref(ops: list[Op], dest: Value) -> None:
    """Append an IncRef of ``dest`` to ``ops`` if its type is reference counted."""
    if not dest.type.is_refcounted:
        return
    ops.append(IncRef(dest))
def transform_block(
    block: BasicBlock,
    pre_live: AnalysisDict[Value],
    post_live: AnalysisDict[Value],
    pre_borrow: AnalysisDict[Value],
    post_must_defined: AnalysisDict[Value],
) -> None:
    """Rewrite the ops of ``block`` with inc/dec ref ops inserted around them.

    Args:
        block: Block whose ``ops`` list is replaced by the rewritten list.
        pre_live: Values live before each op, keyed by (block, op index).
        post_live: Values live after each op, keyed the same way.
        pre_borrow: Values borrowed before each op, keyed the same way.
        post_must_defined: Values definitely defined after each op; used to
            decide whether a decref must tolerate an undefined value.

    KeepAlive ops are dropped from the output. Nothing is inserted after a
    ControlOp; that is left to insert_branch_inc_and_decrefs.
    """
    rewritten: list[Op] = []
    for index, op in enumerate(block.ops):
        key = (block, index)
        live_before = pre_live[key]
        live_after = post_live[key]
        borrowed_before = pre_borrow[key]

        assert op not in live_before
        dest = op.dest if isinstance(op, Assign) else op
        stolen = op.stolen()

        # Incref any reference being stolen that stays live, was borrowed,
        # or is stolen more than once by this op.
        for position, src in enumerate(stolen):
            if src in live_after or src in borrowed_before or src in stolen[:position]:
                maybe_append_inc_ref(rewritten, src)

        # An assignment to a register that was already live overwrites an
        # owned value, so release the old value first.
        if dest not in borrowed_before and dest in live_before:
            assert isinstance(op, Assign)
            maybe_append_dec_ref(rewritten, dest, post_must_defined, key)

        # KeepAlive exists only to guide this transform; strip it.
        if not isinstance(op, KeepAlive):
            rewritten.append(op)

        # There is no room after a control op; its inc/decrefs are handled
        # by insert_branch_inc_and_decrefs.
        if isinstance(op, ControlOp):
            continue

        # Release sources that die here, unless they were borrowed or stolen.
        for src in op.unique_sources():
            if src in live_after or src in borrowed_before or src in stolen:
                continue
            maybe_append_dec_ref(rewritten, src, post_must_defined, key)

        # Release the destination if it is dead right after the op and is
        # not the result of a borrowed RegisterOp.
        if not (
            dest.is_void
            or dest in live_after
            or (isinstance(op, RegisterOp) and dest.is_borrowed)
        ):
            maybe_append_dec_ref(rewritten, dest, post_must_defined, key)

    block.ops = rewritten
def insert_branch_inc_and_decrefs(
    block: BasicBlock,
    cache: BlockCache,
    blocks: list[BasicBlock],
    pre_live: AnalysisDict[Value],
    pre_borrow: AnalysisDict[Value],
    post_borrow: AnalysisDict[Value],
    post_must_defined: AnalysisDict[Value],
    ordering: dict[Value, int],
) -> None:
    """Insert inc_refs and/or dec_refs after the branch/goto ending ``block``.

    Dec_refs are added for registers that become dead after the branch, and
    inc_refs for registers that become unborrowed after the branch or goto.

    The targets of a branch may have different sets of live and borrowed
    registers, so each target is redirected (when needed) to a block that
    adjusts reference counts before jumping to the original target.

    Example where an inc_ref is needed:

      def f(a: int) -> None
          if a:
              a = 1
          return a  # a is borrowed if condition is false and unborrowed if true
    """
    last_key = (block, len(block.ops) - 1)
    live_at_branch = pre_live[last_key]
    borrowed_after = post_borrow[last_key]
    defined_after = post_must_defined[last_key]

    term = block.terminator
    checks_error = isinstance(term, Branch) and term.op == Branch.IS_ERROR
    for position, target in enumerate(term.targets()):
        # HAX: After a check against an error value we must not touch the
        # refcount of the checked value on the first target, since it will be
        # a null pointer. The correct way to do this would be a data flow
        # analysis of whether a value can be null (or is always null).
        omitted: Iterable[Value] = (term.value,) if checks_error and position == 0 else ()

        decs = after_branch_decrefs(
            target, pre_live, defined_after, borrowed_after, live_at_branch, ordering, omitted
        )
        incs = after_branch_increfs(target, pre_live, pre_borrow, borrowed_after, ordering)
        term.set_target(position, add_block(decs, incs, cache, blocks, target))
def after_branch_decrefs(
    label: BasicBlock,
    pre_live: AnalysisDict[Value],
    source_defined: set[Value],
    source_borrowed: set[Value],
    source_live_regs: set[Value],
    ordering: dict[Value, int],
    omitted: Iterable[Value],
) -> tuple[tuple[Value, bool], ...]:
    """Compute the decrefs needed on the edge into ``label``.

    Candidates are the values live at the source that are neither live at the
    start of ``label`` nor borrowed at the source. Of those, only values with
    a reference counted type that are not in ``omitted`` are kept.

    Returns:
        A tuple of (value, maybe_undefined) pairs sorted by ``ordering``.
    """
    candidates = source_live_regs - pre_live[label, 0] - source_borrowed
    if not candidates:
        return ()

    pairs = []
    for reg in sorted(candidates, key=lambda r: ordering[r]):
        if not reg.type.is_refcounted or reg in omitted:
            continue
        pairs.append((reg, is_maybe_undefined(source_defined, reg)))
    return tuple(pairs)
def after_branch_increfs(
    label: BasicBlock,
    pre_live: AnalysisDict[Value],
    pre_borrow: AnalysisDict[Value],
    source_borrowed: set[Value],
    ordering: dict[Value, int],
) -> tuple[Value, ...]:
    """Compute the increfs needed on the edge into ``label``.

    A value needs an incref when it is borrowed at the source, is not
    borrowed at the start of ``label``, is live there, and has a reference
    counted type.

    Returns:
        The values to incref, sorted by ``ordering``.
    """
    live_at_target = pre_live[label, 0]
    borrowed_at_target = pre_borrow[label, 0]
    no_longer_borrowed = (source_borrowed - borrowed_at_target) & live_at_target
    if not no_longer_borrowed:
        return ()

    ordered = sorted(no_longer_borrowed, key=lambda r: ordering[r])
    return tuple(filter(lambda reg: reg.type.is_refcounted, ordered))
def add_block(
    decs: Decs, incs: Incs, cache: BlockCache, blocks: list[BasicBlock], label: BasicBlock
) -> BasicBlock:
    """Return a block that applies ``decs`` and ``incs`` and then jumps to ``label``.

    If there is nothing to do, ``label`` itself is returned. Otherwise a
    block is taken from ``cache`` when an identical one already exists, or a
    new one is created, appended to ``blocks`` and stored in ``cache``.
    """
    if not (decs or incs):
        return label

    # TODO: be able to share *partial* results
    cache_key = (label, decs, incs)
    try:
        return cache[cache_key]
    except KeyError:
        pass

    adjuster = BasicBlock()
    blocks.append(adjuster)
    for reg, xdec in decs:
        adjuster.ops.append(DecRef(reg, is_xdec=xdec))
    for reg in incs:
        adjuster.ops.append(IncRef(reg))
    adjuster.ops.append(Goto(label))
    cache[cache_key] = adjuster
    return adjuster
def make_value_ordering(ir: FuncIR) -> dict[Value, int]:
    """Create an ordering of values that allows them to be sorted.

    Arguments are numbered first, in order. After that, values are numbered
    in the order they are first encountered while walking the blocks: the
    destination of an Assign, any other op itself, and a Register whose
    address is taken by a LoadAddress.

    This omits registers that are only ever read.
    """
    # TODO: Never initialized values??
    order: dict[Value, int] = {}
    counter = 0

    for arg in ir.arg_regs:
        order[arg] = counter
        counter += 1

    for block in ir.blocks:
        for op in block.ops:
            introduced: list[Value] = []
            if isinstance(op, LoadAddress) and isinstance(op.src, Register):
                # Taking the address of a register allows initialization.
                introduced.append(op.src)
            introduced.append(op.dest if isinstance(op, Assign) else op)

            for value in introduced:
                if value not in order:
                    order[value] = counter
                    counter += 1

    return order

View File

@@ -0,0 +1,190 @@
"""Insert checks for uninitialized values."""
from __future__ import annotations
from mypyc.analysis.dataflow import AnalysisDict, analyze_must_defined_regs, cleanup_cfg, get_cfg
from mypyc.common import BITMAP_BITS
from mypyc.ir.func_ir import FuncIR, all_values
from mypyc.ir.ops import (
Assign,
BasicBlock,
Branch,
ComparisonOp,
Integer,
IntOp,
LoadAddress,
LoadErrorValue,
Op,
RaiseStandardError,
Register,
Unreachable,
Value,
)
from mypyc.ir.rtypes import bitmap_rprimitive
def insert_uninit_checks(ir: FuncIR) -> None:
    """Insert checks for reads of possibly uninitialized registers into ``ir``.

    Replaces ``ir.blocks`` with the result of split_blocks_at_uninits, driven
    by a must-defined analysis in which the argument registers count as
    initially defined.
    """
    # Remove dead blocks from the CFG first, which helps avoid spurious
    # checks due to unused error handling blocks.
    cleanup_cfg(ir.blocks)

    graph = get_cfg(ir.blocks)
    initially_defined = set(ir.arg_regs)
    every_value = all_values(ir.arg_regs, ir.blocks)
    must_defined = analyze_must_defined_regs(ir.blocks, graph, initially_defined, every_value)

    ir.blocks = split_blocks_at_uninits(ir.blocks, must_defined.before)
def split_blocks_at_uninits(
    blocks: list[BasicBlock], pre_must_defined: AnalysisDict[Value]
) -> list[BasicBlock]:
    """Split blocks wherever a possibly uninitialized register is read.

    Before each such read a check is inserted that branches to a new error
    block (which raises UnboundLocalError) when the register is undefined,
    and to a new continuation block otherwise.

    Args:
        blocks: Blocks to process; their ``ops`` lists are rebuilt.
        pre_must_defined: Values definitely defined before each op, keyed by
            (block, op index).

    Returns:
        The new list of blocks. If any check was inserted, the first block
        is prefixed with ops that initialize the checked registers to their
        error value and zero the bitmap registers.
    """
    result: list[BasicBlock] = []

    checked_regs = []
    checked_regs_seen = set()
    bitmap_registers: list[Register] = []  # Init status bitmaps
    bitmap_backed: list[Register] = []  # These use bitmaps to track init status

    for block in blocks:
        pending = block.ops
        block.ops = []
        current = block
        result.append(current)

        for index, op in enumerate(pending):
            defined = pre_must_defined[block, index]
            # A branch that itself tests for the error value is already a
            # definedness check. A register operand of LoadAddress may be
            # used without initialization, since its address may be needed
            # to update the register itself.
            exempt = (isinstance(op, Branch) and op.op == Branch.IS_ERROR) or isinstance(
                op, LoadAddress
            )
            for src in op.unique_sources():
                if not isinstance(src, Register) or src in defined or exempt:
                    continue

                ok_block, error_block = BasicBlock(), BasicBlock()
                inherited_handler = current.error_handler
                error_block.error_handler = inherited_handler
                ok_block.error_handler = inherited_handler
                result.extend((error_block, ok_block))

                if src not in checked_regs_seen:
                    checked_regs.append(src)
                    checked_regs_seen.add(src)

                if src.type.error_overlap:
                    # The error value overlaps with a valid value, so
                    # definedness has to be tracked in a bitmap.
                    check_for_uninit_using_bitmap(
                        current.ops,
                        src,
                        bitmap_registers,
                        bitmap_backed,
                        error_block,
                        ok_block,
                        op.line,
                    )
                else:
                    current.ops.append(
                        Branch(
                            src,
                            true_label=error_block,
                            false_label=ok_block,
                            op=Branch.IS_ERROR,
                            line=op.line,
                        )
                    )

                error_block.ops.append(
                    RaiseStandardError(
                        RaiseStandardError.UNBOUND_LOCAL_ERROR,
                        f'local variable "{src.name}" referenced before assignment',
                        op.line,
                    )
                )
                error_block.ops.append(Unreachable())
                current = ok_block
            current.ops.append(op)

    if bitmap_backed:
        update_register_assignments_to_set_bitmap(result, bitmap_registers, bitmap_backed)

    if checked_regs:
        # Give every checked register (and bitmap) a known initial state at
        # the very start of the function.
        prologue: list[Op] = []
        for reg in checked_regs:
            undefined = LoadErrorValue(reg.type, undefines=True)
            prologue.append(undefined)
            prologue.append(Assign(reg, undefined))
        for bitmap in bitmap_registers:
            prologue.append(Assign(bitmap, Integer(0, bitmap_rprimitive)))
        result[0].ops[0:0] = prologue

    return result
def check_for_uninit_using_bitmap(
    ops: list[Op],
    src: Register,
    bitmap_registers: list[Register],
    bitmap_backed: list[Register],
    error_block: BasicBlock,
    ok_block: BasicBlock,
    line: int,
) -> None:
    """Check if src is defined using a bitmap.

    Appends three ops to ``ops``: a mask of the bit assigned to ``src``, a
    comparison of the masked value with zero, and a branch to ``error_block``
    if the bit is clear or to ``ok_block`` otherwise.

    Modifies ops, bitmap_registers and bitmap_backed: a register seen for the
    first time is added to ``bitmap_backed``, and a new bitmap register is
    created whenever the existing ones have no room for its bit.
    """
    try:
        index = bitmap_backed.index(src)
    except ValueError:
        # Set up a new bitmap backed register.
        bitmap_backed.append(src)
        index = len(bitmap_backed) - 1
        n = index // BITMAP_BITS
        if len(bitmap_registers) <= n:
            bitmap_registers.append(Register(bitmap_rprimitive, f"__locals_bitmap{n}"))

    bitmap = bitmap_registers[index // BITMAP_BITS]
    bit = Integer(1 << (index & (BITMAP_BITS - 1)), bitmap_rprimitive)
    masked = IntOp(bitmap_rprimitive, bitmap, bit, IntOp.AND, line)
    ops.append(masked)

    is_unset = ComparisonOp(masked, Integer(0, bitmap_rprimitive), ComparisonOp.EQ)
    ops.append(is_unset)
    ops.append(Branch(is_unset, error_block, ok_block, Branch.BOOL))
def update_register_assignments_to_set_bitmap(
    blocks: list[BasicBlock], bitmap_registers: list[Register], bitmap_backed: list[Register]
) -> None:
    """Update some assignments to registers to also set a bit in a bitmap.

    The bitmaps are used to track if a local variable has been assigned to.
    Each Assign whose destination is in ``bitmap_backed`` is followed by ops
    that OR the register's bit into its bitmap register and store the result.

    Modifies blocks. The ``ops`` list of a block is replaced only if the
    block contains at least one such assignment.
    """
    for block in blocks:
        rewritten: list[Op] = []
        changed = False
        for op in block.ops:
            rewritten.append(op)
            if not (isinstance(op, Assign) and op.dest in bitmap_backed):
                continue

            changed = True
            index = bitmap_backed.index(op.dest)
            bitmap = bitmap_registers[index // BITMAP_BITS]
            with_bit_set = IntOp(
                bitmap_rprimitive,
                bitmap,
                Integer(1 << (index & (BITMAP_BITS - 1)), bitmap_rprimitive),
                IntOp.OR,
                op.line,
            )
            rewritten.append(with_bit_set)
            rewritten.append(Assign(bitmap, with_bit_set))

        if changed:
            block.ops = rewritten