All checks were successful
continuous-integration/drone/push Build is passing
517 lines
19 KiB
Python
517 lines
19 KiB
Python
"""Utilities for comparing two versions of a module symbol table.
|
|
|
|
The goal is to find which AST nodes have externally visible changes, so
|
|
that we can fire triggers and re-process other parts of the program
|
|
that are stale because of the changes.
|
|
|
|
Only look in detail at definitions in the current module -- don't
|
|
recurse into other modules.
|
|
|
|
A summary of the module contents:
|
|
|
|
* snapshot_symbol_table(...) creates an opaque snapshot description of a
|
|
module/class symbol table (recursing into nested class symbol tables).
|
|
|
|
* compare_symbol_table_snapshots(...) compares two snapshots for the same
|
|
module id and returns fully qualified names of differences (which act as
|
|
triggers).
|
|
|
|
To compare two versions of a module symbol table, take snapshots of both
|
|
versions and compare the snapshots. The use of snapshots makes it easy to
|
|
compare two versions of the *same* symbol table that is being mutated.
|
|
|
|
Summary of how this works for certain kinds of differences:
|
|
|
|
* If a symbol table node is deleted or added (only present in old/new version
|
|
of the symbol table), it is considered different, of course.
|
|
|
|
* If a symbol table node refers to a different sort of thing in the new version,
|
|
it is considered different (for example, if a class is replaced with a
|
|
function).
|
|
|
|
* If the signature of a function has changed, it is considered different.
|
|
|
|
* If the type of a variable changes, it is considered different.
|
|
|
|
* If the MRO of a class changes, or a non-generic class is turned into a
|
|
generic class, the class is considered different (there are other such "big"
|
|
differences that cause a class to be considered changed). However, just changes
|
|
to attributes or methods don't generally constitute a difference at the
|
|
class level -- these are handled at attribute level (say, 'mod.Cls.method'
|
|
is different rather than 'mod.Cls' being different).
|
|
|
|
* If an imported name targets a different name (say, 'from x import y' is
|
|
replaced with 'from z import y'), the name in the module is considered
|
|
different. If the target of an import continues to have the same name,
|
|
but its specifics change, this doesn't mean that the imported name is
|
|
treated as changed. Say, there is 'from x import y' in 'm', and the
|
|
type of 'x.y' has changed. This doesn't mean that 'm.y' is considered
|
|
changed. Instead, processing the difference in 'm' will be handled through
|
|
fine-grained dependencies.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Sequence, Tuple, Union
|
|
from typing_extensions import TypeAlias as _TypeAlias
|
|
|
|
from mypy.expandtype import expand_type
|
|
from mypy.nodes import (
|
|
UNBOUND_IMPORTED,
|
|
Decorator,
|
|
FuncBase,
|
|
FuncDef,
|
|
FuncItem,
|
|
MypyFile,
|
|
OverloadedFuncDef,
|
|
ParamSpecExpr,
|
|
SymbolNode,
|
|
SymbolTable,
|
|
TypeAlias,
|
|
TypeInfo,
|
|
TypeVarExpr,
|
|
TypeVarTupleExpr,
|
|
Var,
|
|
)
|
|
from mypy.semanal_shared import find_dataclass_transform_spec
|
|
from mypy.types import (
|
|
AnyType,
|
|
CallableType,
|
|
DeletedType,
|
|
ErasedType,
|
|
Instance,
|
|
LiteralType,
|
|
NoneType,
|
|
Overloaded,
|
|
Parameters,
|
|
ParamSpecType,
|
|
PartialType,
|
|
TupleType,
|
|
Type,
|
|
TypeAliasType,
|
|
TypedDictType,
|
|
TypeType,
|
|
TypeVarId,
|
|
TypeVarLikeType,
|
|
TypeVarTupleType,
|
|
TypeVarType,
|
|
TypeVisitor,
|
|
UnboundType,
|
|
UninhabitedType,
|
|
UnionType,
|
|
UnpackType,
|
|
)
|
|
from mypy.util import get_prefix
|
|
|
|
# Snapshot representation of a symbol table node or type. The representation is
|
|
# opaque -- the only supported operations are comparing for equality and
|
|
# hashing (latter for type snapshots only). Snapshots can contain primitive
|
|
# objects, nested tuples, lists and dictionaries (type
|
|
# snapshots are immutable).
|
|
#
|
|
# For example, the snapshot of the 'int' type is ('Instance', 'builtins.int', ()).
|
|
|
|
# Type snapshots are strict, they must be hashable and ordered (e.g. for Unions).
# Primitive lists the leaf value types that may appear inside a type snapshot.
Primitive: _TypeAlias = Union[str, float, int, bool] # float is for Literal[3.14] support.
# SnapshotItem is recursive: every element is either a Primitive or another
# SnapshotItem tuple.
SnapshotItem: _TypeAlias = Tuple[Union[Primitive, "SnapshotItem"], ...]

# Symbol snapshots can be more lenient.
# Their elements are typed as plain `object`, so they are not restricted to
# Primitive values or nested tuples.
SymbolSnapshot: _TypeAlias = Tuple[object, ...]
|
|
|
|
|
|
def compare_symbol_table_snapshots(
    name_prefix: str, snapshot1: dict[str, SymbolSnapshot], snapshot2: dict[str, SymbolSnapshot]
) -> set[str]:
    """Compute the names whose entries differ between two symbol table snapshots.

    Only shallow (intra-module) differences are considered. References to things
    defined outside the module are compared based on the name of the target only.

    Entries tagged "TypeInfo" carry a nested snapshot dict as their last element;
    that nested table is compared recursively, using the class name as the prefix.

    Args:
        name_prefix: Qualified name prepended (with a dot) to every reported name.
        snapshot1: Snapshot of the old version of the symbol table.
        snapshot2: Snapshot of the new version of the symbol table.

    Returns:
        A set of fully-qualified names (e.g., 'mod.func' or 'mod.Class.method').
    """
    # Names that exist in only one of the two versions are always different.
    old_names = {f"{name_prefix}.{name}" for name in snapshot1}
    new_names = {f"{name_prefix}.{name}" for name in snapshot2}
    triggers = old_names ^ new_names

    # Names that exist in both versions need to be compared entry by entry.
    for name in snapshot1.keys() & snapshot2.keys():
        old_item = snapshot1[name]
        new_item = snapshot2[name]
        old_kind = old_item[0]
        item_name = f"{name_prefix}.{name}"

        if old_kind != new_item[0]:
            # The node changed kind between the snapshots -> trivially different.
            triggers.add(item_name)
            continue

        if old_kind != "TypeInfo":
            # Shallow node (no interesting internal structure): plain equality.
            if old_item != new_item:
                triggers.add(item_name)
            continue

        # Everything except the trailing nested symbol table describes the
        # class itself; a mismatch there is a major difference.
        if old_item[:-1] != new_item[:-1]:
            triggers.add(item_name)

        # Differences inside the class body are reported per attribute.
        old_members = old_item[-1]
        new_members = new_item[-1]
        assert isinstance(old_members, dict)
        assert isinstance(new_members, dict)
        triggers |= compare_symbol_table_snapshots(item_name, old_members, new_members)

    return triggers
|
|
|
|
|
|
def snapshot_symbol_table(name_prefix: str, table: SymbolTable) -> dict[str, SymbolSnapshot]:
    """Build a snapshot describing the current state of a symbol table.

    The snapshot is made of nested tuples and dicts, which makes finding
    differences between two versions easy and fast.

    Only "shallow" state is recorded: anything defined in another module is
    represented just by the name of its target.

    Args:
        name_prefix: Qualified name of the module or class owning `table`; a
            node whose fullname prefix differs from it is recorded as a
            cross-reference.
        table: The symbol table to snapshot.

    Returns:
        A dict mapping each name in `table` to its snapshot tuple.
    """
    snapshots: dict[str, SymbolSnapshot] = {}
    for name, symbol in table.items():
        node = symbol.node
        # TODO: cross_ref?
        common = (node.fullname if node else None, symbol.kind, symbol.module_public)

        if isinstance(node, MypyFile):
            # This is a cross-reference to another module.
            # If the reference is busted because the other module is missing,
            # the node will be a "stale_info" TypeInfo produced by fixup,
            # but that doesn't really matter to us here.
            snapshots[name] = ("Moduleref", common)
        elif isinstance(node, TypeVarExpr):
            value_snapshots = [snapshot_type(value) for value in node.values]
            snapshots[name] = (
                "TypeVar",
                node.variance,
                value_snapshots,
                snapshot_type(node.upper_bound),
                snapshot_type(node.default),
            )
        elif isinstance(node, TypeAlias):
            snapshots[name] = (
                "TypeAlias",
                snapshot_types(node.alias_tvars),
                node.normalized,
                node.no_args,
                snapshot_optional_type(node.target),
            )
        elif isinstance(node, (ParamSpecExpr, TypeVarTupleExpr)):
            # Both kinds record the same fields; only the leading tag differs.
            tag = "ParamSpec" if isinstance(node, ParamSpecExpr) else "TypeVarTuple"
            snapshots[name] = (
                tag,
                node.variance,
                snapshot_type(node.upper_bound),
                snapshot_type(node.default),
            )
        else:
            assert symbol.kind != UNBOUND_IMPORTED
            if node and get_prefix(node.fullname) != name_prefix:
                # This is a cross-reference to a node defined in another module.
                snapshots[name] = ("CrossRef", common)
            else:
                snapshots[name] = snapshot_definition(node, common)
    return snapshots
|
|
|
|
|
|
def snapshot_definition(node: SymbolNode | None, common: SymbolSnapshot) -> SymbolSnapshot:
    """Build the snapshot tuple for a single symbol table node.

    The result is built from nested tuples and dicts, and only externally
    visible attributes are recorded.

    Args:
        node: The definition to snapshot. Must be a FuncBase, Var, Decorator
            or TypeInfo; anything else (including None) trips the final assert.
        common: Snapshot data shared by all node kinds, embedded in the result.

    Returns:
        A tuple whose first element tags the node kind ("Func", "Var",
        "Decorator" or "TypeInfo").
    """
    if isinstance(node, FuncBase):
        # TODO: info
        signature = snapshot_type(node.type) if node.type else snapshot_untyped_signature(node)

        # Find the function that actually carries a body, if there is one.
        body_owner: FuncDef | None = None
        if isinstance(node, FuncDef):
            body_owner = node
        elif isinstance(node, OverloadedFuncDef) and node.impl:
            overload_impl = node.impl
            if isinstance(overload_impl, Decorator):
                body_owner = overload_impl.func
            else:
                body_owner = overload_impl
        is_trivial_body = body_owner.is_trivial_body if body_owner else False

        func_spec = find_dataclass_transform_spec(node)
        return (
            "Func",
            common,
            node.is_property,
            node.is_final,
            node.is_class,
            node.is_static,
            signature,
            is_trivial_body,
            func_spec.serialize() if func_spec is not None else None,
        )

    if isinstance(node, Var):
        return ("Var", common, snapshot_optional_type(node.type), node.is_final)

    if isinstance(node, Decorator):
        # Note that decorated methods are represented by Decorator instances in
        # a symbol table since we need to preserve information about the
        # decorated function (whether it's a class function, for
        # example). Top-level decorated functions, however, are represented by
        # the corresponding Var node, since that happens to provide enough
        # context.
        return (
            "Decorator",
            node.is_overload,
            snapshot_optional_type(node.var.type),
            snapshot_definition(node.func, common),
        )

    if isinstance(node, TypeInfo):
        # Use the spec stored on the class if present, otherwise look it up.
        class_spec = node.dataclass_transform_spec
        if class_spec is None:
            class_spec = find_dataclass_transform_spec(node)

        attrs = (
            node.is_abstract,
            node.is_enum,
            node.is_protocol,
            node.fallback_to_any,
            node.meta_fallback_to_any,
            node.is_named_tuple,
            node.is_newtype,
            # We need this to e.g. trigger metaclass calculation in subclasses.
            snapshot_optional_type(node.metaclass_type),
            snapshot_optional_type(node.tuple_type),
            snapshot_optional_type(node.typeddict_type),
            [base.fullname for base in node.mro],
            # Note that the structure of type variables is a part of the external interface,
            # since creating instances might fail, for example:
            #     T = TypeVar('T', bound=int)
            #     class C(Generic[T]):
            #         ...
            #     x: C[str] <- this is invalid, and needs to be re-checked if `T` changes.
            # An alternative would be to create both deps: <...> -> C, and <...> -> <C>,
            # but this currently seems a bit ad hoc.
            tuple(snapshot_type(tdef) for tdef in node.defn.type_vars),
            [snapshot_type(base) for base in node.bases],
            [snapshot_type(p) for p in node._promote],
            class_spec.serialize() if class_spec is not None else None,
        )
        members = snapshot_symbol_table(node.fullname, node.names)
        # Special dependency for abstract attribute handling.
        members["(abstract)"] = ("Abstract", tuple(sorted(node.abstract_attributes)))
        return ("TypeInfo", common, attrs, members)

    # Other node types are handled elsewhere.
    assert False, type(node)
|
|
|
|
|
|
def snapshot_type(typ: Type) -> SnapshotItem:
    """Return the nested-tuple snapshot of `typ`, produced by a fresh SnapshotTypeVisitor."""
    visitor = SnapshotTypeVisitor()
    return typ.accept(visitor)
|
|
|
|
|
|
def snapshot_optional_type(typ: Type | None) -> SnapshotItem:
    """Snapshot a type that may be missing.

    Returns the placeholder ("<not set>",) when `typ` is falsy (e.g. None),
    and the regular type snapshot otherwise.
    """
    if not typ:
        return ("<not set>",)
    return snapshot_type(typ)
|
|
|
|
|
|
def snapshot_types(types: Sequence[Type]) -> SnapshotItem:
    """Snapshot every type in `types`, returning the results as a tuple in the same order."""
    snapshots = [snapshot_type(item) for item in types]
    return tuple(snapshots)
|
|
|
|
|
|
def snapshot_simple_type(typ: Type) -> SnapshotItem:
    """Snapshot a type by its class name alone, as a one-element tuple."""
    class_name = type(typ).__name__
    return (class_name,)
|
|
|
|
|
|
def encode_optional_str(s: str | None) -> str:
    """Return `s` unchanged, or the placeholder "<None>" when `s` is None.

    This keeps snapshot tuples free of None values mixed in with strings.
    """
    return "<None>" if s is None else s
|
|
|
|
|
|
class SnapshotTypeVisitor(TypeVisitor[SnapshotItem]):
    """Type visitor that turns a type object into a read-only, self-contained snapshot.

    Snapshots have these properties:

    - They consist only of (nested) tuples and other immutable primitive objects.
    - AST node references are replaced with the full names of their targets.
    - They hold no references to mutable or non-primitive objects.
    - Two snapshots are equal if and only if they represent the same object.
    - They must be sortable, so tuples need consistent element types; for
      example, str and None values can't be mixed arbitrarily because they
      can't be compared with each other.
    """

    def visit_unbound_type(self, typ: UnboundType) -> SnapshotItem:
        arg_snapshots = snapshot_types(typ.args)
        return ("UnboundType", typ.name, typ.optional, typ.empty_tuple_index, arg_snapshots)

    # The following types carry no data worth recording, so the class name
    # alone identifies them.

    def visit_any(self, typ: AnyType) -> SnapshotItem:
        return snapshot_simple_type(typ)

    def visit_none_type(self, typ: NoneType) -> SnapshotItem:
        return snapshot_simple_type(typ)

    def visit_uninhabited_type(self, typ: UninhabitedType) -> SnapshotItem:
        return snapshot_simple_type(typ)

    def visit_erased_type(self, typ: ErasedType) -> SnapshotItem:
        return snapshot_simple_type(typ)

    def visit_deleted_type(self, typ: DeletedType) -> SnapshotItem:
        return snapshot_simple_type(typ)

    def visit_instance(self, typ: Instance) -> SnapshotItem:
        class_name = encode_optional_str(typ.type.fullname)
        arg_snapshots = snapshot_types(typ.args)
        # A one-element tuple stands in for a missing literal value so that
        # this slot always holds a tuple.
        if typ.last_known_value is None:
            literal_snapshot: SnapshotItem = ("None",)
        else:
            literal_snapshot = snapshot_type(typ.last_known_value)
        return ("Instance", class_name, arg_snapshots, literal_snapshot)

    def visit_type_var(self, typ: TypeVarType) -> SnapshotItem:
        var_id = typ.id
        return (
            "TypeVar",
            typ.name,
            typ.fullname,
            var_id.raw_id,
            var_id.meta_level,
            snapshot_types(typ.values),
            snapshot_type(typ.upper_bound),
            snapshot_type(typ.default),
            typ.variance,
        )

    def visit_param_spec(self, typ: ParamSpecType) -> SnapshotItem:
        var_id = typ.id
        return (
            "ParamSpec",
            var_id.raw_id,
            var_id.meta_level,
            typ.flavor,
            snapshot_type(typ.upper_bound),
            snapshot_type(typ.default),
        )

    def visit_type_var_tuple(self, typ: TypeVarTupleType) -> SnapshotItem:
        var_id = typ.id
        return (
            "TypeVarTupleType",
            var_id.raw_id,
            var_id.meta_level,
            snapshot_type(typ.upper_bound),
            snapshot_type(typ.default),
        )

    def visit_unpack_type(self, typ: UnpackType) -> SnapshotItem:
        inner = snapshot_type(typ.type)
        return ("UnpackType", inner)

    def visit_parameters(self, typ: Parameters) -> SnapshotItem:
        return (
            "Parameters",
            snapshot_types(typ.arg_types),
            tuple(encode_optional_str(arg_name) for arg_name in typ.arg_names),
            tuple(kind.value for kind in typ.arg_kinds),
        )

    def visit_callable_type(self, typ: CallableType) -> SnapshotItem:
        # Generic callables get their type variable ids normalized first, so
        # the snapshot doesn't depend on the ids that happened to be assigned.
        callee = self.normalize_callable_variables(typ) if typ.is_generic() else typ
        return (
            "CallableType",
            snapshot_types(callee.arg_types),
            snapshot_type(callee.ret_type),
            tuple(encode_optional_str(arg_name) for arg_name in callee.arg_names),
            tuple(kind.value for kind in callee.arg_kinds),
            callee.is_type_obj(),
            callee.is_ellipsis_args,
            snapshot_types(callee.variables),
        )

    def normalize_callable_variables(self, typ: CallableType) -> CallableType:
        """Normalize all type variable ids to run from -1 to -len(variables)."""
        renumbered = []
        replacements: dict[TypeVarId, Type] = {}
        for position, variable in enumerate(typ.variables, start=1):
            new_id = TypeVarId(-position)
            # Each branch makes the same call; the isinstance checks are kept
            # separate per concrete type, as in the original code.
            if isinstance(variable, TypeVarType):
                replacement: TypeVarLikeType = variable.copy_modified(id=new_id)
            elif isinstance(variable, TypeVarTupleType):
                replacement = variable.copy_modified(id=new_id)
            else:
                assert isinstance(variable, ParamSpecType)
                replacement = variable.copy_modified(id=new_id)
            renumbered.append(replacement)
            replacements[variable.id] = replacement
        expanded = expand_type(typ, replacements)
        return expanded.copy_modified(variables=renumbered)

    def visit_tuple_type(self, typ: TupleType) -> SnapshotItem:
        item_snapshots = snapshot_types(typ.items)
        return ("TupleType", item_snapshots)

    def visit_typeddict_type(self, typ: TypedDictType) -> SnapshotItem:
        entries = tuple((key, snapshot_type(value_type)) for key, value_type in typ.items.items())
        required_keys = tuple(sorted(typ.required_keys))
        return ("TypedDictType", entries, required_keys)

    def visit_literal_type(self, typ: LiteralType) -> SnapshotItem:
        fallback_snapshot = snapshot_type(typ.fallback)
        return ("LiteralType", fallback_snapshot, typ.value)

    def visit_union_type(self, typ: UnionType) -> SnapshotItem:
        # Deduplicate and sort the member snapshots so that equivalent unions
        # produce equal snapshots.
        unique_members = {snapshot_type(member) for member in typ.items}
        return ("UnionType", tuple(sorted(unique_members)))

    def visit_overloaded(self, typ: Overloaded) -> SnapshotItem:
        item_snapshots = snapshot_types(typ.items)
        return ("Overloaded", item_snapshots)

    def visit_partial_type(self, typ: PartialType) -> SnapshotItem:
        # A partial type is not fully defined, so the result is indeterminate. We shouldn't
        # get here.
        raise RuntimeError

    def visit_type_type(self, typ: TypeType) -> SnapshotItem:
        item_snapshot = snapshot_type(typ.item)
        return ("TypeType", item_snapshot)

    def visit_type_alias_type(self, typ: TypeAliasType) -> SnapshotItem:
        alias = typ.alias
        assert alias is not None
        return ("TypeAliasType", alias.fullname, snapshot_types(typ.args))
|
|
|
|
|
|
def snapshot_untyped_signature(func: OverloadedFuncDef | FuncItem) -> SymbolSnapshot:
    """Snapshot the signature of a function that has no explicit signature.

    A function without a signature must be considered different when its
    arguments change. This is special-cased because the implicit signature
    isn't stored anywhere, and this module prefers to only read properties of
    the AST rather than construct new Callable objects.

    Args:
        func: A single function item, or an overloaded definition whose items
            are snapshotted one by one.

    Returns:
        For a FuncItem, a pair of (argument names, argument kinds) tuples.
        Otherwise, a tuple with one snapshot per overload item.
    """
    if isinstance(func, FuncItem):
        return (tuple(func.arg_names), tuple(func.arg_kinds))

    item_snapshots: list[SymbolSnapshot] = []
    for item in func.items:
        if not isinstance(item, Decorator):
            item_snapshots.append(snapshot_untyped_signature(item))
        elif item.var.type:
            # Decorated item: use the type recorded on its variable.
            item_snapshots.append(snapshot_type(item.var.type))
        else:
            item_snapshots.append(("DecoratorWithoutType",))
    return tuple(item_snapshots)
|