From 199b367e6f2547627049872d5b43923c652a4220 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Tue, 17 Jun 2025 16:52:57 -0700 Subject: [PATCH 01/14] Some micro-optimizations that make save() 5% faster More importantly, commit() now calls __gel_commit__() on link props of single links. --- gel/_internal/_qbmodel/_pydantic/_fields.py | 19 +++-- gel/_internal/_save.py | 86 +++++++++++++-------- 2 files changed, 62 insertions(+), 43 deletions(-) diff --git a/gel/_internal/_qbmodel/_pydantic/_fields.py b/gel/_internal/_qbmodel/_pydantic/_fields.py index 115de3e2c..fd77b01a0 100644 --- a/gel/_internal/_qbmodel/_pydantic/_fields.py +++ b/gel/_internal/_qbmodel/_pydantic/_fields.py @@ -804,13 +804,6 @@ def _validate( ) -@functools.cache -def _make_dlist_type( - types: tuple[type[_PT_co], type[_BMT_co]], -) -> type[_UpcastingDistinctList[_PT_co, _BMT_co]]: - return _UpcastingDistinctList[types[0], types[1]] # type: ignore [valid-type] - - class _MultiLinkWithProps( _MultiPointer[_PT_co, _BMT_co], _abstract.LinkDescriptor[_PT_co, _BMT_co], @@ -840,7 +833,10 @@ def __gel_resolve_dlist__( cls, type_args: tuple[type[Any]] | tuple[type[Any], type[Any]], ) -> _dlist.DistinctList[_MT_co]: - return _make_dlist_type(type_args) # type: ignore [return-value] + return _UpcastingDistinctList[ + type_args[0], # type: ignore [valid-type] + type_args[1], # type: ignore [valid-type] + ] # type: ignore [return-value] @classmethod def _validate( @@ -848,8 +844,11 @@ def _validate( value: Any, generic_args: tuple[type[Any], type[Any]], ) -> _UpcastingDistinctList[_PT_co, _BMT_co]: - lt: type[_UpcastingDistinctList[_PT_co, _BMT_co]] = _make_dlist_type( - generic_args + lt: type[_UpcastingDistinctList[_PT_co, _BMT_co]] = ( + _UpcastingDistinctList[ + generic_args[0], # type: ignore [valid-type] + generic_args[1], # type: ignore [valid-type] + ] ) if type(lt) is list: # type: ignore [comparison-overlap] # Optimization for the most common scenario - user passes diff --git a/gel/_internal/_save.py b/gel/_internal/_save.py index 987455192..5afd33198 100644 --- a/gel/_internal/_save.py +++ b/gel/_internal/_save.py @@ -2,6 +2,7 @@ import collections import dataclasses +import weakref from typing import ( TYPE_CHECKING, @@ -27,6 +28,7 @@ ) from gel._internal._unsetid import UNSET_UUID from gel._internal._edgeql import PointerKind, quote_ident +from gel._internal._qbmodel._pydantic._fields import _UpcastingDistinctList if TYPE_CHECKING: import uuid @@ -45,6 +47,11 @@ _unset = object() +_sorted_pointers_cache: weakref.WeakKeyDictionary[ + type[GelSourceModel], Iterable[GelPointerReflection] +] = weakref.WeakKeyDictionary() + + _ll_getattr = object.__getattribute__ @@ -246,36 +253,43 @@ def is_link_list(val: object) -> TypeGuard[DistinctList[GelModel]]: ) +def is_proxy_link_list( + val: object, +) -> TypeGuard[_UpcastingDistinctList[ProxyModel[GelModel], GelModel]]: + return isinstance(val, _UpcastingDistinctList) + + def unwrap_proxy(val: GelModel) -> GelModel: if isinstance(val, ProxyModel): # This is perf-sensitive function as it's called on # every edge of the graph multiple times. - obj = _ll_getattr(val, "_p__obj__") - assert isinstance(obj, GelModel) - return obj + return _ll_getattr(val, "_p__obj__") # type: ignore [no-any-return] else: return val -def unwrap_dlist(val: Iterable[GelModel]) -> list[GelModel]: - return [unwrap_proxy(o) for o in val] +def unwrap_proxy_no_check(val: ProxyModel[GelModel]) -> GelModel: + return _ll_getattr(val, "_p__obj__") # type: ignore [no-any-return] -def unwrap(val: GelModel) -> tuple[ProxyModel[GelModel] | None, GelModel]: - if isinstance(val, ProxyModel): - obj = _ll_getattr(val, "_p__obj__") - assert isinstance(obj, GelModel) - return val, obj - else: - return None, val +def unwrap_dlist(val: Iterable[GelModel]) -> list[GelModel]: + return [unwrap_proxy(o) for o in val] -def get_pointers(tp: type[GelSourceModel]) -> list[GelPointerReflection]: +def get_pointers(tp: type[GelSourceModel]) -> Iterable[GelPointerReflection]: # We sort pointers to produce similar queies regardless of Python # hashing -- this is to maximize the probability of a generated # uodate/insert query to hit the Gel's compiled cache. + + try: + return _sorted_pointers_cache[tp] + except KeyError: + pass + pointers = tp.__gel_reflection__.pointers - return [pointers[name] for name in sorted(pointers)] + ret = tuple(pointers[name] for name in sorted(pointers)) + _sorted_pointers_cache[tp] = ret + return ret def iter_graph(objs: Iterable[GelModel]) -> Iterable[GelModel]: @@ -284,8 +298,6 @@ def iter_graph(objs: Iterable[GelModel]) -> Iterable[GelModel]: visited: IDTracker[GelModel, None] = IDTracker() def _traverse(obj: GelModel) -> Iterable[GelModel]: - obj = unwrap_proxy(obj) - if obj in visited: return @@ -307,12 +319,15 @@ def _traverse(obj: GelModel) -> Iterable[GelModel]: continue if prop.cardinality.is_multi(): - assert is_link_list(linked) - for ref in linked: - yield from _traverse(ref) + if is_proxy_link_list(linked): + for proxy in linked: + yield from _traverse(unwrap_proxy_no_check(proxy)) + else: + assert is_link_list(linked) + for lobj in linked: + yield from _traverse(lobj) else: - assert isinstance(linked, GelModel) - yield from _traverse(linked) + yield from _traverse(unwrap_proxy(cast("GelModel", linked))) for o in objs: yield from _traverse(o) @@ -788,12 +803,13 @@ def commit(self) -> None: visited: IDTracker[GelModel, None] = IDTracker() def _traverse(obj: GelModel) -> None: - if not isinstance(obj, GelModel) or obj in visited: + if obj in visited: return + assert not isinstance(obj, ProxyModel) + visited.track(obj) - unwrapped = unwrap_proxy(obj) - unwrapped.__gel_commit__(self.object_ids.get(id(unwrapped))) + obj.__gel_commit__(self.object_ids.get(id(obj))) for prop in get_pointers(type(obj)): if prop.computed: @@ -814,17 +830,21 @@ def _traverse(obj: GelModel) -> None: if prop.kind is PointerKind.Link: if prop.cardinality.is_multi(): - assert is_link_list(linked) - for ref in linked: - if isinstance(ref, ProxyModel): - ref.__linkprops__.__gel_commit__() - _traverse(unwrap_proxy(ref)) - else: - _traverse(ref) + if is_proxy_link_list(linked): + for proxy in linked: + proxy.__linkprops__.__gel_commit__() + _traverse(unwrap_proxy_no_check(proxy)) + else: + assert is_link_list(linked) + for model in linked: + _traverse(model) linked.__gel_commit__() else: - assert isinstance(linked, GelModel) - _traverse(unwrap_proxy(linked)) + if isinstance(linked, ProxyModel): + linked.__linkprops__.__gel_commit__() + _traverse(unwrap_proxy_no_check(linked)) + else: + _traverse(cast("GelModel", linked)) else: assert prop.kind is PointerKind.Property From 942fe98f60f8c151ac1a792d72191b275972730d Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Tue, 17 Jun 2025 22:15:41 -0700 Subject: [PATCH 02/14] Move UpcastingDistinctList into a separate file Rename to ProxyDistinctList --- gel/_internal/_dlist.py | 2 +- gel/_internal/_qbmodel/_pydantic/_fields.py | 229 +------------------ gel/_internal/_qbmodel/_pydantic/_pdlist.py | 234 ++++++++++++++++++++ gel/_internal/_save.py | 8 +- tests/test_model_generator.py | 14 +- 5 files changed, 257 insertions(+), 230 deletions(-) create mode 100644 gel/_internal/_qbmodel/_pydantic/_pdlist.py diff --git a/gel/_internal/_dlist.py b/gel/_internal/_dlist.py index 6a961548c..66ddd1751 100644 --- a/gel/_internal/_dlist.py +++ b/gel/_internal/_dlist.py @@ -74,7 +74,7 @@ def __init__( else: self._initial_items = [] self._items = [] - # 'extend' is optimized in _UpcastingDistinctList + # 'extend' is optimized in ProxyDistinctList # for use in __init__ self.extend(iterable) diff --git a/gel/_internal/_qbmodel/_pydantic/_fields.py b/gel/_internal/_qbmodel/_pydantic/_fields.py index fd77b01a0..248a2b343 100644 --- a/gel/_internal/_qbmodel/_pydantic/_fields.py +++ b/gel/_internal/_qbmodel/_pydantic/_fields.py @@ -8,18 +8,15 @@ from typing import ( TYPE_CHECKING, - cast, Annotated, Any, Generic, - SupportsIndex, TypeVar, overload, ) from typing_extensions import ( TypeAliasType, - Self, ) import functools @@ -38,12 +35,13 @@ from gel._internal._qbmodel import _abstract from ._models import GelModel, ProxyModel +from ._pdlist import ProxyDistinctList from gel._internal._unsetid import UNSET_UUID if TYPE_CHECKING: from typing_extensions import Never - from collections.abc import Sequence, Iterable + from collections.abc import Sequence _T_co = TypeVar("_T_co", covariant=True) @@ -542,210 +540,6 @@ class _OptionalLink( type_params=(_PT_co, _MT_co), ) -ll_getattr = object.__getattribute__ - - -class _UpcastingDistinctList( - _dlist.DistinctList[_PT_co], Generic[_PT_co, _BMT_co] -): - # Mapping of object IDs to ProxyModels that wrap them. - _wrapped_index: dict[int, _PT_co] | None = None - - def _init_tracking(self) -> None: - super()._init_tracking() - - if self._wrapped_index is None: - self._wrapped_index = {} - for item in self._items: - assert isinstance(item, ProxyModel) - wrapped = ll_getattr(item, "_p__obj__") - self._wrapped_index[id(wrapped)] = cast("_PT_co", item) - - def _track_item(self, item: _PT_co) -> None: # type: ignore [misc] - assert isinstance(item, ProxyModel) - super()._track_item(cast("_PT_co", item)) - assert self._wrapped_index is not None - wrapped = ll_getattr(item, "_p__obj__") - self._wrapped_index[id(wrapped)] = cast("_PT_co", item) - - def _untrack_item(self, item: _PT_co) -> None: # type: ignore [misc] - assert isinstance(item, ProxyModel) - super()._untrack_item(cast("_PT_co", item)) - assert self._wrapped_index is not None - wrapped = ll_getattr(item, "_p__obj__") - self._wrapped_index.pop(id(wrapped), None) - - def _is_tracked(self, item: _PT_co | _BMT_co) -> bool: - self._init_tracking() - assert self._wrapped_index is not None - - if isinstance(item, ProxyModel): - return id(item._p__obj__) in self._wrapped_index - else: - return id(item) in self._wrapped_index - - def extend(self, values: Iterable[_PT_co | _BMT_co]) -> None: - # An optimized version of `extend()` - - if not values: - # Empty list => early return - return - - if values is self: - values = list(values) - - self._ensure_snapshot() - - cls = type(self) - t = cls.type - - assert self._wrapped_index is not None - assert self._set is not None - assert self._unhashables is not None - - # For an empty list we can call one extend() call instead - # of slow iterative appends. - fast_extend = len(self._wrapped_index) == 0 - - for v in values: - tv = type(v) - if tv is t.__proxy_of__: - # Fast path -- `v` is an instance of the base type. - # It has no link props, wrap it in a proxy in - # a fast way. - proxy = t.__gel_proxy_construct__(v, {}) - obj = v - elif tv is t: - # Another fast path -- `v` is already the correct proxy. - proxy = v # type: ignore [assignment] - obj = ll_getattr(v, "_p__obj__") - else: - proxy, obj = self._cast_value(v) - - oid = id(obj) - existing_proxy = self._wrapped_index.get(oid) - if existing_proxy is None: - self._wrapped_index[oid] = proxy - else: - if ( - existing_proxy.__linkprops__.__dict__ - != proxy.__linkprops__.__dict__ - ): - raise ValueError( - f"the list already contains {v!r} with " - f"a different set of link properties" - ) - - if obj.id is UNSET_UUID: - self._unhashables[id(proxy)] = proxy - else: - self._set.add(proxy) - - if not fast_extend: - self._items.append(proxy) - - if fast_extend: - self._items.extend(self._wrapped_index.values()) - - def _cast_value(self, value: Any) -> tuple[_PT_co, _BMT_co]: - cls = type(self) - t = cls.type - - assert issubclass(t, ProxyModel) - - tp_value = type(value) - - if tp_value is t.__proxy_of__: - # Fast path before we make all expensive isinstance calls. - return ( - t.__gel_proxy_construct__(value, {}), - value, - ) # type: ignore [return-value] - - if tp_value is t: - # It's a correct proxy for this link... return as is. - return ( - value, - ll_getattr(value, "_p__obj__"), - ) - - if not isinstance(value, ProxyModel) and isinstance( - value, t.__proxy_of__ - ): - # It's not a proxy, but the object is of the correct type -- - # re-wrap it in a correct proxy. - return ( - t.__gel_proxy_construct__(value, {}), - value, - ) # type: ignore [return-value] - - raise ValueError( - f"{cls!r} accepts only values of type {t.__name__} " - f"or {t.__proxy_of__.__name__}, got {tp_value!r}", - ) - - def _check_value(self, value: Any) -> _PT_co: - proxy, obj = self._cast_value(value) - - # We have to check if a proxy around the same object is already - # present in the list. - self._init_tracking() - assert self._wrapped_index is not None - try: - existing_proxy = self._wrapped_index[id(obj)] - except KeyError: - return proxy - - assert isinstance(existing_proxy, ProxyModel) - - if ( - existing_proxy.__linkprops__.__dict__ - != proxy.__linkprops__.__dict__ - ): - raise ValueError( - f"the list already contains {value!r} with " - f" a different set of link properties" - ) - # Return the already present identical proxy instead of inserting - # another one - return existing_proxy # type: ignore [return-value] - - def _find_proxied_obj(self, item: _PT_co | _BMT_co) -> _PT_co | None: - self._init_tracking() - assert self._wrapped_index is not None - - if isinstance(item, ProxyModel): - item = item._p__obj__ - - return self._wrapped_index.get(id(item), None) - - def clear(self) -> None: - super().clear() - self._wrapped_index = None - - if TYPE_CHECKING: - - def append(self, value: _PT_co | _BMT_co) -> None: ... - def insert( - self, index: SupportsIndex, value: _PT_co | _BMT_co - ) -> None: ... - def __setitem__( - self, - index: SupportsIndex | slice, - value: _PT_co | _BMT_co | Iterable[_PT_co | _BMT_co], - ) -> None: ... - def extend(self, values: Iterable[_PT_co | _BMT_co]) -> None: ... - def remove(self, value: _PT_co | _BMT_co) -> None: ... - def index( - self, - value: _PT_co | _BMT_co, - start: SupportsIndex = 0, - stop: SupportsIndex | None = None, - ) -> int: ... - def count(self, value: _PT_co | _BMT_co) -> int: ... - def __add__(self, other: Iterable[_PT_co | _BMT_co]) -> Self: ... - def __iadd__(self, other: Iterable[_PT_co | _BMT_co]) -> Self: ... - class _ComputedMultiLink( _ComputedMultiPointer[_MT_co, _BMT_co], @@ -816,13 +610,13 @@ def __get__(self, obj: None, objtype: type[Any]) -> type[_PT_co]: ... @overload def __get__( self, obj: object, objtype: Any = None - ) -> _UpcastingDistinctList[_PT_co, _BMT_co]: ... + ) -> ProxyDistinctList[_PT_co, _BMT_co]: ... def __get__( self, obj: Any, objtype: Any = None, - ) -> type[_PT_co] | _UpcastingDistinctList[_PT_co, _BMT_co] | None: ... + ) -> type[_PT_co] | ProxyDistinctList[_PT_co, _BMT_co] | None: ... def __set__( self, obj: Any, value: Sequence[_PT_co | _BMT_co] @@ -833,7 +627,7 @@ def __gel_resolve_dlist__( cls, type_args: tuple[type[Any]] | tuple[type[Any], type[Any]], ) -> _dlist.DistinctList[_MT_co]: - return _UpcastingDistinctList[ + return ProxyDistinctList[ type_args[0], # type: ignore [valid-type] type_args[1], # type: ignore [valid-type] ] # type: ignore [return-value] @@ -843,13 +637,12 @@ def _validate( cls, value: Any, generic_args: tuple[type[Any], type[Any]], - ) -> _UpcastingDistinctList[_PT_co, _BMT_co]: - lt: type[_UpcastingDistinctList[_PT_co, _BMT_co]] = ( - _UpcastingDistinctList[ - generic_args[0], # type: ignore [valid-type] - generic_args[1], # type: ignore [valid-type] - ] - ) + ) -> ProxyDistinctList[_PT_co, _BMT_co]: + lt: type[ProxyDistinctList[_PT_co, _BMT_co]] = ProxyDistinctList[ + generic_args[0], # type: ignore [valid-type] + generic_args[1], # type: ignore [valid-type] + ] + if type(lt) is list: # type: ignore [comparison-overlap] # Optimization for the most common scenario - user passes # a list of objects to the constructor. diff --git a/gel/_internal/_qbmodel/_pydantic/_pdlist.py b/gel/_internal/_qbmodel/_pydantic/_pdlist.py new file mode 100644 index 000000000..1ed8cb21d --- /dev/null +++ b/gel/_internal/_qbmodel/_pydantic/_pdlist.py @@ -0,0 +1,234 @@ +# SPDX-PackageName: gel-python +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright Gel Data Inc. and the contributors. + +from typing import ( + TYPE_CHECKING, + cast, + Any, + Generic, + SupportsIndex, + TypeVar, +) + +from collections.abc import Iterable + +from typing_extensions import ( + Self, +) + +from gel._internal import _dlist + +from ._models import GelModel, ProxyModel + +from gel._internal._unsetid import UNSET_UUID + + +_BMT_co = TypeVar("_BMT_co", bound=GelModel, covariant=True) +"""Base model type""" + +_PT_co = TypeVar("_PT_co", bound=ProxyModel[GelModel], covariant=True) +"""Proxy model""" + + +ll_getattr = object.__getattribute__ + + +class ProxyDistinctList(_dlist.DistinctList[_PT_co], Generic[_PT_co, _BMT_co]): + # Mapping of object IDs to ProxyModels that wrap them. + _wrapped_index: dict[int, _PT_co] | None = None + + def _init_tracking(self) -> None: + super()._init_tracking() + + if self._wrapped_index is None: + self._wrapped_index = {} + for item in self._items: + assert isinstance(item, ProxyModel) + wrapped = ll_getattr(item, "_p__obj__") + self._wrapped_index[id(wrapped)] = cast("_PT_co", item) + + def _track_item(self, item: _PT_co) -> None: # type: ignore [misc] + assert isinstance(item, ProxyModel) + super()._track_item(cast("_PT_co", item)) + assert self._wrapped_index is not None + wrapped = ll_getattr(item, "_p__obj__") + self._wrapped_index[id(wrapped)] = cast("_PT_co", item) + + def _untrack_item(self, item: _PT_co) -> None: # type: ignore [misc] + assert isinstance(item, ProxyModel) + super()._untrack_item(cast("_PT_co", item)) + assert self._wrapped_index is not None + wrapped = ll_getattr(item, "_p__obj__") + self._wrapped_index.pop(id(wrapped), None) + + def _is_tracked(self, item: _PT_co | _BMT_co) -> bool: + self._init_tracking() + assert self._wrapped_index is not None + + if isinstance(item, ProxyModel): + return id(item._p__obj__) in self._wrapped_index + else: + return id(item) in self._wrapped_index + + def extend(self, values: Iterable[_PT_co | _BMT_co]) -> None: + # An optimized version of `extend()` + + if not values: + # Empty list => early return + return + + if values is self: + values = list(values) + + self._ensure_snapshot() + + cls = type(self) + t = cls.type + + assert self._wrapped_index is not None + assert self._set is not None + assert self._unhashables is not None + + # For an empty list we can call one extend() call instead + # of slow iterative appends. + fast_extend = len(self._wrapped_index) == 0 + + for v in values: + tv = type(v) + if tv is t.__proxy_of__: + # Fast path -- `v` is an instance of the base type. + # It has no link props, wrap it in a proxy in + # a fast way. + proxy = t.__gel_proxy_construct__(v, {}) + obj = v + elif tv is t: + # Another fast path -- `v` is already the correct proxy. + proxy = v # type: ignore [assignment] + obj = ll_getattr(v, "_p__obj__") + else: + proxy, obj = self._cast_value(v) + + oid = id(obj) + existing_proxy = self._wrapped_index.get(oid) + if existing_proxy is None: + self._wrapped_index[oid] = proxy + else: + if ( + existing_proxy.__linkprops__.__dict__ + != proxy.__linkprops__.__dict__ + ): + raise ValueError( + f"the list already contains {v!r} with " + f"a different set of link properties" + ) + + if obj.id is UNSET_UUID: + self._unhashables[id(proxy)] = proxy + else: + self._set.add(proxy) + + if not fast_extend: + self._items.append(proxy) + + if fast_extend: + self._items.extend(self._wrapped_index.values()) + + def _cast_value(self, value: Any) -> tuple[_PT_co, _BMT_co]: + cls = type(self) + t = cls.type + + assert issubclass(t, ProxyModel) + + tp_value = type(value) + + if tp_value is t.__proxy_of__: + # Fast path before we make all expensive isinstance calls. + return ( + t.__gel_proxy_construct__(value, {}), + value, + ) # type: ignore [return-value] + + if tp_value is t: + # It's a correct proxy for this link... return as is. + return ( + value, + ll_getattr(value, "_p__obj__"), + ) + + if not isinstance(value, ProxyModel) and isinstance( + value, t.__proxy_of__ + ): + # It's not a proxy, but the object is of the correct type -- + # re-wrap it in a correct proxy. + return ( + t.__gel_proxy_construct__(value, {}), + value, + ) # type: ignore [return-value] + + raise ValueError( + f"{cls!r} accepts only values of type {t.__name__} " + f"or {t.__proxy_of__.__name__}, got {tp_value!r}", + ) + + def _check_value(self, value: Any) -> _PT_co: + proxy, obj = self._cast_value(value) + + # We have to check if a proxy around the same object is already + # present in the list. + self._init_tracking() + assert self._wrapped_index is not None + try: + existing_proxy = self._wrapped_index[id(obj)] + except KeyError: + return proxy + + assert isinstance(existing_proxy, ProxyModel) + + if ( + existing_proxy.__linkprops__.__dict__ + != proxy.__linkprops__.__dict__ + ): + raise ValueError( + f"the list already contains {value!r} with " + f" a different set of link properties" + ) + # Return the already present identical proxy instead of inserting + # another one + return existing_proxy # type: ignore [return-value] + + def _find_proxied_obj(self, item: _PT_co | _BMT_co) -> _PT_co | None: + self._init_tracking() + assert self._wrapped_index is not None + + if isinstance(item, ProxyModel): + item = item._p__obj__ + + return self._wrapped_index.get(id(item), None) + + def clear(self) -> None: + super().clear() + self._wrapped_index = None + + if TYPE_CHECKING: + + def append(self, value: _PT_co | _BMT_co) -> None: ... + def insert( + self, index: SupportsIndex, value: _PT_co | _BMT_co + ) -> None: ... + def __setitem__( + self, + index: SupportsIndex | slice, + value: _PT_co | _BMT_co | Iterable[_PT_co | _BMT_co], + ) -> None: ... + def extend(self, values: Iterable[_PT_co | _BMT_co]) -> None: ... + def remove(self, value: _PT_co | _BMT_co) -> None: ... + def index( + self, + value: _PT_co | _BMT_co, + start: SupportsIndex = 0, + stop: SupportsIndex | None = None, + ) -> int: ... + def count(self, value: _PT_co | _BMT_co) -> int: ... + def __add__(self, other: Iterable[_PT_co | _BMT_co]) -> Self: ... + def __iadd__(self, other: Iterable[_PT_co | _BMT_co]) -> Self: ... diff --git a/gel/_internal/_save.py b/gel/_internal/_save.py index 5afd33198..f999d4fbd 100644 --- a/gel/_internal/_save.py +++ b/gel/_internal/_save.py @@ -28,7 +28,7 @@ ) from gel._internal._unsetid import UNSET_UUID from gel._internal._edgeql import PointerKind, quote_ident -from gel._internal._qbmodel._pydantic._fields import _UpcastingDistinctList +from gel._internal._qbmodel._pydantic._pdlist import ProxyDistinctList if TYPE_CHECKING: import uuid @@ -255,8 +255,8 @@ def is_link_list(val: object) -> TypeGuard[DistinctList[GelModel]]: def is_proxy_link_list( val: object, -) -> TypeGuard[_UpcastingDistinctList[ProxyModel[GelModel], GelModel]]: - return isinstance(val, _UpcastingDistinctList) +) -> TypeGuard[ProxyDistinctList[ProxyModel[GelModel], GelModel]]: + return isinstance(val, ProxyDistinctList) def unwrap_proxy(val: GelModel) -> GelModel: @@ -626,7 +626,7 @@ def make_plan(objs: Iterable[GelModel]) -> SavePlan: # # First, we iterate through the list of added new objects # to the list. All of them should be ProxyModels - # (as we use UpcastingDistinctList for links with props.) + # (as we use ProxyDistinctList for links with props.) # Our goal is to segregate different combinations of # set link properties into separate groups. link_tp = type(added_proxies[0]) diff --git a/tests/test_model_generator.py b/tests/test_model_generator.py index d5078de90..d186b673d 100644 --- a/tests/test_model_generator.py +++ b/tests/test_model_generator.py @@ -34,8 +34,8 @@ from gel._internal._qbmodel._pydantic._models import GelModel from gel._internal._dlist import DistinctList from gel._internal._edgeql import Cardinality, PointerKind -from gel._internal._qbmodel._pydantic._fields import ( - _UpcastingDistinctList, +from gel._internal._qbmodel._pydantic._pdlist import ( + ProxyDistinctList, ) @@ -116,18 +116,18 @@ def assert_pointers_match( f"type is {e.type!r}", ) - if issubclass(p.type, _UpcastingDistinctList): - if not issubclass(e.type, _UpcastingDistinctList): + if issubclass(p.type, ProxyDistinctList): + if not issubclass(e.type, ProxyDistinctList): self.fail( f"{obj.__name__}.{p.name} eq_type check " - f" failed: p.type is _UpcastingDistinctList, " + f" failed: p.type is ProxyDistinctList, " f"but expected type is {e.type!r}", ) else: - if issubclass(e.type, _UpcastingDistinctList): + if issubclass(e.type, ProxyDistinctList): self.fail( f"{obj.__name__}.{p.name} eq_type check failed: " - f"p.type is not a _UpcastingDistinctList, but " + f"p.type is not a ProxyDistinctList, but " f"expected type is {e.type!r}", ) From f7abff9feab31c9f95c28f3b3bad101ae705e24e Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 11:16:53 -0700 Subject: [PATCH 03/14] Add client.__debug_save__() --- gel/_internal/_save.py | 30 ++++++++++++++++++++---------- gel/blocking_client.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/gel/_internal/_save.py b/gel/_internal/_save.py index f999d4fbd..0c3b73e2b 100644 --- a/gel/_internal/_save.py +++ b/gel/_internal/_save.py @@ -52,7 +52,7 @@ ] = weakref.WeakKeyDictionary() -_ll_getattr = object.__getattribute__ +ll_attr = object.__getattribute__ LinkPropertiesValues = TypeAliasType( @@ -263,13 +263,13 @@ def unwrap_proxy(val: GelModel) -> GelModel: if isinstance(val, ProxyModel): # This is perf-sensitive function as it's called on # every edge of the graph multiple times. - return _ll_getattr(val, "_p__obj__") # type: ignore [no-any-return] + return ll_attr(val, "_p__obj__") # type: ignore [no-any-return] else: return val def unwrap_proxy_no_check(val: ProxyModel[GelModel]) -> GelModel: - return _ll_getattr(val, "_p__obj__") # type: ignore [no-any-return] + return ll_attr(val, "_p__obj__") # type: ignore [no-any-return] def unwrap_dlist(val: Iterable[GelModel]) -> list[GelModel]: @@ -477,7 +477,9 @@ def make_plan(objs: Iterable[GelModel]) -> SavePlan: if prop.properties: assert isinstance(val, ProxyModel) link_prop_variant = bool( - val.__linkprops__.__gel_get_changed_fields__() + ll_attr( + val, "__linkprops__" + ).__gel_get_changed_fields__() ) if link_prop_variant: @@ -486,7 +488,9 @@ def make_plan(objs: Iterable[GelModel]) -> SavePlan: ptrs = get_pointers(val.__lprops__) props = { - p.name: getattr(val.__linkprops__, p.name, None) + p.name: getattr( + ll_attr(val, "__linkprops__"), p.name, None + ) for p in ptrs } @@ -600,7 +604,9 @@ def make_plan(objs: Iterable[GelModel]) -> SavePlan: assert isinstance(el, ProxyModel) if el in added_index: continue - if el.__linkprops__.__gel_get_changed_fields__(): + + lp = ll_attr(el, "__linkprops__") + if lp.__gel_get_changed_fields__(): added.append(el) # No adds or changes for this link? Continue to the next one. @@ -611,7 +617,7 @@ def make_plan(objs: Iterable[GelModel]) -> SavePlan: # Simple case -- no link props! if not prop.properties or all( - not link.__linkprops__.__gel_get_changed_fields__() + not ll_attr(link, "__linkprops__").__gel_get_changed_fields__() for link in added_proxies ): mch = MultiLinkAdd( @@ -639,7 +645,9 @@ def make_plan(objs: Iterable[GelModel]) -> SavePlan: added=unwrap_dlist(added), added_props=[ { - p.name: getattr(link.__linkprops__, p.name, None) + p.name: getattr( + ll_attr(link, "__linkprops__"), p.name, None + ) for p in props_info } for link in cast("list[ProxyModel[GelModel]]", added) @@ -832,7 +840,9 @@ def _traverse(obj: GelModel) -> None: if prop.cardinality.is_multi(): if is_proxy_link_list(linked): for proxy in linked: - proxy.__linkprops__.__gel_commit__() + ll_attr( + proxy, "__linkprops__" + ).__gel_commit__() _traverse(unwrap_proxy_no_check(proxy)) else: assert is_link_list(linked) @@ -841,7 +851,7 @@ def _traverse(obj: GelModel) -> None: linked.__gel_commit__() else: if isinstance(linked, ProxyModel): - linked.__linkprops__.__gel_commit__() + ll_attr(linked, "__linkprops__").__gel_commit__() _traverse(unwrap_proxy_no_check(linked)) else: _traverse(cast("GelModel", linked)) diff --git a/gel/blocking_client.py b/gel/blocking_client.py index 4f40cb2a8..24019afeb 100644 --- a/gel/blocking_client.py +++ b/gel/blocking_client.py @@ -22,6 +22,7 @@ from typing import Any import contextlib +import collections import datetime import queue import socket @@ -559,6 +560,35 @@ def save(self, *objs: GelModel) -> None: executor.commit() + def __debug_save__(self, *objs: GelModel) -> dict[str, float]: + timings = collections.defaultdict(float) + nqueries = collections.defaultdict(int) + + ns = time.monotonic_ns() + make_executor = make_save_executor_constructor(objs) + timings["__create_plan__"] = time.monotonic_ns() - ns + nqueries["__create_plan__"] = 1 + + for tx in self._batch(): + with tx: + executor = make_executor() + + for batches in executor: + for batch in batches: + ns = time.monotonic_ns() + tx.send_query(batch.query, batch.args) + batch_ids = tx.wait() + timings[batch.query] += time.monotonic_ns() - ns + nqueries[batch.query] += 1 + batch.feed_ids(batch_ids[0]) + + executor.commit() + + return { + k: (v / 1_000_000.0, nqueries[k]) + for k, v in sorted(timings.items(), key=lambda kv: kv[1]) + } + def _query(self, query_context: abstract.QueryContext): return iter_coroutine(super()._query(query_context)) From 4bea547dd240eb4ef63f4d6a3dcbeacef261274b Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 13:31:38 -0700 Subject: [PATCH 04/14] save(): Avoid nesting tuples when necessary --- gel/_internal/_save.py | 158 +++++++++++++++++++++++------------------ 1 file changed, 90 insertions(+), 68 deletions(-) diff --git a/gel/_internal/_save.py b/gel/_internal/_save.py index 0c3b73e2b..399e07046 100644 --- a/gel/_internal/_save.py +++ b/gel/_internal/_save.py @@ -98,14 +98,20 @@ def __post_init__(self) -> None: @_struct class MultiLinkAdd(BaseFieldChange): - added: Iterable[GelModel] + added: list[GelModel] - added_props: Iterable[LinkPropertiesValues] | None = None + added_props: list[LinkPropertiesValues] | None = None props_info: dict[str, GelPointerReflection] | None = None def __post_init__(self) -> None: assert not any(isinstance(o, ProxyModel) for o in self.added) assert self.info.cardinality.is_multi() + assert self.added_props is None or ( + self.props_info is not None + and len(self.added_props) > 0 + and len(self.added) == len(self.added_props) + and next(iter(self.added_props)).keys() == self.props_info.keys() + ) @_struct @@ -879,6 +885,36 @@ def _compile_change( args: list[object] = [] args_types: list[str] = [] + def arg_cast( + type_ql: str, + ) -> tuple[str, Callable[[str], str], Callable[[object], object]]: + # As a workaround for the current limitation + # of EdgeQL (tuple arguments can't have empty + # sets as elements, and free objects input + # hasn't yet landed), we represent empty set + # as an empty array, and non-empty values as + # an array of one element. More specifically, + # if a link property has type ``, then + # its value will be represented here as + # `>`. When the value is empty + # set, the argument will be an empty array, + # when it's non-empty, it will be a one-element + # array with a one-element tuple. Then to unpack: + # + # @prop := (array_unpack(val).0 limit 1) + # + # The nested tuple indirection is needed to support + # link props that have types of arrays. + if type_ql.startswith("array<"): + cast = f"array>" + ret = lambda x: f"(select array_unpack({x}).0 limit 1)" # noqa: E731 + arg_pack = lambda x: [(x,)] if x is not None else [] # noqa: E731 + else: + cast = f"array<{type_ql}>" + ret = lambda x: f"(select array_unpack({x}) limit 1)" # noqa: E731 + arg_pack = lambda x: [x] if x is not None else [] # noqa: E731 + return cast, ret, arg_pack + def add_arg( type_ql: str, value: Any, @@ -894,14 +930,13 @@ def add_arg( for ch in change.fields.values(): if isinstance(ch, PropertyChange): if ch.info.cardinality.is_optional(): + tp_ql, tp_unpack, arg_pack = arg_cast(ch.info.typexpr) arg = add_arg( - f"array>", - [(ch.value,)] if ch.value is not None else [], + tp_ql, + arg_pack(ch.value), ) shape_parts.append( - f"{quote_ident(ch.name)} := (" - f" select array_unpack({arg}).0 limit 1" - f")" + f"{quote_ident(ch.name)} := {tp_unpack(arg)}" ) else: assert ch.value is not None @@ -912,24 +947,36 @@ def add_arg( # Since we're passing a set of values packed into an array # we need to wrap elements in tuples to allow multi props # or arrays etc. - arg_t = f"array>" assign_op = ":=" if for_insert else "+=" - - arg = add_arg(arg_t, [(el,) for el in ch.added]) - shape_parts.append( - f"{quote_ident(ch.name)} {assign_op} array_unpack({arg}).0" - ) + if ch.info.typexpr.startswith("array<"): + arg_t = f"array>" + arg = add_arg(arg_t, [(el,) for el in ch.added]) + shape_parts.append( + f"{quote_ident(ch.name)} {assign_op} array_unpack({arg}).0" + ) + else: + arg_t = f"array<{ch.info.typexpr}>" + arg = add_arg(arg_t, ch.added) + shape_parts.append( + f"{quote_ident(ch.name)} {assign_op} array_unpack({arg})" + ) elif isinstance(ch, MultiPropRemove): - arg_t = f"array>" - assert not for_insert - arg = add_arg(arg_t, [(el,) for el in ch.removed]) - shape_parts.append( - f"{quote_ident(ch.name)} -= array_unpack({arg}).0" - ) + if ch.info.typexpr.startswith("array<"): + arg_t = f"array>" + arg = add_arg(arg_t, [(el,) for el in ch.removed]) + shape_parts.append( + f"{quote_ident(ch.name)} -= array_unpack({arg}).0" + ) + else: + arg_t = f"array<{ch.info.typexpr}>" + arg = add_arg(arg_t, ch.removed) + shape_parts.append( + f"{quote_ident(ch.name)} -= array_unpack({arg})" + ) elif isinstance(ch, SingleLinkChange): tid = ( @@ -941,17 +988,16 @@ def add_arg( assert ch.props_info is not None assert tid is not None - sl_subt = [ - f"array>" + arg_casts = { + k: arg_cast(ch.props_info[k].typexpr) for k in ch.props_info - ] + } + + sl_subt = [arg_casts[k][0] for k in ch.props_info] sl_args = [ tid, - *( - [] if ch.props[k] is None else [(ch.props[k],)] - for k in ch.props_info - ), + *(arg_casts[k][2](ch.props[k]) for k in ch.props_info), ] arg = add_arg( @@ -960,10 +1006,9 @@ def add_arg( ) subq_shape = [ - f"@{quote_ident(pname)} := (" - f" select array_unpack({arg}.{i}).0 limit 1" - f")" - for i, pname in enumerate(ch.props.keys(), 1) + f"@{quote_ident(pname)} := " + f"{arg_casts[pname][1](f'{arg}.{i}')}" + for i, pname in enumerate(ch.props_info, 1) ] shape_parts.append( @@ -976,13 +1021,13 @@ def add_arg( else: if ch.info.cardinality.is_optional(): arg = add_arg( - "array>", - [(tid,)] if tid is not None else [], + "array", + [tid] if tid is not None else [], ) shape_parts.append( f"{quote_ident(ch.name)} := " f"<{linked_name}>(" - f" select array_unpack({arg}).0 limit 1" + f" select array_unpack({arg}) limit 1" f")" ) else: @@ -1008,62 +1053,39 @@ def add_arg( elif isinstance(ch, MultiLinkAdd): if ch.added_props: - assert ch.props_info is not None + assert ch.props_info link_args: list[tuple[Any, ...]] = [] - prop_order = None tuple_subt: list[str] | None = None + arg_casts = { + k: arg_cast(ch.props_info[k].typexpr) + for k in ch.props_info + } + + tuple_subt = [arg_casts[k][0] for k in ch.props_info] + for addo, addp in zip( ch.added, ch.added_props, strict=True ): - if prop_order is None: - prop_order = addp.keys() - # As a workaround for the current limitation - # of EdgeQL (tuple arguments can't have empty - # sets as elements, and free objects input - # hasn't yet landed), we represent empty set - # as an empty array, and non-empty values as - # an array of one element. More specifically, - # if a link property has type ``, then - # its value will be represented here as - # `>`. When the value is empty - # set, the argument will be an empty array, - # when it's non-empty, it will be a one-element - # array with a one-element tuple. Then to unpack: - # - # @prop := (array_unpack(val).0 limit 1) - # - # The nested tuple indirection is needed to support - # link props that have types of arrays. - tuple_subt = [ - f"array>" - for k in prop_order - ] - - assert prop_order == addp.keys() - link_args.append( ( self._get_id(addo), *( - [] if addp[k] is None else [(addp[k],)] - for k in prop_order + arg_casts[k][2](addp[k]) + for k in ch.props_info ), ) ) - assert prop_order - assert tuple_subt - arg = add_arg( f"array>", link_args, ) lp_assign = ", ".join( - f"@{p} := (select array_unpack(tup.{i + 1}).0 limit 1)" - for i, p in enumerate(prop_order) + f"@{p} := {arg_casts[p][1](f'tup.{i + 1}')}" + for i, p in enumerate(ch.props_info) ) assign_op = ":=" if for_insert else "+=" From 7fb6b778f42cb027be746bd266693656559aa90c Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 15:07:54 -0700 Subject: [PATCH 05/14] Add more debug data capture to __debug_save__ --- gel/_internal/_save.py | 15 ++++++++-- gel/blocking_client.py | 63 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/gel/_internal/_save.py b/gel/_internal/_save.py index 399e07046..f11e853c6 100644 --- a/gel/_internal/_save.py +++ b/gel/_internal/_save.py @@ -742,6 +742,7 @@ def make_save_executor_constructor( class QueryBatch: executor: SaveExecutor query: str + args_query: str args: list[tuple[object, ...]] changes: list[ModelChange] insert: bool @@ -757,6 +758,7 @@ def feed_ids(self, obj_ids: Iterable[uuid.UUID]) -> None: class CompiledQuery: single_query: str multi_query: str + args_query: str arg: tuple[object, ...] change: ModelChange @@ -799,6 +801,7 @@ def _compile_batch( QueryBatch( executor=self, query=lqs[0].multi_query, + args_query=lqs[0].args_query, args=[lq.arg for lq in lqs], changes=[lq.change for lq in lqs], insert=for_insert, @@ -1084,7 +1087,7 @@ def add_arg( ) lp_assign = ", ".join( - f"@{p} := {arg_casts[p][1](f'tup.{i + 1}')}" + f"@{p} := {arg_casts[p][1](f'__tup.{i + 1}')}" for i, p in enumerate(ch.props_info) ) @@ -1093,8 +1096,8 @@ def add_arg( shape_parts.append( f"{quote_ident(ch.name)} {assign_op} " f"assert_distinct((" - f"for tup in array_unpack({arg}) union (" - f"select (<{ch.info.typexpr}>tup.0) {{ " + f"for __tup in array_unpack({arg}) union (" + f"select (<{ch.info.typexpr}>__tup.0) {{ " f"{lp_assign}" f"}})))" ) @@ -1146,9 +1149,15 @@ def add_arg( ) select __query.id """ + args_query = f""" + with __all_data := >>$0 + select count(array_unpack(__all_data)) + """ + return CompiledQuery( single_query=single_query, multi_query=multi_query, + args_query=args_query, arg=tuple(args), change=change, ) diff --git a/gel/blocking_client.py b/gel/blocking_client.py index 24019afeb..2ec04b9c0 100644 --- a/gel/blocking_client.py +++ b/gel/blocking_client.py @@ -23,6 +23,7 @@ import contextlib import collections +import dataclasses import datetime import queue import socket @@ -52,6 +53,24 @@ T = typing.TypeVar("T") +@dataclasses.dataclass +class SaveDebug: + queries: list[SaveQueryDebug] + plan_time: float + + +@dataclasses.dataclass +class SaveQueryDebug: + query: str = "" + max_args_number: int = 0 + total_execs: int = 0 + total_exec_time: float = 0 + analyze: str = "" + analyze_args: object = None + args_query: str = "" + args_analyze: object = None + + def iter_coroutine(coro: typing.Coroutine[None, None, T]) -> T: try: coro.send(None) @@ -560,14 +579,12 @@ def save(self, *objs: GelModel) -> None: executor.commit() - def __debug_save__(self, *objs: GelModel) -> dict[str, float]: - timings = collections.defaultdict(float) - nqueries = collections.defaultdict(int) - + def __debug_save__(self, *objs: GelModel) -> SaveDebug: ns = time.monotonic_ns() make_executor = make_save_executor_constructor(objs) - timings["__create_plan__"] = time.monotonic_ns() - ns - nqueries["__create_plan__"] = 1 + plan_time = time.monotonic_ns() - ns + + queries = collections.defaultdict(SaveQueryDebug) for tx in self._batch(): with tx: @@ -575,19 +592,41 @@ def __debug_save__(self, *objs: GelModel) -> dict[str, float]: for batches in executor: for batch in batches: + qdebug = queries[batch.query] + + qdebug.total_execs += 1 + qdebug.query = batch.query + qdebug.args_query = batch.args_query + + if not qdebug.analyze: + tx.send_query(f"ANALYZE {batch.query}", batch.args) + qdebug.analyze = tx.wait()[0][0] + tx.send_query( + f"ANALYZE {batch.args_query}", batch.args + ) + qdebug.args_analyze = tx.wait()[0][0] + qdebug.analyze_args = batch.args + ns = time.monotonic_ns() tx.send_query(batch.query, batch.args) batch_ids = tx.wait() - timings[batch.query] += time.monotonic_ns() - ns - nqueries[batch.query] += 1 + qdebug.total_exec_time += time.monotonic_ns() - ns + + qdebug.max_args_number = max( + qdebug.max_args_number, len(batch.args) + ) + batch.feed_ids(batch_ids[0]) executor.commit() - return { - k: (v / 1_000_000.0, nqueries[k]) - for k, v in sorted(timings.items(), key=lambda kv: kv[1]) - } + for qdebug in queries.values(): + qdebug.total_exec_time /= 1_000_000.0 + + return SaveDebug( + queries=sorted(queries.values(), key=lambda q: q.total_exec_time), + plan_time=plan_time / 1_000_000.0, + ) def _query(self, query_context: abstract.QueryContext): return iter_coroutine(super()._query(query_context)) From c1e252f778cafdc4f1d0f10c19d8e0ed2506a5d6 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 16:57:42 -0700 Subject: [PATCH 06/14] Implement caching of "paths" Pydantic tends to touch class-level attributes on models, which are rather complex query builder objects in our case. Cache class-level attributes and forget. --- gel/_internal/_qbmodel/_abstract/_descriptors.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/gel/_internal/_qbmodel/_abstract/_descriptors.py b/gel/_internal/_qbmodel/_abstract/_descriptors.py index 66e336b9b..2b5191d74 100644 --- a/gel/_internal/_qbmodel/_abstract/_descriptors.py +++ b/gel/_internal/_qbmodel/_abstract/_descriptors.py @@ -155,7 +155,16 @@ def __get__( raise AttributeError(f"{self.__gel_name__!r} is not set") else: assert owner is not None - return self.get(owner) + cache_attr = f"__cached_path_{self.__gel_name__}" + + try: + return object.__getattribute__(owner, cache_attr) + except AttributeError: + pass + + path = self.get(owner) + setattr(owner, cache_attr, path) + return path def field_descriptor( From 0086353a5556dc300f8d0ddb5bff878ad52e29b1 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 16:59:03 -0700 Subject: [PATCH 07/14] Fix signature of ProxyModel.__init__() ProxyModel's __init__ should just be inherited from the type it wraps. Fix that, adjust the implementation of .link(), fix pydantic to use `proxy.link(object)` instead of `proxy(object)`. --- gel/_internal/_codegen/_models/_pydantic.py | 86 +++++++++------------ gel/_internal/_qbmodel/_pydantic/_fields.py | 4 +- gel/_internal/_qbmodel/_pydantic/_models.py | 76 +++++++++++------- tests/test_model_generator.py | 10 ++- 4 files changed, 97 insertions(+), 79 deletions(-) diff --git a/gel/_internal/_codegen/_models/_pydantic.py b/gel/_internal/_codegen/_models/_pydantic.py index 644ff27f6..327efaf09 100644 --- a/gel/_internal/_codegen/_models/_pydantic.py +++ b/gel/_internal/_codegen/_models/_pydantic.py @@ -2826,6 +2826,32 @@ def _write_object_type_qb_methods( self.write("...") self.write() + def _generate_init_args(self, objtype: reflection.ObjectType) -> list[str]: + init_pointers = _filter_pointers( + self._get_pointer_origins(objtype), + filters=[ + lambda ptr, _: ( + (ptr.name != "id" and not ptr.is_computed) + or objtype.name.startswith("schema::") + ), + ], + exclude_id=False, + ) + init_args: list[str] = [] + if init_pointers: + init_args.extend(["/", "*"]) + for ptr, org_objtype in init_pointers: + ptr_t = self.get_ptr_type( + org_objtype, + ptr, + style="arg", + prefer_broad_target_type=True, + consider_default=True, + ) + init_args.append(f"{ptr.name}: {ptr_t}") + + return init_args + def _write_base_object_type_body( self, objtype: reflection.ObjectType, @@ -2860,28 +2886,7 @@ def _write_base_object_type_body( ) self.write() - init_pointers = _filter_pointers( - self._get_pointer_origins(objtype), - filters=[ - lambda ptr, obj: ( - (ptr.name != "id" and not ptr.is_computed) - or objtype.name.startswith("schema::") - ), - ], - exclude_id=False, - ) - init_args = [] - if init_pointers: - init_args.extend(["/", "*"]) - for ptr, org_objtype in init_pointers: - ptr_t = self.get_ptr_type( - org_objtype, - ptr, - style="arg", - prefer_broad_target_type=True, - consider_default=True, - ) - init_args.append(f"{ptr.name}: {ptr_t}") + init_args = self._generate_init_args(objtype) with self.type_checking(): with self._method_def("__init__", init_args): @@ -3004,6 +3009,7 @@ def _write_object_type_link_variant( ptr = pointer pname = ptr.name target_type = self._types[ptr.target_id] + assert isinstance(target_type, reflection.ObjectType) import_time = ( ImportTime.typecheck if is_forward_decl @@ -3086,40 +3092,20 @@ def _write_object_type_link_variant( self.write("__linkprops__: __lprops__") self.write() if is_forward_decl: - args = [f"obj: {target}", "/", "*", *lprops] - with self._method_def("__init__", args): + init_args = self._generate_init_args(target_type) + with self._method_def("__init__", init_args): self.write("...") - else: - # It's important to just forward '**link_props' to - # the constructor of `__lprops__` -- otherwise pydantic's - # tracking and our's own tracking would assume that argument - # defaults (Nones in this case) were explicitly set by the user - # leading to inefficient queries in save(). - args = ["obj", "/", "**link_props"] - with self._method_def("__init__", args): - obj = self.import_name("builtins", "object") - self.write(f"{proxymodel_t}.__init__(self, obj)") - self.write( - "lprops = self.__class__.__lprops__(**link_props)" - ) - self.write( - f'{obj}.__setattr__(self, "__linkprops__", lprops)' - ) self.write() if is_forward_decl: args = [f"obj: {target}", "/", "*", *lprops] - with self._classmethod_def("link", args, self_t): + with self._classmethod_def( + "link", + args, + self_t, + line_comment="type: ignore[override]", + ): self.write("...") - else: - args = ["obj", "/", "**link_props"] - with self._classmethod_def("link", args, self_t): - self.write( - self.format_list( - "return cls({list})", - ["obj", "**link_props"], - ), - ) self.write() diff --git a/gel/_internal/_qbmodel/_pydantic/_fields.py b/gel/_internal/_qbmodel/_pydantic/_fields.py index 248a2b343..f94699d20 100644 --- a/gel/_internal/_qbmodel/_pydantic/_fields.py +++ b/gel/_internal/_qbmodel/_pydantic/_fields.py @@ -401,7 +401,7 @@ def _validate( if value is None or isinstance(value, mt): return value elif isinstance(value, bmt): - return mt(value) # type: ignore [no-any-return] + return mt.link(value) # type: ignore [no-any-return] else: raise TypeError( f"could not convert {type(value)} to {mt.__name__}" @@ -437,7 +437,7 @@ def _validate( if isinstance(value, mt): return value # type: ignore [no-any-return] elif isinstance(value, bmt): - return mt(value) # type: ignore [no-any-return] + return mt.link(value) # type: ignore [no-any-return] else: raise TypeError( f"could not convert {type(value)} to {mt.__name__}" diff --git a/gel/_internal/_qbmodel/_pydantic/_models.py b/gel/_internal/_qbmodel/_pydantic/_models.py index 4895e05d3..bf3a38f26 100644 --- a/gel/_internal/_qbmodel/_pydantic/_models.py +++ b/gel/_internal/_qbmodel/_pydantic/_models.py @@ -492,7 +492,7 @@ class GelLinkModel(GelSourceModel): class ProxyModel(GelModel, Generic[_MT_co]): - __slots__ = ("_p__obj__",) + __slots__ = ("__linkprops__", "_p__obj__") __gel_proxied_dunders__: ClassVar[frozenset[str]] = frozenset( { @@ -507,33 +507,52 @@ class ProxyModel(GelModel, Generic[_MT_co]): __linkprops__: GelLinkModel __lprops__: ClassVar[type[GelLinkModel]] - def __init__(self, obj: _MT_co, /) -> None: - if isinstance(obj, ProxyModel): - raise TypeError( - f"ProxyModel {type(self).__qualname__} cannot wrap " - f"another ProxyModel {type(obj).__qualname__}" - ) - if not isinstance(obj, self.__proxy_of__): - # A long time of debugging revealed that it's very important to - # check `obj` being of a correct type. Pydantic can instantiate - # a ProxyModel with an incorrect type, e.g. when you pass - # a list like `[1]` into a MultiLinkWithProps field -- - # Pydantic will try to wrap `[1]` into a list of ProxyModels. - # And when it eventually fails to do so, everything is broken, - # even error reporting and repr(). - # - # Codegen'ed ProxyModel subclasses explicitly call - # ProxyModel.__init__() in their __init__() methods to - # make sure that this check is always performed. - # - # If it ever has to be removed, make sure to at least check - # that `obj` is an instance of `GelModel`. - raise ValueError( - f"only instances of {self.__proxy_of__.__name__} are allowed, " - f"got {type(obj).__name__}", - ) + def __init__(self, /, **kwargs: Any) -> None: + # We want ProxyModel to be a trasparent wrapper, so we + # forward the constructor arguments to the wrapped object. + wrapped = self.__proxy_of__(**kwargs) + ll_setattr(self, "_p__obj__", wrapped) + ll_setattr( + self, "__linkprops__", self.__lprops__.__gel_model_construct__({}) + ) + + @classmethod + def link(cls, obj: _MT_co, /, **link_props: Any) -> Self: # type: ignore [misc] + self = cls.__new__(cls) + + if type(obj) is not self.__proxy_of__: + if isinstance(obj, ProxyModel): + raise TypeError( + f"ProxyModel {type(self).__qualname__} cannot wrap " + f"another ProxyModel {type(obj).__qualname__}" + ) + if not isinstance(obj, self.__proxy_of__): + # A long time of debugging revealed that it's very important to + # check `obj` being of a correct type. Pydantic can instantiate + # a ProxyModel with an incorrect type, e.g. when you pass + # a list like `[1]` into a MultiLinkWithProps field -- + # Pydantic will try to wrap `[1]` into a list of ProxyModels. + # And when it eventually fails to do so, everything is broken, + # even error reporting and repr(). + # + # Codegen'ed ProxyModel subclasses explicitly call + # ProxyModel.__init__() in their __init__() methods to + # make sure that this check is always performed. + # + # If it ever has to be removed, make sure to at least check + # that `obj` is an instance of `GelModel`. + raise ValueError( + f"only instances of {self.__proxy_of__.__name__} " + f"are allowed, got {type(obj).__name__}", + ) + ll_setattr(self, "_p__obj__", obj) + lprops = cls.__lprops__(**link_props) + ll_setattr(self, "__linkprops__", lprops) + + return self + def __getattribute__(self, name: str) -> Any: if ( name == "_p__obj__" # noqa: PLR1714 @@ -557,6 +576,11 @@ def __getattribute__(self, name: str) -> Any: return super().__getattribute__(name) def __setattr__(self, name: str, value: Any) -> None: + if not name.startswith("_"): + base = ll_getattr(self, "_p__obj__") + setattr(base, name, value) + return + model_fields = type(self).__proxy_of__.model_fields if name in model_fields: # writing to a field: mutate the wrapped model diff --git a/tests/test_model_generator.py b/tests/test_model_generator.py index d186b673d..f96e46c5d 100644 --- a/tests/test_model_generator.py +++ b/tests/test_model_generator.py @@ -417,7 +417,7 @@ def test_modelgen_data_model_validation_1(self): with self.assertRaisesRegex( ValueError, r"(?s)only instances of User are allowed, got .*int" ): - default.GameSession.players(1) # type: ignore + default.GameSession.players.link(1) # type: ignore u = default.User(name="batman") p = default.Post(body="aaa", author=u) @@ -426,6 +426,14 @@ def test_modelgen_data_model_validation_1(self): ): default.GameSession(num=7, prayers=[p]) # type: ignore + gp = default.GameSession.players(name="johny") + self.assertIsInstance(gp, default.User) + self.assertIsInstance(gp, default.GameSession.players) + self.assertIsInstance(gp._p__obj__, default.User) + self.assertEqual(gp.name, "johny") + self.assertEqual(gp._p__obj__.name, "johny") + self.assertIsNotNone(gp.__linkprops__) + # Check that `groups` is not an allowed keyword-arg for `User.__init__` self.assertEqual( reveal_type(default.User), From bbe4c02e7111c00428be7e1864f2e6f90c66203e Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 17:48:31 -0700 Subject: [PATCH 08/14] Speedup `ProxyModel.link` attribute resolution... ...and consequently `MyType.path.link()` calls. --- gel/_internal/_qb/_generics.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gel/_internal/_qb/_generics.py b/gel/_internal/_qb/_generics.py index 0645e78bd..2f0206c23 100644 --- a/gel/_internal/_qb/_generics.py +++ b/gel/_internal/_qb/_generics.py @@ -16,6 +16,7 @@ import dataclasses import functools +import types from gel._internal import _edgeql from gel._internal import _typing_inspect @@ -163,6 +164,25 @@ def __repr__(self) -> str: return f"{_utils.type_repr(type(self))}[{origin}, {metadata}]" def __getattr__(self, attr: str) -> Any: + if attr == "link": + # `default.MyType.linkname.link()` is a very common operation: + # it can be called millions of times when loading data and + # the descriptor lookup with the subsequent `.get()` slow down + # instance creation ~40%. + # + # Given that "link" is a reserved EdgeQL keyword, we can just + # resolve it to `ProxyModel.link()` as fast as possible. + try: + method = type.__getattribute__(self.__gel_origin__, "link") + except AttributeError: + # Alright, let's try the descriptor lookup. + pass + else: + if isinstance(method, types.MethodType): + # Got it -- cache it so it's even faster next time. + object.__setattr__(self, "link", method) + return method + if ( not _utils.is_dunder(attr) or attr in self.__gel_proxied_dunders__ From e69af305430c1d9fd002f0238657a8c1855dd8f3 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 21:36:56 -0700 Subject: [PATCH 09/14] Optimize GelModel.__init__() -- 10% faster --- gel/_internal/_qbmodel/_pydantic/_models.py | 61 +++++++++++++++------ tests/test_model_generator.py | 6 +- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/gel/_internal/_qbmodel/_pydantic/_models.py b/gel/_internal/_qbmodel/_pydantic/_models.py index bf3a38f26..bc62eb582 100644 --- a/gel/_internal/_qbmodel/_pydantic/_models.py +++ b/gel/_internal/_qbmodel/_pydantic/_models.py @@ -347,10 +347,41 @@ class GelModel( if TYPE_CHECKING: id: uuid.UUID + __gel_computed_fields__: frozenset[str] | None - def __init__(self, /, **kwargs: Any) -> None: + @classmethod + def __gel_gen_computed_fields__(cls) -> frozenset[str] | None: + cls.model_rebuild() + ret: set[str] = set() + for field_name, field in cls.__pydantic_fields__.items(): + # ignore `field.init=None` - unset, so we're fine with it. + if field.init is False: + ret.add(field_name) + ret.discard("id") + comp_fields = frozenset(ret) if ret else None + cls.__gel_computed_fields__ = comp_fields + return comp_fields + + def __init__( + self, + /, + *, + id: uuid.UUID = UNSET_UUID, # noqa: A002 + **kwargs: Any, + ) -> None: cls = type(self) + try: + comp_fields = type.__getattribute__(cls, "__gel_computed_fields__") + except AttributeError: + comp_fields = cls.__gel_gen_computed_fields__() + + if id is not UNSET_UUID: + raise ValueError( + "models do not support setting `id` on construction; " + "`id` is set automatically by the `client.save()` method" + ) + # Prohibit passing computed fields to the constructor. # Unfortunately `init=False` doesn't work with BaseModel # pydantic classes (the docs states it only works in @@ -358,16 +389,15 @@ def __init__(self, /, **kwargs: Any) -> None: # # We can potentially optimize this by caching a frozenset # of field names that are computed. - has_computed_fields = False - cls.model_rebuild() - for field_name, field in cls.__pydantic_fields__.items(): - # ignore `field.init=None` - unset, so we're fine with it. - if field.init is False: - has_computed_fields = True - if field_name in kwargs: - raise ValueError( - f"cannot set field {field_name!r} on {cls.__name__}" - ) + if comp_fields is not None and ( + comp_args := comp_fields & kwargs.keys() + ): + comp_arg = next(iter(comp_args)) + raise ValueError( + f"{cls.__qualname__} model does not accept {comp_arg!r} " + f"argument, it is a computed field " + f"(the database computes it for you)" + ) super().__init__(**kwargs) @@ -379,11 +409,10 @@ def __init__(self, /, **kwargs: Any) -> None: # want those None values to ever surface anywhere, be that # attribute access or serialization or anything else that # reads from __dict__. - if has_computed_fields: - for field_name, field in cls.__pydantic_fields__.items(): - # ignore `field.init=None` - unset, so we're fine with it. - if field.init is False and field_name != "id": - self.__dict__.pop(field_name, None) + if comp_fields is not None: + pop = self.__dict__.pop + for field_name in comp_fields: + pop(field_name, None) def __gel_is_new__(self) -> bool: return self.id is UNSET_UUID diff --git a/tests/test_model_generator.py b/tests/test_model_generator.py index f96e46c5d..6dbd3561b 100644 --- a/tests/test_model_generator.py +++ b/tests/test_model_generator.py @@ -490,13 +490,15 @@ def test_modelgen_data_model_validation_1(self): # Let's test computed link as an arg with self.assertRaisesRegex( - ValueError, r"(?s)cannot set field .groups. on User" + ValueError, + r"(?s)User model does not accept .groups.*computed field", ): default.User(name="aaaa", groups=(1, 2, 3)) # type: ignore # Let's test computed property as an arg with self.assertRaisesRegex( - ValueError, r"(?s)cannot set field .name_len. on User" + ValueError, + r"(?s)User model does not accept .name_len.*computed field", ): default.User(name="aaaa", name_len=123) # type: ignore From 49502ea2b3ff6c02690b6423c8bff91fbb5bc6dd Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 21:43:22 -0700 Subject: [PATCH 10/14] Fix wrong condition in _MultiLinkWithProps.validate --- gel/_internal/_qbmodel/_pydantic/_fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gel/_internal/_qbmodel/_pydantic/_fields.py b/gel/_internal/_qbmodel/_pydantic/_fields.py index f94699d20..68bf5f6d8 100644 --- a/gel/_internal/_qbmodel/_pydantic/_fields.py +++ b/gel/_internal/_qbmodel/_pydantic/_fields.py @@ -643,7 +643,7 @@ def _validate( generic_args[1], # type: ignore [valid-type] ] - if type(lt) is list: # type: ignore [comparison-overlap] + if type(value) is list: # Optimization for the most common scenario - user passes # a list of objects to the constructor. return lt(value) From 03426ba78a5de9e30d93fce205413bd83ab8e34b Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 21:53:09 -0700 Subject: [PATCH 11/14] Speedup __proxy_of__ attribute access on ProxyModel Makes ProxyModel.link() 50% faster --- gel/_internal/_qbmodel/_pydantic/_models.py | 30 ++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/gel/_internal/_qbmodel/_pydantic/_models.py b/gel/_internal/_qbmodel/_pydantic/_models.py index bc62eb582..1758ffdf9 100644 --- a/gel/_internal/_qbmodel/_pydantic/_models.py +++ b/gel/_internal/_qbmodel/_pydantic/_models.py @@ -250,6 +250,7 @@ def _process_pydantic_fields( # Low-level attribute functions ll_setattr = object.__setattr__ ll_getattr = object.__getattribute__ +ll_type_getattr = type.__getattribute__ class GelSourceModel( @@ -547,15 +548,16 @@ def __init__(self, /, **kwargs: Any) -> None: @classmethod def link(cls, obj: _MT_co, /, **link_props: Any) -> Self: # type: ignore [misc] - self = cls.__new__(cls) + proxy_of = ll_type_getattr(cls, "__proxy_of__") + lprops_cls = ll_type_getattr(cls, "__lprops__") - if type(obj) is not self.__proxy_of__: + if type(obj) is not proxy_of: if isinstance(obj, ProxyModel): raise TypeError( - f"ProxyModel {type(self).__qualname__} cannot wrap " + f"ProxyModel {cls.__qualname__} cannot wrap " f"another ProxyModel {type(obj).__qualname__}" ) - if not isinstance(obj, self.__proxy_of__): + if not isinstance(obj, proxy_of): # A long time of debugging revealed that it's very important to # check `obj` being of a correct type. Pydantic can instantiate # a ProxyModel with an incorrect type, e.g. when you pass @@ -571,23 +573,27 @@ def link(cls, obj: _MT_co, /, **link_props: Any) -> Self: # type: ignore [misc] # If it ever has to be removed, make sure to at least check # that `obj` is an instance of `GelModel`. raise ValueError( - f"only instances of {self.__proxy_of__.__name__} " + f"only instances of {proxy_of.__name__} " f"are allowed, got {type(obj).__name__}", ) - ll_setattr(self, "_p__obj__", obj) + self = cls.__new__(cls) - lprops = cls.__lprops__(**link_props) + lprops = lprops_cls(**link_props) ll_setattr(self, "__linkprops__", lprops) + ll_setattr(self, "_p__obj__", obj) + return self def __getattribute__(self, name: str) -> Any: - if ( - name == "_p__obj__" # noqa: PLR1714 - or name == "__linkprops__" - or name == "__class__" - ): + if name in { + "_p__obj__", + "__linkprops__", + "__proxy_of__", + "__class__", + "__lprops__", + }: # Fast path for the wrapped object itself / linkprops model # (this optimization is informed by profiling model # instantiation and save() operation) From f996564c6739d45d2bfae40b6c9111d8eeb25b6e Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 22:33:50 -0700 Subject: [PATCH 12/14] Make proxy dlist 15% faster Apparently extending an empty list is much slower than throwing it away and constructing a new one. --- gel/_internal/_qbmodel/_pydantic/_pdlist.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/gel/_internal/_qbmodel/_pydantic/_pdlist.py b/gel/_internal/_qbmodel/_pydantic/_pdlist.py index 1ed8cb21d..b3536e9b9 100644 --- a/gel/_internal/_qbmodel/_pydantic/_pdlist.py +++ b/gel/_internal/_qbmodel/_pydantic/_pdlist.py @@ -85,6 +85,7 @@ def extend(self, values: Iterable[_PT_co | _BMT_co]) -> None: cls = type(self) t = cls.type + proxy_of = t.__proxy_of__ assert self._wrapped_index is not None assert self._set is not None @@ -92,11 +93,11 @@ def extend(self, values: Iterable[_PT_co | _BMT_co]) -> None: # For an empty list we can call one extend() call instead # of slow iterative appends. - fast_extend = len(self._wrapped_index) == 0 + empty_items = len(self._wrapped_index) == len(self._items) == 0 for v in values: tv = type(v) - if tv is t.__proxy_of__: + if tv is proxy_of: # Fast path -- `v` is an instance of the base type. # It has no link props, wrap it in a proxy in # a fast way. @@ -128,11 +129,12 @@ def extend(self, values: Iterable[_PT_co | _BMT_co]) -> None: else: self._set.add(proxy) - if not fast_extend: + if not empty_items: self._items.append(proxy) - if fast_extend: - self._items.extend(self._wrapped_index.values()) + if empty_items: + # A LOT faster than `extend()` ¯\_(ツ)_/¯ + self._items = list(self._wrapped_index.values()) def _cast_value(self, value: Any) -> tuple[_PT_co, _BMT_co]: cls = type(self) From 6ac0b26467d920a722ffd32a1087c1a3e4362997 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Wed, 18 Jun 2025 23:23:16 -0700 Subject: [PATCH 13/14] wip --- gel/_internal/_save.py | 128 ++++++++++++++++++++++++++++------------- gel/blocking_client.py | 6 +- 2 files changed, 92 insertions(+), 42 deletions(-) diff --git a/gel/_internal/_save.py b/gel/_internal/_save.py index f11e853c6..f3dda89b0 100644 --- a/gel/_internal/_save.py +++ b/gel/_internal/_save.py @@ -3,6 +3,7 @@ import collections import dataclasses import weakref +import uuid from typing import ( TYPE_CHECKING, @@ -62,6 +63,8 @@ _dataclass = dataclasses.dataclass(frozen=True, kw_only=True) +_ZERO_UUID = uuid.UUID(int=0) + @dataclass_transform( frozen_default=True, @@ -754,6 +757,14 @@ def feed_ids(self, obj_ids: Iterable[uuid.UUID]) -> None: self.executor.object_ids[id(change.model)] = obj_id +@_struct +class ArgCast: + cast: str + ret: Callable[[str], str] + arg_pack: Callable[[object], object] + unnest: bool + + @_struct class CompiledQuery: single_query: str @@ -880,34 +891,57 @@ def _get_id(self, obj: GelModel) -> uuid.UUID: else: return self.object_ids[id(obj)] - def _compile_change( - self, change: ModelChange, /, *, for_insert: bool - ) -> CompiledQuery: - shape_parts: list[str] = [] - - args: list[object] = [] - args_types: list[str] = [] - - def arg_cast( - type_ql: str, - ) -> tuple[str, Callable[[str], str], Callable[[object], object]]: - # As a workaround for the current limitation - # of EdgeQL (tuple arguments can't have empty - # sets as elements, and free objects input - # hasn't yet landed), we represent empty set - # as an empty array, and non-empty values as - # an array of one element. More specifically, - # if a link property has type ``, then - # its value will be represented here as - # `>`. When the value is empty - # set, the argument will be an empty array, - # when it's non-empty, it will be a one-element - # array with a one-element tuple. Then to unpack: - # - # @prop := (array_unpack(val).0 limit 1) - # - # The nested tuple indirection is needed to support - # link props that have types of arrays. + def _default_for(self, type_ql: str) -> object: + match type_ql: + case ( + "std::int64" + | "std::int32" + | "std::int16" + | "std::float64" + | "std::float32" + | "std::bigint" + ): + return 0 + case a if a.startswith("array<"): + return [] + case "std::str" | "std::json": + return "" + case "std::bytes": + return b"" + case "std::bool": + return False + case "std::uuid": + return _ZERO_UUID + return None + + def _arg_cast(self, type_ql: str) -> ArgCast: + # As a workaround for the current limitation + # of EdgeQL (tuple arguments can't have empty + # sets as elements, and free objects input + # hasn't yet landed), we represent empty set + # as an empty array, and non-empty values as + # an array of one element. More specifically, + # if a link property has type ``, then + # its value will be represented here as + # `>`. When the value is empty + # set, the argument will be an empty array, + # when it's non-empty, it will be a one-element + # array with a one-element tuple. Then to unpack: + # + # @prop := (array_unpack(val).0 limit 1) + # + # The nested tuple indirection is needed to support + # link props that have types of arrays. + dfl = self._default_for(type_ql) + unnest = bool(dfl) + arg_pack: Callable[[object], object] + if dfl is not None: + cast = f"tuple" + ret = lambda x: f"({x}.1 if {x}.0 else <{type_ql}>{{}})" # noqa: E731 + arg_pack = ( # noqa: E731 + lambda x: (True, x) if x is not None else (False, dfl) + ) + else: if type_ql.startswith("array<"): cast = f"array>" ret = lambda x: f"(select array_unpack({x}).0 limit 1)" # noqa: E731 @@ -916,7 +950,16 @@ def arg_cast( cast = f"array<{type_ql}>" ret = lambda x: f"(select array_unpack({x}) limit 1)" # noqa: E731 arg_pack = lambda x: [x] if x is not None else [] # noqa: E731 - return cast, ret, arg_pack + + return ArgCast(cast=cast, ret=ret, arg_pack=arg_pack, unnest=unnest) + + def _compile_change( + self, change: ModelChange, /, *, for_insert: bool + ) -> CompiledQuery: + shape_parts: list[str] = [] + + args: list[object] = [] + args_types: list[str] = [] def add_arg( type_ql: str, @@ -933,13 +976,13 @@ def add_arg( for ch in change.fields.values(): if isinstance(ch, PropertyChange): if ch.info.cardinality.is_optional(): - tp_ql, tp_unpack, arg_pack = arg_cast(ch.info.typexpr) + ac = self._arg_cast(ch.info.typexpr) arg = add_arg( - tp_ql, - arg_pack(ch.value), + ac.cast, + ac.arg_pack(ch.value), ) shape_parts.append( - f"{quote_ident(ch.name)} := {tp_unpack(arg)}" + f"{quote_ident(ch.name)} := {ac.ret(arg)}" ) else: assert ch.value is not None @@ -992,15 +1035,18 @@ def add_arg( assert tid is not None arg_casts = { - k: arg_cast(ch.props_info[k].typexpr) + k: self._arg_cast(ch.props_info[k].typexpr) for k in ch.props_info } - sl_subt = [arg_casts[k][0] for k in ch.props_info] + sl_subt = [arg_casts[k].cast for k in ch.props_info] sl_args = [ tid, - *(arg_casts[k][2](ch.props[k]) for k in ch.props_info), + *( + arg_casts[k].arg_pack(ch.props[k]) + for k in ch.props_info + ), ] arg = add_arg( @@ -1010,7 +1056,7 @@ def add_arg( subq_shape = [ f"@{quote_ident(pname)} := " - f"{arg_casts[pname][1](f'{arg}.{i}')}" + f"{arg_casts[pname].ret(f'{arg}.{i}')}" for i, pname in enumerate(ch.props_info, 1) ] @@ -1062,11 +1108,11 @@ def add_arg( tuple_subt: list[str] | None = None arg_casts = { - k: arg_cast(ch.props_info[k].typexpr) + k: self._arg_cast(ch.props_info[k].typexpr) for k in ch.props_info } - tuple_subt = [arg_casts[k][0] for k in ch.props_info] + tuple_subt = [arg_casts[k].cast for k in ch.props_info] for addo, addp in zip( ch.added, ch.added_props, strict=True @@ -1075,7 +1121,7 @@ def add_arg( ( self._get_id(addo), *( - arg_casts[k][2](addp[k]) + arg_casts[k].arg_pack(addp[k]) for k in ch.props_info ), ) @@ -1087,7 +1133,7 @@ def add_arg( ) lp_assign = ", ".join( - f"@{p} := {arg_casts[p][1](f'__tup.{i + 1}')}" + f"@{p} := {arg_casts[p].ret(f'__tup.{i + 1}')}" for i, p in enumerate(ch.props_info) ) diff --git a/gel/blocking_client.py b/gel/blocking_client.py index 2ec04b9c0..49c707e7f 100644 --- a/gel/blocking_client.py +++ b/gel/blocking_client.py @@ -57,6 +57,7 @@ class SaveDebug: queries: list[SaveQueryDebug] plan_time: float + total_time: float @dataclasses.dataclass @@ -580,7 +581,7 @@ def save(self, *objs: GelModel) -> None: executor.commit() def __debug_save__(self, *objs: GelModel) -> SaveDebug: - ns = time.monotonic_ns() + started_at = ns = time.monotonic_ns() make_executor = make_save_executor_constructor(objs) plan_time = time.monotonic_ns() - ns @@ -620,12 +621,15 @@ def __debug_save__(self, *objs: GelModel) -> SaveDebug: executor.commit() + total_time = time.monotonic_ns() - started_at + for qdebug in queries.values(): qdebug.total_exec_time /= 1_000_000.0 return SaveDebug( queries=sorted(queries.values(), key=lambda q: q.total_exec_time), plan_time=plan_time / 1_000_000.0, + total_time=total_time / 1_000_000.0, ) def _query(self, query_context: abstract.QueryContext): From 5f3f9e70a1033039f541b33352c7e544645a7189 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Thu, 19 Jun 2025 00:05:11 -0700 Subject: [PATCH 14/14] unnest kinda works --- gel/_internal/_save.py | 67 ++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/gel/_internal/_save.py b/gel/_internal/_save.py index f3dda89b0..98e5338d9 100644 --- a/gel/_internal/_save.py +++ b/gel/_internal/_save.py @@ -761,6 +761,7 @@ def feed_ids(self, obj_ids: Iterable[uuid.UUID]) -> None: class ArgCast: cast: str ret: Callable[[str], str] + ret_unnested: Callable[[str, int], str] | None arg_pack: Callable[[object], object] unnest: bool @@ -914,7 +915,9 @@ def _default_for(self, type_ql: str) -> object: return _ZERO_UUID return None - def _arg_cast(self, type_ql: str) -> ArgCast: + def _arg_cast( + self, type_ql: str, *, allow_unnest: bool = False + ) -> ArgCast: # As a workaround for the current limitation # of EdgeQL (tuple arguments can't have empty # sets as elements, and free objects input @@ -933,11 +936,17 @@ def _arg_cast(self, type_ql: str) -> ArgCast: # The nested tuple indirection is needed to support # link props that have types of arrays. dfl = self._default_for(type_ql) - unnest = bool(dfl) + unnest = False arg_pack: Callable[[object], object] - if dfl is not None: - cast = f"tuple" + ret_unnested: Callable[[str, int], str] | None = None + if allow_unnest and dfl is not None: + unnest = True + cast = f"std::bool,{type_ql}" ret = lambda x: f"({x}.1 if {x}.0 else <{type_ql}>{{}})" # noqa: E731 + ret_unnested = ( # noqa: E731 + lambda x, + pos: f"( {x}.{pos + 1} if {x}.{pos} else <{type_ql}>{{}} )" + ) arg_pack = ( # noqa: E731 lambda x: (True, x) if x is not None else (False, dfl) ) @@ -951,7 +960,13 @@ def _arg_cast(self, type_ql: str) -> ArgCast: ret = lambda x: f"(select array_unpack({x}) limit 1)" # noqa: E731 arg_pack = lambda x: [x] if x is not None else [] # noqa: E731 - return ArgCast(cast=cast, ret=ret, arg_pack=arg_pack, unnest=unnest) + return ArgCast( + cast=cast, + ret=ret, + arg_pack=arg_pack, + unnest=unnest, + ret_unnested=ret_unnested, + ) def _compile_change( self, change: ModelChange, /, *, for_insert: bool @@ -1105,37 +1120,45 @@ def add_arg( assert ch.props_info link_args: list[tuple[Any, ...]] = [] - tuple_subt: list[str] | None = None arg_casts = { - k: self._arg_cast(ch.props_info[k].typexpr) + k: self._arg_cast( + ch.props_info[k].typexpr, allow_unnest=True + ) for k in ch.props_info } - tuple_subt = [arg_casts[k].cast for k in ch.props_info] - for addo, addp in zip( ch.added, ch.added_props, strict=True ): - link_args.append( - ( - self._get_id(addo), - *( - arg_casts[k].arg_pack(addp[k]) - for k in ch.props_info - ), - ) - ) + link_arg: list[Any] = [self._get_id(addo)] + for k, ac in arg_casts.items(): + if ac.unnest: + link_arg.extend(ac.arg_pack(addp[k])) + else: + link_arg.append(ac.arg_pack(addp[k])) + + link_args.append(tuple(link_arg)) + tuple_subt = [arg_casts[k].cast for k in ch.props_info] arg = add_arg( f"array>", link_args, ) - lp_assign = ", ".join( - f"@{p} := {arg_casts[p].ret(f'__tup.{i + 1}')}" - for i, p in enumerate(ch.props_info) - ) + ass: list[str] = [] + idx = 1 + for k, ac in arg_casts.items(): + if ac.unnest: + ass.append( + f"@{k} := {ac.ret_unnested('__tup', idx)}" + ) + idx += 2 + else: + ass.append(f"@{k} := {ac.ret(f'__tup.{idx}')}") + idx += 1 + + lp_assign = ", ".join(ass) assign_op = ":=" if for_insert else "+="