_pytree.py 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604
  1. """
Contains utility functions for working with nested python data structures.

A *pytree* is a nested Python data structure. It is a tree in the sense that
nodes are Python collections (e.g., list, tuple, dict) and the leaves are
Python values. Furthermore, a pytree should not contain reference cycles.

pytrees are useful for working with nested collections of Tensors. For example,
one can use `tree_map` to map a function over all Tensors inside some nested
collection of Tensors and `tree_leaves` to get a flat list of all Tensors
inside some nested collection. pytrees are helpful for implementing nested
collection support for PyTorch APIs.

This pytree implementation is not very performant due to Python overhead.
To improve the performance we can move parts of the implementation to C++.
  13. """
  14. import dataclasses
  15. import functools
  16. import importlib
  17. import json
  18. import sys
  19. import threading
  20. import types
  21. import warnings
  22. from collections import defaultdict, deque, namedtuple, OrderedDict
  23. from typing import (
  24. Any,
  25. Callable,
  26. cast,
  27. DefaultDict,
  28. Deque,
  29. Dict,
  30. FrozenSet,
  31. Generic,
  32. Hashable,
  33. Iterable,
  34. List,
  35. Mapping,
  36. NamedTuple,
  37. Optional,
  38. OrderedDict as GenericOrderedDict,
  39. overload,
  40. Protocol,
  41. Sequence,
  42. Tuple,
  43. Type,
  44. TypeVar,
  45. Union,
  46. )
  47. from typing_extensions import deprecated
# Public names exported by a star-import of this module.
__all__ = [
    "PyTree",
    "Context",
    "FlattenFunc",
    "UnflattenFunc",
    "DumpableContext",
    "ToDumpableContextFn",
    "FromDumpableContextFn",
    "TreeSpec",
    "LeafSpec",
    "keystr",
    "key_get",
    "register_pytree_node",
    "tree_flatten",
    "tree_flatten_with_path",
    "tree_unflatten",
    "tree_iter",
    "tree_leaves",
    "tree_leaves_with_path",
    "tree_structure",
    "tree_map",
    "tree_map_with_path",
    "tree_map_",
    "tree_map_only",
    "tree_map_only_",
    "tree_all",
    "tree_any",
    "tree_all_only",
    "tree_any_only",
    "treespec_dumps",
    "treespec_loads",
    "treespec_pprint",
]


# Generic type variables used by annotations in this module.
T = TypeVar("T")
S = TypeVar("S")
U = TypeVar("U")
R = TypeVar("R")


# Protocol version for treespec serialization.
# NOTE(review): its consumers are not visible in this section -- confirm usage.
DEFAULT_TREESPEC_SERIALIZATION_PROTOCOL = 1
# Placeholder stored as the serialized type name when a node type is
# registered without an explicit `serialized_type_name`.
NO_SERIALIZED_TYPE_NAME_FOUND = "NO_SERIALIZED_TYPE_NAME_FOUND"
  87. class KeyEntry(Protocol):
  88. def __hash__(self) -> int:
  89. ...
  90. def __eq__(self, other: object) -> bool:
  91. ...
  92. def __str__(self) -> str:
  93. ...
  94. def get(self, parent: Any) -> Any:
  95. ...
# Auxiliary data produced by a flatten function and handed back to the
# matching unflatten function.
Context = Any
# Any Python object can be treated as a pytree.
PyTree = Any
# Splits a node into (list of children, context).
FlattenFunc = Callable[[PyTree], Tuple[List[Any], Context]]
# Rebuilds a node from (children, context).
UnflattenFunc = Callable[[Iterable[Any], Context], PyTree]
DumpableContext = Any  # Any json dumpable text
# Converters between a context and its json-dumpable representation.
ToDumpableContextFn = Callable[[Context], DumpableContext]
FromDumpableContextFn = Callable[[DumpableContext], Context]
# Signatures of the deprecated `to_str_fn` / `maybe_from_str_fn` arguments
# accepted by `_register_pytree_node`.
ToStrFunc = Callable[["TreeSpec", List[str]], str]
MaybeFromStrFunc = Callable[[str], Optional[Tuple[Any, Context, str]]]
# A key path is a tuple of key entries.
KeyPath = Tuple[KeyEntry, ...]
# Like FlattenFunc, but every child is paired with a KeyEntry.
FlattenWithKeysFunc = Callable[[PyTree], Tuple[List[Tuple[KeyEntry, Any]], Any]]
# A NodeDef holds the registered node type and three callables:
# - flatten_fn should take the collection and return a flat list of values.
#   It can also return some context that is used in reconstructing the
#   collection.
# - unflatten_fn should take a flat list of values and some context
#   (returned by flatten_fn). It returns the collection by reconstructing
#   it from the list and the context.
# - flatten_with_keys_fn (optional), which is a callable that takes a
#   pytree and returns a list of (key entry, value) pairs and a context.
class NodeDef(NamedTuple):
    """Registry record describing how to flatten and unflatten one node type."""

    type: Type[Any]
    flatten_fn: FlattenFunc
    unflatten_fn: UnflattenFunc
    flatten_with_keys_fn: Optional[FlattenWithKeysFunc]
# Lock held by the registration functions while they check or modify the
# node registries.
_NODE_REGISTRY_LOCK = threading.Lock()
# Maps each registered node type to its NodeDef.
SUPPORTED_NODES: Dict[Type[Any], NodeDef] = {}
# _SerializeNodeDef holds the following:
# - typ: the type of the node (e.g., "Dict", "List", etc)
# - serialized_type_name: the fully qualified name of the type, e.g. "collections.OrderedDict"
# - to_dumpable_context (optional) takes the context of a node and returns a
#   json-dumpable representation of it
# - from_dumpable_context (optional) takes the json-dumpable representation
#   and returns the deserialized context
class _SerializeNodeDef(NamedTuple):
    """Registry record describing how to serialize the context of one node type."""

    typ: Type[Any]
    serialized_type_name: str
    to_dumpable_context: Optional[ToDumpableContextFn]
    from_dumpable_context: Optional[FromDumpableContextFn]
# Maps each registered node type to its serialization record.
SUPPORTED_SERIALIZED_TYPES: Dict[Type[Any], _SerializeNodeDef] = {}
# Reverse lookup: serialized type name -> registered Python type.
SERIALIZED_TYPE_TO_PYTHON_TYPE: Dict[str, Type[Any]] = {}
  137. def register_pytree_node(
  138. cls: Type[Any],
  139. flatten_fn: FlattenFunc,
  140. unflatten_fn: UnflattenFunc,
  141. *,
  142. serialized_type_name: Optional[str] = None,
  143. to_dumpable_context: Optional[ToDumpableContextFn] = None,
  144. from_dumpable_context: Optional[FromDumpableContextFn] = None,
  145. flatten_with_keys_fn: Optional[FlattenWithKeysFunc] = None,
  146. ) -> None:
  147. """Register a container-like type as pytree node.
  148. Args:
  149. cls: the type to register
  150. flatten_fn: A callable that takes a pytree and returns a flattened
  151. representation of the pytree and additional context to represent the
  152. flattened pytree.
  153. unflatten_fn: A callable that takes a flattened version of the pytree,
  154. additional context, and returns an unflattened pytree.
  155. serialized_type_name: A keyword argument used to specify the fully qualified
  156. name used when serializing the tree spec.
  157. to_dumpable_context: An optional keyword argument to custom specify how
  158. to convert the context of the pytree to a custom json dumpable
  159. representation. This is used for json serialization, which is being
  160. used in torch.export right now.
  161. from_dumpable_context: An optional keyword argument to custom specify how
  162. to convert the custom json dumpable representation of the context
  163. back to the original context. This is used for json deserialization,
  164. which is being used in torch.export right now.
  165. flatten_with_keys_fn: An optional keyword argument to specify how to
  166. access each pytree leaf's keypath when flattening and tree-mapping.
  167. Like ``flatten_fn``, but in place of a List[leaf], it should return
  168. a List[(keypath, leaf)].
  169. """
  170. with _NODE_REGISTRY_LOCK:
  171. if cls in SUPPORTED_NODES:
  172. raise ValueError(f"{cls} is already registered as pytree node.")
  173. _private_register_pytree_node(
  174. cls,
  175. flatten_fn,
  176. unflatten_fn,
  177. serialized_type_name=serialized_type_name,
  178. to_dumpable_context=to_dumpable_context,
  179. from_dumpable_context=from_dumpable_context,
  180. flatten_with_keys_fn=flatten_with_keys_fn,
  181. )
  182. try:
  183. from . import _cxx_pytree as cxx
  184. except ImportError:
  185. pass
  186. else:
  187. cxx._private_register_pytree_node(
  188. cls,
  189. flatten_fn,
  190. unflatten_fn,
  191. serialized_type_name=serialized_type_name,
  192. to_dumpable_context=to_dumpable_context,
  193. from_dumpable_context=from_dumpable_context,
  194. )
  195. def _register_namedtuple(
  196. cls: Type[Any],
  197. *,
  198. serialized_type_name: str,
  199. ) -> None:
  200. """
  201. Registers a namedtuple as a valid pytree node. By default namedtuples are
  202. valid pytree nodes, but they are not serializable. This API provides the
  203. argument `serialized_type_name` which allows these namedtuples to be
  204. serialized.
  205. Args:
  206. cls: the dataclass type to register
  207. serialized_type_name: The serialized name for the dataclass. This is
  208. required if you want to serialize the pytree TreeSpec containing this
  209. namedtuple.
  210. """
  211. _private_register_pytree_node(
  212. cls,
  213. _namedtuple_flatten,
  214. _namedtuple_unflatten,
  215. serialized_type_name=serialized_type_name,
  216. to_dumpable_context=_namedtuple_serialize,
  217. from_dumpable_context=_namedtuple_deserialize,
  218. flatten_with_keys_fn=_namedtuple_flatten_with_keys,
  219. )
  220. @deprecated(
  221. "`torch.utils._pytree._register_pytree_node` is deprecated. "
  222. "Please use `torch.utils._pytree.register_pytree_node` instead.",
  223. category=FutureWarning,
  224. )
  225. def _register_pytree_node(
  226. cls: Type[Any],
  227. flatten_fn: FlattenFunc,
  228. unflatten_fn: UnflattenFunc,
  229. to_str_fn: Optional[ToStrFunc] = None, # deprecated
  230. maybe_from_str_fn: Optional[MaybeFromStrFunc] = None, # deprecated
  231. *,
  232. serialized_type_name: Optional[str] = None,
  233. to_dumpable_context: Optional[ToDumpableContextFn] = None,
  234. from_dumpable_context: Optional[FromDumpableContextFn] = None,
  235. flatten_with_keys_fn: Optional[FlattenWithKeysFunc] = None,
  236. ) -> None:
  237. """Register a container-like type as pytree node for the Python pytree only.
  238. Args:
  239. cls: the type to register
  240. flatten_fn: A callable that takes a pytree and returns a flattened
  241. representation of the pytree and additional context to represent the
  242. flattened pytree.
  243. unflatten_fn: A callable that takes a flattened version of the pytree,
  244. additional context, and returns an unflattened pytree.
  245. serialized_type_name: A keyword argument used to specify the fully qualified
  246. name used when serializing the tree spec.
  247. to_dumpable_context: An optional keyword argument to custom specify how
  248. to convert the context of the pytree to a custom json dumpable
  249. representation. This is used for json serialization, which is being
  250. used in torch.export right now.
  251. from_dumpable_context: An optional keyword argument to custom specify how
  252. to convert the custom json dumpable representation of the context
  253. back to the original context. This is used for json deserialization,
  254. which is being used in torch.export right now.
  255. flatten_with_keys_fn: An optional keyword argument to specify how to
  256. access each pytree leaf's keypath when flattening and tree-mapping.
  257. Like ``flatten_fn``, but in place of a List[leaf], it should return
  258. a List[(keypath, leaf)].
  259. """
  260. if to_str_fn is not None or maybe_from_str_fn is not None:
  261. warnings.warn(
  262. "`to_str_fn` and `maybe_from_str_fn` is deprecated. "
  263. "Please use `to_dumpable_context` and `from_dumpable_context` instead.",
  264. FutureWarning,
  265. stacklevel=2,
  266. )
  267. _private_register_pytree_node(
  268. cls,
  269. flatten_fn,
  270. unflatten_fn,
  271. serialized_type_name=serialized_type_name,
  272. to_dumpable_context=to_dumpable_context,
  273. from_dumpable_context=from_dumpable_context,
  274. flatten_with_keys_fn=flatten_with_keys_fn,
  275. )
  276. def _private_register_pytree_node(
  277. cls: Type[Any],
  278. flatten_fn: FlattenFunc,
  279. unflatten_fn: UnflattenFunc,
  280. *,
  281. serialized_type_name: Optional[str] = None,
  282. to_dumpable_context: Optional[ToDumpableContextFn] = None,
  283. from_dumpable_context: Optional[FromDumpableContextFn] = None,
  284. flatten_with_keys_fn: Optional[FlattenWithKeysFunc] = None,
  285. ) -> None:
  286. """This is an internal function that is used to register a pytree node type
  287. for the Python pytree only. End-users should use :func:`register_pytree_node`
  288. instead.
  289. """
  290. with _NODE_REGISTRY_LOCK:
  291. if cls in SUPPORTED_NODES:
  292. # TODO: change this warning to an error after OSS/internal stabilize
  293. warnings.warn(
  294. f"{cls} is already registered as pytree node. "
  295. "Overwriting the previous registration.",
  296. )
  297. node_def = NodeDef(cls, flatten_fn, unflatten_fn, flatten_with_keys_fn)
  298. SUPPORTED_NODES[cls] = node_def
  299. if (to_dumpable_context is None) ^ (from_dumpable_context is None):
  300. raise ValueError(
  301. f"Both to_dumpable_context and from_dumpable_context for {cls} must "
  302. "be None or registered."
  303. )
  304. if serialized_type_name is None:
  305. serialized_type_name = NO_SERIALIZED_TYPE_NAME_FOUND
  306. serialize_node_def = _SerializeNodeDef(
  307. cls,
  308. serialized_type_name,
  309. to_dumpable_context,
  310. from_dumpable_context,
  311. )
  312. SUPPORTED_SERIALIZED_TYPES[cls] = serialize_node_def
  313. SERIALIZED_TYPE_TO_PYTHON_TYPE[serialized_type_name] = cls
  314. @dataclasses.dataclass(frozen=True)
  315. class SequenceKey(Generic[T]):
  316. idx: int
  317. def __str__(self) -> str:
  318. return f"[{self.idx!r}]"
  319. def get(self, sequence: Sequence[T]) -> T:
  320. return sequence[self.idx]
  321. K = TypeVar("K", bound=Hashable)
  322. @dataclasses.dataclass(frozen=True)
  323. class MappingKey(Generic[K, T]):
  324. key: K
  325. def __str__(self) -> str:
  326. return f"[{self.key!r}]"
  327. def get(self, mapping: Mapping[K, T]) -> T:
  328. return mapping[self.key]
  329. @dataclasses.dataclass(frozen=True)
  330. class GetAttrKey:
  331. name: str
  332. def __str__(self) -> str:
  333. return f".{self.name}"
  334. def get(self, obj: Any) -> Any:
  335. return getattr(obj, self.name)
  336. def _tuple_flatten(d: Tuple[Any, ...]) -> Tuple[List[Any], Context]:
  337. return list(d), None
  338. def _tuple_flatten_with_keys(
  339. d: Tuple[Any, ...]
  340. ) -> Tuple[List[Tuple[KeyEntry, Any]], Context]:
  341. values, context = _tuple_flatten(d)
  342. return [(SequenceKey(i), v) for i, v in enumerate(values)], context
  343. def _tuple_unflatten(values: Iterable[Any], context: Context) -> Tuple[Any, ...]:
  344. return tuple(values)
  345. def _list_flatten(d: List[Any]) -> Tuple[List[Any], Context]:
  346. return d, None
  347. def _list_flatten_with_keys(d: List[Any]) -> Tuple[List[Tuple[KeyEntry, Any]], Context]:
  348. values, context = _list_flatten(d)
  349. return [(SequenceKey(i), v) for i, v in enumerate(values)], context
  350. def _list_unflatten(values: Iterable[Any], context: Context) -> List[Any]:
  351. return list(values)
  352. def _dict_flatten(d: Dict[Any, Any]) -> Tuple[List[Any], Context]:
  353. return list(d.values()), list(d.keys())
  354. def _dict_flatten_with_keys(
  355. d: Dict[Any, Any]
  356. ) -> Tuple[List[Tuple[KeyEntry, Any]], Context]:
  357. values, context = _dict_flatten(d)
  358. return [(MappingKey(k), v) for k, v in zip(context, values)], context
  359. def _dict_unflatten(values: Iterable[Any], context: Context) -> Dict[Any, Any]:
  360. return dict(zip(context, values))
  361. def _namedtuple_flatten(d: NamedTuple) -> Tuple[List[Any], Context]:
  362. return list(d), type(d)
  363. def _namedtuple_flatten_with_keys(
  364. d: NamedTuple,
  365. ) -> Tuple[List[Tuple[KeyEntry, Any]], Context]:
  366. values, context = _namedtuple_flatten(d)
  367. return (
  368. [(GetAttrKey(field), v) for field, v in zip(context._fields, values)],
  369. context,
  370. )
  371. def _namedtuple_unflatten(values: Iterable[Any], context: Context) -> NamedTuple:
  372. return cast(NamedTuple, context(*values))
  373. def _namedtuple_serialize(context: Context) -> DumpableContext:
  374. if context not in SUPPORTED_SERIALIZED_TYPES:
  375. raise NotImplementedError(
  376. f"Can't serialize TreeSpec of namedtuple class {context} because we "
  377. "didn't register a serializated_type_name. Please register using "
  378. "`_register_namedtuple`."
  379. )
  380. serialize_node_def = SUPPORTED_SERIALIZED_TYPES[context]
  381. serialized_type_name = serialize_node_def.serialized_type_name
  382. if serialized_type_name == NO_SERIALIZED_TYPE_NAME_FOUND:
  383. raise NotImplementedError(
  384. f"Can't serialize TreeSpec of namedtuple class {context} because we "
  385. "couldn't find a serializated_type_name. Please register using "
  386. "`_register_namedtuple`."
  387. )
  388. return serialized_type_name
  389. def _namedtuple_deserialize(dumpable_context: DumpableContext) -> Context:
  390. if dumpable_context not in SERIALIZED_TYPE_TO_PYTHON_TYPE:
  391. raise NotImplementedError(
  392. f"Can't deserialize TreeSpec of namedtuple class {dumpable_context} "
  393. "because we couldn't find a serializated name."
  394. )
  395. typ = SERIALIZED_TYPE_TO_PYTHON_TYPE[dumpable_context]
  396. return typ
  397. def _ordereddict_flatten(d: GenericOrderedDict[Any, Any]) -> Tuple[List[Any], Context]:
  398. return list(d.values()), list(d.keys())
  399. def _ordereddict_flatten_with_keys(
  400. d: GenericOrderedDict[Any, Any]
  401. ) -> Tuple[List[Tuple[KeyEntry, Any]], Context]:
  402. values, context = _ordereddict_flatten(d)
  403. return [(MappingKey(k), v) for k, v in zip(context, values)], context
  404. def _ordereddict_unflatten(
  405. values: Iterable[Any],
  406. context: Context,
  407. ) -> GenericOrderedDict[Any, Any]:
  408. return OrderedDict((key, value) for key, value in zip(context, values))
# Short aliases for the OrderedDict flatten/unflatten helpers.
_odict_flatten = _ordereddict_flatten
_odict_unflatten = _ordereddict_unflatten
  411. def _defaultdict_flatten(d: DefaultDict[Any, Any]) -> Tuple[List[Any], Context]:
  412. values, dict_context = _dict_flatten(d)
  413. return values, [d.default_factory, dict_context]
  414. def _defaultdict_flatten_with_keys(
  415. d: DefaultDict[Any, Any]
  416. ) -> Tuple[List[Tuple[KeyEntry, Any]], Context]:
  417. values, context = _defaultdict_flatten(d)
  418. _, dict_context = context
  419. return [(MappingKey(k), v) for k, v in zip(dict_context, values)], context
  420. def _defaultdict_unflatten(
  421. values: Iterable[Any],
  422. context: Context,
  423. ) -> DefaultDict[Any, Any]:
  424. default_factory, dict_context = context
  425. return defaultdict(default_factory, _dict_unflatten(values, dict_context))
  426. def _defaultdict_serialize(context: Context) -> DumpableContext:
  427. default_factory, dict_context = context
  428. json_defaultdict = {
  429. "default_factory_module": default_factory.__module__,
  430. "default_factory_name": default_factory.__qualname__,
  431. "dict_context": dict_context,
  432. }
  433. return json_defaultdict
  434. def _defaultdict_deserialize(dumpable_context: DumpableContext) -> Context:
  435. assert isinstance(dumpable_context, dict)
  436. assert set(dumpable_context) == {
  437. "default_factory_module",
  438. "default_factory_name",
  439. "dict_context",
  440. }
  441. default_factory_module = dumpable_context["default_factory_module"]
  442. default_factory_name = dumpable_context["default_factory_name"]
  443. assert isinstance(default_factory_module, str)
  444. assert isinstance(default_factory_name, str)
  445. module = importlib.import_module(default_factory_module)
  446. default_factory = getattr(module, default_factory_name)
  447. dict_context = dumpable_context["dict_context"]
  448. return [default_factory, dict_context]
  449. def _deque_flatten(d: Deque[Any]) -> Tuple[List[Any], Context]:
  450. return list(d), d.maxlen
  451. def _deque_flatten_with_keys(
  452. d: Deque[Any],
  453. ) -> Tuple[List[Tuple[KeyEntry, Any]], Context]:
  454. values, context = _deque_flatten(d)
  455. return [(SequenceKey(i), v) for i, v in enumerate(values)], context
  456. def _deque_unflatten(values: Iterable[Any], context: Context) -> Deque[Any]:
  457. return deque(values, maxlen=context)
# Register the built-in container types at import time.
# tuple, list, dict, OrderedDict and deque are registered without context
# (de)serializers; namedtuple and defaultdict supply their own.
_private_register_pytree_node(
    tuple,
    _tuple_flatten,
    _tuple_unflatten,
    serialized_type_name="builtins.tuple",
    flatten_with_keys_fn=_tuple_flatten_with_keys,
)
_private_register_pytree_node(
    list,
    _list_flatten,
    _list_unflatten,
    serialized_type_name="builtins.list",
    flatten_with_keys_fn=_list_flatten_with_keys,
)
_private_register_pytree_node(
    dict,
    _dict_flatten,
    _dict_unflatten,
    serialized_type_name="builtins.dict",
    flatten_with_keys_fn=_dict_flatten_with_keys,
)
# The `namedtuple` factory function itself is used as the registry key that
# stands for every namedtuple class.
_private_register_pytree_node(
    namedtuple,  # type: ignore[arg-type]
    _namedtuple_flatten,
    _namedtuple_unflatten,
    serialized_type_name="collections.namedtuple",
    to_dumpable_context=_namedtuple_serialize,
    from_dumpable_context=_namedtuple_deserialize,
    flatten_with_keys_fn=_namedtuple_flatten_with_keys,
)
_private_register_pytree_node(
    OrderedDict,
    _ordereddict_flatten,
    _ordereddict_unflatten,
    serialized_type_name="collections.OrderedDict",
    flatten_with_keys_fn=_ordereddict_flatten_with_keys,
)
_private_register_pytree_node(
    defaultdict,
    _defaultdict_flatten,
    _defaultdict_unflatten,
    serialized_type_name="collections.defaultdict",
    to_dumpable_context=_defaultdict_serialize,
    from_dumpable_context=_defaultdict_deserialize,
    flatten_with_keys_fn=_defaultdict_flatten_with_keys,
)
_private_register_pytree_node(
    deque,
    _deque_flatten,
    _deque_unflatten,
    serialized_type_name="collections.deque",
    flatten_with_keys_fn=_deque_flatten_with_keys,
)
# Dictionary types that TreeSpec treats as interchangeable when matching a
# tree against a spec.
STANDARD_DICT_TYPES: FrozenSet[type] = frozenset(
    {dict, OrderedDict, defaultdict},
)
# Node types registered by this module itself (`namedtuple` is the factory
# function used as the key for all namedtuple classes).
BUILTIN_TYPES: FrozenSet[type] = frozenset(
    {tuple, list, dict, namedtuple, OrderedDict, defaultdict, deque},  # type: ignore[arg-type]
)
  517. # h/t https://stackoverflow.com/questions/2166818/how-to-check-if-an-object-is-an-instance-of-a-namedtuple
  518. def _is_namedtuple_instance(tree: Any) -> bool:
  519. typ = type(tree)
  520. bases = typ.__bases__
  521. if len(bases) != 1 or bases[0] != tuple:
  522. return False
  523. fields = getattr(typ, "_fields", None)
  524. if not isinstance(fields, tuple):
  525. return False
  526. return all(type(entry) == str for entry in fields)
  527. def _get_node_type(tree: Any) -> Any:
  528. if _is_namedtuple_instance(tree):
  529. return namedtuple
  530. return type(tree)
  531. # A leaf is defined as anything that is not a Node.
  532. def _is_leaf(tree: PyTree, is_leaf: Optional[Callable[[PyTree], bool]] = None) -> bool:
  533. return (is_leaf is not None and is_leaf(tree)) or _get_node_type(
  534. tree
  535. ) not in SUPPORTED_NODES
  536. # A TreeSpec represents the structure of a pytree. It holds:
  537. # "type": the type of root Node of the pytree
  538. # context: some context that is useful in unflattening the pytree
  539. # children_specs: specs for each child of the root Node
  540. # num_leaves: the number of leaves
  541. @dataclasses.dataclass(init=True, frozen=True, eq=True, repr=False)
  542. class TreeSpec:
  543. type: Any
  544. context: Context
  545. children_specs: List["TreeSpec"]
  546. num_nodes: int = dataclasses.field(init=False)
  547. num_leaves: int = dataclasses.field(init=False)
  548. num_children: int = dataclasses.field(init=False)
  549. def __post_init__(self) -> None:
  550. num_nodes = sum((spec.num_nodes for spec in self.children_specs), start=1)
  551. num_leaves = sum(spec.num_leaves for spec in self.children_specs)
  552. num_children = len(self.children_specs)
  553. object.__setattr__(self, "num_nodes", num_nodes)
  554. object.__setattr__(self, "num_leaves", num_leaves)
  555. object.__setattr__(self, "num_children", num_children)
  556. def __repr__(self, indent: int = 0) -> str:
  557. repr_prefix: str = f"TreeSpec({self.type.__name__}, {self.context}, ["
  558. children_specs_str: str = ""
  559. if self.num_children > 0:
  560. indent += 2
  561. children_specs_str += self.children_specs[0].__repr__(indent)
  562. children_specs_str += "," if self.num_children > 1 else ""
  563. children_specs_str += ",".join(
  564. [
  565. "\n" + " " * indent + child.__repr__(indent)
  566. for child in self.children_specs[1:]
  567. ]
  568. )
  569. repr_suffix: str = f"{children_specs_str}])"
  570. return repr_prefix + repr_suffix
  571. def is_leaf(self) -> bool:
  572. return self.num_nodes == 1 and self.num_leaves == 1
  573. def _flatten_up_to_helper(self, tree: PyTree, subtrees: List[PyTree]) -> None:
  574. if self.is_leaf():
  575. subtrees.append(tree)
  576. return
  577. node_type = _get_node_type(tree)
  578. if self.type not in BUILTIN_TYPES:
  579. # Always require custom node types to match exactly
  580. if node_type != self.type:
  581. raise ValueError(
  582. f"Type mismatch; "
  583. f"expected {self.type!r}, but got {node_type!r}.",
  584. )
  585. flatten_fn = SUPPORTED_NODES[node_type].flatten_fn
  586. child_pytrees, context = flatten_fn(tree)
  587. if len(child_pytrees) != self.num_children:
  588. raise ValueError(
  589. f"Node arity mismatch; "
  590. f"expected {self.num_children}, but got {len(child_pytrees)}.",
  591. )
  592. if context != self.context:
  593. raise ValueError(
  594. f"Node context mismatch for custom node type {self.type!r}.",
  595. )
  596. else:
  597. # For builtin dictionary types, we allow some flexibility
  598. # Otherwise, we require exact matches
  599. both_standard_dict = (
  600. self.type in STANDARD_DICT_TYPES and node_type in STANDARD_DICT_TYPES
  601. )
  602. if node_type != self.type and not both_standard_dict:
  603. raise ValueError(
  604. f"Node type mismatch; "
  605. f"expected {self.type!r}, but got {node_type!r}.",
  606. )
  607. if len(tree) != self.num_children:
  608. raise ValueError(
  609. f"Node arity mismatch; "
  610. f"expected {self.num_children}, but got {len(tree)}.",
  611. )
  612. if both_standard_dict: # dictionary types are compatible with each other
  613. dict_context = (
  614. self.context
  615. if self.type is not defaultdict
  616. # ignore mismatch of `default_factory` for defaultdict
  617. else self.context[1]
  618. )
  619. expected_keys = dict_context
  620. got_key_set = set(tree)
  621. expected_key_set = set(expected_keys)
  622. if got_key_set != expected_key_set:
  623. missing_keys = expected_key_set.difference(got_key_set)
  624. extra_keys = got_key_set.difference(expected_key_set)
  625. message = ""
  626. if missing_keys:
  627. message += f"; missing key(s): {missing_keys}"
  628. if extra_keys:
  629. message += f"; extra key(s): {extra_keys}"
  630. raise ValueError(f"Node keys mismatch{message}.")
  631. child_pytrees = [tree[key] for key in expected_keys]
  632. else:
  633. flatten_fn = SUPPORTED_NODES[node_type].flatten_fn
  634. child_pytrees, context = flatten_fn(tree)
  635. if (
  636. context != self.context
  637. and self.type is not deque # ignore mismatch of `maxlen` for deque
  638. ):
  639. raise ValueError(
  640. f"Node context mismatch for node type {self.type!r}; "
  641. f"expected {self.context!r}, but got {context!r}.", # namedtuple type mismatch
  642. )
  643. for child_pytree, child_spec in zip(child_pytrees, self.children_specs):
  644. child_spec._flatten_up_to_helper(child_pytree, subtrees)
  def flatten_up_to(self, tree: PyTree) -> List[PyTree]:
      """Return the pieces of ``tree`` gathered while matching it against this spec.

      The work is delegated to ``_flatten_up_to_helper``, which is expected to
      fill the list it is handed and which raises ``ValueError`` when ``tree``
      does not line up with this spec (type, arity, context or key mismatch).
      """
      collected: List[PyTree] = []
      self._flatten_up_to_helper(tree, collected)
      return collected
  def unflatten(self, leaves: Iterable[Any]) -> PyTree:
      """Rebuild a pytree with this spec's structure from a flat run of leaves.

      Args:
          leaves: Iterable yielding exactly ``self.num_leaves`` values. Anything
              that is not already a list or tuple is copied into a list first.

      Returns:
          The single leaf itself when this spec is a leaf; otherwise whatever the
          registered ``unflatten_fn`` for ``self.type`` builds from the rebuilt
          children and ``self.context``.

      Raises:
          ValueError: If the number of leaves differs from ``self.num_leaves``.
      """
      # len() and slicing are needed below, so one-shot iterables are materialised.
      if not isinstance(leaves, (list, tuple)):
          leaves = list(leaves)
      if len(leaves) != self.num_leaves:
          raise ValueError(
              f"treespec.unflatten(leaves): `leaves` has length {len(leaves)} "
              f"but the spec refers to a pytree that holds {self.num_leaves} "
              f"items ({self}).",
          )
      if self.is_leaf():
          return leaves[0]

      rebuild = SUPPORTED_NODES[self.type].unflatten_fn

      # Hand every child spec the contiguous window of leaves it accounts for.
      rebuilt_children = []
      offset = 0
      for spec in self.children_specs:
          stop = offset + spec.num_leaves
          rebuilt_children.append(spec.unflatten(leaves[offset:stop]))
          offset = stop

      return rebuild(rebuilt_children, self.context)
  class LeafSpec(TreeSpec):
      """Spec for a single leaf: no node type, no context and no children."""

      def __init__(self) -> None:
          super().__init__(None, None, [])

      def __post_init__(self) -> None:
          # A leaf counts as one node and one leaf, and has zero children.
          # The values are written through ``object.__setattr__`` rather than
          # plain assignment (presumably because TreeSpec blocks ordinary
          # attribute writes -- confirm against the TreeSpec definition).
          for field_name, value in (
              ("num_nodes", 1),
              ("num_leaves", 1),
              ("num_children", 0),
          ):
              object.__setattr__(self, field_name, value)

      def __repr__(self, indent: int = 0) -> str:
          # ``indent`` is accepted but unused: a leaf always prints as "*".
          return "*"


  # Leaf specs are interchangeable, so a single shared instance is reused
  # everywhere instead of constructing a new object for every leaf.
  _LEAF_SPEC = LeafSpec()
  def _tree_flatten_helper(
      tree: PyTree,
      leaves: List[Any],
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> TreeSpec:
      """Recursively flatten ``tree``, appending its leaves to ``leaves`` in visit order.

      Returns the spec describing ``tree``: the shared ``_LEAF_SPEC`` for a leaf,
      otherwise a ``TreeSpec`` built from the node type, the context returned by
      the registered ``flatten_fn`` and the specs of the children.
      """
      if _is_leaf(tree, is_leaf=is_leaf):
          leaves.append(tree)
          return _LEAF_SPEC

      kind = _get_node_type(tree)
      children, context = SUPPORTED_NODES[kind].flatten_fn(tree)

      # Children are visited left to right so the leaves keep their order.
      specs = []
      for child in children:
          specs.append(_tree_flatten_helper(child, leaves, is_leaf=is_leaf))

      return TreeSpec(kind, context, specs)
  def tree_flatten(
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> Tuple[List[Any], TreeSpec]:
      """Split a pytree into a flat list of leaves plus the TreeSpec needed to
      rebuild it.

      ``is_leaf`` is forwarded unchanged to the recursive helper.
      """
      collected: List[Any] = []
      treespec = _tree_flatten_helper(tree, collected, is_leaf=is_leaf)
      return collected, treespec
  def tree_unflatten(leaves: Iterable[Any], treespec: TreeSpec) -> PyTree:
      """Build a pytree from a flat collection of leaves and a TreeSpec.

      This undoes :func:`tree_flatten`.

      Raises:
          TypeError: If ``treespec`` is not a ``TreeSpec`` instance.
      """
      if isinstance(treespec, TreeSpec):
          return treespec.unflatten(leaves)
      raise TypeError(
          f"tree_unflatten(leaves, treespec): Expected `treespec` to be "
          f"instance of TreeSpec but got item of type {type(treespec)}.",
      )
  def tree_iter(
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> Iterable[Any]:
      """Lazily yield the leaves of a pytree, depth first."""
      if _is_leaf(tree, is_leaf=is_leaf):
          yield tree
          return

      kind = _get_node_type(tree)
      # Only the children matter here; the flatten context is discarded.
      children, _ = SUPPORTED_NODES[kind].flatten_fn(tree)
      for subtree in children:
          yield from tree_iter(subtree, is_leaf=is_leaf)
  def tree_leaves(
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> List[Any]:
      """Return the leaves of a pytree as a list (an eager :func:`tree_iter`)."""
      leaf_stream = tree_iter(tree, is_leaf=is_leaf)
      return list(leaf_stream)
  def tree_structure(
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> TreeSpec:
      """Return only the TreeSpec of a pytree, discarding the flattened leaves."""
      _, treespec = tree_flatten(tree, is_leaf=is_leaf)
      return treespec
  def tree_map(
      func: Callable[..., Any],
      tree: PyTree,
      *rests: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      """Apply ``func`` leaf-wise across one or more pytrees and return a new pytree.

      See also :func:`tree_map_`.

      >>> tree_map(lambda x: x + 1, {'x': 7, 'y': (42, 64)})
      {'x': 8, 'y': (43, 65)}
      >>> tree_map(lambda x: x is None, {'x': 7, 'y': (42, 64), 'z': None})
      {'x': False, 'y': (False, False), 'z': True}

      The output structure always comes from ``tree``. Extra inputs only need to
      have ``tree`` as a prefix; whatever sits at a leaf position of ``tree`` in
      those inputs is handed to ``func`` as a whole:

      >>> tree_map(lambda x, y: [x] + y, [5, 6], [[7, 9], [1, 2]])
      [[5, 7, 9], [6, 1, 2]]

      Args:
          func (callable): Called with ``1 + len(rests)`` arguments, one taken from
              each input, at every leaf position of ``tree``.
          tree (pytree): Supplies the structure and the first argument of ``func``.
          rests (tuple of pytree): Additional pytrees with the same structure as
              ``tree`` or with ``tree`` as a prefix.
          is_leaf (callable, optional): Extra predicate ``is_leaf(node) -> bool``
              consulted at each flattening step of ``tree``; a node for which it
              returns :data:`True` is treated as a leaf. When omitted, only the
              default pytree registry decides what counts as a leaf.

      Returns:
          A new pytree shaped like ``tree`` whose leaves are ``func(x, *xs)``, with
          ``x`` the leaf of ``tree`` and ``xs`` the matching entries of ``rests``.
      """
      leaves, treespec = tree_flatten(tree, is_leaf=is_leaf)

      # One "column" of values per input tree, all aligned on tree's leaves.
      columns = [leaves]
      columns.extend(treespec.flatten_up_to(other) for other in rests)

      # The map object stays lazy; ``unflatten`` is what consumes it.
      return treespec.unflatten(map(func, *columns))
  def tree_map_(
      func: Callable[..., Any],
      tree: PyTree,
      *rests: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      """Call ``func`` at every leaf for its side effects and return ``tree`` itself.

      This is the in-place counterpart of :func:`tree_map`: return values of
      ``func`` are thrown away and no new pytree is built.

      Args:
          func (callable): Called with ``1 + len(rests)`` arguments, one taken from
              each input, at every leaf position of ``tree``.
          tree (pytree): Supplies the structure and the first argument of ``func``.
          rests (tuple of pytree): Additional pytrees with the same structure as
              ``tree`` or with ``tree`` as a prefix.
          is_leaf (callable, optional): Extra predicate ``is_leaf(node) -> bool``
              consulted at each flattening step of ``tree``; a node for which it
              returns :data:`True` is treated as a leaf. When omitted, only the
              default pytree registry decides what counts as a leaf.

      Returns:
          The very same ``tree`` object that was passed in. Any change visible on
          it comes from the side effects of ``func(x, *xs)``, where ``x`` is a leaf
          of ``tree`` and ``xs`` are the matching entries of ``rests``.
      """
      leaves, treespec = tree_flatten(tree, is_leaf=is_leaf)

      # All inputs are flattened up front, before ``func`` runs for the first time.
      columns = [leaves]
      columns.extend(treespec.flatten_up_to(other) for other in rests)

      for call_args in zip(*columns):
          func(*call_args)
      return tree
  # Tuples of two / three classes, as accepted by the multi-type overloads.
  Type2 = Tuple[Type[T], Type[S]]
  Type3 = Tuple[Type[T], Type[S], Type[U]]

  # Anything usable as an isinstance() spec: a class, a tuple of classes and,
  # on interpreters that provide it (3.10+), a ``types.UnionType`` object.
  if sys.version_info < (3, 10):
      TypeAny = Union[Type[Any], Tuple[Type[Any], ...]]
  else:
      TypeAny = Union[Type[Any], Tuple[Type[Any], ...], types.UnionType]

  # Callback shapes, parameterised by the argument type(s) and the result type.
  Fn = Callable[[T], R]
  Fn2 = Callable[[Union[T, S]], R]
  Fn3 = Callable[[Union[T, S, U]], R]
  FnAny = Callable[[Any], R]

  # Shape of a decorator: takes a function, returns a one-argument callable.
  MapOnlyFn = Callable[[T], Callable[[Any], Any]]
  # The typed overloads below exist so that a type checker can infer the
  # argument type of the lambda/function handed to the returned decorator.
  @overload
  def map_only(__type_or_types_or_pred: Type2[T, S]) -> MapOnlyFn[Fn2[T, S, Any]]:
      ...


  @overload
  def map_only(__type_or_types_or_pred: Type3[T, S, U]) -> MapOnlyFn[Fn3[T, S, U, Any]]:
      ...


  @overload
  def map_only(__type_or_types_or_pred: Type[T]) -> MapOnlyFn[Fn[T, Any]]:
      ...


  # Catch-all overload for arbitrary type specs: the ``tree_map_only`` style
  # helpers further down forward their loosely typed argument to this function.
  @overload
  def map_only(__type_or_types_or_pred: TypeAny) -> MapOnlyFn[FnAny[Any]]:
      ...


  @overload
  def map_only(__type_or_types_or_pred: Callable[[Any], bool]) -> MapOnlyFn[FnAny[Any]]:
      ...


  def map_only(
      __type_or_types_or_pred: Union[TypeAny, Callable[[Any], bool]]
  ) -> MapOnlyFn[FnAny[Any]]:
      """Build a decorator that applies a function only to selected values.

      When mapping over a tree you often want to touch, say, only tensors and
      pass everything else through untouched. Written by hand that is:

          def go(t):
              if isinstance(t, Tensor):
                  return ...
              else:
                  return t

      With this helper the selection is declared once:

          @map_only(Tensor)
          def go(t):
              return ...

      The selector may be a class, a tuple of classes, a ``X | Y`` union object
      (Python 3.10+) or an arbitrary predicate ``value -> bool``.
      ``tree_map_only`` offers the same thing in a single call.

      Raises:
          TypeError: If the selector is none of the accepted kinds.
      """
      usable_with_isinstance = isinstance(__type_or_types_or_pred, (type, tuple)) or (
          sys.version_info >= (3, 10)
          and isinstance(__type_or_types_or_pred, types.UnionType)
      )

      if usable_with_isinstance:

          def pred(x: Any) -> bool:
              return isinstance(x, __type_or_types_or_pred)  # type: ignore[arg-type]

      elif callable(__type_or_types_or_pred):
          # Already a predicate: use it as is.
          pred = __type_or_types_or_pred  # type: ignore[assignment]
      else:
          raise TypeError("Argument must be a type, a tuple of types, or a callable.")

      def wrapper(func: Callable[[T], Any]) -> Callable[[Any], Any]:
          @functools.wraps(func)
          def wrapped(x: T) -> Any:
              # Values rejected by the selector are returned unchanged.
              return func(x) if pred(x) else x

          return wrapped

      return wrapper
  @overload
  def tree_map_only(
      __type_or_types_or_pred: Type[T],
      func: Fn[T, Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      ...


  @overload
  def tree_map_only(
      __type_or_types_or_pred: Type2[T, S],
      func: Fn2[T, S, Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      ...


  @overload
  def tree_map_only(
      __type_or_types_or_pred: Type3[T, S, U],
      func: Fn3[T, S, U, Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      ...


  @overload
  def tree_map_only(
      __type_or_types_or_pred: Callable[[Any], bool],
      func: FnAny[Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      ...


  def tree_map_only(
      __type_or_types_or_pred: Union[TypeAny, Callable[[Any], bool]],
      func: FnAny[Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      """Map ``func`` over the leaves selected by a type spec or predicate.

      ``func`` is first wrapped with :func:`map_only`, so leaves that are not
      selected are carried over unchanged, and the wrapped function is then
      passed to :func:`tree_map`.
      """
      selective_func = map_only(__type_or_types_or_pred)(func)
      return tree_map(selective_func, tree, is_leaf=is_leaf)
  @overload
  def tree_map_only_(
      __type_or_types_or_pred: Type[T],
      func: Fn[T, Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      ...


  @overload
  def tree_map_only_(
      __type_or_types_or_pred: Type2[T, S],
      func: Fn2[T, S, Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      ...


  @overload
  def tree_map_only_(
      __type_or_types_or_pred: Type3[T, S, U],
      func: Fn3[T, S, U, Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      ...


  @overload
  def tree_map_only_(
      __type_or_types_or_pred: Callable[[Any], bool],
      func: FnAny[Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      ...


  def tree_map_only_(
      __type_or_types_or_pred: Union[TypeAny, Callable[[Any], bool]],
      func: FnAny[Any],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      """Side-effect variant of ``tree_map_only``.

      ``func`` is wrapped with :func:`map_only` so it only runs on the selected
      leaves, and the wrapped function is passed to :func:`tree_map_`, whose
      result is returned.
      """
      selective_func = map_only(__type_or_types_or_pred)(func)
      return tree_map_(selective_func, tree, is_leaf=is_leaf)
  def tree_all(
      pred: Callable[[Any], bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      """Return True when ``pred`` is truthy for every leaf (True for no leaves).

      Evaluation stops at the first leaf for which ``pred`` is falsy.
      """
      for leaf in tree_iter(tree, is_leaf=is_leaf):
          if not pred(leaf):
              return False
      return True
  def tree_any(
      pred: Callable[[Any], bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      """Return True when ``pred`` is truthy for at least one leaf.

      Evaluation stops at the first leaf for which ``pred`` is truthy.
      """
      for leaf in tree_iter(tree, is_leaf=is_leaf):
          if pred(leaf):
              return True
      return False
  @overload
  def tree_all_only(
      __type_or_types: Type[T],
      pred: Fn[T, bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      ...


  @overload
  def tree_all_only(
      __type_or_types: Type2[T, S],
      pred: Fn2[T, S, bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      ...


  @overload
  def tree_all_only(
      __type_or_types: Type3[T, S, U],
      pred: Fn3[T, S, U, bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      ...


  def tree_all_only(
      __type_or_types: TypeAny,
      pred: FnAny[bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      """Return True when ``pred`` holds for every leaf of the given type(s).

      Leaves that are not instances of ``__type_or_types`` are skipped and never
      reach ``pred``; with no matching leaf the result is True.
      """
      for leaf in tree_iter(tree, is_leaf=is_leaf):
          if isinstance(leaf, __type_or_types) and not pred(leaf):
              return False
      return True
  @overload
  def tree_any_only(
      __type_or_types: Type[T],
      pred: Fn[T, bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      ...


  @overload
  def tree_any_only(
      __type_or_types: Type2[T, S],
      pred: Fn2[T, S, bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      ...


  @overload
  def tree_any_only(
      __type_or_types: Type3[T, S, U],
      pred: Fn3[T, S, U, bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      ...


  def tree_any_only(
      __type_or_types: TypeAny,
      pred: FnAny[bool],
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> bool:
      """Return True when ``pred`` holds for at least one leaf of the given type(s).

      Leaves that are not instances of ``__type_or_types`` are skipped and never
      reach ``pred``; with no matching leaf the result is False.
      """
      for leaf in tree_iter(tree, is_leaf=is_leaf):
          if isinstance(leaf, __type_or_types) and pred(leaf):
              return True
      return False
  # Stretches a pytree so that it fits the given TreeSpec and returns the
  # flattened values, or None when the pytree cannot be made to fit.
  #
  # Example: pytree=0 with spec=TreeSpec(list, None, [LeafSpec(), LeafSpec()])
  # gives [0, 0]. vmap relies on this: in vmap(fn, in_dims)(*inputs), `in_dims`
  # has to be broadcastable to the tree structure of `inputs`, and
  # _broadcast_to_and_flatten is how that is checked.
  def _broadcast_to_and_flatten(
      tree: PyTree,
      treespec: TreeSpec,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> Optional[List[Any]]:
      assert isinstance(treespec, TreeSpec)

      # A leaf is repeated once for every leaf the spec expects underneath it.
      if _is_leaf(tree, is_leaf=is_leaf):
          return [tree] * treespec.num_leaves
      # A container cannot be squeezed into a leaf position.
      if treespec.is_leaf():
          return None

      kind = _get_node_type(tree)
      if kind != treespec.type:
          return None

      children, ctx = SUPPORTED_NODES[kind].flatten_fn(tree)

      # Same node type but different arity or context: not broadcastable.
      if len(children) != treespec.num_children or ctx != treespec.context:
          return None

      # Broadcast child by child; a single failure invalidates the whole tree.
      flattened: List[Any] = []
      for child, child_spec in zip(children, treespec.children_specs):
          part = _broadcast_to_and_flatten(child, child_spec, is_leaf=is_leaf)
          if part is None:
              return None
          flattened += part
      return flattened
  1064. @dataclasses.dataclass
  1065. class _TreeSpecSchema:
  1066. """
  1067. _TreeSpecSchema is the schema used to serialize the TreeSpec
  1068. It contains the following fields:
  1069. - type: A string name of the type. null for the case of a LeafSpec.
  1070. - context: Any format which is json dumpable
  1071. - children_spec: A list of children serialized specs.
  1072. """
  1073. type: Optional[str]
  1074. context: DumpableContext
  1075. children_spec: List["_TreeSpecSchema"]
  1076. class _ProtocolFn(NamedTuple):
  1077. treespec_to_json: Callable[[TreeSpec], DumpableContext]
  1078. json_to_treespec: Callable[[DumpableContext], TreeSpec]
  1079. _SUPPORTED_PROTOCOLS: Dict[int, _ProtocolFn] = {}
  def _treespec_to_json(treespec: TreeSpec) -> _TreeSpecSchema:
      """Convert a TreeSpec, recursively, into its ``_TreeSpecSchema`` form.

      Raises:
          NotImplementedError: If the node type has no serialization entry, or
              the entry carries no serialized type name.
          TypeError: If the node has no custom context serializer and its
              context cannot be dumped with ``json.dumps``.
      """
      if treespec.is_leaf():
          return _TreeSpecSchema(None, None, [])

      if treespec.type not in SUPPORTED_SERIALIZED_TYPES:
          raise NotImplementedError(
              f"Serializing {treespec.type} in pytree is not registered.",
          )

      node_def = SUPPORTED_SERIALIZED_TYPES[treespec.type]
      type_name = node_def.serialized_type_name
      if type_name == NO_SERIALIZED_TYPE_NAME_FOUND:
          raise NotImplementedError(
              f"No registered serialization name for {treespec.type} found. "
              "Please update your _register_pytree_node call with a `serialized_type_name` kwarg."
          )

      # A custom serializer wins; plain json is the fallback.
      if node_def.to_dumpable_context is not None:
          dumped_context = node_def.to_dumpable_context(treespec.context)
      else:
          try:
              dumped_context = json.dumps(treespec.context)
          except TypeError as e:
              raise TypeError(
                  "Unable to serialize context. "
                  "Please make the context json dump-able, or register a "
                  "custom serializer using _register_pytree_node."
              ) from e

      children = []
      for child in treespec.children_specs:
          children.append(_treespec_to_json(child))

      return _TreeSpecSchema(type_name, dumped_context, children)
  def _json_to_treespec(json_schema: DumpableContext) -> TreeSpec:
      """Rebuild a TreeSpec, recursively, from its decoded schema mapping.

      ``json_schema`` is indexed with the keys ``"type"``, ``"context"`` and
      ``"children_spec"``.

      Raises:
          NotImplementedError: If the serialized type name is not registered.
          TypeError: If the node has no custom context deserializer and
              ``json.loads`` rejects the stored context with a ``TypeError``.
      """
      # A leaf is encoded as "no type, no context, no children".
      encodes_leaf = (
          json_schema["type"] is None
          and json_schema["context"] is None
          and len(json_schema["children_spec"]) == 0
      )
      if encodes_leaf:
          return _LEAF_SPEC

      if json_schema["type"] not in SERIALIZED_TYPE_TO_PYTHON_TYPE:
          raise NotImplementedError(
              f'Deserializing {json_schema["type"]} in pytree is not registered.',
          )

      typ = SERIALIZED_TYPE_TO_PYTHON_TYPE[json_schema["type"]]
      node_def = SUPPORTED_SERIALIZED_TYPES[typ]

      # A custom deserializer wins; plain json is the fallback.
      if node_def.from_dumpable_context is not None:
          context = node_def.from_dumpable_context(json_schema["context"])
      else:
          try:
              context = json.loads(json_schema["context"])
          except TypeError as ex:
              raise TypeError(
                  "Unable to deserialize context. "
                  "Please make the context json load-able, or register a "
                  "custom serializer using _register_pytree_node.",
              ) from ex

      children_specs = [
          _json_to_treespec(child_schema)
          for child_schema in json_schema["children_spec"]
      ]
      return TreeSpec(typ, context, children_specs)
  # Protocol 1: the json based converters defined above.
  _SUPPORTED_PROTOCOLS[1] = _ProtocolFn(
      treespec_to_json=_treespec_to_json,
      json_to_treespec=_json_to_treespec,
  )
  def treespec_dumps(treespec: TreeSpec, protocol: Optional[int] = None) -> str:
      """Serialize a TreeSpec to a json string.

      The string encodes a pair: the protocol number followed by the schema
      produced by that protocol's converter.

      Args:
          treespec: The spec to serialize.
          protocol: Protocol number to use;
              ``DEFAULT_TREESPEC_SERIALIZATION_PROTOCOL`` when None.

      Raises:
          TypeError: If ``treespec`` is not a ``TreeSpec`` instance.
          ValueError: If ``protocol`` is not a registered protocol.
      """
      if not isinstance(treespec, TreeSpec):
          raise TypeError(
              f"treespec_dumps(treespec, protocol): Expected `treespec` to be instance of "
              f"TreeSpec but got item of type {type(treespec)}.",
          )

      if protocol is None:
          protocol = DEFAULT_TREESPEC_SERIALIZATION_PROTOCOL

      if protocol not in _SUPPORTED_PROTOCOLS:
          raise ValueError(
              f"Unknown protocol {protocol}. "
              f"Available protocols: {list(_SUPPORTED_PROTOCOLS.keys())}",
          )

      json_spec = _SUPPORTED_PROTOCOLS[protocol].treespec_to_json(treespec)
      # The protocol number travels with the payload so loading can dispatch on it.
      return json.dumps((protocol, dataclasses.asdict(json_spec)))
  def treespec_loads(serialized: str) -> TreeSpec:
      """Rebuild a TreeSpec from a string that decodes to ``(protocol, schema)``.

      Raises:
          ValueError: If the embedded protocol number is not registered.
      """
      protocol, json_schema = json.loads(serialized)

      if protocol not in _SUPPORTED_PROTOCOLS:
          raise ValueError(
              f"Unknown protocol {protocol}. "
              f"Available protocols: {list(_SUPPORTED_PROTOCOLS.keys())}",
          )
      return _SUPPORTED_PROTOCOLS[protocol].json_to_treespec(json_schema)
  1161. class _DummyLeaf:
  1162. def __repr__(self) -> str:
  1163. return "*"
  def treespec_pprint(treespec: TreeSpec) -> str:
      """Render the structure described by ``treespec`` with ``*`` at each leaf.

      A tree of ``_DummyLeaf`` placeholders is built from the spec and its
      ``repr`` is returned.
      """
      placeholders = [_DummyLeaf() for _ in range(treespec.num_leaves)]
      return repr(tree_unflatten(placeholders, treespec))
  # TODO(angelayi): remove this function after OSS/internal stabilize
  @deprecated(
      "`pytree_to_str` is deprecated. Please use `treespec_dumps` instead.",
      category=FutureWarning,
  )
  def pytree_to_str(treespec: TreeSpec) -> str:
      """Deprecated alias that forwards to :func:`treespec_dumps`."""
      serialized = treespec_dumps(treespec)
      return serialized
  # TODO(angelayi): remove this function after OSS/internal stabilize
  @deprecated(
      "`str_to_pytree` is deprecated. Please use `treespec_loads` instead.",
      category=FutureWarning,
  )
  def str_to_pytree(json: str) -> TreeSpec:
      """Deprecated alias that forwards to :func:`treespec_loads`.

      The parameter is named ``json``, which hides the ``json`` module inside
      this function; the body only forwards the string, so that is harmless.
      """
      treespec = treespec_loads(json)
      return treespec
  def arg_tree_leaves(*args: PyTree, **kwargs: PyTree) -> List[Any]:
      """Return all leaves found in the arguments of this call as one flat list.

      Positional arguments are flattened first, followed by the keyword argument
      values in their given order. A slightly faster version of
      ``tree_leaves((args, kwargs))``.
      """
      return [
          leaf
          for argument in (*args, *kwargs.values())
          for leaf in tree_iter(argument)
      ]
  def tree_flatten_with_path(
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> Tuple[List[Tuple[KeyPath, Any]], TreeSpec]:
      """Flatten a pytree like :func:`tree_flatten`, pairing every leaf with its key path.

      Args:
          tree: The pytree to flatten. Custom node types it contains must have been
              registered with a ``flatten_with_keys_fn``
              (see :func:`register_pytree_node`).
          is_leaf: Extra predicate ``is_leaf(node) -> bool`` consulted at each
              flattening step; a node for which it returns :data:`True` is
              treated as a leaf. When omitted, only the default pytree registry
              decides what counts as a leaf.

      Returns:
          A ``(keyed_leaves, treespec)`` tuple: a list of ``(key path, leaf)``
          pairs and the :class:`TreeSpec` describing the structure of ``tree``.
      """
      # The tree is walked twice: once for the spec, once for the key paths.
      treespec = tree_flatten(tree, is_leaf)[1]
      keyed_leaves = list(_generate_key_paths((), tree, is_leaf))
      return keyed_leaves, treespec
  def tree_leaves_with_path(
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> List[Tuple[KeyPath, Any]]:
      """Return the leaves of a pytree like ``tree_leaves``, each paired with its key path.

      Args:
          tree: The pytree to inspect. Custom node types it contains must have been
              registered with a ``flatten_with_keys_fn``
              (see :func:`register_pytree_node`).
          is_leaf: Extra predicate ``is_leaf(node) -> bool`` consulted at each
              flattening step; a node for which it returns :data:`True` is
              treated as a leaf. When omitted, only the default pytree registry
              decides what counts as a leaf.

      Returns:
          A list of ``(key path, leaf)`` pairs.
      """
      keyed_leaves = _generate_key_paths((), tree, is_leaf)
      return list(keyed_leaves)
  def _generate_key_paths(
      key_path: KeyPath,
      tree: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> Iterable[Tuple[KeyPath, Any]]:
      """Yield ``(key path, leaf)`` for every leaf below ``tree``.

      ``key_path`` is the path accumulated so far; each recursion step extends it
      with the key of the child being entered.

      Raises:
          ValueError: During iteration, if a registered node type has no
              ``flatten_with_keys_fn``.
      """
      # The caller's predicate gets the first say on what is a leaf.
      if is_leaf and is_leaf(tree):
          yield key_path, tree
          return

      node_type = _get_node_type(tree)
      handler = SUPPORTED_NODES.get(node_type)
      if not handler:
          # No handler for this type, so the value itself is a leaf.
          yield key_path, tree
          return

      flatten_with_keys = handler.flatten_with_keys_fn
      if not flatten_with_keys:
          # The type is registered, but without key support.
          raise ValueError(
              f"Did not find a flatten_with_keys_fn for type: {node_type}. "
              "Please pass a flatten_with_keys_fn argument to register_pytree_node."
          )

      keyed_children, _ = flatten_with_keys(tree)
      for key, child in keyed_children:
          yield from _generate_key_paths((*key_path, key), child, is_leaf)
  def tree_map_with_path(
      func: Callable[..., Any],
      tree: PyTree,
      *rests: PyTree,
      is_leaf: Optional[Callable[[PyTree], bool]] = None,
  ) -> PyTree:
      """Like :func:`tree_map`, except that ``func`` also receives each leaf's key path.

      Args:
          func: Called with ``2 + len(rests)`` arguments at every leaf position of
              ``tree``: first the key path of the leaf, then the leaf value, then
              the matching entry of every tree in ``rests``.
          tree: Supplies the structure, the key paths and the leaf values (the
              second positional argument of ``func``).
          rests: Additional pytrees with the same structure as ``tree`` or with
              ``tree`` as a prefix.
          is_leaf: Extra predicate ``is_leaf(node) -> bool`` consulted at each
              flattening step of ``tree``; a node for which it returns
              :data:`True` is treated as a leaf. When omitted, only the default
              pytree registry decides what counts as a leaf.

      Returns:
          A new pytree shaped like ``tree`` whose leaves are
          ``func(keypath, x, *xs)``, where ``keypath`` is the key path of the
          leaf in ``tree``, ``x`` its value and ``xs`` the matching entries of
          ``rests``.
      """
      keyed_leaves, treespec = tree_flatten_with_path(tree, is_leaf)

      # Turn the (key path, leaf) pairs into two parallel columns, then add one
      # more column per extra tree, aligned on the leaves of ``tree``.
      columns = list(zip(*keyed_leaves))
      columns.extend(treespec.flatten_up_to(other) for other in rests)

      return treespec.unflatten(func(*row) for row in zip(*columns))
  def keystr(kp: KeyPath) -> str:
      """Render a key path as text by concatenating ``str()`` of each entry."""
      return "".join(map(str, kp))
  def key_get(obj: Any, kp: KeyPath) -> Any:
      """Follow a key path into ``obj`` and return the value it ends at.

      Every entry of ``kp`` is asked, via its ``get`` method, to step one level
      into the current value; an empty path returns ``obj`` unchanged.
      """
      current = obj
      for entry in kp:
          current = entry.get(current)
      return current