-
-
Notifications
You must be signed in to change notification settings - Fork 399
refactor/metadata package #3919
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
17e74a9
8da469f
e43bd36
28efcde
23aed89
43eefba
f26e1bd
27b000c
2a45d2d
d530cb4
266a8eb
e431dee
c547f55
1517cd8
b90fb68
bb0183c
fc09be6
33bfc99
1098718
b437812
2578ad8
51a1df3
d06fad4
a2c2960
d42a508
e7ff23c
99b2571
a88716b
7571dbc
bb98cde
08c7643
2feb4be
a12fe70
ec22950
275bf55
374181a
9554ca3
8d2bd63
e6139a6
ac0304c
ef4b773
b7b055e
0ae8db9
1b62c4c
c6fcde9
c90c9a0
331ea93
33c9a80
700d916
84d5ca1
c30a768
fa66cc9
cf12cdc
82e10d6
4b5bd11
6a0be8c
e6e5920
a732fb2
8691138
a6d0e5e
039fd7e
b8d67fe
cdedda2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| # zarr-metadata | ||
|
|
||
| Spec-defined metadata types for Zarr v2 and v3, distributed as pure-typing | ||
| artifacts (TypedDicts, type aliases, unions). No runtime logic, no numpy, | ||
| no storage backends. | ||
|
|
||
| `zarr-metadata` is developed in the [zarr-python](https://github.com/zarr-developers/zarr-python) | ||
| repository at `packages/zarr-metadata/`. | ||
|
|
||
| ## Principle | ||
|
|
||
| Every type that models a spec artifact (v2 or v3 array/group/consolidated | ||
| metadata, chunk grids, codec metadata, dtype shapes) belongs in | ||
| `zarr-metadata`. Zarr-python implementation details (runtime codecs, | ||
| config dataclasses, numcodecs-derived helpers) stay in `zarr`. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| [build-system] | ||
| requires = ["hatchling>=1.29.0"] | ||
| build-backend = "hatchling.build" | ||
|
|
||
| [project] | ||
| name = "zarr-metadata" | ||
| version = "0.1.0" | ||
| description = "Spec-defined metadata types for Zarr v2 and v3." | ||
| readme = "README.md" | ||
| requires-python = ">=3.11" | ||
| license = "MIT" | ||
| authors = [ | ||
| { name = "Davis Bennett", email = "davis.v.bennett@gmail.com" }, | ||
| ] | ||
| classifiers = [ | ||
| "Development Status :: 4 - Beta", | ||
| "Intended Audience :: Developers", | ||
| "License :: OSI Approved :: MIT License", | ||
| "Programming Language :: Python", | ||
| "Programming Language :: Python :: 3", | ||
| "Programming Language :: Python :: 3.11", | ||
| "Programming Language :: Python :: 3.12", | ||
| "Programming Language :: Python :: 3.13", | ||
| "Programming Language :: Python :: 3.14", | ||
| "Typing :: Typed", | ||
| ] | ||
| dependencies = [ | ||
| "typing_extensions>=4.13", | ||
| ] | ||
|
|
||
| [project.optional-dependencies] | ||
| test = ["pytest"] | ||
|
|
||
| [tool.hatch.build.targets.wheel] | ||
| packages = ["src/zarr_metadata"] | ||
|
|
||
| [tool.numpydoc_validation] | ||
| checks = [ | ||
| "GL10", | ||
| "SS04", | ||
| "PR02", | ||
| "PR03", | ||
| "PR05", | ||
| "PR06", | ||
| ] | ||
|
|
||
| [tool.pyright] | ||
| include = ["src"] | ||
| enableExperimentalFeatures = true | ||
| typeCheckingMode = "strict" | ||
| pythonVersion = "3.11" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| from zarr_metadata.common import JSON, NamedConfig | ||
| from zarr_metadata.v2.array import ArrayMetadataV2 | ||
| from zarr_metadata.v2.group import GroupMetadataV2 | ||
| from zarr_metadata.v3.array import ArrayMetadataV3 | ||
| from zarr_metadata.v3.group import GroupMetadataV3 | ||
|
|
||
| ArrayMetadata = ArrayMetadataV2 | ArrayMetadataV3 | ||
| """Any Zarr array metadata document (v2 or v3).""" | ||
|
|
||
| GroupMetadata = GroupMetadataV2 | GroupMetadataV3 | ||
| """Any Zarr group metadata document (v2 or v3).""" | ||
|
|
||
|
|
||
| __all__ = [ | ||
| "JSON", | ||
| "ArrayMetadata", | ||
| "ArrayMetadataV2", | ||
| "ArrayMetadataV3", | ||
| "GroupMetadata", | ||
| "GroupMetadataV2", | ||
| "GroupMetadataV3", | ||
| "NamedConfig", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| """ | ||
| Top-level cross-version primitives for Zarr metadata. | ||
|
|
||
| Version-specific types live under `zarr_metadata.v2` and `zarr_metadata.v3`. | ||
| Codec and dtype spec types live under `zarr_metadata.v3.codec` and | ||
| `zarr_metadata.v3.data_type`. | ||
| """ | ||
|
|
||
| from collections.abc import Mapping, Sequence | ||
| from typing import NotRequired, TypedDict | ||
|
|
||
| JSON = str | int | float | bool | Mapping[str, "JSON"] | Sequence["JSON"] | None | ||
| """Any valid JSON value.""" | ||
|
|
||
|
|
||
| class NamedConfig(TypedDict): | ||
| """ | ||
| Externally-tagged union member for a metadata field. | ||
|
|
||
| `name` is a required string identifying the member; `configuration` is an | ||
| optional mapping from string keys to JSON values. | ||
| """ | ||
|
|
||
| name: str | ||
| configuration: NotRequired[Mapping[str, JSON]] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| """Zarr v2 metadata types.""" | ||
|
|
||
| from zarr_metadata.v2.array import ArrayMetadataV2, DataTypeV2, DataTypeV2Structured | ||
| from zarr_metadata.v2.codec import NumcodecsConfig | ||
| from zarr_metadata.v2.consolidated import ConsolidatedMetadataV2 | ||
| from zarr_metadata.v2.group import GroupMetadataV2 | ||
|
|
||
| __all__ = [ | ||
| "ArrayMetadataV2", | ||
| "ConsolidatedMetadataV2", | ||
| "DataTypeV2", | ||
| "DataTypeV2Structured", | ||
| "GroupMetadataV2", | ||
| "NumcodecsConfig", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| """Zarr v2 array metadata types.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from typing import TYPE_CHECKING, Literal, NotRequired, TypedDict | ||
|
|
||
| if TYPE_CHECKING: | ||
| from zarr_metadata.common import JSON | ||
| from zarr_metadata.v2.codec import NumcodecsConfig | ||
|
|
||
|
|
||
| DataTypeV2Structured = tuple[str, str] | tuple[str, str, tuple[int, ...]] | ||
| """ | ||
| A single field entry inside a structured v2 dtype. | ||
|
|
||
| Spec-faithful: each entry is `(fieldname, datatype)` or | ||
| `(fieldname, datatype, shape)`. `datatype` is a numpy-style dtype string; | ||
| `shape` is present only when the field is a subarray field. | ||
|
|
||
| See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding | ||
| """ | ||
|
|
||
| DataTypeV2 = str | tuple[DataTypeV2Structured, ...] | ||
| """The v2 dtype representation. | ||
|
|
||
| Simple dtypes are numpy-style strings (e.g. `"<f8"`, `"|S10"`). | ||
| Structured dtypes are lists of field records. Endianness is encoded in the | ||
| prefix character of the string; parsing it out is a caller concern, not | ||
| part of this type. | ||
| """ | ||
|
|
||
|
|
||
| class ArrayMetadataV2(TypedDict): | ||
| """ | ||
| Zarr v2 array metadata document (the `.zarray` content). | ||
|
|
||
| Every key is required except `dimension_separator`. `compressor` and | ||
| `filters` are required keys whose value may be `None`. | ||
|
|
||
| See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html | ||
| """ | ||
|
|
||
| zarr_format: Literal[2] | ||
| shape: tuple[int, ...] | ||
| chunks: tuple[int, ...] | ||
| dtype: DataTypeV2 | ||
| compressor: NumcodecsConfig | None | ||
| fill_value: JSON | ||
| order: Literal["C", "F"] | ||
| filters: tuple[NumcodecsConfig, ...] | None | ||
| dimension_separator: NotRequired[Literal[".", "/"]] | ||
| # NOTE(review): in Zarr v2, user attributes are stored in a sibling `.zattrs` | ||
| # document rather than in `.zarray` -- confirm that a required `attributes` | ||
| # key belongs in this model. | ||
| attributes: JSON | ||
|
|
||
|
|
||
| __all__ = [ | ||
| "ArrayMetadataV2", | ||
| "DataTypeV2", | ||
| "DataTypeV2Structured", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| """ | ||
| Zarr v2 codec configuration shape. | ||
|
|
||
| V2 compressors and filters are numcodecs configuration dicts: a required | ||
| `id` field naming the codec, plus arbitrary codec-specific extra fields. | ||
| """ | ||
|
|
||
| from typing_extensions import TypedDict | ||
|
|
||
| from zarr_metadata.common import JSON | ||
|
|
||
|
|
||
| class NumcodecsConfig(TypedDict, extra_items=JSON): # type: ignore[call-arg] | ||
| """ | ||
| A numcodecs configuration dict, used as a v2 compressor or filter. | ||
|
|
||
| The required `id` field names the codec; codec-specific parameters | ||
| (e.g. `cname`, `clevel` for blosc) appear as extra fields. Extra fields | ||
| are typed as `JSON` through the `extra_items` class argument. | ||
|
|
||
| See the "compressor" and "filters" sections of | ||
| https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html | ||
| """ | ||
|
|
||
| # NOTE(review): the `type: ignore[call-arg]` on the class line presumably | ||
| # covers type checkers that do not yet accept `extra_items` -- confirm it | ||
| # is still needed. | ||
| id: str | ||
|
|
||
|
|
||
| __all__ = [ | ||
| "NumcodecsConfig", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| """Zarr v2 consolidated metadata (`.zmetadata` file). | ||
|
|
||
| This module models the de-facto `.zmetadata` file used by the reference | ||
| Python implementation of Zarr v2. **This is NOT a spec artifact.** There | ||
| is no Zarr v2 specification that defines `.zmetadata`; it is a | ||
| canonical-implementation convention. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from typing import TYPE_CHECKING, TypedDict | ||
|
|
||
| if TYPE_CHECKING: | ||
| from collections.abc import Mapping | ||
|
|
||
| from .array import ArrayMetadataV2 | ||
| from .group import GroupMetadataV2 | ||
|
|
||
|
|
||
| class ConsolidatedMetadataV2(TypedDict): | ||
| """ | ||
| `.zmetadata` file contents. | ||
|
|
||
| The `metadata` map uses flat path keys (`"foo/bar/.zarray"`, | ||
| `"foo/.zattrs"`, etc.) pointing to the JSON contents of the file at | ||
| that path. The keys include the filename suffix, not just the node path. | ||
| """ | ||
|
|
||
| zarr_consolidated_format: int | ||
| # NOTE(review): the docstring lists `.zattrs` keys, whose values are | ||
| # attribute documents, but the value type below only admits group and | ||
| # array metadata -- confirm whether `.zattrs` contents should be part of | ||
| # this union. | ||
| metadata: Mapping[str, GroupMetadataV2 | ArrayMetadataV2] | ||
|
|
||
|
|
||
| __all__ = [ | ||
| "ConsolidatedMetadataV2", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| """Zarr v2 group metadata types.""" | ||
|
|
||
| from typing import Literal, TypedDict | ||
|
|
||
|
|
||
| class GroupMetadataV2(TypedDict): | ||
| """ | ||
| Zarr v2 group metadata document (the `.zgroup` content). | ||
|
|
||
| Attributes live in a sibling `.zattrs` file, so they are not part | ||
| of this dict. | ||
| """ | ||
|
|
||
| zarr_format: Literal[2] | ||
|
|
||
|
|
||
| __all__ = [ | ||
| "GroupMetadataV2", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| """Zarr v3 metadata types.""" | ||
|
|
||
| from zarr_metadata.v3.array import AllowedExtraField, ArrayMetadataV3, MetadataField | ||
| from zarr_metadata.v3.consolidated import ConsolidatedMetadataV3 | ||
| from zarr_metadata.v3.group import GroupMetadataV3 | ||
|
|
||
| __all__ = [ | ||
| "AllowedExtraField", | ||
| "ArrayMetadataV3", | ||
| "ConsolidatedMetadataV3", | ||
| "GroupMetadataV3", | ||
| "MetadataField", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| """Zarr v3 array metadata types.""" | ||
|
|
||
| from collections.abc import Mapping | ||
| from typing import Literal, NotRequired | ||
|
|
||
| from typing_extensions import TypedDict | ||
|
|
||
| from zarr_metadata.common import JSON, NamedConfig | ||
|
|
||
|
|
||
| class AllowedExtraField(TypedDict, extra_items=JSON): # type: ignore[call-arg] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd slightly prefer |
||
| """ | ||
| Extra field on a v3 array metadata document. | ||
| Extras must include `must_understand: false` and may carry arbitrary | ||
| additional JSON data. | ||
| """ | ||
|
|
||
| must_understand: Literal[False] | ||
|
|
||
|
|
||
| MetadataField = str | NamedConfig | ||
| """A string, or a `{"name": str, "configuration": {...}}` object in which the `configuration` key may be omitted.""" | ||
|
|
||
|
|
||
| class ArrayMetadataV3(TypedDict, extra_items=AllowedExtraField): # type: ignore[call-arg] | ||
| """ | ||
| Zarr v3 array metadata document (the `zarr.json` content for an array). | ||
| Extra keys are permitted if they conform to `AllowedExtraField`. | ||
| See https://zarr-specs.readthedocs.io/en/latest/v3/core/index.html#array-metadata | ||
| """ | ||
|
|
||
| zarr_format: Literal[3] | ||
| node_type: Literal["array"] | ||
| data_type: MetadataField | ||
| shape: tuple[int, ...] | ||
| chunk_grid: MetadataField | ||
| chunk_key_encoding: MetadataField | ||
| fill_value: JSON | ||
| codecs: tuple[MetadataField, ...] | ||
| attributes: NotRequired[Mapping[str, JSON]] | ||
| storage_transformers: NotRequired[tuple[MetadataField, ...]] | ||
|
Comment on lines
+37
to
+44
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there are more restrictions on data_type, chunk_grid, chunk_key_encoding, and codecs than implied by
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. which restrictions are you thinking of? per the spec they can all be strings or
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking of strict type definitions for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. such as? we don't know anything more about the
Comment on lines
+37
to
+44
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. storage_transformers should be empty array until one is defined as an extension
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why? |
||
| dimension_names: NotRequired[tuple[str | None, ...]] | ||
|
|
||
|
|
||
| __all__ = [ | ||
| "AllowedExtraField", | ||
| "ArrayMetadataV3", | ||
| "MetadataField", | ||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| """ | ||
| Zarr v3 chunk grid metadata types. | ||
|
|
||
| Each chunk grid lives in its own submodule: | ||
|
|
||
| - `regular` -- core v3 spec | ||
| - `rectilinear` -- zarr-extensions | ||
|
|
||
| See https://zarr-specs.readthedocs.io/en/latest/v3/core/index.html#chunk-grids | ||
| """ |
Uh oh!
There was an error while loading. Please reload this page.