From 1d3ecfeff24389fb1255f7ed8194895c70283028 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Sun, 26 Apr 2026 11:37:47 +0900 Subject: [PATCH] Add support for 'overwrite' option in register_table --- mkdocs/docs/api.md | 10 +++++++++ pyiceberg/catalog/__init__.py | 3 ++- pyiceberg/catalog/bigquery_metastore.py | 6 +++++- pyiceberg/catalog/dynamodb.py | 3 ++- pyiceberg/catalog/glue.py | 6 +++++- pyiceberg/catalog/hive.py | 6 +++++- pyiceberg/catalog/noop.py | 3 ++- pyiceberg/catalog/rest/__init__.py | 5 ++++- pyiceberg/catalog/sql.py | 6 +++++- tests/catalog/test_rest.py | 27 +++++++++++++++++++++++++ 10 files changed, 67 insertions(+), 8 deletions(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 65f91c9619..22d6b2e3c7 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -196,6 +196,16 @@ catalog.register_table( ) ``` +To overwrite an already-registered table using existing metadata, pass `overwrite=True` (the BigQuery, Glue, Hive, and SQL catalogs do not support this and raise `NotImplementedError`): + +```python +catalog.register_table( + identifier="docs_example.bids", + metadata_location="s3://warehouse/path/to/metadata.json", + overwrite=True +) +``` + ## Load a table There are two ways of reading an Iceberg table; through a catalog, and by pointing at the Iceberg metadata directly. Reading through a catalog is preferred, and directly pointing at the metadata is read-only. diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index 5797e1f050..154118ba2c 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -493,12 +493,13 @@ def namespace_exists(self, namespace: str | Identifier) -> bool: """ @abstractmethod - def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table: + def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table: """Register a new table using existing metadata. 
Args: identifier (Union[str, Identifier]): Table identifier for the table metadata_location (str): The location to the metadata + overwrite (bool): Whether to overwrite the existing table, default False Returns: Table: The newly registered table diff --git a/pyiceberg/catalog/bigquery_metastore.py b/pyiceberg/catalog/bigquery_metastore.py index 8739e83969..6568e29aed 100644 --- a/pyiceberg/catalog/bigquery_metastore.py +++ b/pyiceberg/catalog/bigquery_metastore.py @@ -270,12 +270,13 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: datasets_iterator = self.client.list_datasets() return [(dataset.dataset_id,) for dataset in datasets_iterator] - def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table: + def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table: """Register a new table using existing metadata. Args: identifier (str | Identifier): Table identifier for the table metadata_location (str): The location to the metadata + overwrite (bool): Whether to overwrite the existing table, default False Returns: Table: The newly registered table @@ -283,6 +284,9 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) - Raises: TableAlreadyExistsError: If the table already exists """ + if overwrite: + raise NotImplementedError("`overwrite` isn't supported") + dataset_name, table_name = self.identifier_to_database_and_table(identifier) dataset_ref = DatasetReference(project=self.project_id, dataset_id=dataset_name) diff --git a/pyiceberg/catalog/dynamodb.py b/pyiceberg/catalog/dynamodb.py index b36bce8c41..a4d900e160 100644 --- a/pyiceberg/catalog/dynamodb.py +++ b/pyiceberg/catalog/dynamodb.py @@ -210,12 +210,13 @@ def create_table( return self.load_table(identifier=identifier) - def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table: + def register_table(self, identifier: str | Identifier, 
metadata_location: str, overwrite: bool = False) -> Table: """Register a new table using existing metadata. Args: identifier (Union[str, Identifier]): Table identifier for the table metadata_location (str): The location to the metadata + overwrite (bool): Whether to overwrite the existing table, default False Returns: Table: The newly registered table diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 83c06c3438..81c4c57d6a 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -601,12 +601,13 @@ def create_table( catalog=self, ) - def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table: + def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table: """Register a new table using existing metadata. Args: identifier (Union[str, Identifier]): Table identifier for the table metadata_location (str): The location to the metadata + overwrite (bool): Whether to overwrite the existing table, default False Returns: Table: The newly registered table @@ -614,6 +615,9 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) - Raises: TableAlreadyExistsError: If the table already exists """ + if overwrite: + raise NotImplementedError("`overwrite` isn't supported") + database_name, table_name = self.identifier_to_database_and_table(identifier) properties = EMPTY_DICT io = self._load_file_io(location=metadata_location) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index cc6aca2167..3453f9266a 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -446,12 +446,13 @@ def create_view( ) -> View: raise NotImplementedError - def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table: + def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table: """Register a new table using existing metadata. 
Args: identifier (Union[str, Identifier]): Table identifier for the table metadata_location (str): The location to the metadata + overwrite (bool): Whether to overwrite the existing table, default False Returns: Table: The newly registered table @@ -459,6 +460,9 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) - Raises: TableAlreadyExistsError: If the table already exists """ + if overwrite: + raise NotImplementedError("`overwrite` isn't supported") + database_name, table_name = self.identifier_to_database_and_table(identifier) io = self._load_file_io(location=metadata_location) metadata_file = io.new_input(metadata_location) diff --git a/pyiceberg/catalog/noop.py b/pyiceberg/catalog/noop.py index 47a7d89740..cb714e1c5a 100644 --- a/pyiceberg/catalog/noop.py +++ b/pyiceberg/catalog/noop.py @@ -70,12 +70,13 @@ def load_table(self, identifier: str | Identifier) -> Table: def table_exists(self, identifier: str | Identifier) -> bool: raise NotImplementedError - def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table: + def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table: """Register a new table using existing metadata. 
Args: identifier (Union[str, Identifier]): Table identifier for the table metadata_location (str): The location to the metadata + overwrite (bool): Whether to overwrite the existing table, default False Returns: Table: The newly registered table diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index d06fd3885b..ca0ff75e8c 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -316,6 +316,7 @@ def transform_properties_dict_value_to_str(cls, properties: Properties) -> dict[ class RegisterTableRequest(IcebergBaseModel): name: str metadata_location: str = Field(..., alias="metadata-location") + overwrite: bool class ConfigResponse(IcebergBaseModel): @@ -976,12 +977,13 @@ def create_view( return self._response_to_view(self.identifier_to_tuple(identifier), view_response) @retry(**_RETRY_ARGS) - def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table: + def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table: """Register a new table using existing metadata. 
Args: identifier (Union[str, Identifier]): Table identifier for the table metadata_location (str): The location to the metadata + overwrite (bool): Whether to overwrite the existing table, default False Returns: Table: The newly registered table @@ -994,6 +996,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) - request = RegisterTableRequest( name=self._identifier_to_validated_tuple(identifier)[-1], metadata_location=metadata_location, + overwrite=overwrite, ) serialized_json = request.model_dump_json().encode(UTF8) response = self._session.post( diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index e18a0598b9..a65b5f0d5d 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -237,12 +237,13 @@ def create_table( return self.load_table(identifier=identifier) - def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table: + def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table: """Register a new table using existing metadata. 
Args: identifier (Union[str, Identifier]): Table identifier for the table metadata_location (str): The location to the metadata + overwrite (bool): Whether to overwrite the existing table, default False Returns: Table: The newly registered table @@ -251,6 +252,9 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) - TableAlreadyExistsError: If the table already exists NoSuchNamespaceError: If namespace does not exist """ + if overwrite: + raise NotImplementedError("`overwrite` isn't supported") + namespace_tuple = Catalog.namespace_from(identifier) namespace = Catalog.namespace_to_string(namespace_tuple) table_name = Catalog.table_name_from(identifier) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index aa9a467381..7977892635 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1583,6 +1583,33 @@ def test_register_table_409(rest_mock: Mocker, table_schema_simple: Schema) -> N assert "Table already exists" in str(e.value) +def test_register_table_overwrite( + rest_mock: Mocker, table_schema_simple: Schema, example_table_metadata_no_snapshot_v1_rest_json: dict[str, Any] +) -> None: + rest_mock.post( + f"{TEST_URI}v1/namespaces/default/register", + json=example_table_metadata_no_snapshot_v1_rest_json, + status_code=200, + request_headers=TEST_HEADERS, + ) + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN) + actual = catalog.register_table( + identifier=("default", "registered_table"), + metadata_location="s3://warehouse/database/table/metadata.json", + overwrite=True, + ) + expected = Table( + identifier=("default", "registered_table"), + metadata_location=example_table_metadata_no_snapshot_v1_rest_json["metadata-location"], + metadata=TableMetadataV1(**example_table_metadata_no_snapshot_v1_rest_json["metadata"]), + io=load_file_io(), + catalog=catalog, + ) + assert actual.metadata.model_dump() == expected.metadata.model_dump() + assert actual.metadata_location == 
expected.metadata_location + assert actual.name() == expected.name() + + def test_delete_namespace_204(rest_mock: Mocker) -> None: namespace = "example" rest_mock.delete(