diff --git a/CHANGELOG.md b/CHANGELOG.md index 847470b..3919593 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] (proposed 1.0.0) + +### Added +- `output_dir` parameter to `write_search_results_to_file`, defaulting to `./tmp` +- `fetch_file` now defaults `output_dir` to `./tmp` when not supplied + +### Removed +- **BREAKING**: `default_storage_dir` constructor parameter removed from `TINDClient`; pass `output_dir` directly to `fetch_file` and `write_search_results_to_file` instead + ## [0.2.2] ### Changed diff --git a/README.md b/README.md index 5926422..d8895a8 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,6 @@ Create a `TINDClient` with optional configuration values: - `api_key` (optional): Your TIND API token. Falls back to the `TIND_API_KEY` environment variable. - `api_url` (optional): Base URL of the TIND instance (e.g. `https://tind.example.edu`). Falls back to the `TIND_API_URL` environment variable. -- `default_storage_dir` (optional): Default output directory for downloaded files. Defaults to `./tmp`. 
## Usage @@ -43,7 +42,6 @@ from tind_client import TINDClient client = TINDClient( api_key="your-token", api_url="https://tind.example.edu", - default_storage_dir="/tmp", ) ``` @@ -79,8 +77,8 @@ records = client.fetch_search_metadata("collection:'Disabled Students Program Ph xml_results = client.search("collection:'Disabled Students Program Photos'", result_format="xml") pymarc_results = client.search("collection:'Disabled Students Program Photos'", result_format="pymarc") -# search Tind with a query and write results to an XML file in the default storage directory -records_written = client.write_search_results_to_file("Old Emperor Norton", "full_norton_results.xml") +# search TIND with a query and write results to an XML file +records_written = client.write_search_results_to_file("Old Emperor Norton", "full_norton_results.xml", output_dir="/data") ``` ## Running tests diff --git a/tests/conftest.py b/tests/conftest.py index 06c5d94..eeb90f0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,7 +24,6 @@ def tind_env(monkeypatch: pytest.MonkeyPatch) -> None: """Set required TIND environment variables for a test.""" monkeypatch.setenv("TIND_API_KEY", "test-api-key") monkeypatch.setenv("TIND_API_URL", "https://tind.example.edu") - monkeypatch.setenv("DEFAULT_STORAGE_DIR", "/tmp") @pytest.fixture @@ -39,5 +38,4 @@ def client() -> TINDClient: return TINDClient( api_key="test-api-key", api_url="https://tind.example.edu", - default_storage_dir="/tmp", ) diff --git a/tests/test_fetch.py b/tests/test_fetch.py index 768725b..18bf90a 100644 --- a/tests/test_fetch.py +++ b/tests/test_fetch.py @@ -186,13 +186,12 @@ def test_write_search_results_to_file_zero_hits( tmp_path: Path, ) -> None: """write_search_results_to_file returns 0 immediately when the query has no hits.""" - client.default_storage_dir = str(tmp_path) requests_mock.get( f"{BASE_URL}/search", text=json.dumps({"hits": []}), status_code=200, ) - assert 
client.write_search_results_to_file("collection:'empty'") == 0 + assert client.write_search_results_to_file("collection:'empty'", output_dir=str(tmp_path)) == 0 assert not (tmp_path / "tind.xml").exists() @@ -202,7 +201,6 @@ def test_write_search_results_to_file_success( tmp_path: Path, ) -> None: """write_search_results_to_file writes 3 records and returns 3.""" - client.default_storage_dir = str(tmp_path) requests_mock.get( f"{BASE_URL}/search", response_list=[ @@ -214,7 +212,9 @@ def test_write_search_results_to_file_success( {"text": (FIXTURES / "end-of-batch-tind-response.xml").read_text(), "status_code": 200}, ], ) - count = client.write_search_results_to_file("collection:'test'", "out.xml") + count = client.write_search_results_to_file( + "collection:'test'", "out.xml", output_dir=str(tmp_path) + ) assert count == 3 marc21_ns = "http://www.loc.gov/MARC21/slim" @@ -233,7 +233,6 @@ def test_write_search_results_to_file_matched_but_no_records_returned( tmp_path: Path, ) -> None: """write_search_results_to_file raises TINDError when API returns no records for matched IDs""" - client.default_storage_dir = str(tmp_path) requests_mock.get( f"{BASE_URL}/search", response_list=[ @@ -244,7 +243,9 @@ def test_write_search_results_to_file_matched_but_no_records_returned( ], ) with pytest.raises(TINDError, match="API did not return any."): - client.write_search_results_to_file("collection:'test'", "mismatch.xml") + client.write_search_results_to_file( + "collection:'test'", "mismatch.xml", output_dir=str(tmp_path) + ) def test_write_search_results_to_file_matched_but_api_mismatch( @@ -253,7 +254,6 @@ def test_write_search_results_to_file_matched_but_api_mismatch( tmp_path: Path, ) -> None: """write_search_results_to_file raises TINDError when streamed record count != ID count.""" - client.default_storage_dir = str(tmp_path) requests_mock.get( f"{BASE_URL}/search", response_list=[ @@ -269,7 +269,9 @@ def test_write_search_results_to_file_matched_but_api_mismatch( ], ) 
with pytest.raises(TINDError, match="Expected 4 records"): - client.write_search_results_to_file("collection:'test'", "mismatch.xml") + client.write_search_results_to_file( + "collection:'test'", "mismatch.xml", output_dir=str(tmp_path) + ) def test_write_search_results_to_file_malformed_xml_response( @@ -278,7 +280,6 @@ def test_write_search_results_to_file_malformed_xml_response( tmp_path: Path, ) -> None: """write_search_results_to_file raises TINDError when the API returns malformed XML.""" - client.default_storage_dir = str(tmp_path) requests_mock.get( f"{BASE_URL}/search", response_list=[ @@ -287,4 +288,6 @@ def test_write_search_results_to_file_malformed_xml_response( ], ) with pytest.raises(TINDError, match="Failed to parse"): - client.write_search_results_to_file("collection:'test'", "malformed.xml") + client.write_search_results_to_file( + "collection:'test'", "malformed.xml", output_dir=str(tmp_path) + ) diff --git a/tind_client/client.py b/tind_client/client.py index f044520..c7001fa 100644 --- a/tind_client/client.py +++ b/tind_client/client.py @@ -29,19 +29,15 @@ class TINDClient: :param str api_key: Your TIND API token. :param str api_url: Base URL of the TIND instance, e.g. ``https://tind.example.edu``. - :param str default_storage_dir: Default directory used by :meth:`fetch_file` - when no ``output_dir`` is supplied. """ def __init__( self, api_key: str = "", api_url: str = "", - default_storage_dir: str = "./tmp", ) -> None: self.api_key = api_key or os.environ.get("TIND_API_KEY", "") self.api_url = api_url or os.environ.get("TIND_API_URL", "") - self.default_storage_dir = default_storage_dir def fetch_metadata(self, record: str) -> Record: """Fetch the MARC XML metadata for a given record. 
@@ -69,12 +65,12 @@ def fetch_metadata(self, record: str) -> Record: return records[0] - def fetch_file(self, file_url: str, output_dir: str = "") -> str: + def fetch_file(self, file_url: str, output_dir: str = "./tmp") -> str: """Download a file from TIND and save it locally. :param str file_url: The TIND file download URL. :param str output_dir: Directory in which to save the file. - Falls back to ``default_storage_dir`` when empty. + Defaults to ``./tmp``. :raises AuthorizationError: When the TIND API key is invalid or the file is restricted. :raises ValueError: When ``file_url`` is not a valid TIND file download URL. :raises RecordNotFoundError: When the file is invalid or not found. @@ -83,8 +79,7 @@ def fetch_file(self, file_url: str, output_dir: str = "") -> str: if not re.match(r"^http.*/download(/)?(\?version=\d+)?$", file_url): raise ValueError("URL is not a valid TIND file download URL.") - output_target = output_dir or self.default_storage_dir - (status, saved_to) = tind_download(file_url, output_dir=output_target, api_key=self.api_key) + (status, saved_to) = tind_download(file_url, output_dir=output_dir, api_key=self.api_key) if status != 200: raise RecordNotFoundError("Referenced file could not be downloaded.") @@ -178,12 +173,13 @@ def search(self, query: str, result_format: str = "xml") -> list[Any]: return recs def write_search_results_to_file( - self, query: str = "", output_file_name: str = "tind.xml" + self, query: str = "", output_file_name: str = "tind.xml", output_dir: str = "./tmp" ) -> int: """Search TIND and stream results to an XML file. :param str query: A TIND search query string. :param str output_file_name: filename for the output XML file. + :param str output_dir: Directory in which to save the file. Defaults to ``./tmp``. :returns int: The number of records written to the file. 
""" @@ -192,7 +188,7 @@ def write_search_results_to_file( return 0 recs_written = 0 - output_path = os.path.join(self.default_storage_dir, output_file_name) + output_path = Path(output_dir) / output_file_name try: with open(output_path, "w", encoding="utf-8") as f: f.write(f'\n\n') @@ -206,7 +202,7 @@ def write_search_results_to_file( raise TINDError(f"Matched {total_hits} tind ids, but API did not return any.") f.write("\n") except Exception: - Path(output_path).unlink(missing_ok=True) + output_path.unlink(missing_ok=True) raise if recs_written != total_hits: