From 16099ab3f1584955b444a508dca474626ff3d3c9 Mon Sep 17 00:00:00 2001 From: jdsika Date: Thu, 2 Apr 2026 17:03:54 +0200 Subject: [PATCH 1/3] fix(generators): add --xsd-anyuri-as-iri flag for cross-generator IRI consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JSON-LD processors treat xsd:anyURI as an opaque string literal, so range:uri/uriorcurie slots get xsd:anyURI coercion instead of proper IRI node semantics (@type:@id, owl:ObjectProperty, sh:IRI). Add an opt-in --xsd-anyuri-as-iri flag that promotes xsd:anyURI ranges to IRI semantics across all three generators: - JSON-LD context: @type: xsd:anyURI → @type: @id - OWL: DatatypeProperty → ObjectProperty (no rdfs:range restriction) - SHACL: sh:datatype xsd:anyURI → sh:nodeKind sh:IRI The flag only affects types whose XSD mapping is xsd:anyURI (uri and uriorcurie). The curie type (xsd:string) is correctly excluded via is_xsd_anyuri_range() to maintain cross-generator consistency. Standards basis: - OWL 2 §5.3-5.4 (ObjectProperty vs DatatypeProperty) - SHACL §4.8.1 (sh:nodeKind sh:IRI) - JSON-LD 1.1 §4.2.2 (type coercion with @id) - RDF 1.1 §3.2-3.3 (IRIs as first-class nodes, not string literals) Signed-off-by: jdsika --- .../linkml/generators/common/subproperty.py | 33 ++ .../src/linkml/generators/jsonldcontextgen.py | 22 +- .../linkml/src/linkml/generators/owlgen.py | 49 ++- .../test_generators/test_jsonldcontextgen.py | 372 ++++++++++++++++++ 4 files changed, 468 insertions(+), 8 deletions(-) diff --git a/packages/linkml/src/linkml/generators/common/subproperty.py b/packages/linkml/src/linkml/generators/common/subproperty.py index 4687c38218..9b136e2429 100644 --- a/packages/linkml/src/linkml/generators/common/subproperty.py +++ b/packages/linkml/src/linkml/generators/common/subproperty.py @@ -15,6 +15,10 @@ CURIE_TYPES: frozenset[str] = frozenset({"uriorcurie", "curie"}) URI_TYPES: frozenset[str] = frozenset({"uri"}) +# Types whose XSD mapping is xsd:anyURI (not xsd:string). +# ``curie`` maps to xsd:string and is deliberately excluded. +_ANYURI_TYPES: frozenset[str] = frozenset({"uri", "uriorcurie"}) + def is_uri_range(sv: SchemaView, range_type: str | None) -> bool: """ @@ -63,6 +67,35 @@ def is_curie_range(sv: SchemaView, range_type: str | None) -> bool: return False +def is_xsd_anyuri_range(sv: SchemaView, range_type: str | None) -> bool: + """Check if range type resolves to ``xsd:anyURI``. + + Returns True for ``uri``, ``uriorcurie``, and types that inherit from them. + Returns False for ``curie`` (which maps to ``xsd:string``). + + This is the correct predicate for the ``--xsd-anyuri-as-iri`` flag: only + types whose XSD representation is ``xsd:anyURI`` should be promoted from + literal to IRI semantics. ``curie`` is a compact string representation + that resolves to ``xsd:string`` and must not be affected. + + :param sv: SchemaView for type ancestry lookup + :param range_type: The range type to check + :return: True if range type maps to xsd:anyURI + """ + if range_type is None: + return False + + if range_type in _ANYURI_TYPES: + return True + + if range_type in sv.all_types(): + type_ancestors = set(sv.type_ancestors(range_type)) + if type_ancestors & _ANYURI_TYPES: + return True + + return False + + def format_slot_value_for_range(sv: SchemaView, slot_name: str, range_type: str | None) -> str: """ Format slot value according to the declared range type. diff --git a/packages/linkml/src/linkml/generators/jsonldcontextgen.py b/packages/linkml/src/linkml/generators/jsonldcontextgen.py index 5298a602f9..3c342f9de8 100644 --- a/packages/linkml/src/linkml/generators/jsonldcontextgen.py +++ b/packages/linkml/src/linkml/generators/jsonldcontextgen.py @@ -23,6 +23,10 @@ URI_RANGES = (SHEX.nonliteral, SHEX.bnode, SHEX.iri) +# Extended URI_RANGES that also treats xsd:anyURI as an IRI reference (@id) +# rather than a typed literal. Opt-in via --xsd-anyuri-as-iri flag. +URI_RANGES_WITH_XSD = (*URI_RANGES, XSD.anyURI) + ENUM_CONTEXT = { "text": "skos:notation", "description": "skos:prefLabel", @@ -72,6 +76,12 @@ class ContextGenerator(Generator): _local_slots: set | None = field(default=None, repr=False) _external_classes: set | None = field(default=None, repr=False) _external_slots: set | None = field(default=None, repr=False) + xsd_anyuri_as_iri: bool = False + """Map xsd:anyURI-typed ranges (uri, uriorcurie) to ``@type: @id`` instead of ``@type: xsd:anyURI``. + + This aligns the JSON-LD context with the SHACL generator, which emits + ``sh:nodeKind sh:IRI`` for the same types. + """ # Framing (opt-in via CLI flag) emit_frame: bool = False @@ -263,6 +273,7 @@ def _literal_coercion_for_ranges(self, ranges: list[str]) -> tuple[bool, str | N and "could not resolve safely because the branches disagree". """ coercions: set[str | None] = set() + uri_ranges = URI_RANGES_WITH_XSD if self.xsd_anyuri_as_iri else URI_RANGES for range_name in ranges: if range_name not in self.schema.types: continue @@ -271,7 +282,7 @@ def _literal_coercion_for_ranges(self, ranges: list[str]) -> tuple[bool, str | N range_uri = self.namespaces.uri_for(range_type.uri) if range_uri == XSD.string: coercions.add(None) - elif range_uri in URI_RANGES: + elif range_uri in uri_ranges: coercions.add("@id") else: coercions.add(range_type.uri) @@ -316,9 +327,10 @@ def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None: self.emit_prefixes.add(skos) else: range_type = self.schema.types[slot.range] + uri_ranges = URI_RANGES_WITH_XSD if self.xsd_anyuri_as_iri else URI_RANGES if self.namespaces.uri_for(range_type.uri) == XSD.string: pass - elif self.namespaces.uri_for(range_type.uri) in URI_RANGES: + elif self.namespaces.uri_for(range_type.uri) in uri_ranges: slot_def["@type"] = "@id" else: slot_def["@type"] = range_type.uri @@ -438,6 +450,12 @@ def serialize( help="Exclude elements from URL-based external vocabulary imports while keeping local file imports. " "Useful when extending ontologies (e.g. W3C VC v2) whose terms are @protected in their own JSON-LD context.", ) +@click.option( + "--xsd-anyuri-as-iri/--no-xsd-anyuri-as-iri", + default=False, + show_default=True, + help="Map xsd:anyURI-typed ranges (uri, uriorcurie) to @type: @id instead of @type: xsd:anyURI.", +) @click.version_option(__version__, "-V", "--version") def cli(yamlfile, emit_frame, embed_context_in_frame, output, **args): """Generate jsonld @context definition from LinkML model""" diff --git a/packages/linkml/src/linkml/generators/owlgen.py b/packages/linkml/src/linkml/generators/owlgen.py index 3688cc4453..38f47823bf 100644 --- a/packages/linkml/src/linkml/generators/owlgen.py +++ b/packages/linkml/src/linkml/generators/owlgen.py @@ -19,6 +19,7 @@ from linkml import METAMODEL_NAMESPACE_NAME from linkml._version import __version__ +from linkml.generators.common.subproperty import is_xsd_anyuri_range from linkml.utils.deprecation import deprecation_warning from linkml.utils.generator import Generator, shared_arguments from linkml_runtime import SchemaView @@ -215,6 +216,24 @@ def _present(values: Iterable[_T | None]) -> list[_T]: return [value for value in values if value is not None] + xsd_anyuri_as_iri: bool = False + """Treat ``range: uri`` / ``range: uriorcurie`` slots as ``owl:ObjectProperty`` + instead of ``owl:DatatypeProperty`` with ``rdfs:range xsd:anyURI``. + + This aligns the OWL output with the SHACL generator (which emits + ``sh:nodeKind sh:IRI``) and the JSON-LD context generator (which emits + ``@type: @id`` when its own ``--xsd-anyuri-as-iri`` flag is set). + + Without this flag, ``range: uri`` produces a semantic inconsistency: + OWL says the value is a literal (``DatatypeProperty``), while SHACL and + JSON-LD say it is an IRI node. Enabling the flag makes all three + generators consistent. + + When enabled, URI-range slots: + - become ``owl:ObjectProperty`` (not ``owl:DatatypeProperty``) + - have no ``rdfs:range`` restriction (any IRI is valid) + """ + def as_graph(self) -> Graph: """ Generate an rdflib Graph from the LinkML schema. @@ -785,10 +804,14 @@ def _get_slot_nodes( this_owl_types: set[OWL_TYPE] = set() if slot_range: if slot_range in sv.all_types(imports=True): - self.slot_is_literal_map[main_slot.name].add(True) - this_owl_types.add(RDFS.Literal) - typ = sv.get_type(slot_range) - owl_exprs.append(self._type_uri(typ.name)) + if self.xsd_anyuri_as_iri and is_xsd_anyuri_range(sv, slot_range): + self.slot_is_literal_map[main_slot.name].add(False) + this_owl_types.add(OWL.Thing) + else: + self.slot_is_literal_map[main_slot.name].add(True) + this_owl_types.add(RDFS.Literal) + typ = sv.get_type(slot_range) + owl_exprs.append(self._type_uri(typ.name)) elif slot_range in sv.all_enums(imports=True): # TODO: enums fill this in owl_exprs.append(self._enum_uri(EnumDefinitionName(slot_range))) @@ -1388,8 +1411,9 @@ def _boolean_expression( def _range_is_datatype(self, slot: SlotDefinition) -> bool: if self.type_objects: return False - else: - return slot.range in self.schema.types + if self.xsd_anyuri_as_iri and is_xsd_anyuri_range(self.schemaview, slot.range): + return False + return slot.range in self.schema.types def _range_uri(self, slot: SlotDefinition) -> URIRef: if slot.range in self.schema.types: @@ -1508,6 +1532,8 @@ def slot_owl_type(self, slot: SlotDefinition) -> URIRef: elif slot_range in sv.all_enums(): return OWL.ObjectProperty elif slot_range in sv.all_types(): + if self.xsd_anyuri_as_iri and is_xsd_anyuri_range(sv, slot_range): + return OWL.ObjectProperty return OWL.DatatypeProperty else: raise Exception(f"Unknown range: {slot.range}") @@ -1630,6 +1656,17 @@ def slot_owl_type(self, slot: SlotDefinition) -> URIRef: "By default such axioms are emitted for every abstract class that has direct is_a children." ), ) +@click.option( + "--xsd-anyuri-as-iri/--no-xsd-anyuri-as-iri", + default=False, + show_default=True, + help=( + "Treat range: uri / range: uriorcurie slots as owl:ObjectProperty (IRI node) " + "instead of owl:DatatypeProperty with rdfs:range xsd:anyURI (literal). " + "Aligns OWL output with the SHACL generator (sh:nodeKind sh:IRI) and " + "the JSON-LD context generator (--xsd-anyuri-as-iri → @type: @id)." + ), +) @click.version_option(__version__, "-V", "--version") def cli(yamlfile: str, metadata_profile: str, **kwargs: Any) -> None: """Generate an OWL representation of a LinkML model diff --git a/tests/linkml/test_generators/test_jsonldcontextgen.py b/tests/linkml/test_generators/test_jsonldcontextgen.py index ff5b75e662..10123e58ad 100644 --- a/tests/linkml/test_generators/test_jsonldcontextgen.py +++ b/tests/linkml/test_generators/test_jsonldcontextgen.py @@ -862,3 +862,375 @@ def test_exclude_external_imports_works_with_mergeimports_false(tmp_path): # External vocabulary terms must be excluded assert "issuer" not in ctx, "External slot 'issuer' should be excluded with mergeimports=False" assert "ExternalCredential" not in ctx, "External class should be excluded with mergeimports=False" + + +def test_xsd_anyuri_as_iri_flag(): + """Test that --xsd-anyuri-as-iri maps uri ranges to @type: @id. + + By default, ``range: uri`` (type_uri ``xsd:anyURI``) produces + ``@type: xsd:anyURI`` (typed literal). With ``xsd_anyuri_as_iri=True``, + it produces ``@type: @id`` (IRI node reference), aligning the JSON-LD + context with the SHACL generator which already emits ``sh:nodeKind sh:IRI`` + for the same type. + + See: + - W3C SHACL §4.8.1 sh:nodeKind (https://www.w3.org/TR/shacl/#NodeKindConstraintComponent) + - JSON-LD 1.1 §4.2.2 Type Coercion (https://www.w3.org/TR/json-ld11/#type-coercion) + - RDF 1.1 §3.3 Literals vs §3.2 IRIs (https://www.w3.org/TR/rdf11-concepts/) + """ + schema_yaml = """ +id: https://example.org/test-uri-context +name: test_uri_context + +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + +imports: + - linkml:types + +default_prefix: ex +default_range: string + +slots: + homepage: + range: uri + slot_uri: ex:homepage + node_ref: + range: nodeidentifier + slot_uri: ex:nodeRef + name: + range: string + slot_uri: ex:name + +classes: + Thing: + slots: + - homepage + - node_ref + - name +""" + # Default behaviour: uri → xsd:anyURI (backward compatible) + ctx_default = json.loads(ContextGenerator(schema_yaml).serialize())["@context"] + assert ctx_default["homepage"]["@type"] == "xsd:anyURI" + + # Opt-in: uri → @id (aligned with SHACL sh:nodeKind sh:IRI) + ctx_iri = json.loads(ContextGenerator(schema_yaml, xsd_anyuri_as_iri=True).serialize())["@context"] + assert ctx_iri["homepage"]["@type"] == "@id", ( + f"Expected @type: @id for uri range with xsd_anyuri_as_iri=True, got {ctx_iri['homepage'].get('@type')}" + ) + + # nodeidentifier is unaffected by the flag (not xsd:anyURI-typed) + # Its default @type depends on URI_RANGES matching shex:nonLiteral; + # we only verify the flag doesn't change its behaviour. + assert ctx_default["node_ref"]["@type"] == ctx_iri["node_ref"]["@type"] + + # string → no @type regardless of flag + assert "@type" not in ctx_default.get("name", {}) + assert "@type" not in ctx_iri.get("name", {}) + + +def test_xsd_anyuri_as_iri_with_any_of(): + """The --xsd-anyuri-as-iri flag must also apply to ``any_of`` slots + whose type branches include ``uri`` mixed with class ranges. + + ``_literal_coercion_for_ranges`` resolves mixed any_of type branches + and must use the extended URI_RANGES when the flag is active. + """ + schema_yaml = """ +id: https://example.org/test-anyof-uri +name: test_anyof_uri + +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + +imports: + - linkml:types + +default_prefix: ex +default_range: string + +classes: + Container: + slots: + - mixed_slot + Target: + class_uri: ex:Target + +slots: + mixed_slot: + slot_uri: ex:mixed + any_of: + - range: Target + - range: uri +""" + # Default: mixed class+uri any_of — uri resolves to xsd:anyURI literal, + # which disagrees with @id from the class branch → no coercion emitted + ctx_default = json.loads(ContextGenerator(schema_yaml).serialize())["@context"] + default_type = ctx_default.get("mixed_slot", {}).get("@type") + assert default_type != "@id", f"Without flag, mixed any_of should not resolve to @id, got {default_type}" + + # With flag: uri branch now also resolves to @id, matching the class branch + # → all branches agree → @id is emitted + ctx_iri = json.loads(ContextGenerator(schema_yaml, xsd_anyuri_as_iri=True).serialize())["@context"] + assert ctx_iri["mixed_slot"]["@type"] == "@id", ( + f"Expected @id for mixed any_of with flag, got {ctx_iri.get('mixed_slot', {}).get('@type')}" + ) + + +def test_xsd_anyuri_as_iri_owl(): + """OWL generator must produce owl:ObjectProperty for uri ranges when flag is set. + + Without the flag, ``range: uri`` produces ``owl:DatatypeProperty`` with + ``rdfs:range xsd:anyURI``. With ``xsd_anyuri_as_iri=True``, it should + produce ``owl:ObjectProperty`` (no rdfs:range restriction), aligning + with the SHACL generator's ``sh:nodeKind sh:IRI``. + """ + from rdflib import OWL, RDF, URIRef + + from linkml.generators.owlgen import OwlSchemaGenerator + + schema_yaml = """ +id: https://example.org/test-owl-uri +name: test_owl_uri +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + homepage: + range: uri + slot_uri: ex:homepage + name: + range: string + slot_uri: ex:name +classes: + Thing: + slots: + - homepage + - name +""" + # Default: uri → DatatypeProperty (must disable type_objects which + # unconditionally returns ObjectProperty for all type-ranged slots) + gen_default = OwlSchemaGenerator(schema_yaml, type_objects=False) + g_default = gen_default.as_graph() + homepage_uri = URIRef("https://example.org/homepage") + default_rdf_type = set(g_default.objects(homepage_uri, RDF.type)) + assert OWL.DatatypeProperty in default_rdf_type, ( + f"Without flag, homepage should be DatatypeProperty, got {default_rdf_type}" + ) + + # With flag: uri → ObjectProperty + gen_iri = OwlSchemaGenerator(schema_yaml, xsd_anyuri_as_iri=True, type_objects=False) + g_iri = gen_iri.as_graph() + iri_rdf_type = set(g_iri.objects(homepage_uri, RDF.type)) + assert OWL.ObjectProperty in iri_rdf_type, f"With flag, homepage should be ObjectProperty, got {iri_rdf_type}" + assert OWL.DatatypeProperty not in iri_rdf_type, ( + f"With flag, homepage should NOT be DatatypeProperty, got {iri_rdf_type}" + ) + + # String slot must remain DatatypeProperty regardless of flag + name_uri = URIRef("https://example.org/name") + name_rdf_type = set(g_iri.objects(name_uri, RDF.type)) + assert OWL.DatatypeProperty in name_rdf_type, f"String slot should remain DatatypeProperty, got {name_rdf_type}" + + +def test_xsd_anyuri_as_iri_uriorcurie_range(): + """``uriorcurie`` also maps to ``xsd:anyURI`` and must behave identically + to ``uri`` when the ``--xsd-anyuri-as-iri`` flag is active. + + This is a high-priority coverage gap: ``uriorcurie`` is distinct from + ``uri`` at the LinkML level but shares the same XSD type. + """ + schema_yaml = """ +id: https://example.org/test-uriorcurie +name: test_uriorcurie + +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + +imports: + - linkml:types + +default_prefix: ex +default_range: string + +slots: + reference: + range: uriorcurie + slot_uri: ex:reference + homepage: + range: uri + slot_uri: ex:homepage + +classes: + Thing: + slots: + - reference + - homepage +""" + ctx_default = json.loads(ContextGenerator(schema_yaml).serialize())["@context"] + assert ctx_default["reference"]["@type"] == "xsd:anyURI" + assert ctx_default["homepage"]["@type"] == "xsd:anyURI" + + ctx_iri = json.loads(ContextGenerator(schema_yaml, xsd_anyuri_as_iri=True).serialize())["@context"] + assert ctx_iri["reference"]["@type"] == "@id", "uriorcurie should map to @id with xsd_anyuri_as_iri=True" + assert ctx_iri["homepage"]["@type"] == "@id", "uri should map to @id with xsd_anyuri_as_iri=True" + + +def test_xsd_anyuri_as_iri_curie_range_unchanged(): + """``curie`` maps to ``xsd:string`` (not ``xsd:anyURI``), so the flag + must NOT affect its coercion. + + This documents the cross-type boundary: ``uri`` and ``uriorcurie`` + share ``xsd:anyURI``, but ``curie`` uses ``xsd:string``. + """ + schema_yaml = """ +id: https://example.org/test-curie +name: test_curie + +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + +imports: + - linkml:types + +default_prefix: ex +default_range: string + +slots: + curie_slot: + range: curie + slot_uri: ex:curieSlot + uri_slot: + range: uri + slot_uri: ex:uriSlot + +classes: + Thing: + slots: + - curie_slot + - uri_slot +""" + ctx_default = json.loads(ContextGenerator(schema_yaml).serialize())["@context"] + ctx_iri = json.loads(ContextGenerator(schema_yaml, xsd_anyuri_as_iri=True).serialize())["@context"] + + # curie (xsd:string) must be unaffected by the flag + curie_default = ctx_default.get("curie_slot", {}).get("@type") + curie_iri = ctx_iri.get("curie_slot", {}).get("@type") + assert curie_default == curie_iri, f"curie coercion should not change with flag: {curie_default} vs {curie_iri}" + + # uri (xsd:anyURI) must change — sanity check + assert ctx_iri["uri_slot"]["@type"] == "@id" + + +def test_xsd_anyuri_as_iri_owl_curie_unchanged(): + """OWL generator must keep ``range: curie`` as DatatypeProperty even with flag. + + ``curie`` maps to ``xsd:string`` (not ``xsd:anyURI``), so the + ``--xsd-anyuri-as-iri`` flag must not promote it to ObjectProperty. + This verifies cross-generator consistency: the JSON-LD context generator + already correctly excludes ``curie`` via ``URI_RANGES_WITH_XSD``; the + OWL generator must match via ``is_xsd_anyuri_range()``. + """ + from rdflib import OWL, RDF, URIRef + + from linkml.generators.owlgen import OwlSchemaGenerator + + schema_yaml = """ +id: https://example.org/test-owl-curie +name: test_owl_curie +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + compact_id: + range: curie + slot_uri: ex:compactId + homepage: + range: uri + slot_uri: ex:homepage +classes: + Thing: + slots: + - compact_id + - homepage +""" + compact_id_uri = URIRef("https://example.org/compact_id") + homepage_uri = URIRef("https://example.org/homepage") + + # With flag: curie must stay DatatypeProperty, uri must become ObjectProperty + gen = OwlSchemaGenerator(schema_yaml, xsd_anyuri_as_iri=True, type_objects=False) + g = gen.as_graph() + + curie_types = set(g.objects(compact_id_uri, RDF.type)) + assert OWL.DatatypeProperty in curie_types, f"curie slot must remain DatatypeProperty with flag, got {curie_types}" + assert OWL.ObjectProperty not in curie_types, ( + f"curie slot must NOT become ObjectProperty with flag, got {curie_types}" + ) + + # Sanity: uri must become ObjectProperty + uri_types = set(g.objects(homepage_uri, RDF.type)) + assert OWL.ObjectProperty in uri_types, f"uri slot should be ObjectProperty with flag, got {uri_types}" + + +def test_xsd_anyuri_as_iri_cli_flag(): + """Verify the ``--xsd-anyuri-as-iri`` flag is wired through Click.""" + import tempfile + from pathlib import Path + + from click.testing import CliRunner + + from linkml.generators.jsonldcontextgen import cli + + schema_yaml = """ +id: https://example.org/test-cli +name: test_cli + +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + +imports: + - linkml:types + +default_prefix: ex +default_range: string + +slots: + homepage: + range: uri + slot_uri: ex:homepage + +classes: + Thing: + slots: + - homepage +""" + with tempfile.TemporaryDirectory() as tmpdir: + schema_path = Path(tmpdir) / "test.yaml" + schema_path.write_text(schema_yaml) + + runner = CliRunner() + + # Without flag + result_default = runner.invoke(cli, [str(schema_path)]) + assert result_default.exit_code == 0, result_default.output + ctx_default = json.loads(result_default.output)["@context"] + assert ctx_default["homepage"]["@type"] == "xsd:anyURI" + + # With flag + result_iri = runner.invoke(cli, [str(schema_path), "--xsd-anyuri-as-iri"]) + assert result_iri.exit_code == 0, result_iri.output + ctx_iri = json.loads(result_iri.output)["@context"] + assert ctx_iri["homepage"]["@type"] == "@id" From 7d27a1920b7d20736a87df35d56a9fd21eb326e6 Mon Sep 17 00:00:00 2001 From: Jonny Saunders Date: Mon, 4 May 2026 11:08:31 -0700 Subject: [PATCH 2/3] Use `linkml.validator` in `linkml.utils.converter` Co-authored-by: Patrick Kalita --- packages/linkml/src/linkml/converter/cli.py | 10 ++++++---- .../linkml/generators/pydanticgen/array.py | 20 +++++++++---------- .../linkml/src/linkml/utils/exceptions.py | 10 +++++++++- .../linkml/src/linkml/validator/__init__.py | 3 +-- .../linkml/src/linkml/validator/report.py | 18 +++++++++++++++++ .../test_generators/test_pydanticgen.py | 2 +- tests/linkml/test_validator/test_validator.py | 4 ++++ 7 files changed, 49 insertions(+), 18 deletions(-) diff --git a/packages/linkml/src/linkml/converter/cli.py b/packages/linkml/src/linkml/converter/cli.py index e8963e9cc5..397493612a 100644 --- a/packages/linkml/src/linkml/converter/cli.py +++ b/packages/linkml/src/linkml/converter/cli.py @@ -2,6 +2,7 @@ import os import pathlib import sys +from typing import TYPE_CHECKING import click import yaml @@ -27,6 +28,9 @@ from linkml_runtime.utils.inference_utils import infer_all_slot_values from linkml_runtime.utils.schemaview import SchemaView +if TYPE_CHECKING: + from linkml_runtime.utils.yamlutils import YAMLRoot + logger = logging.getLogger(__name__) @@ -197,7 +201,7 @@ def cli( target_class = infer_root_class(sv) if target_class is None: raise Exception("target class not specified and could not be inferred") - py_target_class = python_module.__dict__[target_class] + py_target_class: YAMLRoot = python_module.__dict__[target_class] input_format = _get_format(input, input_format) loader = get_loader(input_format) @@ -237,9 +241,7 @@ def cli( raise Exception("--schema must be passed in order to validate. Suppress with --no-validate") obj_dict = json_dumper.to_dict(obj) report = run_validation(obj_dict, schema, target_class) - if report.results: - errors = "\n".join(r.message for r in report.results) - raise Exception(f"Validation failed:\n{errors}") + report.raise_for_results() output_format = _get_format(output, output_format, default="json") if output_format == "json-ld": diff --git a/packages/linkml/src/linkml/generators/pydanticgen/array.py b/packages/linkml/src/linkml/generators/pydanticgen/array.py index 1475d37f11..d1f04eb97e 100644 --- a/packages/linkml/src/linkml/generators/pydanticgen/array.py +++ b/packages/linkml/src/linkml/generators/pydanticgen/array.py @@ -18,7 +18,7 @@ from linkml.generators.pydanticgen.build import RangeResult from linkml.generators.pydanticgen.template import ConditionalImport, Import, Imports, ObjectImport -from linkml.utils.exceptions import ValidationError +from linkml.utils.exceptions import SchemaValidationError class ArrayRepresentation(Enum): @@ -77,7 +77,7 @@ def validate(cls, array: ArrayExpression): Validate an array expression. Raises: - :class:`.ValidationError` if invalid + :class:`.SchemaValidationError` if invalid """ cls.array_exact_dimensions(array) cls.array_consistent_n_dimensions(array) @@ -94,7 +94,7 @@ def validate_dimension(cls, dimension: DimensionExpression): Validate a single array dimension Raises: - :class:`.ValidationError` if invalid + :class:`.SchemaValidationError` if invalid """ cls.dimension_exact_cardinality(dimension) cls.dimension_ordinal(dimension) @@ -105,7 +105,7 @@ def array_exact_dimensions(array: ArrayExpression): if array.exact_number_dimensions is not None and ( array.minimum_number_dimensions is not None or array.maximum_number_dimensions is not None ): - raise ValidationError( + raise SchemaValidationError( f"Can only specify EITHER exact_number_dimensions OR minimum/maximum dimensions, got: {array}" ) @@ -121,7 +121,7 @@ def array_consistent_n_dimensions(array: ArrayExpression): for field_name in _BOUNDED_ARRAY_FIELDS: field = getattr(array, field_name, None) if field and field < len(array.dimensions): - raise ValidationError( + raise SchemaValidationError( "if exact/minimum/maximum_number_dimensions is provided, " "it must be greater than the parameterized dimensions. " f"got\n- {field_name}: {field}\n- dimensions: {array.dimensions}" @@ -134,7 +134,7 @@ def array_dimensions_ordinal(array: ArrayExpression): """ if array.minimum_number_dimensions is not None and array.maximum_number_dimensions: if array.minimum_number_dimensions > array.maximum_number_dimensions: - raise ValidationError( + raise SchemaValidationError( "minimum_number_dimensions must be lesser than maximum_number_dimensions when both are set. " f"got minimum: {array.minimum_number_dimensions}, maximum: {array.maximum_number_dimensions}" ) @@ -148,7 +148,7 @@ def array_explicitly_unbounded(array: ArrayExpression): dimensions to avoid ambiguity. """ if array.minimum_number_dimensions is not None and array.maximum_number_dimensions is None and array.dimensions: - raise ValidationError( + raise SchemaValidationError( "Cannot specify a minimum_number_dimensions while maximum is None while using labeled dimensions - " "either use exact_number_dimensions > len(dimensions) for extra parameterized dimensions or set " "maximum_number_dimensions explicitly to False for unbounded dimensions" @@ -160,7 +160,7 @@ def dimension_exact_cardinality(dimension: DimensionExpression): if dimension.exact_cardinality is not None and ( dimension.minimum_cardinality is not None or dimension.maximum_cardinality is not None ): - raise ValidationError( + raise SchemaValidationError( f"Can only specify EITHER exact_cardinality OR minimum/maximum cardinality, got: {dimension}" ) @@ -169,7 +169,7 @@ def dimension_ordinal(dimension: DimensionExpression): """minimum_cardinality must be less than maximum_cardinality when both are set""" if dimension.minimum_cardinality is not None and dimension.maximum_cardinality is not None: if dimension.minimum_cardinality > dimension.maximum_cardinality: - raise ValidationError( + raise SchemaValidationError( "minimum_cardinality must be lesser than maximum_cardinality when both are set. " f"got minimum: {dimension.minimum_cardinality}, maximum: {dimension.maximum_cardinality}" ) @@ -232,7 +232,7 @@ def validate(self): rather than when an array is generated Raises: - :class:`.ValidationError` if the schema is invalid + :class:`.SchemaValidationError` if the schema is invalid """ ArrayValidator.validate(self.array) diff --git a/packages/linkml/src/linkml/utils/exceptions.py b/packages/linkml/src/linkml/utils/exceptions.py index 34038bb2e3..e83467595b 100644 --- a/packages/linkml/src/linkml/utils/exceptions.py +++ b/packages/linkml/src/linkml/utils/exceptions.py @@ -7,5 +7,13 @@ class SchemaError(ValueError): """Base class for errors relating to schema specification, parsing, structure, etc.""" -class ValidationError(SchemaError): +class DataError(ValueError): + """Base class for errors relating to linkml instance data""" + + +class SchemaValidationError(SchemaError): """Schema is invalid!""" + + +class ValidationError(DataError): + """Data is invalid relative to a schema""" diff --git a/packages/linkml/src/linkml/validator/__init__.py b/packages/linkml/src/linkml/validator/__init__.py index e437750996..1654185b43 100644 --- a/packages/linkml/src/linkml/validator/__init__.py +++ b/packages/linkml/src/linkml/validator/__init__.py @@ -61,8 +61,7 @@ def validate( :param strict: If ``True``, validation will stop after the first validation error is found, Otherwise all validation problems will be reported. Defaults to ``False``. - :raises ValueError: If a valid ``SchemaDefinition`` cannot be constructed - from the ``schema`` parameter. + :raises ValidationError: If requested to raise and validation errors are found. :return: A validation report :rtype: ValidationReport """ diff --git a/packages/linkml/src/linkml/validator/report.py b/packages/linkml/src/linkml/validator/report.py index ce9cd18169..e846106620 100644 --- a/packages/linkml/src/linkml/validator/report.py +++ b/packages/linkml/src/linkml/validator/report.py @@ -1,8 +1,12 @@ +import textwrap from enum import Enum +from pprint import pformat from typing import Any from pydantic import BaseModel, Field +from linkml.utils.exceptions import ValidationError + class Severity(str, Enum): """ @@ -33,6 +37,9 @@ class ValidationResult(BaseModel): # The source object that caused this validation result source: Any = Field(None, description="The source of this validation result", exclude=True) + def __str__(self) -> str: + return pformat(self.model_dump(exclude_none=True, exclude_unset=True)) + class ValidationReport(BaseModel): """ @@ -42,3 +49,14 @@ class ValidationReport(BaseModel): """ results: list[ValidationResult] + + def raise_for_results(self) -> None: + """ + If any results, raise them as a :class:`.ValidationError` + + :raises ValidationError: if any validation results are present + """ + if self.results: + res = textwrap.indent("\n".join(str(res) for res in self.results), " ") + msg = f"Error(s) validating data: \n{res}" + raise ValidationError(msg) diff --git a/tests/linkml/test_generators/test_pydanticgen.py b/tests/linkml/test_generators/test_pydanticgen.py index 7e78b9bc92..600bb3a76d 100644 --- a/tests/linkml/test_generators/test_pydanticgen.py +++ b/tests/linkml/test_generators/test_pydanticgen.py @@ -39,7 +39,7 @@ PydanticTemplateModel, PydanticValidator, ) -from linkml.utils.exceptions import ValidationError as ArrayValidationError +from linkml.utils.exceptions import SchemaValidationError as ArrayValidationError from linkml_runtime import SchemaView from linkml_runtime.dumpers import yaml_dumper from linkml_runtime.linkml_model import ClassDefinition, Definition, SchemaDefinition, SlotDefinition diff --git a/tests/linkml/test_validator/test_validator.py b/tests/linkml/test_validator/test_validator.py index c3a6a25cba..4e98461cc5 100644 --- a/tests/linkml/test_validator/test_validator.py +++ b/tests/linkml/test_validator/test_validator.py @@ -2,6 +2,7 @@ import pytest +from linkml.utils.exceptions import ValidationError from linkml.validator import Validator from linkml.validator.loaders import Loader from linkml.validator.plugins import ValidationPlugin @@ -65,6 +66,9 @@ def test_validate_invalid_instance(): report = validator.validate({"foo": "bar"}) assert len(report.results) == 10 + with pytest.raises(ValidationError, match=r"Error\(s\) validating data.*"): + report.raise_for_results() + def test_validate_multiple_plugins(): plugins = [ From 781a968ccdd3d67314aa2644ee6be56d93272436 Mon Sep 17 00:00:00 2001 From: Miles Wirht <114884788+philocalyst@users.noreply.github.com> Date: Mon, 4 May 2026 15:14:39 -0400 Subject: [PATCH 3/3] terminusdb: updates to the modern schema format --- .../src/linkml/generators/terminusdbgen.py | 157 +++++++----- .../test_generators/test_terminusdbgen.py | 237 ++++++++++++++++++ 2 files changed, 327 insertions(+), 67 deletions(-) create mode 100644 tests/linkml/test_generators/test_terminusdbgen.py diff --git a/packages/linkml/src/linkml/generators/terminusdbgen.py b/packages/linkml/src/linkml/generators/terminusdbgen.py index 9935261cce..f2f279cd6a 100644 --- a/packages/linkml/src/linkml/generators/terminusdbgen.py +++ b/packages/linkml/src/linkml/generators/terminusdbgen.py @@ -1,23 +1,17 @@ import json import os -import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field import click -from linkml_runtime.linkml_model.meta import ClassDefinition, SlotDefinition -from linkml_runtime.utils.formatutils import be, camelcase, underscore - -try: - from terminusdb_client.woqlquery import WOQLQuery as WQ -except ImportError: - WQ = None - from linkml._version import __version__ from linkml.utils.generator import Generator, shared_arguments +from linkml_runtime.linkml_model.meta import ClassDefinition, EnumDefinition, SlotDefinition +from linkml_runtime.utils.formatutils import be, camelcase, underscore +# TerminusDB XSD types supported as property ranges. # https://terminusdb.com/docs/terminusdb/#/reference/XSD_WHITELIST -XSD_Ok = { +XSD_OK = { f"xsd:{t}" for t in [ "string", @@ -29,7 +23,6 @@ "dateTime", "byte", "short", - "integer", "long", "positiveInteger", "nonNegativeInteger", @@ -39,106 +32,136 @@ ] } +# Map LinkML XSD types not natively supported by TerminusDB to supported equivalents. +XSD_TRANSLATE = { + "xsd:int": "xsd:integer", + "xsd:language": "xsd:string", + "xsd:date": "xsd:dateTime", + "xsd:time": "xsd:dateTime", +} + @dataclass class TerminusdbGenerator(Generator): - """ - Experimental generator for TerminusDB - - Generates JSON-LD to pass to `WOQLQuery()`. - - Assumes an "inference/main" graph if any slots have "is_a" values, because any statements with - rdfs:subPropertyOf as the predicate must live in a TerminusDB "inference" graph rather than the - "schema" graph. When creating a new TerminusDB database, only the "schema" and "instance" graphs - are created. Thus, you may need to e.g. `WOQLClient.create_graph("inference", "main")`. + """Generator for TerminusDB JSON-LD schema documents. + Produces a JSON array of schema documents compatible with the + TerminusDB v10+ document interface. The output can be loaded via + ``client.insert_document(docs, graph_type="schema")``. """ # ClassVars generatorname = os.path.basename(__file__) - generatorversion = "0.1.0" + generatorversion = "0.2.0" valid_formats = ["json"] visit_all_class_slots = True uses_schemaloader = True # ObjectVars - classes: list = None - raw_additions: list = None - clswq: str = None - - def __post_init__(self): - if WQ is None: - warnings.warn("terminusdb_client is not a requirement of this package, please install it separately") + documents: list = field(default_factory=list) + current_class_doc: dict = field(default_factory=dict) def visit_schema(self, inline: bool = False, **kwargs) -> None: - self.classes = [] - self.raw_additions = [] + self.documents = [] + schema_id = str(self.schema.id) if self.schema.id else "terminusdb:///schema" + schema_base = schema_id.rstrip("/").rstrip("#") + "#" + data_base = schema_id.rstrip("/").rstrip("#").rsplit("/", 1)[0] + "/data/" + self.documents.append( + { + "@type": "@context", + "@documentation": { + "@title": self.schema.title or self.schema.name or "", + "@description": be(self.schema.description) or "", + }, + "@schema": schema_base, + "@base": data_base, + } + ) def end_schema(self, **_) -> str: - return json.dumps(WQ().woql_and(*self.classes, *self.raw_additions).to_dict(), indent=2) + # Emit enum documents + for enum_def in self.schema.enums.values(): + self._emit_enum(enum_def) + return json.dumps(self.documents, indent=2) + + def _emit_enum(self, enum_def: EnumDefinition) -> None: + """Produce a TerminusDB Enum document.""" + doc = { + "@type": "Enum", + "@id": camelcase(enum_def.name), + "@value": [str(pv) for pv in enum_def.permissible_values], + } + self.documents.append(doc) def visit_class(self, cls: ClassDefinition) -> bool: - self.clswq = WQ().add_class(camelcase(cls.name)).label(camelcase(cls.name)).description(be(cls.description)) + doc = { + "@type": "Class", + "@id": camelcase(cls.name), + } + if cls.description: + doc["@documentation"] = {"@comment": be(cls.description), "@properties": {}} if cls.is_a: - self.clswq.parent(camelcase(cls.is_a)) + doc["@inherits"] = [camelcase(cls.is_a)] if cls.abstract: - self.clswq.abstract() + doc["@abstract"] = [] if cls.broad_mappings: if any( str(self.namespaces.uri_for(m)) == "http://terminusdb.com/schema/system#Document" for m in cls.broad_mappings ): - self.clswq.parent("Document") + doc.setdefault("@inherits", []).append("Document") + self.current_class_doc = doc return True def end_class(self, cls: ClassDefinition) -> None: - self.classes.append(self.clswq) + self.documents.append(self.current_class_doc) def visit_class_slot(self, cls: ClassDefinition, aliased_slot_name: str, slot: SlotDefinition) -> None: + rng = self._resolve_range(slot) + prop_name = underscore(aliased_slot_name) + + # Determine cardinality wrapper + if slot.multivalued: + if slot.inlined_as_list: + prop_value = {"@type": "List", "@class": rng} + else: + prop_value = {"@type": "Set", "@class": rng} + elif not slot.required: + prop_value = {"@type": "Optional", "@class": rng} + else: + prop_value = rng + + self.current_class_doc[prop_name] = prop_value + + # Add property documentation, lazily initializing @documentation if needed + if slot.description: + doc = self.current_class_doc.setdefault("@documentation", {"@comment": "", "@properties": {}}) + doc.setdefault("@properties", {})[prop_name] = slot.description + + def _resolve_range(self, slot: SlotDefinition) -> str: + """Resolve a slot range to a TerminusDB type string.""" if slot.range in self.schema.classes: - rng = camelcase(slot.range) - elif slot.range in self.schema.types: - # XXX Why does `linkml_runtime.utils.metamodelcore.Identifier` subclass `str`?? + return camelcase(slot.range) + if slot.range in self.schema.enums: + return camelcase(slot.range) + if slot.range in self.schema.types: rng = str(self.schema.types[slot.range].uri) else: rng = "xsd:string" - name = f"{cls.name} {aliased_slot_name}" if slot.is_usage_slot else aliased_slot_name + rng = XSD_TRANSLATE.get(rng, rng) - # translate to terminusdb xsd builtins: - if rng == "xsd:int": - rng = "xsd:integer" - elif rng == "xsd:float": - rng = "xsd:double" - elif rng == "xsd:language": + if rng not in XSD_OK: rng = "xsd:string" - if rng not in XSD_Ok and slot.range not in self.schema.classes: - raise Exception( - f"slot range for {name} must be schema class or supported xsd type. Range {rng} is of type {type(rng)}." - ) - - self.clswq.property(underscore(name), rng, label=name, description=slot.description) - if not slot.multivalued: - self.clswq.max(1) - if slot.required: - self.clswq.min(1) - if slot.is_a: - self.raw_additions.append( - WQ().add_quad( - underscore(name), - "rdfs:subPropertyOf", - self.clswq.iri(underscore(slot.is_a)), - "inference/main", - ) - ) + return rng @shared_arguments(TerminusdbGenerator) @click.version_option(__version__, "-V", "--version") @click.command(name="terminusdb") def cli(yamlfile, **args): - """Generate graphql representation of a LinkML model""" + """Generate TerminusDB JSON-LD schema from a LinkML model""" print(TerminusdbGenerator(yamlfile, **args).serialize(**args)) diff --git a/tests/linkml/test_generators/test_terminusdbgen.py b/tests/linkml/test_generators/test_terminusdbgen.py new file mode 100644 index 0000000000..0c75470246 --- /dev/null +++ b/tests/linkml/test_generators/test_terminusdbgen.py @@ -0,0 +1,237 @@ +import json + +import pytest +from click.testing import CliRunner + +from linkml.generators.terminusdbgen import XSD_TRANSLATE, TerminusdbGenerator, cli +from linkml_runtime.linkml_model import SlotDefinition +from linkml_runtime.utils.schema_builder import SchemaBuilder + +SIMPLE_SCHEMA = """\ +id: https://example.org/test +name: test_schema +title: Test Schema +description: A test schema for TerminusDB generation + +prefixes: + linkml: https://w3id.org/linkml/ + xsd: http://www.w3.org/2001/XMLSchema# + +types: + string: + base: str + uri: xsd:string + integer: + base: int + uri: xsd:integer + boolean: + base: Bool + uri: xsd:boolean + +classes: + Person: + description: A human being + slots: + - name + - age + + Employee: + is_a: Person + description: A person with a job + slots: + - employer + +slots: + name: + range: string + required: true + description: The name of the entity + age: + range: integer + employer: + range: string +""" + + +def _generate(schema) -> list[dict]: + """Serialize a schema with the TerminusDB generator and return parsed JSON.""" + return json.loads(TerminusdbGenerator(schema).serialize()) + + +def test_context_document(): + """First document must be a valid TerminusDB @context.""" + docs = _generate(SIMPLE_SCHEMA) + ctx = docs[0] + assert ctx["@type"] == "@context" + assert ctx["@schema"].endswith("#") + assert ctx["@base"].endswith("/") + assert ctx["@documentation"]["@title"] == "Test Schema" + assert "test schema" in ctx["@documentation"]["@description"].lower() + + +def test_class_type_and_id(): + docs = _generate(SIMPLE_SCHEMA) + person = next(d for d in docs if d.get("@id") == "Person") + assert person["@type"] == "Class" + + +def test_class_description(): + docs = _generate(SIMPLE_SCHEMA) + person = next(d for d in docs if d.get("@id") == "Person") + assert person["@documentation"]["@comment"] == "A human being" + + +def test_inheritance(): + docs = _generate(SIMPLE_SCHEMA) + employee = next(d for d in docs if d.get("@id") == "Employee") + assert "Person" in employee["@inherits"] + + +def test_abstract_class(): + sb = SchemaBuilder("test") + sb.add_defaults() + sb.add_class("Thing", slots=[]) + sb.schema.classes["Thing"].abstract = True + docs = _generate(sb.schema) + thing = next(d for d in docs if d.get("@id") == "Thing") + assert "@abstract" in thing + + +def test_required_slot_is_plain_range(): + docs = _generate(SIMPLE_SCHEMA) + person = next(d for d in docs if d.get("@id") == "Person") + assert person["name"] == "xsd:string" + + +def test_optional_slot_has_optional_wrapper(): + docs = _generate(SIMPLE_SCHEMA) + person = next(d for d in docs if d.get("@id") == "Person") + assert person["age"] == {"@type": "Optional", "@class": "xsd:integer"} + + +def test_multivalued_set(): + sb = SchemaBuilder("test") + sb.add_defaults() + tags = SlotDefinition(name="tags", range="string", multivalued=True) + sb.add_class("Item", slots=[tags]) + docs = _generate(sb.schema) + item = next(d for d in docs if d.get("@id") == "Item") + assert item["tags"]["@type"] == "Set" + + +def test_multivalued_list(): + sb = SchemaBuilder("test") + sb.add_defaults() + items = SlotDefinition(name="items", range="string", multivalued=True, inlined_as_list=True) + sb.add_class("Container", slots=[items]) + docs = _generate(sb.schema) + container = next(d for d in docs if d.get("@id") == "Container") + assert container["items"]["@type"] == "List" + + +def test_slot_description_in_documentation(): + docs = _generate(SIMPLE_SCHEMA) + person = next(d for d in docs if d.get("@id") == "Person") + assert person["@documentation"]["@properties"]["name"] == "The name of the entity" + + +@pytest.mark.parametrize( + "xsd_uri,expected", + [ + ("xsd:int", "xsd:integer"), + ("xsd:language", "xsd:string"), + ("xsd:date", "xsd:dateTime"), + ("xsd:time", "xsd:dateTime"), + ], +) +def test_xsd_translation(xsd_uri, expected): + assert XSD_TRANSLATE[xsd_uri] == expected + + +@pytest.mark.parametrize( + "xsd_uri", + ["xsd:float", "xsd:nonNegativeInteger", "xsd:positiveInteger"], +) +def test_xsd_ok_types_not_downgraded(xsd_uri): + """Types supported natively by TerminusDB MUST NOT be in XSD_TRANSLATE.""" + assert xsd_uri not in XSD_TRANSLATE + + +def test_slot_description_without_class_description(): + """Slot descriptions should be captured even when the class is descriptionless.""" + sb = SchemaBuilder("test") + sb.add_defaults() + slot = SlotDefinition(name="label", range="string", required=True, description="A human-readable label") + sb.add_class("Thing", slots=[slot]) + docs = _generate(sb.schema) + thing = next(d for d in docs if d.get("@id") == "Thing") + assert "@documentation" in thing + assert thing["@documentation"]["@properties"]["label"] == "A human-readable label" + + +def test_class_range_uses_camelcase(): + sb = SchemaBuilder("test") + sb.add_defaults() + sb.add_class("Address", slots=[]) + ref = SlotDefinition(name="home_address", range="Address", required=True) + sb.add_class("Person", slots=[ref]) + docs = _generate(sb.schema) + person = next(d for d in docs if d.get("@id") == "Person") + assert person["home_address"] == "Address" + + +def test_unsupported_xsd_falls_back_to_string(): + sb = SchemaBuilder("test") + sb.add_defaults() + sb.add_type("token_type", typeof="string") + sb.schema.types["token_type"].uri = "xsd:token" + slot = SlotDefinition(name="tok", range="token_type", required=True) + sb.add_class("Thing", slots=[slot]) + docs = _generate(sb.schema) + thing = next(d for d in docs if d.get("@id") == "Thing") + assert thing["tok"] == "xsd:string" + + +def test_enum_emitted(): + sb = SchemaBuilder("test") + sb.add_defaults() + sb.add_enum("Color", ["RED", "GREEN", "BLUE"]) + docs = _generate(sb.schema) + color = next(d for d in docs if d.get("@id") == "Color") + assert color["@type"] == "Enum" + assert set(color["@value"]) == {"RED", "GREEN", "BLUE"} + + +def test_enum_as_slot_range(): + sb = SchemaBuilder("test") + sb.add_defaults() + sb.add_enum("Status", ["ACTIVE", "INACTIVE"]) + slot = SlotDefinition(name="status", range="Status", required=True) + sb.add_class("Record", slots=[slot]) + docs = _generate(sb.schema) + record = next(d for d in docs if d.get("@id") == "Record") + assert record["status"] == "Status" + + +def test_organization_schema(input_path): + """The organization example schema should produce valid TerminusDB output.""" + docs = _generate(str(input_path("organization.yaml"))) + assert docs[0]["@type"] == "@context" + class_ids = {d["@id"] for d in docs if d.get("@type") == "Class"} + assert "Organization" in class_ids + assert "Employee" in class_ids + assert "Manager" in class_ids + + manager = next(d for d in docs if d.get("@id") == "Manager") + assert "Employee" in manager["@inherits"] + assert manager["has_employees"]["@type"] == "List" + + +def test_cli(input_path): + """The gen-terminusdb CLI should emit valid JSON to stdout.""" + runner = CliRunner() + result = runner.invoke(cli, [str(input_path("organization.yaml"))]) + assert result.exit_code == 0 + docs = json.loads(result.output) + assert docs[0]["@type"] == "@context" + assert any(d.get("@type") == "Class" for d in docs)