diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index 2f2e6b5d1e..bc46ea8023 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -14,7 +14,7 @@ from linkml.generators.shacl.shacl_data_type import ShaclDataType from linkml.generators.shacl.shacl_ifabsent_processor import ShaclIfAbsentProcessor from linkml.utils.generator import Generator, shared_arguments -from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName +from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName, PresenceEnum from linkml_runtime.utils.formatutils import underscore from linkml_runtime.utils.yamlutils import TypedNode, extended_float, extended_int, extended_str @@ -74,6 +74,19 @@ class ShaclGenerator(Generator): """ expand_subproperty_of: bool = True """If True, expand subproperty_of to sh:in constraints with slot descendants""" + + emit_rules: bool = True + """Emit ``sh:sparql`` constraints from LinkML ``rules:`` blocks. + + When ``True`` (default), recognised rule patterns are translated into + SHACL-SPARQL constraints (``sh:SPARQLConstraint``) on the corresponding + ``sh:NodeShape``. Currently the *boolean-guard* pattern is recognised: + a precondition with ``value_presence: PRESENT`` on a value slot and a + postcondition with ``equals_string: "true"`` on a boolean flag slot. + + See `W3C SHACL §5 `_ + and `linkml/linkml#2464 `_. + """ generatorname = os.path.basename(__file__) generatorversion = "0.0.1" valid_formats = ["ttl"] @@ -283,10 +296,147 @@ def st_node_pv(p, v): if default_value: prop_pv(SH.defaultValue, default_value) + if self.emit_rules: + self._add_rules(g, class_uri_with_suffix, c) + return g LINKML_ANY_URI = "https://w3id.org/linkml/Any" + # ------------------------------------------------------------------- + # Rules → sh:sparql + # ------------------------------------------------------------------- + + def _add_rules(self, g: Graph, shape_uri: URIRef, cls: ClassDefinition) -> None: + """Emit ``sh:sparql`` constraints from LinkML ``rules:`` blocks. + + Each recognised rule is converted into an ``sh:SPARQLConstraint`` + attached to *shape_uri*. Unrecognised patterns are logged at + ``DEBUG`` level and silently skipped. + + Currently recognised patterns: + + * **Boolean guard** — a *precondition* with + ``value_presence: PRESENT`` on a value slot and a *postcondition* + with ``equals_string: "true"`` on a boolean flag slot. + + See `W3C SHACL §5 `_. + """ + if not cls.rules: + return + + sv = self.schemaview + for rule in cls.rules: + if getattr(rule, "deactivated", False): + continue + + if getattr(rule, "bidirectional", False): + logger.warning( + "Rule in class %r has bidirectional=true; " + "SHACL-SPARQL generation does not yet support bidirectional rules. " + "Only the forward direction is emitted.", + cls.name, + ) + + if getattr(rule, "open_world", False): + logger.warning( + "Rule in class %r has open_world=true; " + "SHACL operates under closed-world assumption. " + "The constraint is emitted but may not match open-world semantics.", + cls.name, + ) + + sparql_query = self._rule_to_sparql(sv, cls, rule) + if sparql_query is None: + logger.debug( + "Skipping unsupported rule pattern in class %r: %s", + cls.name, + getattr(rule, "description", "(no description)"), + ) + continue + + constraint = BNode() + g.add((shape_uri, SH.sparql, constraint)) + g.add((constraint, RDF.type, SH.SPARQLConstraint)) + + message = getattr(rule, "description", None) + if message: + g.add((constraint, SH.message, Literal(message))) + + g.add((constraint, SH.select, Literal(sparql_query))) + + def _rule_to_sparql(self, sv, cls: ClassDefinition, rule) -> str | None: + """Convert a ``ClassRule`` to a SPARQL SELECT query string. + + Returns ``None`` when the rule does not match any supported pattern. + """ + pre = getattr(rule, "preconditions", None) + post = getattr(rule, "postconditions", None) + if not pre or not post: + return None + + pre_slots = getattr(pre, "slot_conditions", None) or {} + post_slots = getattr(post, "slot_conditions", None) or {} + + # Pattern: boolean guard + # preconditions: exactly one slot with value_presence PRESENT + # postconditions: exactly one slot with equals_string "true" + if len(pre_slots) == 1 and len(post_slots) == 1: + value_slot_name = next(iter(pre_slots)) + flag_slot_name = next(iter(post_slots)) + + value_cond = pre_slots[value_slot_name] + flag_cond = post_slots[flag_slot_name] + + is_value_present = ( + getattr(value_cond, "value_presence", None) == PresenceEnum(PresenceEnum.PRESENT) + ) + is_flag_true = getattr(flag_cond, "equals_string", None) == "true" + + if is_value_present and is_flag_true: + return self._build_boolean_guard_sparql(sv, cls, flag_slot_name, value_slot_name) + + return None + + def _build_boolean_guard_sparql( + self, sv, cls: ClassDefinition, flag_slot_name: str, value_slot_name: str + ) -> str: + """Build a SPARQL SELECT query for the boolean-guard pattern. + + The query detects violations where the value property is present + but the boolean flag is absent or not ``true``. + + Conforms to `SHACL §5.3.1 + `_: + ``$this`` is pre-bound to each focus node. + """ + flag_uri = self._slot_uri(sv, flag_slot_name, cls) + value_uri = self._slot_uri(sv, value_slot_name, cls) + + return ( + f"SELECT $this WHERE {{\n" + f" OPTIONAL {{ $this <{flag_uri}> ?flag . }}\n" + f" OPTIONAL {{ $this <{value_uri}> ?value . }}\n" + f" FILTER (\n" + f" ( !BOUND(?flag) || ?flag != true ) &&\n" + f" BOUND(?value)\n" + f" )\n" + f"}}" + ) + + def _slot_uri(self, sv, slot_name: str, cls: ClassDefinition) -> str: + """Resolve a slot name to a full IRI string for use in SPARQL queries. + + Mirrors the resolution logic used for ``sh:path`` in the main slot loop: + prefer ``sv.get_uri()`` for slots registered in the schema map, fall + back to ``default_prefix:underscored_name``. + """ + slot = sv.get_slot(slot_name) + if slot and slot_name in sv.element_by_schema_map(): + return sv.get_uri(slot, expand=True) + pfx = sv.schema.default_prefix + return sv.expand_curie(f"{pfx}:{underscore(slot_name)}") + def _add_class(self, func: Callable, r: ElementName) -> None: """Add an sh:class constraint for range class *r*. @@ -526,6 +676,17 @@ def add_simple_data_type(func: Callable, r: ElementName) -> None: help="If --expand-subproperty-of (default), slots with subproperty_of will generate sh:in constraints " "containing all slot descendants. Use --no-expand-subproperty-of to disable this behavior.", ) +@click.option( + "--emit-rules/--no-emit-rules", + default=True, + show_default=True, + help=( + "Emit sh:sparql constraints from LinkML rules: blocks. " + "When enabled (default), recognised rule patterns (e.g. boolean-guard) " + "are translated into SHACL-SPARQL constraints on the corresponding " + "sh:NodeShape. Use --no-emit-rules to suppress rule generation." + ), +) @click.version_option(__version__, "-V", "--version") def cli(yamlfile, **args): """Generate SHACL turtle from a LinkML model""" diff --git a/tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml b/tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml new file mode 100644 index 0000000000..f56c2eca6a --- /dev/null +++ b/tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml @@ -0,0 +1,70 @@ +id: https://example.org/boolean-guards +name: boolean_guard_rules +description: >- + Test schema for SHACL generation of sh:sparql constraints from LinkML rules. + Models the boolean-guard pattern where a boolean flag must be true if a + corresponding value property is present. + +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/boolean-guards/ + +imports: + - linkml:types + +default_prefix: ex +default_range: string + +slots: + WeatherWind: + description: Whether wind conditions are present. + range: boolean + slot_uri: ex:WeatherWind + weatherWindValue: + description: Wind speed value. + range: decimal + slot_uri: ex:weatherWindValue + WeatherRain: + description: Whether rain conditions are present. + range: boolean + slot_uri: ex:WeatherRain + weatherRainValue: + description: Rain intensity value. + range: decimal + slot_uri: ex:weatherRainValue + Temperature: + description: Ambient temperature. + range: decimal + slot_uri: ex:Temperature + +classes: + Environment: + description: Environmental conditions. + class_uri: ex:Environment + slots: + - WeatherWind + - weatherWindValue + - WeatherRain + - weatherRainValue + - Temperature + rules: + - description: >- + If weatherWindValue is provided, WeatherWind must be true. + preconditions: + slot_conditions: + weatherWindValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherWind: + equals_string: "true" + - description: >- + If weatherRainValue is provided, WeatherRain must be true. + preconditions: + slot_conditions: + weatherRainValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherRain: + equals_string: "true" diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index c99547df7e..a1aa621e4c 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -1160,3 +1160,274 @@ def test_nodeidentifier_range_produces_blank_node_or_iri(): uri_ref = props["https://example.org/uriRef"] uri_kinds = list(g.objects(uri_ref, SH.nodeKind)) assert SH.IRI in uri_kinds, f"Expected sh:IRI for uri, got {uri_kinds}" + + +# --------------------------------------------------------------------------- +# Helper functions +# --------------------------------------------------------------------------- + + +def _parse_shacl(schema, **kwargs): + shacl = ShaclGenerator(schema, mergeimports=False, **kwargs).serialize() + g = rdflib.Graph() + g.parse(data=shacl) + return g + + +# --------------------------------------------------------------------------- +# --emit-rules / sh:sparql tests +# --------------------------------------------------------------------------- + +_RULES_SCHEMA_YAML = """ +id: https://example.org/boolean-guards +name: boolean_guard_rules +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/boolean-guards/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + WeatherWind: + range: boolean + slot_uri: ex:WeatherWind + weatherWindValue: + description: Wind speed value. + range: decimal + slot_uri: ex:weatherWindValue + WeatherRain: + range: boolean + slot_uri: ex:WeatherRain + weatherRainValue: + description: Rain intensity value. + range: decimal + slot_uri: ex:weatherRainValue + Temperature: + range: decimal + slot_uri: ex:Temperature +classes: + Environment: + class_uri: ex:Environment + slots: + - WeatherWind + - weatherWindValue + - WeatherRain + - weatherRainValue + - Temperature + rules: + - description: If weatherWindValue is provided, WeatherWind must be true. + preconditions: + slot_conditions: + weatherWindValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherWind: + equals_string: "true" + - description: If weatherRainValue is provided, WeatherRain must be true. + preconditions: + slot_conditions: + weatherRainValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherRain: + equals_string: "true" +""" + +EX_RULES = rdflib.Namespace("https://example.org/boolean-guards/") + + +def test_rule_boolean_guard_generates_sparql(): + """Boolean-guard rules produce sh:sparql constraints on the NodeShape.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 2, f"Expected 2 sh:sparql constraints, got {len(sparql_nodes)}" + + for node in sparql_nodes: + assert (node, RDF.type, SH.SPARQLConstraint) in g + selects = list(g.objects(node, SH.select)) + assert len(selects) == 1, "Each constraint must have exactly one sh:select" + query = str(selects[0]) + assert "$this" in query, "SPARQL must use $this pre-bound variable" + assert "OPTIONAL" in query, "SPARQL must use OPTIONAL for flag/value" + assert "FILTER" in query, "SPARQL must have a FILTER clause" + assert "BOUND" in query, "SPARQL must use BOUND()" + + +def test_rule_with_description_generates_message(): + """Rule description is emitted as sh:message on the SPARQLConstraint.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + + messages = set() + for node in sparql_nodes: + for msg in g.objects(node, SH.message): + messages.add(str(msg)) + + assert "If weatherWindValue is provided, WeatherWind must be true." in messages + assert "If weatherRainValue is provided, WeatherRain must be true." in messages + + +def test_rule_sparql_contains_correct_uris(): + """SPARQL queries reference the correct slot URIs.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + + queries = [str(list(g.objects(n, SH.select))[0]) for n in sparql_nodes] + all_sparql = "\n".join(queries) + + assert str(EX_RULES.WeatherWind) in all_sparql + assert str(EX_RULES.weatherWindValue) in all_sparql + assert str(EX_RULES.WeatherRain) in all_sparql + assert str(EX_RULES.weatherRainValue) in all_sparql + + +_DEACTIVATED_RULE_SCHEMA_YAML = """ +id: https://example.org/deactivated-test +name: deactivated_rule_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/deactivated-test/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + Flag: + range: boolean + slot_uri: ex:Flag + flagValue: + range: decimal + slot_uri: ex:flagValue +classes: + TestClass: + class_uri: ex:TestClass + slots: + - Flag + - flagValue + rules: + - description: This rule is deactivated. + deactivated: true + preconditions: + slot_conditions: + flagValue: + value_presence: PRESENT + postconditions: + slot_conditions: + Flag: + equals_string: "true" +""" + + +def test_rule_deactivated_skipped(): + """Deactivated rules do not produce sh:sparql constraints.""" + g = _parse_shacl(_DEACTIVATED_RULE_SCHEMA_YAML) + + shape = URIRef("https://example.org/deactivated-test/TestClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0, f"Deactivated rule should not emit sh:sparql, got {len(sparql_nodes)}" + + +_UNSUPPORTED_RULE_SCHEMA_YAML = """ +id: https://example.org/unsupported-test +name: unsupported_rule_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/unsupported-test/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + slotA: + range: string + slot_uri: ex:slotA + slotB: + range: string + slot_uri: ex:slotB +classes: + TestClass: + class_uri: ex:TestClass + slots: + - slotA + - slotB + rules: + - description: Rule with no postconditions. + preconditions: + slot_conditions: + slotA: + value_presence: PRESENT +""" + + +def test_rule_unsupported_pattern_skipped(): + """Unrecognised rule patterns are silently skipped (no sh:sparql emitted).""" + g = _parse_shacl(_UNSUPPORTED_RULE_SCHEMA_YAML) + + shape = URIRef("https://example.org/unsupported-test/TestClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0 + + +def test_rule_no_emit_rules_flag(): + """--no-emit-rules suppresses sh:sparql constraint generation.""" + g = _parse_shacl(_RULES_SCHEMA_YAML, emit_rules=False) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0, f"emit_rules=False should suppress rules, got {len(sparql_nodes)}" + + +_NO_RULES_SCHEMA_YAML = """ +id: https://example.org/no-rules +name: no_rules_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/no-rules/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + name: + range: string + slot_uri: ex:name +classes: + SimpleClass: + class_uri: ex:SimpleClass + slots: + - name +""" + + +def test_rule_no_rules_no_sparql(): + """Classes without rules: blocks produce no sh:sparql constraints.""" + g = _parse_shacl(_NO_RULES_SCHEMA_YAML) + + shape = URIRef("https://example.org/no-rules/SimpleClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0 + + +def test_rule_multiple_rules_per_class(): + """Multiple boolean-guard rules on one class produce multiple sh:sparql constraints.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 2 + + # Each constraint should reference different slot pairs + queries = [str(list(g.objects(n, SH.select))[0]) for n in sparql_nodes] + wind_query = [q for q in queries if "weatherWindValue" in q] + rain_query = [q for q in queries if "weatherRainValue" in q] + assert len(wind_query) == 1, "Expected exactly one wind query" + assert len(rain_query) == 1, "Expected exactly one rain query"