# Registry keys of the V2 importer pipelines that opted out of package ToDo
# computation through their ``exclude_from_package_todo`` class attribute.
# Entries of IMPORTERS_REGISTRY that are not V2 importer pipeline classes are
# never listed, whatever attributes they carry.
TODO_EXCLUDED_PIPELINES = [
    pipeline_key
    for pipeline_key, pipeline_class in IMPORTERS_REGISTRY.items()
    if issubclass(pipeline_class, VulnerableCodeBaseImporterPipelineV2)
    and pipeline_class.exclude_from_package_todo
]
def todo_excluded(self):
    """
    Return this queryset without the advisories that are ineligible for ToDo computation.

    An advisory is dropped when its ``datasource_id`` is listed in
    ``TODO_EXCLUDED_PIPELINES``.
    """
    # NOTE(review): function-scope import, presumably to avoid a circular import
    # between the models and the importers registry -- confirm before hoisting.
    from vulnerabilities.importers import TODO_EXCLUDED_PIPELINES

    excluded_datasource_ids = TODO_EXCLUDED_PIPELINES
    return self.exclude(datasource_id__in=excluded_datasource_ids)
+# diff --git a/vulnerabilities/pipelines/v2_importers/aosp_importer.py b/vulnerabilities/pipelines/v2_importers/aosp_importer.py index 23bcda86f..1abe91776 100644 --- a/vulnerabilities/pipelines/v2_importers/aosp_importer.py +++ b/vulnerabilities/pipelines/v2_importers/aosp_importer.py @@ -32,6 +32,7 @@ class AospImporterPipeline(VulnerableCodeBaseImporterPipelineV2): license_url = "https://github.com/quarkslab/aosp_dataset/blob/master/LICENSE" precedence = 200 + exclude_from_package_todo = True @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_importers/epss_importer_v2.py b/vulnerabilities/pipelines/v2_importers/epss_importer_v2.py index 6f8adc6d1..007341d0c 100644 --- a/vulnerabilities/pipelines/v2_importers/epss_importer_v2.py +++ b/vulnerabilities/pipelines/v2_importers/epss_importer_v2.py @@ -30,6 +30,8 @@ class EPSSImporterPipeline(VulnerableCodeBaseImporterPipelineV2): spdx_license_expression = "unknown" importer_name = "EPSS Importer" + exclude_from_package_todo = True + precedence = 200 def advisories_count(self): diff --git a/vulnerabilities/pipelines/v2_importers/nvd_importer.py b/vulnerabilities/pipelines/v2_importers/nvd_importer.py index d689aaa05..7c5faf73e 100644 --- a/vulnerabilities/pipelines/v2_importers/nvd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/nvd_importer.py @@ -71,6 +71,8 @@ class NVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
""" + exclude_from_package_todo = True + precedence = 100 @classmethod diff --git a/vulnerabilities/pipelines/v2_importers/project_kb_msr2019_importer.py b/vulnerabilities/pipelines/v2_importers/project_kb_msr2019_importer.py index 1a4411acd..269c92f71 100644 --- a/vulnerabilities/pipelines/v2_importers/project_kb_msr2019_importer.py +++ b/vulnerabilities/pipelines/v2_importers/project_kb_msr2019_importer.py @@ -30,6 +30,8 @@ class ProjectKBMSR2019Pipeline(VulnerableCodeBaseImporterPipelineV2): license_url = "https://github.com/SAP/project-kb/blob/main/LICENSE.txt" repo_url = "git+https://github.com/SAP/project-kb" + exclude_from_package_todo = True + precedence = 200 @classmethod diff --git a/vulnerabilities/pipelines/v2_importers/project_kb_statements_importer.py b/vulnerabilities/pipelines/v2_importers/project_kb_statements_importer.py index 0c1c8e05d..a4200cedb 100644 --- a/vulnerabilities/pipelines/v2_importers/project_kb_statements_importer.py +++ b/vulnerabilities/pipelines/v2_importers/project_kb_statements_importer.py @@ -37,6 +37,8 @@ class ProjectKBStatementsPipeline(VulnerableCodeBaseImporterPipelineV2): license_url = "https://github.com/SAP/project-kb/blob/main/LICENSE.txt" repo_url = "git+https://github.com/SAP/project-kb@vulnerability-data" + exclude_from_package_todo = True + precedence = 200 @classmethod diff --git a/vulnerabilities/pipelines/v2_importers/suse_score_importer.py b/vulnerabilities/pipelines/v2_importers/suse_score_importer.py index 92a534ddc..299dcb256 100644 --- a/vulnerabilities/pipelines/v2_importers/suse_score_importer.py +++ b/vulnerabilities/pipelines/v2_importers/suse_score_importer.py @@ -23,6 +23,8 @@ class SUSESeverityScoreImporterPipeline(VulnerableCodeBaseImporterPipelineV2): pipeline_id = "suse_importer_v2" url = "https://ftp.suse.com/pub/projects/security/yaml/suse-cvss-scores.yaml" + exclude_from_package_todo = True + @classmethod def steps(cls): return ( diff --git 
@classmethod
def steps(cls):
    """Return the tuple of step callables that make up this pipeline."""
    return (
        # NOTE(review): this step is disabled by being commented out, which looks
        # like a debugging leftover. detect_conflicting_advisories filters out
        # advisories that already carry MISSING_* ToDos, which are presumably
        # created by this step -- confirm whether it must be re-enabled, or
        # delete the line and document why it is no longer needed.
        # cls.compute_individual_advisory_todo,
        cls.detect_conflicting_advisories,
    )
+ ) + aliases_count = AdvisoryAlias.objects.count() progress = LoopProgress( total_iterations=aliases_count, logger=self.log, progress_step=1, ) - for alias in progress.iter(aliases.iterator(chunk_size=2000)): - advisories = ( - alias.advisories.exclude( - advisory_todos__issue_type="MISSING_AFFECTED_AND_FIXED_BY_PACKAGES" - ) - .distinct() - .prefetch_related( - "impacted_packages", - ) + + advisory_qs = ( + AdvisoryV2.objects.exclude( + advisory_todos__issue_type="MISSING_AFFECTED_AND_FIXED_BY_PACKAGES" + ) + .exclude(advisory_todos__issue_type="MISSING_AFFECTED_PACKAGE") + .exclude(advisory_todos__issue_type="MISSING_FIXED_BY_PACKAGE") + .todo_excluded() + .latest_per_avid() + .distinct() + .prefetch_related( + "impacted_packages", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", ) + ) - check_conflicting_affected_and_fixed_by_packages_for_alias( - advisories=advisories, - cve=alias, - todo_to_create=todo_to_create, - advisory_relation_to_create=advisory_relation_to_create, + aliases = ( + AdvisoryAlias.objects.filter(alias__istartswith="cve") + .prefetch_related( + Prefetch("advisories", queryset=advisory_qs, to_attr="filtered_advisories") + ) + .iterator(chunk_size=50) + ) + non_cve = ( + AdvisoryAlias.objects.exclude(alias__istartswith="cve") + .prefetch_related( + Prefetch("advisories", queryset=advisory_qs, to_attr="filtered_advisories") + ) + .iterator(chunk_size=500) + ) + for alias in progress.iter(chain(aliases, non_cve)): + advisories_with_common_alias = alias.filtered_advisories or [] + duplicate_ids = [a.id for a in advisories_with_common_alias] + others = advisory_qs.filter(advisory_id=alias.alias).exclude(id__in=duplicate_ids) + if not advisories_with_common_alias and not others.exists(): + continue + + adv_purl_map = defaultdict(set) + purl_adv_map = defaultdict( + lambda: defaultdict( + lambda: { + "affected": set(), + "fixed": set(), + "impact_count": 0, + } + ) + ) + unfurled_base_purls = set() + 
def compute_version_range_disagreement(adv_map):
    """
    Compute differences in affected and fixed version sets across advisories.

    ``adv_map`` maps an advisory identifier to a mapping holding at least the
    ``"affected"`` and ``"fixed"`` iterables of versions reported by that
    advisory. Other keys are ignored and the input is never mutated.

    Return a dict with, for both ``affected`` and ``fixed``:
    - ``<kind>_union``: versions reported by at least one advisory,
    - ``<kind>_intersection``: versions reported by every advisory,
    - ``<kind>_disagreement``: versions reported by some but not all advisories.

    An empty ``adv_map`` yields empty sets for every key instead of raising.
    """
    # Copy into real ``set`` objects: ``set.intersection`` is an unbound method that
    # rejects any first argument that is not a ``set`` (frozenset, list, tuple...).
    affected_sets = [set(entry["affected"]) for entry in adv_map.values()]
    fixed_sets = [set(entry["fixed"]) for entry in adv_map.values()]

    affected_union = set().union(*affected_sets)
    # ``set.intersection()`` called with no argument raises TypeError, so the
    # "no advisory at all" case is handled explicitly.
    affected_intersection = set.intersection(*affected_sets) if affected_sets else set()

    fixed_union = set().union(*fixed_sets)
    fixed_intersection = set.intersection(*fixed_sets) if fixed_sets else set()

    return {
        "affected_union": affected_union,
        "affected_intersection": affected_intersection,
        "affected_disagreement": affected_union - affected_intersection,
        "fixed_union": fixed_union,
        "fixed_intersection": fixed_intersection,
        "fixed_disagreement": fixed_union - fixed_intersection,
    }
- Example of comparison matrix: + Example of conflicting_package_details: { - "pkg:npm/foo/bar": { - "affected": { - Advisory1: frozenset(VersionRange1, VersionRange2), - Advisory2: frozenset(...), - }, - "fixed": { - Advisory1: frozenset(VersionRange1, VersionRange2), - Advisory2: frozenset(...), - }, + "pkg:maven/org.apache.struts/struts2-core": { + "avids": [ + "github_osv_importer_v2/GHSA-mwrx-hx6x-3hhv", + "gitlab_importer_v2/maven/org.apache.struts/struts2-core/CVE-2012-0838" + ], + "affected_union": {"2.1.8.1", "2.0.8", "2.1.2", "2.0.5", "2.0.11", "2.2.1.1", "2.2.3"}, + "affected_intersection": {"2.1.8.1", "2.0.8", "2.1.2", "2.0.5", "2.0.11", "2.2.3"}, + "affected_disagreement": {"2.2.1.1"}, + "fixed_union": {"2.2.3.1"}, + "fixed_intersection": {"2.2.3.1"}, + "fixed_disagreement": set() }, "pkg:pypi/foobar": { - "affected": { - Advisory1: frozenset(...), - Advisory2: frozenset(...), - }, - "fixed": { - Advisory1: frozenset(...), - Advisory2: frozenset(...), - }, + "avids": [ + "pypa_importer_v2/PYSEC-xxxx-18", + "pysec_importer_v2/PYSEC-xxxx-18" + ], + "affected_union": {"2.1.8.1", "2.0.8"}, + "affected_intersection": {"2.1.8.1", "2.0.8"}, + "affected_disagreement": set(), + "fixed_union": {"3.1", "3.0"}, + "fixed_intersection": {"3.1"}, + "fixed_disagreement": {"3.0"}, }, ... 
} """ - matrix = {} - for advisory in advisories: - advisory_id = advisory.unique_content_id - for impacted in advisory.impacted_packages.all() or []: - affected_purl = impacted.base_purl - - initialize_sub_matrix( - matrix=matrix, - affected_purl=affected_purl, - advisory=advisory, - ) - - if fixed_version_range := impacted.fixed_vers: - matrix[affected_purl]["fixed"][advisory_id].add(fixed_version_range) - - if affecting_version_range := impacted.affecting_vers: - matrix[affected_purl]["affected"][advisory_id].add(affecting_version_range) + conflicting_package_details = {} has_conflicting_affected_packages = False has_conflicting_fixed_package = False - messages = [] - for purl, board in matrix.items(): - fixed = board.get("fixed", {}).values() - impacted = board.get("affected", {}).values() - - unique_set_of_affected_vers = {frozenset(vers) for vers in impacted} - unique_set_of_fixed_vers = {frozenset(vers) for vers in fixed} + conflicting_advisories = set() + for purl, adv_map in purl_adv_map.items(): + result = compute_version_range_disagreement(adv_map) + if not (result["fixed_disagreement"] or result["affected_disagreement"]): + continue - if len(unique_set_of_affected_vers) > 1: - has_conflicting_affected_packages = True - messages.append( - f"{cve}: {purl} with conflicting affected versions {unique_set_of_affected_vers}" - ) - if len(unique_set_of_fixed_vers) > 1: + if result["fixed_disagreement"]: has_conflicting_fixed_package = True - messages.append( - f"{cve}: {purl} with conflicting fixed version {unique_set_of_fixed_vers}" - ) + if result["affected_disagreement"]: + has_conflicting_affected_packages = True + + conflicting_package_details[purl] = { + "avids": adv_map.keys(), + } + conflicting_advisories.update([advisories[avid] for avid in adv_map]) + conflicting_package_details[purl].update(result) if not has_conflicting_affected_packages and not has_conflicting_fixed_package: - return + return 0, 0 issue_type = 
def get_best_impact_for_non_conflicting_purls(
    purl_adv_map, conflicting_package_details, conflicting_avids
):
    """
    Return a mapping of PURL to ``(avid, impact_count)`` for non-conflicting packages.

    ``purl_adv_map`` maps a PURL to a mapping of AVID to a dict holding an
    ``"impact_count"`` entry. PURLs that are keys of ``conflicting_package_details``
    are skipped, and only advisories whose AVID is in ``conflicting_avids`` are
    considered.

    Select only one advisory per PURL based on maximum impact package count; on a
    tie the advisory that comes first in the iteration order of ``purl_adv_map[purl]``
    wins. PURLs with no eligible advisory are left out of the result.
    """
    # Build the lookup once: callers pass a list, and testing membership against a
    # list for every (purl, avid) pair is needlessly linear.
    eligible_avids = set(conflicting_avids)

    best_purl_avid_map = {}
    for purl, advs in purl_adv_map.items():
        if purl in conflicting_package_details:
            continue

        candidates = [
            (avid, values["impact_count"])
            for avid, values in advs.items()
            if avid in eligible_avids
        ]

        if candidates:
            # ``max`` returns the first maximal item, which keeps ties deterministic.
            best_purl_avid_map[purl] = max(candidates, key=lambda candidate: candidate[1])
    return best_purl_avid_map
def _sorted_tuple(items):
    """Return ``items`` as a sorted tuple, falling back to ``repr`` order if not comparable."""
    items = list(items)
    try:
        return tuple(sorted(items))
    except TypeError:
        # Mixed types (e.g. None next to str) have no natural ordering in Python 3.
        # Ordering by repr stays deterministic and independent of the input order.
        return tuple(sorted(items, key=repr))


def canonical_value(value):
    """
    Return a canonical, order independent tuple for hashing/deduplication.

    Dicts become a sorted tuple of ``(key, canonical value)`` pairs; lists, sets and
    tuples become a sorted tuple of canonical values; anything else is returned
    unchanged. Siblings that cannot be compared with each other (for instance
    ``None`` and a string) are ordered by their ``repr`` instead of raising
    ``TypeError``.

    >>> canonical_value({"b": ["k", "j"], "a": 2})
    (('a', 2), ('b', ('j', 'k')))
    >>> canonical_value([2, 1])
    (1, 2)
    >>> canonical_value([None, "a"]) == canonical_value(["a", None])
    True
    """
    if isinstance(value, dict):
        return _sorted_tuple((key, canonical_value(item)) for key, item in value.items())
    if isinstance(value, (list, set, tuple)):
        return _sorted_tuple(canonical_value(item) for item in value)
    return value