diff --git a/cmd/catalogd/main.go b/cmd/catalogd/main.go index e6e4758b2..9f02f4a13 100644 --- a/cmd/catalogd/main.go +++ b/cmd/catalogd/main.go @@ -365,12 +365,27 @@ func run(ctx context.Context) error { return err } - localStorage = &storage.LocalDirV1{ - RootDir: storeDir, - RootURL: baseStorageURL, - EnableMetasHandler: features.CatalogdFeatureGate.Enabled(features.APIV1MetasHandler), + var metasMode storage.MetasHandlerMode + if features.CatalogdFeatureGate.Enabled(features.APIV1MetasHandler) { + metasMode = storage.MetasHandlerEnabled + } else { + metasMode = storage.MetasHandlerDisabled } + var graphqlMode storage.GraphQLQueriesMode + if features.CatalogdFeatureGate.Enabled(features.GraphQLCatalogQueries) { + graphqlMode = storage.GraphQLQueriesEnabled + } else { + graphqlMode = storage.GraphQLQueriesDisabled + } + + localStorage = storage.NewLocalDirV1( + storeDir, + baseStorageURL, + metasMode, + graphqlMode, + ) + // Config for the catalogd web server catalogServerConfig := serverutil.CatalogServerConfig{ ExternalAddr: cfg.externalAddr, diff --git a/docs/draft/howto/catalog-queries-graphql-endpoint.md b/docs/draft/howto/catalog-queries-graphql-endpoint.md new file mode 100644 index 000000000..34197d030 --- /dev/null +++ b/docs/draft/howto/catalog-queries-graphql-endpoint.md @@ -0,0 +1,202 @@ +# Catalog queries using GraphQL + +!!! warning "Alpha Feature" + The GraphQL endpoint is an **alpha feature** controlled by the `GraphQLCatalogQueries` feature gate. + The API and behavior may change in future releases. + +After you [add a catalog of extensions](../../tutorials/add-catalog.md) to your cluster, you can query the catalog using GraphQL for flexible, structured queries with precise field selection. + +## Prerequisites + +* You have added a ClusterCatalog of extensions, such as [OperatorHub.io](https://operatorhub.io), to your cluster. +* The `GraphQLCatalogQueries` feature gate is enabled in catalogd. + +!!! 
note
    By default, Catalogd is installed with TLS enabled for the catalog webserver.
    The following examples will show this default behavior, but for simplicity's sake will ignore TLS verification in the curl commands using the `-k` flag.

You also need to port forward the catalog server service:

``` terminal
kubectl -n olmv1-system port-forward svc/catalogd-service 8443:443
```

## GraphQL Endpoint

The GraphQL endpoint is available at:

```
https://localhost:8443/catalogs/{catalog}/api/v1/graphql
```

Where `{catalog}` is the name of your ClusterCatalog (for example, `operatorhubio`).

All queries must be sent as **HTTP POST** requests with a JSON body containing a `query` field.

## Understanding GraphQL Field Names

**IMPORTANT**: GraphQL field names are automatically generated from catalog schema names.

### Naming Convention

Schema names are converted to GraphQL field names using this process:

1. Remove dots and special characters: `olm.bundle` → `olmbundle`
2. Convert to lowercase: `OLM.Bundle` → `olmbundle`
3. Append 's' for pluralization: `olmbundle` → `olmbundles`

**Examples:**

| Schema Name | GraphQL Field Name |
|-------------|-------------------|
| `olm.bundle` | `olmbundles` |
| `olm.package` | `olmpackages` |
| `olm.channel` | `olmchannels` |
| `helm.chart` | `helmcharts` |

### Discovering Available Fields

To find the exact field names available for your catalog, use GraphQL introspection:

``` terminal
curl -k -X POST 'https://localhost:8443/catalogs/operatorhubio/api/v1/graphql' \
  -H "Content-Type: application/json" \
  -d '{
    "query": "{ __schema { queryType { fields { name description } } } }"
  }' | jq
```

This returns all available query fields for the catalog, including the automatically generated schema-based fields.

!!! warning "Pluralization Limitations"
    The current implementation appends 's' to schema names for pluralization. This may not produce grammatically correct English plurals in all cases (e.g., `index` → `indexs` instead of `indices`).
When creating custom schemas, use singular nouns that pluralize well with a simple 's' suffix. + +## Basic Queries + +### Catalog Summary + +Get an overview of schemas and object counts in the catalog: + +``` terminal +curl -k -X POST 'https://localhost:8443/catalogs/operatorhubio/api/v1/graphql' \ + -H "Content-Type: application/json" \ + -d '{ + "query": "{ summary { totalSchemas schemas { name totalObjects totalFields } } }" + }' | jq +``` + +### Query Bundles + +List bundles with specific fields: + +``` terminal +curl -k -X POST 'https://localhost:8443/catalogs/operatorhubio/api/v1/graphql' \ + -H "Content-Type: application/json" \ + -d '{ + "query": "{ olmbundles(limit: 5, offset: 0) { name package image } }" + }' | jq +``` + +### Query Packages + +List packages with metadata: + +``` terminal +curl -k -X POST 'https://localhost:8443/catalogs/operatorhubio/api/v1/graphql' \ + -H "Content-Type: application/json" \ + -d '{ + "query": "{ olmpackages(limit: 10) { name description defaultChannel } }" + }' | jq +``` + +### Query Channels + +List channels: + +``` terminal +curl -k -X POST 'https://localhost:8443/catalogs/operatorhubio/api/v1/graphql' \ + -H "Content-Type: application/json" \ + -d '{ + "query": "{ olmchannels { name package entries } }" + }' | jq +``` + +## Advanced Queries + +### Pagination + +All schema-based queries support pagination via `limit` and `offset` arguments: + +``` terminal +curl -k -X POST 'https://localhost:8443/catalogs/operatorhubio/api/v1/graphql' \ + -H "Content-Type: application/json" \ + -d '{ + "query": "{ olmbundles(limit: 10, offset: 20) { name } }" + }' | jq +``` + +### Nested Field Selection + +Select only the fields you need, including nested objects: + +``` terminal +curl -k -X POST 'https://localhost:8443/catalogs/operatorhubio/api/v1/graphql' \ + -H "Content-Type: application/json" \ + -d '{ + "query": "{ olmpackages { name icon { mediatype base64data } } }" + }' | jq +``` + +### Complex Bundle Properties + +Query bundle 
properties with their type and value fields: + +``` terminal +curl -k -X POST 'https://localhost:8443/catalogs/operatorhubio/api/v1/graphql' \ + -H "Content-Type: application/json" \ + -d '{ + "query": "{ olmbundles(limit: 5) { name properties { type value } } }" + }' | jq +``` + +**Note:** The `properties` field contains an array of objects, each with a `type` string and a `value` field that can contain complex nested data. GraphQL will return the full JSON structure for the `value` field. + +## Comparing GraphQL vs Metas Endpoint + +| Feature | GraphQL (`/api/v1/graphql`) | Metas (`/api/v1/metas`) | +|---------|---------------------------|------------------------| +| Field selection | Precise - request only needed fields | All fields always returned | +| Query complexity | Rich queries with nested objects | Simple parameter-based filtering | +| Response size | Minimal - only requested data | Full objects always returned | +| Schema discovery | Introspection built-in | External documentation needed | +| Pagination | Built-in `limit` and `offset` | Manual implementation required | +| HTTP Method | POST only | GET supported | +| Feature status | Alpha (feature gate required) | Stable | + +**When to use GraphQL:** +- You need specific fields from large objects +- You want to query related data in a single request +- You need structured, typed responses +- You're building a UI or client that benefits from precise data fetching + +**When to use Metas endpoint:** +- You need simple, stable API +- You're doing basic filtering by schema/package/name +- You want to use GET requests for caching +- You need guaranteed API stability + +## Limitations + +1. **Pluralization**: Schema names are pluralized by appending 's', which may not be grammatically correct for all words +2. **Schema naming**: Full schema names (including namespace/prefix) are preserved in field names (`olm.bundle` → `olmbundles`, not `bundles`) +3. 
**POST only**: GraphQL endpoint only accepts POST requests, unlike the metas endpoint which supports GET +4. **Alpha stability**: API may change in future releases while in alpha + +## Enabling the GraphQL Feature + +The GraphQL endpoint is controlled by the `GraphQLCatalogQueries` feature gate. To enable it: + +``` yaml +args: + - --feature-gates=GraphQLCatalogQueries=true +``` + +See [enable webhook support](enable-webhook-support.md) for more details on configuring feature gates. diff --git a/go.mod b/go.mod index 9fa2070c1..77e69cfb5 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,7 @@ require ( github.com/google/go-containerregistry v0.21.4 github.com/google/renameio/v2 v2.0.2 github.com/gorilla/handlers v1.5.2 + github.com/graphql-go/graphql v0.8.1 github.com/klauspost/compress v1.18.5 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.1 diff --git a/go.sum b/go.sum index a8bf64ba8..3730f9be5 100644 --- a/go.sum +++ b/go.sum @@ -280,6 +280,8 @@ github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5T github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= github.com/gosuri/uitable v0.0.4 h1:IG2xLKRvErL3uhY6e1BylFzG+aJiwQviDDTfOKeKTpY= github.com/gosuri/uitable v0.0.4/go.mod h1:tKR86bXuXPZazfOTG1FIzvjIdXzd0mo4Vtn16vt0PJo= +github.com/graphql-go/graphql v0.8.1 h1:p7/Ou/WpmulocJeEx7wjQy611rtXGQaAcXGqanuMMgc= +github.com/graphql-go/graphql v0.8.1/go.mod h1:nKiHzRM0qopJEwCITUuIsxk9PlVlwIiiI8pnJEhordQ= github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA= github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.1-0.20210315223345-82c243799c99 h1:JYghRBlGCZyCF2wNUJ8W0cwaQdtpcssJ4CgC406g+WU= diff --git a/helm/experimental.yaml b/helm/experimental.yaml index b158389d4..2070ae20b 
100644 --- a/helm/experimental.yaml +++ b/helm/experimental.yaml @@ -23,5 +23,6 @@ options: features: enabled: - APIV1MetasHandler + - GraphQLCatalogQueries # This can be one of: standard or experimental featureSet: experimental diff --git a/internal/catalogd/features/features.go b/internal/catalogd/features/features.go index abf23083a..3dcc6857e 100644 --- a/internal/catalogd/features/features.go +++ b/internal/catalogd/features/features.go @@ -9,11 +9,13 @@ import ( ) const ( - APIV1MetasHandler = featuregate.Feature("APIV1MetasHandler") + APIV1MetasHandler = featuregate.Feature("APIV1MetasHandler") + GraphQLCatalogQueries = featuregate.Feature("GraphQLCatalogQueries") ) var catalogdFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ - APIV1MetasHandler: {Default: false, PreRelease: featuregate.Alpha}, + APIV1MetasHandler: {Default: false, PreRelease: featuregate.Alpha, LockToDefault: false}, + GraphQLCatalogQueries: {Default: false, PreRelease: featuregate.Alpha, LockToDefault: false}, } var CatalogdFeatureGate featuregate.MutableFeatureGate = featuregate.NewFeatureGate() diff --git a/internal/catalogd/graphql/README.md b/internal/catalogd/graphql/README.md new file mode 100644 index 000000000..20d26afca --- /dev/null +++ b/internal/catalogd/graphql/README.md @@ -0,0 +1,143 @@ +# GraphQL Integration + +This package provides dynamic GraphQL schema generation for operator catalog data, integrated into the catalogd storage server. + +⚠️ **Alpha Feature**: This is an experimental feature controlled by the `GraphQLCatalogQueries` feature gate. See user documentation at `docs/draft/howto/catalog-queries-graphql-endpoint.md`. + +## Usage + +The GraphQL endpoint is now available as part of the catalogd storage server at: + +``` +/catalogs/{catalog}/api/v1/graphql +``` + +Where `{catalog}` is replaced by the actual catalog name at runtime. 
+ +## Example Usage + +### Making a GraphQL Request + +```bash +curl -X POST http://localhost:8080/catalogs/my-catalog/api/v1/graphql \ + -H "Content-Type: application/json" \ + -d '{ + "query": "{ summary { totalSchemas schemas { name totalObjects totalFields } } }" + }' +``` + +### Sample Queries + +#### Get catalog summary: +```graphql +{ + summary { + totalSchemas + schemas { + name + totalObjects + totalFields + } + } +} +``` + +#### Get bundles with pagination: +```graphql +{ + olmbundles(limit: 5, offset: 0) { + name + package + version + } +} +``` + +#### Get packages: +```graphql +{ + olmpackages(limit: 10) { + name + description + } +} +``` + +#### Get channels: +```graphql +{ + olmchannels(limit: 10) { + name + package + entries + } +} +``` + +## Features + +- **Dynamic Schema Generation**: Automatically discovers schema structure from catalog metadata +- **Nested Object Support**: Handles complex nested structures like bundle properties and related images +- **Pagination**: Built-in limit/offset pagination for all queries +- **Field Name Sanitization**: Converts JSON field names to valid GraphQL identifiers +- **Catalog-Specific**: Each catalog gets its own dynamically generated schema +- **Query Performance**: Pre-parsed objects cached during schema build eliminate JSON unmarshaling overhead + +## Integration + +The GraphQL functionality is integrated across multiple packages: + +- `internal/catalogd/server/handlers.go`: `CatalogHandlers.handleV1GraphQL()` handles POST requests to the GraphQL endpoint +- `internal/catalogd/storage/localdir.go`: `LocalDirV1.GetCatalogFS()` creates filesystem interface for catalog data +- `internal/catalogd/service/graphql_service.go`: `GraphQLService.GetSchema()` and `buildSchemaFromFS()` build dynamic GraphQL schemas for specific catalogs + +## Technical Details + +- Uses `declcfg.WalkMetasFS` to discover schema structure from catalog metadata +- Generates GraphQL object types dynamically from discovered fields +- 
Handles nested objects (arrays of objects) by creating dynamic nested types +- Pre-parses all catalog objects during schema build and caches them for fast query execution +- Supports all standard GraphQL features including introspection + +## Field Naming Conventions + +### Schema to GraphQL Field Name Mapping + +**IMPORTANT**: GraphQL field names are automatically generated from schema names using the following convention: + +1. **Remove dots and special characters** - `olm.bundle` becomes `olmbundle` +2. **Convert to lowercase** - `OLM.Bundle` becomes `olmbundle` +3. **Append 's' for pluralization** - `olmbundle` becomes `olmbundles` + +**Examples:** +- `olm.bundle` → `olmbundles` +- `olm.package` → `olmpackages` +- `olm.channel` → `olmchannels` +- `helm.chart` → `helmcharts` +- `custom.operator` → `customoperators` + +### Limitations and Considerations + +⚠️ **Pluralization Limitations**: The current implementation blindly appends 's' to create plural field names. This approach has known limitations: + +1. **English grammar rules not applied**: Words ending in 's', 'x', 'z', 'ch', 'sh' should use 'es', but currently just get 's' appended +2. **Irregular plurals not supported**: Schema names like `person`, `child`, `index` will become `persons`, `childs`, `indexs` instead of proper English plurals +3. **Non-English schema names**: Schemas using non-English words will not follow appropriate pluralization rules for their language +4. 
**Already-plural names**: If a schema name is already plural, it will still get 's' appended + +**Recommendations for schema naming:** +- Use schema names that work well with simple 's' pluralization (e.g., `bundle`, `package`, `chart`) +- Avoid schema names that are already plural or have irregular plural forms +- Document the expected GraphQL field names in your catalog documentation +- Use GraphQL introspection to discover actual field names: `{ __schema { queryType { fields { name } } } }` + +### Field Name Sanitization + +All field names within objects are sanitized to be valid GraphQL identifiers: + +- Special characters (dots, hyphens, etc.) are replaced with underscores +- CamelCase conversion: `package-name` → `packageName`, `default_channel` → `defaultChannel` +- Names starting with numbers get `field_` prefix: `123invalid` → `field_123invalid` +- Empty or invalid names default to `value` + +See `remapFieldName()` function for complete logic. \ No newline at end of file diff --git a/internal/catalogd/graphql/discovery_test.go b/internal/catalogd/graphql/discovery_test.go new file mode 100644 index 000000000..c08ef4a3f --- /dev/null +++ b/internal/catalogd/graphql/discovery_test.go @@ -0,0 +1,307 @@ +package graphql + +import ( + "testing" + + "github.com/operator-framework/operator-registry/alpha/declcfg" +) + +func TestFieldNameRemapping_EdgeCases(t *testing.T) { + testCases := []struct { + input string + expected string + }{ + {"name", "name"}, + {"package-name", "packageName"}, + {"default_channel", "defaultChannel"}, + {"related-images", "relatedImages"}, + {"", "value"}, + {"123invalid", "field_123invalid"}, + {"my.field.name", "myFieldName"}, + {"CamelCase", "camelCase"}, + {"UPPERCASE", "uppercase"}, + {"mixed_case-field.name", "mixedCaseFieldName"}, + {"spec.template.spec.containers", "specTemplateSpecContainers"}, + {"metadata.annotations.description", "metadataAnnotationsDescription"}, + {"operators.operatorframework.io/bundle.channels.v1", 
"operatorsOperatorframeworkIoBundleChannelsV1"}, + {"---", "field_"}, + {"123", "field_123"}, + {"field@#$%", "field"}, + } + + for _, tc := range testCases { + result := remapFieldName(tc.input) + if result != tc.expected { + t.Errorf("remapFieldName(%q) = %q, expected %q", tc.input, result, tc.expected) + } + } +} + +func TestSanitizeTypeName_EdgeCases(t *testing.T) { + testCases := []struct { + input string + expected string + }{ + {"olm.package", "OlmPackage"}, + {"olm.gvk", "OlmGvk"}, + {"some-type", "SomeType"}, + {"complex.type-name_here", "ComplexTypeNameHere"}, + {"", "Unknown"}, + {"123invalid", "Invalid"}, + {"operators.operatorframework.io/bundle.channels.v1", "OperatorsOperatorframeworkIoBundleChannelsV1"}, + {"@#$%", "Unknown"}, + {"_____", "Unknown"}, + {"ABC", "Abc"}, + {"lowercase", "Lowercase"}, + } + + for _, tc := range testCases { + result := sanitizeTypeName(tc.input) + if result != tc.expected { + t.Errorf("sanitizeTypeName(%q) = %q, expected %q", tc.input, result, tc.expected) + } + } +} + +func TestAnalyzeJSONObject_FieldTypes(t *testing.T) { + testObj := map[string]interface{}{ + "name": "test-package", + "version": "1.0.0", + "count": 42, + "active": true, + "tags": []interface{}{"tag1", "tag2"}, + "numbers": []interface{}{1, 2, 3}, + "nested": map[string]interface{}{"key": "value"}, + "nullField": nil, + "emptyArray": []interface{}{}, + "floatValue": 3.14, + "mixedArray": []interface{}{"string", 123, true}, + } + + info := &SchemaInfo{ + Fields: make(map[string]*FieldInfo), + } + + analyzeJSONObject(testObj, info) + + // Check that all fields were discovered + expectedFieldCount := len(testObj) + if len(info.Fields) != expectedFieldCount { + t.Errorf("Expected %d fields discovered, got %d", expectedFieldCount, len(info.Fields)) + } + + // Check specific field types + testField := func(origName string, shouldBeArray bool) { + graphqlField := remapFieldName(origName) + fieldInfo, exists := info.Fields[graphqlField] + if !exists { + 
t.Errorf("Field %s (mapped to %s) not discovered", origName, graphqlField) + return + } + + if fieldInfo.IsArray != shouldBeArray { + t.Errorf("Field %s array status: expected %v, got %v", graphqlField, shouldBeArray, fieldInfo.IsArray) + } + + if len(fieldInfo.SampleValues) == 0 { + t.Errorf("No sample values recorded for field %s", graphqlField) + } + } + + testField("name", false) + testField("count", false) + testField("active", false) + testField("tags", true) + testField("numbers", true) + testField("emptyArray", true) +} + +func TestBundlePropertiesAnalysis_ComprehensiveTypes(t *testing.T) { + // Test that properties field is discovered with nested structure + bundleObj := map[string]interface{}{ + "name": "test-bundle", + "package": "test-package", + "properties": []interface{}{ + map[string]interface{}{ + "type": "olm.package", + "value": map[string]interface{}{ + "packageName": "test-package", + "version": "1.0.0", + }, + }, + map[string]interface{}{ + "type": "olm.gvk", + "value": map[string]interface{}{ + "group": "example.com", + "version": "v1", + "kind": "TestResource", + }, + }, + }, + } + + info := &SchemaInfo{ + Fields: make(map[string]*FieldInfo), + } + + // Use the generic field analysis (not bundle-specific) + analyzeJSONObject(bundleObj, info) + + // Check that properties field was discovered + propertiesField, exists := info.Fields[remapFieldName("properties")] + if !exists { + t.Error("properties field not discovered") + return + } + + // Verify it's detected as an array + if !propertiesField.IsArray { + t.Error("properties field should be detected as an array") + } + + // Verify nested fields were discovered + if propertiesField.NestedFields == nil { + t.Error("properties field should have nested fields discovered") + return + } + + // Check for common property fields (type, value) + expectedFields := []string{"type", "value"} + for _, field := range expectedFields { + fieldName := remapFieldName(field) + if _, exists := 
propertiesField.NestedFields[fieldName]; !exists { + t.Errorf("Expected nested field %s not found in properties", fieldName) + } + } +} + +func TestSchemaDiscovery_RealWorldExample(t *testing.T) { + // Test with more realistic catalog data + packageMeta := &declcfg.Meta{ + Schema: declcfg.SchemaPackage, + Package: "nginx-ingress-operator", + Name: "nginx-ingress-operator", + Blob: []byte(`{ + "defaultChannel": "alpha", + "icon": { + "base64data": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==", + "mediatype": "image/png" + }, + "name": "nginx-ingress-operator", + "schema": "olm.package" + }`), + } + + channelMeta := &declcfg.Meta{ + Schema: declcfg.SchemaChannel, + Package: "nginx-ingress-operator", + Name: "alpha", + Blob: []byte(`{ + "entries": [ + {"name": "nginx-ingress-operator.v0.0.1"}, + {"name": "nginx-ingress-operator.v0.0.2", "replaces": "nginx-ingress-operator.v0.0.1"} + ], + "name": "alpha", + "package": "nginx-ingress-operator", + "schema": "olm.channel" + }`), + } + + bundleMeta := &declcfg.Meta{ + Schema: declcfg.SchemaBundle, + Package: "nginx-ingress-operator", + Name: "nginx-ingress-operator.v0.0.2", + Blob: []byte(`{ + "image": "quay.io/operatorhubio/nginx-ingress-operator@sha256:abc123", + "name": "nginx-ingress-operator.v0.0.2", + "package": "nginx-ingress-operator", + "properties": [ + { + "type": "olm.package", + "value": { + "packageName": "nginx-ingress-operator", + "version": "0.0.2" + } + }, + { + "type": "olm.gvk", + "value": { + "group": "k8s.nginx.org", + "kind": "NginxIngress", + "version": "v1" + } + }, + { + "type": "olm.bundle.mediatype", + "value": "registry+v1" + } + ], + "relatedImages": [ + { + "image": "quay.io/operatorhubio/nginx-ingress-operator@sha256:abc123", + "name": "operator" + } + ], + "schema": "olm.bundle" + }`), + } + + testMetas := []*declcfg.Meta{packageMeta, channelMeta, bundleMeta} + + catalogSchema, err := DiscoverSchemaFromMetas(testMetas) + if err != nil { + 
t.Fatalf("Failed to discover schema: %v", err) + } + + // Validate the results + if len(catalogSchema.Schemas) != 3 { + t.Errorf("Expected 3 schemas, got %d", len(catalogSchema.Schemas)) + } + + // Check bundle property discovery + bundleSchema := catalogSchema.Schemas[declcfg.SchemaBundle] + if bundleSchema == nil { + t.Fatal("Bundle schema not found") + } + + // With the schema-agnostic approach, we verify the properties field has nested structure + propertiesField, exists := bundleSchema.Fields[remapFieldName("properties")] + if !exists { + t.Error("properties field not found in bundle schema") + return + } + + if !propertiesField.IsArray { + t.Error("properties field should be an array") + } + if len(propertiesField.NestedFields) == 0 { + t.Error("properties field should have nested fields") + return + } + + // Verify common property fields + for _, field := range []string{"type", "value"} { + if _, exists := propertiesField.NestedFields[remapFieldName(field)]; !exists { + t.Errorf("Expected field %s not found in properties", field) + } + } + + // Validate that complex fields are properly mapped + packageSchema := catalogSchema.Schemas[declcfg.SchemaPackage] + if packageSchema == nil { + t.Fatal("Package schema not found") + } + + // Check that icon field exists (it's a complex object) + if _, exists := packageSchema.Fields["icon"]; !exists { + t.Error("Icon field not discovered in package schema") + } + + // Validate total object counts + if packageSchema.TotalObjects != 1 { + t.Errorf("Expected 1 package, got %d", packageSchema.TotalObjects) + } + if bundleSchema.TotalObjects != 1 { + t.Errorf("Expected 1 bundle, got %d", bundleSchema.TotalObjects) + } +} diff --git a/internal/catalogd/graphql/graphql.go b/internal/catalogd/graphql/graphql.go new file mode 100644 index 000000000..cd788a199 --- /dev/null +++ b/internal/catalogd/graphql/graphql.go @@ -0,0 +1,711 @@ +package graphql + +import ( + "context" + "encoding/json" + "fmt" + "io/fs" + "reflect" + 
"regexp" + "strings" + + "github.com/graphql-go/graphql" + + "github.com/operator-framework/operator-registry/alpha/declcfg" +) + +// Pre-compiled regex patterns to avoid repeated compilation in hot paths +var ( + invalidCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) + consecutiveUnderscoresRE = regexp.MustCompile(`_+`) + startsWithLetterRE = regexp.MustCompile(`^[a-zA-Z]`) + alphanumericOnlyRE = regexp.MustCompile(`[^a-zA-Z0-9]`) + leadingDigitsRE = regexp.MustCompile(`^[0-9]+`) +) + +// FieldInfo represents discovered field information +type FieldInfo struct { + Name string + GraphQLType graphql.Type + JSONType reflect.Kind + IsArray bool + SampleValues []interface{} + NestedFields map[string]*FieldInfo // For array-of-objects, stores object structure +} + +// SchemaInfo holds discovered schema information +type SchemaInfo struct { + Fields map[string]*FieldInfo + TotalObjects int + SampleObject map[string]interface{} +} + +// CatalogSchema holds the complete discovered schema +type CatalogSchema struct { + Schemas map[string]*SchemaInfo // schema name -> info +} + +// DynamicSchema holds the generated GraphQL schema and metadata +type DynamicSchema struct { + Schema graphql.Schema + CatalogSchema *CatalogSchema + ParsedObjects map[string][]map[string]interface{} // Pre-parsed JSON objects cached during schema build + // Performance optimization: ParsedObjects avoids json.Unmarshal on every GraphQL query. + // Objects are parsed once during schema build and cached for all subsequent queries. + // Memory cost: ~same as storing raw blobs (parsed maps ≈ JSON size in memory). + // For 1000 bundles @ 5KB each: ~5MB, same as raw metadata storage. + // Performance gain: Eliminates N × json.Unmarshal operations per query (where N = returned objects). 
+} + +// remapFieldName converts field names to valid GraphQL camelCase identifiers +func remapFieldName(name string) string { + // Handle empty names + if name == "" { + return "value" + } + + // Replace invalid characters with underscores + clean := invalidCharsRE.ReplaceAllString(name, "_") + + // Collapse multiple consecutive underscores + clean = consecutiveUnderscoresRE.ReplaceAllString(clean, "_") + + // Trim leading underscores only (keep trailing to detect them) + clean = strings.TrimLeft(clean, "_") + + // Split on underscores and camelCase + parts := strings.Split(clean, "_") + result := "" + isFirst := true + for _, part := range parts { + // Skip empty parts (from consecutive or trailing underscores) + if part == "" { + continue + } + + if isFirst { + // For the first part, check if it's all uppercase + if strings.ToUpper(part) == part { + // If all uppercase, convert entirely to lowercase + result = strings.ToLower(part) + } else { + // Otherwise, make only the first character lowercase + result = strings.ToLower(string(part[0])) + part[1:] + } + isFirst = false + } else { + // For subsequent parts, capitalize first letter, lowercase rest + result += strings.ToUpper(string(part[0])) + strings.ToLower(part[1:]) + } + } + + // Ensure it starts with a letter + if result == "" || !startsWithLetterRE.MatchString(result) { + result = "field_" + result + } + + return result +} + +// jsonTypeToGraphQL maps JSON types to GraphQL types +func jsonTypeToGraphQL(jsonType reflect.Kind, isArray bool) graphql.Type { + var baseType graphql.Type + + switch jsonType { + case reflect.String: + baseType = graphql.String + case reflect.Bool: + baseType = graphql.Boolean + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + baseType = graphql.Int + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + baseType = graphql.Int + case reflect.Float32, reflect.Float64: + baseType = graphql.Float + default: + // For complex 
types, use String as fallback (JSON serialized) + baseType = graphql.String + } + + if isArray { + return graphql.NewList(baseType) + } + return baseType +} + +// determineFieldType determines the JSON type and array status of a value +func determineFieldType(value interface{}) (reflect.Kind, bool) { + if value == nil { + return reflect.String, false + } + + valueType := reflect.TypeOf(value) + if valueType.Kind() != reflect.Slice { + return valueType.Kind(), false + } + + // Handle slice types + slice := reflect.ValueOf(value) + if slice.Len() == 0 { + return reflect.String, true + } + + firstElem := slice.Index(0).Interface() + if firstElem == nil { + return reflect.String, true + } + + return reflect.TypeOf(firstElem).Kind(), true +} + +// analyzeFieldValue analyzes a field value and returns type info, sample value, and nested fields +func analyzeFieldValue(value interface{}) (reflect.Kind, bool, interface{}, map[string]*FieldInfo) { + if value == nil { + return reflect.String, false, value, nil + } + + valueType := reflect.TypeOf(value) + if valueType.Kind() != reflect.Slice { + return valueType.Kind(), false, value, nil + } + + // Handle slice types + slice := reflect.ValueOf(value) + if slice.Len() == 0 { + return reflect.String, true, value, nil + } + + firstElem := slice.Index(0).Interface() + if firstElem == nil { + return reflect.String, true, value, nil + } + + jsonType := reflect.TypeOf(firstElem).Kind() + + // If array element is an object, analyze its structure + var nestedFields map[string]*FieldInfo + if jsonType == reflect.Map { + if elemObj, ok := firstElem.(map[string]interface{}); ok { + nestedFields = analyzeNestedObject(elemObj) + } + } + + return jsonType, true, firstElem, nestedFields +} + +// analyzeNestedObject analyzes a nested object and returns its field structure +func analyzeNestedObject(obj map[string]interface{}) map[string]*FieldInfo { + fields := make(map[string]*FieldInfo) + + for key, value := range obj { + fieldName := 
remapFieldName(key) + jsonType, isArray := determineFieldType(value) + + fields[fieldName] = &FieldInfo{ + Name: fieldName, + GraphQLType: jsonTypeToGraphQL(jsonType, isArray), + JSONType: jsonType, + IsArray: isArray, + SampleValues: []interface{}{value}, + } + } + + return fields +} + +// mergeNestedFields merges discovered nested fields into existing ones +func mergeNestedFields(existing, new map[string]*FieldInfo) { + for fieldName, newInfo := range new { + if existingInfo, ok := existing[fieldName]; ok { + // Merge sample values + for _, sample := range newInfo.SampleValues { + existingInfo.SampleValues = appendUnique(existingInfo.SampleValues, sample) + } + } else { + existing[fieldName] = newInfo + } + } +} + +// analyzeJSONObject analyzes a JSON object and extracts field information +func analyzeJSONObject(obj map[string]interface{}, info *SchemaInfo) { + if info.Fields == nil { + info.Fields = make(map[string]*FieldInfo) + } + + for key, value := range obj { + fieldName := remapFieldName(key) + + // Determine type, array status, sample value, and nested structure + jsonType, isArray, sampleValue, nestedFields := analyzeFieldValue(value) + + // Update or create field info + existing, ok := info.Fields[fieldName] + if !ok { + info.Fields[fieldName] = &FieldInfo{ + Name: fieldName, + GraphQLType: jsonTypeToGraphQL(jsonType, isArray), + JSONType: jsonType, + IsArray: isArray, + SampleValues: []interface{}{sampleValue}, + NestedFields: nestedFields, + } + continue + } + + // Update existing field + existing.SampleValues = appendUnique(existing.SampleValues, sampleValue) + + // Merge nested fields if discovered + if nestedFields == nil { + continue + } + if existing.NestedFields == nil { + existing.NestedFields = nestedFields + } else { + mergeNestedFields(existing.NestedFields, nestedFields) + } + } +} + +// appendUnique adds a value to slice if not already present, using JSON string as key for uniqueness +func appendUnique(slice []interface{}, value 
interface{}) []interface{} { + seen := make(map[string]struct{}, len(slice)) + + for _, existing := range slice { + key, err := json.Marshal(existing) + if err != nil { + continue // skip values that can't be marshaled + } + seen[string(key)] = struct{}{} + } + + valueKey, err := json.Marshal(value) + if err != nil { + return slice // skip value if it can't be marshaled + } + + if _, exists := seen[string(valueKey)]; exists { + return slice + } + + return append(slice, value) +} + +// DiscoverSchemaFromMetas analyzes Meta objects to discover schema structure +func DiscoverSchemaFromMetas(metas []*declcfg.Meta) (*CatalogSchema, error) { + catalogSchema := &CatalogSchema{ + Schemas: make(map[string]*SchemaInfo), + } + + // Process each meta object + for _, meta := range metas { + if meta.Schema == "" { + continue + } + + // Ensure schema info exists + if catalogSchema.Schemas[meta.Schema] == nil { + catalogSchema.Schemas[meta.Schema] = &SchemaInfo{ + Fields: make(map[string]*FieldInfo), + TotalObjects: 0, + } + } + + info := catalogSchema.Schemas[meta.Schema] + info.TotalObjects++ + + // Parse the JSON blob + var obj map[string]interface{} + if err := json.Unmarshal(meta.Blob, &obj); err != nil { + continue // Skip malformed objects + } + + // Store a sample object for reference + if info.SampleObject == nil { + info.SampleObject = obj + } + + // Analyze general fields (including nested structures) + analyzeJSONObject(obj, info) + } + + return catalogSchema, nil +} + +// marshalComplexValue marshals maps and slices as JSON strings +func marshalComplexValue(value interface{}) interface{} { + if value == nil { + return nil + } + + // Use reflection to detect maps and slices + v := reflect.ValueOf(value) + kind := v.Kind() + + if kind == reflect.Map || kind == reflect.Slice { + // Marshal as JSON + if jsonBytes, err := json.Marshal(value); err == nil { + return string(jsonBytes) + } + // If marshal fails, return formatted string + return fmt.Sprintf("%v", value) + } + + 
// For simple types, return as-is + return value +} + +// createNestedObjectType creates a GraphQL object type for nested array elements +func createNestedObjectType(typeName string, nestedFields map[string]*FieldInfo) *graphql.Object { + fields := graphql.Fields{} + + for fieldName, fieldInfo := range nestedFields { + fieldName := fieldName // Capture loop variable + fieldInfo := fieldInfo // Capture loop variable + + fields[fieldName] = &graphql.Field{ + Type: fieldInfo.GraphQLType, + Resolve: func(p graphql.ResolveParams) (interface{}, error) { + if source, ok := p.Source.(map[string]interface{}); ok { + // Try direct field name first + if value, ok := source[fieldName]; ok { + return marshalComplexValue(value), nil + } + // Then try finding by remapped name + for origKey, value := range source { + if remapFieldName(origKey) == fieldName { + return marshalComplexValue(value), nil + } + } + } + return nil, nil + }, + } + } + + return graphql.NewObject(graphql.ObjectConfig{ + Name: typeName, + Fields: fields, + }) +} + +// createFieldResolver creates a resolver function for a field name +func createFieldResolver(fieldName string) graphql.FieldResolveFn { + return func(p graphql.ResolveParams) (interface{}, error) { + source, ok := p.Source.(map[string]interface{}) + if !ok { + return nil, nil + } + + for origKey, value := range source { + if remapFieldName(origKey) == fieldName { + return value, nil + } + } + return nil, nil + } +} + +// buildGraphQLObjectType creates a GraphQL object type from discovered field info +func buildGraphQLObjectType(schemaName string, info *SchemaInfo) *graphql.Object { + fields := graphql.Fields{} + + // Add discovered fields + for fieldName, fieldInfo := range info.Fields { + fieldName := fieldName // Capture loop variable + fieldInfo := fieldInfo // Capture loop variable + + var fieldType graphql.Output + // Check if this field has nested structure (array of objects) + if len(fieldInfo.NestedFields) > 0 { + // Create a dynamic 
nested type + nestedTypeName := sanitizeTypeName(schemaName) + sanitizeTypeName(fieldName) + nestedType := createNestedObjectType(nestedTypeName, fieldInfo.NestedFields) + fieldType = graphql.NewList(nestedType) + } else { + // Regular field (not nested) + fieldType = fieldInfo.GraphQLType + } + + fields[fieldName] = &graphql.Field{ + Type: fieldType, + Resolve: createFieldResolver(fieldName), + } + } + + return graphql.NewObject(graphql.ObjectConfig{ + Name: sanitizeTypeName(schemaName), + Fields: fields, + }) +} + +// sanitizeTypeName converts a property type to a valid GraphQL type name +func sanitizeTypeName(propType string) string { + // Remove dots and other invalid characters, capitalize words + clean := alphanumericOnlyRE.ReplaceAllString(propType, "_") + + // Strip leading digits + clean = leadingDigitsRE.ReplaceAllString(clean, "") + + parts := strings.Split(clean, "_") + + result := "" + for _, part := range parts { + if part != "" { + result += strings.ToUpper(string(part[0])) + strings.ToLower(part[1:]) + } + } + + if result == "" { + result = "Unknown" + } + + return result +} + +// BuildDynamicGraphQLSchema creates a complete GraphQL schema from discovered structure +func BuildDynamicGraphQLSchema(catalogSchema *CatalogSchema, metasBySchema map[string][]*declcfg.Meta) (*DynamicSchema, error) { + // Pre-parse all meta blobs to avoid unmarshaling on every query + // This has minimal memory overhead (parsed objects ≈ raw blob size) + // but eliminates expensive json.Unmarshal operations from the query path + parsedObjects := make(map[string][]map[string]interface{}) + for schemaName, metas := range metasBySchema { + parsedObjects[schemaName] = make([]map[string]interface{}, 0, len(metas)) + for _, meta := range metas { + var obj map[string]interface{} + if err := json.Unmarshal(meta.Blob, &obj); err != nil { + continue // Skip malformed objects (same as runtime behavior) + } + parsedObjects[schemaName] = append(parsedObjects[schemaName], obj) + } + } + 
+ // Build GraphQL object types for each discovered schema + objectTypes := make(map[string]*graphql.Object) + + for schemaName, schemaInfo := range catalogSchema.Schemas { + objectTypes[schemaName] = buildGraphQLObjectType(schemaName, schemaInfo) + } + + // Pre-build field name to schema name lookup map for O(1) access in resolvers + fieldNameToSchema := make(map[string]string) + for schemaName := range catalogSchema.Schemas { + sanitized := alphanumericOnlyRE.ReplaceAllString(schemaName, "") + fieldName := strings.ToLower(sanitized) + "s" // e.g., "olmbundles", "olmpackages" + fieldNameToSchema[fieldName] = schemaName + } + + // Create root query fields + queryFields := graphql.Fields{} + + for schemaName, objectType := range objectTypes { + schemaName := schemaName // Capture loop variable + objectType := objectType // Capture loop variable + // Generate GraphQL field name from schema name + // Convention: remove dots/special chars, lowercase, append 's' for pluralization + // Examples: "olm.bundle" -> "olmbundles", "helm.chart" -> "helmcharts" + // LIMITATION: Simple 's' appending doesn't follow English grammar rules or support + // non-English languages. Schemas should use names that pluralize well with 's'. 
+ sanitized := alphanumericOnlyRE.ReplaceAllString(schemaName, "") + fieldName := strings.ToLower(sanitized) + "s" + + queryFields[fieldName] = &graphql.Field{ + Type: graphql.NewList(objectType), + Args: graphql.FieldConfigArgument{ + "limit": &graphql.ArgumentConfig{ + Type: graphql.Int, + DefaultValue: 100, + Description: "Maximum number of items to return", + }, + "offset": &graphql.ArgumentConfig{ + Type: graphql.Int, + DefaultValue: 0, + Description: "Number of items to skip", + }, + }, + Resolve: func(p graphql.ResolveParams) (interface{}, error) { + // O(1) lookup of schema name from pre-built map + currentSchemaName, ok := fieldNameToSchema[p.Info.FieldName] + if !ok { + return nil, fmt.Errorf("unknown schema for field %s", p.Info.FieldName) + } + + // Get pre-parsed objects for this schema (no unmarshaling needed!) + objects, ok := parsedObjects[currentSchemaName] + if !ok { + return []interface{}{}, nil + } + + // Parse arguments + limit, _ := p.Args["limit"].(int) + offset, _ := p.Args["offset"].(int) + + // Apply pagination to pre-parsed objects + var results []interface{} + for i, obj := range objects { + if i < offset { + continue + } + if len(results) >= limit { + break + } + results = append(results, obj) + } + + return results, nil + }, + } + } + + // Add summary field + queryFields["summary"] = &graphql.Field{ + Type: graphql.NewObject(graphql.ObjectConfig{ + Name: "CatalogSummary", + Fields: graphql.Fields{ + "totalSchemas": &graphql.Field{Type: graphql.Int}, + "schemas": &graphql.Field{ + Type: graphql.NewList(graphql.NewObject(graphql.ObjectConfig{ + Name: "SchemaSummary", + Fields: graphql.Fields{ + "name": &graphql.Field{Type: graphql.String}, + "totalObjects": &graphql.Field{Type: graphql.Int}, + "totalFields": &graphql.Field{Type: graphql.Int}, + }, + })), + }, + }, + }), + Resolve: func(p graphql.ResolveParams) (interface{}, error) { + schemas := make([]interface{}, 0, len(catalogSchema.Schemas)) + for name, info := range 
catalogSchema.Schemas { + schemas = append(schemas, map[string]interface{}{ + "name": name, + "totalObjects": info.TotalObjects, + "totalFields": len(info.Fields), + }) + } + + return map[string]interface{}{ + "totalSchemas": len(catalogSchema.Schemas), + "schemas": schemas, + }, nil + }, + } + + // Create root query + rootQuery := graphql.NewObject(graphql.ObjectConfig{ + Name: "Query", + Fields: queryFields, + }) + + // Build the schema + schema, err := graphql.NewSchema(graphql.SchemaConfig{ + Query: rootQuery, + }) + if err != nil { + return nil, fmt.Errorf("failed to create GraphQL schema: %w", err) + } + + return &DynamicSchema{ + Schema: schema, + CatalogSchema: catalogSchema, + ParsedObjects: parsedObjects, + }, nil +} + +// LoadAndSummarizeCatalogDynamic loads FBC using WalkMetasReader and builds dynamic GraphQL schema +func LoadAndSummarizeCatalogDynamic(catalogFS fs.FS) (*DynamicSchema, error) { + var metas []*declcfg.Meta + + // Collect all metas from the filesystem + err := declcfg.WalkMetasFS(context.Background(), catalogFS, func(path string, meta *declcfg.Meta, err error) error { + if err != nil { + return err + } + if meta != nil { + metas = append(metas, meta) + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("error walking catalog metas: %w", err) + } + + // Discover schema from collected metas + catalogSchema, err := DiscoverSchemaFromMetas(metas) + if err != nil { + return nil, fmt.Errorf("error discovering schema: %w", err) + } + + // Organize metas by schema for resolvers + metasBySchema := make(map[string][]*declcfg.Meta) + for _, meta := range metas { + if meta.Schema != "" { + metasBySchema[meta.Schema] = append(metasBySchema[meta.Schema], meta) + } + } + + // Build dynamic GraphQL schema + dynamicSchema, err := BuildDynamicGraphQLSchema(catalogSchema, metasBySchema) + if err != nil { + return nil, fmt.Errorf("error building GraphQL schema: %w", err) + } + + return dynamicSchema, nil +} + +// PrintCatalogSummary prints a 
comprehensive summary of the discovered schema +func PrintCatalogSummary(dynamicSchema *DynamicSchema) { + catalogSchema := dynamicSchema.CatalogSchema + + // Print comprehensive summary + fmt.Printf("Dynamic GraphQL schema generation complete.\n") + fmt.Printf("Total schemas discovered: %d\n", len(catalogSchema.Schemas)) + + for schemaName, info := range catalogSchema.Schemas { + fmt.Printf("\nSchema: %s\n", schemaName) + fmt.Printf(" Objects: %d\n", info.TotalObjects) + fmt.Printf(" Fields: %d\n", len(info.Fields)) + + // Show sample fields + if len(info.Fields) > 0 { + fmt.Printf(" Sample fields: ") + count := 0 + for fieldName := range info.Fields { + if count > 0 { + fmt.Printf(", ") + } + fmt.Printf("%s", fieldName) + count++ + if count >= 5 { // Show first 5 fields + if len(info.Fields) > 5 { + fmt.Printf(", ...") + } + break + } + } + fmt.Printf("\n") + } + } + + fmt.Printf("\nGraphQL endpoints available:\n") + for schemaName := range catalogSchema.Schemas { + fmt.Printf(" - %ss\n", strings.ToLower(schemaName)) + } + fmt.Printf(" - summary\n") + + fmt.Printf("\nSample GraphQL query:\n") + fmt.Printf("{\n") + fmt.Printf(" summary {\n") + fmt.Printf(" totalSchemas\n") + fmt.Printf(" schemas { name totalObjects totalFields }\n") + fmt.Printf(" }\n") + if _, ok := catalogSchema.Schemas[declcfg.SchemaBundle]; ok { + fmt.Printf(" bundles(limit: 5) { name package }\n") + } + if _, ok := catalogSchema.Schemas[declcfg.SchemaPackage]; ok { + fmt.Printf(" packages(limit: 5) { name }\n") + } + fmt.Printf("}\n") +} diff --git a/internal/catalogd/graphql/graphql_test.go b/internal/catalogd/graphql/graphql_test.go new file mode 100644 index 000000000..3b4830225 --- /dev/null +++ b/internal/catalogd/graphql/graphql_test.go @@ -0,0 +1,317 @@ +package graphql + +import ( + "testing" + + "github.com/operator-framework/operator-registry/alpha/declcfg" +) + +func TestDiscoverSchemaFromMetas(t *testing.T) { + // Create test metas simulating real catalog data + testMetas := 
[]*declcfg.Meta{ + { + Schema: declcfg.SchemaPackage, + Package: "test-package", + Name: "test-package", + Blob: []byte(`{ + "schema": "olm.package", + "name": "test-package", + "defaultChannel": "stable", + "icon": { + "base64data": "...", + "mediatype": "image/svg+xml" + }, + "description": "A test package" + }`), + }, + { + Schema: declcfg.SchemaChannel, + Package: "test-package", + Name: "stable", + Blob: []byte(`{ + "schema": "olm.channel", + "name": "stable", + "package": "test-package", + "entries": [ + {"name": "test-package.v1.0.0"}, + {"name": "test-package.v1.1.0", "replaces": "test-package.v1.0.0"} + ] + }`), + }, + { + Schema: declcfg.SchemaBundle, + Package: "test-package", + Name: "test-package.v1.0.0", + Blob: []byte(`{ + "schema": "olm.bundle", + "name": "test-package.v1.0.0", + "package": "test-package", + "image": "registry.io/test-package@sha256:abc123", + "properties": [ + { + "type": "olm.package", + "value": { + "packageName": "test-package", + "version": "1.0.0" + } + }, + { + "type": "olm.gvk", + "value": { + "group": "example.com", + "version": "v1", + "kind": "TestResource" + } + } + ], + "relatedImages": [ + { + "name": "operator", + "image": "registry.io/test-package@sha256:abc123" + } + ] + }`), + }, + } + + // Test schema discovery + catalogSchema, err := DiscoverSchemaFromMetas(testMetas) + if err != nil { + t.Fatalf("Failed to discover schema: %v", err) + } + + // Validate discovered schemas + if len(catalogSchema.Schemas) != 3 { + t.Errorf("Expected 3 schemas, got %d", len(catalogSchema.Schemas)) + } + + // Test package schema + packageSchema, ok := catalogSchema.Schemas[declcfg.SchemaPackage] + if !ok { + t.Error("Package schema not discovered") + } else { + if packageSchema.TotalObjects != 1 { + t.Errorf("Expected 1 package object, got %d", packageSchema.TotalObjects) + } + if len(packageSchema.Fields) == 0 { + t.Error("No fields discovered for package schema") + } + + // Check for expected fields + expectedFields := 
[]string{"name", "defaultChannel", "icon", "description", "schema"} + for _, field := range expectedFields { + graphqlField := remapFieldName(field) + if _, exists := packageSchema.Fields[graphqlField]; !exists { + t.Errorf("Expected field %s (mapped to %s) not found in package schema", field, graphqlField) + } + } + } + + // Test bundle schema with properties + bundleSchema, ok := catalogSchema.Schemas[declcfg.SchemaBundle] + if !ok { + t.Error("Bundle schema not discovered") + return + } + + if bundleSchema.TotalObjects != 1 { + t.Errorf("Expected 1 bundle object, got %d", bundleSchema.TotalObjects) + } + + // Check that properties field is discovered with nested structure + propertiesField, exists := bundleSchema.Fields[remapFieldName("properties")] + if !exists { + t.Error("properties field not discovered in bundle schema") + return + } + if !propertiesField.IsArray { + t.Error("properties field should be an array") + return + } + if len(propertiesField.NestedFields) == 0 { + t.Error("properties field should have nested fields discovered") + return + } + + // Check for typical property fields (type, value) + expectedFields := []string{"type", "value"} + for _, field := range expectedFields { + if _, exists := propertiesField.NestedFields[remapFieldName(field)]; !exists { + t.Errorf("Expected nested field %s not found in properties", field) + } + } + + // Test channel schema + channelSchema, ok := catalogSchema.Schemas[declcfg.SchemaChannel] + if !ok { + t.Error("Channel schema not discovered") + } else { + if channelSchema.TotalObjects != 1 { + t.Errorf("Expected 1 channel object, got %d", channelSchema.TotalObjects) + } + } +} + +func TestFieldNameRemapping(t *testing.T) { + testCases := []struct { + input string + expected string + }{ + {"name", "name"}, + {"package-name", "packageName"}, + {"default_channel", "defaultChannel"}, + {"related-images", "relatedImages"}, + {"", "value"}, + {"123invalid", "field_123invalid"}, + {"my.field.name", "myFieldName"}, + 
{"CamelCase", "camelCase"}, + {"UPPERCASE", "uppercase"}, + {"mixed_case-field.name", "mixedCaseFieldName"}, + } + + for _, tc := range testCases { + result := remapFieldName(tc.input) + if result != tc.expected { + t.Errorf("remapFieldName(%q) = %q, expected %q", tc.input, result, tc.expected) + } + } +} + +func TestSanitizeTypeName(t *testing.T) { + testCases := []struct { + input string + expected string + }{ + {"olm.package", "OlmPackage"}, + {"olm.gvk", "OlmGvk"}, + {"some-type", "SomeType"}, + {"complex.type-name_here", "ComplexTypeNameHere"}, + {"", "Unknown"}, + {"123invalid", "Invalid"}, + } + + for _, tc := range testCases { + result := sanitizeTypeName(tc.input) + if result != tc.expected { + t.Errorf("sanitizeTypeName(%q) = %q, expected %q", tc.input, result, tc.expected) + } + } +} + +func TestAnalyzeJSONObject(t *testing.T) { + testObj := map[string]interface{}{ + "name": "test-package", + "version": "1.0.0", + "count": 42, + "active": true, + "tags": []interface{}{"tag1", "tag2"}, + "numbers": []interface{}{1, 2, 3}, + "nested": map[string]interface{}{"key": "value"}, + "nullField": nil, + "emptyArray": []interface{}{}, + } + + info := &SchemaInfo{ + Fields: make(map[string]*FieldInfo), + } + + analyzeJSONObject(testObj, info) + + // Check that all fields were discovered + expectedFields := map[string]string{ + "name": "string", + "version": "string", + "count": "int", + "active": "bool", + "tags": "[]string", + "numbers": "[]int", + "nested": "string", // Complex objects become strings + "nullField": "string", // Null becomes string + "emptyArray": "[]string", + } + + for origField, expectedType := range expectedFields { + graphqlField := remapFieldName(origField) + fieldInfo, exists := info.Fields[graphqlField] + if !exists { + t.Errorf("Field %s (mapped to %s) not discovered", origField, graphqlField) + continue + } + + // Type checking would require GraphQL types, so we just check that it was analyzed + if len(fieldInfo.SampleValues) == 0 { + 
t.Errorf("No sample values recorded for field %s", graphqlField) + } + + _ = expectedType // We can't easily test GraphQL types without the library + } +} + +// TestBundlePropertiesAnalysis tests the analysis of complex bundle properties +func TestBundlePropertiesAnalysis(t *testing.T) { + bundleObj := map[string]interface{}{ + "name": "test-bundle", + "package": "test-package", + "properties": []interface{}{ + map[string]interface{}{ + "type": "olm.package", + "value": map[string]interface{}{ + "packageName": "test-package", + "version": "1.0.0", + }, + }, + map[string]interface{}{ + "type": "olm.gvk", + "value": map[string]interface{}{ + "group": "example.com", + "version": "v1", + "kind": "TestResource", + }, + }, + map[string]interface{}{ + "type": "olm.csv.metadata", + "value": map[string]interface{}{ + "name": "test-operator", + "namespace": "test-namespace", + "annotations": map[string]interface{}{ + "description": "A test operator", + }, + }, + }, + }, + } + + info := &SchemaInfo{ + Fields: make(map[string]*FieldInfo), + } + + // Use the generic field analysis (not bundle-specific) + analyzeJSONObject(bundleObj, info) + + // Check that properties field was discovered + propertiesField, exists := info.Fields[remapFieldName("properties")] + if !exists { + t.Error("properties field not discovered") + return + } + + // Verify it's detected as an array + if !propertiesField.IsArray { + t.Error("properties field should be detected as an array") + } + + // Verify nested fields were discovered + if propertiesField.NestedFields == nil { + t.Error("properties field should have nested fields discovered") + return + } + + // Check for common property fields (type, value) + expectedFields := []string{"type", "value"} + for _, field := range expectedFields { + fieldName := remapFieldName(field) + if _, exists := propertiesField.NestedFields[fieldName]; !exists { + t.Errorf("Expected nested field %s not found in properties", fieldName) + } + } +} diff --git 
a/internal/catalogd/graphql/sample-queries.txt b/internal/catalogd/graphql/sample-queries.txt new file mode 100644 index 000000000..93594ae08 --- /dev/null +++ b/internal/catalogd/graphql/sample-queries.txt @@ -0,0 +1,412 @@ +GraphQL Query Examples for Catalog Data +========================================== + +This file contains common GraphQL queries for examining catalog objects +served via the /graphql endpoint. The dynamic schema adapts to your catalog +structure, so field availability may vary based on your specific data. + +Basic Summary Queries +-------------------- + +# Get overall catalog summary +{ + summary { + totalSchemas + schemas { + name + totalObjects + totalFields + } + } +} + +# Get schema information with more details +{ + summary { + totalSchemas + schemas { + name + totalObjects + totalFields + } + } +} + +Basic Object Queries +------------------- + +# Get first 10 packages +{ + packages(limit: 10) { + name + defaultChannel + icon + description + } +} + +# Get packages with pagination +{ + packages(limit: 5, offset: 10) { + name + defaultChannel + description + } +} + +# Get first 10 bundles with basic info +{ + bundles(limit: 10) { + name + package + version + image + skipRange + } +} + +# Get channels +{ + channels(limit: 10) { + name + package + entries + } +} + +Bundle Property Queries +---------------------- + +# Get bundles with all properties +{ + bundles(limit: 5) { + name + package + version + properties { + type + value { + ... on PropertyValueFeaturesOperatorsOpenshiftIo { + disconnected + cnf + cni + csi + fips + proxy + tlsProfiles + tokenAuthentication + } + ... on PropertyValueOlmGvk { + group + version + kind + } + ... on PropertyValueOlmPackage { + packageName + version + } + ... on PropertyValueOlmSkips { + value + } + ... on PropertyValueOlmSkipRange { + value + } + } + } + } +} + +# Specific query for OpenShift features properties +{ + bundles(limit: 20) { + name + package + version + properties { + type + value { + ... 
on PropertyValueFeaturesOperatorsOpenshiftIo { + disconnected + cnf + cni + csi + fips + proxy + tlsProfiles + tokenAuthentication + } + } + } + } +} + +# Query bundles with GVK properties +{ + bundles(limit: 10) { + name + package + properties { + type + value { + ... on PropertyValueOlmGvk { + group + version + kind + } + } + } + } +} + +# Query bundles with package requirements +{ + bundles(limit: 10) { + name + package + version + properties { + type + value { + ... on PropertyValueOlmPackageRequired { + packageName + versionRange + } + ... on PropertyValueOlmPackage { + packageName + version + } + } + } + } +} + +Complex Nested Queries +---------------------- + +# Comprehensive bundle analysis +{ + bundles(limit: 5) { + name + package + version + image + skipRange + replaces + properties { + type + value { + ... on PropertyValueFeaturesOperatorsOpenshiftIo { + disconnected + cnf + cni + csi + fips + proxy + tlsProfiles + tokenAuthentication + } + ... on PropertyValueOlmGvk { + group + version + kind + } + ... on PropertyValueOlmPackage { + packageName + version + } + ... on PropertyValueOlmSkips { + value + } + ... on PropertyValueOlmSkipRange { + value + } + ... on PropertyValueOlmBundle { + name + version + } + } + } + } +} + +# Search for specific OpenShift capabilities +{ + bundles(limit: 50) { + name + package + version + properties { + type + value { + ... 
on PropertyValueFeaturesOperatorsOpenshiftIo { + disconnected + fips + proxy + cnf + } + } + } + } +} + +Filtering and Analysis Queries +----------------------------- + +# Get packages and their default channels +{ + packages(limit: 20) { + name + defaultChannel + description + } + channels(limit: 30) { + name + package + entries + } +} + +# Get bundles with specific fields +{ + bundles(limit: 15) { + name + package + version + image + csvDescription + skipRange + replaces + relatedImages + } +} + +# Large dataset exploration +{ + bundles(limit: 100, offset: 0) { + name + package + version + } +} + +Schema Discovery Queries +----------------------- + +# Explore available schemas and their object counts +{ + summary { + totalSchemas + schemas { + name + totalObjects + totalFields + } + } +} + +# Minimal query to check endpoint availability +{ + summary { + totalSchemas + } +} + +Property Type Analysis +--------------------- + +# Focus on OpenShift features across all bundles +{ + bundles(limit: 100) { + name + package + properties { + type + value { + ... on PropertyValueFeaturesOperatorsOpenshiftIo { + disconnected + cnf + cni + csi + fips + proxy + tlsProfiles + tokenAuthentication + } + } + } + } +} + +# Check for specific property types +{ + bundles(limit: 50) { + name + package + properties { + type + # The value will be resolved based on the type + } + } +} + +Performance Queries +------------------ + +# Small result set for quick testing +{ + packages(limit: 3) { + name + } + bundles(limit: 3) { + name + package + } +} + +# Larger result set for comprehensive analysis +{ + bundles(limit: 200) { + name + package + version + properties { + type + value { + ... on PropertyValueFeaturesOperatorsOpenshiftIo { + disconnected + fips + } + ... on PropertyValueOlmGvk { + group + kind + } + } + } + } +} + +Notes on Usage +-------------- + +1. Property union types are dynamically generated based on your catalog data. 
+ The examples above assume common property types like: + - features.operators.openshift.io + - olm.gvk + - olm.package + - olm.bundle + - olm.skips + - olm.skipRange + +2. Field names are automatically converted to camelCase for GraphQL compatibility. + Original JSON field names like "csv-description" become "csvDescription". + +3. Use the summary query first to understand what schemas and fields are + available in your specific catalog. + +4. Pagination is available on all list endpoints using limit and offset parameters. + +5. The property value union types allow type-safe access to different property + structures while maintaining flexibility for unknown property types. + +6. For large catalogs, start with small limit values and increase as needed + to avoid overwhelming responses. \ No newline at end of file diff --git a/internal/catalogd/server/handlers.go b/internal/catalogd/server/handlers.go new file mode 100644 index 000000000..3ad4cf0ad --- /dev/null +++ b/internal/catalogd/server/handlers.go @@ -0,0 +1,296 @@ +package server + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "io/fs" + "net/http" + "net/url" + "os" + "strings" + + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/validation" + "k8s.io/klog/v2" + + "github.com/operator-framework/operator-controller/internal/catalogd/service" +) + +var ( + errInvalidParams = errors.New("invalid parameters") + errInvalidCatalogName = errors.New("invalid catalog name") +) + +// MetasHandlerMode controls whether the metas API endpoint is enabled +type MetasHandlerMode bool + +const ( + MetasHandlerDisabled MetasHandlerMode = false + MetasHandlerEnabled MetasHandlerMode = true +) + +// GraphQLQueriesMode controls whether GraphQL queries are enabled +type GraphQLQueriesMode bool + +const ( + GraphQLQueriesDisabled GraphQLQueriesMode = false + GraphQLQueriesEnabled GraphQLQueriesMode = true +) + +// CatalogHandlers handles HTTP requests for catalog content +type CatalogHandlers struct { 
+ store CatalogStore + graphqlSvc service.GraphQLService + rootURL *url.URL + enableMetas MetasHandlerMode + enableGraphQL GraphQLQueriesMode +} + +// Index provides methods for looking up catalog content by schema/package/name +type Index interface { + Get(catalogFile io.ReaderAt, schema, pkg, name string) io.Reader +} + +// CatalogStore defines the storage interface needed by handlers +type CatalogStore interface { + // GetCatalogData returns the catalog file and its metadata + GetCatalogData(catalog string) (*os.File, os.FileInfo, error) + + // GetCatalogFS returns a filesystem interface for the catalog + GetCatalogFS(catalog string) (fs.FS, error) + + // GetIndex returns the index for a catalog (if metas handler is enabled) + GetIndex(catalog string) (Index, error) +} + +// NewCatalogHandlers creates a new HTTP handlers instance +func NewCatalogHandlers(store CatalogStore, graphqlSvc service.GraphQLService, rootURL *url.URL, enableMetas MetasHandlerMode, enableGraphQL GraphQLQueriesMode) *CatalogHandlers { + return &CatalogHandlers{ + store: store, + graphqlSvc: graphqlSvc, + rootURL: rootURL, + enableMetas: enableMetas, + enableGraphQL: enableGraphQL, + } +} + +// Handler returns an HTTP handler with all routes configured +func (h *CatalogHandlers) Handler() http.Handler { + mux := http.NewServeMux() + + mux.HandleFunc(h.rootURL.JoinPath("{catalog}", "api", "v1", "all").Path, h.handleV1All) + if h.enableMetas { + mux.HandleFunc(h.rootURL.JoinPath("{catalog}", "api", "v1", "metas").Path, h.handleV1Metas) + } + if h.enableGraphQL { + mux.HandleFunc(h.rootURL.JoinPath("{catalog}", "api", "v1", "graphql").Path, h.handleV1GraphQL) + } + + return allowedMethodsHandler(mux, http.MethodGet, http.MethodHead, http.MethodPost) +} + +// handleV1All serves the complete catalog content +func (h *CatalogHandlers) handleV1All(w http.ResponseWriter, r *http.Request) { + catalog := r.PathValue("catalog") + if err := isValidCatalogName(catalog); err != nil { + httpError(w, err) 
+ return + } + catalogFile, catalogStat, err := h.store.GetCatalogData(catalog) + if err != nil { + httpError(w, err) + return + } + defer catalogFile.Close() + + w.Header().Add("Content-Type", "application/jsonl") + http.ServeContent(w, r, "", catalogStat.ModTime(), catalogFile) +} + +// handleV1Metas serves filtered catalog content based on query parameters +func (h *CatalogHandlers) handleV1Metas(w http.ResponseWriter, r *http.Request) { + catalog := r.PathValue("catalog") + if err := isValidCatalogName(catalog); err != nil { + httpError(w, err) + return + } + + // Check for unexpected query parameters + expectedParams := map[string]bool{ + "schema": true, + "package": true, + "name": true, + } + + for param := range r.URL.Query() { + if !expectedParams[param] { + httpError(w, errInvalidParams) + return + } + } + catalogFile, catalogStat, err := h.store.GetCatalogData(catalog) + if err != nil { + httpError(w, err) + return + } + defer catalogFile.Close() + + w.Header().Set("Last-Modified", catalogStat.ModTime().UTC().Format(timeFormat)) + done := checkPreconditions(w, r, catalogStat.ModTime()) + if done { + return + } + + schema := r.URL.Query().Get("schema") + pkg := r.URL.Query().Get("package") + name := r.URL.Query().Get("name") + + if schema == "" && pkg == "" && name == "" { + // If no parameters are provided, return the entire catalog + serveJSONLines(w, r, catalogFile) + return + } + + idx, err := h.store.GetIndex(catalog) + if err != nil { + httpError(w, err) + return + } + indexReader := idx.Get(catalogFile, schema, pkg, name) + serveJSONLines(w, r, indexReader) +} + +// handleV1GraphQL handles GraphQL queries +func (h *CatalogHandlers) handleV1GraphQL(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "Only POST is allowed", http.StatusMethodNotAllowed) + return + } + + catalog := r.PathValue("catalog") + if err := isValidCatalogName(catalog); err != nil { + httpError(w, err) + return + } + + // Limit request 
body size to prevent memory exhaustion attacks (1MB limit) + r.Body = http.MaxBytesReader(w, r.Body, 1<<20) + + // Parse GraphQL query from request body + var params struct { + Query string `json:"query"` + } + if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { + http.Error(w, "Invalid request body", http.StatusBadRequest) + return + } + + // Validate query + if params.Query == "" { + http.Error(w, "Query cannot be empty", http.StatusBadRequest) + return + } + if len(params.Query) > 100000 { // 100KB limit + http.Error(w, "Query too large", http.StatusBadRequest) + return + } + + // Get catalog filesystem + catalogFS, err := h.store.GetCatalogFS(catalog) + if err != nil { + httpError(w, err) + return + } + + // Execute GraphQL query through the service + result, err := h.graphqlSvc.ExecuteQuery(catalog, catalogFS, params.Query) + if err != nil { + httpError(w, err) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(result); err != nil { + httpError(w, err) + return + } +} + +// httpError writes an HTTP error response based on the error type +func httpError(w http.ResponseWriter, err error) { + var code int + var message string + switch { + case errors.Is(err, fs.ErrNotExist): + code = http.StatusNotFound + message = fmt.Sprintf("%d %s", code, http.StatusText(code)) + case errors.Is(err, fs.ErrPermission): + code = http.StatusForbidden + message = fmt.Sprintf("%d %s", code, http.StatusText(code)) + case errors.Is(err, errInvalidParams): + code = http.StatusBadRequest + message = fmt.Sprintf("%d %s", code, http.StatusText(code)) + case errors.Is(err, errInvalidCatalogName): + code = http.StatusBadRequest + // Include detailed DNS1123 validation errors for better user feedback + message = err.Error() + default: + code = http.StatusInternalServerError + message = fmt.Sprintf("%d %s", code, http.StatusText(code)) + } + // Log 5xx errors at ERROR level, 4xx at INFO level + if code >= 500 { + klog.ErrorS(err, 
"HTTP error", "code", code) + } else { + klog.V(2).InfoS("HTTP client error", "code", code, "error", err.Error()) + } + http.Error(w, message, code) +} + +// serveJSONLines writes JSON lines content to the response +func serveJSONLines(w http.ResponseWriter, r *http.Request, rs io.Reader) { + w.Header().Add("Content-Type", "application/jsonl") + // Copy the content of the reader to the response writer only if it's a GET request + if r.Method == http.MethodHead { + return + } + _, err := io.Copy(w, rs) + if err != nil { + httpError(w, err) + return + } +} + +// allowedMethodsHandler wraps a handler to only allow specific HTTP methods +func allowedMethodsHandler(next http.Handler, allowedMethods ...string) http.Handler { + allowedMethodSet := sets.New[string](allowedMethods...) + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Allow POST requests only for GraphQL endpoints (paths ending with /graphql) + if r.Method == http.MethodPost { + // Check if this is the GraphQL endpoint - must end with exactly "/api/v1/graphql" + if !strings.HasSuffix(r.URL.Path, "/api/v1/graphql") { + http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed) + return + } + } else if !allowedMethodSet.Has(r.Method) { + http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed) + return + } + next.ServeHTTP(w, r) + }) +} + +// isValidCatalogName validates that a catalog name is safe for filesystem operations +// and suitable for Kubernetes metadata.name by using DNS1123 subdomain validation. +// Prevents path traversal attacks by requiring alphanumeric start/end characters. +// Returns nil if valid, or an error with detailed DNS1123 validation messages if invalid. 
+func isValidCatalogName(name string) error { + errs := validation.IsDNS1123Subdomain(name) + if len(errs) == 0 { + return nil + } + // Wrap errInvalidCatalogName to maintain errors.Is compatibility while adding details + return fmt.Errorf("%w: %s", errInvalidCatalogName, strings.Join(errs, "; ")) +} diff --git a/internal/catalogd/server/handlers_test.go b/internal/catalogd/server/handlers_test.go new file mode 100644 index 000000000..d565de277 --- /dev/null +++ b/internal/catalogd/server/handlers_test.go @@ -0,0 +1,303 @@ +package server + +import ( + "bytes" + "context" + "encoding/json" + "io/fs" + "net/http" + "net/http/httptest" + "net/url" + "os" + "strings" + "testing" + + "github.com/graphql-go/graphql" + + gql "github.com/operator-framework/operator-controller/internal/catalogd/graphql" +) + +// mockCatalogStore implements CatalogStore for testing +type mockCatalogStore struct { + catalogFile *os.File + catalogStat os.FileInfo + catalogFS fs.FS + getDataErr error + getFSErr error +} + +func (m *mockCatalogStore) GetCatalogData(catalog string) (*os.File, os.FileInfo, error) { + return m.catalogFile, m.catalogStat, m.getDataErr +} + +func (m *mockCatalogStore) GetCatalogFS(catalog string) (fs.FS, error) { + return m.catalogFS, m.getFSErr +} + +func (m *mockCatalogStore) GetIndex(catalog string) (Index, error) { + return nil, nil +} + +// mockGraphQLService implements service.GraphQLService for testing +type mockGraphQLService struct { + executeResult *graphql.Result + executeErr error +} + +func (m *mockGraphQLService) GetSchema(catalog string, catalogFS fs.FS) (*gql.DynamicSchema, error) { + return nil, nil +} + +func (m *mockGraphQLService) ExecuteQuery(catalog string, catalogFS fs.FS, query string) (*graphql.Result, error) { + return m.executeResult, m.executeErr +} + +func (m *mockGraphQLService) InvalidateCache(catalog string) {} + +func TestHandleV1GraphQL_MethodNotAllowed(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + store := 
&mockCatalogStore{} + graphqlSvc := &mockGraphQLService{} + + handlers := NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + + req := httptest.NewRequest(http.MethodGet, "/test-catalog/api/v1/graphql", nil) + req.SetPathValue("catalog", "test-catalog") + w := httptest.NewRecorder() + + handlers.handleV1GraphQL(w, req) + + if w.Code != http.StatusMethodNotAllowed { + t.Errorf("Expected status %d, got %d", http.StatusMethodNotAllowed, w.Code) + } +} + +func TestHandleV1GraphQL_InvalidCatalogName(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + store := &mockCatalogStore{} + graphqlSvc := &mockGraphQLService{} + + handlers := NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + + req := httptest.NewRequest(http.MethodPost, "/INVALID-CATALOG-NAME/api/v1/graphql", strings.NewReader(`{"query": "{ summary { totalSchemas } }"}`)) + req.SetPathValue("catalog", "INVALID-CATALOG-NAME") + w := httptest.NewRecorder() + + handlers.handleV1GraphQL(w, req) + + if w.Code != http.StatusBadRequest { + t.Errorf("Expected status %d, got %d", http.StatusBadRequest, w.Code) + } +} + +func TestHandleV1GraphQL_InvalidJSON(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + store := &mockCatalogStore{} + graphqlSvc := &mockGraphQLService{} + + handlers := NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + + req := httptest.NewRequest(http.MethodPost, "/test-catalog/api/v1/graphql", strings.NewReader(`{invalid json`)) + req.SetPathValue("catalog", "test-catalog") + w := httptest.NewRecorder() + + handlers.handleV1GraphQL(w, req) + + if w.Code != http.StatusBadRequest { + t.Errorf("Expected status %d, got %d", http.StatusBadRequest, w.Code) + } +} + +func TestHandleV1GraphQL_EmptyQuery(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + store := &mockCatalogStore{} + graphqlSvc := &mockGraphQLService{} + + handlers := 
NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + + req := httptest.NewRequest(http.MethodPost, "/test-catalog/api/v1/graphql", strings.NewReader(`{"query": ""}`)) + req.SetPathValue("catalog", "test-catalog") + w := httptest.NewRecorder() + + handlers.handleV1GraphQL(w, req) + + if w.Code != http.StatusBadRequest { + t.Errorf("Expected status %d, got %d", http.StatusBadRequest, w.Code) + } + if !strings.Contains(w.Body.String(), "Query cannot be empty") { + t.Errorf("Expected error message about empty query, got: %s", w.Body.String()) + } +} + +func TestHandleV1GraphQL_QueryTooLarge(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + store := &mockCatalogStore{} + graphqlSvc := &mockGraphQLService{} + + handlers := NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + + // Create a query larger than 100KB + largeQuery := strings.Repeat("a", 100001) + req := httptest.NewRequest(http.MethodPost, "/test-catalog/api/v1/graphql", strings.NewReader(`{"query": "`+largeQuery+`"}`)) + req.SetPathValue("catalog", "test-catalog") + w := httptest.NewRecorder() + + handlers.handleV1GraphQL(w, req) + + if w.Code != http.StatusBadRequest { + t.Errorf("Expected status %d, got %d", http.StatusBadRequest, w.Code) + } +} + +func TestHandleV1GraphQL_BodyTooLarge(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + store := &mockCatalogStore{} + graphqlSvc := &mockGraphQLService{} + + handlers := NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + + // Create a body larger than 1MB + largeBody := strings.Repeat("a", 1<<20+1) + req := httptest.NewRequest(http.MethodPost, "/test-catalog/api/v1/graphql", strings.NewReader(largeBody)) + req.SetPathValue("catalog", "test-catalog") + w := httptest.NewRecorder() + + handlers.handleV1GraphQL(w, req) + + // MaxBytesReader should cause this to fail during decode + if w.Code != 
http.StatusBadRequest { + t.Errorf("Expected status %d, got %d", http.StatusBadRequest, w.Code) + } +} + +func TestHandleV1GraphQL_Success(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + + // Create a temporary directory for the mock filesystem + tmpDir := t.TempDir() + catalogFS := os.DirFS(tmpDir) + + store := &mockCatalogStore{ + catalogFS: catalogFS, + } + + expectedResult := &graphql.Result{ + Data: map[string]interface{}{ + "summary": map[string]interface{}{ + "totalSchemas": 3, + }, + }, + } + + graphqlSvc := &mockGraphQLService{ + executeResult: expectedResult, + } + + handlers := NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + + query := `{"query": "{ summary { totalSchemas } }"}` + req := httptest.NewRequest(http.MethodPost, "/test-catalog/api/v1/graphql", strings.NewReader(query)) + req.SetPathValue("catalog", "test-catalog") + w := httptest.NewRecorder() + + handlers.handleV1GraphQL(w, req) + + if w.Code != http.StatusOK { + t.Errorf("Expected status %d, got %d", http.StatusOK, w.Code) + } + + if w.Header().Get("Content-Type") != "application/json" { + t.Errorf("Expected Content-Type application/json, got %s", w.Header().Get("Content-Type")) + } + + var result map[string]interface{} + if err := json.NewDecoder(w.Body).Decode(&result); err != nil { + t.Fatalf("Failed to decode response: %v", err) + } + + // Verify the result structure + data, ok := result["data"].(map[string]interface{}) + if !ok { + t.Error("Expected data field in response") + } + summary, ok := data["summary"].(map[string]interface{}) + if !ok { + t.Error("Expected summary field in data") + } + totalSchemas, ok := summary["totalSchemas"].(float64) // JSON numbers decode to float64 + if !ok || totalSchemas != 3 { + t.Errorf("Expected totalSchemas to be 3, got %v", summary["totalSchemas"]) + } +} + +func TestHandleV1GraphQL_GetCatalogFSError(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + + store := 
&mockCatalogStore{ + getFSErr: fs.ErrNotExist, + } + + graphqlSvc := &mockGraphQLService{} + + handlers := NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + + query := `{"query": "{ summary { totalSchemas } }"}` + req := httptest.NewRequest(http.MethodPost, "/test-catalog/api/v1/graphql", strings.NewReader(query)) + req.SetPathValue("catalog", "test-catalog") + w := httptest.NewRecorder() + + handlers.handleV1GraphQL(w, req) + + if w.Code != http.StatusNotFound { + t.Errorf("Expected status %d, got %d", http.StatusNotFound, w.Code) + } +} + +func TestHandleV1GraphQL_ExecuteQueryError(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + + tmpDir := t.TempDir() + catalogFS := os.DirFS(tmpDir) + + store := &mockCatalogStore{ + catalogFS: catalogFS, + } + + graphqlSvc := &mockGraphQLService{ + executeErr: context.DeadlineExceeded, + } + + handlers := NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + + query := `{"query": "{ summary { totalSchemas } }"}` + req := httptest.NewRequest(http.MethodPost, "/test-catalog/api/v1/graphql", strings.NewReader(query)) + req.SetPathValue("catalog", "test-catalog") + w := httptest.NewRecorder() + + handlers.handleV1GraphQL(w, req) + + if w.Code != http.StatusInternalServerError { + t.Errorf("Expected status %d, got %d", http.StatusInternalServerError, w.Code) + } +} + +func TestAllowedMethodsHandler_POSTOnlyForGraphQL(t *testing.T) { + rootURL, _ := url.Parse("http://localhost/") + store := &mockCatalogStore{} + graphqlSvc := &mockGraphQLService{} + + handlers := NewCatalogHandlers(store, graphqlSvc, rootURL, MetasHandlerDisabled, GraphQLQueriesEnabled) + handler := handlers.Handler() + + // Test POST to GraphQL endpoint - should be allowed + graphqlReq := httptest.NewRequest(http.MethodPost, "/test-catalog/api/v1/graphql", bytes.NewReader([]byte(`{"query": "{ summary { totalSchemas } }"}`))) + graphqlReq.SetPathValue("catalog", 
"test-catalog") + w := httptest.NewRecorder() + handler.ServeHTTP(w, graphqlReq) + + // Should not return 405 Method Not Allowed at the router level + // (handler itself returns 405 for GET, but router allows POST through) + if w.Code == http.StatusMethodNotAllowed && strings.Contains(w.Body.String(), "Method Not Allowed") { + t.Error("POST should be allowed for GraphQL endpoint at router level") + } +} diff --git a/internal/catalogd/storage/http_preconditions_check.go b/internal/catalogd/server/http_preconditions_check.go similarity index 99% rename from internal/catalogd/storage/http_preconditions_check.go rename to internal/catalogd/server/http_preconditions_check.go index 7fb5239b5..ceabad2d3 100644 --- a/internal/catalogd/storage/http_preconditions_check.go +++ b/internal/catalogd/server/http_preconditions_check.go @@ -5,7 +5,7 @@ // Source: Originally from Go's net/http/fs.go // https://cs.opensource.google/go/go/+/master:src/net/http/fs.go -package storage +package server import ( "net/http" diff --git a/internal/catalogd/service/graphql_service.go b/internal/catalogd/service/graphql_service.go new file mode 100644 index 000000000..e142d4447 --- /dev/null +++ b/internal/catalogd/service/graphql_service.go @@ -0,0 +1,163 @@ +package service + +import ( + "context" + "fmt" + "io/fs" + "sync" + + "github.com/graphql-go/graphql" + "golang.org/x/sync/singleflight" + + "github.com/operator-framework/operator-registry/alpha/declcfg" + + gql "github.com/operator-framework/operator-controller/internal/catalogd/graphql" +) + +// GraphQLService handles GraphQL schema generation and query execution for catalogs +type GraphQLService interface { + // GetSchema returns the GraphQL schema for a catalog, using cache if available + GetSchema(catalog string, catalogFS fs.FS) (*gql.DynamicSchema, error) + + // ExecuteQuery executes a GraphQL query against a catalog + ExecuteQuery(catalog string, catalogFS fs.FS, query string) (*graphql.Result, error) + + // InvalidateCache 
removes the cached schema for a catalog + InvalidateCache(catalog string) +} + +// CachedGraphQLService implements GraphQLService with an in-memory schema cache +type CachedGraphQLService struct { + schemaMux sync.RWMutex + schemaCache map[string]*gql.DynamicSchema + buildGroup singleflight.Group // Prevents duplicate concurrent schema builds +} + +// NewCachedGraphQLService creates a new GraphQL service with caching +func NewCachedGraphQLService() *CachedGraphQLService { + return &CachedGraphQLService{ + schemaCache: make(map[string]*gql.DynamicSchema), + } +} + +// GetSchema returns the GraphQL schema for a catalog, using cache if available +func (s *CachedGraphQLService) GetSchema(catalog string, catalogFS fs.FS) (*gql.DynamicSchema, error) { + // Check cache first (read lock) + s.schemaMux.RLock() + if cachedSchema, ok := s.schemaCache[catalog]; ok { + s.schemaMux.RUnlock() + return cachedSchema, nil + } + s.schemaMux.RUnlock() + + // Use singleflight to prevent duplicate concurrent builds for the same catalog + result, err, _ := s.buildGroup.Do(catalog, func() (interface{}, error) { + // Double-check cache after acquiring singleflight lock + s.schemaMux.RLock() + if cachedSchema, ok := s.schemaCache[catalog]; ok { + s.schemaMux.RUnlock() + return cachedSchema, nil + } + s.schemaMux.RUnlock() + + // Schema not in cache, build it + dynamicSchema, err := buildSchemaFromFS(catalogFS) + if err != nil { + return nil, err + } + + // Cache the result (write lock) + s.schemaMux.Lock() + s.schemaCache[catalog] = dynamicSchema + s.schemaMux.Unlock() + + return dynamicSchema, nil + }) + + if err != nil { + return nil, err + } + + return result.(*gql.DynamicSchema), nil +} + +// ExecuteQuery executes a GraphQL query against a catalog +func (s *CachedGraphQLService) ExecuteQuery(catalog string, catalogFS fs.FS, query string) (*graphql.Result, error) { + // Get or build the schema (uses cache and singleflight) + dynamicSchema, err := s.GetSchema(catalog, catalogFS) + if err 
!= nil { + return nil, fmt.Errorf("failed to get GraphQL schema: %w", err) + } + + // Execute the query + result := graphql.Do(graphql.Params{ + Schema: dynamicSchema.Schema, + RequestString: query, + }) + + return result, nil +} + +// InvalidateCache removes the cached schema for a catalog +func (s *CachedGraphQLService) InvalidateCache(catalog string) { + s.schemaMux.Lock() + delete(s.schemaCache, catalog) + s.schemaMux.Unlock() +} + +// buildSchemaFromFS builds a GraphQL schema from a catalog filesystem +func buildSchemaFromFS(catalogFS fs.FS) (*gql.DynamicSchema, error) { + var metas []*declcfg.Meta + var metasMux sync.Mutex + var walkErr error + + // Collect all metas from the catalog filesystem + // WalkMetasFS walks the filesystem concurrently, so we need to protect the metas slice and error + err := declcfg.WalkMetasFS(context.Background(), catalogFS, func(path string, meta *declcfg.Meta, err error) error { + metasMux.Lock() + defer metasMux.Unlock() + + if err != nil { + // Set shared error so other goroutines can check + if walkErr == nil { + walkErr = err + } + return err + } + + // If an error has already occurred, skip further mutation + if walkErr != nil { + return walkErr + } + + if meta != nil { + metas = append(metas, meta) + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("error walking catalog metas: %w", err) + } + + // Discover schema from collected metas + catalogSchema, err := gql.DiscoverSchemaFromMetas(metas) + if err != nil { + return nil, fmt.Errorf("error discovering schema: %w", err) + } + + // Organize metas by schema for resolvers + metasBySchema := make(map[string][]*declcfg.Meta) + for _, meta := range metas { + if meta.Schema != "" { + metasBySchema[meta.Schema] = append(metasBySchema[meta.Schema], meta) + } + } + + // Build dynamic GraphQL schema + dynamicSchema, err := gql.BuildDynamicGraphQLSchema(catalogSchema, metasBySchema) + if err != nil { + return nil, fmt.Errorf("error building GraphQL schema: %w", err) + } 
+ + return dynamicSchema, nil +} diff --git a/internal/catalogd/service/graphql_service_test.go b/internal/catalogd/service/graphql_service_test.go new file mode 100644 index 000000000..71e36967f --- /dev/null +++ b/internal/catalogd/service/graphql_service_test.go @@ -0,0 +1,307 @@ +package service + +import ( + "io/fs" + "sync" + "testing" + "testing/fstest" + "time" +) + +func TestCachedGraphQLService_CacheHit(t *testing.T) { + svc := NewCachedGraphQLService() + + // Create a test filesystem with valid catalog data + testFS := fstest.MapFS{ + "catalog.json": &fstest.MapFile{ + Data: []byte(`{ + "schema": "olm.package", + "name": "test-package", + "defaultChannel": "stable" + }`), + }, + } + + // First call - cache miss, should build schema + schema1, err := svc.GetSchema("test-catalog", testFS) + if err != nil { + t.Fatalf("First GetSchema failed: %v", err) + } + if schema1 == nil { + t.Fatal("Expected non-nil schema") + } + + // Second call - cache hit, should return same schema without rebuilding + schema2, err := svc.GetSchema("test-catalog", testFS) + if err != nil { + t.Fatalf("Second GetSchema failed: %v", err) + } + if schema2 != schema1 { + t.Error("Expected cache to return same schema instance") + } +} + +func TestCachedGraphQLService_InvalidateCache(t *testing.T) { + svc := NewCachedGraphQLService() + + testFS := fstest.MapFS{ + "catalog.json": &fstest.MapFile{ + Data: []byte(`{ + "schema": "olm.package", + "name": "test-package", + "defaultChannel": "stable" + }`), + }, + } + + // Build and cache schema + schema1, err := svc.GetSchema("test-catalog", testFS) + if err != nil { + t.Fatalf("GetSchema failed: %v", err) + } + + // Invalidate cache + svc.InvalidateCache("test-catalog") + + // Verify cache was cleared + svc.schemaMux.RLock() + _, exists := svc.schemaCache["test-catalog"] + svc.schemaMux.RUnlock() + + if exists { + t.Error("Expected cache to be cleared after InvalidateCache") + } + + // Next call should rebuild + schema2, err := 
svc.GetSchema("test-catalog", testFS) + if err != nil { + t.Fatalf("GetSchema after invalidation failed: %v", err) + } + if schema2 == schema1 { + t.Error("Expected new schema instance after cache invalidation") + } +} + +func TestCachedGraphQLService_ConcurrentAccess(t *testing.T) { + svc := NewCachedGraphQLService() + + testFS := fstest.MapFS{ + "catalog.json": &fstest.MapFile{ + Data: []byte(`{ + "schema": "olm.package", + "name": "test-package", + "defaultChannel": "stable" + }`), + }, + } + + // Run multiple concurrent GetSchema calls + const concurrency = 20 + var wg sync.WaitGroup + errors := make(chan error, concurrency) + schemas := make(chan *interface{}, concurrency) + + for i := 0; i < concurrency; i++ { + wg.Add(1) + go func() { + defer wg.Done() + schema, err := svc.GetSchema("test-catalog", testFS) + if err != nil { + errors <- err + return + } + // Store schema pointer as interface{} to compare instances + var schemaPtr interface{} = schema + schemas <- &schemaPtr + }() + } + + wg.Wait() + close(errors) + close(schemas) + + // Check for errors + for err := range errors { + t.Errorf("Concurrent GetSchema failed: %v", err) + } + + // All goroutines should get the same cached schema instance + var firstSchema *interface{} + schemaCount := 0 + for schema := range schemas { + schemaCount++ + if firstSchema == nil { + firstSchema = schema + } else if *schema != *firstSchema { + t.Error("Expected all concurrent calls to return same schema instance") + } + } + + if schemaCount != concurrency { + t.Errorf("Expected %d schemas, got %d", concurrency, schemaCount) + } +} + +func TestCachedGraphQLService_SingleflightDeduplication(t *testing.T) { + svc := NewCachedGraphQLService() + + // Track build count with a slow filesystem that takes time to build + var buildCount int + var buildMux sync.Mutex + + slowFS := &slowBuildFS{ + delay: 50 * time.Millisecond, + onBuild: func() { + buildMux.Lock() + buildCount++ + buildMux.Unlock() + }, + fs: fstest.MapFS{ + 
"catalog.json": &fstest.MapFile{ + Data: []byte(`{ + "schema": "olm.package", + "name": "test-package", + "defaultChannel": "stable" + }`), + }, + }, + } + + // Launch concurrent builds + const concurrency = 10 + var wg sync.WaitGroup + for i := 0; i < concurrency; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _, _ = svc.GetSchema("test-catalog", slowFS) + }() + } + + wg.Wait() + + // Singleflight should deduplicate - only 1 build should occur + buildMux.Lock() + finalCount := buildCount + buildMux.Unlock() + + if finalCount != 1 { + t.Errorf("Expected singleflight to deduplicate builds (1 build), got %d builds", finalCount) + } +} + +func TestCachedGraphQLService_MultipleCatalogs(t *testing.T) { + svc := NewCachedGraphQLService() + + fs1 := fstest.MapFS{ + "catalog.json": &fstest.MapFile{ + Data: []byte(`{"schema": "olm.package", "name": "catalog1"}`), + }, + } + fs2 := fstest.MapFS{ + "catalog.json": &fstest.MapFile{ + Data: []byte(`{"schema": "olm.package", "name": "catalog2"}`), + }, + } + + // Build schemas for two different catalogs + schema1, err := svc.GetSchema("catalog1", fs1) + if err != nil { + t.Fatalf("GetSchema for catalog1 failed: %v", err) + } + + schema2, err := svc.GetSchema("catalog2", fs2) + if err != nil { + t.Fatalf("GetSchema for catalog2 failed: %v", err) + } + + // Schemas should be different instances + if schema1 == schema2 { + t.Error("Expected different schemas for different catalogs") + } + + // Both should be cached independently + svc.schemaMux.RLock() + _, exists1 := svc.schemaCache["catalog1"] + _, exists2 := svc.schemaCache["catalog2"] + svc.schemaMux.RUnlock() + + if !exists1 || !exists2 { + t.Error("Expected both catalogs to be cached independently") + } + + // Invalidate only catalog1 + svc.InvalidateCache("catalog1") + + svc.schemaMux.RLock() + _, exists1AfterInvalidate := svc.schemaCache["catalog1"] + _, exists2AfterInvalidate := svc.schemaCache["catalog2"] + svc.schemaMux.RUnlock() + + if exists1AfterInvalidate { + 
t.Error("Expected catalog1 to be removed from cache") + } + if !exists2AfterInvalidate { + t.Error("Expected catalog2 to remain in cache") + } +} + +func TestCachedGraphQLService_ExecuteQuery(t *testing.T) { + svc := NewCachedGraphQLService() + + testFS := fstest.MapFS{ + "catalog.json": &fstest.MapFile{ + Data: []byte(`{ + "schema": "olm.package", + "name": "test-package", + "defaultChannel": "stable" + }`), + }, + } + + // Execute a simple introspection query + query := `{ __schema { queryType { name } } }` + result, err := svc.ExecuteQuery("test-catalog", testFS, query) + if err != nil { + t.Fatalf("ExecuteQuery failed: %v", err) + } + + if result == nil { + t.Fatal("Expected non-nil result") + } + + // Verify no GraphQL errors + if len(result.Errors) > 0 { + t.Errorf("Expected no GraphQL errors, got: %v", result.Errors) + } + + // Verify result has data + if result.Data == nil { + t.Error("Expected result to have data") + } +} + +// slowBuildFS wraps an fs.FS and adds a delay when Open is called +// to simulate slow schema building +type slowBuildFS struct { + delay time.Duration + onBuild func() + fs fstest.MapFS + built bool + mux sync.Mutex +} + +func (s *slowBuildFS) Open(name string) (fs.File, error) { + s.mux.Lock() + // Track that a build is happening (only once per instance) + if !s.built { + if s.onBuild != nil { + s.onBuild() + } + // Simulate slow build + time.Sleep(s.delay) + s.built = true + } + s.mux.Unlock() + return s.fs.Open(name) +} diff --git a/internal/catalogd/storage/localdir.go b/internal/catalogd/storage/localdir.go index 572ffbf36..a55bc8b1f 100644 --- a/internal/catalogd/storage/localdir.go +++ b/internal/catalogd/storage/localdir.go @@ -5,7 +5,6 @@ import ( "encoding/json" "errors" "fmt" - "io" "io/fs" "net/http" "net/url" @@ -16,9 +15,25 @@ import ( "golang.org/x/sync/errgroup" "golang.org/x/sync/singleflight" - "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" "github.com/operator-framework/operator-registry/alpha/declcfg" + + 
"github.com/operator-framework/operator-controller/internal/catalogd/server" + "github.com/operator-framework/operator-controller/internal/catalogd/service" +) + +// Re-export enum types and constants from server package for convenience +type ( + MetasHandlerMode = server.MetasHandlerMode + GraphQLQueriesMode = server.GraphQLQueriesMode +) + +const ( + MetasHandlerDisabled = server.MetasHandlerDisabled + MetasHandlerEnabled = server.MetasHandlerEnabled + GraphQLQueriesDisabled = server.GraphQLQueriesDisabled + GraphQLQueriesEnabled = server.GraphQLQueriesEnabled ) // LocalDirV1 is a storage Instance. When Storing a new FBC contained in @@ -27,26 +42,43 @@ import ( // done so that clients accessing the content stored in RootDir/.json1 // have an atomic view of the content for a catalog. type LocalDirV1 struct { - RootDir string - RootURL *url.URL - EnableMetasHandler bool + RootDir string + RootURL *url.URL + EnableMetasHandler MetasHandlerMode + EnableGraphQLQueries GraphQLQueriesMode m sync.RWMutex - // this singleflight Group is used in `getIndex()`` to handle concurrent HTTP requests - // optimally. With the use of this slightflight group, the index is loaded from disk + // this singleflight Group is used in `GetIndex()` to handle concurrent HTTP requests + // optimally. With the use of this singleflight group, the index is loaded from disk // once per concurrent group of HTTP requests being handled by the metas handler. // The single flight instance gives us a way to load the index from disk exactly once // per concurrent group of callers, and then let every concurrent caller have access to // the loaded index. This avoids lots of unnecessary open/decode/close cycles when concurrent // requests are being handled, which improves overall performance and decreases response latency. 
sf singleflight.Group + + // GraphQL service for handling schema generation and caching + graphqlSvc service.GraphQLService } var ( - _ Instance = (*LocalDirV1)(nil) - errInvalidParams = errors.New("invalid parameters") + _ Instance = (*LocalDirV1)(nil) ) +// NewLocalDirV1 creates a new LocalDirV1 storage instance +func NewLocalDirV1(rootDir string, rootURL *url.URL, enableMetasHandler MetasHandlerMode, enableGraphQLQueries GraphQLQueriesMode) *LocalDirV1 { + s := &LocalDirV1{ + RootDir: rootDir, + RootURL: rootURL, + EnableMetasHandler: enableMetasHandler, + EnableGraphQLQueries: enableGraphQLQueries, + } + if enableGraphQLQueries == GraphQLQueriesEnabled { + s.graphqlSvc = service.NewCachedGraphQLService() + } + return s +} + func (s *LocalDirV1) Store(ctx context.Context, catalog string, fsys fs.FS) error { s.m.Lock() defer s.m.Unlock() @@ -109,10 +141,32 @@ func (s *LocalDirV1) Store(ctx context.Context, catalog string, fsys fs.FS) erro } catalogDir := s.catalogDir(catalog) - return errors.Join( + err = errors.Join( os.RemoveAll(catalogDir), os.Rename(tmpCatalogDir, catalogDir), ) + if err != nil { + return err + } + + // Invalidate and pre-warm GraphQL schema cache if GraphQL service is enabled + if s.graphqlSvc != nil { + s.graphqlSvc.InvalidateCache(catalog) + + // Pre-warm the GraphQL schema cache using the newly created catalog directory + // Use the actual catalog directory filesystem, not the input fsys + catalogFS := os.DirFS(catalogDir) + if _, err := s.graphqlSvc.GetSchema(catalog, catalogFS); err != nil { + // Schema build failed - rollback by removing the catalog directory + // to maintain consistency (don't persist catalog without valid schema) + if removeErr := os.RemoveAll(catalogDir); removeErr != nil { + return fmt.Errorf("failed to pre-build GraphQL schema for catalog %q: %w (rollback also failed: %v)", catalog, err, removeErr) + } + return fmt.Errorf("failed to pre-build GraphQL schema for catalog %q: %w", catalog, err) + } + } + + return nil 
} // removeOrphanedTempDirs removes temporary staging directories that were created by a @@ -143,6 +197,11 @@ func (s *LocalDirV1) Delete(catalog string) error { s.m.Lock() defer s.m.Unlock() + // Invalidate GraphQL cache if service is enabled + if s.graphqlSvc != nil { + s.graphqlSvc.InvalidateCache(catalog) + } + return os.RemoveAll(s.catalogDir(catalog)) } @@ -218,132 +277,59 @@ func (s *LocalDirV1) BaseURL(catalog string) string { return s.RootURL.JoinPath(catalog).String() } +// StorageServerHandler returns an HTTP handler for serving catalog content +// This implements the Instance interface for backward compatibility func (s *LocalDirV1) StorageServerHandler() http.Handler { - mux := http.NewServeMux() - - mux.HandleFunc(s.RootURL.JoinPath("{catalog}", "api", "v1", "all").Path, s.handleV1All) - if s.EnableMetasHandler { - mux.HandleFunc(s.RootURL.JoinPath("{catalog}", "api", "v1", "metas").Path, s.handleV1Metas) - } - allowedMethodsHandler := func(next http.Handler, allowedMethods ...string) http.Handler { - allowedMethodSet := sets.New[string](allowedMethods...) 
- return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if !allowedMethodSet.Has(r.Method) { - http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed) - return - } - next.ServeHTTP(w, r) - }) - } - return allowedMethodsHandler(mux, http.MethodGet, http.MethodHead) -} - -func (s *LocalDirV1) handleV1All(w http.ResponseWriter, r *http.Request) { - s.m.RLock() - defer s.m.RUnlock() - - catalog := r.PathValue("catalog") - catalogFile, catalogStat, err := s.catalogData(catalog) - if err != nil { - httpError(w, err) - return - } - w.Header().Add("Content-Type", "application/jsonl") - http.ServeContent(w, r, "", catalogStat.ModTime(), catalogFile) + handlers := server.NewCatalogHandlers(s, s.graphqlSvc, s.RootURL, s.EnableMetasHandler, s.EnableGraphQLQueries) + return handlers.Handler() } -func (s *LocalDirV1) handleV1Metas(w http.ResponseWriter, r *http.Request) { +// GetCatalogData returns the catalog file and its metadata +// Implements server.CatalogStore interface +func (s *LocalDirV1) GetCatalogData(catalog string) (*os.File, os.FileInfo, error) { s.m.RLock() defer s.m.RUnlock() - // Check for unexpected query parameters - expectedParams := map[string]bool{ - "schema": true, - "package": true, - "name": true, - } - - for param := range r.URL.Query() { - if !expectedParams[param] { - httpError(w, errInvalidParams) - return - } - } - - catalog := r.PathValue("catalog") - catalogFile, catalogStat, err := s.catalogData(catalog) - if err != nil { - httpError(w, err) - return - } - defer catalogFile.Close() - - w.Header().Set("Last-Modified", catalogStat.ModTime().UTC().Format(timeFormat)) - done := checkPreconditions(w, r, catalogStat.ModTime()) - if done { - return - } - - schema := r.URL.Query().Get("schema") - pkg := r.URL.Query().Get("package") - name := r.URL.Query().Get("name") - - if schema == "" && pkg == "" && name == "" { - // If no parameters are provided, return the entire catalog (this is the same as 
/api/v1/all) - serveJSONLines(w, r, catalogFile) - return - } - idx, err := s.getIndex(catalog) - if err != nil { - httpError(w, err) - return - } - indexReader := idx.Get(catalogFile, schema, pkg, name) - serveJSONLines(w, r, indexReader) -} - -func (s *LocalDirV1) catalogData(catalog string) (*os.File, os.FileInfo, error) { catalogFile, err := os.Open(catalogFilePath(s.catalogDir(catalog))) if err != nil { return nil, nil, err } catalogFileStat, err := catalogFile.Stat() if err != nil { + if closeErr := catalogFile.Close(); closeErr != nil { + klog.ErrorS(closeErr, "failed to close catalog file after stat error") + } return nil, nil, err } return catalogFile, catalogFileStat, nil } -func httpError(w http.ResponseWriter, err error) { - var code int - switch { - case errors.Is(err, fs.ErrNotExist): - code = http.StatusNotFound - case errors.Is(err, fs.ErrPermission): - code = http.StatusForbidden - case errors.Is(err, errInvalidParams): - code = http.StatusBadRequest - default: - code = http.StatusInternalServerError - } - http.Error(w, fmt.Sprintf("%d %s", code, http.StatusText(code)), code) -} +// GetCatalogFS returns a filesystem interface for the catalog +// Implements server.CatalogStore interface +func (s *LocalDirV1) GetCatalogFS(catalog string) (fs.FS, error) { + s.m.RLock() + defer s.m.RUnlock() -func serveJSONLines(w http.ResponseWriter, r *http.Request, rs io.Reader) { - w.Header().Add("Content-Type", "application/jsonl") - // Copy the content of the reader to the response writer - // only if it's a Get request - if r.Method == http.MethodHead { - return - } - _, err := io.Copy(w, rs) + catalogDir := s.catalogDir(catalog) + info, err := os.Stat(catalogDir) if err != nil { - httpError(w, err) - return + if errors.Is(err, os.ErrNotExist) { + return nil, fs.ErrNotExist + } + return nil, err } + if !info.IsDir() { + return nil, fmt.Errorf("catalog path %q is not a directory", catalogDir) + } + return os.DirFS(catalogDir), nil } -func (s *LocalDirV1) 
getIndex(catalog string) (*index, error) { +// GetIndex returns the index for a catalog +// Implements server.CatalogStore interface +func (s *LocalDirV1) GetIndex(catalog string) (server.Index, error) { + s.m.RLock() + defer s.m.RUnlock() + idx, err, _ := s.sf.Do(catalog, func() (interface{}, error) { indexFile, err := os.Open(catalogIndexFilePath(s.catalogDir(catalog))) if err != nil { diff --git a/internal/catalogd/storage/localdir_test.go b/internal/catalogd/storage/localdir_test.go index 44387cb84..aa1286cc3 100644 --- a/internal/catalogd/storage/localdir_test.go +++ b/internal/catalogd/storage/localdir_test.go @@ -25,7 +25,7 @@ import ( const urlPrefix = "/catalogs/" -func TestLocalDirStoraget(t *testing.T) { +func TestLocalDirStorage(t *testing.T) { tests := []struct { name string setup func(*testing.T) (*LocalDirV1, fs.FS) @@ -35,10 +35,12 @@ func TestLocalDirStoraget(t *testing.T) { { name: "store and retrieve catalog content", setup: func(t *testing.T) (*LocalDirV1, fs.FS) { - s := &LocalDirV1{ - RootDir: t.TempDir(), - RootURL: &url.URL{Scheme: "http", Host: "test-addr", Path: urlPrefix}, - } + s := NewLocalDirV1( + t.TempDir(), + &url.URL{Scheme: "http", Host: "test-addr", Path: urlPrefix}, + MetasHandlerDisabled, + GraphQLQueriesDisabled, + ) return s, createTestFS(t) }, test: func(t *testing.T, s *LocalDirV1, fsys fs.FS) { @@ -73,10 +75,12 @@ func TestLocalDirStoraget(t *testing.T) { { name: "storing with metas handler enabled should create indices", setup: func(t *testing.T) (*LocalDirV1, fs.FS) { - s := &LocalDirV1{ - RootDir: t.TempDir(), - EnableMetasHandler: true, - } + s := NewLocalDirV1( + t.TempDir(), + nil, + MetasHandlerEnabled, + GraphQLQueriesDisabled, + ) return s, createTestFS(t) }, test: func(t *testing.T, s *LocalDirV1, fsys fs.FS) { @@ -100,7 +104,7 @@ func TestLocalDirStoraget(t *testing.T) { name: "concurrent reads during write should not cause data race", setup: func(t *testing.T) (*LocalDirV1, fs.FS) { dir := t.TempDir() - s := 
&LocalDirV1{RootDir: dir} + s := NewLocalDirV1(dir, nil, MetasHandlerDisabled, GraphQLQueriesDisabled) return s, createTestFS(t) }, test: func(t *testing.T, s *LocalDirV1, fsys fs.FS) { @@ -130,7 +134,7 @@ func TestLocalDirStoraget(t *testing.T) { { name: "delete nonexistent catalog", setup: func(t *testing.T) (*LocalDirV1, fs.FS) { - return &LocalDirV1{RootDir: t.TempDir()}, nil + return NewLocalDirV1(t.TempDir(), nil, MetasHandlerDisabled, GraphQLQueriesDisabled), nil }, test: func(t *testing.T, s *LocalDirV1, _ fs.FS) { err := s.Delete("nonexistent") @@ -212,7 +216,7 @@ func TestLocalDirStoraget(t *testing.T) { if err := os.Chmod(dir, 0000); err != nil { t.Fatal(err) } - return &LocalDirV1{RootDir: dir}, createTestFS(t) + return NewLocalDirV1(dir, nil, MetasHandlerDisabled, GraphQLQueriesDisabled), createTestFS(t) }, test: func(t *testing.T, s *LocalDirV1, fsys fs.FS) { err := s.Store(context.Background(), "test-catalog", fsys) @@ -239,7 +243,7 @@ func TestLocalDirStoraget(t *testing.T) { } func TestLocalDirServerHandler(t *testing.T) { - store := &LocalDirV1{RootDir: t.TempDir(), RootURL: &url.URL{Path: urlPrefix}} + store := NewLocalDirV1(t.TempDir(), &url.URL{Path: urlPrefix}, MetasHandlerDisabled, GraphQLQueriesDisabled) if store.Store(context.Background(), "test-catalog", createTestFS(t)) != nil { t.Fatal("failed to store test catalog and start server") } @@ -340,11 +344,12 @@ func TestLocalDirServerHandler(t *testing.T) { // Tests to verify the behavior of the metas endpoint, as described in // https://docs.google.com/document/d/1s6_9IFEKGQLNh3ueH7SF4Yrx4PW9NSiNFqFIJx0pU-8/ func TestMetasEndpoint(t *testing.T) { - store := &LocalDirV1{ - RootDir: t.TempDir(), - RootURL: &url.URL{Path: urlPrefix}, - EnableMetasHandler: true, - } + store := NewLocalDirV1( + t.TempDir(), + &url.URL{Path: urlPrefix}, + MetasHandlerEnabled, + GraphQLQueriesDisabled, + ) if store.Store(context.Background(), "test-catalog", createTestFS(t)) != nil { t.Fatal("failed to store test 
catalog") } @@ -484,11 +489,12 @@ func TestMetasEndpoint(t *testing.T) { } func TestServerLoadHandling(t *testing.T) { - store := &LocalDirV1{ - RootDir: t.TempDir(), - RootURL: &url.URL{Path: urlPrefix}, - EnableMetasHandler: true, - } + store := NewLocalDirV1( + t.TempDir(), + &url.URL{Path: urlPrefix}, + MetasHandlerEnabled, + GraphQLQueriesDisabled, + ) // Create large test data largeFS := fstest.MapFS{} diff --git a/internal/shared/util/image/pull_test.go b/internal/shared/util/image/pull_test.go index ca2cfa50a..bcdeceea4 100644 --- a/internal/shared/util/image/pull_test.go +++ b/internal/shared/util/image/pull_test.go @@ -40,7 +40,16 @@ func TestContainersImagePuller_Pull(t *testing.T) { defer shutdown() myModTime := time.Date(1985, 10, 25, 7, 53, 0, 0, time.FixedZone("PDT", -8*60*60)) - defaultContextFunc := func(context.Context) (*types.SystemContext, error) { return &types.SystemContext{}, nil } + + // Create a default context with insecure policy for tests that don't use buildSourceContextFunc + configDir := t.TempDir() + policyPath := filepath.Join(configDir, "policy.json") + insecurePolicy := `{"default":[{"type":"insecureAcceptAnything"}]}` + require.NoError(t, os.WriteFile(policyPath, []byte(insecurePolicy), 0600)) + + defaultContextFunc := func(context.Context) (*types.SystemContext, error) { + return &types.SystemContext{SignaturePolicyPath: policyPath}, nil + } testCases := []struct { name string diff --git a/manifests/experimental-e2e.yaml b/manifests/experimental-e2e.yaml index 4e19ea3f4..f903db09f 100644 --- a/manifests/experimental-e2e.yaml +++ b/manifests/experimental-e2e.yaml @@ -2647,6 +2647,7 @@ spec: - --pprof-bind-address=:6060 - --external-address=catalogd-service.olmv1-system.svc - --feature-gates=APIV1MetasHandler=true + - --feature-gates=GraphQLCatalogQueries=true - --tls-cert=/var/certs/tls.crt - --tls-key=/var/certs/tls.key - --pull-cas-dir=/var/ca-certs diff --git a/manifests/experimental.yaml b/manifests/experimental.yaml index 
c49ab79a0..91c92aa24 100644 --- a/manifests/experimental.yaml +++ b/manifests/experimental.yaml @@ -2566,6 +2566,7 @@ spec: - --metrics-bind-address=:7443 - --external-address=catalogd-service.olmv1-system.svc - --feature-gates=APIV1MetasHandler=true + - --feature-gates=GraphQLCatalogQueries=true - --tls-cert=/var/certs/tls.crt - --tls-key=/var/certs/tls.key - --pull-cas-dir=/var/ca-certs