diff --git a/-scripts/render-mermaid.sh b/-scripts/render-mermaid.sh new file mode 100755 index 0000000000..c568b6b529 --- /dev/null +++ b/-scripts/render-mermaid.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# +# Re-renders all .mmd Mermaid sources to .svg in the on-premises images folder. +# +# Usage (from repo root): +# ./-scripts/render-mermaid.sh +# +# Requirements: +# Node.js (npx downloads @mermaid-js/mermaid-cli automatically) +# +set -euo pipefail + +DIAGRAM_DIR="modules/ROOT/images/tinymceai-on-premises" +CONFIG_FILE=$(mktemp) + +cat > "$CONFIG_FILE" << 'JSON' +{ + "htmlLabels": false, + "flowchart": { "htmlLabels": false, "useMaxWidth": true }, + "sequence": { "useMaxWidth": true }, + "theme": "default" +} +JSON + +trap 'rm -f "$CONFIG_FILE"' EXIT + +count=0 +for mmd in "$DIAGRAM_DIR"/*.mmd; do + [ -f "$mmd" ] || continue + svg="${mmd%.mmd}.svg" + name=$(basename "$mmd") + printf " Rendering %s\n" "$name" + npx -y @mermaid-js/mermaid-cli -i "$mmd" -o "$svg" \ + -c "$CONFIG_FILE" --backgroundColor white 2>/dev/null + + # Mermaid outputs width="100%" which has no intrinsic size in tags. + # Replace with the actual pixel width from the viewBox so browsers can + # calculate the correct aspect ratio when the page scales the image. + vb_width=$(grep -o 'viewBox="[^"]*"' "$svg" | head -1 | awk -F'[ "]' '{print $4}') + if [ -n "$vb_width" ]; then + vb_int=$(printf "%.0f" "$vb_width") + perl -i -pe "s/width=\"100%\"/width=\"${vb_int}\"/" "$svg" + fi + + count=$((count + 1)) +done + +printf "\nRendered %d diagrams in %s\n" "$count" "$DIAGRAM_DIR" diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd new file mode 100644 index 0000000000..6d69c87d3a --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.mmd @@ -0,0 +1,4 @@ +flowchart LR + Editor[TinyMCE editor] <-->|chat / quick actions| AI[AI Service] + AI -->|MCP tools/call| MCP[MCP Server
knowledge-hub] + MCP -->|read| KB[Confluence ·
Notion ·
GitBook ·
internal wiki] diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg new file mode 100644 index 0000000000..2ab529c4b1 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-1.svg @@ -0,0 +1 @@ +chat / quick actionsMCP tools/callreadTinyMCE editorAI ServiceMCP Serverknowledge-hubConfluence ·Notion ·GitBook ·internal wiki \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.mmd b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.mmd new file mode 100644 index 0000000000..b8eb0690a6 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.mmd @@ -0,0 +1,14 @@ +flowchart LR + subgraph Tenants[Your SaaS customers] + CA[Customer A users] + CB[Customer B users] + CC[Customer C users] + end + subgraph AISvc[Single AI service deployment] + EA[Environment A
access keys A
isolated conversations] + EB[Environment B
access keys B
isolated conversations] + EC[Environment C
access keys C
isolated conversations] + end + CA --> EA --> OpenAI[OpenAI] + CB --> EB --> Anthropic[Anthropic] + CC --> EC --> Azure[Azure OpenAI] diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.svg new file mode 100644 index 0000000000..8687168cdd --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-2.svg @@ -0,0 +1 @@ +Single AI service deploymentYour SaaS customersCustomer A usersCustomer B usersCustomer C usersEnvironment Aaccess keys Aisolated conversationsEnvironment Baccess keys Bisolated conversationsEnvironment Caccess keys Cisolated conversationsOpenAIAnthropicAzure OpenAI \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.mmd b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.mmd new file mode 100644 index 0000000000..b58745d342 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.mmd @@ -0,0 +1,8 @@ +flowchart LR + Lawyer[TinyMCE editor
used by lawyer] <--> AI[AI Service] + AI -->|tools/call| MCP1[MCP: contract-db] + AI -->|tools/call| MCP2[MCP: compliance-checker] + AI -->|tools/call| MCP3[MCP: precedent-search] + MCP1 --> ContractDB[(Contract clause
repository)] + MCP2 --> ComplianceRules[(Regulatory
rule sets)] + MCP3 --> PrecedentIdx[(Precedent
search index)] diff --git a/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.svg b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.svg new file mode 100644 index 0000000000..97885c4cfb --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/advanced-scenarios-fig-3.svg @@ -0,0 +1 @@ +tools/calltools/calltools/callTinyMCE editorused by lawyerAI ServiceMCP: contract-dbMCP: compliance-checkerMCP: precedent-searchContract clauserepositoryRegulatoryrule setsPrecedentsearch index \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd new file mode 100644 index 0000000000..0a6555af04 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.mmd @@ -0,0 +1,28 @@ +flowchart TB + Browser["Browser
TinyMCE editor + tinymceai plugin"] + TokenEP["Your token endpoint
signs HS256 JWTs"] + Browser -->|"fetch JWT"| TokenEP + Browser -->|"HTTPS + Bearer JWT"| LB + + subgraph App["Application layer (stateless, N replicas)"] + LB["Reverse proxy / Load balancer
nginx · ALB · K8s Ingress
TLS termination · SSE pass-through"] + AIN["ai-service replica N"] + AI2["ai-service replica 2"] + AI1["ai-service replica 1"] + LB --> AIN + LB --> AI2 + LB --> AI1 + end + + subgraph Data["Shared data layer"] + DB[("SQL database
MySQL 8.0+ / PostgreSQL 13+")] + Cache[("Redis 3.2.6+")] + Storage[("File storage
S3 · Azure Blob · filesystem")] + end + + AI1 --> Data + + AI1 -->|"HTTPS"| LLM["LLM provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex ·
self-hosted"] + + AI1 -.->|"telemetry"| Obs["OpenTelemetry · Langfuse"] + AI1 -.->|"tool calls"| MCP["MCP servers"] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg new file mode 100644 index 0000000000..4828889949 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-1.svg @@ -0,0 +1 @@ +

Application layer (stateless, N replicas)fetch JWTHTTPS + Bearer JWTHTTPStelemetrytool callsShared data layerSQL databaseMySQL 8.0+ / PostgreSQL 13+Redis 3.2.6+File storageS3 · Azure Blob · filesystemBrowserTinyMCE editor + tinymceai pluginYour token endpointsigns HS256 JWTsReverse proxy / Load balancernginx · ALB · K8s IngressTLS termination · SSE pass-throughai-service replica Nai-service replica 2ai-service replica 1LLM providerOpenAI · Anthropic · Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · LangfuseMCP servers
\ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd new file mode 100644 index 0000000000..8fb6a35113 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.mmd @@ -0,0 +1,17 @@ +flowchart TD + Start([New deployment]) --> Q1{Evaluating or
going to production?} + Q1 -->|Evaluating locally| Compose[Docker Compose
all services on one host
Part 2 quick start] + Q1 -->|Production| Q2{Orchestrator?} + Q2 -->|Kubernetes| K8s[Kubernetes deployment
Section 33] + Q2 -->|AWS ECS / Fargate| ECS[ECS task definition
Section 34] + Q2 -->|Docker / Podman on VMs| VMs[Docker or Podman compose
Sections 9.1 / 9.2] + Q2 -->|Bare metal / no containers| Bare[Native install for
data layer; container
for AI service
Section 8.5] + Compose --> DB{Database?} + K8s --> DB + ECS --> DB + VMs --> DB + Bare --> DB + DB -->|Managed cloud DB| Managed[RDS · Cloud SQL ·
Azure Database] + DB -->|Self-managed| Self[Containers or native install] + Managed --> Done([Continue with Part 3]) + Self --> Done diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg new file mode 100644 index 0000000000..dcf71a1ca6 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-2.svg @@ -0,0 +1 @@ +Evaluating locallyProductionKubernetesAWS ECS / FargateDocker / Podman on VMsBare metal / no containersManaged cloud DBSelf-managedNew deploymentEvaluating orgoing to production?Docker Composeall services on one hostPart 2 quick startOrchestrator?Kubernetes deploymentSection 33ECS task definitionSection 34Docker or Podman composeSections 9.1 / 9.2Native install fordata layer; containerfor AI serviceSection 8.5Database?RDS · Cloud SQL ·Azure DatabaseContainers or native installContinue with Part 3 \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.mmd new file mode 100644 index 0000000000..e5fa13ba4f --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.mmd @@ -0,0 +1,18 @@ +flowchart TB + Internet([Internet]) --> Ingress[Ingress controller
nginx-ingress · ALB controller
proxy-buffering off] + Ingress --> SvcAI[Service: ai-service] + SvcAI --> Pod1[Pod: ai-service replica 1] + SvcAI --> Pod2[Pod: ai-service replica 2] + SvcAI --> PodN[Pod: ai-service replica N] + Pod1 --> SvcDB[Service: database
or external RDS] + Pod2 --> SvcDB + PodN --> SvcDB + Pod1 --> SvcRedis[Service: redis
or external ElastiCache] + Pod2 --> SvcRedis + PodN --> SvcRedis + Pod1 --> S3[(S3 / Azure Blob)] + Pod2 --> S3 + PodN --> S3 + HPA[HorizontalPodAutoscaler] -. scales .-> Pod1 + HPA -. scales .-> Pod2 + HPA -. scales .-> PodN diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg new file mode 100644 index 0000000000..a13caed517 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-3.svg @@ -0,0 +1 @@ +scalesscalesscalesInternetIngress controllernginx-ingress · ALBcontrollerproxy-buffering offService: ai-servicePod: ai-service replica 1Pod: ai-service replica 2Pod: ai-service replica NService: databaseor external RDSService: redisor external ElastiCacheS3 / Azure BlobHorizontalPodAutoscaler \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.mmd new file mode 100644 index 0000000000..ae5f01914b --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.mmd @@ -0,0 +1,25 @@ +flowchart LR + subgraph PR[PROVIDERS env var] + P1["my-openai-key
type: openai
apiKeys: [sk-...]"] + P2["my-bedrock
type: bedrock
credentials: {...}"] + P3["my-ollama
type: openai-compatible
baseUrl: .../v1"] + end + subgraph MD[MODELS env var] + M1["id: gpt-4.1
provider: my-openai-key
features: [...]"] + M2["id: us.anthropic.claude-sonnet-4-...
provider: my-bedrock
features: [...]"] + M3["id: qwen3:0.6b
provider: my-ollama
features: [...]"] + end + subgraph JWT[JWT auth.ai.permissions] + K1["ai:models:my-openai-key:gpt-4.1"] + K2["ai:models:my-bedrock:us.anthropic.claude-sonnet-4-..."] + K3["ai:models:my-ollama:qwen3:0.6b"] + end + M1 -.provider key.-> P1 + M2 -.provider key.-> P2 + M3 -.provider key.-> P3 + K1 -.gates access.-> M1 + K2 -.gates access.-> M2 + K3 -.gates access.-> M3 + M1 ==>|forwarded| LLM1[OpenAI API] + M2 ==>|forwarded| LLM2[AWS Bedrock] + M3 ==>|forwarded| LLM3[Local Ollama] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.svg new file mode 100644 index 0000000000..7a119ff245 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-4.svg @@ -0,0 +1 @@ +JWT auth.ai.permissionsMODELS env varPROVIDERS env varprovider keyprovider keyprovider keygates accessgates accessgates accessforwardedforwardedforwardedmy-openai-keytype: openaiapiKeys: [sk-...]my-bedrocktype: bedrockcredentials: {...}my-ollamatype: openai-compatiblebaseUrl: .../v1id: gpt-4.1provider: my-openai-keyfeatures: [...]id:us.anthropic.claude-sonnet-4-...provider: my-bedrockfeatures: [...]id: qwen3:0.6bprovider: my-ollamafeatures: [...]ai:models:my-openai-key:gpt-4.1ai:models:my-bedrock:us.anthropic.claude-sonnet-4-...ai:models:my-ollama:qwen3:0.6bOpenAI APIAWS BedrockLocal Ollama \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.mmd new file mode 100644 index 0000000000..87929497d5 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.mmd @@ -0,0 +1,29 @@ +sequenceDiagram + autonumber + actor User + participant Editor as TinyMCE editor
tinymceai plugin + participant App as Your backend
token endpoint + participant AI as AI service + participant LLM as LLM provider + + User->>Editor: Triggers an AI feature + Editor->>App: POST /api/ai-token
session cookie or Bearer + App->>App: Authenticate the user + Note over App: Sign HS256 JWT with API Secret
aud = environment ID
sub = user ID
auth.ai.permissions = [...] + App-->>Editor: { token: eyJ... } + Editor->>AI: POST /v1/conversations/id/messages
Authorization: Bearer eyJ... + AI->>AI: Verify HS256 signature
check aud, exp, permissions + + alt Token valid and permissions allow + AI->>LLM: Forward prompt + LLM-->>AI: Stream response chunks + AI-->>Editor: SSE text-delta events + else Signature does not match + AI-->>Editor: 401 invalid-jwt-signature + else aud not registered with AI runtime + AI-->>Editor: 401 invalid-jwt-payload + else Past expiry plus 60s leeway + AI-->>Editor: 401 invalid-jwt + else Permissions do not cover action + AI-->>Editor: 200 with allowed false + end diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.svg new file mode 100644 index 0000000000..c800f9248a --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-5.svg @@ -0,0 +1 @@ +LLM providerAI serviceYour backendtoken endpointTinyMCE editortinymceai pluginLLM providerAI serviceYour backendtoken endpointTinyMCE editortinymceai pluginSign HS256 JWT with API Secretaud = environment IDsub = user IDauth.ai.permissions = [...]alt[Token valid and permissions allow][Signature does not match][aud not registered with AI runtime][Past expiry plus 60s leeway][Permissions do not cover action]UserTriggers an AI feature1POST /api/ai-tokensession cookie or Bearer2Authenticate the user3{ token: eyJ... }4POST /v1/conversations/id/messagesAuthorization: Bearer eyJ...5Verify HS256 signaturecheck aud, exp, permissions6Forward prompt7Stream response chunks8SSE text-delta events9401 invalid-jwt-signature10401 invalid-jwt-payload11401 invalid-jwt12200 with allowed false13User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.mmd new file mode 100644 index 0000000000..452423aa61 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.mmd @@ -0,0 +1,20 @@ +sequenceDiagram + autonumber + actor User + participant Editor as TinyMCE + tinymceai plugin + participant Provider as tinymceai_token_provider
your function + participant App as Your backend
token endpoint + participant AI as AI service + + Note over Editor: tinymce.init runs once
plugin registers toolbar buttons + User->>Editor: Click AI button or open chat + Editor->>Provider: invoke + Provider->>App: fetch /api/ai-token
credentials include + App-->>Provider: { token eyJ... } + Provider-->>Editor: { token } + Editor->>AI: HTTPS request
Authorization Bearer eyJ... + AI-->>Editor: SSE stream + loop For each chunk + Editor->>Editor: Render streaming text + end + Note over Editor,Provider: Plugin re-invokes the provider
before token expiry
do not cache the JWT yourself diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.svg new file mode 100644 index 0000000000..f7f9c365fa --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-6.svg @@ -0,0 +1 @@ +AI serviceYour backendtoken endpointtinymceai_token_provideryour functionTinyMCE + tinymceai pluginAI serviceYour backendtoken endpointtinymceai_token_provideryour functionTinyMCE + tinymceai plugintinymce.init runs onceplugin registers toolbar buttonsloop[For each chunk]Plugin re-invokes the providerbefore token expirydo not cache the JWT yourselfUserClick AI button or open chat1invoke2fetch /api/ai-tokencredentials include3{ token eyJ... }4{ token }5HTTPS requestAuthorization Bearer eyJ...6SSE stream7Render streaming text8User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.mmd new file mode 100644 index 0000000000..a3059f2048 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.mmd @@ -0,0 +1,4 @@ +flowchart LR + Editor[TinyMCE editor] <-->|"chat / quick actions"| AI[AI Service] + AI -->|"MCP tools/call"| MCP[MCP Server
knowledge-hub] + MCP -->|"read"| KB[(Confluence ·
Notion ·
GitBook ·
internal wiki)] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.svg new file mode 100644 index 0000000000..3f0cb68ec5 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-7.svg @@ -0,0 +1 @@ +chat / quick actionsMCP tools/callreadTinyMCE editorAI ServiceMCP Serverknowledge-hubConfluence ·Notion ·GitBook ·internal wiki \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd new file mode 100644 index 0000000000..b8eb0690a6 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.mmd @@ -0,0 +1,14 @@ +flowchart LR + subgraph Tenants[Your SaaS customers] + CA[Customer A users] + CB[Customer B users] + CC[Customer C users] + end + subgraph AISvc[Single AI service deployment] + EA[Environment A
access keys A
isolated conversations] + EB[Environment B
access keys B
isolated conversations] + EC[Environment C
access keys C
isolated conversations] + end + CA --> EA --> OpenAI[OpenAI] + CB --> EB --> Anthropic[Anthropic] + CC --> EC --> Azure[Azure OpenAI] diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg new file mode 100644 index 0000000000..8687168cdd --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-8.svg @@ -0,0 +1 @@ +Single AI service deploymentYour SaaS customersCustomer A usersCustomer B usersCustomer C usersEnvironment Aaccess keys Aisolated conversationsEnvironment Baccess keys Bisolated conversationsEnvironment Caccess keys Cisolated conversationsOpenAIAnthropicAzure OpenAI \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd new file mode 100644 index 0000000000..5714d740be --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.mmd @@ -0,0 +1,20 @@ +flowchart TD + Start([Something is wrong]) --> Q1{Container is
running?
docker ps shows it} + Q1 -->|No - exited or wont pull| S1[Container startup failures] + Q1 -->|Yes| Q2{curl /health
returns 200?} + Q2 -->|No - times out or 5xx| S1 + Q2 -->|Yes| Q3{API call returns
auth error?} + Q3 -->|Yes - 401 allowed false
invalid-jwt-...| S2[API and JWT authentication] + Q3 -->|No| Q4{SSE stream
carries event error
from LLM?} + Q4 -->|Yes| S3[LLM provider errors] + Q4 -->|No| Q5{Editor side
broken?
no toolbar token 401
hanging stream} + Q5 -->|Yes| S4[Editor and front-end] + Q5 -->|No| Q6{Slow timing out
or failing under load?} + Q6 -->|Yes| S5[Performance and capacity] + Q6 -->|No| S6[Diagnostic recipes] + S1 --> Recipe([If none fit
see Diagnostic recipes
then escalate]) + S2 --> Recipe + S3 --> Recipe + S4 --> Recipe + S5 --> Recipe + S6 --> Recipe diff --git a/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg new file mode 100644 index 0000000000..f611b021be --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/complete-guide-fig-9.svg @@ -0,0 +1 @@ +No - exited or wont pullYesNo - times out or 5xxYesYes - 401 allowed falseinvalid-jwt-...NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar token 401hanging streamEditor and front-endSlow timing outor failing under load?Performance and capacityDiagnostic recipesIf none fitsee Diagnostic recipesthen escalate \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd new file mode 100644 index 0000000000..4e914e0bd2 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.mmd @@ -0,0 +1,12 @@ +flowchart TD + Start([Where will MySQL/Postgres and Redis run?]) --> Q1{Evaluating or
deploying to prod?} + Q1 -->|Evaluating locally| Compose[Docker Compose
image: mysql:8.0 or postgres:16
+ redis:7] + Q1 -->|Deploying| Q2{Cloud or self-managed?} + Q2 -->|Cloud / managed services| Managed[AWS RDS · Cloud SQL ·
Azure Database
+ ElastiCache · Memorystore ·
Azure Cache for Redis] + Q2 -->|Self-managed| Q3{Container runtime
available?} + Q3 -->|Docker or Podman| Containers[Containers on the same
network or pod as ai-service] + Q3 -->|None - bare metal or VM| Native[Native install
brew · apt · yum · dnf
service runs on host] + Compose --> Verify([Verify: nc -zv host port
then start ai-service]) + Managed --> Verify + Containers --> Verify + Native --> Verify diff --git a/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg new file mode 100644 index 0000000000..d0ae05837f --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/database-setup-fig-1.svg @@ -0,0 +1 @@ +Evaluating locallyDeployingCloud / managed servicesSelf-managedDocker or PodmanNone - bare metal or VMWhere will MySQL/Postgresand Redis run?Evaluating ordeploying to prod?Docker Composeimage: mysql:8.0 orpostgres:16+ redis:7Cloud or self-managed?AWS RDS · Cloud SQL ·Azure Database+ ElastiCache ·Memorystore ·Azure Cache for RedisContainer runtimeavailable?Containers on the samenetwork or pod asai-serviceNative installbrew · apt · yum · dnfservice runs on hostVerify: nc -zv host portthen start ai-service \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/eap-setup-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/eap-setup-guide-fig-1.svg new file mode 100644 index 0000000000..1f7fc0d0fc --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/eap-setup-guide-fig-1.svg @@ -0,0 +1 @@ +

Data LayerApplication Layerobtain JWTHTTPS + JWTHTTPSoptionalClient(TinyMCE Editor / REST API)JWT Endpoint(customer-provided)Load Balancer(optional)TinyMCE AI Service(1+ container instances)SQL DatabaseMySQL 8.0+ · Postgres 13+Redis 3.2.6+File StorageS3 · Azure Blob · filesystem · DBLLM Provider(OpenAI · Anthropic · GoogleAzure · Bedrock · Vertex AI · self-hosted)OpenTelemetry / Langfuse
\ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.mmd new file mode 100644 index 0000000000..9d1c8bc3e8 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.mmd @@ -0,0 +1,20 @@ +sequenceDiagram + autonumber + actor User + participant Editor as TinyMCE + tinymceai plugin + participant Provider as tinymceai_token_provider
(your function) + participant App as Your backend
(token endpoint) + participant AI as AI service + + Note over Editor: tinymce.init() runs once
plugin registers toolbar buttons + User->>Editor: Click AI button or open chat + Editor->>Provider: invoke() + Provider->>App: fetch('/api/ai-token', { credentials: 'include' }) + App-->>Provider: { token: "eyJ..." } + Provider-->>Editor: { token } + Editor->>AI: HTTPS request
Authorization: Bearer eyJ... + AI-->>Editor: SSE stream + loop For each chunk + Editor->>Editor: Render streaming text + end + Note over Editor,Provider: Plugin re-invokes the provider
before token expiry — do not
cache the JWT yourself diff --git a/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.svg new file mode 100644 index 0000000000..a8fbe09718 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/framework-integration-fig-1.svg @@ -0,0 +1 @@ +AI serviceYour backend(token endpoint)tinymceai_token_provider(your function)TinyMCE + tinymceai pluginAI serviceYour backend(token endpoint)tinymceai_token_provider(your function)TinyMCE + tinymceai plugintinymce.init() runs onceplugin registers toolbar buttonsloop[For each chunk]Plugin re-invokes the providerbefore token expiry — do notcache the JWT yourselfUserClick AI button or open chat1invoke()2fetch('/api/ai-token', { credentials: 'include' })3{ token: "eyJ..." }4{ token }5HTTPS requestAuthorization: Bearer eyJ...6SSE stream7Render streaming text8User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd new file mode 100644 index 0000000000..587af33a91 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.mmd @@ -0,0 +1,29 @@ +sequenceDiagram + autonumber + actor User + participant Editor as TinyMCE editor
(tinymceai plugin) + participant App as Your application backend
(token endpoint) + participant AI as AI service + participant LLM as LLM provider + + User->>Editor: Triggers an AI feature + Editor->>App: POST /api/ai-token
session cookie or Bearer + App->>App: Authenticate the user + Note over App: Sign HS256 JWT with API Secret
aud = environment ID
sub = user ID
auth.ai.permissions = [...] + App-->>Editor: { "token": "eyJ..." } + Editor->>AI: POST /v1/conversations/{id}/messages
Authorization: Bearer eyJ... + AI->>AI: Verify HS256 signature
check aud, exp, permissions + + alt Token valid and permissions allow the action + AI->>LLM: Forward prompt + LLM-->>AI: Stream response chunks + AI-->>Editor: SSE: text-delta events + else Signature does not match + AI-->>Editor: 401 invalid-jwt-signature + else aud is not registered with AI runtime + AI-->>Editor: 401 invalid-jwt-payload + else Past expiry plus 60s leeway + AI-->>Editor: 401 invalid-jwt + else Permissions do not cover the action + AI-->>Editor: 200 with allowed:false + end diff --git a/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg new file mode 100644 index 0000000000..8cd0598928 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/jwt-authentication-fig-1.svg @@ -0,0 +1 @@ +LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)LLM providerAI serviceYour application backend(token endpoint)TinyMCE editor(tinymceai plugin)Sign HS256 JWT with API Secretaud = environment IDsub = user IDauth.ai.permissions = [...]alt[Token valid and permissions allow the action][Signature does not match][aud is not registered with AI runtime][Past expiry plus 60s leeway][Permissions do not cover the action]UserTriggers an AI feature1POST /api/ai-tokensession cookie or Bearer2Authenticate the user3{ "token": "eyJ..." }4POST /v1/conversations/{id}/messagesAuthorization: Bearer eyJ...5Verify HS256 signaturecheck aud, exp, permissions6Forward prompt7Stream response chunks8SSE: text-delta events9401 invalid-jwt-signature10401 invalid-jwt-payload11401 invalid-jwt12200 with allowed:false13User \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd new file mode 100644 index 0000000000..8f5031c7b0 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.mmd @@ -0,0 +1,30 @@ +flowchart TB + Browser([TinyMCE in browser]) + Browser -->|HTTPS + JWT| LB[Reverse Proxy / Load Balancer
nginx · ALB · Ingress
TLS termination
proxy_buffering off] + LB -->|HTTP :8000| AI1[ai-service replica 1] + LB -->|HTTP :8000| AI2[ai-service replica 2] + LB -->|HTTP :8000| AIN[ai-service replica N] + + subgraph DataLayer["Shared data layer"] + DB[("MySQL 8.0+ /
Postgres 13+
Multi-AZ in prod")] + Cache[("Redis 7
cluster or managed")] + Storage[("S3 · Azure Blob ·
filesystem · DB")] + end + + AI1 --> DB + AI1 --> Cache + AI1 --> Storage + AI2 --> DB + AI2 --> Cache + AI2 --> Storage + AIN --> DB + AIN --> Cache + AIN --> Storage + + AI1 --> LLM[LLM Provider
OpenAI · Anthropic · Google ·
Azure · Bedrock · Vertex · self-hosted] + AI2 --> LLM + AIN --> LLM + + AI1 -.->|optional| Obs[OpenTelemetry · Langfuse ·
log aggregator] + AI2 -.-> Obs + AIN -.-> Obs diff --git a/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg new file mode 100644 index 0000000000..c3fb67a075 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/production-guide-fig-1.svg @@ -0,0 +1 @@ +Shared data layerHTTPS + JWTHTTP :8000HTTP :8000HTTP :8000optionalTinyMCE in browserReverse Proxy / LoadBalancernginx · ALB · IngressTLS terminationproxy_buffering offai-service replica 1ai-service replica 2ai-service replica NMySQL 8.0+ /Postgres 13+Multi-AZ in prodRedis 7cluster or managedS3 · Azure Blob ·filesystem · DBLLM ProviderOpenAI · Anthropic ·Google ·Azure · Bedrock · Vertex ·self-hostedOpenTelemetry · Langfuse ·log aggregator \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd new file mode 100644 index 0000000000..2086e9cf30 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.mmd @@ -0,0 +1,25 @@ +flowchart LR + subgraph PR[PROVIDERS env var · JSON object] + P1["my-openai-key
type: openai
apiKeys: [sk-...]"] + P2["my-bedrock
type: bedrock
credentials: {...}"] + P3["my-ollama
type: openai-compatible
baseUrl: .../v1"] + end + subgraph MD[MODELS env var · JSON array] + M1["id: gpt-4.1
provider: my-openai-key
features: [...]"] + M2["id: us.anthropic.claude-sonnet-4-...
provider: my-bedrock
features: [...]"] + M3["id: qwen3:0.6b
provider: my-ollama
features: [...]"] + end + subgraph JWT[JWT auth.ai.permissions] + K1["ai:models:my-openai-key:gpt-4.1"] + K2["ai:models:my-bedrock:us.anthropic.claude-sonnet-4-..."] + K3["ai:models:my-ollama:qwen3:0.6b"] + end + M1 -.references provider key.-> P1 + M2 -.references provider key.-> P2 + M3 -.references provider key.-> P3 + K1 -.gates per-user access.-> M1 + K2 -.gates per-user access.-> M2 + K3 -.gates per-user access.-> M3 + M1 ==>|forwarded to upstream| LLM1[OpenAI API] + M2 ==>|forwarded to upstream| LLM2[AWS Bedrock] + M3 ==>|forwarded to upstream| LLM3[Local Ollama] diff --git a/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg new file mode 100644 index 0000000000..a5f55d50a3 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/providers-guide-fig-1.svg @@ -0,0 +1 @@ +JWT auth.ai.permissionsMODELS env var · JSON arrayPROVIDERS env var · JSON objectreferences provider keyreferences provider keyreferences provider keygates per-user accessgates per-user accessgates per-user accessforwarded to upstreamforwarded to upstreamforwarded to upstreammy-openai-keytype: openaiapiKeys: [sk-...]my-bedrocktype: bedrockcredentials: {...}my-ollamatype: openai-compatiblebaseUrl: .../v1id: gpt-4.1provider: my-openai-keyfeatures: [...]id:us.anthropic.claude-sonnet-4-...provider: my-bedrockfeatures: [...]id: qwen3:0.6bprovider: my-ollamafeatures: [...]ai:models:my-openai-key:gpt-4.1ai:models:my-bedrock:us.anthropic.claude-sonnet-4-...ai:models:my-ollama:qwen3:0.6bOpenAI APIAWS BedrockLocal Ollama \ No newline at end of file diff --git a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd new file mode 100644 index 0000000000..e866425fd9 --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.mmd @@ -0,0 +1,20 @@ +flowchart TD + Start([Something is wrong]) --> Q1{Container is
running?
docker ps shows it} + Q1 -->|No - exited or won't pull| S1[Container startup failures] + Q1 -->|Yes| Q2{curl /health
returns 200?} + Q2 -->|No - times out or 5xx| S1 + Q2 -->|Yes| Q3{API call returns
auth error?} + Q3 -->|Yes - 401, allowed:false,
invalid-jwt-*| S2[API and JWT authentication] + Q3 -->|No| Q4{SSE stream
carries event: error
from LLM?} + Q4 -->|Yes| S3[LLM provider errors] + Q4 -->|No| Q5{Editor side
broken?
no toolbar, token 401,
hanging stream} + Q5 -->|Yes| S4[Editor and front-end] + Q5 -->|No| Q6{Slow, timing out,
or failing under load?} + Q6 -->|Yes| S5[Performance and capacity] + Q6 -->|No| S6[Production and scaling] + S1 --> Recipe([If none fit:
see Diagnostic recipes
then escalate to
support@tiny.cloud]) + S2 --> Recipe + S3 --> Recipe + S4 --> Recipe + S5 --> Recipe + S6 --> Recipe diff --git a/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg new file mode 100644 index 0000000000..a8651978ca --- /dev/null +++ b/modules/ROOT/images/tinymceai-on-premises/troubleshooting-fig-1.svg @@ -0,0 +1 @@ +No - exited or won't pullYesNo - times out or 5xxYesYes - 401, allowed:false,invalid-jwt-*NoYesNoYesNoYesNoSomething is wrongContainer isrunning?docker ps shows itContainer startup failurescurl /healthreturns 200?API call returnsauth error?API and JWT authenticationSSE streamcarries event: errorfrom LLM?LLM provider errorsEditor sidebroken?no toolbar, token 401,hanging streamEditor and front-endSlow, timing out,or failing under load?Performance and capacityProduction and scalingIf none fit:see Diagnostic recipesthen escalate tosupport@tiny.cloud \ No newline at end of file diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 9f85be3f19..f4e210e596 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -264,11 +264,22 @@ ****** xref:tinymceai-api-overview.adoc[API Overview] ****** xref:tinymceai-api-quick-start.adoc[API Quick Start] ****** xref:tinymceai-streaming.adoc[Streaming] -**** xref:tinymceai-jwt-authentication-intro.adoc[JWT Authentication] +**** xref:tinymceai-jwt-authentication-intro.adoc[JWT authentication] ***** xref:tinymceai-permissions.adoc[Permissions] ***** Guides ****** xref:tinymceai-with-jwt-authentication-nodejs.adoc[JWT authentication (Node.js)] ****** xref:tinymceai-with-jwt-authentication-php.adoc[JWT authentication (PHP)] +**** On-premises deployment +***** xref:tinymceai-on-premises.adoc[Overview] +***** xref:tinymceai-on-premises-getting-started.adoc[Getting started] +***** xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] +***** xref:tinymceai-on-premises-providers.adoc[LLM providers] +***** xref:tinymceai-on-premises-jwt.adoc[JWT authentication] +***** xref:tinymceai-on-premises-frameworks.adoc[Framework integration] +***** xref:tinymceai-on-premises-production.adoc[Production deployment] +***** xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios] +***** xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] +***** xref:tinymceai-on-premises-reference.adoc[Reference] **** xref:ai.adoc[AI Assistant (legacy)] ***** xref:ai-openai.adoc[OpenAI ChatGPT integration guide] ***** xref:ai-azure.adoc[Azure AI integration guide] diff --git a/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc b/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc new file mode 100644 index 0000000000..e0a9aee863 --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-advanced.adoc @@ -0,0 +1,439 @@ += TinyMCE AI on-premises: advanced scenarios +:navtitle: Advanced scenarios +:description: Advanced scenarios for TinyMCE AI on-premises service +:keywords: AI, on-premises, multi-tenant, MCP, regulated + +[.lead] +This guide covers enterprise scenarios for the AI on-premises service through concrete examples. Each scenario builds on the xref:tinymceai-on-premises-getting-started.adoc[Getting started guide]; teams may implement any scenario on its own. + +== Internal knowledge base through MCP + +*Use case:* Engineers writing documentation can query internal wikis, API specs, and runbooks directly from the editor, without switching context. 
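+
+For orientation, here is a minimal sketch of the `MCP_SERVERS` wiring this scenario builds toward. The `Authorization` header is a placeholder assumption for servers that require a shared token; the quick-start implementation below omits it:
+
+[source,bash]
+----
+# Placeholder bearer token; see the single-shared-token limitation below.
+-e MCP_SERVERS='{
+  "knowledge-hub": {
+    "url": "http://host.docker.internal:3001/mcp",
+    "headers": { "Authorization": "Bearer SHARED_MCP_TOKEN" },
+    "options": { "callToolTimeout": 30 }
+  }
+}'
+----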
+ +=== Architecture + +[.text-center] +image::tinymceai-on-premises/advanced-scenarios-fig-1.svg[MCP integration: TinyMCE rich text editor communicates with AI service which calls MCP knowledge-hub server,width=100%] + +=== Configuration reference + +The Model Context Protocol (MCP) allows the AI service to call external tools (internal wikis, API specs, runbooks, contract databases, compliance checkers) from inside conversations. The AI service connects over *Streamable HTTP transport*. + +[cols="1,3",options="header"] +|=== +|Option |Description +|`url` |HTTP endpoint of the MCP server (Streamable HTTP transport). +|`headers` |Auth headers sent with every request. Single shared token per server. See <>. +|`tools.disabled` |Array of tool names to block from LLM access. +|`options.callToolTimeout` |Per-tool-call timeout in seconds (default 60). +|=== + +NOTE: MCP tools are available in AI *conversations* only, not in reviews or quick actions. + +TIP: On Linux Docker, add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service to reach MCP servers running on the host. + +[[single-shared-token-limitation]] +=== Single-shared-token limitation + +The `headers` field is fixed at deploy time. Every MCP tool call shares the same token; there is no per-user MCP authentication path yet. If the MCP server needs per-user context, encode it in the conversation prompt or in a header that maps user identity at the MCP server side (for example, using a token the MCP server itself swaps for a per-user identity). + +=== Implementation + +. *Create an MCP server that exposes the knowledge base:* ++ +.Knowledge-base MCP server (Express) +[%collapsible] +==== +[source,javascript] +---- +// knowledge-mcp-server.js +const express = require('express'); +const app = express(); +app.use(express.json()); + +const KNOWLEDGE_BASE = { + 'api-guidelines': 'All REST APIs must use JSON, include pagination through Link headers, and return 4xx for client errors with a machine-readable error code.', + 'deployment-process': 'Deployments require: 1) PR approval, 2) passing CI, 3) staging verification, 4) production canary (5% traffic for 30min), 5) full rollout.', + 'security-policy': 'All user data must be encrypted at rest (AES-256) and in transit (TLS 1.3). 
PII requires additional field-level encryption.', +}; + +app.post('/mcp', (req, res) => { + const { method, id, params } = req.body; + + if (method === 'initialize') { + return res.json({ + jsonrpc: '2.0', id, + result: { + protocolVersion: '2024-11-05', + capabilities: { tools: {} }, + serverInfo: { name: 'knowledge-hub', version: '1.0.0' } + } + }); + } + + if (method === 'tools/list') { + return res.json({ + jsonrpc: '2.0', id, + result: { + tools: [{ + name: 'search_knowledge_base', + description: 'Search the company knowledge base for policies, guidelines, and procedures', + inputSchema: { + type: 'object', + properties: { + query: { type: 'string', description: 'Search query' } + }, + required: ['query'] + } + }, { + name: 'get_api_spec', + description: 'Get the OpenAPI spec for an internal service', + inputSchema: { + type: 'object', + properties: { + service: { type: 'string', description: 'Service name (for example user-service, billing-api)' } + }, + required: ['service'] + } + }] + } + }); + } + + if (method === 'tools/call') { + const { name, arguments: args } = params; + if (name === 'search_knowledge_base') { + const query = (args?.query || '').toLowerCase(); + const results = Object.entries(KNOWLEDGE_BASE) + .filter(([key]) => key.includes(query) || query.includes(key.split('-')[0])) + .map(([key, value]) => `##${key}\n${value}`) + .join('\n\n'); + return res.json({ + jsonrpc: '2.0', id, + result: { content: [{ type: 'text', text: results || 'No results found.' }] } + }); + } + return res.json({ + jsonrpc: '2.0', id, + result: { content: [{ type: 'text', text: 'Spec not found for: ' + args?.service }] } + }); + } + + res.json({ jsonrpc: '2.0', id, error: { code: -32601, message: 'Unknown method' } }); +}); + +app.listen(3001, () => console.log('Knowledge MCP server on http://localhost:3001/mcp')); +---- +==== + +. *Configure the AI service:* ++ +[source,bash] +---- +-e MCP_SERVERS='{ + "knowledge-hub": { + "url": "http://host.docker.internal:3001/mcp", + "options": { "callToolTimeout": 30 } + } +}' +---- + +. *Sample AI chat message:* ++ +____ +"What are the API guidelines for error handling?" +____ ++ +The assistant calls the `search_knowledge_base` tool, retrieves the relevant policy, and responds with sourced information without leaving the rich text editor. + + + +== Multi-tenant SaaS platform + +*Use case:* A SaaS platform provides AI writing features to customers. Each customer gets isolated conversations, separate large language model (LLM) budgets, and per-tenant configuration. + +=== Architecture + +[.text-center] +image::tinymceai-on-premises/advanced-scenarios-fig-2.svg[alt="Multi-tenant SaaS architecture with per-customer environments access keys and conversation isolation",width=100%] + +Each environment provides: + +* Its own access keys (independent rotation) +* Isolated conversation history (queries are partitioned by `sub` within an environment) +* Separate billing and usage tracking (through Langfuse or a custom metrics pipeline) + +=== Implementation + +. *Create one environment per customer through the Management Panel:* ++ +* Customer A -> Environment `env-customer-a` +* Customer B -> Environment `env-customer-b` +* Customer C -> Environment `env-customer-c` + +. 
*Token server generates JSON Web Tokens (JWTs) with the correct environment:* ++ +.Multi-tenant JWT generation +[%collapsible] +==== +[source,javascript] +---- +app.post('/api/ai-token', requireAuth, (req, res) => { + const customer = getCustomerConfig(req.user.organizationId); + + const token = jwt.sign({ + aud: customer.envId, + sub: req.user.id, + user: { name: req.user.name, email: req.user.email }, + auth: { + ai: { + permissions: getPermissionsForPlan(customer.plan) + } + } + }, customer.apiSecret, { algorithm: 'HS256', expiresIn: '1h' }); + + res.json({ token }); +}); + +function getPermissionsForPlan(plan) { + switch (plan) { + case 'enterprise': + return ['ai:conversations:*', 'ai:models:agent', 'ai:actions:system:*', 'ai:reviews:system:*']; + case 'pro': + return ['ai:conversations:*', 'ai:actions:system:*']; + case 'basic': + return ['ai:actions:system:*']; + } +} + +function envFor(tenantId) { + return { + envId: process.env[`AI_ENV_ID_${tenantId}`], + apiSecret: process.env[`AI_API_SECRET_${tenantId}`] + }; +} +---- +==== + +. *Result:* Full data isolation between customers, with feature gating based on subscription tier. + +CAUTION: Conversation history is isolated by the `sub` claim in the JWT. Reusing a single `sub` value for multiple users within one environment causes those users to share conversation history. Always use a stable, unique-per-user identifier (such as an internal user ID) as the `sub` value. + + + +== Custom models with guardrails + +*Use case:* A regulated industry (healthcare, finance, legal) needs AI writing assistance but must use approved models with content filtering. + +=== Implementation + +. *Use a self-hosted model with an OpenAI-compatible API (such as vLLM or Ollama):* ++ +[source,bash] +---- +-e PROVIDERS='{ + "approved-llm": { + "type": "openai-compatible", + "baseUrl": "http://host.docker.internal:8080/v1", + "headers": {"Authorization": "Bearer internal-key"} + } +}' +---- + +. *Restrict to specific models only:* ++ +[source,bash] +---- +-e MODELS='[{ + "id": "llama-3.1-70b-medical", + "name": "Medical Assistant (Llama 3.1 70B)", + "description": "Fine-tuned for medical documentation. HIPAA-compliant.", + "provider": "approved-llm", + "recommended": true, + "features": ["conversations", "reviews", "actions"] +}, { + "id": "llama-3.1-8b-general", + "name": "General Writing (Llama 3.1 8B)", + "description": "Fast general-purpose model for drafting and editing.", + "provider": "approved-llm", + "features": ["actions"] +}]' +---- + +. *Result:* The configuration exposes only approved, audited models. Content does not leave the network. Combine with Langfuse for a full audit trail. + + + +== AI-powered document pipeline + +*Use case:* Legal team drafts contracts. AI assists with clause generation, compliance checking, and precedent search, powered by internal legal databases. 
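+
+The three MCP servers configured below follow the same JSON-RPC shape as the knowledge-hub server shown earlier on this page. As an illustration only, a compliance-checker might advertise a tool like this in its tools/list response (the tool name and schema here are assumptions, not a shipped contract):
+
+[source,json]
+----
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "result": {
+    "tools": [{
+      "name": "check_compliance",
+      "description": "Check a contract section against a named regulatory rule set (assumed example tool)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "text": { "type": "string", "description": "Contract section to check" },
+          "ruleSet": { "type": "string", "description": "Rule set name, for example GDPR" }
+        },
+        "required": ["text", "ruleSet"]
+      }
+    }]
+  }
+}
+----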
+ +=== Architecture + +[.text-center] +image::tinymceai-on-premises/advanced-scenarios-fig-3.svg[Regulated industry scenario: legal editor connects to AI service with contract-db and compliance MCP servers,width=100%] + +=== Configuration + +.Document pipeline MCP server configuration +[%collapsible] +==== +[source,bash] +---- +-e MCP_SERVERS='{ + "contract-db": { + "url": "http://host.docker.internal:3001/mcp", + "options": {"callToolTimeout": 30} + }, + "compliance-checker": { + "url": "http://host.docker.internal:3002/mcp", + "options": {"callToolTimeout": 60} + }, + "precedent-search": { + "url": "http://host.docker.internal:3003/mcp", + "tools": {"disabled": ["delete_precedent"]}, + "options": {"callToolTimeout": 120} + } +}' +---- +==== + +*Example prompts:* + +* "Draft a non-compete clause for California employees" +* "Check this contract section for GDPR compliance issues" +* "Find precedent for limitation of liability in SaaS agreements" + +Internal databases supply the data for these prompts. Aside from the LLM request itself, no content goes to external services. + + + +== Web-augmented research assistant + +*Use case:* Content team writing blog posts and marketing copy can pull live data from the web and internal sources. + +=== Configuration + +[source,bash] +---- +-e WEBRESOURCES_ENABLED='true' \ +-e WEBRESOURCES_ENDPOINT='http://host.docker.internal:4000/scrape' \ +-e WEBRESOURCES_REQUEST_TIMEOUT='10000' \ +-e WEBSEARCH_ENABLED='true' \ +-e WEBSEARCH_ENDPOINT='http://host.docker.internal:4001/search' \ +-e WEBSEARCH_REQUEST_TIMEOUT='10000' \ +-e WEBSEARCH_HEADERS='{"Authorization":"Bearer search-api-key"}' +---- + +=== Web scraping endpoint contract + +[cols="1,2",options="header"] +|=== +|Direction |Payload +|Request |JSON object with a `url` field (page to fetch). +|Response |JSON object with `type` (`text/html` or `text/markdown`) and `data` (body content). +|=== + +.Request body +[source,json] +---- +{ "url": "https://example.com/article" } +---- + +.Response body +[source,json] +---- +{ "type": "text/html", "data": "

<html><body>Example page body</body></html>
" } +---- + +==== Scraper example (Playwright) + +[source,javascript] +---- +// scraper-service.js +const { chromium } = require('playwright'); +const express = require('express'); +const app = express(); +app.use(express.json()); + +app.post('/scrape', async (req, res) => { + const browser = await chromium.launch(); + const page = await browser.newPage(); + await page.goto(req.body.url, { waitUntil: 'networkidle' }); + const content = await page.content(); + await browser.close(); + res.json({ type: 'text/html', data: content }); +}); + +app.listen(4000); +---- + +=== Web search endpoint contract + +[cols="1,2",options="header"] +|=== +|Direction |Payload +|Request |JSON object with a `query` field (search string). +|Response |JSON object with a `results` array; each item includes `url`, `text`, `title`, and optional `author`, `publishedAt`, and `favicon`. +|=== + +.Request body +[source,json] +---- +{ "query": "search string" } +---- + +.Response body +[source,json] +---- +{ + "results": [ + { + "url": "https://example.com/article", + "text": "Content snippet", + "title": "Article Title", + "author": "Author", + "publishedAt": "2026-04-30T10:00:00Z", + "favicon": "https://example.com/favicon.ico" + } + ] +} +---- + +==== Search example (SerpAPI) + +[source,javascript] +---- +// search-service.js +const express = require('express'); +const app = express(); +app.use(express.json()); + +app.post('/search', async (req, res) => { + const response = await fetch( + `https://serpapi.com/search.json?q=${encodeURIComponent(req.body.query)}&api_key=${process.env.SERP_API_KEY}` + ); + const data = await response.json(); + const results = (data.organic_results || []).slice(0, 5).map(r => ({ + url: r.link, + title: r.title, + text: r.snippet + })); + res.json({ results }); +}); + +app.listen(4001); +---- + +NOTE: A model must include `capabilities.webSearch: true` in its `MODELS` entry to expose the web search toggle. + +*Example prompts:* + +* "Research the latest trends in AI governance and write a summary" +* "Read this URL and rewrite the key points for the target audience: pass:[https://…]" + + + +For production deployment guidance including Kubernetes manifests, scaling, security hardening, rate limiting, and observability, see xref:tinymceai-on-premises-production.adoc[Production deployment]. + +For common errors and debugging steps, see xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]. diff --git a/modules/ROOT/pages/tinymceai-on-premises-database.adoc b/modules/ROOT/pages/tinymceai-on-premises-database.adoc new file mode 100644 index 0000000000..215dd1759e --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-database.adoc @@ -0,0 +1,668 @@ += Database, Redis, and infrastructure setup +:navtitle: Database, Redis, and storage +:description: Database, Redis, and file storage setup for the TinyMCE AI On-Premises service +:keywords: AI, on-premises, database, MySQL, PostgreSQL, Redis, Docker, Podman, file storage, S3, Azure Blob + +This page covers the data layer: the SQL database, Redis, and file storage. +For container runtimes, reverse proxies, Transport Layer Security (TLS), Kubernetes, and ECS deployment, see the xref:tinymceai-on-premises-production.adoc[Production deployment guide]. + +== Supported versions + +[cols="1,1,1,2",options="header"] +|=== +|Component |Minimum |Recommended |Notes + +|MySQL +|8.0 +|8.0.x (latest patch) +|Pin to `mysql:8.0`. See <>. 
+ +|PostgreSQL +|13 +|16 +| + +|Redis +|3.2.6 +|7.x +|Redis Cluster and TLS supported through `REDIS_CLUSTER_NODES` and `REDIS_TLS_ENABLE`. +|=== + +The AI service supports both MySQL and PostgreSQL equally. Pick whichever the operations team already runs. + +== Choosing a setup path + +[.text-center] +image::tinymceai-on-premises/database-setup-fig-1.svg[Database setup decision tree: local Docker Compose vs managed cloud database for evaluation and production,width=100%] + +All paths produce the same end state: a running database the AI service can connect to. + +[cols="1,1,1",options="header"] +|=== +|Path |MySQL |PostgreSQL + +|Docker / Podman |Yes |Yes +|Docker Compose |Yes |Yes +|Native (macOS / Linux) |Yes |Yes +|Managed cloud (RDS, Cloud SQL, Azure) |Yes |Yes +|=== + + + +[[postgresql-schema-prerequisite]] +== PostgreSQL schema prerequisite + +The AI service expects a schema named `cs-on-premises` (with a hyphen). If that schema does not exist, the container crashes on first boot with: + +.... +error: schema "cs-on-premises" does not exist +.... + +Apply one of the following fixes *before* starting the AI service for the first time. + +=== Option A: pre-create the schema + +The double-quotes are mandatory because the schema name contains a hyphen. + +[source,sql] +---- +CREATE SCHEMA "cs-on-premises"; +---- + +Verify with `\dn` in psql. `cs-on-premises` should appear in the list. + +=== Option B: use the default `public` schema + +Set the `DATABASE_SCHEMA` environment variable on the AI service container: + +.... +DATABASE_SCHEMA=public +.... + +This bypasses the hyphenated schema entirely. + +NOTE: MySQL does not have this issue. The database itself is the namespace, set through `DATABASE_DATABASE`. + + + +[[mysql-version-pinning]] +== MySQL version pinning + +Do *not* use `mysql:8`. That tag now floats to MySQL 8.4, which removes the `default-authentication-plugin=mysql_native_password` startup flag the AI service relies on. The container crashloops with: + +.... +[ERROR] [MY-000067] [Server] unknown variable 'default-authentication-plugin=mysql_native_password'. +[ERROR] [MY-010119] [Server] Aborting +.... + +Pin to `mysql:8.0` in every manifest: `docker run`, Docker Compose, Kubernetes, Helm, ECS. Running MySQL 8.4 with workarounds (removing the flag and switching to `caching_sha2_password`) is not a supported configuration. + +TIP: The same principle applies to PostgreSQL. Pin `postgres:16` rather than `postgres:latest`. + + + +[[database-user-privileges]] +== Database user privileges + +On first boot the AI service runs schema migrations and creates roughly 32 tables across the following namespaces: `ai_assistant_*`, `environments__*`, `security__*`, `insights__*`, `blob_storage__*`, and `cs_migrations*`. + +The database user needs enough privilege to create, alter, and operate on these tables. + +=== MySQL + +[source,sql] +---- +CREATE USER 'ai_service'@'%' IDENTIFIED BY 'STRONG_PASSWORD'; +GRANT SELECT, INSERT, UPDATE, DELETE, + ALTER, CREATE, DROP, INDEX, + TRIGGER, LOCK TABLES, REFERENCES + ON ai_service.* TO 'ai_service'@'%'; +FLUSH PRIVILEGES; +---- + +.Development shortcut +[%collapsible] +==== +[source,sql] +---- +GRANT ALL PRIVILEGES ON ai_service.* TO 'ai_service'@'%'; +---- +==== + +[NOTE] +-- +Some builds report false-positive "Not enough permissions to access database" errors even with `ALL PRIVILEGES`. If this occurs, grant the privileges globally rather than per-database, or use the MySQL `root` user for development. 
+-- + +=== PostgreSQL + +[source,sql] +---- +CREATE USER ai_service WITH PASSWORD 'STRONG_PASSWORD'; +CREATE DATABASE ai_service OWNER ai_service; +\c ai_service +CREATE SCHEMA "cs-on-premises" AUTHORIZATION ai_service; +GRANT CREATE, USAGE ON SCHEMA "cs-on-premises" TO ai_service; +GRANT ALL ON ALL TABLES IN SCHEMA "cs-on-premises" TO ai_service; +GRANT ALL ON ALL SEQUENCES IN SCHEMA "cs-on-premises" TO ai_service; +ALTER DEFAULT PRIVILEGES IN SCHEMA "cs-on-premises" + GRANT ALL ON TABLES TO ai_service; +ALTER DEFAULT PRIVILEGES IN SCHEMA "cs-on-premises" + GRANT ALL ON SEQUENCES TO ai_service; +---- + +.Development shortcut +[%collapsible] +==== +[source,sql] +---- +GRANT ALL ON SCHEMA "cs-on-premises" TO ai_service; +---- +==== + +If `DATABASE_SCHEMA=public` was chosen, substitute `public` for `"cs-on-premises"` in each grant statement. + + + +== Database setup + +=== Docker Compose (recommended for evaluation) + +.MySQL compose file +[%collapsible] +==== +[source,yaml] +---- +services: + mysql: + image: mysql:8.0 + environment: + MYSQL_ROOT_PASSWORD: ROOT_PASSWORD + MYSQL_DATABASE: ai_service + MYSQL_USER: ai_service + MYSQL_PASSWORD: STRONG_PASSWORD + ports: + - "3306:3306" + volumes: + - mysql_data:/var/lib/mysql + healthcheck: + test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] + interval: 10s + timeout: 5s + retries: 5 + + redis: + image: redis:7 + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + mysql_data: +---- +==== + +.PostgreSQL compose file +[%collapsible] +==== +[source,yaml] +---- +services: + postgres: + image: postgres:16 + environment: + POSTGRES_DB: ai_service + POSTGRES_USER: ai_service + POSTGRES_PASSWORD: STRONG_PASSWORD + ports: + - "5432:5432" + volumes: + - pg_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ai_service -d ai_service"] + interval: 10s + timeout: 5s + retries: 5 + + redis: + image: redis:7 + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + pg_data: +---- +==== + +After `docker compose up -d`, create the PostgreSQL schema (if not using `DATABASE_SCHEMA=public`): + +[source,bash] +---- +docker compose exec postgres psql -U ai_service -d ai_service \ + -c 'CREATE SCHEMA "cs-on-premises";' +---- + +=== Docker single container + +.MySQL +[%collapsible] +==== +[source,bash] +---- +docker run -d \ + --name ai-mysql \ + -e MYSQL_ROOT_PASSWORD=ROOT_PASSWORD \ + -e MYSQL_DATABASE=ai_service \ + -e MYSQL_USER=ai_service \ + -e MYSQL_PASSWORD=STRONG_PASSWORD \ + -p 3306:3306 \ + -v ai_mysql_data:/var/lib/mysql \ + mysql:8.0 +---- +==== + +.PostgreSQL +[%collapsible] +==== +[source,bash] +---- +docker run -d \ + --name ai-postgres \ + -e POSTGRES_DB=ai_service \ + -e POSTGRES_USER=ai_service \ + -e POSTGRES_PASSWORD=STRONG_PASSWORD \ + -p 5432:5432 \ + -v ai_pg_data:/var/lib/postgresql/data \ + postgres:16 +---- + +Then create the schema: + +[source,bash] +---- +docker exec -i ai-postgres psql -U ai_service -d ai_service \ + -c 'CREATE SCHEMA "cs-on-premises";' +---- +==== + +TIP: For Podman, substitute `podman` for `docker` throughout. On rootless Podman, use named volumes rather than bind-mounted host paths to avoid SELinux and UID mapping issues. 
+
+=== Native install (macOS)
+
+.MySQL and PostgreSQL on macOS
+[%collapsible]
+====
+*MySQL:*
+
+[source,bash]
+----
+brew install mysql
+brew services start mysql
+mysql_secure_installation
+mysql -u root -p <<'SQL'
+CREATE DATABASE ai_service;
+CREATE USER 'ai_service'@'%' IDENTIFIED BY 'STRONG_PASSWORD';
+GRANT SELECT, INSERT, UPDATE, DELETE, ALTER, CREATE, DROP,
+      INDEX, TRIGGER, LOCK TABLES, REFERENCES
+  ON ai_service.* TO 'ai_service'@'%';
+FLUSH PRIVILEGES;
+SQL
+----
+
+*PostgreSQL:*
+
+[source,bash]
+----
+brew install postgresql@16
+brew services start postgresql@16
+createuser -P ai_service
+createdb -O ai_service ai_service
+psql -d ai_service -c 'CREATE SCHEMA "cs-on-premises" AUTHORIZATION ai_service;'
+----
+
+Verify all services are running:
+
+[source,bash]
+----
+brew services list
+----
+====
+
+=== Native install (Linux)
+
+.MySQL and PostgreSQL on Debian/Ubuntu
+[%collapsible]
+====
+*MySQL:*
+
+[source,bash]
+----
+sudo apt update
+sudo apt install -y mysql-server
+sudo systemctl enable --now mysql
+sudo mysql_secure_installation
+sudo mysql <<'SQL'
+CREATE DATABASE ai_service;
+CREATE USER 'ai_service'@'%' IDENTIFIED BY 'STRONG_PASSWORD';
+GRANT SELECT, INSERT, UPDATE, DELETE, ALTER, CREATE, DROP,
+      INDEX, TRIGGER, LOCK TABLES, REFERENCES
+  ON ai_service.* TO 'ai_service'@'%';
+FLUSH PRIVILEGES;
+SQL
+----
+
+To allow remote connections, edit `/etc/mysql/mysql.conf.d/mysqld.cnf`, set `bind-address = 0.0.0.0`, and restart with `sudo systemctl restart mysql`.
+
+*PostgreSQL:*
+
+[source,bash]
+----
+sudo apt update
+sudo apt install -y postgresql postgresql-contrib
+sudo systemctl enable --now postgresql
+sudo -u postgres psql <<'SQL'
+CREATE USER ai_service WITH PASSWORD 'STRONG_PASSWORD';
+CREATE DATABASE ai_service OWNER ai_service;
+SQL
+sudo -u postgres psql -d ai_service \
+  -c 'CREATE SCHEMA "cs-on-premises" AUTHORIZATION ai_service;'
+----
+
+To allow remote connections, edit `/etc/postgresql/16/main/postgresql.conf` (`listen_addresses = '*'`) and add to `/etc/postgresql/16/main/pg_hba.conf`:
+
+....
+host    ai_service    ai_service    0.0.0.0/0    scram-sha-256
+....
+
+Restart with `sudo systemctl restart postgresql`.
+====
+
+=== Managed cloud
+
+The AI service handles schema migration automatically. The remaining preparation steps are:
+
+. Provision the database instance (RDS, Cloud SQL, or Azure Database).
+. Create the database (`ai_service`).
+. Create a dedicated user with the privileges documented in <<database-user-privileges>>.
+. *PostgreSQL only:* create the `cs-on-premises` schema or set `DATABASE_SCHEMA=public`.
+. Open the security group or firewall for the AI service on port `3306` (MySQL) or `5432` (PostgreSQL).
+
+[cols="1,1,1,1",options="header"]
+|===
+|Provider |MySQL |PostgreSQL |Redis
+
+|AWS |RDS for MySQL |RDS for PostgreSQL |ElastiCache for Redis
+|GCP |Cloud SQL (MySQL) |Cloud SQL (PostgreSQL) |Memorystore for Redis
+|Azure |Azure Database for MySQL |Azure Database for PostgreSQL |Azure Cache for Redis
+|===
+
+For production, enable Multi-AZ (or the equivalent zonal redundancy) and automated backups.
+
+[[host-docker-internal]]
+=== Connecting to a host-local database from Docker
+
+When the AI service runs in Docker but the database or Redis runs natively on the host, the container must resolve the host's IP address.
+
+*Docker Desktop (macOS, Windows)* and *Podman 4{plus}* inject `host.docker.internal` automatically.
+
+*Native Linux Docker* does not. Add `host-gateway` explicitly:
+
+[source,yaml]
+----
+services:
+  ai-service:
+    image: registry.containers.tiny.cloud/ai-service:latest
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      DATABASE_HOST: host.docker.internal
+      REDIS_HOST: host.docker.internal
+----
+
+Or with `docker run`:
+
+[source,bash]
+----
+docker run --add-host=host.docker.internal:host-gateway ...
+----
+
+
+
+== Redis
+
+Every AI service instance must reach Redis. Redis holds session coordination, Server-Sent Events (SSE) delivery, and rate-limiting state. A temporary Redis outage degrades streaming but does not destroy persistent data.
+
+=== Setup
+
+Redis is typically included in the Docker Compose file alongside the database (see the compose examples above). For standalone setup:
+
+[source,bash]
+----
+docker run -d --name ai-redis -p 6379:6379 -v ai_redis_data:/data redis:7
+----
+
+.macOS / Linux native install
+[%collapsible]
+====
+*macOS:*
+
+[source,bash]
+----
+brew install redis
+brew services start redis
+----
+
+*Linux (Debian/Ubuntu):*
+
+[source,bash]
+----
+sudo apt install -y redis-server
+sudo systemctl enable --now redis-server
+----
+====
+
+=== Connection variables
+
+[cols="1,1,2",options="header"]
+|===
+|Variable |Required |Description
+
+|`REDIS_HOST` |Yes |Hostname
+|`REDIS_PORT` |No |Default `6379`
+|`REDIS_PASSWORD` |No |Password
+|`REDIS_USER` |No |Username (Redis 6{plus} ACL)
+|`REDIS_DB` |No |Database number (default `1`)
+|`REDIS_IP_FAMILY` |No |Set to `6` for IPv6
+|===
+
+=== TLS
+
+[cols="1,2",options="header"]
+|===
+|Variable |Description
+
+|`REDIS_TLS_ENABLE` |`true` to enable TLS
+|`REDIS_TLS_CA` |Path to CA certificate
+|`REDIS_TLS_KEY` |Path to client key
+|`REDIS_TLS_CERT` |Path to client certificate
+|===
+
+=== Cluster
+
+[cols="1,2",options="header"]
+|===
+|Variable |Description
+
+|`REDIS_CLUSTER_NODES` |Comma-separated `host:port[:password]` list
+|`REDIS_IP_FAMILY` |Set to `6` for IPv6 domains
+|===
+
+.Cluster examples
+[%collapsible]
+====
+[source,bash]
+----
+# Standard cluster
+REDIS_CLUSTER_NODES="redis1.example.com:7000,redis2.example.com:7001,redis3.example.com:7002"
+
+# Cluster with per-node passwords
+REDIS_CLUSTER_NODES="redis1.example.com:7000:pass1,redis2.example.com:7001:pass2"
+
+# IPv6 cluster
+REDIS_IP_FAMILY=6
+REDIS_CLUSTER_NODES="[::1]:7000,[::1]:7001,[::1]:7002"
+----
+====
+
+IMPORTANT: In production, always set `REDIS_PASSWORD` or use a managed Redis instance with authentication enabled.
+
+
+
+== File storage
+
+Separate from the SQL database, the AI service persists user file uploads (attachments, images). The storage back end is selected by the `STORAGE_DRIVER` environment variable.
+
+[cols="1,2,2",options="header"]
+|===
+|Driver |When to use |Notes
+
+|`database`
+|Demos and smallest deployments
+|Stores files as SQL blobs. Hard cap around 4 GB total. No extra configuration required.
+
+|`filesystem`
+|Single-instance with a persistent volume
+|Requires a writable mounted volume. See <<filesystem-storage>>.
+
+|`s3`
+|Production on AWS, or S3-compatible (MinIO, Wasabi)
+|Use a same-region bucket.
+
+|`azure`
+|Production on Azure
+|Azure Blob Storage.
+|===
+
+=== S3
+
+[source,bash]
+----
+STORAGE_DRIVER=s3
+STORAGE_REGION=us-east-1
+STORAGE_ACCESS_KEY_ID=ACCESS_KEY
+STORAGE_SECRET_ACCESS_KEY=SECRET_KEY
+STORAGE_BUCKET=BUCKET_NAME
+STORAGE_ENDPOINT=https://custom-s3-endpoint   # optional, for S3-compatible
+----
+
+NOTE: The correct variable names are `STORAGE_BUCKET` and `STORAGE_REGION`, not `STORAGE_S3_BUCKET` or `STORAGE_S3_REGION`.
+
+=== Azure Blob
+
+[source,bash]
+----
+STORAGE_DRIVER=azure
+STORAGE_ACCOUNT_NAME=ACCOUNT_NAME
+STORAGE_ACCOUNT_KEY=ACCOUNT_KEY
+STORAGE_CONTAINER=CONTAINER_NAME
+STORAGE_ENDPOINT=https://custom-endpoint   # optional
+----
+
+[[filesystem-storage]]
+=== Filesystem
+
+[source,bash]
+----
+STORAGE_DRIVER=filesystem
+STORAGE_LOCATION=/tmp/ai-storage
+----
+
+IMPORTANT: The container runs as a non-root user and cannot write under `/var`. Mount a writable volume and point `STORAGE_LOCATION` at the mount point: `-v ./ai-storage:/tmp/ai-storage`.
+
+=== Database
+
+[source,bash]
+----
+STORAGE_DRIVER=database
+----
+
+Files are stored in the SQL database as blobs, capped at roughly 4 GB total. This is the simplest option for initial evaluation.
+
+
+
+== Verification
+
+=== MySQL
+
+[source,bash]
+----
+mysql --host=DB_HOST --user=ai_service --password=STRONG_PASSWORD \
+  ai_service --port=3306 -e "SELECT 1"
+----
+
+Expected: a table with `1` in a single column.
+
+=== PostgreSQL
+
+[source,bash]
+----
+psql -h DB_HOST -U ai_service -d ai_service -c "SELECT 1"
+----
+
+Expected: `?column?` returning `1`.
+
+=== Redis
+
+[source,bash]
+----
+redis-cli -h REDIS_HOST ping
+----
+
+Expected: `PONG`.
+
+=== AI service migration
+
+After starting the AI service, confirm it has connected and run migrations:
+
+[source,bash]
+----
+docker logs ai-service 2>&1 | grep -i 'migrat\|schema\|database'
+----
+
+Expected output (paraphrased):
+
+....
+Connecting to database (driver=postgres host=...)
+Running migrations on schema "cs-on-premises"
+Migrations complete: 32 tables ready
+Server is listening on port 8000.
+....
+
+If `schema "cs-on-premises" does not exist` appears, return to <<postgresql-schema-prerequisite>>. If `unknown variable 'default-authentication-plugin'` appears, return to <<mysql-version-pinning>>.
+
+To list the tables created by migration:
+
+.PostgreSQL
+[source,sql]
+----
+SELECT table_name FROM information_schema.tables
+  WHERE table_schema = 'cs-on-premises'
+  ORDER BY table_name;
+----
+
+.MySQL
+[source,sql]
+----
+SHOW TABLES IN ai_service;
+----
+
+Tables prefixed `ai_assistant_`, `environments__`, `security__`, `insights__`, `blob_storage__`, and `cs_migrations` should appear.
diff --git a/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc
new file mode 100644
index 0000000000..6990af476b
--- /dev/null
+++ b/modules/ROOT/pages/tinymceai-on-premises-frameworks.adoc
@@ -0,0 +1,222 @@
+= TinyMCE AI on-premises: editor-side integration
+:navtitle: Framework integration
+:description: Connecting the TinyMCE editor to the on-premises AI service from React, Vue, Angular, Svelte, or vanilla JavaScript.
+:keywords: AI, on-premises, React, Vue, Angular, Svelte, token provider
+
+
+This page covers the *editor-side* configuration that connects TinyMCE to the on-premises AI service. It assumes:
+
+* The AI service is already running. See xref:tinymceai-on-premises-getting-started.adoc[Getting started] for setup instructions.
+* A token endpoint exists that signs JSON Web Tokens (JWTs) for the AI service. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for back-end implementations.
+* The TinyMCE API key has the AI feature enabled. Retrieve or upgrade a key at https://www.tiny.cloud/my-account/integrate/.
+
+For general framework setup (installing wrappers, component structure, server-side rendering (SSR) patterns), see the existing integration guides:
+
+* xref:react-cloud.adoc[React]
+* xref:vue-cloud.adoc[Vue.js]
+* xref:angular-cloud.adoc[Angular]
+* xref:svelte-cloud.adoc[Svelte]
+
+The on-premises AI integration adds the options documented below to the standard TinyMCE `init` configuration.
+
+
+
+== Required editor options
+
+[cols="1,3",options="header"]
+|===
+|Option |Description
+
+|`plugins`
+|Must include `tinymceai`.
+
+|`toolbar`
+|Include one or more of `tinymceai-chat`, `tinymceai-review`, `tinymceai-quickactions`.
+
+|`tinymceai_service_url`
+|The origin of the AI service (no trailing slash, no path), for example `\https://ai.yourcompany.com`.
+
+|`tinymceai_token_provider`
+|A function returning `Promise<{ token: string }>`. See <<token-provider>> below.
+|===
+
+
+
+== Minimal example
+
+The following vanilla JavaScript example contains every on-premises-specific option. The same `init` options apply identically inside the React, Vue, Angular, and Svelte wrapper components.
+
+[source,html]
+----
+<!DOCTYPE html>
+<html>
+<head>
+  <script src="/path/to/tinymce/tinymce.min.js"></script>
+</head>
+<body>
+  <textarea id="editor"></textarea>
+  <script>
+    tinymce.init({
+      selector: '#editor',
+      plugins: 'tinymceai',
+      toolbar: 'undo redo | bold italic | tinymceai-chat tinymceai-review tinymceai-quickactions',
+      tinymceai_service_url: 'https://ai.yourcompany.com',
+      tinymceai_token_provider: () =>
+        fetch('/api/ai-token', { method: 'POST' })
+          .then((r) => r.json())
+          .then((data) => ({ token: data.token })),
+    });
+  </script>
+</body>
+</html>
+----
+
+Replace `/path/to/tinymce/` with the location of the self-hosted TinyMCE assets. See xref:installation.adoc[Self-hosted installation] for download and setup instructions.
+
+
+
+[[token-provider]]
+== `tinymceai_token_provider`
+
+A function that returns a `Promise` resolving to an object with a `token` property containing the JWT string.
+
+.Expected return shape
+[source,javascript]
+----
+{ token: 'eyJhbGciOiJIUzI1NiIs...' }
+----
+
+.Example provider
+[source,javascript]
+----
+tinymceai_token_provider: () => {
+  return fetch('/api/ai-token', { method: 'POST' })
+    .then((r) => r.json())
+    .then((data) => ({ token: data.token }));
+}
+----
+
+[cols="1,3",options="header"]
+|===
+|Behavior |Detail
+
+|Automatic refresh
+|The plugin calls the provider on initialization and again when the cached token nears expiry (60-second safety margin). Do not cache the JWT inside the provider.
+
+|Error handling
+|If the function rejects or the endpoint returns a non-OK response, the plugin surfaces an error in the editor UI.
+
+|Token lifetime
+|Tokens should be short-lived (5-15 minutes recommended). See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for signing key, payload structure, and lifetime guidance.
+|===
+
+
+
+== Authenticating the token request
+
+The `tinymceai_token_provider` fetches a JWT from the application back end. How that back end authenticates the browser request depends on the application architecture.
+
+=== Session cookie
+
+If the page and the token endpoint share an origin (or a parent domain), the browser sends session cookies automatically:
+
+[source,javascript]
+----
+fetch('/api/ai-token', { method: 'POST', credentials: 'include' })
+----
+
+For cross-origin token endpoints, the back end must respond with `Access-Control-Allow-Origin: <page-origin>` (not `*`) and `Access-Control-Allow-Credentials: true`, and the session cookie must be set with `SameSite=None; Secure`.
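+
+As an illustration, the following Express sketch sets those response headers with the `cors` middleware package (the package and the origin value are assumptions; substitute the real page origin):
+
+[source,javascript]
+----
+const express = require('express');
+const cors = require('cors');
+
+const app = express();
+
+// Reflect only the editor's origin and allow credentialed requests.
+// A wildcard origin cannot be combined with credentials, which is
+// why the origin must be listed explicitly.
+app.use('/api/ai-token', cors({
+  origin: 'https://app.yourcompany.com',
+  credentials: true,
+}));
+----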
+ +=== Bearer header + +If the application already holds a session JWT (injected at render time, or from an auth library), forward it as a header: + +[source,javascript] +---- +fetch('/api/ai-token', { + method: 'POST', + headers: { 'Authorization': `Bearer ${sessionJwt}` } +}) +---- + +This pattern avoids cookies entirely and works well for cross-origin setups. + + + +== Cross-origin requests to the AI service + +When `tinymceai_service_url` points to a different origin from the page (the common production case), the AI service must return Cross-Origin Resource Sharing (CORS) headers permitting the editor origin. The service reads the `ALLOWED_ORIGINS` environment variable for this. + +To verify CORS from a terminal: + +[source,bash] +---- +curl -i -X OPTIONS https://ai.yourcompany.com/v1/conversations \ + -H 'Origin: https://app.yourcompany.com' \ + -H 'Access-Control-Request-Method: POST' \ + -H 'Access-Control-Request-Headers: authorization,content-type' +---- + +The response should include `Access-Control-Allow-Origin: \https://app.yourcompany.com`. If it shows `*` or no CORS header, update `ALLOWED_ORIGINS` on the AI service container and restart. + + + +== Content Security Policy (CSP) + +If the application sets a `Content-Security-Policy` header, allow the AI service origin in `connect-src`: + +.... +Content-Security-Policy: + connect-src 'self' https://ai.yourcompany.com; + script-src 'self'; +.... + +If using the Tiny CDN instead of self-hosted assets, also add `\https://cdn.tiny.cloud` to `script-src`. + + + + +== Common integration errors + +[cols="1,1,2",options="header"] +|=== +|Symptom |Likely cause |Fix + +|Editor loads but no AI buttons appear +|`plugins` does not include `tinymceai`, or TinyMCE is version 7.x or earlier +|Set `plugins: 'tinymceai'` and confirm the script URL uses `/tinymce/8/`. Verify the API key has the AI feature enabled. + +|`POST /api/ai-token` returns 401 +|The token endpoint rejects the fetch +|Confirm the fetch sends the session cookie (`credentials: 'include'`) or `Authorization` header that the back end expects. + +|AI responses hang then time out +|Reverse proxy is buffering Server-Sent Events (SSE) +|Disable proxy buffering. See xref:tinymceai-on-premises-production.adoc[Production deployment]. + +|Browser console shows a CORS error on `/v1/conversations` +|`ALLOWED_ORIGINS` does not include the editor origin +|Update `ALLOWED_ORIGINS` and restart the AI service. + +|`tinymceai_token_provider` called in a tight loop +|Token endpoint returns invalid JSON or non-200 +|Validate: `curl -X POST http://localhost:3000/api/ai-token` should return `pass:c[{"token":"eyJ..."}]` with HTTP 200. +|=== + +For other issues, see xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]. 
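+
+For the last row in the table above, a token provider that fails fast on non-OK responses makes the endpoint problem visible in the editor UI instead of looping (a defensive variant of the provider shown earlier):
+
+[source,javascript]
+----
+tinymceai_token_provider: () =>
+  fetch('/api/ai-token', { method: 'POST', credentials: 'include' })
+    .then((r) => {
+      if (!r.ok) {
+        // Rejecting the Promise makes the plugin surface the failure.
+        throw new Error(`Token endpoint returned HTTP ${r.status}`);
+      }
+      return r.json();
+    })
+    .then((data) => ({ token: data.token }))
+----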
+
+
+
+== See also
+
+* xref:tinymceai-on-premises-getting-started.adoc[Getting started]
+* xref:tinymceai-on-premises-jwt.adoc[JWT authentication]
+* xref:tinymceai-on-premises-providers.adoc[Large language model (LLM) providers]
+* xref:tinymceai-on-premises-production.adoc[Production deployment]
+* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]
diff --git a/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc
new file mode 100644
index 0000000000..487554c0a4
--- /dev/null
+++ b/modules/ROOT/pages/tinymceai-on-premises-getting-started.adoc
@@ -0,0 +1,467 @@
+= Getting started with TinyMCE AI on-premises
+:navtitle: Getting started
+:description: Five-minute Docker Compose quick start for the TinyMCE AI on-premises service
+:keywords: AI, on-premises, getting started, docker, quick start
+
+This section produces a fully working setup (AI service, database, Redis, token server, and a browser editor) in roughly five minutes on any machine with Docker. This quick start validates the stack components before designing a production deployment. Production engineers should still review this section to understand the conceptual flow before continuing to xref:tinymceai-on-premises-production.adoc[the Production Deployment Guide].
+
+== Five-minute demo with Docker Compose
+
+=== Create the project folder
+
+[source,bash]
+----
+mkdir tinymce-ai-onpremise && cd tinymce-ai-onpremise
+----
+
+=== Authenticate with the container registry
+
+The service image lives at `registry.containers.tiny.cloud/ai-service`.
+
+For Docker:
+
+[source,bash]
+----
+docker login -u 'TINY_REGISTRY_USERNAME' https://registry.containers.tiny.cloud
+# Docker prompts for the password; this avoids leaking it in shell history.
+----
+
+For Podman:
+
+[source,bash]
+----
+podman login -u 'TINY_REGISTRY_USERNAME' registry.containers.tiny.cloud
+----
+
+Replace `TINY_REGISTRY_USERNAME` with the username supplied by the Tiny account representative. If credentials have not been received, contact `support@tiny.cloud`.
+
+=== Pull the AI service image
+
+[source,bash]
+----
+docker pull registry.containers.tiny.cloud/ai-service:latest
+----
+
+For Podman, substitute `podman pull`. For production, pin a specific version tag (for example `:5.1.0`) rather than `:latest`.
+
+=== Create `docker-compose.yml`
+
+Create the file with exactly the contents below. Indentation is two spaces, never tabs.
+
+[source,yaml]
+----
+services:
+  mysql:
+    image: mysql:8.0
+    environment:
+      MYSQL_ROOT_PASSWORD: ${DB_PASSWORD:-changeme}
+      MYSQL_DATABASE: ai_service
+    ports:
+      - "3306:3306"
+    volumes:
+      - mysql_data:/var/lib/mysql
+    healthcheck:
+      test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  redis:
+    image: redis:7
+    ports:
+      - "6379:6379"
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+volumes:
+  mysql_data:
+----
+
+TIP: Pin `mysql:8.0`, not `mysql:8`. The `:8` tag points to MySQL 8.4, which is incompatible with the AI service. See xref:tinymceai-on-premises-database.adoc#mysql-version-pinning[MySQL version pinning] for details.
+
+PostgreSQL is equally supported. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for an equivalent compose file. Review the xref:tinymceai-on-premises-database.adoc#postgresql-schema-prerequisite[PostgreSQL schema prerequisite] before switching.
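+
+Before starting anything, the file can be syntax-checked; `docker compose config` parses and resolves the compose file and reports the offending line if the indentation is wrong:
+
+[source,bash]
+----
+docker compose config --quiet && echo "docker-compose.yml is valid"
+----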
+
+[NOTE]
+--
+If the AI service needs to reach the host machine (for example a self-hosted Ollama running on the host), the container must be able to resolve `host.docker.internal`. Docker Desktop (macOS, Windows) and Podman 4{plus} auto-inject this alias; native Linux Docker does not. When running the AI service through a compose file, add an `extra_hosts` entry to its service block:
+
+[source,yaml]
+----
+extra_hosts:
+  - "host.docker.internal:host-gateway"
+----
+
+For the `docker run` launch used later in this guide, pass `--add-host=host.docker.internal:host-gateway` instead.
+--
+
+=== Create the `.env` file
+
+[source,bash]
+----
+# --- Required: provided by Tiny ---
+LICENSE_KEY=PASTE_SUPPLIED_LICENSE_KEY_HERE
+TINYMCE_API_KEY=PASTE_TINYMCE_API_KEY_HERE
+
+# --- Required: strong secret used to log into the Management Panel ---
+MANAGEMENT_SECRET=REPLACE_WITH_STRONG_SECRET
+
+# --- Required: database password (must match docker-compose.yml) ---
+DB_PASSWORD=changeme
+
+# --- Required: at least one LLM provider key ---
+OPENAI_API_KEY=sk-proj-PASTE_OPENAI_KEY_HERE
+# ANTHROPIC_API_KEY=sk-ant-PASTE_ANTHROPIC_KEY_HERE
+# GOOGLE_API_KEY=AIza-PASTE_GOOGLE_KEY_HERE
+
+# --- Filled in after creating an environment (leave blank for now) ---
+AI_ENV_ID=
+AI_API_SECRET=
+----
+
+IMPORTANT: `LICENSE_KEY` and `TINYMCE_API_KEY` are different credentials. `LICENSE_KEY` is the long string from the account representative. `TINYMCE_API_KEY` is the short string from the tiny.cloud dashboard.
+
+=== Start MySQL and Redis
+
+[source,bash]
+----
+docker compose up -d
+----
+
+Wait ~15 seconds for MySQL to initialize, then verify:
+
+[source,bash]
+----
+docker compose ps
+----
+
+Both containers should report `healthy` in the STATUS column. If MySQL still shows `starting`, wait another 10 seconds and re-run.
+
+=== Launch the AI service
+
+Run from the same folder as the `.env` file:
+
+.Full launch script
+[%collapsible]
+====
+[source,bash]
+----
+set -a && source .env && set +a
+
+PROVIDERS='{'
+if [ -n "$OPENAI_API_KEY" ]; then
+  PROVIDERS+='"openai":{"type":"openai","apiKeys":["'"$OPENAI_API_KEY"'"]}'
+fi
+if [ -n "$ANTHROPIC_API_KEY" ]; then
+  [ "$PROVIDERS" != '{' ] && PROVIDERS+=','
+  PROVIDERS+='"anthropic":{"type":"anthropic","apiKeys":["'"$ANTHROPIC_API_KEY"'"]}'
+fi
+if [ -n "$GOOGLE_API_KEY" ]; then
+  [ "$PROVIDERS" != '{' ] && PROVIDERS+=','
+  PROVIDERS+='"google":{"type":"google","apiKeys":["'"$GOOGLE_API_KEY"'"]}'
+fi
+PROVIDERS+='}'
+
+# Resolve the compose network name (varies across Docker versions and folder names)
+NETWORK=$(docker network ls --format '{{.Name}}' | grep "^$(basename "$PWD" | tr '[:upper:]' '[:lower:]')_default$" | head -1)
+if [ -z "$NETWORK" ]; then
+  NETWORK="$(basename "$PWD" | tr '[:upper:]' '[:lower:]')_default"
+fi
+
+docker run --init -d -p 8000:8000 \
+  --network "$NETWORK" \
+  --name ai-service \
+  -e LICENSE_KEY="$LICENSE_KEY" \
+  -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY="$MANAGEMENT_SECRET" \
+  -e DATABASE_DRIVER='mysql' \
+  -e DATABASE_HOST='mysql' \
+  -e DATABASE_USER='root' \
+  -e DATABASE_PASSWORD="$DB_PASSWORD" \
+  -e DATABASE_DATABASE='ai_service' \
+  -e REDIS_HOST='redis' \
+  -e PROVIDERS="$PROVIDERS" \
+  -e STORAGE_DRIVER='database' \
+  -e ENABLE_METRIC_LOGS='true' \
+  registry.containers.tiny.cloud/ai-service:latest
+----
+====
+
+For Podman, replace `docker run` with `podman run` and use a Podman pod instead of a compose network. See xref:tinymceai-on-premises-production.adoc[Production deployment] for Podman-specific guidance.
+ +For native databases (the database runs on the host or in a managed service rather than in Docker), drop the `--network` flag and set `DATABASE_HOST=host.docker.internal` (Docker Desktop and Podman 4{plus}). On native Linux Docker, additionally pass `--add-host=host.docker.internal:host-gateway`. + +Wait five seconds, then verify: + +[source,bash] +---- +curl http://localhost:8000/health +---- + +Expected response: + +[source,json] +---- +{"serviceName":"on-premises-http","uptime":5.123} +---- + +[WARNING] +-- +If the container exits immediately, run `docker logs ai-service`. The most common causes are documented in the xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] guide. The top three are: malformed `LICENSE_KEY` (line breaks from word wrap), missing PostgreSQL schema, and JSON syntax error in `PROVIDERS`. +-- + +=== Create an environment and access key + +The AI service isolates users into Environments. Each environment has its own access keys. + +[arabic] +. Open the Management Panel: *http://localhost:8000/panel/* +. Sign in using the `MANAGEMENT_SECRET` from `.env`. +. Click *Create Environment* and give it a name (for example "Development"). +. Note the *Environment ID* displayed (a short identifier like `viOu8BnjJHb0HGK091p`). +. Inside the environment, click *Create Access Key*. +. *Copy the API Secret immediately*. The Management Panel shows it only once. + +Update `.env` with the new values: + +[source,bash] +---- +AI_ENV_ID=PASTE_ENVIRONMENT_ID_HERE +AI_API_SECRET=PASTE_API_SECRET_HERE +---- + +[IMPORTANT] +-- +Always create environments through the Management Panel UI. Environments created through the raw Management API are not fully registered and cause `invalid-jwt-payload` or `Environment not found` errors. See the xref:tinymceai-on-premises-jwt.adoc[JWT authentication] guide for details on environment and access key management. +-- + +=== Create the token server + +The token server signs JSON Web Tokens (JWTs) for the editor. The Node.js example below is for the demo only; the xref:tinymceai-on-premises-jwt.adoc[JWT authentication] guide contains production-ready endpoints in 8 languages (Node, Django, Flask, Laravel, Rails, .NET, Go, Spring Boot). 
+
+Create `package.json`:
+
+[source,json]
+----
+{
+  "name": "tinymce-ai-onpremise-demo",
+  "private": true,
+  "scripts": {
+    "start": "node token-server.js"
+  },
+  "dependencies": {
+    "dotenv": "^16.0.0",
+    "express": "^4.18.0",
+    "jsonwebtoken": "^9.0.0"
+  }
+}
+----
+
+Create `token-server.js`:
+
+.Full token-server.js listing
+[%collapsible]
+====
+[source,javascript]
+----
+require('dotenv').config();
+const express = require('express');
+const jwt = require('jsonwebtoken');
+
+const PORT = process.env.PORT || 3000;
+const AI_ENV_ID = process.env.AI_ENV_ID;
+const AI_API_SECRET = process.env.AI_API_SECRET;
+const AI_SERVICE_URL = process.env.AI_SERVICE_URL || 'http://localhost:8000';
+const TINYMCE_API_KEY = process.env.TINYMCE_API_KEY || 'no-api-key';
+
+if (!AI_ENV_ID || !AI_API_SECRET) {
+  console.error('ERROR: AI_ENV_ID and AI_API_SECRET must be set in .env');
+  console.error('Create an environment first: visit http://localhost:8000/panel/');
+  process.exit(1);
+}
+
+const app = express();
+app.use(express.json());
+
+app.post('/api/ai-token', (req, res) => {
+  const token = jwt.sign({
+    aud: AI_ENV_ID,
+    sub: 'demo-user-001',
+    user: { name: 'Demo User', email: 'demo@example.com' },
+    auth: {
+      ai: {
+        permissions: [
+          'ai:conversations:*',
+          'ai:models:agent',
+          'ai:actions:system:*',
+          'ai:reviews:system:*'
+        ]
+      }
+    }
+  }, AI_API_SECRET, { algorithm: 'HS256', expiresIn: '1h' });
+
+  res.json({ token });
+});
+
+app.get('/', (req, res) => {
+  res.send(`<!DOCTYPE html>
+<html>
+<head>
+  <title>TinyMCE AI on-premises Demo</title>
+  <script src="https://cdn.tiny.cloud/1/${TINYMCE_API_KEY}/tinymce/8/tinymce.min.js" referrerpolicy="origin"></script>
+</head>
+<body>
+  <h1>TinyMCE AI on-premises Demo</h1>
+  <p>Select text and use the AI toolbar, or open the AI chat sidebar.</p>
+  <textarea id="editor"></textarea>
+  <script>
+    tinymce.init({
+      selector: '#editor',
+      plugins: 'tinymceai',
+      toolbar: 'undo redo | bold italic | tinymceai-chat tinymceai-review tinymceai-quickactions',
+      tinymceai_service_url: '${AI_SERVICE_URL}',
+      tinymceai_token_provider: () =>
+        fetch('/api/ai-token', { method: 'POST' })
+          .then((r) => r.json())
+          .then((data) => ({ token: data.token })),
+    });
+  </script>
+</body>
+</html>
+ + + +`); +}); + +app.listen(PORT, () => { + console.log('Editor: http://localhost:' + PORT); + console.log('Token API: http://localhost:' + PORT + '/api/ai-token'); + console.log('AI Service: ' + AI_SERVICE_URL); +}); +---- +==== + +=== Install and run + +[source,bash] +---- +npm install +npm start +---- + +=== Open the demo + +Open *http://localhost:3000* in a browser. The editor loads with the AI toolbar. Select text and try the AI features. Responses stream in real time from the chosen large language model (LLM) provider, processed entirely within the local infrastructure. + +The TinyMCE AI on-premises service is now running. + +== Verifying the installation + +After completing the quick start, exercise the pipeline end-to-end from the command line. + +[source,bash] +---- +# 1. Health check +curl http://localhost:8000/health +---- + +Expected: + +[source,json] +---- +{"serviceName":"on-premises-http","uptime":12.345} +---- + +[source,bash] +---- +# 2. Generate a token +curl -s -X POST http://localhost:3000/api/ai-token | python3 -m json.tool +---- + +Expected: + +[source,json] +---- +{ + "token": "eyJhbGciOiJIUzI1NiIs..." +} +---- + +[source,bash] +---- +# 3. Create a conversation and send a message +TOKEN=$(curl -s -X POST http://localhost:3000/api/ai-token | python3 -c "import sys,json;print(json.load(sys.stdin)['token'])") + +curl -s -X POST http://localhost:8000/v1/conversations \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"id":"verify-1","title":"Verification"}' + +curl -s -N -X POST http://localhost:8000/v1/conversations/verify-1/messages \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"prompt":"Say hello in five words.","model":"agent-1"}' +---- + +The message endpoint returns a Server-Sent Events stream: + +[source,text] +---- +event: message-metadata +data: {"messageId":"abc123"} + +event: text-delta +data: {"textDelta":"Hello "} + +event: text-delta +data: {"textDelta":"there, "} + +event: text-delta +data: {"textDelta":"friend!"} + +event: done +data: {} +---- + +If the stream emits `event: error`, inspect the `data` payload. Provider errors (invalid API key, IAM denial, model unavailable) ride inside the Server-Sent Events (SSE) response. The HTTP status stays 200. See the xref:tinymceai-on-premises-troubleshooting.adoc[LLM provider errors] section in the Troubleshooting guide for details. + +A successful round-trip confirms: container health, database connectivity, Redis connectivity, JWT signing, JWT verification, permissions checking, environment registration, LLM provider authentication, and SSE streaming. If problems persist after these checks, focus on the editor configuration next. + +== Updating configuration + +IMPORTANT: `docker compose restart` after `.env` changes silently keeps the old environment values. The restart preserves the container and does not re-read `.env`. Always use `docker compose up -d --force-recreate` instead. 
+
+[source,bash]
+----
+docker compose up -d --force-recreate
+# Or recreate only the AI service, when it is defined in the compose file:
+docker compose up -d --force-recreate ai-service
+----
+
+For Kubernetes, update the Secret and trigger a rollout restart:
+
+[source,bash]
+----
+kubectl rollout restart deployment/ai-service -n tinymce-ai
+----
+
+== Stopping and cleaning up
+
+[source,bash]
+----
+# Stop the AI service (standalone Docker)
+docker stop ai-service && docker rm ai-service
+
+# Stop the Docker Compose stack
+docker compose down
+
+# Remove all data including volumes (destructive)
+docker compose down -v
+----
+
+For Kubernetes, scale the deployment to zero or delete it. Persistent volumes for the database are retained unless explicitly deleted.
+
+[source,bash]
+----
+kubectl delete deployment ai-service -n tinymce-ai
+----
diff --git a/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc
new file mode 100644
index 0000000000..7c88f42535
--- /dev/null
+++ b/modules/ROOT/pages/tinymceai-on-premises-jwt.adoc
@@ -0,0 +1,911 @@
+= JWT authentication for the on-premises AI service
+:navtitle: JWT authentication
+:description: JWT authentication for the TinyMCE AI on-premises service using HS256 symmetric signing
+:keywords: AI, on-premises, JWT, authentication, HS256
+
+The on-premises AI service uses *HS256* (HMAC-SHA256, symmetric shared secret) for JSON Web Token (JWT) authentication. This is different from the Tiny Cloud AI service, which uses RS256.
+
+[WARNING]
+--
+Do not follow the xref:tinymceai-jwt-authentication-intro.adoc[Cloud JWT guide] for on-premises deployments. The on-premises verifier silently rejects RS256-signed tokens with `invalid-jwt-signature` and no indication that the algorithm is wrong.
+--
+
+
+
+== End-to-end flow
+
+[.text-center]
+image::tinymceai-on-premises/jwt-authentication-fig-1.svg[alt="JWT token exchange sequence between user application back end and AI service with error branches",width=100%]
+
+The shared secret (API Secret) never leaves the application back end. The editor only ever sees signed tokens; neither the browser nor the editor configuration has direct access to the secret.
+
+
+
+== Signing model
+
+[cols=",",options="header",]
+|===
+|Property |Value
+|Algorithm |`HS256` (HMAC-SHA256)
+|Key type |Symmetric shared secret
+|Key source |*API Secret* generated for an access key inside an environment through the Management Panel
+|Header format |`Authorization: Bearer <token>`
+|===
+
+Pin implementations to `HS256`.
+
+=== The API Secret
+
+The API Secret is generated when creating an access key inside an environment, in the Management Panel under *Environments → <environment> → Access keys → New access key*.
+
+* It is shown *once* on the creation screen. Copy it immediately into a secret manager such as Vault, AWS Secrets Manager, Doppler, or a local `.env` file.
+* If the secret is lost, rotate: create a new access key, deploy the new secret, then revoke the old key.
+* This is *not* the `ENVIRONMENTS_MANAGEMENT_SECRET_KEY` environment variable; that one is used for Management Panel logins, not user-facing AI tokens. Mixing them up produces `invalid-jwt-signature`.
+
+=== API Secret compared with `ENVIRONMENTS_MANAGEMENT_SECRET_KEY`
+
+[cols=",,",options="header",]
+|===
+|Credential |Purpose |Used by
+|*API Secret* |Signs user-facing JWTs presented to the AI runtime endpoints. Created per access key inside an environment. |The application token endpoint. Never appears in any management call.
+|*`ENVIRONMENTS_MANAGEMENT_SECRET_KEY`* |Signs Management Panel logins. Set as an environment variable on the AI service container. |The Management Panel UI.
+|===
+
+These two credentials are unrelated. Using one in place of the other produces `invalid-jwt-signature`.
+
+
+
+== Required claims
+
+Every token MUST contain the following claims.
+
+[cols=",,",options="header",]
+|===
+|Claim |Type |Description
+|`aud` |string |The Environment ID, copied from the Management Panel. Type *must be string*, not array; the verifier rejects array-shaped `aud` (the default in some JWT libraries) with `invalid-jwt-payload`.
+|`iat` |number |Issued-at, seconds since epoch (UTC).
+|`exp` |number |Expiry, seconds since epoch (UTC). Recommend `iat {plus} 3600` for demos, `iat {plus} 900` for production. The server applies 60 seconds of clock-skew leeway; tokens up to 60 seconds past `exp` still verify.
+|`sub` |string |Unique, stable user identifier. Conversation history is isolated per-`sub`; do not reuse one `sub` across users or conversations will leak between them.
+|`auth.ai.permissions` |`string[]` |Array of feature permission strings. See the permissions reference below. Wildcards (`*`) are accepted only in the documented positions; the bare string `"*"` is rejected.
+|===
+
+== Optional claims
+
+[cols=",,",options="header",]
+|===
+|Claim |Type |Description
+|`user.name` |string |Display name shown in the conversation history UI.
+|`user.email` |string |Email shown in the conversation history UI. Not used for authentication.
+|===
+
+The verifier ignores additional unknown claims. Standard JWT claims (`iss`, `nbf`, `jti`) cause no harm when included; the verifier does not validate them, but they pass through.
+
+
+
+== Permissions reference
+
+This is the canonical permission list for the AI service.
+
+=== Conversation and global features
+
+[cols=",",options="header",]
+|===
+|Permission |Grants
+|`ai:conversations:*` |All conversation operations: create, list, send message, delete
+|`ai:conversations:create` |Create new conversations
+|`ai:conversations:read` |List and read existing conversations
+|`ai:conversations:delete` |Delete conversations
+|`ai:models:agent` |Access the built-in agent model (model ID `agent-1`)
+|`ai:models:<provider>:<model>` |Access a specific custom model configured through the `MODELS` env var
+|`ai:actions:system:*` |All built-in quick actions (rewrite, summarize, expand, translate, change tone, and related operations)
+|`ai:reviews:system:*` |All built-in review features (correctness, clarity, readability, tone, and related checks)
+|===
+
+=== Model permission syntax
+
+`ai:models:<provider>:<model>` selects a specific custom model. The parser is *not* a greedy colon-split; it understands that `<model>` may itself contain colons and dots.
+
+Examples:
+
+....
+ai:models:openai:gpt-5-mini
+ai:models:openai:gpt-4o
+ai:models:anthropic:claude-sonnet-4-5
+ai:models:bedrock:us.anthropic.claude-sonnet-4-20250514-v1:0
+ai:models:vertex:gemini-2.5-pro
+ai:models:azure:my-gpt5-deployment
+....
+
+For Azure, `<model>` is the *deployment name* configured in the Azure portal, not the underlying OpenAI model name.
+
+For Bedrock models with an inference profile prefix (`us.`, `eu.`, `apac.`) and embedded version colons (`v1:0`), include them verbatim; the parser handles them.
+
+=== What not to put in `auth.ai.permissions`
+
+[cols=",",options="header",]
+|===
+|Do not use |Reason
+|`ai:admin` |Appears in the *cloud* JWT doc. The on-premises service rejects this with `allowed: false` on every endpoint.
There is no admin scope in on-premises deployments; admin actions go through the Management Panel.
+|`"*"` (the bare string) |Rejected. The verifier requires structured permission strings.
+|`useAllFeatures: true` |The on-premises service requires the explicit `auth.ai.permissions` array.
+|A single string instead of an array |Rejected. `auth.ai.permissions` must be ``string[]``.
+|===
+
+=== Full-access set
+
+For demos and admin-tier users, this is the standard grant:
+
+[source,json]
+----
+[
+  "ai:conversations:*",
+  "ai:models:agent",
+  "ai:actions:system:*",
+  "ai:reviews:system:*"
+]
+----
+
+When adding custom models through the `MODELS` environment variable, append one `ai:models:<provider>:<model>` entry for each custom model to expose in the selector.
+
+
+
+== Example payload
+
+A complete, decoded payload for a logged-in user with full access to a single OpenAI model:
+
+[source,json]
+----
+{
+  "aud": "5f1a2b3c-1234-5678-9abc-def012345678",
+  "iat": 1746950400,
+  "exp": 1746954000,
+  "sub": "user_8f3c9a12",
+  "user": {
+    "name": "Priya Patel",
+    "email": "priya.patel@example.com"
+  },
+  "auth": {
+    "ai": {
+      "permissions": [
+        "ai:conversations:*",
+        "ai:models:agent",
+        "ai:models:openai:gpt-5-mini",
+        "ai:actions:system:*",
+        "ai:reviews:system:*"
+      ]
+    }
+  }
+}
+----
+
+Signed with HS256 using the API Secret, then sent as:
+
+....
+Authorization: Bearer eyJhbGciOiJIUzI1NiIs...
+....
+
+=== Clock-skew leeway
+
+The service allows up to 60 seconds of clock skew on the `exp` claim. Keep the token server and the AI service synchronized with Network Time Protocol (NTP).
+
+
+
+== Production token endpoint examples
+
+Each example reads `AI_ENV_ID` and `AI_API_SECRET` from environment variables, authenticates the user through the framework's session/auth layer, signs an HS256 token, and returns `{"token": "..."}` as JSON. The endpoint runs in the application back end; the AI service never sees the API Secret directly.
+ +[cols="1,1",options="header"] +|=== +|Language |Framework / Library + +|Node.js |Express + `jsonwebtoken` +|Python |Django + `PyJWT`, Flask + `PyJWT` +|PHP |Laravel + `firebase/php-jwt` +|Ruby |Rails + `jwt` +|C# |.NET + `System.IdentityModel.Tokens.Jwt` +|Go |`golang-jwt/jwt/v5` +|Java |Spring Boot + `jjwt` +|=== + +.Node.js (Express + jsonwebtoken) +[%collapsible] +==== +[source,bash] +---- +npm install express jsonwebtoken +---- + +[source,javascript] +---- +const express = require('express'); +const jwt = require('jsonwebtoken'); + +const app = express(); +const ENV_ID = process.env.AI_ENV_ID; +const API_SECRET = process.env.AI_API_SECRET; + +app.post('/api/ai-token', requireLogin, (req, res) => { + const user = req.user; + const now = Math.floor(Date.now() / 1000); + + const payload = { + aud: ENV_ID, + iat: now, + exp: now + 3600, + sub: String(user.id), + user: { + name: user.displayName, + email: user.email, + }, + auth: { + ai: { + permissions: [ + 'ai:conversations:*', + 'ai:models:agent', + 'ai:actions:system:*', + 'ai:reviews:system:*', + ], + }, + }, + }; + + const token = jwt.sign(payload, API_SECRET, { algorithm: 'HS256' }); + res.json({ token }); +}); + +function requireLogin(req, res, next) { + if (!req.user) return res.status(401).json({ error: 'unauthenticated' }); + next(); +} + +app.listen(3000); +---- +==== + +.Python (Django + PyJWT) +[%collapsible] +==== +[source,bash] +---- +pip install PyJWT +---- + +[source,python] +---- +import os +import time +import jwt +from django.http import JsonResponse +from django.views.decorators.http import require_POST +from django.contrib.auth.decorators import login_required + +ENV_ID = os.environ["AI_ENV_ID"] +API_SECRET = os.environ["AI_API_SECRET"] + + +@require_POST +@login_required +def ai_token(request): + user = request.user + now = int(time.time()) + + payload = { + "aud": ENV_ID, + "iat": now, + "exp": now + 3600, + "sub": str(user.pk), + "user": { + "name": user.get_full_name() or user.username, + "email": user.email, + }, + "auth": { + "ai": { + "permissions": [ + "ai:conversations:*", + "ai:models:agent", + "ai:actions:system:*", + "ai:reviews:system:*", + ], + }, + }, + } + + token = jwt.encode(payload, API_SECRET, algorithm="HS256") + return JsonResponse({"token": token}) +---- + +Register the view in `urls.py`: + +[source,python] +---- +from django.urls import path +from . 
import views
+
+urlpatterns = [
+    path("api/ai-token", views.ai_token, name="ai-token"),
+]
+----
+====
+
+.Python (Flask + PyJWT)
+[%collapsible]
+====
+[source,bash]
+----
+pip install Flask PyJWT
+----
+
+[source,python]
+----
+import os
+import time
+import jwt
+from flask import Flask, jsonify, abort, session
+
+app = Flask(__name__)
+ENV_ID = os.environ["AI_ENV_ID"]
+API_SECRET = os.environ["AI_API_SECRET"]
+
+
+@app.post("/api/ai-token")
+def ai_token():
+    user = session.get("user")
+    if not user:
+        abort(401)
+
+    now = int(time.time())
+    payload = {
+        "aud": ENV_ID,
+        "iat": now,
+        "exp": now + 3600,
+        "sub": str(user["id"]),
+        "user": {
+            "name": user["name"],
+            "email": user["email"],
+        },
+        "auth": {
+            "ai": {
+                "permissions": [
+                    "ai:conversations:*",
+                    "ai:models:agent",
+                    "ai:actions:system:*",
+                    "ai:reviews:system:*",
+                ],
+            },
+        },
+    }
+
+    token = jwt.encode(payload, API_SECRET, algorithm="HS256")
+    return jsonify({"token": token})
+----
+====
+
+.PHP (Laravel {plus} firebase/php-jwt)
+[%collapsible]
+====
+[source,bash]
+----
+composer require firebase/php-jwt
+----
+
+[source,php]
+----
+<?php
+
+namespace App\Http\Controllers;
+
+use Firebase\JWT\JWT;
+use Illuminate\Http\Request;
+
+class AiTokenController extends Controller
+{
+    public function issue(Request $request)
+    {
+        $envId = env('AI_ENV_ID');
+        $apiSecret = env('AI_API_SECRET');
+        $user = $request->user();
+
+        $now = time();
+        $payload = [
+            'aud' => $envId,
+            'iat' => $now,
+            'exp' => $now + 3600,
+            'sub' => (string) $user->id,
+            'user' => [
+                'name' => $user->name,
+                'email' => $user->email,
+            ],
+            'auth' => [
+                'ai' => [
+                    'permissions' => [
+                        'ai:conversations:*',
+                        'ai:models:agent',
+                        'ai:actions:system:*',
+                        'ai:reviews:system:*',
+                    ],
+                ],
+            ],
+        ];
+
+        $token = JWT::encode($payload, $apiSecret, 'HS256');
+        return response()->json(['token' => $token]);
+    }
+}
+----
+
+Route (`routes/web.php` or `routes/api.php`):
+
+[source,php]
+----
+use App\Http\Controllers\AiTokenController;
+
+Route::post('/api/ai-token', [AiTokenController::class, 'issue'])
+    ->middleware('auth');
+----
+====
+
+.Ruby (Rails {plus} jwt)
+[%collapsible]
+====
+[source,ruby]
+----
+# Gemfile
+gem 'jwt'
+----
+
+[source,ruby]
+----
+class AiTokensController < ApplicationController
+  before_action :authenticate_user!
+
+  def create
+    env_id = ENV.fetch('AI_ENV_ID')
+    api_secret = ENV.fetch('AI_API_SECRET')
+
+    now = Time.now.to_i
+    payload = {
+      aud: env_id,
+      iat: now,
+      exp: now + 3600,
+      sub: current_user.id.to_s,
+      user: {
+        name: current_user.name,
+        email: current_user.email
+      },
+      auth: {
+        ai: {
+          permissions: [
+            'ai:conversations:*',
+            'ai:models:agent',
+            'ai:actions:system:*',
+            'ai:reviews:system:*'
+          ]
+        }
+      }
+    }
+
+    token = JWT.encode(payload, api_secret, 'HS256')
+    render json: { token: token }
+  end
+end
+----
+
+Route (`config/routes.rb`):
+
+[source,ruby]
+----
+post '/api/ai-token', to: 'ai_tokens#create'
+----
+====
+
+.C# (.NET {plus} System.IdentityModel.Tokens.Jwt)
+[%collapsible]
+====
+[source,bash]
+----
+dotnet add package System.IdentityModel.Tokens.Jwt
+----
+
+[source,c#]
+----
+using System;
+using System.IdentityModel.Tokens.Jwt;
+using System.Security.Claims;
+using System.Text;
+using System.Text.Json;
+using Microsoft.AspNetCore.Authorization;
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.IdentityModel.Tokens;
+
+[ApiController]
+[Route("api/ai-token")]
+[Authorize]
+public class AiTokenController : ControllerBase
+{
+    [HttpPost]
+    public IActionResult Issue()
+    {
+        var envId = Environment.GetEnvironmentVariable("AI_ENV_ID")!;
+        var apiSecret = Environment.GetEnvironmentVariable("AI_API_SECRET")!;
+
+        var userId = User.FindFirst(ClaimTypes.NameIdentifier)!.Value;
+        var userName = User.FindFirst(ClaimTypes.Name)?.Value ?? "";
+        var userEmail = User.FindFirst(ClaimTypes.Email)?.Value ?? 
""; + + var now = DateTimeOffset.UtcNow.ToUnixTimeSeconds(); + + var payload = new JwtPayload + { + { "aud", envId }, + { "iat", now }, + { "exp", now + 3600 }, + { "sub", userId }, + { "user", new { name = userName, email = userEmail } }, + { "auth", new { + ai = new { + permissions = new[] { + "ai:conversations:*", + "ai:models:agent", + "ai:actions:system:*", + "ai:reviews:system:*" + } + } + }} + }; + + var key = new SymmetricSecurityKey(Encoding.UTF8.GetBytes(apiSecret)); + var creds = new SigningCredentials(key, SecurityAlgorithms.HmacSha256); + var header = new JwtHeader(creds); + + var jwt = new JwtSecurityToken(header, payload); + var token = new JwtSecurityTokenHandler().WriteToken(jwt); + + return Ok(new { token }); + } +} +---- +==== + +.Go (golang-jwt/jwt/v5) +[%collapsible] +==== +[source,bash] +---- +go get github.com/golang-jwt/jwt/v5 +---- + +[source,go] +---- +package main + +import ( + "encoding/json" + "net/http" + "os" + "time" + + "github.com/golang-jwt/jwt/v5" +) + +type tokenResponse struct { + Token string `json:"token"` +} + +func aiTokenHandler(w http.ResponseWriter, r *http.Request) { + user, ok := userFromSession(r) + if !ok { + http.Error(w, "unauthenticated", http.StatusUnauthorized) + return + } + + envID := os.Getenv("AI_ENV_ID") + apiSecret := os.Getenv("AI_API_SECRET") + + now := time.Now().Unix() + claims := jwt.MapClaims{ + "aud": envID, + "iat": now, + "exp": now + 3600, + "sub": user.ID, + "user": map[string]string{ + "name": user.Name, + "email": user.Email, + }, + "auth": map[string]any{ + "ai": map[string]any{ + "permissions": []string{ + "ai:conversations:*", + "ai:models:agent", + "ai:actions:system:*", + "ai:reviews:system:*", + }, + }, + }, + } + + token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims) + signed, err := token.SignedString([]byte(apiSecret)) + if err != nil { + http.Error(w, "sign failed", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(tokenResponse{Token: signed}) +} + +func main() { + http.HandleFunc("/api/ai-token", aiTokenHandler) + http.ListenAndServe(":3000", nil) +} +---- +==== + +.Java (Spring Boot {plus} jjwt) +[%collapsible] +==== +[source,xml] +---- + + + io.jsonwebtoken + jjwt-api + 0.12.6 + + + io.jsonwebtoken + jjwt-impl + 0.12.6 + runtime + + + io.jsonwebtoken + jjwt-jackson + 0.12.6 + runtime + +---- + +[source,java] +---- +package com.example.ai; + +import io.jsonwebtoken.Jwts; +import io.jsonwebtoken.security.Keys; +import org.springframework.security.core.annotation.AuthenticationPrincipal; +import org.springframework.security.core.userdetails.UserDetails; +import org.springframework.web.bind.annotation.*; + +import javax.crypto.SecretKey; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.util.List; +import java.util.Map; + +@RestController +@RequestMapping("/api/ai-token") +public class AiTokenController { + + private final String envId = System.getenv("AI_ENV_ID"); + private final String apiSecret = System.getenv("AI_API_SECRET"); + + @PostMapping + public Map issue(@AuthenticationPrincipal UserDetails user) { + SecretKey key = Keys.hmacShaKeyFor(apiSecret.getBytes(StandardCharsets.UTF_8)); + Instant now = Instant.now(); + + String token = Jwts.builder() + .audience().add(envId).and() + .issuedAt(java.util.Date.from(now)) + .expiration(java.util.Date.from(now.plusSeconds(3600))) + .subject(user.getUsername()) + .claim("user", Map.of( + "name", user.getUsername(), + "email", "" + )) + 
.claim("auth", Map.of(
+                "ai", Map.of(
+                    "permissions", List.of(
+                        "ai:conversations:*",
+                        "ai:models:agent",
+                        "ai:actions:system:*",
+                        "ai:reviews:system:*"
+                    )
+                )
+            ))
+            .signWith(key, Jwts.SIG.HS256)
+            .compact();
+
+        return Map.of("token", token);
+    }
+}
+----
+====
+
+
+
+== Editor-side token provider
+
+Configure the TinyMCE editor to fetch a token from the application endpoint. The plugin calls the provider on demand and re-fetches when the token nears expiry.
+
+[source,javascript]
+----
+tinymce.init({
+  selector: 'textarea',
+  plugins: 'tinymceai',
+  toolbar: 'undo redo | bold italic | tinymceai-chat tinymceai-review tinymceai-quickactions',
+
+  tinymceai_service_url: 'https://ai.example.com',
+
+  tinymceai_token_provider: () =>
+    fetch('/api/ai-token', { method: 'POST', credentials: 'include' })
+      .then(r => r.json())
+      .then(d => ({ token: d.token })),
+});
+----
+
+IMPORTANT: Do not cache the JWT in application code. The plugin calls the provider on initialization and again as the token nears expiry; it manages refresh internally.
+
+The provider must return a Promise that resolves to `pass:c[{ token: '<jwt>' }]`. Returning the raw string fails silently. If the provider rejects or returns a non-OK response, the plugin surfaces an error in the editor UI.
+
+TIP: Set `credentials: 'include'` on the fetch when the token endpoint relies on session cookies. Without it, the browser does not send cookies on cross-origin requests. When the token endpoint is on the same origin as the editor, `credentials: 'include'` is harmless but unnecessary.
+
+For cross-origin setups, configure the back end server to respond with `Access-Control-Allow-Origin: <origin>` (not `*`) and `Access-Control-Allow-Credentials: true`. Set the session cookie with `SameSite=None; Secure`.
+
+For framework-specific (React, Vue, Angular) integration, see xref:tinymceai-on-premises-frameworks.adoc[Framework integration].
+
+
+
+== Permission gating patterns
+
+A common deployment shape: one AI service serving multiple subscription tiers. The token endpoint derives the permission set from role, plan, or tenant.
+
+=== Tiered permissions (basic / pro / enterprise)
+
+[source,javascript]
+----
+function permissionsFor(user) {
+  const base = [
+    'ai:conversations:*',
+    'ai:actions:system:*',
+  ];
+
+  switch (user.plan) {
+    case 'basic':
+      return [
+        ...base,
+        'ai:models:openai:gpt-5-mini',
+      ];
+
+    case 'pro':
+      return [
+        ...base,
+        'ai:reviews:system:*',
+        'ai:models:agent',
+        'ai:models:openai:gpt-5-mini',
+        'ai:models:openai:gpt-4o',
+      ];
+
+    case 'enterprise':
+      return [
+        ...base,
+        'ai:reviews:system:*',
+        'ai:models:agent',
+        'ai:models:openai:gpt-5-mini',
+        'ai:models:openai:gpt-4o',
+        'ai:models:anthropic:claude-sonnet-4-5',
+        'ai:models:bedrock:us.anthropic.claude-sonnet-4-20250514-v1:0',
+      ];
+
+    default:
+      return base;
+  }
+}
+----
+
+=== Read-only viewers
+
+For deployments that should expose history without allowing new conversations:
+
+[source,javascript]
+----
+[
+  'ai:conversations:read',
+]
+----
+
+=== Multi-tenant: separate environments
+
+If tenants must be *fully isolated* (separate conversation history, separate access keys, separate audit logs), give each tenant its own Environment in the Management Panel, mint tokens with the tenant-specific `aud` and `AI_API_SECRET`, and route in the token endpoint:
+
+[source,javascript]
+----
+function envFor(tenantId) {
+  return {
+    envId: process.env[`AI_ENV_ID_${tenantId}`],
+    apiSecret: process.env[`AI_API_SECRET_${tenantId}`],
+  };
+}
+----
+
+
+
+== Verification and troubleshooting
+
+=== Decode a token without verifying
+
+`jwt.io` accepts pasted tokens and shows the header and payload. Alternatively:
+
+[source,bash]
+----
+python3 -c "import jwt; print(jwt.decode('<token>', options={'verify_signature': False}))"
+----
+
+[source,bash]
+----
+node -e "console.log(JSON.parse(Buffer.from(process.argv[1].split('.')[1],'base64url')))" '<token>'
+----
+
+When debugging, start here. Most "auth failures" reflect wrong claim values rather than signing problems.
+
+=== Common failure modes
+
+[cols=",,",options="header",]
+|===
+|Symptom |Cause |Fix
+|`invalid-jwt-signature` |API Secret mismatch |Verify `AI_API_SECRET` matches the value displayed at access-key creation. If lost, create a new access key and rotate.
+|`invalid-jwt-signature` (after copying cloud guide) |Token signed with RS256 |Switch to HS256 with the API Secret. See top-of-page warning.
+|`invalid-jwt-payload` |`aud` does not match a real Environment ID |Confirm the Environment ID from the Management Panel matches `aud` exactly.
+|`invalid-jwt-payload` (env "exists") |Environment created through raw management API rather than the Management Panel UI |Recreate through the panel. See the environment-creation steps in xref:tinymceai-on-premises-getting-started.adoc[Getting started].
+|`invalid-jwt` (not `jwt-expired`) |Token is past `exp` by more than 60 seconds |Request a new token. The server allows 60-second clock-skew leeway; anything beyond is rejected with `invalid-jwt`.
+|`Environment not found` |Environment is in `environments__environment` / `security__environment` but not in `ai_assistant_environments` |Recreate through Management Panel UI.
+|`allowed: false` on every endpoint |Wrong shape for `auth.ai.permissions` |Must be ``string[]``. Not a single string. Not `useAllFeatures`. Not `ai:admin`.
+|`allowed: false` on specific endpoints only |Missing the specific permission |Decode token, check the `auth.ai.permissions` array against the table above.
+|Token silently rejected, no decoded error |RS256 signature |Re-sign with HS256.
+|`aud` claim type mismatch |`aud` issued as array instead of string |Some JWT libraries default to array `aud`. Force string.
+|Editor shows "Failed to authenticate" |Token endpoint returned non-JSON, returned `token` as nested object, or Cross-Origin Resource Sharing (CORS) blocked the request |Open browser devtools → Network → inspect the response from `/api/ai-token`.
+|===
+
+=== Sanity-check a token manually
+
+[source,bash]
+----
+TOKEN=$(curl -s -X POST http://localhost:3001/api/ai-token | jq -r .token)
+
+curl -i https://ai.example.com/v1/conversations \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{}'
+----
+
+A `201 Created` confirms the full chain works: secret, claims, permissions, environment registration.
+
+
+
+== Token lifetime guidance
+
+[cols=",",options="header",]
+|===
+|Scenario |Recommended `exp - iat`
+|Local development |1 hour (`3600`)
+|Demos |1 hour
+|Production |5–15 minutes (`300`–`900`)
+|High-security / regulated |5 minutes, plus short-lived sessions on the auth layer
+|===
+
+Short-lived tokens limit exposure if a token leaks through a browser extension, log capture, or error report. The editor re-requests a token as needed through `tinymceai_token_provider`, so long-lived tokens provide no practical benefit.
+
+
+
+
+== See also
+
+* xref:tinymceai-on-premises-getting-started.adoc[Getting started] -- end-to-end deployment, including a demo token server
+* xref:tinymceai-on-premises-providers.adoc[Large language model (LLM) providers] -- configuring custom models through `MODELS` and the `ai:models:<provider>:<model>` permission syntax
+* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- full troubleshooting catalog beyond JWT
+* xref:tinymceai-on-premises-frameworks.adoc[Framework integration] -- editor-side integration patterns for React, Vue, and Angular, including `tinymceai_token_provider` wrappers
diff --git a/modules/ROOT/pages/tinymceai-on-premises-production.adoc b/modules/ROOT/pages/tinymceai-on-premises-production.adoc
new file mode 100644
index 0000000000..a3806dd277
--- /dev/null
+++ b/modules/ROOT/pages/tinymceai-on-premises-production.adoc
@@ -0,0 +1,644 @@
+= TinyMCE AI on-premises: Production deployment guide
+:navtitle: Production deployment
+:description: Production deployment guide for the TinyMCE AI on-premises service
+:keywords: AI, on-premises, production, Kubernetes, ECS, scaling
+
+
+
+
+== Architecture overview
+
+[.text-center]
+image::tinymceai-on-premises/production-guide-fig-1.svg[alt="Production deployment topology with reverse proxy AI service replicas database and Redis behind TLS",width=100%]
+
+The AI service is stateless, persists all state to MySQL/PostgreSQL and Redis, and scales horizontally behind a load balancer.
+
+
+
+== TLS / HTTPS
+
+The AI service does not terminate Transport Layer Security (TLS). Place a reverse proxy in front.
+ +=== Nginx example + +[source,nginx] +---- +server { + listen 443 ssl; + server_name ai.example.com; + + ssl_certificate /etc/ssl/certs/ai.example.com.pem; + ssl_certificate_key /etc/ssl/private/ai.example.com.key; + + location / { + proxy_pass http://ai-service:8000; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # SSE streaming support + proxy_buffering off; + proxy_cache off; + proxy_read_timeout 300s; + } +} +---- + +[IMPORTANT] +-- +Server-Sent Events (SSE) streaming requires `proxy_buffering off`. Without it, AI responses appear to hang until the entire response is generated. +-- + +=== AWS ALB + +* Target group: HTTP on port 8000 +* Health check path: `/health` +* Idle timeout: 300 seconds (for long AI responses) +* Stickiness: not required (service is stateless) + + + +== Horizontal scaling + +The AI service is stateless. All persistent state lives in the SQL database, Redis, and the file-storage back end. Any number of replicas can run behind a load balancer. All replicas must share identical environment variable configuration. + +=== Scaling considerations + +[cols=",",options="header",] +|=== +|Component |Scaling approach +|AI service |Add more containers (stateless) +|MySQL / PostgreSQL |Read replicas or managed DB (RDS, Cloud SQL, Azure Database) +|Redis |Redis Cluster or Sentinel; managed Redis (ElastiCache, Memorystore, Azure Cache) +|File storage |S3 / Azure Blob recommended for production. The `database` storage driver is intended for development only. +|=== + +[IMPORTANT] +-- +When deploying for the first time or upgrading to a new version, start a single instance and wait for it to become healthy before scaling up. Subsequent scale events do not require this precaution. +-- + + + + +== Podman deployment + +The AI service works with Podman as an alternative to Docker. In Podman, containers within a pod share a network namespace, so use `127.0.0.1` instead of container names for hostnames. + +[source,bash] +---- +podman login -u 'TINY_REGISTRY_USERNAME' registry.containers.tiny.cloud + +podman pull registry.containers.tiny.cloud/ai-service:latest + +podman pod create --name ai-pod -p 8000:8000 -p 3306:3306 -p 6379:6379 + +podman run -d --pod ai-pod --name mysql \ + -e MYSQL_ROOT_PASSWORD=ROOT_PASSWORD \ + -e MYSQL_DATABASE=ai_service \ + mysql:8.0 + +podman run -d --pod ai-pod --name redis redis:7 + +podman run --init -d --pod ai-pod --name ai-service \ + -e LICENSE_KEY='T8LK:...' \ + -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY='MANAGEMENT_SECRET' \ + -e DATABASE_DRIVER='mysql' \ + -e DATABASE_HOST='127.0.0.1' \ + -e DATABASE_USER='root' \ + -e DATABASE_PASSWORD='ROOT_PASSWORD' \ + -e DATABASE_DATABASE='ai_service' \ + -e REDIS_HOST='127.0.0.1' \ + -e PROVIDERS='{"openai":{"type":"openai","apiKeys":["sk-proj-..."]}}' \ + -e STORAGE_DRIVER='database' \ + registry.containers.tiny.cloud/ai-service:latest +---- + +IMPORTANT: Pin to `mysql:8.0`. The `mysql:8` tag floats to MySQL 8.4, which removes the `default-authentication-plugin` flag and causes a crash loop. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for details. 
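+
+To keep the pod running across reboots, systemd units can be generated from the running pod (a sketch for rootless Podman 4.x; newer releases favor Quadlet for the same purpose):
+
+[source,bash]
+----
+# Generate pod-ai-pod.service plus one container-*.service per container
+podman generate systemd --new --files --name ai-pod
+
+# Install and enable the units for the current user
+mkdir -p ~/.config/systemd/user
+mv pod-ai-pod.service container-*.service ~/.config/systemd/user/
+systemctl --user daemon-reload
+systemctl --user enable --now pod-ai-pod.service
+----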
+ +== Kubernetes deployment + +=== Namespace and image pull secret + +[source,bash] +---- +kubectl create namespace tinymce-ai + +kubectl create secret docker-registry tiny-registry \ + --namespace tinymce-ai \ + --docker-server=registry.containers.tiny.cloud \ + --docker-username=TINY_REGISTRY_USERNAME \ + --docker-password='TINY_REGISTRY_ACCESS_TOKEN' +---- + +=== Application secrets + +[source,yaml] +---- +apiVersion: v1 +kind: Secret +metadata: + name: ai-service-secrets + namespace: tinymce-ai +type: Opaque +stringData: + license-key: "EXAMPLE_LICENSE_KEY" + management-secret: "EXAMPLE_MANAGEMENT_SECRET" + db-password: "EXAMPLE_DB_PASSWORD" + redis-password: "EXAMPLE_REDIS_PASSWORD" + providers: | + { + "openai": { + "type": "openai", + "apiKeys": ["sk-proj-EXAMPLE_KEY"] + } + } +---- + +In production, use Sealed Secrets, External Secrets Operator, or HashiCorp Vault rather than committing raw secret manifests. + +=== Deployment + +.Full Kubernetes Deployment manifest +[%collapsible] +==== +[source,yaml] +---- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ai-service + namespace: tinymce-ai +spec: + replicas: 2 + selector: + matchLabels: + app: ai-service + template: + metadata: + labels: + app: ai-service + spec: + imagePullSecrets: + - name: tiny-registry + containers: + - name: ai-service + image: registry.containers.tiny.cloud/ai-service:latest + ports: + - containerPort: 8000 + env: + - name: LICENSE_KEY + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: license-key + - name: ENVIRONMENTS_MANAGEMENT_SECRET_KEY + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: management-secret + - name: DATABASE_DRIVER + value: "mysql" + - name: DATABASE_HOST + value: "mysql.tinymce-ai.svc.cluster.local" + - name: DATABASE_USER + value: "ai_service" + - name: DATABASE_PASSWORD + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: db-password + - name: DATABASE_DATABASE + value: "ai_service" + - name: REDIS_HOST + value: "redis.tinymce-ai.svc.cluster.local" + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: redis-password + - name: PROVIDERS + valueFrom: + secretKeyRef: + name: ai-service-secrets + key: providers + - name: STORAGE_DRIVER + value: "s3" + - name: STORAGE_REGION + value: "us-east-1" + - name: STORAGE_BUCKET + value: "example-ai-storage-bucket" + - name: ENABLE_METRIC_LOGS + value: "true" + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 10 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 10 + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "2000m" +---- +==== + +=== Service + +[source,yaml] +---- +apiVersion: v1 +kind: Service +metadata: + name: ai-service + namespace: tinymce-ai +spec: + selector: + app: ai-service + ports: + - port: 8000 + targetPort: 8000 +---- + +=== Ingress + +[source,yaml] +---- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: ai-service + namespace: tinymce-ai + annotations: + nginx.ingress.kubernetes.io/proxy-buffering: "off" + nginx.ingress.kubernetes.io/proxy-read-timeout: "300" + nginx.ingress.kubernetes.io/proxy-send-timeout: "300" +spec: + tls: + - hosts: + - ai.example.com + secretName: ai-tls-cert + rules: + - host: ai.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: ai-service + port: + number: 8000 +---- + +=== Horizontal pod autoscaler + +[source,yaml] +---- +apiVersion: 
autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: ai-service-hpa + namespace: tinymce-ai +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: ai-service + minReplicas: 3 + maxReplicas: 20 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 +---- + + + + +== AWS ECS / Fargate + +=== Task definition + +.Full ECS Fargate task definition +[%collapsible] +==== +[source,json] +---- +{ + "family": "ai-service", + "networkMode": "awsvpc", + "requiresCompatibilities": ["FARGATE"], + "cpu": "1024", + "memory": "2048", + "containerDefinitions": [ + { + "name": "ai-service", + "image": "registry.containers.tiny.cloud/ai-service:latest", + "portMappings": [{ "containerPort": 8000 }], + "healthCheck": { + "command": ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"], + "interval": 30, + "timeout": 5, + "retries": 3 + }, + "secrets": [ + { "name": "LICENSE_KEY", "valueFrom": "arn:aws:secretsmanager:us-east-1:111122223333:secret:ai-license" }, + { "name": "ENVIRONMENTS_MANAGEMENT_SECRET_KEY", "valueFrom": "arn:aws:secretsmanager:us-east-1:111122223333:secret:ai-mgmt-secret" }, + { "name": "DATABASE_PASSWORD", "valueFrom": "arn:aws:secretsmanager:us-east-1:111122223333:secret:ai-db" }, + { "name": "PROVIDERS", "valueFrom": "arn:aws:secretsmanager:us-east-1:111122223333:secret:ai-providers" } + ], + "environment": [ + { "name": "DATABASE_DRIVER", "value": "mysql" }, + { "name": "DATABASE_HOST", "value": "example-rds-endpoint.region.rds.amazonaws.com" }, + { "name": "DATABASE_USER", "value": "ai_service" }, + { "name": "DATABASE_DATABASE", "value": "ai_service" }, + { "name": "REDIS_HOST", "value": "example-elasticache-endpoint.region.cache.amazonaws.com" }, + { "name": "STORAGE_DRIVER", "value": "s3" }, + { "name": "STORAGE_BUCKET", "value": "example-ai-storage-bucket" }, + { "name": "STORAGE_REGION", "value": "us-east-1" } + ] + } + ] +} +---- +==== + +=== Infrastructure recommendations + +[cols=",",options="header",] +|=== +|Service |AWS recommendation +|Database |RDS for MySQL 8.0 (Multi-AZ for high availability (HA)) +|Redis |ElastiCache for Redis 7 (cluster mode) +|Storage |Same-region S3 bucket +|Load balancer |ALB with `/health` target health check, 300 s idle timeout +|Secrets |AWS Secrets Manager +|Registry pull credentials |Secrets Manager {plus} ECR pull-through cache, or a private repository mirroring `registry.containers.tiny.cloud` +|=== + + + +== Security hardening + +[cols=",",options="header",] +|=== +|Practice |Implementation +|Network isolation |Place the AI service in a private subnet; expose only through a load balancer. Restrict database and Redis to the AI service security group. +|Block panel from the public internet |Restrict `/panel/` to an admin VPN or IP allowlist. The panel manages secrets and access keys. +|TLS everywhere |Terminate TLS 1.3 at the reverse proxy. Use internal mutual TLS (mTLS) between the AI service and the data layer where supported. +|Secrets management |Use Vault, AWS Secrets Manager, Azure Key Vault, or GCP Secret Manager. Never store secrets directly in orchestration manifests or commit them to source control. +|Database encryption at rest |Turn on encryption at rest in the cloud provider console. RDS, Cloud SQL, and Azure Database enable this by default. +|Redis authentication |Always set `REDIS_PASSWORD` (or use a managed Redis instance with authentication enabled). 
+|Container security |Run as non-root, use a read-only filesystem where possible, and drop unnecessary Linux capabilities. +|Image scanning |Scan `registry.containers.tiny.cloud/ai-service` with Trivy, Snyk, or the registry's built-in scanner. +|Least-privilege JSON Web Tokens (JWTs) |Grant only the permissions each user role requires. Avoid full-access tokens in production. +|API secret rotation |Periodically create a new access key, add the new key to the configuration, then revoke the old key. The token endpoint reads the secret at request time. +|Audit logging |Enable `ENABLE_METRIC_LOGS=true` and ship logs to a Security Information and Event Management (SIEM). +|Large language model (LLM) API key rotation |Add the new key to the `PROVIDERS` array, restart the service, then revoke the old key after confirming the new one works. +|=== + +== Rate limiting + +The AI service has no built-in rate limiting. Place rate-limit rules in front of the service to prevent a runaway client from consuming LLM provider quota or overloading the database. + +=== nginx + +[source,nginx] +---- +limit_req_zone $http_authorization zone=ai_jwt:10m rate=10r/s; + +server { + location /v1/ { + limit_req zone=ai_jwt burst=20 nodelay; + proxy_pass http://ai-service:8000; + proxy_buffering off; + proxy_read_timeout 300s; + } +} +---- + +=== AWS ALB / WAF + +ALB does not rate limit natively. Use AWS WAF with a rate-based rule keyed on the `Authorization` header. + +=== Cloudflare + +Use Cloudflare Rate Limiting with a custom rule keyed on the `Authorization` header for the AI service hostname. + +For per-tenant rate limiting, key on the `aud` claim by parsing it in the reverse proxy, or gate token issuance per tenant per minute at the token endpoint. + + +== Observability + +=== Health monitoring + +Poll `/health` on each instance to confirm it is running. A healthy instance responds with HTTP 200. + +[source,bash] +---- +curl -f http://ai-service:8000/health +---- + +=== Structured metric logs + +Set the `ENABLE_METRIC_LOGS` environment variable to enable request-level JSON logs to stdout: + +[source,bash] +---- +-e ENABLE_METRIC_LOGS='true' +---- + +When enabled, the service writes a structured JSON entry for each request. Key fields include the request duration, HTTP status code, and outcome status. These entries are suitable for ingestion into any log aggregator that supports JSON parsing. + +=== OpenTelemetry + +[source,bash] +---- +-e LLM_TELEMETRY_ENABLED='true' \ +-e OTEL_EXPORTER_OTLP_TRACES_ENDPOINT='http://otel-collector:4318/v1/traces' \ +-e OTEL_TRACES_SAMPLER_ARG='1.0' \ +-e OTEL_DEBUG='true' +---- + +[cols="1,1,1,3",options="header"] +|=== +|Variable |Required |Default |Description +|`LLM_TELEMETRY_ENABLED` |Yes |`false` |Primary telemetry switch +|`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` |Yes |- |OpenTelemetry Protocol (OTLP) endpoint URL +|`OTEL_TRACES_SAMPLER_ARG` |No |`1.0` |Sampling rate (0.0 to 1.0) +|`OTEL_DEBUG` |No |- |Verbose OTLP diagnostic logging +|=== + +Compatible with Jaeger, Grafana Tempo, Datadog, New Relic, Honeycomb, and any OTLP-compatible back end. + +=== Langfuse + +Langfuse provides AI-specific observability: token usage, latency per LLM call, prompt quality scores, and cost tracking. + +[source,bash] +---- +-e LANGFUSE_PUBLIC_KEY='pk-lf-...' \ +-e LANGFUSE_SECRET_KEY='sk-lf-...' 
\ +-e LANGFUSE_BASE_URL='https://cloud.langfuse.com' \ +-e LANGFUSE_DEBUG='true' +---- + +[cols="1,1,1,3",options="header"] +|=== +|Variable |Required |Default |Description +|`LANGFUSE_PUBLIC_KEY` |Yes (if used) |- |Langfuse public key +|`LANGFUSE_SECRET_KEY` |Yes (if used) |- |Langfuse secret key +|`LANGFUSE_BASE_URL` |No |`https://cloud.langfuse.com` |Self-hosted Langfuse URL +|`LANGFUSE_DEBUG` |No |- |Verbose Langfuse logging +|=== + +Langfuse also requires `LLM_TELEMETRY_ENABLED=true` and a valid `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`. + +OpenTelemetry and Langfuse can run at the same time. The service emits to both without conflict. + +=== Distributed logging + +For production multi-instance deployments, ship container logs to a central aggregator. + +[cols="1,3",options="header"] +|=== +|Platform |Log driver / approach +|AWS |CloudWatch Logs through the `awslogs` driver, or Fluent Bit DaemonSet on EKS +|GCP |Cloud Logging (automatic on GKE), or Fluent Bit +|Azure |Azure Monitor (automatic on Azure Container Apps and AKS) +|Self-hosted (ELK) |Fluent Bit or Filebeat to Elasticsearch {plus} Kibana +|Self-hosted (Loki) |Fluent Bit or Promtail to Grafana Loki +|Fluentd |Use the Docker fluentd log driver +|=== + +.Fluentd log driver example +[source,bash] +---- +docker run ... \ + --log-driver=fluentd \ + --log-opt fluentd-address=localhost:24224 \ + --log-opt tag=ai-service \ + ... +---- + +The metric logs produced by the `ENABLE_METRIC_LOGS` option are already structured JSON and parse cleanly in any aggregator. + +=== Recommended monitoring + +The following checks help catch common issues early: + +* **Health endpoint** -- poll `/health` on each instance; alert if any instance returns a non-200 response for more than 60 seconds. +* **Error rate** -- monitor the HTTP 5xx rate in the metric logs or traces; a sustained increase may indicate an LLM provider outage or a misconfigured environment. +* **Latency** -- track request duration; a sudden increase typically points to LLM provider throttling or network issues. +* **Container restarts** -- alert on repeated container restarts, which may indicate a missing environment variable or a database connectivity problem. + +For troubleshooting specific error patterns, see xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting]. + + + +== Backup and recovery + +=== Database + +The database contains environments, access keys, conversations, messages, and file metadata. Back up the database using standard production practices: + +* *MySQL:* `mysqldump` or managed snapshots (RDS automated backups). +* *PostgreSQL:* `pg_dump` or managed snapshots. + +Enable point-in-time recovery. + +=== File storage + +[cols=",",options="header",] +|=== +|Back end |Backup approach +|`database` |The SQL database stores file blobs; database backups include them. +|`filesystem` |Back up the mounted volume. +|`s3` |Enable versioning on the bucket for point-in-time recovery. +|`azure` |Enable Blob versioning. +|=== + +=== Redis + +Redis holds ephemeral state. Losing Redis data does not affect persistent data. No backup is required. + + + +== Upgrade process + +. Pull the new image: ++ +[source,bash] +---- +docker pull registry.containers.tiny.cloud/ai-service:NEW_VERSION +---- +. For rolling deploys across version boundaries: start *one* instance at the new version and wait for it to become healthy before rolling the rest. +. For Kubernetes: update the image tag in the Deployment. 
The default `RollingUpdate` strategy handles zero-downtime upgrades, provided the first new pod becomes Ready before the rollout continues. +. Verify `/health` on every replica before declaring the upgrade complete. + +Review the release notes for the target version and take a database backup before upgrading. + +License keys are per-deployment, not per-replica. One key covers any number of replicas of a single deployment. + + + +== Performance characteristics + +[cols="1,1",options="header"] +|=== +|Metric |Typical value + +|Cold start +|Approximately 3 seconds + +|Health check response +|Less than 10 ms + +|Token validation +|Less than 5 ms + +|Time to first token (LLM) +|200 ms to 2 s (depends on provider and model) + +|Memory per instance +|256 to 512 MB + +|Concurrent connections +|1,000{plus} per instance +|=== + +These values are approximate and vary with hardware, provider latency, and prompt complexity. The LLM provider's rate limits are typically the binding constraint before the AI service becomes one. + +== Sizing guide + +[cols=",,,,",options="header",] +|=== +|Users |AI service replicas |Database |Redis |Notes +|1 to 50 |1 |db.t3.small (or 2 vCPU / 4 GB self-managed) |cache.t3.micro |Development and small teams +|50 to 500 |2 |db.r6g.large |cache.r6g.large |Small production +|500 to 5,000 |3 to 5 |db.r6g.xlarge (Multi-AZ) |cache.r6g.xlarge (cluster) |Medium production +|5,000{plus} |5{plus} (Horizontal Pod Autoscaler (HPA)) |db.r6g.2xlarge{plus} |cache.r6g.2xlarge{plus} |Large production; contact Tiny for guidance +|=== + +Starting point for self-managed deployments: + +* AI service instance: 2 vCPU / 4 GB RAM +* Database instance: 2 vCPU / 8 GB RAM +* Redis instance: 1 vCPU / 2 GB RAM + +Scale based on user count, average prompt size, and concurrent streaming connections. The LLM provider's rate limits are usually the binding constraint long before the AI service or database becomes one. diff --git a/modules/ROOT/pages/tinymceai-on-premises-providers.adoc b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc new file mode 100644 index 0000000000..6676c3b7e0 --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-providers.adoc @@ -0,0 +1,1017 @@ += LLM provider configuration +:navtitle: LLM providers +:description: LLM provider configuration for TinyMCE AI on-premises service +:keywords: AI, on-premises, LLM, OpenAI, Anthropic, Google, Gemini, Azure, Bedrock, Vertex, Ollama, vLLM, openai-compatible + + + + +The `PROVIDERS` environment variable tells the AI service how to reach the upstream large language model (LLM). The `MODELS` environment variable tells the service which models are exposed to clients and which features each model supports. This page is the definitive reference for both: every supported `type`, every required field, and every known issue encountered in production. + +Start with the xref:tinymceai-on-premises-getting-started.adoc[Getting Started guide] if the AI service container is not yet running. The following sections assume a running `ai-service` container. + +== Provider and model architecture + +The AI service uses two related environment variables: + +[cols=",,",options="header",] +|=== +|Variable |Type |What it does +|`PROVIDERS` |JSON object |Map of provider IDs to provider configurations. Each entry says how to authenticate with one upstream LLM API. +|`MODELS` |JSON array |List of models exposed to clients. Each model points at a `PROVIDERS` entry and declares which features it can serve. 
+|JSON Web Token (JWT) `auth.ai.permissions` |string array |Per-user authorization list. Includes `ai:models::` entries to gate access to individual models.
+|===
+
+The `PROVIDERS` keys are arbitrary identifiers (for example `"openai"`, `"my-bedrock"`, `"team-azure"`). Each value object has a `type` field that picks the implementation:
+
+[cols=",",options="header",]
+|===
+|`type` |Provider
+|`openai` |OpenAI (api.openai.com)
+|`anthropic` |Anthropic (api.anthropic.com)
+|`google` |Google AI Studio / Gemini direct
+|`azure` |Azure OpenAI Service
+|`bedrock` |AWS Bedrock
+|`vertex` |Google Cloud Vertex AI
+|`openai-compatible` |Any OpenAI-compatible HTTP API (Ollama, vLLM, LM Studio, llama.cpp, LocalAI, OpenRouter, Together, Groq, Fireworks, and similar)
+|===
+
+The *key* (not the `type`) is what gets referenced from:
+
+* ``MODELS[].provider`` -- to wire a model to a provider
+* JWT `auth.ai.permissions` -- for per-provider or per-model access control (see xref:tinymceai-on-premises-jwt.adoc[JWT authentication])
+
+[source,json]
+----
+{
+  "my-openai-key": { "type": "openai", "apiKeys": ["sk-proj-..."] },
+  "my-bedrock": { "type": "bedrock", "region": "us-east-1", "credentials": { "accessKeyId": "...", "secretAccessKey": "..." } },
+  "my-ollama": { "type": "openai-compatible", "baseUrl": "http://host.docker.internal:11434/v1" }
+}
+----
+
+=== How the pieces fit together
+
+[.text-center]
+image::tinymceai-on-premises/providers-guide-fig-1.svg[alt="PROVIDERS JSON structure mapping provider keys to OpenAI Anthropic Azure Bedrock Vertex and self-hosted endpoints",width=100%]
+
+The diagram reflects three stacked layers: *how to authenticate* with each upstream (`PROVIDERS`), *what to expose* to clients (`MODELS`), and *who can use which model* (JWT permissions). The same provider key string ties all three together.
+
+
+
+[[models-required]]
+== Defining the model list
+
+The on-premises service ships with a built-in default model list that covers *only* OpenAI, Anthropic, and Google direct. For every other provider type (Azure, Bedrock, Vertex, openai-compatible), define `MODELS` explicitly; otherwise nothing usable is exposed.
+
+When only `PROVIDERS` is configured and `MODELS` is omitted, a `GET /v1/models/1` call returns only the built-in `agent-1` placeholder with `allowed: false`:
+
+[source,json]
+----
+{
+  "models": [
+    { "id": "agent-1", "allowed": false, "features": [] }
+  ]
+}
+----
+
+Clients (the TinyMCE editor included) will then fall back to the disabled agent and every AI request will fail with no useful error in the UI. The model list endpoint also accepts only the literal version `1`:
+
+[source,bash]
+----
+# Works
+curl http://localhost:8000/v1/models/1
+
+# All return 500
+curl http://localhost:8000/v1/models/v1
+curl http://localhost:8000/v1/models/v2
+curl http://localhost:8000/v1/models/latest
+----
+
+*Minimum `MODELS` entry* to make a model usable:
+
+[source,json]
+----
+[
+  {
+    "id": "gpt-4o-mini",
+    "provider": "my-openai-key",
+    "description": "OpenAI GPT-4o mini",
+    "features": ["conversations", "reviews", "actions"]
+  }
+]
+----
+
+The `features` array must include at least one of `conversations`, `reviews`, or `actions` for the model to be selectable for that feature. A model with no overlapping features is invisible to that part of the UI.
+
+A full field reference for `MODELS` is at the end of this page.
+
+
+
+== OpenAI
+
+API key from https://platform.openai.com/api-keys[platform.openai.com].
With OpenAI alone, `MODELS` can be omitted; the built-in catalog covers common models. + +.Configuration details +[%collapsible] +==== +*JSON shape:* + +[source,json] +---- +{ + "openai": { + "type": "openai", + "apiKeys": ["sk-proj-YOUR_KEY_HERE"] + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"openai"` +|`apiKeys` |Yes |Array of one or more API keys. Multiple keys allow zero-downtime rotation. +|`baseUrl` |No |Override only if proxying through a private OpenAI gateway. +|=== + +*Full `docker run` example:* + +[source,bash] +---- +docker run --init -d -p 8000:8000 \ + --name ai-service \ + -e LICENSE_KEY="$LICENSE_KEY" \ + -e ENVIRONMENTS_MANAGEMENT_SECRET_KEY="$MANAGEMENT_SECRET" \ + -e DATABASE_DRIVER='mysql' \ + -e DATABASE_HOST='mysql' \ + -e DATABASE_USER='root' \ + -e DATABASE_PASSWORD="$DB_PASSWORD" \ + -e DATABASE_DATABASE='ai_service' \ + -e REDIS_HOST='redis' \ + -e STORAGE_DRIVER='database' \ + -e PROVIDERS='{"openai":{"type":"openai","apiKeys":["sk-proj-YOUR_KEY_HERE"]}}' \ + registry.containers.tiny.cloud/ai-service:latest +---- + +*Verify:* + +[source,bash] +---- +curl -s -H "Authorization: Bearer sk-proj-YOUR_KEY_HERE" \ + https://api.openai.com/v1/models | head -20 +---- + +When that `curl` call succeeds from the host, the same key in `PROVIDERS` works inside the container. + +IMPORTANT: *Project-scoped keys* (`sk-proj-...`) only work for models the project has been granted access to. A 404 on `gpt-4o` usually means the key's project is restricted. *Org-level keys* require the org header, which the service does not send; use a project key instead. +==== + + + +== Anthropic + +API key from https://console.anthropic.com/[console.anthropic.com]. Same shape as OpenAI. + +.Configuration details +[%collapsible] +==== +*JSON shape:* + +[source,json] +---- +{ + "anthropic": { + "type": "anthropic", + "apiKeys": ["sk-ant-YOUR_KEY_HERE"] + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"anthropic"` +|`apiKeys` |Yes |Array. Rotation behaves the same as OpenAI. +|=== + +*Reasoning models:* + +Claude 4.x models (Sonnet 4, Opus 4) support extended thinking. To surface the reasoning toggle in the TinyMCE UI, add `capabilities.reasoning: true` to the model entry in `MODELS`: + +[source,json] +---- +{ + "id": "claude-sonnet-4-5", + "provider": "anthropic", + "description": "Anthropic Claude Sonnet 4.5 with extended thinking", + "capabilities": { "reasoning": true }, + "features": ["conversations", "reviews", "actions"] +} +---- + +*Minimal example:* + +[source,bash] +---- +-e PROVIDERS='{"anthropic":{"type":"anthropic","apiKeys":["sk-ant-YOUR_KEY_HERE"]}}' +---- + +*Verify:* + +[source,bash] +---- +curl -s https://api.anthropic.com/v1/models \ + -H "x-api-key: sk-ant-YOUR_KEY_HERE" \ + -H "anthropic-version: 2023-06-01" +---- +==== + + + +== Google (Gemini direct) + +Direct Gemini access through https://aistudio.google.com/app/apikey[Google AI Studio]. Distinct from Vertex AI; different keys, different endpoints, different billing path. + +.Configuration details +[%collapsible] +==== +*JSON shape:* + +[source,json] +---- +{ + "google": { + "type": "google", + "apiKeys": ["AIza-YOUR_KEY_HERE"] + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"google"` +|`apiKeys` |Yes |Array of `AIza...` keys from AI Studio. 
+|===
+
+*Minimal example:*
+
+[source,bash]
+----
+-e PROVIDERS='{"google":{"type":"google","apiKeys":["AIza-YOUR_KEY_HERE"]}}'
+----
+
+*Verify:*
+
+[source,bash]
+----
+curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=AIza-YOUR_KEY_HERE"
+----
+
+IMPORTANT: AI Studio `AIza` keys do *not* work against Vertex endpoints. For Vertex, see the <<google-vertex-ai>> section.
+
+TIP: Free-tier keys are heavily rate limited and return 429 responses under modest load. Move to a paid tier before load testing or production traffic.
+====
+
+
+
+== Azure OpenAI
+
+Azure-hosted OpenAI models. Requires an Azure subscription, an Azure OpenAI resource, and at least one deployment. The ``MODELS[].id`` must match the Azure deployment name exactly.
+
+.Configuration details
+[%collapsible]
+====
+*Prerequisites in the Azure portal:*
+
+[arabic]
+. Create an Azure OpenAI resource. Note the *resource name*; this is the subdomain prefix in `https://YOUR_RESOURCE_NAME.openai.azure.com`.
+. Apply for model access if required by the region.
+. In Azure AI Studio, create a *deployment* for each model to expose. The deployment name is arbitrary (for example `prod-gpt4o`, `cheap-mini`).
+. Copy one of the two API keys from *Keys and Endpoint* in the resource overview.
+
+*JSON shape:*
+
+[source,json]
+----
+{
+  "azure": {
+    "type": "azure",
+    "resourceName": "YOUR_RESOURCE_NAME",
+    "apiKeys": ["YOUR_AZURE_KEY_HERE"],
+    "apiVersion": "2024-10-21"
+  }
+}
+----
+
+*Fields:*
+
+[cols=",,",options="header",]
+|===
+|Field |Required |Notes
+|`type` |Yes |Literal `"azure"`
+|`resourceName` |Yes |The `*.openai.azure.com` prefix only, not the full URL.
+|`apiKeys` |Yes |Array. Azure issues two keys per resource for zero-downtime key rotation.
+|`apiVersion` |Yes |Always set explicitly. Omitting it produces a confusing SDK error about a missing query string parameter. Refer to https://learn.microsoft.com/azure/ai-services/openai/reference[Microsoft's API version matrix] for current stable values.
+|===
+
+IMPORTANT: The ``MODELS[].id`` value must match the Azure *deployment name* exactly. A mismatch produces a `DeploymentNotFound` error. Use human-readable deployment names because the ID also appears in JWT permission strings and the editor model picker.
+
+*Two-deployment example:*
+
+[source,bash]
+----
+-e PROVIDERS='{
+  "azure": {
+    "type": "azure",
+    "resourceName": "tinymce-ai",
+    "apiKeys": ["YOUR_AZURE_KEY_HERE"],
+    "apiVersion": "2024-10-21"
+  }
+}'
+
+-e MODELS='[
+  {
+    "id": "prod-gpt4o",
+    "provider": "azure",
+    "name": "GPT-4o (production)",
+    "description": "Azure deployment of GPT-4o for production traffic",
+    "recommended": true,
+    "features": ["conversations", "reviews", "actions"]
+  },
+  {
+    "id": "cheap-mini",
+    "provider": "azure",
+    "name": "GPT-4o mini (low cost)",
+    "description": "Azure deployment of GPT-4o mini for cheap actions",
+    "features": ["reviews", "actions"]
+  }
+]'
+----
+
+*Verify:*
+
+[source,bash]
+----
+curl -s -H "api-key: YOUR_AZURE_KEY_HERE" \
+  "https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments?api-version=2024-10-21"
+----
+
+Expect a JSON list that includes the deployment IDs from `MODELS`.
+
+IMPORTANT: *Wrong region quota:* each Azure region has independent quotas. Bursty workloads may throttle at modest QPS; split traffic across resources in different regions if needed. *Content filter false positives:* Azure's safety filter is stricter than OpenAI direct. Reviews on certain technical content can return `content_filter` errors.
Configure custom content filter policies in Azure AI Studio.
+====
+
+
+
+== AWS Bedrock
+
+Amazon's hosted-model marketplace (Anthropic, Meta, Mistral, Cohere, Amazon Titan). Credentials must be inlined; the AWS SDK default credential chain is not used.
+
+.Configuration details
+[%collapsible]
+====
+IMPORTANT: The AI service does *not* use the AWS SDK default credential chain. `AWS_PROFILE`, `~/.aws/credentials`, IAM Roles for Service Accounts (IRSA), EC2 instance profiles, ECS task roles, and web identity tokens are all ignored. Inline the credentials in the `PROVIDERS` JSON.
+
+*JSON shape:*
+
+[source,json]
+----
+{
+  "bedrock": {
+    "type": "bedrock",
+    "region": "us-east-1",
+    "credentials": {
+      "accessKeyId": "AKIA_YOUR_KEY_HERE",
+      "secretAccessKey": "YOUR_SECRET_HERE",
+      "sessionToken": "OPTIONAL_FOR_STS"
+    }
+  }
+}
+----
+
+The `sessionToken` field is optional but required for STS-issued short-lived credentials. Plan a rotation procedure when using temporary credentials.
+
+*Prerequisites checklist:*
+
+[arabic]
+. *Enable model access.* Bedrock console -> *Model access* -> Manage model access. Each model must be approved per-region.
+. *Subscribe through AWS Marketplace* for non-Amazon models. Anthropic Claude on Bedrock requires a one-time Marketplace subscription.
+. *Create an IAM user or role* with the permissions below.
+. *Pick a region that has the model.* Not every model is in every region. Check the Bedrock console -> *Model catalog* before assuming a model is available in the target region.
+
+*Required IAM permissions:*
+
+[source,json]
+----
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockInvoke",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel",
+        "bedrock:InvokeModelWithResponseStream",
+        "bedrock:Converse",
+        "bedrock:ConverseStream"
+      ],
+      "Resource": "*"
+    }
+  ]
+}
+----
+
+The service uses both `InvokeModel` and `Converse` paths depending on the model family, so include both.
+
+*Claude 4.x cross-region inference profiles:*
+
+Claude 4.x on Bedrock uses *cross-region inference profiles* rather than per-region model IDs. The ``MODELS[].id`` must be the inference profile ID, prefixed with the regional group:
+
+[cols=",",options="header",]
+|===
+|Region group |Example profile ID
+|US |`us.anthropic.claude-sonnet-4-5-20250929-v1:0`
+|EU |`eu.anthropic.claude-sonnet-4-5-20250929-v1:0`
+|APAC |`apac.anthropic.claude-sonnet-4-5-20250929-v1:0`
+|===
+
+Using the bare model ID (`anthropic.claude-sonnet-4-5-...`) returns `ValidationException`.
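+
+To confirm the exact profile IDs available in a region before wiring them into `MODELS` (assuming a recent AWS CLI with the Bedrock inference-profile commands available), list them directly:
+
+[source,bash]
+----
+# Each entry is a cross-region profile ID usable as a MODELS[].id value
+aws bedrock list-inference-profiles --region us-east-1 \
+  --query 'inferenceProfileSummaries[].inferenceProfileId' --output table
+----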
+ +*Minimal example:* + +[source,bash] +---- +-e PROVIDERS='{ + "bedrock": { + "type": "bedrock", + "region": "us-east-1", + "credentials": { + "accessKeyId": "AKIA_YOUR_KEY_HERE", + "secretAccessKey": "YOUR_SECRET_HERE" + } + } +}' + +-e MODELS='[ + { + "id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0", + "provider": "bedrock", + "name": "Claude Sonnet 4.5 (Bedrock)", + "description": "Anthropic Claude Sonnet 4.5 through AWS Bedrock cross-region inference", + "recommended": true, + "capabilities": { "reasoning": true }, + "features": ["conversations", "reviews", "actions"] + } +]' +---- + +*Verify before running the AI service:* + +[source,bash] +---- +aws bedrock list-foundation-models --region us-east-1 \ + --query 'modelSummaries[?contains(modelId,`claude`)].[modelId,modelLifecycle.status]' \ + --output table +---- + +[source,bash] +---- +aws bedrock-runtime converse \ + --region us-east-1 \ + --model-id us.anthropic.claude-sonnet-4-5-20250929-v1:0 \ + --messages '[{"role":"user","content":[{"text":"Say hello in five words."}]}]' +---- + +If either fails, the AI service fails with the same root cause; fix IAM and model access before continuing. + +IMPORTANT: *`AccessDeniedException`* almost always means missing model access in the Bedrock console. *`ThrottlingException`* on the first call indicates low default quotas; request an increase through Service Quotas. *`ValidationException`* means a mismatch between regional model IDs and cross-region inference profiles. *`INVALID_PAYMENT_INSTRUMENT`* indicates a missing AWS Marketplace subscription. +==== + + + +[[google-vertex-ai]] +== Google Vertex AI + +Google's enterprise model surface. Project-scoped, IAM-driven, GCP-billed. Credentials must be inlined (same limitation as Bedrock). + +.Configuration details +[%collapsible] +==== +IMPORTANT: The Vertex adapter ignores Application Default Credentials (ADC), `GOOGLE_APPLICATION_CREDENTIALS`, GKE Workload Identity, and Compute Engine metadata server credentials. Inline either a service-account key or an account-bound API key in the `PROVIDERS` JSON. + +*JSON shape (service account):* + +[source,json] +---- +{ + "vertex": { + "type": "vertex", + "project": "YOUR_GCP_PROJECT_ID", + "location": "us-central1", + "credentials": { + "clientEmail": "ai-service@YOUR_GCP_PROJECT_ID.iam.gserviceaccount.com", + "privateKey": "-----BEGIN PRIVATE KEY-----\nMIIE...\n-----END PRIVATE KEY-----\n" + } + } +} +---- + +*JSON shape (account-bound API key):* + +[source,json] +---- +{ + "vertex": { + "type": "vertex", + "project": "YOUR_GCP_PROJECT_ID", + "location": "us-central1", + "apiKeys": ["YOUR_VERTEX_API_KEY"] + } +} +---- + +*Fields:* + +[cols=",,",options="header",] +|=== +|Field |Required |Notes +|`type` |Yes |Literal `"vertex"` +|`project` |Yes |GCP project ID, not the project number. +|`location` |Yes |Region, for example `us-central1`, `europe-west4`. Must support the model family. +|`credentials.clientEmail` |If using SA |The `client_email` field from the SA JSON key. +|`credentials.privateKey` |If using SA |The `private_key` field from the SA JSON key. See the newline escaping note below. +|`apiKeys` |If using account-bound key |Array of one or more Vertex-bound API keys. +|=== + +*`private_key` newline escaping:* + +When copying the `private_key` from a GCP service account JSON key into a `.env` file and then expanding with `set -a && source .env`, the shell collapses the `\n` sequences and the key becomes invalid (Vertex returns `invalid_grant`). 
A reliable approach is to build the entire `PROVIDERS` value using a JSON serializer:
+
+[source,bash]
+----
+python3 - <<'EOF' > providers.json
+import json
+sa = json.load(open("/path/to/service-account.json"))
+providers = {
+    "vertex": {
+        "type": "vertex",
+        "project": sa["project_id"],
+        "location": "us-central1",
+        "credentials": {
+            "clientEmail": sa["client_email"],
+            "privateKey": sa["private_key"]
+        }
+    }
+}
+print(json.dumps(providers))
+EOF
+
+export PROVIDERS="$(cat providers.json)"
+----
+
+*Account-bound API keys still require a service account:*
+
+The Vertex "account-bound API key" is bound to a Google Cloud project AND a service account inside that project. A service account must exist and have `roles/aiplatform.user` granted; the API key only avoids distributing the private key.
+
+*GCP org policies that block setup:*
+
+[cols=",",options="header",]
+|===
+|Org policy |Blocks
+|`iam.disableServiceAccountCreation` |Creating the service account. Symptom: `403 Permission iam.serviceAccounts.create denied`.
+|`iam.disableServiceAccountKeyCreation` |Downloading a JSON key. Symptom: *Create key* greyed out, or `FAILED_PRECONDITION`.
+|`iam.allow.disabled` |Account-bound API key creation. Symptom: `API_KEY_INVALID`.
+|===
+
+When a GCP org enforces any of these, the security or cloud team must grant a project-level exception or pre-provision the credentials.
+
+*Minimal example:*
+
+[source,bash]
+----
+-e PROVIDERS='{
+  "vertex": {
+    "type": "vertex",
+    "project": "tinymce-ai",
+    "location": "us-central1",
+    "credentials": {
+      "clientEmail": "ai-service@tinymce-ai.iam.gserviceaccount.com",
+      "privateKey": "-----BEGIN PRIVATE KEY-----\nMIIEv...\n-----END PRIVATE KEY-----\n"
+    }
+  }
+}'
+
+-e MODELS='[
+  {
+    "id": "gemini-2.0-flash-001",
+    "provider": "vertex",
+    "name": "Gemini 2.0 Flash (Vertex)",
+    "description": "Google Gemini 2.0 Flash through Vertex AI",
+    "features": ["conversations", "reviews", "actions"]
+  }
+]'
+----
+
+The service account needs `roles/aiplatform.user` (or a custom role with `aiplatform.endpoints.predict` and `aiplatform.endpoints.streamGenerateContent`).
+
+*Verify:*
+
+[source,bash]
+----
+gcloud auth activate-service-account \
+  --key-file=/path/to/service-account.json
+
+gcloud ai models list \
+  --region=us-central1 \
+  --project=YOUR_GCP_PROJECT_ID
+----
+
+IMPORTANT: *Region mismatch:* set `location` to a region that hosts the model. *`SERVICE_DISABLED`* means the Vertex AI API is not enabled; run `gcloud services enable aiplatform.googleapis.com`. *Quota:* new projects default to 5 QPS; request an increase before production traffic.
+====
+
+
+
+== OpenAI-compatible (Ollama, vLLM, LM Studio, and similar)
+
+For any HTTP API that implements the OpenAI Chat Completions interface, including self-hosted runtimes and commercial aggregators (OpenRouter, Together, Groq, Fireworks). The `baseUrl` *must* include the `/v1` suffix.
+
+.Configuration details
+[%collapsible]
+====
+*JSON shape:*
+
+[source,json]
+----
+{
+  "local-llm": {
+    "type": "openai-compatible",
+    "baseUrl": "http://host.docker.internal:11434/v1",
+    "apiKeys": ["optional-bearer-token"],
+    "headers": { "X-Custom-Header": "value" }
+  }
+}
+----
+
+*Fields:*
+
+[cols=",,",options="header",]
+|===
+|Field |Required |Notes
+|`type` |Yes |Literal `"openai-compatible"`
+|`baseUrl` |Yes |*Must include the `/v1` suffix.* Without it, every request fails with a misleading "Not Found" Server-Sent Events (SSE) error.
+|`apiKeys` |No |Sent as a bearer token in the `Authorization` header.
Most local runtimes ignore it.
+|`headers` |No |Additional headers such as auth tokens or tenant IDs.
+|===
+
+NOTE: File uploads through this adapter are limited to `image/*` MIME types. PDFs and Office documents are not forwarded. To work with non-image files, route through an OpenAI, Anthropic, or Bedrock provider instead.
+
+*Ollama-specific setup:*
+
+Ollama listens on `127.0.0.1:11434` by default, which is unreachable from inside a Docker container. Bind to all interfaces:
+
+[source,bash]
+----
+OLLAMA_HOST=0.0.0.0:11434 ollama serve
+----
+
+On Linux with systemd, create an override file instead:
+
+[source,ini]
+----
+# /etc/systemd/system/ollama.service.d/override.conf
+[Service]
+Environment="OLLAMA_HOST=0.0.0.0:11434"
+----
+
+Then reload and restart: `sudo systemctl daemon-reload && sudo systemctl restart ollama`.
+
+On Linux, add the host gateway so `host.docker.internal` resolves:
+
+[source,yaml]
+----
+services:
+  ai-service:
+    image: registry.containers.tiny.cloud/ai-service:latest
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+----
+
+If Ollama returns "does not support tools", the model was built from a raw GGUF without a chat template. Use `ollama pull` for a Library model that includes a proper Modelfile, or author a custom one.
+
+.Custom Modelfile example
+[%collapsible]
+=====
+[source]
+----
+FROM /path/to/your-model.gguf
+
+TEMPLATE """{{ if .System }}<|im_start|>system
+{{ .System }}<|im_end|>
+{{ end }}{{ range .Messages }}<|im_start|>{{ .Role }}
+{{ .Content }}<|im_end|>
+{{ end }}<|im_start|>assistant
+"""
+
+PARAMETER stop "<|im_end|>"
+PARAMETER stop "<|im_start|>"
+----
+
+The exact template depends on the base model. Check the model card for the recommended chat template. Verify tool support with `ollama show MODEL_NAME` before connecting to the AI service.
+=====
+
+The reasoning toggle (`capabilities.reasoning: true`) is cosmetic for Ollama-backed models; the openai-compatible adapter does not translate it to the native Ollama API.
+
+*Timeout:*
+
+Large self-hosted models on consumer hardware can exceed the default 180-second timeout.
Override with: + +[source,bash] +---- +-e LLM_TIMEOUT_MS='600000' +---- + +*Example -- Ollama:* + +[source,bash] +---- +-e PROVIDERS='{ + "ollama": { + "type": "openai-compatible", + "baseUrl": "http://host.docker.internal:11434/v1" + } +}' + +-e MODELS='[ + { + "id": "qwen3:0.6b", + "provider": "ollama", + "name": "Qwen3 0.6B (local)", + "description": "Local Ollama model for fast actions", + "features": ["conversations", "reviews", "actions"] + } +]' + +-e LLM_TIMEOUT_MS='600000' +---- + +*Example -- vLLM:* + +[source,bash] +---- +-e PROVIDERS='{ + "vllm": { + "type": "openai-compatible", + "baseUrl": "http://vllm-host.internal:8001/v1", + "apiKeys": ["YOUR_VLLM_TOKEN"] + } +}' + +-e MODELS='[ + { + "id": "meta-llama/Llama-3.1-8B-Instruct", + "provider": "vllm", + "name": "Llama 3.1 8B (vLLM)", + "description": "Self-hosted Llama 3.1 8B served through vLLM", + "features": ["conversations", "reviews", "actions"] + } +]' +---- + +*Example -- LM Studio:* + +[source,bash] +---- +-e PROVIDERS='{ + "lmstudio": { + "type": "openai-compatible", + "baseUrl": "http://host.docker.internal:1234/v1" + } +}' + +-e MODELS='[ + { + "id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", + "provider": "lmstudio", + "name": "Llama 3.1 8B (LM Studio)", + "description": "Local LM Studio runtime", + "features": ["conversations", "actions"] + } +]' +---- + +*Verify:* + +[source,bash] +---- +curl -s http://host.docker.internal:11434/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "qwen3:0.6b", + "messages": [{"role":"user","content":"Say hello in five words."}], + "stream": false + }' +---- + +When the `curl` call returns a chat completion, the AI service can use the same endpoint. +==== + + + +== Multiple providers coexistence + +`PROVIDERS` is a single JSON object that may contain any number of entries. Each entry is independent; the service maintains a separate client pool per provider. There is no limit beyond JSON-in-env-var size constraints (many shells allow 64 KB{plus} or more in a single variable). + +A `MODELS` array routes individual models to specific providers using the `provider` field, which must match a `PROVIDERS` key *exactly* (case-sensitive). + +=== Example: three providers, mixed routing + +.Click to expand: three-provider `PROVIDERS` and `MODELS` example +[%collapsible] +==== +[source,bash] +---- +-e PROVIDERS='{ + "openai": { "type": "openai", "apiKeys": ["sk-proj-YOUR_KEY_HERE"] }, + "bedrock-us": { + "type": "bedrock", + "region": "us-east-1", + "credentials": { + "accessKeyId": "AKIA_YOUR_KEY_HERE", + "secretAccessKey": "YOUR_SECRET_HERE" + } + }, + "ollama": { "type": "openai-compatible", "baseUrl": "http://host.docker.internal:11434/v1" } +}' + +-e MODELS='[ + { + "id": "gpt-4o", + "provider": "openai", + "name": "GPT-4o", + "description": "OpenAI flagship model for conversations", + "recommended": true, + "features": ["conversations"] + }, + { + "id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0", + "provider": "bedrock-us", + "name": "Claude Sonnet 4.5 (Bedrock)", + "description": "Claude on Bedrock for reviews", + "capabilities": { "reasoning": true }, + "features": ["reviews"] + }, + { + "id": "qwen3:0.6b", + "provider": "ollama", + "name": "Qwen3 0.6B (local)", + "description": "Local model for cheap quick actions", + "features": ["actions"] + } +]' +---- +==== + +This wires conversations to OpenAI, reviews to Bedrock-hosted Claude, and quick actions to a local Ollama model. 
The TinyMCE editor will pick the appropriate provider for each feature based on which models declare which `features`. + +A `MODELS` entry with a `provider` value that does not exist in `PROVIDERS` is silently skipped; that model will not appear in `/v1/models/1`. When a model is missing from the model selector in the rich text editor, check the spelling of its `provider` field. + + + +== `MODELS` field reference + +=== Top-level fields + +[cols=",,,,",options="header",] +|=== +|Field |Required |Type |Default |Notes +|`id` |Yes |string |- |Model identifier sent to the upstream provider. For Azure, this must match the deployment name. For Bedrock, this must match the model ID or inference profile. +|`provider` |Yes |string |- |Must match a key in `PROVIDERS` exactly. +|`description` |Yes |string |- |Shown in model picker tooltips. +|`name` |No |string |value of `id` |Display name in the model picker. +|`recommended` |No |boolean |`false` |If `true`, marks the model as recommended in the picker. Only one model should be flagged per environment. +|`capabilities.webSearch` |No |boolean |`false` |Whether to allow the web search toggle for this model. Requires `WEBSEARCH_ENABLED`. +|`capabilities.reasoning` |No |boolean |`false` |Whether to expose a reasoning/extended-thinking toggle. Supported providers include Anthropic, Bedrock-Claude, and OpenAI o-series. Cosmetic for Ollama (see openai-compatible section). +|`contextLimits` |No |object |see below |Per-model context constraints. +|`features` |Yes |`string[]` |- |Which features the model is eligible for. Must contain at least one usable feature. +|=== + +=== `contextLimits` defaults + +[cols=",,",options="header",] +|=== +|Field |Default |Unit +|`maxContextLength` |`256000` |characters +|`maxFiles` |`100` |count +|`maxFileSize` |`5242880` (Anthropic) / `7340032` (all others) |bytes +|`maxTotalFileSize` |`31457280` |bytes +|`maxTotalPdfFilePages` |`100` |pages +|=== + +Override `contextLimits` only when necessary, for example when a model has a smaller real context window than the default 256K character budget, or when a deployment policy restricts file size. + +=== Available `features` strings + +The full set of feature strings recognized by the service: + +[source,text] +---- +conversations +conversations.titleGeneration +reviews +reviews.correctness +reviews.clarity +reviews.readability +reviews.make-longer +reviews.make-shorter +reviews.make-tone-casual +reviews.make-tone-direct +reviews.make-tone-friendly +reviews.make-tone-confident +reviews.make-tone-professional +reviews.translate +actions +actions.make-longer +actions.make-shorter +actions.continue +actions.make-tone-casual +actions.make-tone-direct +actions.make-tone-friendly +actions.make-tone-confident +actions.make-tone-professional +actions.translate +actions.fix-grammar +actions.improve-writing +---- + +The three umbrella values `conversations`, `reviews`, and `actions` enable the entire family. Use a specific sub-feature only when restricting a model to a subset; for example, a low-cost model that handles only `actions.fix-grammar`. + +A model with no `features` entry, or with only sub-features the editor does not request, will be hidden from the picker. + + + +== Rotating API keys without downtime + +The `apiKeys` field on every provider type that has one is an *array*. The service treats all entries as valid for incoming requests, which allows rotating keys with zero downtime: + +[arabic] +. 
Append the new key to the array:
++
+[source,json]
+----
+"apiKeys": ["sk-proj-OLD_KEY", "sk-proj-NEW_KEY"]
+----
+. Restart the AI service container so it picks up the new `PROVIDERS` value.
+. Verify the new key works (run a chat completion through the service).
+. Revoke the old key in the provider console.
+. Remove the old key from `apiKeys`:
++
+[source,json]
+----
+"apiKeys": ["sk-proj-NEW_KEY"]
+----
+. Restart the AI service container again.
+
+The same procedure works for `anthropic`, `google`, `azure`, and `openai-compatible` provider types. For Bedrock and Vertex, swap the `credentials` object atomically; there is no array of credential objects, so plan a short maintenance window or run two AI service replicas behind a load balancer for a no-downtime swap.
+
+
+
+== Quick troubleshooting index
+
+[cols=",,",options="header",]
+|===
+|Symptom |Most likely cause |Section
+|Editor shows "model unavailable" / `agent-1 allowed:false` |`MODELS` not set or every entry skipped |<<models-required>>
+|`GET /v1/models/v1` returns 500 |Wrong compatibility version |<<models-required>>
+|Bedrock returns `NoValidApiKeysFoundError` |Relying on the AWS default credential chain |Bedrock
+|Bedrock returns `AccessDeniedException` |Model access not enabled in console |Bedrock prerequisites
+|Bedrock returns `ValidationException` on Claude 4 |Bare model ID used instead of cross-region inference profile |Bedrock
+|Bedrock returns `INVALID_PAYMENT_INSTRUMENT` |Missing AWS Marketplace subscription for the model family |Bedrock prerequisites
+|Vertex returns `invalid_grant` |`private_key` newline escaping mangled |Vertex
+|Vertex returns `SERVICE_DISABLED` |aiplatform.googleapis.com not enabled |Vertex
+|Azure returns `DeploymentNotFound` |``MODELS[].id`` not equal to deployment name |Azure
+|Azure SDK errors about missing API version |`apiVersion` not set |Azure
+|Ollama: "Not Found" in SSE |`baseUrl` missing `/v1` |OpenAI-compatible
+|Ollama: "does not support tools" |Bare-GGUF Modelfile without chat template |OpenAI-compatible
+|Ollama: reasoning toggle has no effect |Not supported through the openai-compatible adapter |OpenAI-compatible
+|Self-hosted model times out at 180s |Default `LLM_TIMEOUT_MS` |OpenAI-compatible
+|AI request hangs forever in browser |Reverse proxy buffering SSE |See the reverse proxy and TLS section of xref:tinymceai-on-premises-production.adoc[Production deployment]
+|===
+
+More general troubleshooting (database, JWT, storage, networking) lives in xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting].
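+
+After any `PROVIDERS` or `MODELS` change, a short end-to-end check (a sketch assuming the demo token server from the Getting started guide on port 3001 and the AI service on port 8000) confirms which models the service actually exposes:
+
+[source,bash]
+----
+TOKEN=$(curl -s -X POST http://localhost:3001/api/ai-token | jq -r .token)
+
+# Every entry defined in MODELS should appear here with "allowed": true
+curl -s http://localhost:8000/v1/models/1 \
+  -H "Authorization: Bearer $TOKEN" | jq '.models[] | {id, allowed, features}'
+----
+
+A model missing from this output was skipped at startup, most often because its `provider` value does not match a `PROVIDERS` key exactly.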
+ + + +== See also + +* xref:tinymceai-on-premises-getting-started.adoc[Getting started] -- initial container bring-up and demo +* xref:tinymceai-on-premises-jwt.adoc[JWT authentication] -- per-model and per-provider JWT permissions +* xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] -- MySQL/Postgres configuration for the AI service +* xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] -- general troubleshooting beyond provider configuration diff --git a/modules/ROOT/pages/tinymceai-on-premises-reference.adoc b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc new file mode 100644 index 0000000000..7470783f81 --- /dev/null +++ b/modules/ROOT/pages/tinymceai-on-premises-reference.adoc @@ -0,0 +1,188 @@ += TinyMCE AI on-premises reference +:navtitle: Reference +:description: Environment variable, API endpoint, SSE, and error code reference for the TinyMCE AI on-premises service +:keywords: AI, on-premises, reference, API, environment variables, error codes + +== Environment variable reference + +Alphabetized. Required-ness is marked relative to a minimum working deployment. + +[cols=",,,",options="header",] +|=== +|Variable |Required |Default |Description +|`ALLOWED_ORIGINS` |Recommended |- |Comma-separated list of Cross-Origin Resource Sharing (CORS)-allowed editor origins. Required for cross-origin editor deployments. +|`DATABASE_DATABASE` |Yes |- |Database name (`ai_service` is the convention). +|`DATABASE_DRIVER` |Yes |- |`mysql` or `postgres`. +|`DATABASE_HOST` |Yes |- |Database hostname or IP. +|`DATABASE_PASSWORD` |Yes |- |Database password. +|`DATABASE_PORT` |No |3306 (MySQL) / 5432 (PostgreSQL) |Database port. +|`DATABASE_SCHEMA` |PostgreSQL only |`cs-on-premises` |PostgreSQL schema name. Pre-create or set to `public`. +|`DATABASE_SSL_CA` |No |- |Path to CA cert for database Transport Layer Security (TLS). +|`DATABASE_SSL_CERT` |No |- |Path to client cert. +|`DATABASE_SSL_KEY` |No |- |Path to client key. +|`DATABASE_USER` |Yes |- |Database user. +|`ENABLE_METRIC_LOGS` |No |`false` |Emit JSON request logs to stdout. +|`ENVIRONMENTS_MANAGEMENT_SECRET_KEY` |Yes |- |Management Panel login secret. *Not* used to sign user JSON Web Tokens (JWTs). +|`LANGFUSE_BASE_URL` |No |`https://cloud.langfuse.com` |Self-hosted Langfuse URL. +|`LANGFUSE_DEBUG` |No |- |Verbose Langfuse logging. +|`LANGFUSE_PUBLIC_KEY` |If using Langfuse |- |Langfuse public key. +|`LANGFUSE_SECRET_KEY` |If using Langfuse |- |Langfuse secret key. +|`LICENSE_KEY` |Yes |- |AI service license key (long string from Tiny). +|`LLM_TELEMETRY_ENABLED` |No |`false` |Primary OpenTelemetry switch. +|`LLM_TIMEOUT_MS` |No |180000 |Per-request large language model (LLM) timeout in ms. Raise for large self-hosted models. +|`MCP_SERVERS` |No |- |JSON object; Model Context Protocol (MCP) server configuration. See xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios]. +|`MODELS` |Sometimes |- |JSON array; required for Azure / Bedrock / Vertex / openai-compatible. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. +|`OTEL_DEBUG` |No |- |Verbose OpenTelemetry Protocol (OTLP) diagnostic logging. +|`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` |If using OTEL |- |OTLP traces endpoint URL. +|`OTEL_TRACES_SAMPLER_ARG` |No |`1.0` |OTLP sampling rate (0.0 to 1.0). +|`PROVIDERS` |Yes |- |JSON object; LLM provider configuration. See xref:tinymceai-on-premises-providers.adoc[LLM providers]. +|`REDIS_CLUSTER_NODES` |No |- |Comma-separated `host:port[:password]` for Redis Cluster mode. 
+|`REDIS_DB` |No |1 |Redis database number.
+|`REDIS_HOST` |Yes |- |Redis hostname.
+|`REDIS_IP_FAMILY` |No |- |Set to `6` for IPv6.
+|`REDIS_PASSWORD` |No |- |Redis password.
+|`REDIS_PORT` |No |6379 |Redis port.
+|`REDIS_TLS_CA` |No |- |Path to CA cert for Redis TLS.
+|`REDIS_TLS_CERT` |No |- |Path to Redis client cert.
+|`REDIS_TLS_ENABLE` |No |`false` |Enable Redis TLS.
+|`REDIS_TLS_KEY` |No |- |Path to Redis client key.
+|`REDIS_USER` |No |- |Redis username (ACL).
+|`STORAGE_ACCESS_KEY_ID` |If using S3 |- |S3 access key.
+|`STORAGE_ACCOUNT_KEY` |If using Azure Blob |- |Azure storage account key.
+|`STORAGE_ACCOUNT_NAME` |If using Azure Blob |- |Azure storage account name.
+|`STORAGE_BUCKET` |If using S3 |- |S3 bucket name.
+|`STORAGE_CONTAINER` |If using Azure Blob |- |Azure container name.
+|`STORAGE_DRIVER` |Yes |- |`database`, `filesystem`, `s3`, or `azure`.
+|`STORAGE_ENDPOINT` |No |- |Custom endpoint (S3-compatible or Azure-compatible).
+|`STORAGE_LOCATION` |If using filesystem |- |Mount point for filesystem storage. Must be writable by the container user.
+|`STORAGE_REGION` |If using S3 |- |S3 region.
+|`STORAGE_SECRET_ACCESS_KEY` |If using S3 |- |S3 secret access key.
+|`WEBRESOURCES_ENABLED` |No |`false` |Enable web scraping endpoint forwarding.
+|`WEBRESOURCES_ENDPOINT` |If web resources enabled |- |Scraper URL.
+|`WEBRESOURCES_REQUEST_TIMEOUT` |No |- |Scraper request timeout in ms.
+|`WEBSEARCH_ENABLED` |No |`false` |Enable web search forwarding.
+|`WEBSEARCH_ENDPOINT` |If web search enabled |- |Search URL.
+|`WEBSEARCH_HEADERS` |No |- |JSON object; extra headers sent to the search endpoint.
+|`WEBSEARCH_REQUEST_TIMEOUT` |No |- |Search request timeout in ms.
+|===
+
+== API endpoint reference
+
+[cols=",,,",options="header",]
+|===
+|Method |Path |Auth |Description
+|GET |`/health` |None |Liveness probe. Returns `{"serviceName":"on-premises-http","uptime":SECONDS}`. Not metric-logged.
+|GET |`/docs/` |None |ReDoc-rendered API documentation.
+|GET |`/v1/api/doc.json` |None |OpenAPI 3 JSON spec.
+|GET |`/panel/` |Management secret |Management Panel UI. Sign in with `ENVIRONMENTS_MANAGEMENT_SECRET_KEY`.
+|GET |`/v1/models/1` |JWT |List available models for the current token. The compatibility version literal `1` is the only accepted value; `v1`, `v2`, `latest` all return 500.
+|POST |`/v1/conversations` |JWT |Create a conversation. Body *must* include a client-supplied `id`.
+|GET |`/v1/conversations` |JWT |List conversations for the current `sub`.
+|GET |`/v1/conversations/\{id}` |JWT |Read one conversation.
+|POST |`/v1/conversations/\{id}/messages` |JWT |Send a message. Returns a Server-Sent Events (SSE) stream.
+|DELETE |`/v1/conversations/\{id}` |JWT |Delete a conversation.
+|POST |`/v1/actions/\{actionId}` |JWT |Run a quick action. Body shape: `{"content":[{"type":"text","content":"..."}]}` (no `modelId`).
+|POST |`/v1/reviews/\{reviewId}` |JWT |Run a review.
+|===
+
+NOTE: Environment management (create, read, update, delete) is handled through the Management Panel UI at `/panel/`.
+
+== Server-Sent Events reference
+
+The message endpoint returns `Content-Type: text/event-stream`. Events use named types:
+
+[cols=",,",options="header",]
+|===
+|Event |Payload shape |Meaning
+|`message-metadata` |`{"messageId":"..."}` |Sent once at the start of each message.
+|`text-delta` |`{"textDelta":"..."}` |Incremental text fragment. The editor concatenates these.
+|`tool-call` |`{"toolName":"...","arguments":{...}}` |Emitted when the model invokes an MCP tool.
+|`tool-result` |`{"toolName":"...","result":{...}}` |Emitted when an MCP tool returns.
+|`error` |`{"message":"...","cause":{...}}` |Provider error. HTTP status remains 200; the error is in-stream.
+|`done` |`{}` |Sent once at the end of the stream.
+|===
+
+Healthy stream example:
+
+[source,text]
+----
+event: message-metadata
+data: {"messageId":"abc123"}
+
+event: text-delta
+data: {"textDelta":"Hello "}
+
+event: text-delta
+data: {"textDelta":"there!"}
+
+event: done
+data: {}
+----
+
+Error stream example:
+
+[source,text]
+----
+event: message-metadata
+data: {"messageId":"abc123"}
+
+event: error
+data: {"message":"Incorrect API key provided","cause":{"providerStatusCode":401}}
+----
+
+Browser client parsing notes:
+
+* Each event is two lines -- an `event:` line naming the event type and a `data:` line carrying the payload -- separated from the next event by a blank line.
+* `data` is always valid JSON.
+* Unknown `event` types carry informational payloads and can be ignored for forward compatibility.
+* `text-delta` is the only event that contributes to the visible response body.
+
+== Error code reference
+
+Error codes returned in HTTP 4xx responses and inside SSE `event: error` payloads.
+
+[cols=",,,",options="header",]
+|===
+|Code |Origin |Likely cause |Fix
+|`invalid-jwt-signature` |JWT verifier |Wrong API Secret, or used `ENVIRONMENTS_MANAGEMENT_SECRET_KEY`, or signed with RS256 |Re-sign with HS256 using the correct API Secret
+|`invalid-jwt-payload` |JWT verifier |`aud` does not match a real Environment ID, OR environment created through raw API not Panel UI |Re-copy env ID from `/panel/`, or recreate the env through the Panel UI
+|`invalid-jwt` |JWT verifier |Token >60 s past `exp` |Issue tokens with shorter lifetime and refresh sooner
+|`Environment not found` |AI runtime |Same as `invalid-jwt-payload` second sub-cause |Recreate env through Panel UI
+|`missing-permissions` |Permission checker |`auth.ai.permissions` array does not cover the requested action |Add the missing permission string
+|`invalid-request-data` |Input validator |Field validation failed (most commonly the 100,000 char prompt cap) |Fix the request body.
+
+== Error code reference
+
+The following codes are returned in HTTP 4xx responses and inside SSE `event: error` payloads.
+
+[cols=",,,",options="header",]
+|===
+|Code |Origin |Likely cause |Fix
+|`invalid-jwt-signature` |JWT verifier |Wrong API Secret, or signed with `ENVIRONMENTS_MANAGEMENT_SECRET_KEY`, or signed with RS256 |Re-sign with HS256 using the correct API Secret
+|`invalid-jwt-payload` |JWT verifier |`aud` does not match a real Environment ID, or the environment was created through the raw API rather than the Panel UI |Re-copy the environment ID from `/panel/`, or recreate the environment through the Panel UI
+|`invalid-jwt` |JWT verifier |Token more than 60 s past `exp` |Issue tokens with a shorter lifetime and refresh sooner
+|`Environment not found` |AI runtime |Environment was created through the raw API rather than the Panel UI (the second `invalid-jwt-payload` cause) |Recreate the environment through the Panel UI
+|`missing-permissions` |Permission checker |`auth.ai.permissions` array does not cover the requested action |Add the missing permission string
+|`invalid-request-data` |Input validator |Field validation failed (most commonly the 100,000 character prompt cap) |Fix the request body; see the error message for details
+|`environment-not-found` |AI runtime |Same as `Environment not found` |Recreate through the Panel UI
+|`conversation in use` |Conversation runtime |Stream-abort left stale state |Start a new conversation
+|`conversation does not exist` |Conversation runtime |Follow-up to `conversation in use` |Start a new conversation
+|`NoValidApiKeysFoundError` |Bedrock / Vertex adapter |Inline credentials missing |Inline `credentials` in `PROVIDERS`
+|`AccessDeniedException` |Bedrock |Missing model access or IAM permissions |Enable Bedrock model access; attach the IAM policy from xref:tinymceai-on-premises-providers.adoc[LLM providers]
+|`INVALID_PAYMENT_INSTRUMENT` |Bedrock |Anthropic on Bedrock without a Marketplace subscription |Subscribe through AWS Marketplace
+|`ValidationException` |Bedrock |Wrong model ID format (regional instead of cross-region) |Use the inference profile ID for Claude 4.x
+|`DeploymentNotFound` |Azure |``MODELS[].id`` does not match the Azure deployment name |Set ``MODELS[].id`` to the exact deployment name
+|`invalid_grant` |Vertex |Mangled `private_key` newlines |Build `PROVIDERS` from `json.dumps()` of the SA key
+|`SERVICE_DISABLED` |Vertex |`aiplatform.googleapis.com` not enabled |`gcloud services enable aiplatform.googleapis.com`
+|`API_KEY_INVALID` |Vertex |Account-bound API key blocked by org policy |Grant a policy exception
+|`Incorrect API key provided` |OpenAI / Anthropic / Google |Bad API key |Update `PROVIDERS` and `--force-recreate`
+|`Wrong license key.` |AI service startup |Truncated or whitespace-padded license key |Re-paste as a single line
+|===
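+
+When matching a live failure to a row above, reproduce the call with `curl` outside the editor so the full response is visible. A minimal sketch, assuming the service on `localhost:8000` and the suspect token in `$TOKEN`:
+
+[source,bash]
+----
+# Reproduce a failing call directly: -i shows the HTTP status line and
+# the body carries the error code to look up in the table above.
+curl -is -X POST http://localhost:8000/v1/conversations \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{"id":"debug-1","title":"Error repro"}' | head -20
+----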
+
+== Known limits
+
+[cols="1,1,3",options="header"]
+|===
+|Limit |Value |Notes
+
+|Maximum prompt length |100,000 characters |Hard limit enforced by the service. Requests exceeding this return `invalid-request-data`. Summarize or shorten source content before it exceeds this threshold.
+|Conversation create |Client-supplied `id` required |The plugin auto-generates IDs with a `tiny-` prefix. Raw API callers must supply a unique `id` in the create body.
+|Stream-abort recovery |Stop button leaves stale state |The next message returns `409 conversation in use` then `404 conversation does not exist`. Recovery: start a new conversation or reload.
+|Built-in rate limiting |None |Front the service with nginx `limit_req` or ALB rate-limit rules. See xref:tinymceai-on-premises-production.adoc#rate-limiting[Rate limiting].
+|File support (OpenAI-compatible providers) |Images only (`image/*`) |PDFs, text, and Office files are not forwarded to OpenAI-compatible providers. Use a non-OpenAI-compatible provider for non-image file attachments.
+|MCP tool availability |Conversations only |MCP tools are not available in reviews or quick actions.
+|MCP authentication |Single shared token per server |The `headers` field in `MCP_SERVERS` is fixed at deploy time. Per-user authentication is not supported.
+|PostgreSQL default schema |`cs-on-premises` (with hyphen) |Pre-create with `CREATE SCHEMA "cs-on-premises";` or set `DATABASE_SCHEMA=public`.
+|`/v1/models/\{compatibilityVersion}` |Only accepts `1` |Values such as `v1`, `v2`, or `latest` return 500.
+|Environment creation through raw API |Not supported |Always create environments through the Management Panel UI.
+|Bedrock credentials |Inline only |The SDK default credential chain (IAM Roles for Service Accounts (IRSA), instance roles, `AWS_PROFILE`) is not used.
+|Vertex credentials |Inline only |Application Default Credentials, `GOOGLE_APPLICATION_CREDENTIALS`, and the metadata server are not used.
+|Azure `MODELS[].id` |Must equal deployment name |There is no separate `deploymentName` field. The ID is the deployment name.
+|OpenAI-compatible `baseUrl` |Must include `/v1` suffix |Omitting it produces a "Not Found" SSE error.
+|===
diff --git a/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc
new file mode 100644
index 0000000000..f18f327db5
--- /dev/null
+++ b/modules/ROOT/pages/tinymceai-on-premises-troubleshooting.adoc
@@ -0,0 +1,310 @@
+= Troubleshooting
+:navtitle: Troubleshooting
+:description: Troubleshooting guide for the TinyMCE AI on-premises service
+:keywords: AI, on-premises, troubleshooting, errors, debugging
+
+Match the symptom to the fix below. If the symptom does not fit any section, escalate to `support@tiny.cloud` with the output of `docker logs ai-service --tail 200` and a redacted copy of the `PROVIDERS` value.
+
+== Quick triage
+
+[.text-center]
+image::tinymceai-on-premises/troubleshooting-fig-1.svg[alt="Troubleshooting triage decision tree covering container health JWT and LLM connectivity failures",width=100%]
+
+[cols="1,1",options="header"]
+|===
+|Symptom area |Go to
+|Container will not start or exits during boot |<<container-startup-failures>>
+|Container is running, `/health` returns OK, but API calls fail |<<api-and-jwt-authentication>>
+|Conversation starts, but the Server-Sent Events (SSE) stream carries an `event: error` |<<llm-provider-errors>>
+|Editor renders, but the AI toolbar is missing, token fetch fails, or responses hang |<<editor-and-front-end>>
+|Responses are slow or time out |<<performance>>
+|Scaling, upgrades, or deployment questions |xref:tinymceai-on-premises-production.adoc[Production deployment]
+|===
+
+
+[[container-startup-failures]]
+== Container startup failures
+
+Run `docker logs ai-service` first. All entries below assume the log output is available.
+
+[cols="2,2,3",options="header"]
+|===
+|Error / symptom |Cause |Fix
+
+|`Wrong license key.`
+|Key was truncated, contains a line break, or has surrounding whitespace
+|Paste the key as a single unbroken line. Verify the first and last eight characters against the original.
+
+|`EACCES: permission denied, mkdir '/var/storage'`
+|`STORAGE_LOCATION` points to a path the container user cannot write
+|Switch to `STORAGE_DRIVER=database`, or mount a writable volume and point `STORAGE_LOCATION` at it (for example `/tmp/ai-storage`).
+
+|`Not enough permissions to access database.`
+|MySQL user lacks required privileges
+|Grant the privileges listed in the error. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage] for the GRANT statement.
+
+|`schema "cs-on-premises" does not exist`
+|Postgres schema not pre-created
+|Run `CREATE SCHEMA "cs-on-premises";` (double quotes required), or set `DATABASE_SCHEMA=public`. See xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage].
+
+|`[MY-000067] unknown variable 'default-authentication-plugin'`
+|`mysql:8` tag now points to MySQL 8.4, which removed that variable
+|Pin `mysql:8.0` in the compose file and run `docker compose up -d --force-recreate mysql`.
+
+|Container exits with no useful log
+|Missing required env var, or malformed JSON in `PROVIDERS` / `MODELS`
+|Run `docker inspect ai-service {vbar} jq '.[0].Config.Env'` and compare against the xref:tinymceai-on-premises-reference.adoc[environment variable reference]. Validate JSON with `echo "$PROVIDERS" {vbar} jq .`
+
+|`/health` times out despite successful boot
+|Port mapping missing
+|Add `-p 8000:8000` to `docker run`, or `ports: ["8000:8000"]` in compose. Confirm with `docker port ai-service`.
+
+|`getaddrinfo EAI_AGAIN mysql` or `getaddrinfo ENOTFOUND redis`
+|AI service is on a different Docker network from the data layer
+|Use `docker compose` (shared network), or set `DATABASE_HOST=host.docker.internal`. On Linux, add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service.
+|===
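+
+Several of the fixes above reduce to getting the container's run flags right. As a reference point, a minimal `docker run` sketch that avoids the port-mapping and quoting pitfalls at once; the image tag and `.env` path are assumptions, so adjust both to the deployment:
+
+[source,bash]
+----
+# Publish the service port and load the environment from a file so the
+# PROVIDERS / MODELS JSON survives shell quoting intact.
+docker run -d --name ai-service \
+  -p 8000:8000 \
+  --env-file .env \
+  registry.containers.tiny.cloud/ai-service:latest
+
+# Confirm the mapping, then probe liveness.
+docker port ai-service
+curl -fsS http://localhost:8000/health
+----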
+
+
+[[api-and-jwt-authentication]]
+== API and JSON Web Token (JWT) authentication
+
+These entries assume the container is running and `/health` returns OK.
+
+[cols="2,3,3",options="header"]
+|===
+|Error / symptom |Cause |Fix
+
+|`invalid-jwt-signature`
+|Token signed with the wrong key. Most commonly, signed with `ENVIRONMENTS_MANAGEMENT_SECRET_KEY` instead of the per-access-key *API Secret*
+|Re-copy the API Secret from the Management Panel at `/panel/` and re-sign the token.
+
+|`invalid-jwt-payload`
+|`aud` claim does not match a known Environment ID, or `aud` is an array instead of a string
+|Copy the Environment ID from `/panel/`. Ensure `aud` is a string, not an array. Recreate environments through the Panel UI only.
+
+|`invalid-jwt` (expired)
+|Token is past its `exp` claim
+|Issue tokens with a reasonable lifetime (for example `exp = now {plus} 3600`) and refresh before expiry. Synchronize clocks with Network Time Protocol (NTP).
+
+|`Environment not found`
+|Environment was not created through the Management Panel UI
+|Delete and recreate the environment through `/panel/`. Update `AI_ENV_ID` in `.env`.
+
+|JWT silently rejected
+|Token signed with RS256 instead of HS256
+|Re-sign with `algorithm: 'HS256'` and the API Secret. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication].
+
+|`allowed: false` on every endpoint
+|`auth.ai.permissions` is a string, shorthand, or wrong shape
+|Use the explicit array form. See the <<correct-permissions-shape>> below.
+
+|`409 conversation in use` then `404 conversation does not exist`
+|Stream abort left temporary state blocking the conversation
+|Start a new conversation or reload the page. Custom UIs should create a fresh conversation after cancel.
+|===
+
+[[correct-permissions-shape]]
+.Correct permissions shape
+[%collapsible]
+====
+[source,json]
+----
+{
+  "auth": {
+    "ai": {
+      "permissions": [
+        "ai:conversations:*",
+        "ai:models:agent",
+        "ai:actions:system:*",
+        "ai:reviews:system:*"
+      ]
+    }
+  }
+}
+----
+
+Common mistakes that produce `allowed: false`: `"permissions": "ai:admin"` (string shorthand), `"permissions": "*"`, `"useAllFeatures": true`, or a single permission as a string instead of an array. See xref:tinymceai-on-premises-jwt.adoc[JWT authentication] for the full permission catalog.
+====
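+
+When it is unclear whether the fault lies in the token endpoint or the AI service, minting a token by hand removes the endpoint from the equation. A minimal HS256 sketch using only `openssl`; the secret, environment ID, and permissions shown are placeholders, so substitute the values from `/panel/` and the permissions the test requires:
+
+[source,bash]
+----
+# Mint a debug JWT signed with the per-access-key API Secret (HS256).
+# SECRET and ENV_ID are placeholders: copy both from /panel/.
+SECRET='paste-api-secret-here'
+ENV_ID='paste-environment-id-here'
+NOW=$(date +%s)
+
+b64url() { openssl base64 -A | tr '+/' '-_' | tr -d '='; }
+
+HEADER=$(printf '{"alg":"HS256","typ":"JWT"}' | b64url)
+PAYLOAD=$(printf '{"sub":"debug-user","aud":"%s","iat":%s,"exp":%s,"auth":{"ai":{"permissions":["ai:conversations:*","ai:models:agent"]}}}' \
+  "$ENV_ID" "$NOW" $((NOW + 3600)) | b64url)
+SIG=$(printf '%s.%s' "$HEADER" "$PAYLOAD" \
+  | openssl dgst -sha256 -hmac "$SECRET" -binary | b64url)
+
+printf '%s.%s.%s\n' "$HEADER" "$PAYLOAD" "$SIG"
+----
+
+If a hand-minted token works but the token endpoint's output does not, decode both with the JWT recipe in the diagnostic recipes section below and compare the `aud`, `exp`, and permissions claims.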
+
+
+[[llm-provider-errors]]
+== Large language model (LLM) provider errors
+
+These appear as `event: error` inside the SSE stream. The HTTP response is still 200.
+
+=== Cloud providers (OpenAI, Anthropic, Google)
+
+[cols="2,3",options="header"]
+|===
+|Error |Fix
+|`Incorrect API key provided` |Update the key in `PROVIDERS`, then `docker compose up -d --force-recreate ai-service`.
+|===
+
+=== AWS Bedrock
+
+[cols="2,3",options="header"]
+|===
+|Error |Fix
+|`NoValidApiKeysFoundError` |Inline `accessKeyId` and `secretAccessKey` inside `credentials` in `PROVIDERS`. The AWS SDK default credential chain is not used. See xref:tinymceai-on-premises-providers.adoc[LLM providers].
+|`AccessDeniedException` |Enable model access in *Bedrock console -> Model access*. Attach an IAM policy with `bedrock:InvokeModel`, `bedrock:Converse`, and `bedrock:ConverseStream`.
+|`INVALID_PAYMENT_INSTRUMENT` |Complete the AWS Marketplace subscription for Anthropic in *Bedrock console -> Model access -> Anthropic*.
+|`ValidationException` (model invocation not supported) |Use the region-prefixed inference profile ID (for example `us.anthropic.claude-sonnet-4-...`). See xref:tinymceai-on-premises-providers.adoc[LLM providers].
+|===
+
+=== Google Vertex AI
+
+[cols="2,3",options="header"]
+|===
+|Error |Fix
+|`NoValidApiKeysFoundError` |Inline `clientEmail` and `privateKey` inside `credentials` in `PROVIDERS`. Google Application Default Credentials (ADC) is not used. See xref:tinymceai-on-premises-providers.adoc[LLM providers].
+|Auth errors with a valid service account |`private_key` newlines were mangled during copy-paste. Build `PROVIDERS` with a script (`json.dumps()` on the SA JSON file) rather than hand-editing.
+|`SERVICE_DISABLED` |Run `gcloud services enable aiplatform.googleapis.com --project=PROJECT_ID` against the project that hosts the Vertex AI resources.
+|Blocked by GCP org policy |Check `iam.disableServiceAccountCreation`, `iam.disableServiceAccountKeyCreation`, and account-bound API key policies. Exempt the AI service project from all three.
+|===
+
+=== Azure OpenAI
+
+[cols="2,3",options="header"]
+|===
+|Error |Fix
+|Model not found / `DeploymentNotFound` |`MODELS[].id` must match the Azure deployment name exactly.
+|API errors with no provider message |Set `apiVersion` explicitly. See https://learn.microsoft.com/azure/ai-services/openai/reference[Microsoft's API version matrix].
+|===
+
+=== OpenAI-compatible (Ollama, vLLM, LM Studio)
+
+[cols="2,3",options="header"]
+|===
+|Error |Fix
+|"Not Found" in SSE error |`baseUrl` is missing the `/v1` suffix. Ollama default: `\http://host.docker.internal:11434/v1`.
+|`ECONNREFUSED` on Linux |Start Ollama with `OLLAMA_HOST=0.0.0.0:11434 ollama serve`. Add `extra_hosts: ["host.docker.internal:host-gateway"]` to the AI service compose entry.
+|"does not support tools" |Use an official model (`ollama pull qwen3:0.6b`) rather than a bare GGUF. Custom models need a Modelfile with `TEMPLATE` and tool support. See xref:tinymceai-on-premises-providers.adoc[LLM providers].
+|`createMessage` hangs ~180s then times out |Model is too slow for the default timeout. Set `LLM_TIMEOUT_MS` higher, use a lighter quantization, or use a smaller model.
+|===
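+
+Before editing `PROVIDERS`, it is worth confirming that the configured `baseUrl` actually answers from where the AI service sits. A minimal sketch against a default local Ollama; the URL is an assumption, so substitute the configured `baseUrl`:
+
+[source,bash]
+----
+# From the host: the OpenAI-compatible surface should answer on /v1.
+# A 404 here usually means the /v1 suffix is missing from baseUrl.
+curl -fsS http://localhost:11434/v1/models | jq .
+
+# From the AI service's network: checks that host.docker.internal
+# resolves and the inference port is reachable.
+docker compose exec ai-service /bin/sh -c \
+  "nc -zv host.docker.internal 11434"
+----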
+
+
+[[editor-and-front-end]]
+== Editor and front end
+
+Confirm `/health` is OK and a direct `curl` to `/v1/conversations` works before investigating the editor.
+
+[cols="2,3",options="header"]
+|===
+|Symptom |Fix
+
+|No AI buttons in the toolbar
+|Ensure TinyMCE 8{plus} is loaded, `plugins: 'tinymceai'` is set, and the toolbar string includes `tinymceai`. Verify the API key has the AI feature enabled.
+
+|Token fetch returns 401
+|The token endpoint's own authentication middleware is rejecting the request. Check session cookies, Cross-Origin Resource Sharing (CORS) credentials, and bearer tokens in the browser network tab.
+
+|Token returned but rejected by the AI service
+|See <<api-and-jwt-authentication>> above: wrong secret, wrong `aud`, wrong algorithm (RS256 instead of HS256), or wrong permissions shape.
+
+|AI responses hang in the browser
+|The reverse proxy is buffering the SSE stream. Set `proxy_buffering off;` and `proxy_cache off;` in nginx (or the equivalent for the load balancer).
+
+|CORS error on `/v1/conversations`
+|Add the editor's origin (scheme {plus} host {plus} port) to the `ALLOWED_ORIGINS` environment variable.
+
+|Editor renders then disappears (Next.js / Nuxt / SvelteKit)
+|TinyMCE references `window` at load time. Load the editor client-only: `dynamic(() => import('./Editor'), { ssr: false })` in Next.js, pass:c[<ClientOnly>] in Nuxt, `onMount` in SvelteKit. See xref:tinymceai-on-premises-frameworks.adoc[Framework integration].
+
+|`tinymceai_token_provider` called in a tight loop
+|Token endpoint is returning an invalid JWT or non-JSON response. Test with `curl -X POST http://localhost:3000/api/ai-token` and verify the response is `pass:c[{"token":"eyJ..."}]`.
+|===
+
+
+[[performance]]
+== Performance
+
+[cols="2,3",options="header"]
+|===
+|Symptom |Fix
+|Self-hosted model is slow through the AI service compared with raw `curl` |Co-locate the inference server with the AI service. Use a smaller or more quantized model. Disable telemetry during development (`LLM_TELEMETRY_ENABLED=false`).
+|Containers OOM or MySQL takes 60{plus} seconds to start (Colima) |Default Colima VM is too small. Run `colima stop && colima start --cpu 4 --memory 8 --disk 100`.
+|===
+
+
+== Diagnostic recipes
+
+.Expand for copy-ready diagnostic commands
+[%collapsible]
+====
+*Tail logs:*
+
+[source,bash]
+----
+docker logs ai-service --tail 200 -f
+----
+
+*Liveness check:*
+
+[source,bash]
+----
+curl -fsS http://localhost:8000/health
+----
+
+*Decode a JWT (inspect payload without verifying):*
+
+[source,bash]
+----
+python3 -c "import jwt,sys,json; print(json.dumps(jwt.decode(sys.argv[1], options={'verify_signature': False}), indent=2))" "$TOKEN"
+----
+
+*Recreate after an env change:*
+
+[source,bash]
+----
+docker compose up -d --force-recreate ai-service
+----
+
+*Inspect effective environment:*
+
+[source,bash]
+----
+docker inspect ai-service | jq '.[0].Config.Env'
+----
+
+*Validate PROVIDERS JSON:*
+
+[source,bash]
+----
+echo "$PROVIDERS" | jq .
+----
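+
+*Check the `PROVIDERS` value the container actually received:*
+
+Quoting issues can corrupt the JSON between the shell and the container. This sketch re-extracts the value from the running container and re-validates it; it assumes the variable is set directly on the container rather than injected by a secrets manager.
+
+[source,bash]
+----
+# Pull PROVIDERS out of the container's effective environment and
+# re-parse it; a jq error here means the value was mangled in transit.
+docker inspect ai-service \
+  | jq -r '.[0].Config.Env[] | select(startswith("PROVIDERS="))' \
+  | sed 's/^PROVIDERS=//' \
+  | jq .
+----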
+
+*Test data layer connectivity from inside the container:*
+
+[source,bash]
+----
+docker compose exec ai-service /bin/sh -c "nc -zv mysql 3306"
+docker compose exec ai-service /bin/sh -c "nc -zv redis 6379"
+----
+
+*End-to-end smoke test (token mint through streamed response):*
+
+[source,bash]
+----
+TOKEN=$(curl -s -X POST http://localhost:3000/api/ai-token | jq -r '.token')
+
+curl -s -X POST http://localhost:8000/v1/conversations \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{"id":"smoke-1","title":"Smoke test"}'
+
+curl -N -X POST http://localhost:8000/v1/conversations/smoke-1/messages \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{"prompt":"Say hi in five words.","model":"agent-1"}'
+----
+====
+
+
+== Related pages
+
+* xref:tinymceai-on-premises-getting-started.adoc[Getting started]
+* xref:tinymceai-on-premises-jwt.adoc[JWT authentication]
+* xref:tinymceai-on-premises-providers.adoc[LLM providers]
+* xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage]
+* xref:tinymceai-on-premises-frameworks.adoc[Framework integration]
+* xref:tinymceai-on-premises-production.adoc[Production deployment]
diff --git a/modules/ROOT/pages/tinymceai-on-premises.adoc b/modules/ROOT/pages/tinymceai-on-premises.adoc
new file mode 100644
index 0000000000..ebfae3fdff
--- /dev/null
+++ b/modules/ROOT/pages/tinymceai-on-premises.adoc
@@ -0,0 +1,188 @@
+= TinyMCE AI On-Premises
+:navtitle: Overview
+:description: Self-hosted AI writing assistance for TinyMCE; architecture, prerequisites, and topic guide
+:keywords: AI, on-premises, self-hosted, deployment, overview
+
+The TinyMCE AI on-premises service is a self-hosted back end that powers AI writing assistance inside the TinyMCE rich text editor. It runs entirely within the host infrastructure. Editor content, conversation history, file attachments, and user data never leave the network.
+
+The service ships as a single Open Container Initiative (OCI) container image (`registry.containers.tiny.cloud/ai-service`). It exposes a REST API, a Management Panel, Server-Sent Events streaming, and an OpenAPI spec.
+
+== Architecture
+
+[.text-center]
+image::tinymceai-on-premises/complete-guide-fig-1.svg[alt="Service architecture showing browser with TinyMCE token endpoint AI service database Redis and LLM providers",width=100%]
+
+Data flow for a single AI request:
+
+[arabic]
+. The browser loads TinyMCE with the `tinymceai` plugin.
+. The user triggers an AI feature (chat, review, quick action).
+. The plugin calls the token endpoint, which signs an HS256 JSON Web Token (JWT) with the API Secret.
+. The plugin sends the JWT and prompt to the AI service.
+. The AI service verifies the token, checks per-feature permissions, and forwards the prompt to the configured large language model (LLM).
+. The LLM streams its response back to the browser through Server-Sent Events.
+
+The shared secret (API Secret) never leaves the back end; the editor and the AI service only ever see signed tokens.
+
+== Capabilities
+
+[cols="1,2",options="header"]
+|===
+|Capability |Details
+
+|Conversational AI assistant
+|Multi-turn chat sidebar. Conversation history is isolated per user through the JWT `sub` claim.
+
+|Document review
+|Correctness, clarity, readability, tone, and translation.
+
+|Quick actions
+|Rewrite, summarize, expand, change tone, fix grammar, translate, continue, and improve writing.
+
+|LLM provider flexibility
+|OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, or any self-hosted OpenAI-compatible endpoint. Multiple providers can coexist.
+
+|Model Context Protocol (MCP) integration
+|Connect internal tools, databases, and knowledge bases through Model Context Protocol over Streamable HTTP transport.
+
+|Web scraping and web search
+|Pluggable endpoints for fetching web pages and running searches.
+
+|Multi-tenant environments
+|Isolated conversation history and per-tenant access keys through Environments.
+
+|Per-user, per-feature permissions
+|Fine-grained control through the `auth.ai.permissions` JWT claim.
+
+|Streaming responses
+|Server-Sent Events from the LLM back to the browser.
+
+|File attachments
+|Database, filesystem, Amazon S3, or Azure Blob Storage.
+
+|Observability
+|Structured request logs, OpenTelemetry, and Langfuse. All three run as independent simultaneous pipelines.
+
+|Horizontal scaling
+|The service is stateless. Share identical environment configuration across replicas.
+|===
+
+== Prerequisites
+
+[cols="1,3",options="header"]
+|===
+|Requirement |Details
+
+|Container runtime
+|Docker 20.10{plus}, Podman 4{plus}, or any OCI-compatible runtime. Kubernetes, AWS ECS, or Azure Container Apps are also supported.
+
+|SQL database
+|MySQL 8.0 or PostgreSQL 13{plus} (16 recommended).
+
+|Redis
+|3.2.6{plus} (7.x recommended). Single node, Sentinel, or Cluster mode.
+
+|LLM access
+|At least one provider. Multiple providers can coexist.
+
+|TinyMCE 8.0{plus}
+|The `tinymceai` plugin is a premium plugin.
+
+|License key and registry credentials
+|Provided by a Tiny account representative.
+
+|Token endpoint
+|A back end that signs HS256 JWTs.
+
+|Reverse proxy
+|The AI service does not terminate Transport Layer Security (TLS). Use nginx, HAProxy, or a cloud load balancer.
+|===
+
+== Choosing a setup path
+
+[.text-center]
+image::tinymceai-on-premises/complete-guide-fig-2.svg[Setup path decision tree,width=100%]
+
+Every path is documented to the same depth. After identifying the path that fits the operational model, complete its topic guides in the order listed.
+
+== Topic guides
+
+For a first-time deployment, progress through the guides in order. Each topic guide also stands alone when only one area applies.
+
+[cols="1,3",options="header"]
+|===
+|Guide |Scope
+
+|xref:tinymceai-on-premises-getting-started.adoc[Getting started]
+|Five-minute Docker Compose quick start. Stand up the AI service, database, Redis, token server, and a browser editor.
+
+|xref:tinymceai-on-premises-database.adoc[Database, Redis, and storage]
+|MySQL and PostgreSQL setup, Redis configuration, container runtimes (Docker, Podman, Kubernetes, ECS), and reverse proxy with TLS.
+
+|xref:tinymceai-on-premises-providers.adoc[LLM providers]
+|OpenAI, Anthropic, Google Gemini, Azure OpenAI, AWS Bedrock, Google Vertex AI, and self-hosted endpoints (Ollama, vLLM, LM Studio). Custom model catalog and API key rotation.
+
+|xref:tinymceai-on-premises-jwt.adoc[JWT authentication]
+|HS256 signing model, required and optional claims, permissions reference, and token endpoint examples in eight languages.
+
+|xref:tinymceai-on-premises-frameworks.adoc[Framework integration]
+|Editor-side configuration: plugin options, token provider, authentication patterns, Cross-Origin Resource Sharing (CORS), and deployment checklists.
+ +|xref:tinymceai-on-premises-production.adoc[Production deployment] +|Kubernetes manifests, AWS ECS task definitions, horizontal scaling, sizing, security hardening, rate limiting, observability, backup and recovery, and upgrades. + +|xref:tinymceai-on-premises-advanced.adoc[Advanced scenarios] +|MCP server integration, web scraping and search, multi-tenant patterns, custom models with guardrails, and AI-powered document pipelines. + +|xref:tinymceai-on-premises-troubleshooting.adoc[Troubleshooting] +|Quick triage, container startup failures, JWT errors, LLM provider errors, editor issues, performance, and diagnostic recipes. + +|xref:tinymceai-on-premises-reference.adoc[Reference] +|Environment variable reference, API endpoint reference, Server-Sent Events reference, and error code reference. +|=== + +== Support + +* *Technical support:* https://support.tiny.cloud[Submit a support request] (available to customers with an active commercial license). +* *Account and licensing:* https://www.tiny.cloud/contact/[Contact Tiny]. + +When submitting a support request, include: + +Container logs:: ++ +[source,console] +---- +docker logs ai-service --tail 200 +---- + +Effective environment:: +Redact secrets before submitting. ++ +[source,console] +---- +docker inspect ai-service | jq '.[0].Config.Env' +---- + +Health check:: ++ +[source,console] +---- +curl -fsS http://localhost:8000/health +---- ++ +Expected response: ++ +[source,json] +---- +{"serviceName":"on-premises-http","uptime":1234} +---- + +Decoded JWT payload:: +Strip the signature and decode with a JWT library. + +Image version:: ++ +[source,console] +---- +docker inspect ai-service | jq '.[0].Config.Image' +----