diff --git a/.agents/skills/add-enrichment/SKILL.md b/.agents/skills/add-enrichment/SKILL.md
new file mode 100644
index 00000000000..f963ce14517
--- /dev/null
+++ b/.agents/skills/add-enrichment/SKILL.md
@@ -0,0 +1,142 @@
+---
+name: add-enrichment
+description: Add a code-defined table enrichment (registry entry) under `apps/sim/enrichments/` backed by an ordered provider cascade, ensuring every provider tool it calls has hosted-key support. Use when adding a per-row table enrichment that fills cells via existing Sim tools.
+---
+
+# Adding a Table Enrichment
+
+Enrichments are code-defined entries in `apps/sim/enrichments/` that run **directly per table row** (no workflow). Each enrichment declares inputs, outputs, and an ordered list of **providers**; the cascade runner tries providers in order and the first non-empty result fills the cell. Each provider calls one existing Sim tool via `executeTool`, which injects the workspace's BYOK key or a **hosted key** and bills usage automatically.
+
+Because enrichments run on Sim's hosted keys by default, **every provider tool you reference must have hosted-key support** — otherwise it can only run when the workspace brings its own key. This skill makes that check a required step.
+
+## Overview
+
+| Step | What | Where |
+|------|------|-------|
+| 1 | Pick the data-source tool(s) for each output | `tools/{service}/` + `tools/registry.ts` |
+| 2 | **Verify each tool has `hosting`; if not, run `/add-hosted-key`** | `tools/{service}/{action}.ts` |
+| 3 | Write the enrichment definition | `enrichments/{name}/{name}.ts` + `index.ts` |
+| 4 | Register it | `enrichments/registry.ts` |
+| 5 | Verify | tsc / biome / manual run |
+
+## Architecture (what you're plugging into)
+
+- **`enrichments/types.ts`** — `EnrichmentConfig { id, name, description, icon, inputs, outputs, providers }` and `EnrichmentProvider { id, label, toolId, buildParams, mapOutput }`. Providers are **plain data** (no `@/tools` import) so the catalog stays client-safe.
+- **`enrichments/providers.ts`** — `toolProvider(...)` (typed passthrough) plus shared input helpers: `str(v)`, `normalizeDomain(v)`, `firstNonEmpty(arr)`, `splitName(fullName)`.
+- **`enrichments/run.ts`** — the server-only cascade runner. Calls `executeTool(provider.toolId, { ...params, _context: { workspaceId } })`, accumulates hosted-key cost, returns the first non-empty mapped result. **You do not edit this** — it works for any registry entry.
+- **`enrichments/registry.ts`** — `ENRICHMENT_REGISTRY` / `ALL_ENRICHMENTS` / `getEnrichment`. Register new entries here.
+
+Outputs automatically become table columns; billing, the catalog/sidebar UI, the column meta-header icon, and per-row execution all work with no extra wiring.
+
+## Step 1: Pick the data-source tool(s)
+
+For each output the enrichment produces, decide which existing tool provides it. Look up the service's API and the tool in `apps/sim/tools/{service}/` (e.g. `hunter_email_finder`, `pdl_person_enrich`, `pdl_company_enrich`). Confirm:
+
+- The tool id is registered in `apps/sim/tools/registry.ts`.
+- Its `params` accept what you can derive from table columns (read the tool's `params`).
+- Its `outputs` / `transformResponse` actually expose the field you need (read the real output shape — don't assume).
+
+Order providers **cheapest / most-likely-to-hit first**; the cascade stops at the first non-empty result. Apollo / LinkedIn are not hosted-safe (ToS) — don't use them.
+
+## Step 2: Verify hosted-key support — chain to `/add-hosted-key` if missing
+
+**This is the required gate.** For every tool a provider calls, open `apps/sim/tools/{service}/{action}.ts` and check for a `hosting` block:
+
+```typescript
+hosting: {
+ envKeyPrefix: 'SERVICE_API_KEY',
+ apiKeyParam: 'apiKey',
+ byokProviderId: 'service',
+ pricing: { /* ... */ },
+ rateLimit: { /* ... */ },
+}
+```
+
+- **If `hosting` is present** — good. Note the `envKeyPrefix`; the deployment needs `{PREFIX}_COUNT` + `{PREFIX}_1..N` env vars set for the hosted key to actually resolve at runtime (ops concern, not code). If those env vars aren't set in the target environment, the provider will only run with a workspace BYOK key.
+- **If `hosting` is absent** — the tool can't use a Sim-provided key, so the enrichment would silently produce blank cells on hosted Sim. **Stop and run `/add-hosted-key`** to add hosted-key support to that tool first, then come back. Do this for every provider tool that lacks it.
+
+Why it matters: the cascade runner only bills (and only reads `output.cost.total`) when `executeTool` injected a hosted key, which requires the tool's `hosting` config. No `hosting` → no hosted key → the enrichment depends entirely on per-workspace BYOK.
+
+## Step 3: Write the enrichment definition
+
+Create `apps/sim/enrichments/{name}/{name}.ts` and a barrel `index.ts`. Mirror the existing entries (`work-email`, `phone-number`, `company-domain`, `company-info`).
+
+```typescript
+import { SomeIcon } from 'lucide-react'
+import { filterUndefined } from '@sim/utils/object'
+import { normalizeDomain, splitName, str, toolProvider } from '@/enrichments/providers'
+import type { EnrichmentConfig } from '@/enrichments/types'
+
+export const myEnrichment: EnrichmentConfig = {
+ id: 'my-enrichment',
+ name: 'My Enrichment',
+ description: 'One concise sentence describing what it finds.',
+ icon: SomeIcon,
+ inputs: [
+ // Person enrichments take a single canonical `fullName` (Clay-style);
+ // split it with splitName() for tools that need first/last.
+ { id: 'fullName', name: 'Full name', type: 'string', required: true },
+ { id: 'companyDomain', name: 'Company domain', type: 'string' },
+ ],
+ outputs: [{ id: 'value', name: 'value', type: 'string' }],
+ providers: [
+ toolProvider({
+ id: 'provider-a',
+ label: 'Provider A',
+ toolId: 'service_action', // must have `hosting` (Step 2)
+ buildParams: (inputs) => {
+ // Return null when there aren't enough inputs → cascade skips this provider.
+ const name = splitName(inputs.fullName)
+ const domain = normalizeDomain(inputs.companyDomain)
+ if (!name || !domain) return null
+ return { domain, first_name: name.firstName, last_name: name.lastName }
+ },
+ mapOutput: (output) => {
+ // Return { [outputId]: value } on a hit, or null to fall through.
+ const value = str(output.value)
+ return value ? { value } : null
+ },
+ }),
+ // ...additional fallback providers, in priority order.
+ ],
+}
+```
+
+```typescript
+// apps/sim/enrichments/{name}/index.ts
+export { myEnrichment } from './my-enrichment'
+```
+
+Rules:
+- Keep the file **client-safe**: import only `lucide-react`, `@sim/utils/*`, `@/enrichments/providers`, and the types. **Never import `@/tools`** here — the runner does the tool call.
+- `buildParams` returns `null` when inputs are insufficient (provider skipped). `mapOutput` returns `null`/empty for a miss (falls through). Use `filterUndefined` when assembling optional tool params; coerce numbers explicitly (don't pass `''` to number outputs).
+- Output `id`s are the keys `mapOutput` returns; output `name`s are the default column names (the user can rename them in the config).
+
+## Step 4: Register it
+
+In `apps/sim/enrichments/registry.ts`, import and add the entry (catalog order is registration order):
+
+```typescript
+import { myEnrichment } from '@/enrichments/my-enrichment'
+
+export const ENRICHMENT_REGISTRY: EnrichmentRegistry = {
+ // ...existing
+ [myEnrichment.id]: myEnrichment,
+}
+```
+
+## Step 5: Verify
+
+1. `bunx tsc --noEmit` (from `apps/sim`, `NODE_OPTIONS=--max-old-space-size=8192`) and `bunx biome check` on the changed files.
+2. In a table → **+ New column → Enrichments** → pick the new enrichment, map its inputs to columns, name the output column(s), Save. Confirm it appears in the catalog with its icon/description.
+3. With hosted keys (or a workspace BYOK key) configured for each provider's service, run a row and confirm the cell fills; the dev-server log shows `Enrichment hit { provider }`. A row whose providers all miss completes blank; a row where every provider errored shows an error cell.
+
+## Checklist
+
+- [ ] Each output mapped to a real tool field (verified against the tool's `params`/`outputs`)
+- [ ] **Every provider tool has a `hosting` block — ran `/add-hosted-key` for any that didn't**
+- [ ] Providers ordered cheapest / most-likely-first; Apollo/LinkedIn not used
+- [ ] Enrichment file is client-safe (no `@/tools` import); uses `toolProvider` + shared helpers
+- [ ] `buildParams` returns `null` on insufficient inputs; `mapOutput` returns `null` on a miss
+- [ ] Registered in `enrichments/registry.ts`
+- [ ] tsc + biome clean; created and ran the column end-to-end
diff --git a/.agents/skills/add-enrichment/agents/openai.yaml b/.agents/skills/add-enrichment/agents/openai.yaml
new file mode 100644
index 00000000000..f19413c0a44
--- /dev/null
+++ b/.agents/skills/add-enrichment/agents/openai.yaml
@@ -0,0 +1,5 @@
+interface:
+ display_name: "Add Enrichment"
+ short_description: "Build a table enrichment cascade"
+ brand_color: "#16A34A"
+ default_prompt: "Use $add-enrichment to add a code-defined Sim table enrichment backed by a provider cascade."
diff --git a/.agents/skills/add-hosted-key/SKILL.md b/.agents/skills/add-hosted-key/SKILL.md
new file mode 100644
index 00000000000..2cb257c2060
--- /dev/null
+++ b/.agents/skills/add-hosted-key/SKILL.md
@@ -0,0 +1,296 @@
+---
+name: add-hosted-key
+description: Add hosted API key support to a tool so Sim provides the key (metered and billed to the workspace) when a user has not brought their own. Use when adding a `hosting` config to a tool under `apps/sim/tools/{service}/`.
+---
+
+# Adding Hosted Key Support to a Tool
+
+When a tool has hosted key support, Sim provides its own API key if the user hasn't configured one (via BYOK or env var). Usage is metered and billed to the workspace.
+
+## Overview
+
+| Step | What | Where |
+|------|------|-------|
+| 1 | Register BYOK provider ID | `tools/types.ts`, `app/api/workspaces/[id]/byok-keys/route.ts` |
+| 2 | Research the API's pricing and rate limits | API docs / pricing page (before writing any code) |
+| 3 | Add `hosting` config to the tool | `tools/{service}/{action}.ts` |
+| 4 | Hide API key field when hosted | `blocks/blocks/{service}.ts` |
+| 5 | Add to BYOK settings UI | BYOK settings component (`byok.tsx`) |
+| 6 | Summarize pricing and throttling comparison | Output to user (after all code changes) |
+
+## Step 1: Register the BYOK Provider ID
+
+Add the new provider to the `BYOKProviderId` union in `tools/types.ts`:
+
+```typescript
+export type BYOKProviderId =
+ | 'openai'
+ | 'anthropic'
+ // ...existing providers
+ | 'your_service'
+```
+
+Then add it to `VALID_PROVIDERS` in `app/api/workspaces/[id]/byok-keys/route.ts`:
+
+```typescript
+const VALID_PROVIDERS = ['openai', 'anthropic', 'google', 'mistral', 'your_service'] as const
+```
+
+## Step 2: Research the API's Pricing Model and Rate Limits
+
+**Before writing any `getCost` or `rateLimit` code**, look up the service's official documentation for both pricing and rate limits. You need to understand:
+
+### Pricing
+
+1. **How the API charges** — per request, per credit, per token, per step, per minute, etc.
+2. **Whether the API reports cost in its response** — look for fields like `creditsUsed`, `costDollars`, `tokensUsed`, or similar in the response body or headers
+3. **Whether cost varies by endpoint/options** — some APIs charge more for certain features (e.g., Firecrawl charges 1 credit/page base but +4 for JSON format, +4 for enhanced mode)
+4. **The dollar-per-unit rate** — what each credit/token/unit costs in dollars on our plan
+
+### Rate Limits
+
+1. **What rate limits the API enforces** — requests per minute/second, tokens per minute, concurrent requests, etc.
+2. **Whether limits vary by plan tier** — free vs paid vs enterprise often have different ceilings
+3. **Whether limits are per-key or per-account** — determines whether adding more hosted keys actually increases total throughput
+4. **What the API returns when rate limited** — HTTP 429, `Retry-After` header, error body format, etc.
+5. **Whether there are multiple dimensions** — some APIs limit both requests/min AND tokens/min independently
+
+Search the API's docs/pricing page (use WebSearch/WebFetch). Capture the pricing model as a comment in `getCost` so future maintainers know the source of truth.
+
+### Setting Our Rate Limits
+
+Our rate limiter (`lib/core/rate-limiter/hosted-key/`) uses a token-bucket algorithm applied **per billing actor** (workspace). It supports two modes:
+
+- **`per_request`** — simple; just `requestsPerMinute`. Good when the API charges flat per-request or cost doesn't vary much.
+- **`custom`** — `requestsPerMinute` plus additional `dimensions` (e.g., `tokens`, `search_units`). Each dimension has its own `limitPerMinute` and an `extractUsage` function that reads actual usage from the response. Use when the API charges on a variable metric (tokens, credits) and you want to cap that metric too.
+
+When choosing values for `requestsPerMinute` and any dimension limits:
+
+- **Stay well below the API's per-key limit** — our keys are shared across all workspaces. If the API allows 60 RPM per key and we have 3 keys, the global ceiling is ~180 RPM. Set the per-workspace limit low enough (e.g., 20-60 RPM) that many workspaces can coexist without collectively hitting the API's ceiling.
+- **Account for key pooling** — our round-robin distributes requests across `N` hosted keys, so the effective API-side rate per key is `(total requests) / N`. But per-workspace limits are enforced *before* key selection, so they apply regardless of key count.
+- **Prefer conservative defaults** — it's easy to raise limits later but hard to claw back after users depend on high throughput.
+
+## Step 3: Add `hosting` Config to the Tool
+
+Add a `hosting` object to the tool's `ToolConfig`. This tells the execution layer how to acquire hosted keys, calculate cost, and rate-limit.
+
+```typescript
+hosting: {
+ envKeyPrefix: 'YOUR_SERVICE_API_KEY',
+ apiKeyParam: 'apiKey',
+ byokProviderId: 'your_service',
+ pricing: {
+ type: 'custom',
+ getCost: (_params, output) => {
+ if (output.creditsUsed == null) {
+ throw new Error('Response missing creditsUsed field')
+ }
+ const creditsUsed = output.creditsUsed as number
+ const cost = creditsUsed * 0.001 // dollars per credit
+ return { cost, metadata: { creditsUsed } }
+ },
+ },
+ rateLimit: {
+ mode: 'per_request',
+ requestsPerMinute: 100,
+ },
+},
+```
+
+### Hosted Key Env Var Convention
+
+Keys use a numbered naming pattern driven by a count env var:
+
+```
+YOUR_SERVICE_API_KEY_COUNT=3
+YOUR_SERVICE_API_KEY_1=sk-...
+YOUR_SERVICE_API_KEY_2=sk-...
+YOUR_SERVICE_API_KEY_3=sk-...
+```
+
+The `envKeyPrefix` value (`YOUR_SERVICE_API_KEY`) determines which env vars are read at runtime. Adding more keys only requires bumping the count and adding the new env var.
+
+### Pricing: Prefer API-Reported Cost
+
+Always prefer using cost data returned by the API (e.g., `creditsUsed`, `costDollars`). This is the most accurate because it accounts for variable pricing tiers, feature modifiers, and plan-level discounts.
+
+**When the API reports cost** — use it directly and throw if missing:
+
+```typescript
+pricing: {
+ type: 'custom',
+ getCost: (params, output) => {
+ if (output.creditsUsed == null) {
+ throw new Error('Response missing creditsUsed field')
+ }
+ // $0.001 per credit — from https://example.com/pricing
+ const cost = (output.creditsUsed as number) * 0.001
+ return { cost, metadata: { creditsUsed: output.creditsUsed } }
+ },
+},
+```
+
+**When the API does NOT report cost** — compute it from params/output based on the pricing docs, but still validate the data you depend on:
+
+```typescript
+pricing: {
+ type: 'custom',
+ getCost: (params, output) => {
+ if (!Array.isArray(output.searchResults)) {
+ throw new Error('Response missing searchResults, cannot determine cost')
+ }
+ // Serper: 1 credit for <=10 results, 2 credits for >10 — from https://serper.dev/pricing
+ const credits = Number(params.num) > 10 ? 2 : 1
+ return { cost: credits * 0.001, metadata: { credits } }
+ },
+},
+```
+
+**`getCost` must always throw** if it cannot determine cost. Never silently fall back to a default — this would hide billing inaccuracies.
+
+### Capturing Cost Data from the API
+
+If the API returns cost info, capture it in `transformResponse` so `getCost` can read it from the output:
+
+```typescript
+transformResponse: async (response: Response) => {
+ const data = await response.json()
+ return {
+ success: true,
+ output: {
+ results: data.results,
+ creditsUsed: data.creditsUsed, // pass through for getCost
+ },
+ }
+},
+```
+
+For async/polling tools, capture it in `postProcess` when the job completes:
+
+```typescript
+if (jobData.status === 'completed') {
+ result.output = {
+ data: jobData.data,
+ creditsUsed: jobData.creditsUsed,
+ }
+}
+```
+
+## Step 4: Hide the API Key Field When Hosted
+
+In the block config (`blocks/blocks/{service}.ts`), add `hideWhenHosted: true` to the API key subblock. This hides the field on hosted Sim since the platform provides the key:
+
+```typescript
+{
+ id: 'apiKey',
+ title: 'API Key',
+ type: 'short-input',
+ placeholder: 'Enter your API key',
+ password: true,
+ required: true,
+ hideWhenHosted: true,
+},
+```
+
+The visibility is controlled by `isSubBlockHidden()` in `lib/workflows/subblocks/visibility.ts`, which checks both the `isHosted` feature flag (`hideWhenHosted`) and optional env var conditions (`hideWhenEnvSet`).
+
+### Excluding Specific Operations from Hosted Key Support
+
+When a block has multiple operations but some operations should **not** use a hosted key (e.g., the underlying API is deprecated, unsupported, or too expensive), use the **duplicate apiKey subblock** pattern. This is the same pattern Exa uses for its `research` operation:
+
+1. **Remove the `hosting` config** from the tool definition for that operation — it must not have a `hosting` object at all.
+2. **Duplicate the `apiKey` subblock** in the block config with opposing conditions:
+
+```typescript
+// API Key — hidden when hosted for operations with hosted key support
+{
+ id: 'apiKey',
+ title: 'API Key',
+ type: 'short-input',
+ placeholder: 'Enter your API key',
+ password: true,
+ required: true,
+ hideWhenHosted: true,
+ condition: { field: 'operation', value: 'unsupported_op', not: true },
+},
+// API Key — always visible for unsupported_op (no hosted key support)
+{
+ id: 'apiKey',
+ title: 'API Key',
+ type: 'short-input',
+ placeholder: 'Enter your API key',
+ password: true,
+ required: true,
+ condition: { field: 'operation', value: 'unsupported_op' },
+},
+```
+
+Both subblocks share the same `id: 'apiKey'`, so the same value flows to the tool. The conditions ensure only one is visible at a time. The first has `hideWhenHosted: true` and shows for all hosted operations; the second has no `hideWhenHosted` and shows only for the excluded operation — meaning users must always provide their own key for that operation.
+
+To exclude multiple operations, use an array: `{ field: 'operation', value: ['op_a', 'op_b'] }`.
+
+**Reference implementations:**
+- **Exa** (`blocks/blocks/exa.ts`): `research` operation excluded from hosting — lines 309-329
+- **Google Maps** (`blocks/blocks/google_maps.ts`): `speed_limits` operation excluded from hosting (deprecated Roads API)
+
+## Step 5: Add to the BYOK Settings UI
+
+Add an entry to the `PROVIDERS` array in the BYOK settings component so users can bring their own key. You need the service icon from `components/icons.tsx`:
+
+```typescript
+{
+ id: 'your_service',
+ name: 'Your Service',
+ icon: YourServiceIcon,
+ description: 'What this service does',
+ placeholder: 'Enter your API key',
+},
+```
+
+## Step 6: Summarize Pricing and Throttling Comparison
+
+After all code changes are complete, output a detailed summary to the user covering:
+
+### What to include
+
+1. **API's pricing model** — how the service charges (per token, per credit, per request, etc.), the specific rates found in docs, and whether the API reports cost in responses.
+2. **Our `getCost` approach** — how we calculate cost, what fields we depend on, and any assumptions or estimates (especially when the API doesn't report exact dollar cost).
+3. **API's rate limits** — the documented limits (RPM, TPM, concurrent, etc.), which plan tier they apply to, and whether they're per-key or per-account.
+4. **Our `rateLimit` config** — what we set for `requestsPerMinute` (and dimensions if custom mode), why we chose those values, and how they compare to the API's limits.
+5. **Key pooling impact** — how many hosted keys we expect, and how round-robin distribution affects the effective per-key rate at the API.
+6. **Gaps or risks** — anything the API charges for that we don't meter, rate limit dimensions we chose not to enforce, or pricing that may be inaccurate due to variable model/tier costs.
+
+### Format
+
+Present this as a structured summary with clear headings. Example:
+
+```
+### Pricing
+- **API charges**: $X per 1M tokens (input), $Y per 1M tokens (output) — varies by model
+- **Response reports cost?**: No — only token counts in `usage` field
+- **Our getCost**: Estimates cost at $Z per 1M total tokens based on median model pricing
+- **Risk**: Actual cost varies by model; our estimate may over/undercharge for cheap/expensive models
+
+### Throttling
+- **API limits**: 300 RPM per key (paid tier), 60 RPM (free tier)
+- **Per-key or per-account**: Per key — more keys = more throughput
+- **Our config**: 60 RPM per workspace (per_request mode)
+- **With N keys**: Effective per-key rate is (total RPM across workspaces) / N
+- **Headroom**: Comfortable — even 10 active workspaces at full rate = 600 RPM / 3 keys = 200 RPM per key, under the 300 RPM API limit
+```
+
+This summary helps reviewers verify that the pricing and rate limiting are well-calibrated and surfaces any risks that need monitoring.
+
+## Checklist
+
+- [ ] Provider added to `BYOKProviderId` in `tools/types.ts`
+- [ ] Provider added to `VALID_PROVIDERS` in the BYOK keys API route
+- [ ] API pricing docs researched — understand per-unit cost and whether the API reports cost in responses
+- [ ] API rate limits researched — understand RPM/TPM limits, per-key vs per-account, and plan tiers
+- [ ] `hosting` config added to the tool with `envKeyPrefix`, `apiKeyParam`, `byokProviderId`, `pricing`, and `rateLimit`
+- [ ] `getCost` throws if required cost data is missing from the response
+- [ ] Cost data captured in `transformResponse` or `postProcess` if API provides it
+- [ ] `hideWhenHosted: true` added to the API key subblock in the block config
+- [ ] Provider entry added to the BYOK settings UI with icon and description
+- [ ] Env vars documented: `{PREFIX}_COUNT` and `{PREFIX}_1..N`
+- [ ] Pricing and throttling summary provided to reviewer
diff --git a/.agents/skills/add-hosted-key/agents/openai.yaml b/.agents/skills/add-hosted-key/agents/openai.yaml
new file mode 100644
index 00000000000..97dfcf458cf
--- /dev/null
+++ b/.agents/skills/add-hosted-key/agents/openai.yaml
@@ -0,0 +1,5 @@
+interface:
+ display_name: "Add Hosted Key"
+ short_description: "Add hosted API key to a tool"
+ brand_color: "#CA8A04"
+ default_prompt: "Use $add-hosted-key to add hosted API key support (metered and billed) to a Sim tool."
diff --git a/.agents/skills/add-model/SKILL.md b/.agents/skills/add-model/SKILL.md
new file mode 100644
index 00000000000..f97c966e1f3
--- /dev/null
+++ b/.agents/skills/add-model/SKILL.md
@@ -0,0 +1,209 @@
+---
+name: add-model
+description: Add a new LLM model to `apps/sim/providers/models.ts` with every pricing and capability value verified against the provider's live API docs (no hallucination), plus the repo-side touchpoints that are not data-driven — hosted-key billing, tests, and provider-code handling. Use when adding a model to an existing provider in `apps/sim/providers/models.ts`.
+---
+
+# Add Model Skill
+
+You add a new model entry to `apps/sim/providers/models.ts`. **Every numeric and capability claim MUST be derived from a live web fetch of the provider's official docs in this session.** Marketing emails, training data, and your prior knowledge are not sources of truth — they are routinely wrong or out of date on pricing, context windows, and capability lists.
+
+## Hard rules (do not skip)
+
+1. **Live-fetch or refuse.** Before writing the entry, you must successfully WebFetch the provider's official models/pricing page in this session. If you cannot reach an authoritative source for any field, **mark the field as UNVERIFIED in your report and ask the user how to proceed instead of guessing**. Never fill in pricing or capabilities from memory.
+2. **Two-source rule for pricing.** Cross-check input/output/cached pricing against at least one secondary source (OpenRouter, Artificial Analysis, CloudPrice, mem0, intuitionlabs). If sources disagree, the provider's own docs win — but flag the disagreement.
+3. **Read the code before setting capability flags.** Capability flags are dead unless the provider's implementation under `apps/sim/providers/{provider}/` actually consumes them (see Consumption Matrix below). Setting a flag the provider ignores is a silent bug.
+4. **Cite every fact.** Your final report must list the URL each value came from. No URL → not verified.
+
+## Your Task
+
+1. Identify provider and model id from user args
+2. Live-fetch official docs + pricing page + capability/parameter pages + at least one secondary source
+3. Apply the Consumption Matrix to know which capability flags are real
+4. Read 2-3 sibling entries in `models.ts` and match their pattern exactly
+5. Check the repo-side touchpoints that are NOT data-driven (hosted-key billing, tests, provider code)
+6. Insert the entry, run `bun run lint`, print the verification report
+
+## Step 1: Live source-of-truth lookup
+
+In priority order — fetch all that exist for the provider:
+
+| Provider | Models index | Pricing | Reasoning/parameter caveats |
+|---|---|---|---|
+| OpenAI | platform.openai.com/docs/models | openai.com/api/pricing | platform.openai.com/docs/guides/reasoning |
+| Anthropic | docs.anthropic.com/en/docs/about-claude/models | anthropic.com/pricing | docs.anthropic.com/en/docs/build-with-claude/extended-thinking |
+| Google (Gemini) | ai.google.dev/gemini-api/docs/models | ai.google.dev/pricing | ai.google.dev/gemini-api/docs/thinking |
+| xAI | docs.x.ai/developers/models | docs.x.ai/developers/models (per-model detail page) | docs.x.ai/developers/model-capabilities/text/reasoning |
+| Mistral | docs.mistral.ai/getting-started/models/models_overview | mistral.ai/pricing | n/a |
+| DeepSeek | api-docs.deepseek.com/quick_start/pricing | same | api-docs.deepseek.com/guides/reasoning_model |
+| Groq | console.groq.com/docs/models | groq.com/pricing | n/a |
+| Cerebras | inference-docs.cerebras.ai/models | cerebras.ai/pricing | n/a |
+
+Secondary verification (use at least one): `openrouter.ai/{provider}/{model}`, `artificialanalysis.ai/models/{model}`, `cloudprice.net/models/{provider}-{model}`.
+
+Use a precise WebFetch prompt: *"Extract for {model_id}: exact model id string, context window in tokens, input price per 1M, cached input price per 1M, output price per 1M, max output tokens, supported reasoning effort levels, accepted parameters (temperature, top_p), release date. Do not fill in fields you cannot find."*
+
+## Step 2: Consumption Matrix (which provider honors which capability)
+
+| Capability | Honored by | Effect if set elsewhere |
+|---|---|---|
+| `temperature` | All providers (passed through if set) | Safe but inert on always-reasoning models that reject it |
+| `toolUsageControl` | All providers (provider-level, not per-model) | n/a — set on `ProviderDefinition`, not models |
+| `reasoningEffort` | `openai/core.ts`, `azure-openai`, `anthropic/core.ts` (mapped to thinking), `gemini/core.ts` | **Dead on xai, deepseek, mistral, groq, cerebras, openrouter, fireworks, bedrock, vertex** unless their core consumes it — re-grep before assuming |
+| `verbosity` | `openai/core.ts`, `azure-openai/index.ts` only | Dead elsewhere |
+| `thinking` | `anthropic/core.ts`, `gemini/core.ts` | Dead elsewhere |
+| `nativeStructuredOutputs` | `anthropic/core.ts`, `fireworks/index.ts`, `openrouter/index.ts` | Dead on openai, xai, google, vertex, bedrock, azure-openai, deepseek, mistral, groq, cerebras |
+| `maxOutputTokens` | Read by UI + executor for token estimation | Always meaningful — set if provider documents a cap |
+| `computerUse` | `anthropic/core.ts` | Dead elsewhere |
+| `deepResearch` | UI flag for routing to deep-research SKUs | Set only on actual deep-research model IDs |
+| `memory: false` | Conversation persistence opt-out | Set only when model genuinely cannot maintain history (e.g., deep-research) |
+
+**Always re-grep before relying on this table** — the codebase moves:
+
+```bash
+rg "reasoningEffort|reasoning_effort" apps/sim/providers/{provider}/
+rg "verbosity" apps/sim/providers/{provider}/
+rg "request\.thinking|thinking:" apps/sim/providers/{provider}/
+rg "supportsNativeStructuredOutputs|nativeStructuredOutputs" apps/sim/providers/{provider}/
+```
+
+## Step 3: Match the provider's existing entry pattern
+
+Open `apps/sim/providers/models.ts`, find `PROVIDER_DEFINITIONS[{provider}].models`, read 2-3 sibling entries. Match field order exactly:
+
+```ts
+{
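+ id: '{model_id}',
+ pricing: {
+ input: {input_price_per_1M_tokens},
+ cachedInput: {cached_input_price_per_1M_tokens}, // omit if provider doesn't offer caching
+ output: {output_price_per_1M_tokens},
+ updatedAt: '{YYYY-MM-DD}',
+ },
+ capabilities: {
+ // only flags the provider actually consumes — see matrix
+ },
+ contextWindow: {context_window_tokens},
+ releaseDate: '{YYYY-MM-DD}',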
+ recommended: true, // only if new flagship; ask user before swapping
+ speedOptimized: true, // only on smallest/fastest tier
+ deprecated: true, // only on retired models
+}
+```
+
+### Reseller providers (azure-openai, azure-anthropic, vertex, bedrock, openrouter)
+
+Model id MUST be prefixed: `azure/`, `azure-anthropic/`, `vertex/`, `bedrock/`, `openrouter/`. Pricing usually mirrors the upstream provider but verify on the reseller's own pricing page.
+
+### Insertion order
+
+Within a family, newest first (matches existing convention: GPT-5.5 above GPT-5.4 above GPT-5.2). Across families, biggest/flagship at top of list.
+
+### `recommended` / `speedOptimized`
+
+- At most one or two `recommended: true` per provider — the current flagship(s).
+- If you're adding a new flagship, ask the user before removing `recommended` from the previous flagship. Never silently flip it.
+- `speedOptimized: true` only on the smallest/fastest tier (nano, flash-lite, haiku class).
+
+## Step 4: Repo-side touchpoints beyond the entry
+
+Adding the `models.ts` entry is most of the job because nearly every consumer is **data-driven** and picks the model up automatically: the ~40 query helpers in `models.ts` / `providers/utils.ts`, the public `/models` catalog (`app/(landing)/models/utils.ts` iterates `PROVIDER_DEFINITIONS`), the agent-block model dropdown, and copilot's `isKnownModelId` / `suggestModelIdsForUnknownModel` validation. The touchpoints below are the exceptions — they are **not** data-driven, so check each one.
+
+### Hosted = auto-billed, by provider
+
+`getHostedModels()` in `apps/sim/providers/models.ts` returns **every** model under `openai`, `anthropic`, and `google`:
+
+```ts
+export function getHostedModels(): string[] {
+ return [
+ ...getProviderModels('openai'),
+ ...getProviderModels('anthropic'),
+ ...getProviderModels('google'),
+ ]
+}
+```
+
+So a model added to any of those three providers is **automatically served with Sim's rotating hosted key and billed** to the workspace via `shouldBillModelUsage()` (`providers/utils.ts`). Before you insert:
+
+- **If the model should be BYOK-only / never-billed**, do NOT drop it under `openai`/`anthropic`/`google` as-is — that silently enrolls it in hosted billing. Confirm hosting/billing intent with the user. (Precedent: Ollama Cloud is a deliberately separate `isReseller` provider specifically to stay BYOK-only/never-billed.)
+- **If the model should be hosted**, the deployment must actually have a key for it — the provider's `{PREFIX}_COUNT` / `{PREFIX}_1..N` env vars must be set, or hosted runs fail at execution time.
+- State the hosted/billing status explicitly in the verification report.
+
+### Tests with hardcoded model IDs
+
+`bun run lint` does **not** run tests. A few tests assert specific model IDs and can break or need updating when you touch a hosted or flagship model:
+
+- `apps/sim/providers/utils.test.ts` — asserts membership of `getHostedModels()` / `shouldBillModelUsage()`
+- `apps/sim/providers/index.test.ts` and serializer tests — reference concrete model IDs
+
+```bash
+rg "<model-id>|getHostedModels|shouldBillModelUsage" apps/sim/providers/*.test.ts
+```
+
+If anything matches, run the affected provider tests and update assertions as needed.
+
+### New API behavior is NOT data-driven
+
+The Consumption Matrix (Step 2) tells you which capability *flags* are honored by existing provider code. But if the new model needs **net-new** request handling that the provider doesn't implement yet — a new beta header (e.g. Anthropic's `anthropic-beta` structured-outputs header in `anthropic/index.ts`), a new thinking/reasoning encoding, a Responses-API quirk — you must edit `apps/sim/providers/<provider>/core.ts` / `index.ts`. Setting a flag whose behavior isn't implemented is a silent no-op.
+
+### Wrong family entirely?
+
+- **Embedding or rerank model** → it does NOT go in the `models[]` array. Use `EMBEDDING_MODEL_PRICING` / `RERANK_MODEL_PRICING` in `models.ts` instead.
+- **Brand-new provider** (not just a new model under an existing one) → much larger surface: add the id to `ProviderId` in `providers/types.ts`, a registry entry in `providers/registry.ts`, a provider implementation under `providers/<provider>/`, an icon in `components/icons.tsx`, and the `PROVIDER_DEFINITIONS` block. That is beyond this skill — tell the user.
+
+## Step 5: Write, lint
+
+```bash
+bun run lint
+```
+
+Lint must pass before reporting done. **If lint fails:** read the error, fix the syntax/typing issue in the entry you just wrote (do not delete the entry — it's the work product), re-run lint, and note the fix in a "Lint adjustments" line in the verification report. Never report done with lint failing.
+
+## Step 6: Verification report (mandatory format)
+
+End with this exact structure:
+
+```markdown
+### Verification — <model-id>
+
+| Field | Value | Source URL | Status |
+|---|---|---|---|
+| `id` | `grok-4.3` | https://docs.x.ai/... | ✓ verified |
+| `contextWindow` | 1,000,000 | https://docs.x.ai/... + https://openrouter.ai/... | ✓ verified (2 sources agree) |
+| `input` | $1.25/M | https://docs.x.ai/... | ✓ verified |
+| `cachedInput` | $0.20/M | https://cloudprice.net/... | ⚠️ single source |
+| `output` | $2.50/M | https://docs.x.ai/... + https://openrouter.ai/... | ✓ verified |
+| `capabilities.temperature` | `{ min: 0, max: 1 }` | matches sibling entries | — pattern-match only |
+| `capabilities.reasoningEffort` | NOT SET | provider docs say API rejects it for this model | ✓ correctly omitted |
+| `releaseDate` | 2026-04-30 | https://docs.x.ai/... announcement | ✓ verified |
+| hosted/billing | BYOK-only (xai not in `getHostedModels`) | `providers/models.ts` | — confirmed intent |
+
+**Disagreements**
+- _none_ OR _OpenRouter says X, provider docs say Y — used Y per provider rule_
+
+**Unverified fields**
+- _none_ OR _<field>: could not find authoritative source — left as <value> based on sibling pattern; please confirm_
+```
+
+If any row is ⚠️ single-source or "unverified," **state it plainly to the user and ask whether to proceed**. Do not silently merge.
+
+## What to do if you cannot find a source
+
+Omitting a field is **not the same as verifying it**. Any field you cannot confirm from a live fetch must be **both** omitted from the entry **and** listed as ❓ UNVERIFIED in the report's "Unverified fields" section, with the URLs you attempted. Then ask the user to confirm before merging.
+
+- Pricing missing → do NOT guess. Omit `cachedInput`. Mark ❓ UNVERIFIED. Ask the user for the price or the docs URL.
+- Context window missing → do NOT guess. Ask the user; mark ❓ UNVERIFIED.
+- Release date missing → omit the field; mark ❓ UNVERIFIED in the report.
+- Capability uncertain → omit the flag (safer than setting a dead/wrong one); mark ❓ UNVERIFIED so the user knows you didn't confirm it either way.
+
+## Anti-patterns this skill exists to prevent
+
+- ❌ Trusting a marketing email (xAI's grok-4.3 email claimed "3 reasoning efforts" but the API rejects `reasoning_effort` — verified by official docs only)
+- ❌ Setting `nativeStructuredOutputs: true` on xai/openai/google (dead — only anthropic/fireworks/openrouter consume it)
+- ❌ Setting `thinking` on non-Anthropic/non-Gemini providers
+- ❌ Setting `verbosity` on anything other than OpenAI gpt-5.x
+- ❌ Copying `pricing.updatedAt` from a sibling instead of using today's date
+- ❌ Inventing a `cachedInput` price by dividing input by 4 (varies by provider — find an explicit number)
+- ❌ Stamping `recommended: true` on the new model without removing it from the previous flagship
+- ❌ Adding a BYOK-only model under `openai`/`anthropic`/`google` (silently enrolls it in hosted billing via `getHostedModels()`)
+- ❌ Reporting "done" after only `bun run lint` when you touched a hosted (openai/anthropic/google) or flagship model with assertions in `providers/utils.test.ts`
+- ❌ Reporting "done" with any UNVERIFIED row in the table
diff --git a/.agents/skills/add-model/agents/openai.yaml b/.agents/skills/add-model/agents/openai.yaml
new file mode 100644
index 00000000000..4ba3fc233e8
--- /dev/null
+++ b/.agents/skills/add-model/agents/openai.yaml
@@ -0,0 +1,5 @@
+interface:
+ display_name: "Add Model"
+ short_description: "Add an LLM model, specs verified"
+ brand_color: "#0EA5E9"
+ default_prompt: "Use $add-model to add a new LLM model to Sim with pricing and capabilities verified against the provider's live docs."
diff --git a/.agents/skills/council/SKILL.md b/.agents/skills/council/SKILL.md
new file mode 100644
index 00000000000..0df112728be
--- /dev/null
+++ b/.agents/skills/council/SKILL.md
@@ -0,0 +1,13 @@
+---
+name: council
+description: Spawn parallel task agents to explore a given area of the codebase from multiple angles, then use their findings to answer the question or build a plan. Use when a task needs broad fan-out exploration across many files before acting.
+# No agents/openai.yaml by design: council is a meta/exploration utility (like cleanup, ship, you-might-not-need-*), not a service-integration builder, so it intentionally ships no standalone agent card.
+---
+
+Based on the given area of interest, please:
+
+1. Dig around the codebase in terms of that given area of interest, gather general information such as keywords and architecture overview.
+2. Spawn off n=10 (unless specified otherwise) task agents to dig deeper into the codebase in terms of that given area of interest, some of them should be out of the box for variance.
+3. Once the task agents are done, use the information to do what the user wants.
+
+If user is in plan mode, use the information to create the plan.
diff --git a/.agents/skills/validate-model/SKILL.md b/.agents/skills/validate-model/SKILL.md
new file mode 100644
index 00000000000..c05b2e3527b
--- /dev/null
+++ b/.agents/skills/validate-model/SKILL.md
@@ -0,0 +1,170 @@
+---
+name: validate-model
+description: Validate a model entry (or every model in a provider) in `apps/sim/providers/models.ts` against the provider's live API docs, reporting pricing and capability drift, dead capability flags, hosting/billing intent, and any field that cannot be verified. Use when auditing or repairing model entries under `apps/sim/providers/models.ts`.
+---
+
+# Validate Model Skill
+
+You audit one or more model entries in `apps/sim/providers/models.ts` against the provider's official live API docs. **Hallucinated pricing and capabilities are the #1 failure mode in this file.** Every numeric and capability claim must be re-derived from a live web fetch in this session — not from memory, not from training data, not from the user's marketing email.
+
+## Hard rules (do not skip)
+
+1. **Live-fetch or report unverified.** Each field must be backed by a live WebFetch in this session. If you cannot reach an authoritative URL for a field, mark it **UNVERIFIED** in the report — do not silently confirm it from memory.
+2. **Cite every fact.** Every value in the report must show the source URL it was checked against. No URL → mark UNVERIFIED.
+3. **Two-source rule for pricing.** Cross-check input/output/cached against at least one secondary source (OpenRouter, Artificial Analysis, CloudPrice). If sources disagree, the provider's own docs win — flag the disagreement.
+4. **Inspect provider implementation before flagging capability mismatches.** A capability flag in `models.ts` is dead unless the provider's code under `apps/sim/providers/{provider}/` consumes it (see Consumption Matrix below). Setting a flag the provider ignores is a warning, not a critical.
+5. **Never auto-fix without printing the diff.** Show the user the proposed diff before applying. Get confirmation.
+
+## Your Task
+
+When invoked as `/validate-model <provider> [model-id]`:
+
+1. Read the target entries from `models.ts`
+2. Live-fetch the provider's official models, pricing, and capability/reasoning pages + at least one secondary source for pricing
+3. Inspect the provider implementation to know which flags are actually consumed
+4. Run the checklist below per model
+5. Report findings (critical / warning / suggestion / unverified) with every cell linked to its source URL
+6. Offer to fix; on confirm, edit `models.ts` in a single pass and re-lint
+
+If `model-id` is omitted, validate every model in the provider.
+
+## Step 1: Read entries from `models.ts`
+
+Capture per model: `id`, full `pricing`, full `capabilities`, `contextWindow`, `releaseDate`, `recommended`, `speedOptimized`, `deprecated`.
+
+## Step 2: Live-fetch authoritative sources
+
+Use the canonical provider URL table in the `add-model` skill (`.claude/commands/add-model.md`, or its mirror `.agents/skills/add-model/SKILL.md`), Step 1, as the single source of truth — fetch the models index, pricing, and reasoning/parameter caveats pages listed there for the target provider. If you update one table, update the other in the same change.
+
+Secondary cross-check (use at least one): OpenRouter, Artificial Analysis, CloudPrice.
+
+If a fetch fails (404, timeout, paywall), record the URL attempted and mark dependent fields UNVERIFIED.
+
+## Step 3: Build the consumption map for this provider
+
+Re-grep before trusting the snapshot below:
+
+```bash
+rg "reasoningEffort|reasoning_effort" apps/sim/providers/<provider>/
+rg "verbosity" apps/sim/providers/<provider>/
+rg "request\.thinking|thinking:" apps/sim/providers/<provider>/
+rg "supportsNativeStructuredOutputs|nativeStructuredOutputs" apps/sim/providers/<provider>/
+```
+
+Snapshot (verify before relying):
+
+| Capability | Consumed by |
+|---|---|
+| `reasoningEffort` | `openai/core.ts`, `azure-openai`, `anthropic/core.ts` (mapped via thinking), `gemini/core.ts` |
+| `verbosity` | `openai/core.ts`, `azure-openai/index.ts` |
+| `thinking` | `anthropic/core.ts`, `gemini/core.ts` |
+| `nativeStructuredOutputs` | `anthropic/core.ts`, `fireworks/index.ts`, `openrouter/index.ts` |
+| `computerUse` | `anthropic/core.ts` |
+| `temperature` | All providers (passthrough) |
+
+A flag set in `models.ts` but not in the consumption list for this provider = **warning: dead flag**.
+
+## Step 4: Run the checklist
+
+For each model, evaluate every row. Statuses: ✓ matches docs, ✗ disagrees, ⚠️ single-source, ❓ UNVERIFIED (could not fetch).
+
+### Identity
+- [ ] `id` exactly matches provider's API model identifier (case, dots, dashes, prefix for resellers)
+- [ ] `releaseDate` matches launch announcement
+- [ ] `deprecated: true` set if provider has announced retirement (or removed from active list)
+
+### Pricing (per 1M tokens, USD)
+- [ ] `pricing.input` matches provider pricing page
+- [ ] `pricing.output` matches provider pricing page
+- [ ] `pricing.cachedInput` matches provider's documented cached/prompt-cache rate (or is correctly omitted if no caching offered)
+- [ ] `pricing.updatedAt` is recent — warn if older than 60 days
+
+### Context & output limits
+- [ ] `contextWindow` matches docs (in tokens)
+- [ ] `capabilities.maxOutputTokens` matches documented output cap (or is correctly omitted if "no output limit")
+
+### Capabilities (each must be DOCUMENTED-AS-SUPPORTED **and** CONSUMED-BY-PROVIDER-CODE)
+- [ ] `temperature` — provider accepts it for this model (reasoning-always-on models often reject)
+- [ ] `reasoningEffort.values` — list matches docs; **omitted** for always-reasoning models that reject the parameter (e.g., grok-4.3, where xAI docs explicitly state `reasoning_effort` is not supported). Verify per model — some always-reasoning models (e.g., OpenAI's o-series) DO accept `reasoning_effort` and should keep the flag.
+- [ ] `verbosity.values` — only on OpenAI gpt-5.x family; values match docs
+- [ ] `thinking.levels` + `thinking.default` — only on Anthropic/Gemini; values match docs
+- [ ] `nativeStructuredOutputs` — only on anthropic/fireworks/openrouter; provider must document Structured Outputs / JSON-mode for this model
+- [ ] `toolUsageControl` — provider supports `tool_choice` semantics
+- [ ] `computerUse` — provider implements computer-use loop AND model is a computer-use SKU
+- [ ] `deepResearch` — only on actual deep-research SKUs
+- [ ] `memory: false` — only when the model genuinely cannot maintain conversation history
+
+### Flags
+- [ ] `recommended: true` — at most one or two per provider; should be current flagship
+- [ ] `speedOptimized: true` — only on smallest/fastest tier (nano / flash-lite / haiku class)
+
+### Hosting / billing
+- [ ] If the model is under `openai`/`anthropic`/`google`, it is automatically in `getHostedModels()` → served with Sim's rotating key and billed via `shouldBillModelUsage()`. Confirm that is the intent (a BYOK-only model parked under one of these providers is a billing bug — warning).
+- [ ] If the model is hosted, the deployment is expected to have its `{PREFIX}_COUNT` / `{PREFIX}_1..N` env vars set (ops concern; note if it looks unset for a model claiming hosted support).
+
+## Step 5: Report (mandatory format)
+
+For each model, emit a table with one row per checklist item. Every row that claims ✓ must have a URL.
+
+```markdown
+### Validation — <model-id>
+
+| Field | Repo | Live docs | Source URL | Status |
+|---|---|---|---|---|
+| `input` | $1.25/M | $1.25/M | https://docs.x.ai/... | ✓ |
+| `cachedInput` | $0.50/M | $0.20/M | https://cloudprice.net/... | ✗ stale (price cut not picked up) |
+| `reasoningEffort` | low/medium/high | rejected by API | https://docs.x.ai/.../reasoning | ✗ inert — selecting silently no-ops |
+| `contextWindow` | 1,000,000 | 1,000,000 | https://docs.x.ai/... + https://openrouter.ai/... | ✓ (2 sources) |
+| `releaseDate` | 2026-04-30 | not found in scraped pages | _attempted: docs.x.ai, x.ai/news_ | ❓ UNVERIFIED |
+
+**Findings**
+- 🔴 critical — `cachedInput` is wrong: docs say $0.20/M, repo has $0.50/M
+- 🟡 warning — `reasoningEffort` is set but provider rejects it for this model (xAI docs explicitly: "reasoning_effort is not supported by grok-4.3")
+- 🔵 suggestion — `pricing.updatedAt` is 90 days old; refresh
+- ❓ unverified — `releaseDate` could not be confirmed from any fetched page; ask user
+
+**Disagreements between sources**
+- _none_ OR _OpenRouter says $X, provider docs say $Y — went with provider docs_
+```
+
+End each multi-model run with a summary count: `N models checked · X critical · Y warnings · Z suggestions · W unverified`.
+
+## Step 6: Offer to fix
+
+After reporting, ask: *"Want me to fix the critical and warning items? I'll print the diff first."* On yes:
+
+1. Print the proposed diff (do not apply yet)
+2. Get user confirmation
+3. Edit `models.ts` in a single pass
+4. Run `bun run lint`
+5. Re-run only the failed rows of the checklist on the new state
+
+## Severity definitions
+
+- 🔴 **critical** — wrong number or wrong identifier that misleads users about cost or breaks API calls. Examples: incorrect pricing, wrong model id, wrong context window, capability the API rejects.
+- 🟡 **warning** — dead code or internal inconsistency. Examples: capability flag the provider ignores, multiple `recommended: true` per provider, `pricing.updatedAt` >60 days old, missing `deprecated: true` on retired model.
+- 🔵 **suggestion** — style/consistency. Examples: field order, missing `speedOptimized` on a clearly smallest-tier model.
+- ❓ **unverified** — could not fetch an authoritative source for this field. Surface it; never silently confirm.
+
+## Common bugs this skill catches
+
+- Pricing drift after a provider price cut (very common — providers cut quarterly)
+- `reasoningEffort` set on always-reasoning models that reject the parameter (grok-4.3, o3-pro pattern)
+- `nativeStructuredOutputs` set on providers that don't consume the flag (dead)
+- `thinking` set on non-Anthropic/non-Gemini providers
+- `verbosity` set on non-gpt-5.x models
+- Wrong context window (e.g., 128k claimed vs 200k actual)
+- Stale `pricing.updatedAt`
+- Multiple `recommended: true` per provider after a flagship swap
+- Missing `deprecated: true` on retired models (e.g., the xAI batch retiring May 15, 2026)
+
+## What "I cannot verify this" looks like
+
+If, after fetching the documented sources, a field cannot be confirmed:
+
+- Mark the row ❓ UNVERIFIED with the URL(s) attempted
+- Surface it in the **Findings** section with severity ❓
+- Do NOT mark the validation as passed
+- Ask the user for a docs URL or guidance before changing anything
+
+The skill is allowed to say *"I could not verify the cached input price for grok-4.3 from the official xAI docs in this session — I attempted [URLs] without finding the value. Third-party sources [URL1, URL2] both report $0.20/M. Confirm before I update."* That is correct behavior. Hallucinating a number is not.
diff --git a/.agents/skills/validate-model/agents/openai.yaml b/.agents/skills/validate-model/agents/openai.yaml
new file mode 100644
index 00000000000..fa58d32ca8b
--- /dev/null
+++ b/.agents/skills/validate-model/agents/openai.yaml
@@ -0,0 +1,5 @@
+interface:
+ display_name: "Validate Model"
+ short_description: "Audit model entries vs live docs"
+ brand_color: "#0891B2"
+ default_prompt: "Use $validate-model to audit Sim model entries against the provider's live API docs."
diff --git a/.claude/commands/add-connector.md b/.claude/commands/add-connector.md
index 22c8c52e1c8..81823675a72 100644
--- a/.claude/commands/add-connector.md
+++ b/.claude/commands/add-connector.md
@@ -463,6 +463,24 @@ const response = await fetchWithRetry(url, { ... }, VALIDATE_RETRY_OPTIONS)
If `ExternalDocument.sourceUrl` is set, the sync engine stores it on the document record. Always construct the full URL (not a relative path).
+## Capped or Incomplete Listings — `syncContext.listingCapped` (REQUIRED)
+
+If `listDocuments` can ever return **less than the full source set** on a non-incremental sync — a `maxItems`/`maxDocuments`-style cap, or a transient per-item error that drops a still-existing document from the listing — it MUST set `syncContext.listingCapped = true` when that happens.
+
+The sync engine reconciles deletions by comparing the full listing against stored documents: anything not seen is **hard-deleted** (sync-engine.ts, gated on `!syncContext?.listingCapped`). A truncated listing without this flag deletes every real document beyond the cap. This was the single most common bug found when auditing connectors — do not omit it.
+
+```typescript
+if (hitLimit && syncContext) {
+ syncContext.listingCapped = true
+}
+```
+
+Rules:
+- Set it when a user-configured cap truncates the listing while more documents exist
+- Set it when a thrown error caused a still-present document to be skipped during listing
+- Do NOT set it when the source is genuinely exhausted (deleted documents must still reconcile)
+- Do NOT set it for intentional scope filters (e.g. a date cutoff) — out-of-scope documents should be reconciled normally
+
## Sync Engine Behavior (Do Not Modify)
The sync engine (`lib/knowledge/connectors/sync-engine.ts`) is connector-agnostic. It:
@@ -515,6 +533,7 @@ export const CONNECTOR_REGISTRY: ConnectorRegistry = {
- `dependsOn` references selector field IDs (not `canonicalParamId`)
- Dependency `canonicalParamId` values exist in `SELECTOR_CONTEXT_FIELDS`
- [ ] `listDocuments` handles pagination with metadata-based content hashes
+- [ ] `syncContext.listingCapped = true` set whenever the listing is truncated (max-items cap or transient per-item error) — required to prevent the engine's deletion reconciliation from removing unseen documents
- [ ] `contentDeferred: true` used if content requires per-doc API calls (file download, export, blocks fetch)
- [ ] `contentHash` is metadata-based (not content-based) and identical between stub and `getDocument`
- [ ] `sourceUrl` set on each ExternalDocument (full URL, not relative)
diff --git a/.claude/commands/add-model.md b/.claude/commands/add-model.md
index 1fcf828537c..c52e1b451f9 100644
--- a/.claude/commands/add-model.md
+++ b/.claude/commands/add-model.md
@@ -20,7 +20,8 @@ You add a new model entry to `apps/sim/providers/models.ts`. **Every numeric and
2. Live-fetch official docs + pricing page + capability/parameter pages + at least one secondary source
3. Apply the Consumption Matrix to know which capability flags are real
4. Read 2-3 sibling entries in `models.ts` and match their pattern exactly
-5. Insert the entry, run `bun run lint`, print the verification report
+5. Check the repo-side touchpoints that are NOT data-driven (hosted-key billing, tests, provider code)
+6. Insert the entry, run `bun run lint`, print the verification report
## Step 1: Live source-of-truth lookup
@@ -103,7 +104,53 @@ Within a family, newest first (matches existing convention: GPT-5.5 above GPT-5.
- If you're adding a new flagship, ask the user before removing `recommended` from the previous flagship. Never silently flip it.
- `speedOptimized: true` only on the smallest/fastest tier (nano, flash-lite, haiku class).
-## Step 4: Write, lint
+## Step 4: Repo-side touchpoints beyond the entry
+
+Adding the `models.ts` entry is most of the job because nearly every consumer is **data-driven** and picks the model up automatically: the ~40 query helpers in `models.ts` / `providers/utils.ts`, the public `/models` catalog (`app/(landing)/models/utils.ts` iterates `PROVIDER_DEFINITIONS`), the agent-block model dropdown, and copilot's `isKnownModelId` / `suggestModelIdsForUnknownModel` validation. The touchpoints below are the exceptions — they are **not** data-driven, so check each one.
+
+### Hosted = auto-billed, by provider
+
+`getHostedModels()` in `apps/sim/providers/models.ts` returns **every** model under `openai`, `anthropic`, and `google`:
+
+```ts
+export function getHostedModels(): string[] {
+ return [
+ ...getProviderModels('openai'),
+ ...getProviderModels('anthropic'),
+ ...getProviderModels('google'),
+ ]
+}
+```
+
+So a model added to any of those three providers is **automatically served with Sim's rotating hosted key and billed** to the workspace via `shouldBillModelUsage()` (`providers/utils.ts`). Before you insert:
+
+- **If the model should be BYOK-only / never-billed**, do NOT drop it under `openai`/`anthropic`/`google` as-is — that silently enrolls it in hosted billing. Confirm hosting/billing intent with the user. (Precedent: Ollama Cloud is a deliberately separate `isReseller` provider specifically to stay BYOK-only/never-billed.)
+- **If the model should be hosted**, the deployment must actually have a key for it — the provider's `{PREFIX}_COUNT` / `{PREFIX}_1..N` env vars must be set, or hosted runs fail at execution time.
+- State the hosted/billing status explicitly in the verification report.
+
+### Tests with hardcoded model IDs
+
+`bun run lint` does **not** run tests. A few tests assert specific model IDs and can break or need updating when you touch a hosted or flagship model:
+
+- `apps/sim/providers/utils.test.ts` — asserts membership of `getHostedModels()` / `shouldBillModelUsage()`
+- `apps/sim/providers/index.test.ts` and serializer tests — reference concrete model IDs
+
+```bash
+rg "<model-id>|getHostedModels|shouldBillModelUsage" apps/sim/providers/*.test.ts
+```
+
+If anything matches, run the affected provider tests and update assertions as needed.
+
+### New API behavior is NOT data-driven
+
+The Consumption Matrix (Step 2) tells you which capability *flags* are honored by existing provider code. But if the new model needs **net-new** request handling that the provider doesn't implement yet — a new beta header (e.g. Anthropic's `anthropic-beta` structured-outputs header in `anthropic/index.ts`), a new thinking/reasoning encoding, a Responses-API quirk — you must edit `apps/sim/providers/<provider>/core.ts` / `index.ts`. Setting a flag whose behavior isn't implemented is a silent no-op.
+
+### Wrong family entirely?
+
+- **Embedding or rerank model** → it does NOT go in the `models[]` array. Use `EMBEDDING_MODEL_PRICING` / `RERANK_MODEL_PRICING` in `models.ts` instead.
+- **Brand-new provider** (not just a new model under an existing one) → much larger surface: add the id to `ProviderId` in `providers/types.ts`, a registry entry in `providers/registry.ts`, a provider implementation under `providers/<provider>/`, an icon in `components/icons.tsx`, and the `PROVIDER_DEFINITIONS` block. That is beyond this skill — tell the user.
+
+## Step 5: Write, lint
```bash
bun run lint
@@ -111,7 +158,7 @@ bun run lint
Lint must pass before reporting done. **If lint fails:** read the error, fix the syntax/typing issue in the entry you just wrote (do not delete the entry — it's the work product), re-run lint, and note the fix in a "Lint adjustments" line in the verification report. Never report done with lint failing.
-## Step 5: Verification report (mandatory format)
+## Step 6: Verification report (mandatory format)
End with this exact structure:
@@ -128,6 +175,7 @@ End with this exact structure:
| `capabilities.temperature` | `{ min: 0, max: 1 }` | matches sibling entries | — pattern-match only |
| `capabilities.reasoningEffort` | NOT SET | provider docs say API rejects it for this model | ✓ correctly omitted |
| `releaseDate` | 2026-04-30 | https://docs.x.ai/... announcement | ✓ verified |
+| hosted/billing | BYOK-only (xai not in `getHostedModels`) | `providers/models.ts` | — confirmed intent |
**Disagreements**
- _none_ OR _OpenRouter says X, provider docs say Y — used Y per provider rule_
@@ -156,4 +204,6 @@ Omitting a field is **not the same as verifying it**. Any field you cannot confi
- ❌ Copying `pricing.updatedAt` from a sibling instead of using today's date
- ❌ Inventing a `cachedInput` price by dividing input by 4 (varies by provider — find an explicit number)
- ❌ Stamping `recommended: true` on the new model without removing it from the previous flagship
+- ❌ Adding a BYOK-only model under `openai`/`anthropic`/`google` (silently enrolls it in hosted billing via `getHostedModels()`)
+- ❌ Reporting "done" after only `bun run lint` when you touched a hosted (openai/anthropic/google) or flagship model with assertions in `providers/utils.test.ts`
- ❌ Reporting "done" with any UNVERIFIED row in the table
diff --git a/.claude/commands/validate-connector.md b/.claude/commands/validate-connector.md
index adcbf61b12b..3aa5da34f93 100644
--- a/.claude/commands/validate-connector.md
+++ b/.claude/commands/validate-connector.md
@@ -135,6 +135,13 @@ For each API endpoint the connector calls:
- [ ] No off-by-one errors in pagination tracking
- [ ] The connector does NOT hit known API pagination limits silently (e.g., HubSpot search 10k cap)
+### Deletion-Reconciliation Safety (`listingCapped`) — CRITICAL
+The sync engine hard-deletes any stored document absent from a full listing. Audit every path where `listDocuments` can return less than the full source set:
+- [ ] `syncContext.listingCapped = true` is set when a `maxItems`-style cap truncates the listing while more documents exist
+- [ ] `listingCapped` is set when a transient per-item error drops a still-existing document from the listing
+- [ ] `listingCapped` is NOT set when the source is genuinely exhausted (deleted documents must reconcile) or for intentional scope filters (date cutoffs)
+This is the most common connector bug class — verify it explicitly against `sync-engine.ts`'s reconciliation gate.
+
### Pagination State Across Pages
- [ ] `syncContext` is used to cache state across pages (user names, field maps, instance URLs, portal IDs, etc.)
- [ ] Cached state in `syncContext` is correctly initialized on first page and reused on subsequent pages
diff --git a/.claude/commands/validate-model.md b/.claude/commands/validate-model.md
index 10c6aaa0b27..bf1d30745b6 100644
--- a/.claude/commands/validate-model.md
+++ b/.claude/commands/validate-model.md
@@ -34,7 +34,7 @@ Capture per model: `id`, full `pricing`, full `capabilities`, `contextWindow`, `
## Step 2: Live-fetch authoritative sources
-Use the canonical provider URL table in `add-model.md` (Step 1) as the single source of truth — fetch the models index, pricing, and reasoning/parameter caveats pages listed there for the target provider. If you update one table, update the other in the same change.
+Use the canonical provider URL table in the `add-model` skill (`.claude/commands/add-model.md`, or its mirror `.agents/skills/add-model/SKILL.md`), Step 1, as the single source of truth — fetch the models index, pricing, and reasoning/parameter caveats pages listed there for the target provider. If you update one table, update the other in the same change.
Secondary cross-check (use at least one): OpenRouter, Artificial Analysis, CloudPrice.
@@ -98,6 +98,10 @@ For each model, evaluate every row. Statuses: ✓ matches docs, ✗ disagrees,
- [ ] `recommended: true` — at most one or two per provider; should be current flagship
- [ ] `speedOptimized: true` — only on smallest/fastest tier (nano / flash-lite / haiku class)
+### Hosting / billing
+- [ ] If the model is under `openai`/`anthropic`/`google`, it is automatically in `getHostedModels()` → served with Sim's rotating key and billed via `shouldBillModelUsage()`. Confirm that is the intent (a BYOK-only model parked under one of these providers is a billing bug — warning).
+- [ ] If the model is hosted, the deployment is expected to have its `{PREFIX}_COUNT` / `{PREFIX}_1..N` env vars set (ops concern; note if it looks unset for a model claiming hosted support).
+
## Step 5: Report (mandatory format)
For each model, emit a table with one row per checklist item. Every row that claims ✓ must have a URL.
diff --git a/README.md b/README.md
index 989452870fd..6a8508bcc3c 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
-
The open-source platform to build AI agents and run your agentic workforce. Connect 1,000+ integrations and LLMs to orchestrate agentic workflows.
+
The open-source AI workspace where teams build, deploy, and manage AI agents. Build conversationally, visually, or with code. Connect 1,000+ integrations and every major LLM to automate real work.
@@ -21,25 +21,39 @@
-### Build Workflows with Ease
-Design agent workflows visually on a canvas—connect agents, tools, and blocks, then run them instantly.
+### Build everything in Mothership
+Your AI command center. Describe what you want in plain language. Mothership knows your entire workspace and takes action: building agents, running them, querying data, and more.
-
+
-### Supercharge with Copilot
-Leverage Copilot to generate nodes, fix errors, and iterate on flows directly from natural language.
+### Create files and documents
+Generate documents, reports, and presentations from a single prompt, grounded in your workspace data.
-
+
-### Integrate Vector Databases
-Upload documents to a vector store and let agents answer questions grounded in your specific content.
+### Ground agents in your knowledge
+Upload documents to a knowledge base and let agents answer questions from your own content.
-
+
+
+
+### Structured data with Tables
+A database, built in. Store, query, and wire structured data into agent runs.
+
+
+
+
+
+### Build visually with Workflows
+Prefer a canvas? Design agents block by block in the visual builder, and let Copilot generate blocks, wire variables, and fix errors from natural language.
+
+
+
## Quickstart
@@ -74,7 +88,7 @@ docker compose -f docker-compose.prod.yml up -d
Open [http://localhost:3000](http://localhost:3000)
-Sim also supports local models via [Ollama](https://ollama.ai) and [vLLM](https://docs.vllm.ai/) — see the [Docker self-hosting docs](https://docs.sim.ai/self-hosting/docker) for setup details.
+Sim also supports local models via [Ollama](https://ollama.ai) and [vLLM](https://docs.vllm.ai/). See the [Docker self-hosting docs](https://docs.sim.ai/self-hosting/docker) for setup details.
### Self-hosted: Manual Setup
diff --git a/apps/docs/app/global.css b/apps/docs/app/global.css
index 2bb74df043e..d0645dc8046 100644
--- a/apps/docs/app/global.css
+++ b/apps/docs/app/global.css
@@ -510,6 +510,13 @@ figure[data-rehype-pretty-code-figure],
max-width: 480px !important;
}
+/* Search dialog overlay + panel must cover the sticky navbar — both default to z-50,
+ and the navbar wins the tie by DOM order, leaving it unblurred above the overlay */
+.bg-fd-overlay,
+[role="dialog"][data-state] {
+ z-index: 60 !important;
+}
+
pre {
font-size: 0.875rem;
line-height: 1.7;
diff --git a/apps/docs/components/icons.tsx b/apps/docs/components/icons.tsx
index fcdab73224d..dce91bf9720 100644
--- a/apps/docs/components/icons.tsx
+++ b/apps/docs/components/icons.tsx
@@ -2743,6 +2743,18 @@ export function ClerkIcon(props: SVGProps) {
)
}
+export function ClickHouseIcon(props: SVGProps) {
+ return (
+
+ )
+}
+
export function MicrosoftIcon(props: SVGProps) {
return (
+)
+export const FalIcon = (props: SVGProps) => (
+
)
export function ShieldCheckIcon(props: SVGProps) {
@@ -3982,16 +4005,16 @@ export function FireworksIcon(props: SVGProps) {
return (
)
diff --git a/apps/docs/components/ui/icon-mapping.ts b/apps/docs/components/ui/icon-mapping.ts
index d22e1caf00f..7b4a0e3a336 100644
--- a/apps/docs/components/ui/icon-mapping.ts
+++ b/apps/docs/components/ui/icon-mapping.ts
@@ -31,6 +31,7 @@ import {
CirclebackIcon,
ClayIcon,
ClerkIcon,
+ ClickHouseIcon,
CloudFormationIcon,
CloudflareIcon,
CloudWatchIcon,
@@ -243,6 +244,7 @@ export const blockTypeToIconMap: Record = {
circleback: CirclebackIcon,
clay: ClayIcon,
clerk: ClerkIcon,
+ clickhouse: ClickHouseIcon,
cloudflare: CloudflareIcon,
cloudformation: CloudFormationIcon,
cloudwatch: CloudWatchIcon,
diff --git a/apps/docs/content/docs/en/knowledgebase/connectors.mdx b/apps/docs/content/docs/en/knowledgebase/connectors.mdx
index 88a62383027..2f9de16cfa2 100644
--- a/apps/docs/content/docs/en/knowledgebase/connectors.mdx
+++ b/apps/docs/content/docs/en/knowledgebase/connectors.mdx
@@ -14,21 +14,23 @@ Connectors continuously sync documents from external services into your knowledg
-Sim ships with 30 built-in connectors:
+Sim ships with 49 built-in connectors:
| Category | Connectors |
|----------|-----------|
-| **Productivity** | Notion, Confluence, Asana, Linear, Jira, Google Calendar, Google Sheets |
-| **Cloud Storage** | Google Drive, Dropbox, OneDrive, SharePoint |
-| **Documents** | Google Docs, WordPress, Webflow |
-| **Development** | GitHub |
-| **Communication** | Slack, Discord, Microsoft Teams, Reddit |
+| **Productivity** | Notion, Confluence, Asana, Linear, Jira, Jira Service Management, Monday, Google Calendar, Google Sheets, Google Forms, Typeform |
+| **Cloud Storage** | Google Drive, Dropbox, OneDrive, SharePoint, Amazon S3 |
+| **Documents** | Google Docs, WordPress, Webflow, DocuSign |
+| **Development** | GitHub, GitLab, Azure DevOps, Sentry |
+| **Communication** | Slack, Discord, Microsoft Teams, Reddit, YouTube |
| **Email** | Gmail, Outlook |
| **CRM** | HubSpot, Salesforce |
| **Support** | Intercom, ServiceNow, Zendesk |
+| **Incident Management** | incident.io, Rootly |
| **Data** | Airtable |
| **Note-taking** | Evernote, Obsidian |
-| **Meetings** | Fireflies |
+| **Meetings** | Zoom, Gong, Grain, Granola, Fathom, Fireflies |
+| **Recruiting** | Greenhouse, Ashby |
## Adding a Connector
@@ -41,13 +43,18 @@ From inside a knowledge base, click **+ New connector** in the top right to open
Most connectors use **OAuth** — select an existing credential from the dropdown or click **Connect new account** to authorize through the service. Tokens are refreshed automatically.
-A few connectors use **API keys** instead:
+Other connectors use **API keys** or **personal access tokens** instead. The setup modal tells you which credential each connector expects — for example:
| Connector | Where to get the key |
|-----------|---------------------|
| **Evernote** | Developer Token (starts with `S=`) from your Evernote account settings |
| **Obsidian** | Install the [Local REST API](https://github.com/coddingtonbear/obsidian-local-rest-api) plugin, then copy the key from its settings |
| **Fireflies** | Generate from the Integrations page in your Fireflies account |
+| **Typeform** | Personal access token from your Typeform account settings |
+| **Azure DevOps** | Personal access token with Wiki (Read), Work Items (Read), and Code (Read) scopes |
+| **YouTube** | YouTube Data API key from the Google Cloud Console |
+| **Amazon S3** | Secret Access Key (the Access Key ID, region, and bucket are entered as config fields) |
+| **Sentry** | Auth token with `project:read` and `event:read` scopes |
If you rotate an API key in the external service, update it in Sim as well — OAuth tokens refresh automatically, but API keys do not.
@@ -63,6 +70,10 @@ Each connector has source-specific fields that control what gets synced. Example
- **Notion** — sync an entire workspace, a specific database, or a single page tree
- **GitHub** — specify a repository, branch, and optional file extension filter
- **Confluence** — enter your Atlassian domain and optionally filter by space key or content type
+- **Azure DevOps** — choose what to sync (wiki pages, work items, repository files, or all), with optional work item type/state filters, a custom WIQL query, and repository/branch/path filters
+- **Amazon S3** — point at a bucket with an optional key prefix and a customizable file extension allowlist; S3-compatible stores (Cloudflare R2, MinIO) are supported via a custom endpoint
+- **YouTube** — sync a channel (by `@handle` or ID) or playlist, with an optional published-after date filter and the option to exclude Shorts
+- **Sentry** — filter issues by search query (e.g. `is:unresolved`), environment, and time window; self-hosted Sentry is supported via a custom host
- **Obsidian** — provide your vault URL (`https://127.0.0.1:27124` by default) and optionally restrict to a folder path
- **Fireflies** — optionally filter by host email or cap the number of transcripts synced
@@ -188,5 +199,5 @@ You can add as many connectors as you need to a single knowledge base. Each mana
{ question: "What happens when I delete a connector?", answer: "The connector is removed and future syncs stop. You're given the option to also delete all documents that were synced by that connector. If you don't check that option, they stay in the knowledge base as-is." },
{ question: "What does the Disabled status mean?", answer: "After 10 consecutive full-sync failures, the connector is automatically disabled to stop retrying. Reconnect the OAuth account or click Resume to re-enable it." },
{ question: "Do metadata tags count against a limit?", answer: "Yes. Tag slots are shared across all documents in a knowledge base — 17 slots total. Multiple connectors draw from the same pool, so plan accordingly if several connectors each auto-populate tags." },
- { question: "Do I need to re-authenticate connectors?", answer: "OAuth connectors refresh tokens automatically. API key connectors (Evernote, Obsidian, Fireflies) need manual updates if you rotate the key in the external service." },
+ { question: "Do I need to re-authenticate connectors?", answer: "OAuth connectors refresh tokens automatically. API key and personal access token connectors need manual updates if you rotate the credential in the external service." },
]} />
diff --git a/apps/docs/content/docs/en/mothership/knowledge.mdx b/apps/docs/content/docs/en/mothership/knowledge.mdx
index ab17e6e6a78..008c050b5c2 100644
--- a/apps/docs/content/docs/en/mothership/knowledge.mdx
+++ b/apps/docs/content/docs/en/mothership/knowledge.mdx
@@ -49,7 +49,7 @@ For knowledge bases that should stay current automatically, connectors sync cont
Connectors are configured through the knowledge base settings, not through Mothership chat. Once connected, all synced content is immediately searchable by Mothership and by any Agent block with the knowledge base attached.
-Sim ships with 30 built-in connectors, including Notion, Google Drive, Slack, GitHub, Confluence, HubSpot, Salesforce, Gmail, and more.
+Sim ships with 49 built-in connectors, including Notion, Google Drive, Slack, GitHub, Confluence, HubSpot, Salesforce, Gmail, and more.
Examples of what you can sync:
diff --git a/apps/docs/content/docs/en/tools/clickhouse.mdx b/apps/docs/content/docs/en/tools/clickhouse.mdx
new file mode 100644
index 00000000000..f3c9837525b
--- /dev/null
+++ b/apps/docs/content/docs/en/tools/clickhouse.mdx
@@ -0,0 +1,559 @@
+---
+title: ClickHouse
+description: Connect to a ClickHouse database
+---
+
+import { BlockInfoCard } from "@/components/ui/block-info-card"
+
+
+
+{/* MANUAL-CONTENT-START:intro */}
+[ClickHouse](https://clickhouse.com) is an open-source, column-oriented database management system for online analytical processing (OLAP). It is built for speed at scale — running aggregations and analytical queries over billions of rows in real time.
+
+The ClickHouse block connects to any ClickHouse deployment (ClickHouse Cloud or self-hosted) over the [HTTP interface](https://clickhouse.com/docs/interfaces/http). Use it to run analytical queries, stream rows into tables, manage schemas, inspect system state, and execute arbitrary SQL — all from within a workflow.
+
+**Connection details**
+
+- **Host** — your ClickHouse hostname (e.g. `your-instance.clickhouse.cloud` or your server address).
+- **Port** — the HTTP interface port. Use `8443` for HTTPS (ClickHouse Cloud) or `8123` for plain HTTP (self-hosted).
+- **Database** / **Username** — each falls back to `default` if not specified.
+- **Password** — optional; leave it blank for instances that do not require authentication.
+- **Use HTTPS** — keep enabled for any remote or Cloud instance.
+
+**Things to know**
+
+- `UPDATE` and `DELETE` are implemented as ClickHouse [mutations](https://clickhouse.com/docs/sql-reference/statements/alter/update) (`ALTER TABLE ... UPDATE/DELETE`). Mutations run **asynchronously** in the background, so the affected row count is not returned immediately.
+- ClickHouse is optimized for bulk inserts. Prefer batching many rows per insert over many single-row inserts.
+- The connection host is validated to block private/internal addresses, so the block cannot reach `localhost` or internal-only hosts.
+{/* MANUAL-CONTENT-END */}
+
+
+## Usage Instructions
+
+Integrate ClickHouse into the workflow. Query and insert data, manage databases and tables, inspect schemas, monitor mutations and running queries, manage partitions, and execute raw SQL over the ClickHouse HTTP interface.
+
+
+
+## Tools
+
+### `clickhouse_query`
+
+Execute a SELECT query on a ClickHouse database
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | ClickHouse server hostname \(e.g., your-instance.clickhouse.cloud\) |
+| `port` | number | Yes | ClickHouse HTTP interface port \(8443 for HTTPS, 8123 for HTTP\) |
+| `database` | string | Yes | Database name to connect to |
+| `username` | string | Yes | ClickHouse username |
+| `password` | string | No | ClickHouse password |
+| `secure` | boolean | No | Use a secure HTTPS connection \(default: true\) |
+| `query` | string | Yes | SQL SELECT query to execute |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Operation status message |
+| `rows` | array | Array of rows returned from the query |
+| `rowCount` | number | Number of rows returned |
+
+### `clickhouse_execute`
+
+Execute raw SQL (DDL, mutations, or queries) on a ClickHouse database
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | ClickHouse server hostname \(e.g., your-instance.clickhouse.cloud\) |
+| `port` | number | Yes | ClickHouse HTTP interface port \(8443 for HTTPS, 8123 for HTTP\) |
+| `database` | string | Yes | Database name to connect to |
+| `username` | string | Yes | ClickHouse username |
+| `password` | string | No | ClickHouse password |
+| `secure` | boolean | No | Use a secure HTTPS connection \(default: true\) |
+| `query` | string | Yes | Raw SQL statement to execute |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Operation status message |
+| `rows` | array | Array of rows returned from the statement |
+| `rowCount` | number | Number of rows returned or affected |
+
+### `clickhouse_insert`
+
+Insert a row into a ClickHouse table
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | ClickHouse server hostname \(e.g., your-instance.clickhouse.cloud\) |
+| `port` | number | Yes | ClickHouse HTTP interface port \(8443 for HTTPS, 8123 for HTTP\) |
+| `database` | string | Yes | Database name to connect to |
+| `username` | string | Yes | ClickHouse username |
+| `password` | string | No | ClickHouse password |
+| `secure` | boolean | No | Use a secure HTTPS connection \(default: true\) |
+| `table` | string | Yes | Table name to insert data into |
+| `data` | object | Yes | Data object to insert \(key-value pairs mapping column names to values\) |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Operation status message |
+| `rows` | array | Inserted rows \(empty for ClickHouse inserts\) |
+| `rowCount` | number | Number of rows inserted |
+
+### `clickhouse_insert_rows`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_update`
+
+Update rows in a ClickHouse table via an ALTER TABLE ... UPDATE mutation
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | ClickHouse server hostname \(e.g., your-instance.clickhouse.cloud\) |
+| `port` | number | Yes | ClickHouse HTTP interface port \(8443 for HTTPS, 8123 for HTTP\) |
+| `database` | string | Yes | Database name to connect to |
+| `username` | string | Yes | ClickHouse username |
+| `password` | string | No | ClickHouse password |
+| `secure` | boolean | No | Use a secure HTTPS connection \(default: true\) |
+| `table` | string | Yes | Table name to update data in |
+| `data` | object | Yes | Data object with fields to update \(key-value pairs\) |
+| `where` | string | Yes | WHERE clause condition \(without the WHERE keyword\) |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Operation status message |
+| `rows` | array | Updated rows \(empty for ClickHouse mutations\) |
+| `rowCount` | number | Number of rows written by the mutation |
+
+### `clickhouse_delete`
+
+Delete rows from a ClickHouse table via an ALTER TABLE ... DELETE mutation
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | ClickHouse server hostname \(e.g., your-instance.clickhouse.cloud\) |
+| `port` | number | Yes | ClickHouse HTTP interface port \(8443 for HTTPS, 8123 for HTTP\) |
+| `database` | string | Yes | Database name to connect to |
+| `username` | string | Yes | ClickHouse username |
+| `password` | string | No | ClickHouse password |
+| `secure` | boolean | No | Use a secure HTTPS connection \(default: true\) |
+| `table` | string | Yes | Table name to delete data from |
+| `where` | string | Yes | WHERE clause condition \(without the WHERE keyword\) |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Operation status message |
+| `rows` | array | Deleted rows \(empty for ClickHouse mutations\) |
+| `rowCount` | number | Number of rows affected by the mutation |
+
+### `clickhouse_list_databases`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_list_tables`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_describe_table`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_show_create_table`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_count_rows`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_introspect`
+
+Introspect a ClickHouse database to retrieve table structures, columns, and engines
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | ClickHouse server hostname \(e.g., your-instance.clickhouse.cloud\) |
+| `port` | number | Yes | ClickHouse HTTP interface port \(8443 for HTTPS, 8123 for HTTP\) |
+| `database` | string | Yes | Database name to introspect |
+| `username` | string | Yes | ClickHouse username |
+| `password` | string | No | ClickHouse password |
+| `secure` | boolean | No | Use a secure HTTPS connection \(default: true\) |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Operation status message |
+| `tables` | array | Array of table schemas with columns and engines |
+| ↳ `name` | string | Table name |
+| ↳ `database` | string | Database the table belongs to |
+| ↳ `engine` | string | Table engine \(e.g., MergeTree, Log\) |
+| ↳ `totalRows` | number | Approximate total number of rows in the table |
+| ↳ `columns` | array | Table columns |
+| ↳ `name` | string | Column name |
+| ↳ `type` | string | ClickHouse data type \(e.g., UInt32, String, DateTime\) |
+| ↳ `defaultKind` | string | Kind of default expression \(DEFAULT, MATERIALIZED, ALIAS\) |
+| ↳ `defaultExpression` | string | Default value expression for the column |
+| ↳ `isInPrimaryKey` | boolean | Whether the column is part of the primary key |
+| ↳ `isInSortingKey` | boolean | Whether the column is part of the sorting key |
+
+### `clickhouse_create_database`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_drop_database`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_create_table`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_drop_table`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_truncate_table`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_rename_table`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_optimize_table`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_list_partitions`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_drop_partition`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_list_mutations`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_list_running_queries`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_kill_query`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_table_stats`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+### `clickhouse_list_clusters`
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message describing the operation outcome |
+| `rows` | array | Array of rows returned from the operation |
+| `rowCount` | number | Number of rows returned or affected by the operation |
+| `count` | number | Row count \(count rows operation\) |
+| `ddl` | string | CREATE TABLE statement \(show create table operation\) |
+| `tables` | array | Array of table schemas with columns and engines \(introspect operation\) |
+
+
diff --git a/apps/docs/content/docs/en/tools/dagster.mdx b/apps/docs/content/docs/en/tools/dagster.mdx
index b82c1a7f4ab..ef78e695e6e 100644
--- a/apps/docs/content/docs/en/tools/dagster.mdx
+++ b/apps/docs/content/docs/en/tools/dagster.mdx
@@ -73,8 +73,15 @@ Get the status and details of a Dagster run by its ID.
| `runId` | string | Run ID |
| `jobName` | string | Name of the job this run belongs to |
| `status` | string | Run status \(QUEUED, NOT_STARTED, STARTING, MANAGED, STARTED, SUCCESS, FAILURE, CANCELING, CANCELED\) |
+| `mode` | string | Execution mode of the run |
| `startTime` | number | Run start time as Unix timestamp |
| `endTime` | number | Run end time as Unix timestamp |
+| `creationTime` | number | Time the run was created as Unix timestamp |
+| `updateTime` | number | Time the run was last updated as Unix timestamp |
+| `parentRunId` | string | ID of the immediate parent run \(for re-executions\) |
+| `rootRunId` | string | ID of the root run in the re-execution group |
+| `canTerminate` | boolean | Whether the run can currently be terminated |
+| `assetSelection` | json | Asset keys targeted by the run, as slash-joined strings |
| `runConfigYaml` | string | Run configuration as YAML |
| `tags` | json | Run tags as array of \{key, value\} objects |
@@ -108,7 +115,7 @@ Fetch execution event logs for a Dagster run.
### `dagster_list_runs`
-List recent Dagster runs, optionally filtered by job name.
+List Dagster runs with optional filters by job name, status, and creation-time range, plus cursor pagination.
#### Input
@@ -118,6 +125,9 @@ List recent Dagster runs, optionally filtered by job name.
| `apiKey` | string | No | Dagster+ API token \(leave blank for OSS / self-hosted\) |
| `jobName` | string | No | Filter runs by job name \(optional\) |
| `statuses` | string | No | Comma-separated run statuses to filter by, e.g. "SUCCESS,FAILURE" \(optional\) |
+| `createdAfter` | number | No | Only return runs created at or after this Unix timestamp in seconds \(optional\) |
+| `createdBefore` | number | No | Only return runs created at or before this Unix timestamp in seconds \(optional\) |
+| `cursor` | string | No | Run ID to page after, from a previous response cursor \(optional\) |
| `limit` | number | No | Maximum number of runs to return \(default 20\) |
#### Output
@@ -131,6 +141,8 @@ List recent Dagster runs, optionally filtered by job name.
| ↳ `tags` | json | Run tags as array of \{key, value\} objects |
| ↳ `startTime` | number | Start time as Unix timestamp |
| ↳ `endTime` | number | End time as Unix timestamp |
+| `cursor` | string | Run ID of the last returned run — pass as cursor to fetch the next page |
+| `hasMore` | boolean | Whether more runs are likely available beyond this page |
### `dagster_list_jobs`
@@ -295,7 +307,7 @@ List all sensors in a Dagster repository, optionally filtered by status.
| --------- | ---- | ----------- |
| `sensors` | json | Array of sensors \(name, sensorType, status, id, description\) |
| ↳ `name` | string | Sensor name |
-| ↳ `sensorType` | string | Sensor type \(ASSET, AUTO_MATERIALIZE, FRESHNESS_POLICY, MULTI_ASSET, RUN_STATUS, STANDARD\) |
+| ↳ `sensorType` | string | Sensor type \(ASSET, AUTO_MATERIALIZE, FRESHNESS_POLICY, MULTI_ASSET, RUN_STATUS, STANDARD, UNKNOWN\) |
| ↳ `status` | string | Sensor status: RUNNING or STOPPED |
| ↳ `id` | string | Instigator state ID — use this to start or stop the sensor |
| ↳ `description` | string | Human-readable sensor description |
@@ -340,4 +352,120 @@ Disable (stop) a running sensor in Dagster.
| `id` | string | Instigator state ID of the sensor |
| `status` | string | Updated sensor status \(RUNNING or STOPPED\) |
+### `dagster_list_assets`
+
+List assets tracked by a Dagster instance, optionally filtered by key prefix.
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | Dagster host URL \(e.g., https://myorg.dagster.cloud/prod or http://localhost:3001\) |
+| `apiKey` | string | No | Dagster+ API token \(leave blank for OSS / self-hosted\) |
+| `prefix` | string | No | Slash-delimited asset key prefix to filter by, e.g. "raw" or "raw/events" \(optional\) |
+| `cursor` | string | No | Asset key cursor from a previous response, for pagination \(optional\) |
+| `limit` | number | No | Maximum number of assets to return \(optional\) |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `assets` | json | Array of assets \(assetKey, path\) |
+| ↳ `assetKey` | string | Slash-joined asset key |
+| ↳ `path` | json | Asset key path segments |
+| `cursor` | string | Cursor to pass on the next call to fetch more assets |
+| `hasMore` | boolean | Whether more assets are likely available beyond this page |
+
+### `dagster_get_asset`
+
+Get an asset definition and its latest materialization by asset key.
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | Dagster host URL \(e.g., https://myorg.dagster.cloud/prod or http://localhost:3001\) |
+| `apiKey` | string | No | Dagster+ API token \(leave blank for OSS / self-hosted\) |
+| `assetKey` | string | Yes | Slash-delimited asset key, e.g. "my_asset" or "raw/events" |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `assetKey` | string | Slash-joined asset key |
+| `path` | json | Asset key path segments |
+| `groupName` | string | Asset group the definition belongs to |
+| `description` | string | Asset description |
+| `jobNames` | json | Names of jobs that can materialize this asset |
+| `computeKind` | string | Compute kind tag \(e.g., python, dbt, spark\) |
+| `isPartitioned` | boolean | Whether the asset is partitioned |
+| `latestMaterialization` | json | Most recent materialization \(runId, timestamp, partition, stepKey\) |
+| ↳ `runId` | string | Run that produced the materialization |
+| ↳ `timestamp` | string | Materialization timestamp \(epoch ms string\) |
+| ↳ `partition` | string | Partition key, if partitioned |
+| ↳ `stepKey` | string | Step key that emitted it |
+
+### `dagster_materialize_assets`
+
+Materialize selected assets by launching their asset job with an asset selection.
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | Dagster host URL \(e.g., https://myorg.dagster.cloud/prod or http://localhost:3001\) |
+| `apiKey` | string | No | Dagster+ API token \(leave blank for OSS / self-hosted\) |
+| `repositoryLocationName` | string | Yes | Repository location \(code location\) name |
+| `repositoryName` | string | Yes | Repository name within the code location |
+| `jobName` | string | Yes | Asset job that contains the assets, e.g. "__ASSET_JOB" or a named asset job |
+| `assetSelection` | string | Yes | Comma- or newline-separated asset keys to materialize, each slash-delimited \(e.g. "raw/events, summary"\) |
+| `tags` | string | No | Tags as a JSON array of \{key, value\} objects \(optional\) |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `runId` | string | The globally unique ID of the launched materialization run |
+
+### `dagster_report_asset_materialization`
+
+Report an external (runless) materialization or observation for an asset.
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | Dagster host URL \(e.g., https://myorg.dagster.cloud/prod or http://localhost:3001\) |
+| `apiKey` | string | No | Dagster+ API token \(leave blank for OSS / self-hosted\) |
+| `assetKey` | string | Yes | Slash-delimited asset key to report against, e.g. "my_asset" or "raw/events" |
+| `eventType` | string | No | Event type to report: ASSET_MATERIALIZATION \(default\) or ASSET_OBSERVATION |
+| `partitionKeys` | string | No | Comma-separated partition keys to report against \(optional\) |
+| `description` | string | No | Human-readable description for the reported event \(optional\) |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `success` | boolean | Whether the event was reported successfully |
+| `assetKey` | string | Slash-joined asset key the event was reported against |
+
+### `dagster_wipe_asset`
+
+DESTRUCTIVE: permanently wipes ALL materialization history (every partition) for an asset. This cannot be undone.
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `host` | string | Yes | Dagster host URL \(e.g., https://myorg.dagster.cloud/prod or http://localhost:3001\) |
+| `apiKey` | string | No | Dagster+ API token \(leave blank for OSS / self-hosted\) |
+| `assetKey` | string | Yes | Slash-delimited asset key to wipe, e.g. "my_asset" or "raw/events" |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `success` | boolean | Whether the asset was wiped successfully |
+| `assetKey` | string | Slash-joined asset key that was wiped |
+
diff --git a/apps/docs/content/docs/en/tools/meta.json b/apps/docs/content/docs/en/tools/meta.json
index a17c92d28e7..84dac39482d 100644
--- a/apps/docs/content/docs/en/tools/meta.json
+++ b/apps/docs/content/docs/en/tools/meta.json
@@ -27,6 +27,7 @@
"circleback",
"clay",
"clerk",
+ "clickhouse",
"cloudflare",
"cloudformation",
"cloudwatch",
diff --git a/apps/docs/content/docs/en/tools/tinybird.mdx b/apps/docs/content/docs/en/tools/tinybird.mdx
index 0c3d74a9341..f67ac0c2ff0 100644
--- a/apps/docs/content/docs/en/tools/tinybird.mdx
+++ b/apps/docs/content/docs/en/tools/tinybird.mdx
@@ -1,6 +1,6 @@
---
title: Tinybird
-description: Send events and query data with Tinybird
+description: Send events, query data, and manage Data Sources with Tinybird
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
@@ -30,7 +30,7 @@ Connect Tinybird to your workflows today to accelerate data-driven features, aut
## Usage Instructions
-Interact with Tinybird using the Events API to stream JSON or NDJSON events, or use the Query API to execute SQL queries against Pipes and Data Sources.
+Interact with Tinybird: stream JSON or NDJSON events with the Events API, run SQL with the Query API, call published Pipe API Endpoints by name with dynamic parameters, and manage Data Sources by appending from a URL, truncating, or deleting rows by condition.
@@ -77,7 +77,110 @@ Execute SQL queries against Tinybird Pipes and Data Sources using the Query API.
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `data` | json | Query result data. For FORMAT JSON: array of objects. For other formats \(CSV, TSV, etc.\): raw text string. |
+| `meta` | array | Column metadata for the result set \(only available with FORMAT JSON\) |
+| ↳ `name` | string | Column name |
+| ↳ `type` | string | Column data type |
| `rows` | number | Number of rows returned \(only available with FORMAT JSON\) |
+| `rows_before_limit_at_least` | number | Minimum number of rows there would be without a LIMIT clause \(only available with FORMAT JSON\) |
| `statistics` | json | Query execution statistics - elapsed time, rows read, bytes read \(only available with FORMAT JSON\) |
+### `tinybird_query_pipe`
+
+Call a published Tinybird Pipe API Endpoint by name, passing dynamic parameters and receiving structured JSON results.
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `base_url` | string | Yes | Tinybird API base URL \(e.g., https://api.tinybird.co\) |
+| `pipe` | string | Yes | Name of the published Pipe API Endpoint to call. Example: "top_pages" |
+| `parameters` | json | No | Dynamic Pipe parameters as a JSON object, sent as query-string arguments. Example: \{"start_date": "2024-01-01", "limit": 10\} |
+| `q` | string | No | Optional SQL to run on top of the Pipe result. Use "_" to reference the Pipe. Example: "SELECT count\(\) FROM _" |
+| `token` | string | Yes | Tinybird API Token with PIPE:READ scope |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `data` | json | Pipe result data as an array of row objects |
+| `meta` | array | Column metadata for the result set |
+| ↳ `name` | string | Column name |
+| ↳ `type` | string | Column data type |
+| `rows` | number | Number of rows returned |
+| `rows_before_limit_at_least` | number | Minimum number of rows there would be without a LIMIT clause |
+| `statistics` | json | Query execution statistics - elapsed time, rows read, bytes read |
+| ↳ `elapsed` | number | Query execution time in seconds |
+| ↳ `rows_read` | number | Number of rows processed |
+| ↳ `bytes_read` | number | Number of bytes processed |
+
+### `tinybird_append_datasource`
+
+Append data to a Tinybird Data Source from a remote file URL (CSV, NDJSON, Parquet).
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `base_url` | string | Yes | Tinybird API base URL \(e.g., https://api.tinybird.co\) |
+| `datasource` | string | Yes | Name of the existing Data Source to append to. Example: "events_raw" |
+| `url` | string | Yes | Publicly accessible URL of the file to append. Example: "https://example.com/data.csv" |
+| `format` | string | No | Format of the source file: "csv" \(default\), "ndjson", or "parquet" |
+| `token` | string | Yes | Tinybird API Token with DATASOURCES:CREATE scope |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `id` | string | Identifier of the append operation |
+| `import_id` | string | Import identifier for the append job |
+| `job_id` | string | Job identifier used to poll import status |
+| `job_url` | string | URL to query the import job status |
+| `status` | string | Initial job status \(e.g., "waiting"\) |
+| `job` | json | Full import job details \(kind, id, status, created_at, datasource, ...\) |
+| `datasource` | json | Target Data Source metadata \(id, name, ...\) |
+
+### `tinybird_truncate_datasource`
+
+Delete all rows from a Tinybird Data Source.
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `base_url` | string | Yes | Tinybird API base URL \(e.g., https://api.tinybird.co\) |
+| `datasource` | string | Yes | Name of the Data Source to truncate. Example: "events_raw" |
+| `token` | string | Yes | Tinybird API Token with DATASOURCES:CREATE scope |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `truncated` | boolean | Whether the Data Source was truncated successfully |
+| `result` | json | Raw response body from the truncate endpoint, if any |
+
+### `tinybird_delete_datasource_rows`
+
+Delete rows from a Tinybird Data Source matching a SQL condition.
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `base_url` | string | Yes | Tinybird API base URL \(e.g., https://api.tinybird.co\) |
+| `datasource` | string | Yes | Name of the Data Source to delete rows from. Example: "events_raw" |
+| `delete_condition` | string | Yes | SQL WHERE-clause condition selecting the rows to delete. Example: "country = \'ES\'" or "event_date < \'2024-01-01\'" |
+| `dry_run` | boolean | No | When true, returns how many rows would be deleted without deleting them. Defaults to false. |
+| `token` | string | Yes | Tinybird API Token with DATASOURCES:CREATE scope |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `id` | string | Identifier of the delete operation |
+| `job_id` | string | Job identifier used to poll delete status |
+| `delete_id` | string | Deletion identifier |
+| `job_url` | string | URL to query the delete job status |
+| `status` | string | Current job status \(e.g., "waiting", "done"\) |
+| `job` | json | Full delete job details \(kind, id, status, delete_condition, rows_affected, ...\) |
+
diff --git a/apps/docs/content/docs/en/triggers/table.mdx b/apps/docs/content/docs/en/triggers/table.mdx
index 1a4a7139987..beb0826cabb 100644
--- a/apps/docs/content/docs/en/triggers/table.mdx
+++ b/apps/docs/content/docs/en/triggers/table.mdx
@@ -38,7 +38,6 @@ Triggers when rows are inserted or updated in a table
| `changedColumns` | json | List of column names that changed \(empty for inserts\) |
| `rowId` | string | The unique row ID |
| `headers` | json | Column names from the table schema |
-| `rowNumber` | number | The position of the row in the table |
| `tableId` | string | The table ID |
| `tableName` | string | The table name |
| `timestamp` | string | Event timestamp in ISO format |
diff --git a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts
index ef0582c6f41..5f5ca00b33e 100644
--- a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts
+++ b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts
@@ -31,6 +31,7 @@ import {
CirclebackIcon,
ClayIcon,
ClerkIcon,
+ ClickHouseIcon,
CloudFormationIcon,
CloudflareIcon,
CloudWatchIcon,
@@ -242,6 +243,7 @@ export const blockTypeToIconMap: Record = {
circleback: CirclebackIcon,
clay: ClayIcon,
clerk: ClerkIcon,
+ clickhouse: ClickHouseIcon,
cloudflare: CloudflareIcon,
cloudformation: CloudFormationIcon,
cloudwatch: CloudWatchIcon,
diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json
index f371f03ebe5..65df1c17364 100644
--- a/apps/sim/app/(landing)/integrations/data/integrations.json
+++ b/apps/sim/app/(landing)/integrations/data/integrations.json
@@ -2492,6 +2492,129 @@
"integrationTypes": ["security", "developer-tools"],
"tags": ["identity", "automation"]
},
+ {
+ "type": "clickhouse",
+ "slug": "clickhouse",
+ "name": "ClickHouse",
+ "description": "Connect to a ClickHouse database",
+ "longDescription": "Integrate ClickHouse into the workflow. Query and insert data, manage databases and tables, inspect schemas, monitor mutations and running queries, manage partitions, and execute raw SQL over the ClickHouse HTTP interface.",
+ "bgColor": "#f9ff69",
+ "iconName": "ClickHouseIcon",
+ "docsUrl": "https://docs.sim.ai/tools/clickhouse",
+ "operations": [
+ {
+ "name": "Query (SELECT)",
+ "description": "Execute a SELECT query on a ClickHouse database"
+ },
+ {
+ "name": "Execute Raw SQL",
+ "description": "Execute raw SQL (DDL, mutations, or queries) on a ClickHouse database"
+ },
+ {
+ "name": "Insert Row",
+ "description": "Insert a row into a ClickHouse table"
+ },
+ {
+ "name": "Insert Rows (Bulk)",
+ "description": "Insert multiple rows into a ClickHouse table"
+ },
+ {
+ "name": "Update Data",
+ "description": "Update rows in a ClickHouse table via an ALTER TABLE ... UPDATE mutation"
+ },
+ {
+ "name": "Delete Data",
+ "description": "Delete rows from a ClickHouse table via an ALTER TABLE ... DELETE mutation"
+ },
+ {
+ "name": "List Databases",
+ "description": "List all databases on a ClickHouse server"
+ },
+ {
+ "name": "List Tables",
+ "description": "List tables in the connected ClickHouse database"
+ },
+ {
+ "name": "Describe Table",
+ "description": "Describe the columns of a ClickHouse table"
+ },
+ {
+ "name": "Show Create Table",
+ "description": "Get the CREATE TABLE statement (DDL) for a ClickHouse table"
+ },
+ {
+ "name": "Count Rows",
+ "description": "Count rows in a ClickHouse table, optionally filtered"
+ },
+ {
+ "name": "Introspect Schema",
+ "description": "Introspect a ClickHouse database to retrieve table structures, columns, and engines"
+ },
+ {
+ "name": "Create Database",
+ "description": "Create a new database on a ClickHouse server"
+ },
+ {
+ "name": "Drop Database",
+ "description": "Drop a database from a ClickHouse server"
+ },
+ {
+ "name": "Create Table",
+ "description": "Create a new MergeTree-family table in ClickHouse"
+ },
+ {
+ "name": "Drop Table",
+ "description": "Drop a table from a ClickHouse database"
+ },
+ {
+ "name": "Truncate Table",
+ "description": "Remove all rows from a ClickHouse table"
+ },
+ {
+ "name": "Rename Table",
+ "description": "Rename a ClickHouse table"
+ },
+ {
+ "name": "Optimize Table",
+ "description": "Trigger a merge of table parts via OPTIMIZE TABLE"
+ },
+ {
+ "name": "List Partitions",
+ "description": "List active partitions for a ClickHouse table"
+ },
+ {
+ "name": "Drop Partition",
+ "description": "Drop a partition from a ClickHouse table"
+ },
+ {
+ "name": "List Mutations",
+ "description": "List mutations (async ALTER UPDATE/DELETE) for the connected database"
+ },
+ {
+ "name": "List Running Queries",
+ "description": "List currently running queries on a ClickHouse server"
+ },
+ {
+ "name": "Kill Query",
+ "description": "Kill a running query by its query ID"
+ },
+ {
+ "name": "Table Stats",
+ "description": "Get row counts and on-disk size for tables in the connected database"
+ },
+ {
+ "name": "List Clusters",
+ "description": "List configured clusters, shards, and replicas"
+ }
+ ],
+ "operationCount": 26,
+ "triggers": [],
+ "triggerCount": 0,
+ "authType": "none",
+ "category": "tools",
+ "integrationTypes": ["databases", "analytics"],
+ "tags": ["data-warehouse", "data-analytics"]
+ },
{
"type": "cloudflare",
"slug": "cloudflare",
@@ -3110,7 +3233,7 @@
},
{
"name": "List Runs",
- "description": "List recent Dagster runs, optionally filtered by job name."
+ "description": "List Dagster runs with optional filters by job name, status, and creation-time range, plus cursor pagination."
},
{
"name": "List Jobs",
@@ -3151,9 +3274,29 @@
{
"name": "Stop Sensor",
"description": "Disable (stop) a running sensor in Dagster."
+ },
+ {
+ "name": "List Assets",
+ "description": "List assets tracked by a Dagster instance, optionally filtered by key prefix."
+ },
+ {
+ "name": "Get Asset",
+ "description": "Get an asset definition and its latest materialization by asset key."
+ },
+ {
+ "name": "Materialize Assets",
+ "description": "Materialize selected assets by launching their asset job with an asset selection."
+ },
+ {
+ "name": "Report Asset Materialization",
+ "description": "Report an external (runless) materialization or observation for an asset."
+ },
+ {
+ "name": "Wipe Asset",
+ "description": "DESTRUCTIVE: permanently wipes ALL materialization history (every partition) for an asset. This cannot be undone."
}
],
- "operationCount": 14,
+ "operationCount": 19,
"triggers": [],
"triggerCount": 0,
"authType": "api-key",
@@ -14219,11 +14362,11 @@
"type": "tinybird",
"slug": "tinybird",
"name": "Tinybird",
- "description": "Send events and query data with Tinybird",
- "longDescription": "Interact with Tinybird using the Events API to stream JSON or NDJSON events, or use the Query API to execute SQL queries against Pipes and Data Sources.",
+ "description": "Send events, query data, and manage Data Sources with Tinybird",
+ "longDescription": "Interact with Tinybird: stream JSON or NDJSON events with the Events API, run SQL with the Query API, call published Pipe API Endpoints by name with dynamic parameters, and manage Data Sources by appending from a URL, truncating, or deleting rows by condition.",
"bgColor": "#2EF598",
"iconName": "TinybirdIcon",
- "docsUrl": "https://www.tinybird.co/docs/api-reference",
+ "docsUrl": "https://docs.sim.ai/tools/tinybird",
"operations": [
{
"name": "Send Events",
@@ -14232,9 +14375,25 @@
{
"name": "Query",
"description": "Execute SQL queries against Tinybird Pipes and Data Sources using the Query API."
+ },
+ {
+ "name": "Query Pipe Endpoint",
+ "description": "Call a published Tinybird Pipe API Endpoint by name, passing dynamic parameters and receiving structured JSON results."
+ },
+ {
+ "name": "Append Data Source (from URL)",
+ "description": "Append data to a Tinybird Data Source from a remote file URL (CSV, NDJSON, Parquet)."
+ },
+ {
+ "name": "Truncate Data Source",
+ "description": "Delete all rows from a Tinybird Data Source."
+ },
+ {
+ "name": "Delete Data Source Rows",
+ "description": "Delete rows from a Tinybird Data Source matching a SQL condition."
}
],
- "operationCount": 2,
+ "operationCount": 6,
"triggers": [],
"triggerCount": 0,
"authType": "none",
diff --git a/apps/sim/app/api/cron/cleanup-stale-executions/route.ts b/apps/sim/app/api/cron/cleanup-stale-executions/route.ts
index 52c9420916c..99c395d644b 100644
--- a/apps/sim/app/api/cron/cleanup-stale-executions/route.ts
+++ b/apps/sim/app/api/cron/cleanup-stale-executions/route.ts
@@ -1,5 +1,5 @@
import { asyncJobs, db } from '@sim/db'
-import { workflowExecutionLogs } from '@sim/db/schema'
+import { userTableDefinitions, workflowExecutionLogs } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { and, eq, inArray, lt, sql } from 'drizzle-orm'
@@ -110,6 +110,37 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
})
}
+ // Mark stale table imports as failed. Imports run detached on the web container and
+ // are lost if the pod is killed mid-load. `updatedAt` is bumped by progress updates, so
+ // an `importing` table with no recent update has stalled (not merely slow). Rows are
+ // left in place (no rollback); the user re-imports.
+ let staleImportsMarkedFailed = 0
+ try {
+ const staleImports = await db
+ .update(userTableDefinitions)
+ .set({
+ importStatus: 'failed',
+ importError: `Import terminated: no progress for more than ${STALE_THRESHOLD_MINUTES} minutes (worker timeout or crash)`,
+ updatedAt: new Date(),
+ })
+ .where(
+ and(
+ eq(userTableDefinitions.importStatus, 'importing'),
+ lt(userTableDefinitions.updatedAt, staleThreshold)
+ )
+ )
+ .returning({ id: userTableDefinitions.id })
+
+ staleImportsMarkedFailed = staleImports.length
+ if (staleImportsMarkedFailed > 0) {
+ logger.info(`Marked ${staleImportsMarkedFailed} stale table imports as failed`)
+ }
+ } catch (error) {
+ logger.error('Failed to clean up stale table imports:', {
+ error: toError(error).message,
+ })
+ }
+
// Clean up stale pending jobs (never started, e.g., due to server crash before startJob())
let stalePendingJobsMarkedFailed = 0
@@ -179,6 +210,9 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
staleThresholdMinutes: STALE_THRESHOLD_MINUTES,
retentionHours: JOB_RETENTION_HOURS,
},
+ tableImports: {
+ staleMarkedFailed: staleImportsMarkedFailed,
+ },
})
} catch (error) {
logger.error('Error in stale execution cleanup job:', error)
diff --git a/apps/sim/app/api/table/[tableId]/groups/route.ts b/apps/sim/app/api/table/[tableId]/groups/route.ts
index 197a1722b1b..5b9f960896a 100644
--- a/apps/sim/app/api/table/[tableId]/groups/route.ts
+++ b/apps/sim/app/api/table/[tableId]/groups/route.ts
@@ -116,6 +116,9 @@ export const PATCH = withRouteHandler(async (request: NextRequest, { params }: R
...(validated.inputMappings !== undefined
? { inputMappings: validated.inputMappings }
: {}),
+ ...(validated.deploymentMode !== undefined
+ ? { deploymentMode: validated.deploymentMode }
+ : {}),
...(validated.type !== undefined ? { type: validated.type } : {}),
...(validated.autoRun !== undefined ? { autoRun: validated.autoRun } : {}),
},
diff --git a/apps/sim/app/api/table/[tableId]/import-async/route.test.ts b/apps/sim/app/api/table/[tableId]/import-async/route.test.ts
new file mode 100644
index 00000000000..18fa93aca80
--- /dev/null
+++ b/apps/sim/app/api/table/[tableId]/import-async/route.test.ts
@@ -0,0 +1,144 @@
+/**
+ * @vitest-environment node
+ */
+import { hybridAuthMockFns } from '@sim/testing'
+import { NextRequest } from 'next/server'
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+import type { TableDefinition } from '@/lib/table'
+
+// Created via `vi.hoisted` so the spies exist when the `vi.mock` factories below reference them.
+const { mockCheckAccess, mockMarkTableImporting, mockRunTableImport } = vi.hoisted(() => ({
+ mockCheckAccess: vi.fn(),
+ mockMarkTableImporting: vi.fn(),
+ mockRunTableImport: vi.fn(),
+}))
+
+// Fixed ids so tests can assert the exact `importId` returned by the route.
+vi.mock('@sim/utils/id', () => ({
+ generateId: vi.fn().mockReturnValue('import-id-xyz'),
+ generateShortId: vi.fn().mockReturnValue('short-id'),
+}))
+// Swap the table service and the import runner for the hoisted spies.
+vi.mock('@/lib/table/service', () => ({ markTableImporting: mockMarkTableImporting }))
+vi.mock('@/lib/table/import-runner', () => ({ runTableImport: mockRunTableImport }))
+// Replace `runDetached` with a stub that invokes the work callback immediately (without
+// awaiting it), so the import-runner call is observable as soon as the route responds.
+vi.mock('@/lib/core/utils/background', () => ({
+  runDetached: (_label: string, work: () => Promise<void>) => {
+    void work()
+  },
+}))
+// Stub access control: `checkAccess` is driven per test, and `accessError` turns a failed
+// result into a JSON response carrying that result's status code.
+vi.mock('@/app/api/table/utils', async () => {
+ const { NextResponse } = await import('next/server')
+ return {
+ checkAccess: mockCheckAccess,
+ accessError: (result: { status: number }) =>
+ NextResponse.json({ error: 'denied' }, { status: result.status }),
+ }
+})
+
+import { POST } from '@/app/api/table/[tableId]/import-async/route'
+
+/**
+ * Builds a `TableDefinition` fixture for the route under test.
+ *
+ * @param overrides - Fields that replace the fixture defaults (e.g. `archivedAt`).
+ * @returns An unarchived table in `workspace-1` with a single string column, with
+ *   `overrides` applied last.
+ */
+function buildTable(overrides: Partial<TableDefinition> = {}): TableDefinition {
+  return {
+    id: 'tbl_1',
+    name: 'People',
+    description: null,
+    schema: { columns: [{ name: 'name', type: 'string' }] },
+    metadata: null,
+    rowCount: 0,
+    maxRows: 1_000_000,
+    workspaceId: 'workspace-1',
+    createdBy: 'user-1',
+    archivedAt: null,
+    createdAt: new Date(),
+    updatedAt: new Date(),
+    ...overrides,
+  }
+}
+
+/**
+ * Calls the route's `POST` handler with `body` serialized as JSON.
+ *
+ * @param body - Request payload; serialized with `JSON.stringify`.
+ * @param tableId - Table id used in both the URL and the route params.
+ * @returns Whatever the `POST` handler returns.
+ */
+function makeRequest(body: unknown, tableId = 'tbl_1') {
+  const url = `http://localhost:3000/api/table/${tableId}/import-async`
+  const init = {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify(body),
+  }
+  const routeContext = { params: Promise.resolve({ tableId }) }
+  return POST(new NextRequest(url, init), routeContext)
+}
+
+// Baseline request body shared by the tests; each test spreads it and overrides what it needs.
+// The workspace id matches the `buildTable` fixture's `workspaceId`.
+const validBody = {
+ workspaceId: 'workspace-1',
+ fileKey: 'workspace/workspace-1/123-data.csv',
+ fileName: 'data.csv',
+ mode: 'append',
+}
+
+// Route-level tests for the async import endpoint. Auth, access checks, the table service and
+// the import runner are all mocked, so only the handler's own branching is exercised.
+describe('POST /api/table/[tableId]/import-async', () => {
+ beforeEach(() => {
+ // Reset call history, then restore the happy-path defaults: authenticated session,
+ // accessible table, import claim succeeds, import runner resolves.
+ vi.clearAllMocks()
+ hybridAuthMockFns.mockCheckSessionOrInternalAuth.mockResolvedValue({
+ success: true,
+ userId: 'user-1',
+ authType: 'session',
+ })
+ mockCheckAccess.mockResolvedValue({ ok: true, table: buildTable() })
+ mockMarkTableImporting.mockResolvedValue(true)
+ mockRunTableImport.mockResolvedValue(undefined)
+ })
+
+ it('marks the table importing and kicks off the worker with mode + mapping', async () => {
+ const response = await makeRequest({
+ ...validBody,
+ mode: 'replace',
+ mapping: { Name: 'name' },
+ createColumns: ['Extra'],
+ })
+ const data = await response.json()
+
+ expect(response.status).toBe(200)
+ // 'import-id-xyz' is the value returned by the mocked `generateId`.
+ expect(data.data).toEqual({ tableId: 'tbl_1', importId: 'import-id-xyz' })
+ expect(mockMarkTableImporting).toHaveBeenCalledWith('tbl_1', 'import-id-xyz')
+ // `delimiter` is not part of the request body — presumably derived from the `.csv`
+ // file name by the route; confirm against the handler.
+ expect(mockRunTableImport).toHaveBeenCalledWith(
+ expect.objectContaining({
+ tableId: 'tbl_1',
+ mode: 'replace',
+ delimiter: ',',
+ mapping: { Name: 'name' },
+ createColumns: ['Extra'],
+ })
+ )
+ })
+
+ it('returns 409 when the table is already importing (claim lost)', async () => {
+ // `markTableImporting` resolving to false models losing the claim.
+ mockMarkTableImporting.mockResolvedValue(false)
+ const response = await makeRequest(validBody)
+ expect(response.status).toBe(409)
+ expect(mockRunTableImport).not.toHaveBeenCalled()
+ })
+
+ it('returns 401 when unauthenticated', async () => {
+ hybridAuthMockFns.mockCheckSessionOrInternalAuth.mockResolvedValue({ success: false })
+ const response = await makeRequest(validBody)
+ expect(response.status).toBe(401)
+ // Auth failure must short-circuit before the table is claimed.
+ expect(mockMarkTableImporting).not.toHaveBeenCalled()
+ })
+
+ it('returns the access error status when access is denied', async () => {
+ // The mocked `accessError` echoes this status back in the response.
+ mockCheckAccess.mockResolvedValue({ ok: false, status: 403 })
+ const response = await makeRequest(validBody)
+ expect(response.status).toBe(403)
+ expect(mockRunTableImport).not.toHaveBeenCalled()
+ })
+
+ it('returns 400 when the target table is archived', async () => {
+ mockCheckAccess.mockResolvedValue({ ok: true, table: buildTable({ archivedAt: new Date() }) })
+ const response = await makeRequest(validBody)
+ expect(response.status).toBe(400)
+ expect(mockRunTableImport).not.toHaveBeenCalled()
+ })
+
+ it('returns 400 on workspace mismatch', async () => {
+ // The fixture table belongs to 'workspace-1', so any other id must be rejected.
+ const response = await makeRequest({ ...validBody, workspaceId: 'other-ws' })
+ expect(response.status).toBe(400)
+ })
+
+ it('returns 400 for an invalid mode', async () => {
+ const response = await makeRequest({ ...validBody, mode: 'bogus' })
+ expect(response.status).toBe(400)
+ })
+})
diff --git a/apps/sim/app/api/table/[tableId]/import-async/route.ts b/apps/sim/app/api/table/[tableId]/import-async/route.ts
new file mode 100644
index 00000000000..46190cbfb06
--- /dev/null
+++ b/apps/sim/app/api/table/[tableId]/import-async/route.ts
@@ -0,0 +1,92 @@
+import { createLogger } from '@sim/logger'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { importIntoTableAsyncContract } from '@/lib/api/contracts/tables'
+import { parseRequest } from '@/lib/api/server'
+import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
+import { runDetached } from '@/lib/core/utils/background'
+import { generateRequestId } from '@/lib/core/utils/request'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { runTableImport } from '@/lib/table/import-runner'
+import { markTableImporting } from '@/lib/table/service'
+import { accessError, checkAccess } from '@/app/api/table/utils'
+
+const logger = createLogger('TableImportIntoAsync')
+// Next.js route segment config: use the Node.js runtime and evaluate the handler on every request.
+export const runtime = 'nodejs'
+export const dynamic = 'force-dynamic'
+
+interface RouteParams {
+ params: Promise<{ tableId: string }> // dynamic `[tableId]` route segment, delivered as a promise
+}
+
+export const POST = withRouteHandler(async (request: NextRequest, { params }: RouteParams) => {
+  const requestId = generateRequestId()
+  // Flow: authenticate → parse contract → check write access → validate request → claim → detach.
+  const authResult = await checkSessionOrInternalAuth(request, { requireWorkflowId: false })
+  if (!authResult.success || !authResult.userId) {
+    return NextResponse.json({ error: 'Authentication required' }, { status: 401 })
+  }
+  const userId = authResult.userId
+
+  const parsed = await parseRequest(importIntoTableAsyncContract, request, { params })
+  if (!parsed.success) return parsed.response
+  const { tableId } = parsed.data.params
+  const { workspaceId, fileKey, fileName, mode, mapping, createColumns } = parsed.data.body
+
+  const access = await checkAccess(tableId, userId, 'write')
+  if (!access.ok) return accessError(access, requestId, tableId)
+  const { table } = access
+
+  if (table.workspaceId !== workspaceId) {
+    return NextResponse.json({ error: 'Invalid workspace ID' }, { status: 400 })
+  }
+  // The fileKey is client-supplied — require this workspace's storage prefix and reject `..`
+  // segments so `workspace/<id>/../<other>/…` can't resolve to another workspace's object.
+  if (!fileKey.startsWith(`workspace/${workspaceId}/`) || fileKey.split('/').includes('..')) {
+    return NextResponse.json({ error: 'Invalid file key for workspace' }, { status: 400 })
+  }
+  if (table.archivedAt) {
+    return NextResponse.json({ error: 'Cannot import into an archived table' }, { status: 400 })
+  }
+
+  const ext = fileName.split('.').pop()?.toLowerCase()
+  if (ext !== 'csv' && ext !== 'tsv') {
+    return NextResponse.json({ error: 'Only CSV and TSV files are supported' }, { status: 400 })
+  }
+  const delimiter = ext === 'tsv' ? '\t' : ','
+
+  // Atomically claim the table — the single concurrency gate. If another import already holds it,
+  // this returns false (no overlapping workers writing colliding row positions).
+  const importId = generateId()
+  const claimed = await markTableImporting(tableId, importId)
+  if (!claimed) {
+    return NextResponse.json(
+      { error: 'An import is already in progress for this table' },
+      { status: 409 }
+    )
+  }
+  // The import is handed to runDetached and not awaited; the response below does not wait for it.
+  runDetached('table-import', () =>
+    runTableImport({
+      importId,
+      tableId,
+      workspaceId,
+      userId,
+      fileKey,
+      fileName,
+      delimiter,
+      mode,
+      mapping,
+      createColumns,
+    })
+  )
+
+  logger.info(`[${requestId}] Async CSV import into existing table started`, {
+    tableId,
+    importId,
+    mode,
+    fileName,
+  })
+  return NextResponse.json({ success: true, data: { tableId, importId } })
+})
diff --git a/apps/sim/app/api/table/[tableId]/import/cancel/route.test.ts b/apps/sim/app/api/table/[tableId]/import/cancel/route.test.ts
new file mode 100644
index 00000000000..d45baae77e2
--- /dev/null
+++ b/apps/sim/app/api/table/[tableId]/import/cancel/route.test.ts
@@ -0,0 +1,110 @@
+/**
+ * @vitest-environment node
+ */
+import { hybridAuthMockFns } from '@sim/testing'
+import { NextRequest } from 'next/server'
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+import type { TableDefinition } from '@/lib/table'
+
+const { mockCheckAccess, mockMarkImportCanceled, mockAppendTableEvent } = vi.hoisted(() => ({
+ mockCheckAccess: vi.fn(),
+ mockMarkImportCanceled: vi.fn(),
+ mockAppendTableEvent: vi.fn(),
+}))
+// Swap the route's collaborators for the hoisted mocks; the fake accessError only echoes the status.
+vi.mock('@/lib/table/service', () => ({ markImportCanceled: mockMarkImportCanceled }))
+vi.mock('@/lib/table/events', () => ({ appendTableEvent: mockAppendTableEvent }))
+vi.mock('@/app/api/table/utils', async () => {
+ const { NextResponse } = await import('next/server')
+ return {
+ checkAccess: mockCheckAccess,
+ accessError: (result: { status: number }) =>
+ NextResponse.json({ error: 'denied' }, { status: result.status }),
+ }
+})
+
+import { POST } from '@/app/api/table/[tableId]/import/cancel/route'
+
+function buildTable(overrides: Partial<TableDefinition> = {}): TableDefinition {
+  return {
+    id: 'tbl_1',
+    name: 'People',
+    description: null,
+    schema: { columns: [{ name: 'name', type: 'string' }] },
+    metadata: null,
+    rowCount: 0,
+    maxRows: 1_000_000,
+    workspaceId: 'workspace-1',
+    createdBy: 'user-1',
+    archivedAt: null,
+    createdAt: new Date(),
+    updatedAt: new Date(),
+    ...overrides, // caller-supplied fields replace the fixture defaults above
+  }
+}
+
+function makeRequest(body: unknown, tableId = 'tbl_1') {
+  const routeContext = { params: Promise.resolve({ tableId }) }
+  const headers = { 'content-type': 'application/json' }
+  const target = `http://localhost:3000/api/table/${tableId}/import/cancel`
+  // Invoke the route handler directly with a JSON-encoded POST request for the given table.
+  const request = new NextRequest(target, { method: 'POST', headers, body: JSON.stringify(body) })
+  return POST(request, routeContext)
+}
+
+const validBody = { workspaceId: 'workspace-1', importId: 'import-id-xyz' } // workspace matches buildTable()'s default
+
+describe('POST /api/table/[tableId]/import/cancel', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+    mockMarkImportCanceled.mockResolvedValue(true)
+    mockCheckAccess.mockResolvedValue({ ok: true, table: buildTable() })
+    hybridAuthMockFns.mockCheckSessionOrInternalAuth.mockResolvedValue({
+      authType: 'session',
+      userId: 'user-1',
+      success: true,
+    })
+  })
+
+  it('cancels the import and emits a canceled event', async () => {
+    const res = await makeRequest(validBody)
+    const { data } = await res.json()
+    const expectedEvent = { kind: 'import', status: 'canceled', importId: 'import-id-xyz' }
+
+    expect(res.status).toBe(200)
+    expect(data).toEqual({ canceled: true })
+    expect(mockMarkImportCanceled).toHaveBeenCalledWith('tbl_1', 'import-id-xyz')
+    // Only a subset of the event is pinned; any additional fields are accepted.
+    expect(mockAppendTableEvent).toHaveBeenCalledWith(expect.objectContaining(expectedEvent))
+  })
+
+  it('does not emit an event when nothing was importing', async () => {
+    mockMarkImportCanceled.mockResolvedValue(false)
+    const res = await makeRequest(validBody)
+    const { data } = await res.json()
+
+    expect(res.status).toBe(200)
+    expect(data).toEqual({ canceled: false })
+    expect(mockAppendTableEvent).not.toHaveBeenCalled()
+  })
+
+  it('returns 401 when unauthenticated', async () => {
+    hybridAuthMockFns.mockCheckSessionOrInternalAuth.mockResolvedValue({ success: false })
+    const { status } = await makeRequest(validBody)
+    expect(status).toBe(401)
+    expect(mockMarkImportCanceled).not.toHaveBeenCalled()
+  })
+
+  it('returns the access error status when access is denied', async () => {
+    mockCheckAccess.mockResolvedValue({ ok: false, status: 403 })
+    const { status } = await makeRequest(validBody)
+    expect(status).toBe(403)
+  })
+
+  it('returns 400 on workspace mismatch', async () => {
+    mockCheckAccess.mockResolvedValue({ ok: true, table: buildTable({ workspaceId: 'other-ws' }) })
+    const { status } = await makeRequest(validBody)
+    expect(status).toBe(400)
+    expect(mockMarkImportCanceled).not.toHaveBeenCalled()
+  })
+})
diff --git a/apps/sim/app/api/table/[tableId]/import/cancel/route.ts b/apps/sim/app/api/table/[tableId]/import/cancel/route.ts
new file mode 100644
index 00000000000..62ab7310f47
--- /dev/null
+++ b/apps/sim/app/api/table/[tableId]/import/cancel/route.ts
@@ -0,0 +1,54 @@
+import { createLogger } from '@sim/logger'
+import { type NextRequest, NextResponse } from 'next/server'
+import { cancelTableImportContract } from '@/lib/api/contracts/tables'
+import { parseRequest } from '@/lib/api/server'
+import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
+import { generateRequestId } from '@/lib/core/utils/request'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { appendTableEvent } from '@/lib/table/events'
+import { markImportCanceled } from '@/lib/table/service'
+import { accessError, checkAccess } from '@/app/api/table/utils'
+
+const logger = createLogger('TableImportCancelAPI')
+// Next.js route segment config: use the Node.js runtime and evaluate the handler on every request.
+export const runtime = 'nodejs'
+export const dynamic = 'force-dynamic'
+
+interface RouteParams {
+ params: Promise<{ tableId: string }> // dynamic `[tableId]` route segment, delivered as a promise
+}
+
+/**
+ * `POST /api/table/[tableId]/import/cancel` — cancel an in-flight async CSV import.
+ *
+ * Marks the import as `canceled` on the table so the detached worker's next ownership check fails
+ * and it stops inserting. Nothing is rolled back: rows already committed stay in place (the user
+ * can delete the table). When the import has already finished the call is a no-op.
+ */
+export const POST = withRouteHandler(async (request: NextRequest, { params }: RouteParams) => {
+  const requestId = generateRequestId()
+
+  const auth = await checkSessionOrInternalAuth(request, { requireWorkflowId: false })
+  const callerId = auth.success ? auth.userId : undefined
+  if (!callerId) return NextResponse.json({ error: 'Authentication required' }, { status: 401 })
+
+  const input = await parseRequest(cancelTableImportContract, request, { params })
+  if (!input.success) return input.response
+  const { tableId } = input.data.params
+  const { workspaceId, importId } = input.data.body
+
+  const access = await checkAccess(tableId, callerId, 'write')
+  if (!access.ok) return accessError(access, requestId, tableId)
+  const { table } = access
+  if (table.workspaceId !== workspaceId) {
+    return NextResponse.json({ error: 'Invalid workspace ID' }, { status: 400 })
+  }
+
+  const canceled = await markImportCanceled(tableId, importId)
+  // Publish the event only when the service reports that the import was canceled.
+  if (canceled) void appendTableEvent({ kind: 'import', tableId, importId, status: 'canceled' })
+  logger.info(`[${requestId}] Import cancel requested`, { tableId, importId, canceled })
+
+  const result = { success: true, data: { canceled } }
+  return NextResponse.json(result)
+})
diff --git a/apps/sim/app/api/table/[tableId]/import/route.test.ts b/apps/sim/app/api/table/[tableId]/import/route.test.ts
index 1a551745402..438f74e035e 100644
--- a/apps/sim/app/api/table/[tableId]/import/route.test.ts
+++ b/apps/sim/app/api/table/[tableId]/import/route.test.ts
@@ -8,16 +8,18 @@ import type { TableDefinition } from '@/lib/table'
const {
mockCheckAccess,
- mockBatchInsertRowsWithTx,
- mockReplaceTableRowsWithTx,
- mockAddTableColumnsWithTx,
+ mockImportAppendRows,
+ mockImportReplaceRows,
mockDispatchAfterBatchInsert,
+ mockMarkTableImporting,
+ mockReleaseImportClaim,
} = vi.hoisted(() => ({
mockCheckAccess: vi.fn(),
- mockBatchInsertRowsWithTx: vi.fn(),
- mockReplaceTableRowsWithTx: vi.fn(),
- mockAddTableColumnsWithTx: vi.fn(),
+ mockImportAppendRows: vi.fn(),
+ mockImportReplaceRows: vi.fn(),
mockDispatchAfterBatchInsert: vi.fn(),
+ mockMarkTableImporting: vi.fn(),
+ mockReleaseImportClaim: vi.fn(),
}))
vi.mock('@sim/utils/id', () => ({
@@ -33,20 +35,28 @@ vi.mock('@/app/api/table/utils', async () => {
const message = result.status === 404 ? 'Table not found' : 'Access denied'
return NextResponse.json({ error: message }, { status: result.status })
},
+ csvProxyBodyCapResponse: () => null,
+ multipartErrorResponse: (error: { code: string; message: string }) =>
+ NextResponse.json(
+ { error: error.message },
+ { status: error.code === 'FILE_TOO_LARGE' ? 413 : 400 }
+ ),
}
})
/**
- * The route imports `batchInsertRows` and `replaceTableRows` from the barrel,
- * which forwards them from `./service`. Mocking the service module replaces
- * both without having to touch the other real helpers (`parseCsvBuffer`,
- * `coerceRowsForTable`, etc.) exported through the barrel.
+ * The route imports `importAppendRows` / `importReplaceRows` from the barrel,
+ * which forwards them from `./service`. These functions own the import
+ * transaction (column adds + row writes); mocking the service module replaces
+ * them without touching the other real helpers (`coerceRowsForTable`,
+ * `createCsvParser`, etc.) exported through the barrel.
*/
vi.mock('@/lib/table/service', () => ({
- batchInsertRowsWithTx: mockBatchInsertRowsWithTx,
- replaceTableRowsWithTx: mockReplaceTableRowsWithTx,
- addTableColumnsWithTx: mockAddTableColumnsWithTx,
+ importAppendRows: mockImportAppendRows,
+ importReplaceRows: mockImportReplaceRows,
dispatchAfterBatchInsert: mockDispatchAfterBatchInsert,
+ markTableImporting: mockMarkTableImporting,
+ releaseImportClaim: mockReleaseImportClaim,
}))
import { POST } from '@/app/api/table/[tableId]/import/route'
@@ -64,8 +74,8 @@ function createFormData(
createColumns?: unknown
}
): FormData {
+ // Text fields must precede the file part for the streaming parser.
const form = new FormData()
- form.append('file', file)
if (options?.workspaceId !== null) {
form.append('workspaceId', options?.workspaceId ?? 'workspace-1')
}
@@ -86,6 +96,7 @@ function createFormData(
: JSON.stringify(options.createColumns)
)
}
+ form.append('file', file)
return form
}
@@ -112,10 +123,21 @@ function buildTable(overrides: Partial = {}): TableDefinition {
}
}
+/** Column additions from the first recorded importAppendRows call (its 2nd positional arg). */
+function appendAdditions(): { name: string; type: string }[] {
+ return mockImportAppendRows.mock.calls[0][1] as { name: string; type: string }[]
+}
+
+/** Rows array from the first recorded importAppendRows call (its 3rd positional arg). */
+function appendRows(): unknown[] {
+ return mockImportAppendRows.mock.calls[0][2] as unknown[]
+}
+
async function callPost(form: FormData, { tableId }: { tableId: string } = { tableId: 'tbl_1' }) {
+ // Building the request from a FormData body gives a real multipart stream and
+ // boundary, exercising the streaming `readMultipart` parser end-to-end.
const req = new NextRequest(`http://localhost:3000/api/table/${tableId}/import`, {
method: 'POST',
- headers: { 'content-length': '1024' },
body: form,
})
return POST(req, { params: Promise.resolve({ tableId }) })
@@ -130,25 +152,15 @@ describe('POST /api/table/[tableId]/import', () => {
authType: 'session',
})
mockCheckAccess.mockResolvedValue({ ok: true, table: buildTable() })
- mockBatchInsertRowsWithTx.mockImplementation(async (_trx, data: { rows: unknown[] }) =>
- data.rows.map((_, i) => ({ id: `row_${i}` }))
- )
- mockReplaceTableRowsWithTx.mockResolvedValue({ deletedCount: 0, insertedCount: 0 })
- mockAddTableColumnsWithTx.mockImplementation(
- async (
- _trx,
- table: { schema: { columns: { name: string; type: string }[] } },
- columns: { name: string; type: string }[]
- ) => ({
- ...table,
- schema: {
- columns: [
- ...table.schema.columns,
- ...columns.map((c) => ({ name: c.name, type: c.type as 'string' })),
- ],
- },
+ mockImportAppendRows.mockImplementation(
+ async (table: TableDefinition, _additions: unknown, rows: unknown[]) => ({
+ inserted: rows.map((_, i) => ({ id: `row_${i}` })),
+ table,
})
)
+ mockImportReplaceRows.mockResolvedValue({ deletedCount: 0, insertedCount: 0 })
+ mockMarkTableImporting.mockResolvedValue(true)
+ mockReleaseImportClaim.mockResolvedValue(undefined)
})
it('returns 401 when the user is not authenticated', async () => {
@@ -160,6 +172,22 @@ describe('POST /api/table/[tableId]/import', () => {
expect(response.status).toBe(401)
})
+ it('returns 409 when a background import already holds the table (claim lost)', async () => {
+ mockMarkTableImporting.mockResolvedValueOnce(false)
+ const response = await callPost(createFormData(createCsvFile('name,age\nAlice,30')))
+ expect(response.status).toBe(409)
+ expect(mockImportAppendRows).not.toHaveBeenCalled()
+ expect(mockImportReplaceRows).not.toHaveBeenCalled()
+ expect(mockReleaseImportClaim).not.toHaveBeenCalled()
+ })
+
+ it('releases the import claim after a successful write', async () => {
+ const response = await callPost(createFormData(createCsvFile('name,age\nAlice,30')))
+ expect(response.status).toBe(200)
+ expect(mockMarkTableImporting).toHaveBeenCalledWith('tbl_1', 'deadbeefcafef00d')
+ expect(mockReleaseImportClaim).toHaveBeenCalledWith('tbl_1', 'deadbeefcafef00d')
+ })
+
it('returns 400 when the mode is invalid', async () => {
const response = await callPost(
createFormData(createCsvFile('name,age\nAlice,30'), { mode: 'bogus' })
@@ -186,24 +214,32 @@ describe('POST /api/table/[tableId]/import', () => {
expect(data.error).toMatch(/archived/i)
})
- it('returns 413 for oversized CSV files before reading their contents', async () => {
- const file = createCsvFile('name,age\nAlice,30')
- Object.defineProperty(file, 'size', {
- value: 26 * 1024 * 1024,
- })
- const arrayBufferSpy = vi.spyOn(file, 'arrayBuffer')
-
+ it('returns 400 when the file part precedes the required fields', async () => {
+ // Build a raw multipart body with the file BEFORE workspaceId.
+ const boundary = '----orderboundary'
+ const body = Buffer.concat([
+ Buffer.from(
+ `--${boundary}\r\nContent-Disposition: form-data; name="file"; filename="data.csv"\r\nContent-Type: text/csv\r\n\r\nname,age\nAlice,30\r\n`
+ ),
+ Buffer.from(`--${boundary}\r\nContent-Disposition: form-data; name="workspaceId"\r\n\r\n`),
+ Buffer.from('workspace-1\r\n'),
+ Buffer.from(`--${boundary}--\r\n`),
+ ])
const req = {
- formData: async () => createFormData(file),
+ headers: new Headers({ 'content-type': `multipart/form-data; boundary=${boundary}` }),
+ body: new ReadableStream({
+ start(controller) {
+ controller.enqueue(new Uint8Array(body))
+ controller.close()
+ },
+ }),
+ signal: undefined,
} as unknown as NextRequest
const response = await POST(req, { params: Promise.resolve({ tableId: 'tbl_1' }) })
- expect(response.status).toBe(413)
- const data = await response.json()
- expect(data.error).toMatch(/CSV import file exceeds maximum size/)
- expect(arrayBufferSpy).not.toHaveBeenCalled()
- expect(mockBatchInsertRowsWithTx).not.toHaveBeenCalled()
- expect(mockReplaceTableRowsWithTx).not.toHaveBeenCalled()
+ expect(response.status).toBe(400)
+ expect(mockImportAppendRows).not.toHaveBeenCalled()
+ expect(mockImportReplaceRows).not.toHaveBeenCalled()
})
it('returns 400 when the CSV is missing a required column', async () => {
@@ -212,10 +248,10 @@ describe('POST /api/table/[tableId]/import', () => {
const data = await response.json()
expect(data.error).toMatch(/missing required columns/i)
expect(data.details?.missingRequired).toEqual(['name'])
- expect(mockBatchInsertRowsWithTx).not.toHaveBeenCalled()
+ expect(mockImportAppendRows).not.toHaveBeenCalled()
})
- it('appends rows via batchInsertRows', async () => {
+ it('appends rows via importAppendRows', async () => {
const response = await callPost(
createFormData(createCsvFile('name,age\nAlice,30\nBob,40'), { mode: 'append' })
)
@@ -223,13 +259,12 @@ describe('POST /api/table/[tableId]/import', () => {
const data = await response.json()
expect(data.data.mode).toBe('append')
expect(data.data.insertedCount).toBe(2)
- expect(mockBatchInsertRowsWithTx).toHaveBeenCalledTimes(1)
- const callArgs = mockBatchInsertRowsWithTx.mock.calls[0][1] as { rows: unknown[] }
- expect(callArgs.rows).toEqual([
+ expect(mockImportAppendRows).toHaveBeenCalledTimes(1)
+ expect(appendRows()).toEqual([
{ name: 'Alice', age: 30 },
{ name: 'Bob', age: 40 },
])
- expect(mockReplaceTableRowsWithTx).not.toHaveBeenCalled()
+ expect(mockImportReplaceRows).not.toHaveBeenCalled()
})
it('accepts chunked multipart imports without a content-length header', async () => {
@@ -244,7 +279,7 @@ describe('POST /api/table/[tableId]/import', () => {
const response = await POST(req, { params: Promise.resolve({ tableId: 'tbl_1' }) })
expect(response.status).toBe(200)
- expect(mockBatchInsertRowsWithTx).toHaveBeenCalledTimes(1)
+ expect(mockImportAppendRows).toHaveBeenCalledTimes(1)
})
it('rejects append when it would exceed maxRows', async () => {
@@ -258,11 +293,11 @@ describe('POST /api/table/[tableId]/import', () => {
expect(response.status).toBe(400)
const data = await response.json()
expect(data.error).toMatch(/exceed table row limit/)
- expect(mockBatchInsertRowsWithTx).not.toHaveBeenCalled()
+ expect(mockImportAppendRows).not.toHaveBeenCalled()
})
- it('replaces rows via replaceTableRows', async () => {
- mockReplaceTableRowsWithTx.mockResolvedValueOnce({ deletedCount: 5, insertedCount: 2 })
+ it('replaces rows via importReplaceRows', async () => {
+ mockImportReplaceRows.mockResolvedValueOnce({ deletedCount: 5, insertedCount: 2 })
const response = await callPost(
createFormData(createCsvFile('name,age\nAlice,30\nBob,40'), { mode: 'replace' })
)
@@ -271,8 +306,8 @@ describe('POST /api/table/[tableId]/import', () => {
expect(data.data.mode).toBe('replace')
expect(data.data.deletedCount).toBe(5)
expect(data.data.insertedCount).toBe(2)
- expect(mockReplaceTableRowsWithTx).toHaveBeenCalledTimes(1)
- expect(mockBatchInsertRowsWithTx).not.toHaveBeenCalled()
+ expect(mockImportReplaceRows).toHaveBeenCalledTimes(1)
+ expect(mockImportAppendRows).not.toHaveBeenCalled()
})
it('uses an explicit mapping when provided', async () => {
@@ -285,8 +320,7 @@ describe('POST /api/table/[tableId]/import', () => {
expect(response.status).toBe(200)
const data = await response.json()
expect(data.data.mappedColumns).toEqual(['First Name', 'Years'])
- const callArgs = mockBatchInsertRowsWithTx.mock.calls[0][1] as { rows: unknown[] }
- expect(callArgs.rows).toEqual([
+ expect(appendRows()).toEqual([
{ name: 'Alice', age: 30 },
{ name: 'Bob', age: 40 },
])
@@ -316,8 +350,8 @@ describe('POST /api/table/[tableId]/import', () => {
expect(data.error).toMatch(/Mapping values must be/)
})
- it('surfaces unique violations from batchInsertRows as 400', async () => {
- mockBatchInsertRowsWithTx.mockRejectedValueOnce(
+ it('surfaces unique violations from importAppendRows as 400', async () => {
+ mockImportAppendRows.mockRejectedValueOnce(
new Error('Row 1: Column "name" must be unique. Value "Alice" already exists in row row_xxx')
)
const response = await callPost(
@@ -337,7 +371,7 @@ describe('POST /api/table/[tableId]/import', () => {
)
)
expect(response.status).toBe(200)
- expect(mockBatchInsertRowsWithTx).toHaveBeenCalledTimes(1)
+ expect(mockImportAppendRows).toHaveBeenCalledTimes(1)
})
it('returns 400 for unsupported file extensions', async () => {
@@ -358,12 +392,9 @@ describe('POST /api/table/[tableId]/import', () => {
})
)
expect(response.status).toBe(200)
- expect(mockAddTableColumnsWithTx).toHaveBeenCalledTimes(1)
- const [, , columns] = mockAddTableColumnsWithTx.mock.calls[0]
- expect(columns).toEqual([{ name: 'email', type: 'string' }])
-
- const callArgs = mockBatchInsertRowsWithTx.mock.calls[0][1] as { rows: unknown[] }
- expect(callArgs.rows).toEqual([
+ expect(mockImportAppendRows).toHaveBeenCalledTimes(1)
+ expect(appendAdditions()).toEqual([{ name: 'email', type: 'string' }])
+ expect(appendRows()).toEqual([
{ name: 'Alice', age: 30, email: 'a@x.io' },
{ name: 'Bob', age: 40, email: 'b@x.io' },
])
@@ -377,8 +408,7 @@ describe('POST /api/table/[tableId]/import', () => {
})
)
expect(response.status).toBe(200)
- const [, , columns] = mockAddTableColumnsWithTx.mock.calls[0]
- expect(columns).toEqual([{ name: 'score', type: 'number' }])
+ expect(appendAdditions()).toEqual([{ name: 'score', type: 'number' }])
})
it('dedupes when sanitized name collides with an existing column', async () => {
@@ -401,8 +431,7 @@ describe('POST /api/table/[tableId]/import', () => {
})
)
expect(response.status).toBe(200)
- const [, , columns] = mockAddTableColumnsWithTx.mock.calls[0]
- expect(columns).toEqual([{ name: 'Email_2', type: 'string' }])
+ expect(appendAdditions()).toEqual([{ name: 'Email_2', type: 'string' }])
})
it('returns 400 when createColumns references a header not in the CSV', async () => {
@@ -415,8 +444,7 @@ describe('POST /api/table/[tableId]/import', () => {
expect(response.status).toBe(400)
const data = await response.json()
expect(data.error).toMatch(/unknown CSV headers/)
- expect(mockAddTableColumnsWithTx).not.toHaveBeenCalled()
- expect(mockBatchInsertRowsWithTx).not.toHaveBeenCalled()
+ expect(mockImportAppendRows).not.toHaveBeenCalled()
})
it('returns 400 when createColumns is not an array of strings', async () => {
@@ -429,7 +457,7 @@ describe('POST /api/table/[tableId]/import', () => {
expect(response.status).toBe(400)
const data = await response.json()
expect(data.error).toMatch(/createColumns must be a JSON array/)
- expect(mockAddTableColumnsWithTx).not.toHaveBeenCalled()
+ expect(mockImportAppendRows).not.toHaveBeenCalled()
})
it('returns 400 when createColumns is invalid JSON', async () => {
@@ -444,8 +472,8 @@ describe('POST /api/table/[tableId]/import', () => {
expect(data.error).toMatch(/createColumns must be valid JSON/)
})
- it('surfaces addTableColumns failures as 400', async () => {
- mockAddTableColumnsWithTx.mockRejectedValueOnce(new Error('Column "email" already exists'))
+ it('surfaces column-creation failures from importAppendRows as 400', async () => {
+ mockImportAppendRows.mockRejectedValueOnce(new Error('Column "email" already exists'))
const response = await callPost(
createFormData(createCsvFile('name,age,email\nAlice,30,a@x.io'), {
mode: 'append',
@@ -455,30 +483,30 @@ describe('POST /api/table/[tableId]/import', () => {
expect(response.status).toBe(400)
const data = await response.json()
expect(data.error).toMatch(/already exists/)
- expect(mockBatchInsertRowsWithTx).not.toHaveBeenCalled()
})
it('surfaces row insert failures without success when schema was mutated', async () => {
- mockBatchInsertRowsWithTx.mockRejectedValueOnce(new Error('must be unique'))
+ mockImportAppendRows.mockRejectedValueOnce(new Error('must be unique'))
const response = await callPost(
createFormData(createCsvFile('name,age,email\nAlice,30,a@x.io'), {
mode: 'append',
createColumns: ['email'],
})
)
- expect(mockAddTableColumnsWithTx).toHaveBeenCalled()
+ // Route forwarded the column addition into the (now atomic) import op.
+ expect(appendAdditions()).toEqual([{ name: 'email', type: 'string' }])
expect(response.status).toBe(400)
const data = await response.json()
expect(data.success).toBeUndefined()
expect(data.error).toMatch(/must be unique/)
})
- it('does not call addTableColumns when createColumns is omitted', async () => {
+ it('passes no additions when createColumns is omitted', async () => {
const response = await callPost(
createFormData(createCsvFile('name,age\nAlice,30'), { mode: 'append' })
)
expect(response.status).toBe(200)
- expect(mockAddTableColumnsWithTx).not.toHaveBeenCalled()
+ expect(appendAdditions()).toEqual([])
})
})
})
diff --git a/apps/sim/app/api/table/[tableId]/import/route.ts b/apps/sim/app/api/table/[tableId]/import/route.ts
index e097723c023..5fd11d20426 100644
--- a/apps/sim/app/api/table/[tableId]/import/route.ts
+++ b/apps/sim/app/api/table/[tableId]/import/route.ts
@@ -1,4 +1,4 @@
-import { db } from '@sim/db'
+import type { Readable } from 'node:stream'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { generateId } from '@sim/utils/id'
@@ -13,36 +13,39 @@ import {
} from '@/lib/api/contracts/tables'
import { getValidationErrorMessage } from '@/lib/api/server'
import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
+import { isMultipartError, readMultipart } from '@/lib/core/utils/multipart'
import { generateRequestId } from '@/lib/core/utils/request'
-import {
- isPayloadSizeLimitError,
- readFileToBufferWithLimit,
- readFormDataWithLimit,
-} from '@/lib/core/utils/stream-limits'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
import {
- addTableColumnsWithTx,
- batchInsertRowsWithTx,
buildAutoMapping,
- CSV_MAX_BATCH_SIZE,
CSV_MAX_FILE_SIZE_BYTES,
type CsvHeaderMapping,
CsvImportValidationError,
coerceRowsForTable,
+ createCsvParser,
dispatchAfterBatchInsert,
+ importAppendRows,
+ importReplaceRows,
inferColumnType,
- parseCsvBuffer,
- replaceTableRowsWithTx,
+ markTableImporting,
+ releaseImportClaim,
sanitizeName,
type TableDefinition,
- type TableRow,
type TableSchema,
validateMapping,
} from '@/lib/table'
-import { accessError, checkAccess } from '@/app/api/table/utils'
+import {
+ accessError,
+ checkAccess,
+ csvProxyBodyCapResponse,
+ multipartErrorResponse,
+} from '@/app/api/table/utils'
const logger = createLogger('TableImportCSVExisting')
-const MAX_MULTIPART_OVERHEAD_BYTES = 1024 * 1024
+
+export const runtime = 'nodejs'
+export const dynamic = 'force-dynamic'
+export const maxDuration = 300
interface RouteParams {
params: Promise<{ tableId: string }>
@@ -51,6 +54,8 @@ interface RouteParams {
export const POST = withRouteHandler(async (request: NextRequest, { params }: RouteParams) => {
const requestId = generateRequestId()
const { tableId } = tableIdParamsSchema.parse(await params)
+ let fileStream: Readable | undefined
+ let claimedImportId: string | null = null
try {
const authResult = await checkSessionOrInternalAuth(request, { requireWorkflowId: false })
@@ -58,29 +63,37 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
return NextResponse.json({ error: 'Authentication required' }, { status: 401 })
}
- const formData = await readFormDataWithLimit(request, {
- maxBytes: CSV_MAX_FILE_SIZE_BYTES + MAX_MULTIPART_OVERHEAD_BYTES,
- label: 'CSV import body',
- })
- const formValidation = csvImportFormSchema.safeParse({
- file: formData.get('file'),
- workspaceId: formData.get('workspaceId'),
- })
- const rawMode = formData.get('mode') ?? 'append'
- const rawMapping = formData.get('mapping')
- const rawCreateColumns = formData.get('createColumns')
-
- if (!formValidation.success) {
- const message = getValidationErrorMessage(formValidation.error)
- const isSizeLimit = message.includes('File exceeds maximum allowed size')
+ const oversize = csvProxyBodyCapResponse(request)
+ if (oversize) return oversize
+
+ let parsed: Awaited>
+ try {
+ parsed = await readMultipart(request, {
+ maxFileBytes: CSV_MAX_FILE_SIZE_BYTES,
+ requiredFieldsBeforeFile: ['workspaceId'],
+ signal: request.signal,
+ })
+ } catch (err) {
+ if (isMultipartError(err)) return multipartErrorResponse(err)
+ throw err
+ }
+
+ const { fields, file } = parsed
+ if (!file) {
+ return NextResponse.json({ error: 'CSV file is required' }, { status: 400 })
+ }
+ fileStream = file.stream
+
+ const workspaceIdResult = csvImportFormSchema.shape.workspaceId.safeParse(fields.workspaceId)
+ if (!workspaceIdResult.success) {
return NextResponse.json(
- { error: isSizeLimit ? 'CSV import file exceeds maximum size' : message },
- { status: isSizeLimit ? 413 : 400 }
+ { error: getValidationErrorMessage(workspaceIdResult.error) },
+ { status: 400 }
)
}
+ const workspaceId = workspaceIdResult.data
- const { file, workspaceId } = formValidation.data
-
+ const rawMode = fields.mode ?? 'append'
const modeValidation = csvImportModeSchema.safeParse(rawMode)
if (!modeValidation.success) {
return NextResponse.json(
@@ -90,7 +103,7 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
}
const mode = modeValidation.data
- const ext = file.name.split('.').pop()?.toLowerCase()
+ const ext = file.filename.split('.').pop()?.toLowerCase()
const extensionValidation = csvExtensionSchema.safeParse(ext)
if (!extensionValidation.success) {
return NextResponse.json(
@@ -114,10 +127,18 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
if (table.archivedAt) {
return NextResponse.json({ error: 'Cannot import into an archived table' }, { status: 400 })
}
+ // Don't run a sync import on top of an in-flight background import — concurrent writers
+ // would insert at colliding row positions.
+ if (table.importStatus === 'importing') {
+ return NextResponse.json(
+ { error: 'An import is already in progress for this table' },
+ { status: 409 }
+ )
+ }
let mapping: CsvHeaderMapping | undefined
- if (rawMapping) {
- const mappingValidation = csvImportMappingSchema.safeParse(rawMapping)
+ if (fields.mapping) {
+ const mappingValidation = csvImportMappingSchema.safeParse(fields.mapping)
if (!mappingValidation.success) {
return NextResponse.json(
{ error: getValidationErrorMessage(mappingValidation.error) },
@@ -128,8 +149,8 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
}
let createColumns: string[] | undefined
- if (rawCreateColumns) {
- const createColumnsValidation = csvImportCreateColumnsSchema.safeParse(rawCreateColumns)
+ if (fields.createColumns) {
+ const createColumnsValidation = csvImportCreateColumnsSchema.safeParse(fields.createColumns)
if (!createColumnsValidation.success) {
return NextResponse.json(
{ error: getValidationErrorMessage(createColumnsValidation.error) },
@@ -139,12 +160,19 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
createColumns = createColumnsValidation.data
}
- const buffer = await readFileToBufferWithLimit(file, {
- maxBytes: CSV_MAX_FILE_SIZE_BYTES,
- label: 'CSV import file',
- })
const delimiter = extensionValidation.data === 'tsv' ? '\t' : ','
- const { headers, rows } = await parseCsvBuffer(buffer, delimiter)
+ const parser = createCsvParser(delimiter)
+ // `.pipe` doesn't forward source errors; forward them so the iterator throws.
+ file.stream.on('error', (streamErr) => parser.destroy(streamErr))
+ file.stream.pipe(parser)
+ const rows: Record<string, unknown>[] = []
+ for await (const record of parser as AsyncIterable<Record<string, unknown>>) {
+ rows.push(record)
+ }
+ if (rows.length === 0) {
+ return NextResponse.json({ error: 'CSV file has no data rows' }, { status: 400 })
+ }
+ const headers = Object.keys(rows[0])
let effectiveMapping = mapping ?? buildAutoMapping(headers, table.schema)
let prospectiveTable: TableDefinition = table
@@ -218,6 +246,19 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
const coerced = coerceRowsForTable(rows, prospectiveTable.schema, validation.effectiveMap)
+ // Atomically claim the table before writing. The pre-check above reads a checkAccess snapshot
+ // taken before the parse/validation; a background import could claim the table in that window.
+ // markTableImporting is the single atomic gate (same one the async kickoff uses) — released in
+ // the finally so a sync import can't write concurrently with a background one (corrupts replace).
+ const syncImportId = generateId()
+ if (!(await markTableImporting(tableId, syncImportId))) {
+ return NextResponse.json(
+ { error: 'An import is already in progress for this table' },
+ { status: 409 }
+ )
+ }
+ claimedImportId = syncImportId
+
if (mode === 'append') {
if (prospectiveTable.rowCount + coerced.length > prospectiveTable.maxRows) {
const deficit = prospectiveTable.rowCount + coerced.length - prospectiveTable.maxRows
@@ -230,32 +271,12 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
}
try {
- const txResult = await db.transaction(async (trx) => {
- let working = table
- if (additions.length > 0) {
- working = await addTableColumnsWithTx(trx, table, additions, requestId)
- }
-
- const allInserted: TableRow[] = []
- for (let i = 0; i < coerced.length; i += CSV_MAX_BATCH_SIZE) {
- const batch = coerced.slice(i, i + CSV_MAX_BATCH_SIZE)
- const batchRequestId = generateId().slice(0, 8)
- const result = await batchInsertRowsWithTx(
- trx,
- {
- tableId: working.id,
- rows: batch,
- workspaceId,
- userId: authResult.userId,
- },
- working,
- batchRequestId
- )
- allInserted.push(...result)
- }
- return { inserted: allInserted, working }
- })
- const { inserted: insertedRows, working: finalTable } = txResult
+ const { inserted: insertedRows, table: finalTable } = await importAppendRows(
+ table,
+ additions,
+ coerced,
+ { workspaceId, userId: authResult.userId, requestId }
+ )
const inserted = insertedRows.length
// Fire trigger + scheduler AFTER the tx commits — both read through the
// global db connection and would otherwise see no rows.
@@ -263,7 +284,7 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
logger.info(`[${requestId}] Append CSV imported`, {
tableId: table.id,
- fileName: file.name,
+ fileName: file.filename,
mode,
inserted,
createdColumns: additions.length,
@@ -280,7 +301,7 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
mappedColumns: validation.mappedHeaders,
skippedHeaders: validation.skippedHeaders,
unmappedColumns: validation.unmappedColumns,
- sourceFile: file.name,
+ sourceFile: file.filename,
},
})
} catch (err) {
@@ -310,22 +331,16 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
}
try {
- const result = await db.transaction(async (trx) => {
- let working = table
- if (additions.length > 0) {
- working = await addTableColumnsWithTx(trx, table, additions, requestId)
- }
- return replaceTableRowsWithTx(
- trx,
- { tableId: working.id, rows: coerced, workspaceId, userId: authResult.userId },
- working,
- requestId
- )
- })
+ const result = await importReplaceRows(
+ table,
+ additions,
+ { rows: coerced, workspaceId, userId: authResult.userId },
+ requestId
+ )
logger.info(`[${requestId}] Replace CSV imported`, {
tableId: table.id,
- fileName: file.name,
+ fileName: file.filename,
mode,
deleted: result.deletedCount,
inserted: result.insertedCount,
@@ -343,7 +358,7 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
mappedColumns: validation.mappedHeaders,
skippedHeaders: validation.skippedHeaders,
unmappedColumns: validation.unmappedColumns,
- sourceFile: file.name,
+ sourceFile: file.filename,
},
})
} catch (err) {
@@ -362,22 +377,23 @@ export const POST = withRouteHandler(async (request: NextRequest, { params }: Ro
throw err
}
} catch (error) {
+ if (isMultipartError(error)) return multipartErrorResponse(error)
+
const message = toError(error).message
logger.error(`[${requestId}] CSV import into existing table failed:`, error)
- const isSizeLimitError =
- isPayloadSizeLimitError(error) || message.includes('CSV import file exceeds maximum size')
const isClientError =
message.includes('CSV file has no') ||
message.includes('already exists') ||
- message.includes('Invalid column name') ||
- isSizeLimitError
+ message.includes('Invalid column name')
return NextResponse.json(
{ error: isClientError ? message : 'Failed to import CSV' },
- {
- status: isSizeLimitError ? 413 : isClientError ? 400 : 500,
- }
+ { status: isClientError ? 400 : 500 }
)
+ } finally {
+ fileStream?.destroy()
+ // Release before the response returns, so a client refetch never observes the transient claim.
+ if (claimedImportId) await releaseImportClaim(tableId, claimedImportId).catch(() => {})
}
})
diff --git a/apps/sim/app/api/table/[tableId]/route.ts b/apps/sim/app/api/table/[tableId]/route.ts
index 0e73ecaaeba..c0b018f854e 100644
--- a/apps/sim/app/api/table/[tableId]/route.ts
+++ b/apps/sim/app/api/table/[tableId]/route.ts
@@ -68,6 +68,10 @@ export const GET = withRouteHandler(async (request: NextRequest, { params }: Tab
table.updatedAt instanceof Date
? table.updatedAt.toISOString()
: String(table.updatedAt),
+ importStatus: table.importStatus ?? null,
+ importId: table.importId ?? null,
+ importError: table.importError ?? null,
+ importRowsProcessed: table.importRowsProcessed ?? 0,
},
},
})
diff --git a/apps/sim/app/api/table/[tableId]/rows/route.ts b/apps/sim/app/api/table/[tableId]/rows/route.ts
index 8e29e12005c..75fd9487dfe 100644
--- a/apps/sim/app/api/table/[tableId]/rows/route.ts
+++ b/apps/sim/app/api/table/[tableId]/rows/route.ts
@@ -1,8 +1,5 @@
-import { db } from '@sim/db'
-import { tableRowExecutions, userTableRows } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
-import { and, eq, inArray, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import {
type BatchInsertTableRowsBodyInput,
@@ -17,27 +14,20 @@ import { isZodError, validationErrorResponse } from '@/lib/api/server/validation
import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
import { generateRequestId } from '@/lib/core/utils/request'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
-import type {
- Filter,
- RowData,
- RowExecutionMetadata,
- RowExecutions,
- Sort,
- TableSchema,
-} from '@/lib/table'
+import type { Filter, RowData, Sort, TableSchema } from '@/lib/table'
import {
batchInsertRows,
batchUpdateRows,
deleteRowsByFilter,
deleteRowsByIds,
insertRow,
- USER_TABLE_ROWS_SQL_NAME,
updateRowsByFilter,
validateBatchRows,
validateRowData,
validateRowSize,
} from '@/lib/table'
-import { buildFilterClause, buildSortClause, TableQueryValidationError } from '@/lib/table/sql'
+import { queryRows } from '@/lib/table/service'
+import { TableQueryValidationError } from '@/lib/table/sql'
import { accessError, checkAccess } from '@/app/api/table/utils'
const logger = createLogger('TableRowsAPI')
@@ -81,6 +71,7 @@ async function handleBatchInsert(
workspaceId: validated.workspaceId,
userId,
positions: validated.positions,
+ orderKeys: validated.orderKeys,
},
table,
requestId
@@ -93,6 +84,7 @@ async function handleBatchInsert(
id: r.id,
data: r.data,
position: r.position,
+ orderKey: r.orderKey ?? undefined,
createdAt: r.createdAt instanceof Date ? r.createdAt.toISOString() : r.createdAt,
updatedAt: r.updatedAt instanceof Date ? r.updatedAt.toISOString() : r.updatedAt,
})),
@@ -172,6 +164,8 @@ export const POST = withRouteHandler(
workspaceId: validated.workspaceId,
userId: authResult.userId,
position: validated.position,
+ afterRowId: validated.afterRowId,
+ beforeRowId: validated.beforeRowId,
},
table,
requestId
@@ -184,9 +178,11 @@ export const POST = withRouteHandler(
id: row.id,
data: row.data,
position: row.position,
+ orderKey: row.orderKey ?? undefined,
createdAt: row.createdAt instanceof Date ? row.createdAt.toISOString() : row.createdAt,
updatedAt: row.updatedAt instanceof Date ? row.updatedAt.toISOString() : row.updatedAt,
},
+
message: 'Row inserted successfully',
},
})
@@ -268,113 +264,35 @@ export const GET = withRouteHandler(
return NextResponse.json({ error: 'Invalid workspace ID' }, { status: 400 })
}
- const baseConditions = [
- eq(userTableRows.tableId, tableId),
- eq(userTableRows.workspaceId, validated.workspaceId),
- ]
-
- const schema = table.schema as TableSchema
-
- if (validated.filter) {
- const filterClause = buildFilterClause(
- validated.filter as Filter,
- USER_TABLE_ROWS_SQL_NAME,
- schema.columns
- )
- if (filterClause) {
- baseConditions.push(filterClause)
- }
- }
-
- let query = db
- .select({
- id: userTableRows.id,
- data: userTableRows.data,
- position: userTableRows.position,
- createdAt: userTableRows.createdAt,
- updatedAt: userTableRows.updatedAt,
- })
- .from(userTableRows)
- .where(and(...baseConditions))
-
- if (validated.sort) {
- const sortClause = buildSortClause(validated.sort, USER_TABLE_ROWS_SQL_NAME, schema.columns)
- if (sortClause) {
- query = query.orderBy(sortClause) as typeof query
- } else {
- query = query.orderBy(userTableRows.position) as typeof query
- }
- } else {
- query = query.orderBy(userTableRows.position) as typeof query
- }
-
- let totalCount: number | null = null
- if (validated.includeTotal) {
- const [{ count }] = await db
- .select({ count: sql`count(*)` })
- .from(userTableRows)
- .where(and(...baseConditions))
- totalCount = Number(count)
- }
-
- const rows = await query.limit(validated.limit).offset(validated.offset)
-
- // Sidecar: fetch per-(row, group) execution state and group into a map
- // so the response preserves the legacy `row.executions[groupId]` wire
- // shape. One indexed-IN scan against table_row_executions.
- const executionsByRow = new Map<string, RowExecutions>()
- if (rows.length > 0) {
- const execRows = await db
- .select()
- .from(tableRowExecutions)
- .where(
- inArray(
- tableRowExecutions.rowId,
- rows.map((r) => r.id)
- )
- )
- for (const e of execRows) {
- const existing = executionsByRow.get(e.rowId) ?? {}
- const meta: RowExecutionMetadata = {
- status: e.status as RowExecutionMetadata['status'],
- executionId: e.executionId ?? null,
- jobId: e.jobId ?? null,
- workflowId: e.workflowId,
- error: e.error ?? null,
- ...(e.runningBlockIds && e.runningBlockIds.length > 0
- ? { runningBlockIds: e.runningBlockIds }
- : {}),
- ...(e.blockErrors && Object.keys(e.blockErrors as Record).length > 0
- ? { blockErrors: e.blockErrors as Record }
- : {}),
- ...(e.cancelledAt ? { cancelledAt: e.cancelledAt.toISOString() } : {}),
- }
- existing[e.groupId] = meta
- executionsByRow.set(e.rowId, existing)
- }
- }
-
- logger.info(
- `[${requestId}] Queried ${rows.length} rows from table ${tableId} (total: ${totalCount ?? 'n/a'})`
+ const result = await queryRows(
+ table,
+ {
+ filter: validated.filter as Filter | undefined,
+ sort: validated.sort,
+ limit: validated.limit,
+ offset: validated.offset,
+ includeTotal: validated.includeTotal,
+ },
+ requestId
)
return NextResponse.json({
success: true,
data: {
- rows: rows.map((r) => ({
+ rows: result.rows.map((r) => ({
id: r.id,
data: r.data,
- executions: executionsByRow.get(r.id) ?? {},
+ executions: r.executions,
position: r.position,
createdAt:
r.createdAt instanceof Date ? r.createdAt.toISOString() : String(r.createdAt),
updatedAt:
r.updatedAt instanceof Date ? r.updatedAt.toISOString() : String(r.updatedAt),
})),
- rowCount: rows.length,
- totalCount,
- limit: validated.limit,
- offset: validated.offset,
+ rowCount: result.rowCount,
+ totalCount: result.totalCount,
+ limit: result.limit,
+ offset: result.offset,
},
})
} catch (error) {
diff --git a/apps/sim/app/api/table/import-async/route.test.ts b/apps/sim/app/api/table/import-async/route.test.ts
new file mode 100644
index 00000000000..8ecdd2a923a
--- /dev/null
+++ b/apps/sim/app/api/table/import-async/route.test.ts
@@ -0,0 +1,123 @@
+/**
+ * @vitest-environment node
+ */
+import { hybridAuthMockFns, permissionsMock, permissionsMockFns } from '@sim/testing'
+import { NextRequest } from 'next/server'
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+const {
+ mockCreateTable,
+ mockGetLimits,
+ mockListTables,
+ mockRunTableImport,
+ mockRunDetached,
+ MockTableConflictError,
+} = vi.hoisted(() => ({
+ mockCreateTable: vi.fn(),
+ mockGetLimits: vi.fn(),
+ mockListTables: vi.fn(),
+ mockRunTableImport: vi.fn(),
+ mockRunDetached: vi.fn(),
+ MockTableConflictError: class extends Error {
+ readonly code = 'TABLE_EXISTS' as const
+ },
+}))
+
+vi.mock('@sim/utils/id', () => ({
+ generateId: vi.fn().mockReturnValue('import-id-123'),
+ generateShortId: vi.fn().mockReturnValue('short-id'),
+}))
+
+vi.mock('@/lib/table', () => ({
+ createTable: mockCreateTable,
+ getWorkspaceTableLimits: mockGetLimits,
+ listTables: mockListTables,
+ sanitizeName: (name: string) => name.replace(/[^a-zA-Z0-9_]/g, '_'),
+ TABLE_LIMITS: { MAX_TABLE_NAME_LENGTH: 128 },
+ TableConflictError: MockTableConflictError,
+}))
+vi.mock('@/lib/table/import-runner', () => ({ runTableImport: mockRunTableImport }))
+vi.mock('@/lib/core/utils/background', () => ({
+ runDetached: mockRunDetached.mockImplementation(
+ (_label: string, work: () => Promise<void>) => {
+ void work()
+ }
+ ),
+}))
+vi.mock('@/lib/workspaces/permissions/utils', () => permissionsMock)
+
+import { POST } from '@/app/api/table/import-async/route'
+
+function makeRequest(body: unknown): NextRequest {
+ return new NextRequest('http://localhost:3000/api/table/import-async', {
+ method: 'POST',
+ headers: { 'content-type': 'application/json' },
+ body: JSON.stringify(body),
+ })
+}
+
+const validBody = {
+ workspaceId: 'workspace-1',
+ fileKey: 'workspace/workspace-1/123-data.csv',
+ fileName: 'data.csv',
+}
+
+describe('POST /api/table/import-async', () => {
+ beforeEach(() => {
+ vi.clearAllMocks()
+ hybridAuthMockFns.mockCheckSessionOrInternalAuth.mockResolvedValue({
+ success: true,
+ userId: 'user-1',
+ authType: 'session',
+ })
+ permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue('write')
+ mockGetLimits.mockResolvedValue({ maxRowsPerTable: 1_000_000, maxTables: 50 })
+ mockListTables.mockResolvedValue([])
+ mockCreateTable.mockResolvedValue({ id: 'tbl_async', name: 'data' })
+ mockRunTableImport.mockResolvedValue(undefined)
+ })
+
+ it('creates an importing table and kicks off the background import', async () => {
+ const response = await POST(makeRequest(validBody))
+ const data = await response.json()
+
+ expect(response.status).toBe(200)
+ expect(data.data).toEqual({ tableId: 'tbl_async', importId: 'import-id-123' })
+ expect(mockCreateTable).toHaveBeenCalledWith(
+ expect.objectContaining({ importStatus: 'importing', importId: 'import-id-123' }),
+ expect.any(String)
+ )
+ expect(mockRunTableImport).toHaveBeenCalledWith(
+ expect.objectContaining({ tableId: 'tbl_async', mode: 'create', delimiter: ',' })
+ )
+ })
+
+ it('uses a tab delimiter for .tsv files', async () => {
+ await POST(makeRequest({ ...validBody, fileName: 'data.tsv' }))
+ expect(mockRunTableImport).toHaveBeenCalledWith(expect.objectContaining({ delimiter: '\t' }))
+ })
+
+ it('returns 400 for unsupported extensions', async () => {
+ const response = await POST(makeRequest({ ...validBody, fileName: 'data.json' }))
+ expect(response.status).toBe(400)
+ expect(mockCreateTable).not.toHaveBeenCalled()
+ })
+
+ it('returns 401 when unauthenticated', async () => {
+ hybridAuthMockFns.mockCheckSessionOrInternalAuth.mockResolvedValue({ success: false })
+ const response = await POST(makeRequest(validBody))
+ expect(response.status).toBe(401)
+ })
+
+ it('returns 403 without write permission', async () => {
+ permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue('read')
+ const response = await POST(makeRequest(validBody))
+ expect(response.status).toBe(403)
+ expect(mockCreateTable).not.toHaveBeenCalled()
+ })
+
+ it('returns 400 when the body is missing required fields', async () => {
+ const response = await POST(makeRequest({ workspaceId: 'workspace-1' }))
+ expect(response.status).toBe(400)
+ })
+})
diff --git a/apps/sim/app/api/table/import-async/route.ts b/apps/sim/app/api/table/import-async/route.ts
new file mode 100644
index 00000000000..43fefeca9a6
--- /dev/null
+++ b/apps/sim/app/api/table/import-async/route.ts
@@ -0,0 +1,115 @@
+import { createLogger } from '@sim/logger'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { importTableAsyncContract } from '@/lib/api/contracts/tables'
+import { parseRequest } from '@/lib/api/server'
+import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
+import { runDetached } from '@/lib/core/utils/background'
+import { generateRequestId } from '@/lib/core/utils/request'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import {
+ createTable,
+ getWorkspaceTableLimits,
+ listTables,
+ sanitizeName,
+ TABLE_LIMITS,
+ TableConflictError,
+} from '@/lib/table'
+import { runTableImport } from '@/lib/table/import-runner'
+import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
+
+const logger = createLogger('TableImportAsync')
+
+export const runtime = 'nodejs'
+export const dynamic = 'force-dynamic'
+
+export const POST = withRouteHandler(async (request: NextRequest) => {
+ const requestId = generateRequestId()
+
+ const authResult = await checkSessionOrInternalAuth(request, { requireWorkflowId: false })
+ if (!authResult.success || !authResult.userId) {
+ return NextResponse.json({ error: 'Authentication required' }, { status: 401 })
+ }
+ const userId = authResult.userId
+
+ const parsed = await parseRequest(importTableAsyncContract, request, {})
+ if (!parsed.success) return parsed.response
+ const { workspaceId, fileKey, fileName } = parsed.data.body
+
+ const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId)
+ if (permission !== 'write' && permission !== 'admin') {
+ return NextResponse.json({ error: 'Access denied' }, { status: 403 })
+ }
+ // The fileKey is client-supplied — ensure it points at this workspace's storage prefix so a
+ // caller can't import another workspace's uploaded object.
+ if (!fileKey.startsWith(`workspace/${workspaceId}/`)) {
+ return NextResponse.json({ error: 'Invalid file key for workspace' }, { status: 400 })
+ }
+
+ const ext = fileName.split('.').pop()?.toLowerCase()
+ if (ext !== 'csv' && ext !== 'tsv') {
+ return NextResponse.json({ error: 'Only CSV and TSV files are supported' }, { status: 400 })
+ }
+ const delimiter = ext === 'tsv' ? '\t' : ','
+
+ const planLimits = await getWorkspaceTableLimits(workspaceId)
+ const baseName = sanitizeName(fileName.replace(/\.[^.]+$/, ''), 'imported_table').slice(
+ 0,
+ TABLE_LIMITS.MAX_TABLE_NAME_LENGTH
+ )
+ // Re-importing the same file shouldn't fail on a name collision — pick the next free
+ // `name_2`, `name_3`, … (matching how "New table" auto-names), keeping under the cap.
+ const existingNames = new Set(
+ (await listTables(workspaceId, { scope: 'all' })).map((t) => t.name.toLowerCase())
+ )
+ let tableName = baseName
+ for (let n = 2; existingNames.has(tableName.toLowerCase()); n++) {
+ const suffix = `_${n}`
+ tableName = `${baseName.slice(0, TABLE_LIMITS.MAX_TABLE_NAME_LENGTH - suffix.length)}${suffix}`
+ }
+ const importId = generateId()
+
+ // Placeholder schema satisfies createTable's validation; the import worker infers the
+ // real columns from the file and overwrites it before any rows become visible.
+ let table: Awaited<ReturnType<typeof createTable>>
+ try {
+ table = await createTable(
+ {
+ name: tableName,
+ description: `Imported from ${fileName}`,
+ schema: { columns: [{ name: 'column_1', type: 'string' }] },
+ workspaceId,
+ userId,
+ maxRows: planLimits.maxRowsPerTable,
+ maxTables: planLimits.maxTables,
+ importStatus: 'importing',
+ importId,
+ },
+ requestId
+ )
+ } catch (error) {
+ if (error instanceof TableConflictError) {
+ return NextResponse.json({ error: error.message }, { status: 409 })
+ }
+ if (error instanceof Error && error.message.includes('maximum table limit')) {
+ return NextResponse.json({ error: error.message }, { status: 400 })
+ }
+ throw error
+ }
+
+ runDetached('table-import', () =>
+ runTableImport({
+ importId,
+ tableId: table.id,
+ workspaceId,
+ userId,
+ fileKey,
+ fileName,
+ delimiter,
+ mode: 'create',
+ })
+ )
+
+ logger.info(`[${requestId}] Async CSV import started`, { tableId: table.id, importId, fileName })
+ return NextResponse.json({ success: true, data: { tableId: table.id, importId } })
+})
diff --git a/apps/sim/app/api/table/import-csv/route.test.ts b/apps/sim/app/api/table/import-csv/route.test.ts
index 9844bf69664..dc0bb0a53a5 100644
--- a/apps/sim/app/api/table/import-csv/route.test.ts
+++ b/apps/sim/app/api/table/import-csv/route.test.ts
@@ -5,10 +5,11 @@ import { hybridAuthMockFns, permissionsMock, permissionsMockFns } from '@sim/tes
import type { NextRequest } from 'next/server'
import { beforeEach, describe, expect, it, vi } from 'vitest'
-const { mockCreateTable, mockParseCsvBuffer, mockGetWorkspaceTableLimits } = vi.hoisted(() => ({
+const { mockCreateTable, mockBatchInsertRows, mockDeleteTable, mockGetLimits } = vi.hoisted(() => ({
mockCreateTable: vi.fn(),
- mockParseCsvBuffer: vi.fn(),
- mockGetWorkspaceTableLimits: vi.fn(),
+ mockBatchInsertRows: vi.fn(),
+ mockDeleteTable: vi.fn(),
+ mockGetLimits: vi.fn(),
}))
vi.mock('@sim/utils/id', () => ({
@@ -16,46 +17,83 @@ vi.mock('@sim/utils/id', () => ({
generateShortId: vi.fn().mockReturnValue('short-id'),
}))
-vi.mock('@/lib/table', () => ({
- batchInsertRows: vi.fn(),
- CSV_MAX_BATCH_SIZE: 1000,
- CSV_MAX_FILE_SIZE_BYTES: 25 * 1024 * 1024,
- coerceRowsForTable: vi.fn(),
+// Mock only the DB-backed service/billing functions; the real `./import` helpers
+// (createCsvParser, inferSchemaFromCsv, coerceRowsForTable, …) run for real so the
+// streaming multipart + CSV pipeline is exercised end-to-end.
+vi.mock('@/lib/table/service', () => ({
createTable: mockCreateTable,
- deleteTable: vi.fn(),
- getWorkspaceTableLimits: mockGetWorkspaceTableLimits,
- inferSchemaFromCsv: vi.fn(),
- parseCsvBuffer: mockParseCsvBuffer,
- sanitizeName: vi.fn((name: string) => name),
- TABLE_LIMITS: {
- MAX_TABLE_NAME_LENGTH: 64,
- },
+ batchInsertRows: mockBatchInsertRows,
+ deleteTable: mockDeleteTable,
}))
-
-vi.mock('@/app/api/table/utils', () => ({
- normalizeColumn: vi.fn((column) => column),
-}))
-
+vi.mock('@/lib/table/billing', () => ({ getWorkspaceTableLimits: mockGetLimits }))
+vi.mock('@/app/api/table/utils', async () => {
+ const { NextResponse } = await import('next/server')
+ return {
+ normalizeColumn: (column: unknown) => column,
+ csvProxyBodyCapResponse: () => null,
+ multipartErrorResponse: (error: { code: string; message: string }) =>
+ NextResponse.json(
+ { error: error.message },
+ { status: error.code === 'FILE_TOO_LARGE' ? 413 : 400 }
+ ),
+ }
+})
vi.mock('@/lib/workspaces/permissions/utils', () => permissionsMock)
import { POST } from '@/app/api/table/import-csv/route'
-function createCsvFile(contents: string, name = 'data.csv', type = 'text/csv'): File {
- return new File([contents], name, { type })
+type Part =
+ | { name: string; value: string }
+ | { name: string; filename: string; value: string; contentType?: string }
+
+const BOUNDARY = '----testboundaryCSV'
+
+function buildBody(parts: Part[]): Buffer {
+ const segments: Buffer[] = []
+ for (const part of parts) {
+ let header = `--${BOUNDARY}\r\nContent-Disposition: form-data; name="${part.name}"`
+ if ('filename' in part) {
+ header += `; filename="${part.filename}"\r\nContent-Type: ${part.contentType ?? 'text/csv'}`
+ }
+ header += '\r\n\r\n'
+ segments.push(Buffer.from(header, 'utf8'), Buffer.from(part.value, 'utf8'), Buffer.from('\r\n'))
+ }
+ segments.push(Buffer.from(`--${BOUNDARY}--\r\n`, 'utf8'))
+ return Buffer.concat(segments)
}
-function createFormData(file: File): FormData {
- const form = new FormData()
- form.append('file', file)
- form.append('workspaceId', 'workspace-1')
- return form
+function makeRequest(parts: Part[], chunkSize?: number): NextRequest {
+ const body = buildBody(parts)
+ const stream = new ReadableStream({
+ start(controller) {
+ if (chunkSize) {
+ for (let i = 0; i < body.length; i += chunkSize) {
+ controller.enqueue(new Uint8Array(body.subarray(i, i + chunkSize)))
+ }
+ } else {
+ controller.enqueue(new Uint8Array(body))
+ }
+ controller.close()
+ },
+ })
+ return {
+ headers: new Headers({ 'content-type': `multipart/form-data; boundary=${BOUNDARY}` }),
+ body: stream,
+ signal: undefined,
+ } as unknown as NextRequest
}
-async function callPost(form: FormData) {
- const req = {
- formData: async () => form,
- } as unknown as NextRequest
- return POST(req)
+function csvWithRows(count: number): string {
+ const lines = ['name,age']
+ for (let i = 0; i < count; i++) lines.push(`Person${i},${20 + (i % 50)}`)
+ return `${lines.join('\n')}\n`
+}
+
+function uploadParts(csv: string): Part[] {
+ return [
+ { name: 'workspaceId', value: 'workspace-1' },
+ { name: 'file', filename: 'data.csv', value: csv },
+ ]
}
describe('POST /api/table/import-csv', () => {
@@ -67,38 +105,93 @@ describe('POST /api/table/import-csv', () => {
authType: 'session',
})
permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue('write')
- mockGetWorkspaceTableLimits.mockResolvedValue({
- maxRowsPerTable: 1000,
- maxTables: 10,
- })
+ mockGetLimits.mockResolvedValue({ maxRowsPerTable: 1_000_000, maxTables: 50 })
+ mockCreateTable.mockImplementation(async (data) => ({
+ id: 'tbl_1',
+ name: data.name,
+ description: data.description ?? null,
+ schema: data.schema,
+ workspaceId: data.workspaceId,
+ maxRows: data.maxRows,
+ rowCount: 0,
+ createdBy: 'user-1',
+ archivedAt: null,
+ createdAt: new Date(),
+ updatedAt: new Date(),
+ }))
+ mockBatchInsertRows.mockImplementation(async ({ rows }: { rows: unknown[] }) =>
+ rows.map((_, i) => ({ id: `row-${i}` }))
+ )
+ mockDeleteTable.mockResolvedValue(undefined)
})
- it('returns 413 for oversized CSV files before reading their contents or creating a table', async () => {
- const file = createCsvFile('name,age\nAlice,30')
- Object.defineProperty(file, 'size', {
- value: 26 * 1024 * 1024,
- })
- const arrayBufferSpy = vi.spyOn(file, 'arrayBuffer')
+ it('streams a CSV upload into a new table and reports the row count', async () => {
+ const response = await POST(makeRequest(uploadParts(csvWithRows(250))))
+ const data = await response.json()
- const response = await callPost(createFormData(file))
+ expect(response.status).toBe(200)
+ expect(mockCreateTable).toHaveBeenCalledTimes(1)
+ expect(data.data.table.id).toBe('tbl_1')
+ expect(data.data.table.rowCount).toBe(250)
+ // 250 rows = a 100-row schema-sample batch + a 150-row remainder batch.
+ expect(mockBatchInsertRows).toHaveBeenCalledTimes(2)
+ })
+
+ it('parses a body delivered in tiny chunks (regression: missing final boundary)', async () => {
+ const response = await POST(makeRequest(uploadParts(csvWithRows(5)), 7))
const data = await response.json()
- expect(response.status).toBe(413)
- expect(data.error).toMatch(/CSV import file exceeds maximum size/)
- expect(arrayBufferSpy).not.toHaveBeenCalled()
- expect(mockParseCsvBuffer).not.toHaveBeenCalled()
+ expect(response.status).toBe(200)
+ expect(data.data.table.rowCount).toBe(5)
+ })
+
+ it('returns 400 for a CSV with no data rows', async () => {
+ const response = await POST(makeRequest(uploadParts('name,age\n')))
+ const data = await response.json()
+
+ expect(response.status).toBe(400)
+ expect(data.error).toMatch(/no data rows/i)
+ expect(mockCreateTable).not.toHaveBeenCalled()
+ })
+
+ it('returns 400 when the file precedes required fields', async () => {
+ const response = await POST(
+ makeRequest([
+ { name: 'file', filename: 'data.csv', value: csvWithRows(3) },
+ { name: 'workspaceId', value: 'workspace-1' },
+ ])
+ )
+
+ expect(response.status).toBe(400)
expect(mockCreateTable).not.toHaveBeenCalled()
})
- it('accepts chunked multipart requests without a content-length header', async () => {
- const req = {
- headers: new Headers({ 'transfer-encoding': 'chunked' }),
- formData: vi.fn(async () => createFormData(createCsvFile('name\nAlice'))),
- } as unknown as NextRequest
+ it('returns 400 when no file part is present', async () => {
+ const response = await POST(makeRequest([{ name: 'workspaceId', value: 'workspace-1' }]))
+ expect(response.status).toBe(400)
+ expect(mockCreateTable).not.toHaveBeenCalled()
+ })
+
+ it('rolls back the created table when a batch insert fails mid-stream', async () => {
+ mockBatchInsertRows
+ .mockResolvedValueOnce(Array.from({ length: 100 }, () => ({ id: 'row' })))
+ .mockRejectedValueOnce(new Error('insert boom'))
+
+ const response = await POST(makeRequest(uploadParts(csvWithRows(250))))
- const response = await POST(req)
+ expect(response.status).toBe(500)
+ expect(mockDeleteTable).toHaveBeenCalledWith('tbl_1', expect.any(String))
+ })
+
+ it('returns 401 when unauthenticated', async () => {
+ hybridAuthMockFns.mockCheckSessionOrInternalAuth.mockResolvedValue({ success: false })
+ const response = await POST(makeRequest(uploadParts(csvWithRows(3))))
+ expect(response.status).toBe(401)
+ })
- expect(response.status).not.toBe(411)
- expect(req.formData).toHaveBeenCalled()
+ it('returns 403 without write permission', async () => {
+ permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue('read')
+ const response = await POST(makeRequest(uploadParts(csvWithRows(3))))
+ expect(response.status).toBe(403)
})
})
diff --git a/apps/sim/app/api/table/import-csv/route.ts b/apps/sim/app/api/table/import-csv/route.ts
index 31927889202..4ab4d26920e 100644
--- a/apps/sim/app/api/table/import-csv/route.ts
+++ b/apps/sim/app/api/table/import-csv/route.ts
@@ -1,3 +1,4 @@
+import type { Readable } from 'node:stream'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { generateId } from '@sim/utils/id'
@@ -5,163 +6,213 @@ import { type NextRequest, NextResponse } from 'next/server'
import { csvExtensionSchema, csvImportFormSchema } from '@/lib/api/contracts/tables'
import { getValidationErrorMessage } from '@/lib/api/server'
import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
+import { isMultipartError, readMultipart } from '@/lib/core/utils/multipart'
import { generateRequestId } from '@/lib/core/utils/request'
-import {
- isPayloadSizeLimitError,
- readFileToBufferWithLimit,
- readFormDataWithLimit,
-} from '@/lib/core/utils/stream-limits'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
import {
batchInsertRows,
CSV_MAX_BATCH_SIZE,
CSV_MAX_FILE_SIZE_BYTES,
+ CSV_SCHEMA_SAMPLE_SIZE,
coerceRowsForTable,
+ createCsvParser,
createTable,
deleteTable,
getWorkspaceTableLimits,
inferSchemaFromCsv,
- parseCsvBuffer,
sanitizeName,
TABLE_LIMITS,
+ type TableDefinition,
type TableSchema,
} from '@/lib/table'
import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
-import { normalizeColumn } from '@/app/api/table/utils'
+import {
+ csvProxyBodyCapResponse,
+ multipartErrorResponse,
+ normalizeColumn,
+} from '@/app/api/table/utils'
const logger = createLogger('TableImportCSV')
-const MAX_MULTIPART_OVERHEAD_BYTES = 1024 * 1024
+
+// Next.js route segment config for this handler.
+export const runtime = 'nodejs'
+export const dynamic = 'force-dynamic'
+export const maxDuration = 300
+/**
+ * POST handler: imports an uploaded CSV/TSV file into a newly created table.
+ *
+ * Flow of the added (streaming) implementation:
+ * 1. Authenticate with `checkSessionOrInternalAuth`; answer 401 on failure.
+ * 2. Return the response from `csvProxyBodyCapResponse` when it yields one.
+ * 3. Read the multipart body with `readMultipart`; a multipart error is turned into a
+ *    response by `multipartErrorResponse`.
+ * 4. Answer 400 for a missing file, an invalid `workspaceId` or an unsupported file
+ *    extension, and 403 unless the caller's workspace permission is 'write' or 'admin'.
+ * 5. Pipe the file stream through the CSV parser. The first `CSV_SCHEMA_SAMPLE_SIZE`
+ *    records are buffered to infer the schema and create the table; later records are
+ *    inserted in batches of `CSV_MAX_BATCH_SIZE`.
+ * 6. If anything throws after the table exists, delete the table and rethrow.
+ *
+ * The outer `catch` maps a known set of error messages to 400 and everything else to
+ * 500; the `finally` destroys the upload stream on every exit path.
+ */
export const POST = withRouteHandler(async (request: NextRequest) => {
const requestId = generateRequestId()
+ let fileStream: Readable | undefined
try {
const authResult = await checkSessionOrInternalAuth(request, { requireWorkflowId: false })
if (!authResult.success || !authResult.userId) {
return NextResponse.json({ error: 'Authentication required' }, { status: 401 })
}
+ const userId = authResult.userId
- const formData = await readFormDataWithLimit(request, {
- maxBytes: CSV_MAX_FILE_SIZE_BYTES + MAX_MULTIPART_OVERHEAD_BYTES,
- label: 'CSV import body',
- })
- const validation = csvImportFormSchema.safeParse({
- file: formData.get('file'),
- workspaceId: formData.get('workspaceId'),
- })
+ // Size gate based on the request headers, applied before the body is read.
+ const oversize = csvProxyBodyCapResponse(request)
+ if (oversize) return oversize
+
+ let parsed: Awaited<ReturnType<typeof readMultipart>>
+ try {
+ parsed = await readMultipart(request, {
+ maxFileBytes: CSV_MAX_FILE_SIZE_BYTES,
+ requiredFieldsBeforeFile: ['workspaceId'],
+ signal: request.signal,
+ })
+ } catch (err) {
+ if (isMultipartError(err)) return multipartErrorResponse(err)
+ throw err
+ }
- if (!validation.success) {
- const message = getValidationErrorMessage(validation.error)
- const isSizeLimit = message.includes('File exceeds maximum allowed size')
+ const { fields, file } = parsed
+ if (!file) {
+ return NextResponse.json({ error: 'CSV file is required' }, { status: 400 })
+ }
+ // Remember the stream so the `finally` below can destroy it on every exit path.
+ fileStream = file.stream
+
+ const workspaceIdResult = csvImportFormSchema.shape.workspaceId.safeParse(fields.workspaceId)
+ if (!workspaceIdResult.success) {
return NextResponse.json(
- { error: isSizeLimit ? 'CSV import file exceeds maximum size' : message },
- { status: isSizeLimit ? 413 : 400 }
+ { error: getValidationErrorMessage(workspaceIdResult.error) },
+ { status: 400 }
)
}
+ const workspaceId = workspaceIdResult.data
- const { file, workspaceId } = validation.data
-
- const permission = await getUserEntityPermissions(authResult.userId, 'workspace', workspaceId)
+ const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId)
if (permission !== 'write' && permission !== 'admin') {
return NextResponse.json({ error: 'Access denied' }, { status: 403 })
}
- const ext = file.name.split('.').pop()?.toLowerCase()
- const extensionValidation = csvExtensionSchema.safeParse(ext)
- if (!extensionValidation.success) {
+ const ext = file.filename.split('.').pop()?.toLowerCase()
+ const extensionResult = csvExtensionSchema.safeParse(ext)
+ if (!extensionResult.success) {
return NextResponse.json(
- { error: getValidationErrorMessage(extensionValidation.error) },
+ { error: getValidationErrorMessage(extensionResult.error) },
{ status: 400 }
)
}
+ const delimiter = extensionResult.data === 'tsv' ? '\t' : ','
- const buffer = await readFileToBufferWithLimit(file, {
- maxBytes: CSV_MAX_FILE_SIZE_BYTES,
- label: 'CSV import file',
- })
- const delimiter = extensionValidation.data === 'tsv' ? '\t' : ','
- const { headers, rows } = await parseCsvBuffer(buffer, delimiter)
+ const parser = createCsvParser(delimiter)
+ // `.pipe` doesn't forward source errors; forward them so the iterator throws.
+ file.stream.on('error', (err) => parser.destroy(err))
+ file.stream.pipe(parser)
- const { columns, headerToColumn } = inferSchemaFromCsv(headers, rows)
- const tableName = sanitizeName(file.name.replace(/\.[^.]+$/, ''), 'imported_table').slice(
- 0,
- TABLE_LIMITS.MAX_TABLE_NAME_LENGTH
- )
- const planLimits = await getWorkspaceTableLimits(workspaceId)
+ /** Everything the insert step needs once the table has been created. */
+ interface ImportState {
+ table: TableDefinition
+ schema: TableSchema
+ headerToColumn: Map<string, string>
+ }
- const normalizedSchema: TableSchema = {
- columns: columns.map(normalizeColumn),
+ /** Coerce `rows` to the table schema, insert them, and return how many rows were inserted. */
+ const insertRows = async (rows: Record<string, unknown>[], state: ImportState) => {
+ if (rows.length === 0) return 0
+ const coerced = coerceRowsForTable(rows, state.schema, state.headerToColumn)
+ const result = await batchInsertRows(
+ { tableId: state.table.id, rows: coerced, workspaceId, userId },
+ state.table,
+ generateId().slice(0, 8)
+ )
+ return result.length
}
- const table = await createTable(
- {
- name: tableName,
- description: `Imported from ${file.name}`,
- schema: normalizedSchema,
- workspaceId,
- userId: authResult.userId,
- maxRows: planLimits.maxRowsPerTable,
- maxTables: planLimits.maxTables,
- },
- requestId
- )
+ /** Infer the schema from the buffered sample and create the (empty) table. */
+ const buildTable = async (sampleRows: Record<string, unknown>[]): Promise<ImportState> => {
+ const inferred = inferSchemaFromCsv(Object.keys(sampleRows[0]), sampleRows)
+ const schema: TableSchema = { columns: inferred.columns.map(normalizeColumn) }
+ const planLimits = await getWorkspaceTableLimits(workspaceId)
+ const tableName = sanitizeName(file.filename.replace(/\.[^.]+$/, ''), 'imported_table').slice(
+ 0,
+ TABLE_LIMITS.MAX_TABLE_NAME_LENGTH
+ )
+ const table = await createTable(
+ {
+ name: tableName,
+ description: `Imported from ${file.filename}`,
+ schema,
+ workspaceId,
+ userId,
+ maxRows: planLimits.maxRowsPerTable,
+ maxTables: planLimits.maxTables,
+ },
+ requestId
+ )
+ return { table, schema, headerToColumn: inferred.headerToColumn }
+ }
+
+ // `state` stays null until the table exists. Until then records accumulate in `sample`;
+ // afterwards they accumulate in `batch` and are flushed whenever it fills up.
+ let state: ImportState | null = null
+ let inserted = 0
+ const sample: Record<string, unknown>[] = []
+ let batch: Record<string, unknown>[] = []
try {
- const coerced = coerceRowsForTable(rows, normalizedSchema, headerToColumn)
- let inserted = 0
- for (let i = 0; i < coerced.length; i += CSV_MAX_BATCH_SIZE) {
- const batch = coerced.slice(i, i + CSV_MAX_BATCH_SIZE)
- const batchRequestId = generateId().slice(0, 8)
- const result = await batchInsertRows(
- { tableId: table.id, rows: batch, workspaceId, userId: authResult.userId },
- table,
- batchRequestId
- )
- inserted += result.length
+ for await (const record of parser as AsyncIterable<Record<string, unknown>>) {
+ if (!state) {
+ sample.push(record)
+ if (sample.length >= CSV_SCHEMA_SAMPLE_SIZE) {
+ state = await buildTable(sample)
+ inserted += await insertRows(sample, state)
+ }
+ continue
+ }
+ batch.push(record)
+ if (batch.length >= CSV_MAX_BATCH_SIZE) {
+ inserted += await insertRows(batch, state)
+ batch = []
+ }
}
- logger.info(`[${requestId}] CSV imported`, {
- tableId: table.id,
- fileName: file.name,
- columns: columns.length,
- rows: inserted,
- })
+ // End of input: either the sample never filled up (small file), or a partial batch
+ // is still waiting to be written.
+ if (!state) {
+ if (sample.length === 0) {
+ return NextResponse.json({ error: 'CSV file has no data rows' }, { status: 400 })
+ }
+ state = await buildTable(sample)
+ inserted += await insertRows(sample, state)
+ } else {
+ inserted += await insertRows(batch, state)
+ }
+ } catch (streamError) {
+ // Roll back the table if it was created. A failure of the delete itself is swallowed
+ // so that the original error is the one that propagates.
+ if (state) await deleteTable(state.table.id, requestId).catch(() => {})
+ throw streamError
+ }
- return NextResponse.json({
- success: true,
- data: {
- table: {
- id: table.id,
- name: table.name,
- description: table.description,
- schema: normalizedSchema,
- rowCount: inserted,
- },
+ logger.info(`[${requestId}] CSV imported`, {
+ tableId: state.table.id,
+ fileName: file.filename,
+ columns: state.schema.columns.length,
+ rows: inserted,
+ })
+
+ return NextResponse.json({
+ success: true,
+ data: {
+ table: {
+ id: state.table.id,
+ name: state.table.name,
+ description: state.table.description,
+ schema: state.schema,
+ rowCount: inserted,
},
- })
- } catch (insertError) {
- await deleteTable(table.id, requestId).catch(() => {})
- throw insertError
- }
+ },
+ })
} catch (error) {
+ // A multipart error can also surface while the file stream is being consumed.
+ if (isMultipartError(error)) return multipartErrorResponse(error)
+
const message = toError(error).message
logger.error(`[${requestId}] CSV import failed:`, error)
- const isSizeLimitError =
- isPayloadSizeLimitError(error) || message.includes('CSV import file exceeds maximum size')
const isClientError =
message.includes('maximum table limit') ||
message.includes('CSV file has no') ||
message.includes('Invalid table name') ||
message.includes('Invalid schema') ||
- message.includes('already exists') ||
- isSizeLimitError
+ message.includes('already exists')
return NextResponse.json(
{ error: isClientError ? message : 'Failed to import CSV' },
- {
- status: isSizeLimitError ? 413 : isClientError ? 400 : 500,
- }
+ { status: isClientError ? 400 : 500 }
)
+ } finally {
+ fileStream?.destroy()
}
})
diff --git a/apps/sim/app/api/table/route.ts b/apps/sim/app/api/table/route.ts
index 89a48b80896..2d97dc4f639 100644
--- a/apps/sim/app/api/table/route.ts
+++ b/apps/sim/app/api/table/route.ts
@@ -203,6 +203,10 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
: t.archivedAt
? String(t.archivedAt)
: null,
+ importStatus: t.importStatus ?? null,
+ importId: t.importId ?? null,
+ importError: t.importError ?? null,
+ importRowsProcessed: t.importRowsProcessed ?? 0,
}
})
diff --git a/apps/sim/app/api/table/utils.ts b/apps/sim/app/api/table/utils.ts
index 114271a9401..eef507c94ba 100644
--- a/apps/sim/app/api/table/utils.ts
+++ b/apps/sim/app/api/table/utils.ts
@@ -5,12 +5,46 @@ import {
deleteTableColumnBodySchema,
updateTableColumnBodySchema,
} from '@/lib/api/contracts/tables'
+import type { MultipartError } from '@/lib/core/utils/multipart'
import type { ColumnDefinition, TableDefinition } from '@/lib/table'
import { getTableById } from '@/lib/table'
import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
const logger = createLogger('TableUtils')
+/**
+ * Largest request body, in bytes, that the synchronous CSV import routes accept.
+ *
+ * Next.js buffers the request body for the proxy and silently truncates it past
+ * `experimental.proxyClientMaxBodySize` (default 10MB). Those routes therefore reject
+ * bigger bodies up front; larger files use the async direct-to-storage path instead.
+ */
+export const CSV_IMPORT_PROXY_BODY_CAP_BYTES = 10 * 1024 ** 2
+
+/**
+ * Up-front size gate for a synchronous CSV upload.
+ *
+ * Reads the `content-length` header, treating a missing header as 0.
+ *
+ * @returns a 413 response when the declared length is above
+ *   {@link CSV_IMPORT_PROXY_BODY_CAP_BYTES}, otherwise `null`.
+ */
+export function csvProxyBodyCapResponse(request: { headers: Headers }): NextResponse | null {
+  const declaredLength = Number(request.headers.get('content-length') ?? 0)
+
+  // Written as a negated `>` so that a non-numeric header (NaN) falls through to `null`.
+  if (!(declaredLength > CSV_IMPORT_PROXY_BODY_CAP_BYTES)) {
+    return null
+  }
+
+  const body = {
+    error: 'File too large to import through the server. Files over 10MB import in the background.',
+  }
+  return NextResponse.json(body, { status: 413 })
+}
+
+/**
+ * Converts a {@link MultipartError} into the HTTP response sent back to the client.
+ *
+ * `FILE_TOO_LARGE` yields a 413. Every other code yields a 400: `NO_FILE` with a fixed
+ * message, anything else with the error's own message appended.
+ */
+export function multipartErrorResponse(error: MultipartError): NextResponse {
+  switch (error.code) {
+    case 'FILE_TOO_LARGE':
+      return NextResponse.json({ error: 'CSV import file exceeds maximum size' }, { status: 413 })
+    case 'NO_FILE':
+      return NextResponse.json({ error: 'CSV file is required' }, { status: 400 })
+    default:
+      return NextResponse.json({ error: `Invalid CSV upload: ${error.message}` }, { status: 400 })
+  }
+}
+
interface TableAccessResult {
hasAccess: true
table: TableDefinition
diff --git a/apps/sim/app/api/tools/clickhouse/count-rows/route.ts b/apps/sim/app/api/tools/clickhouse/count-rows/route.ts
new file mode 100644
index 00000000000..5b7b90821ca
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/count-rows/route.ts
@@ -0,0 +1,42 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseCountRowsContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseCountRows } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseCountRowsAPI')
+
+/**
+ * POST handler for the ClickHouse count-rows tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise passes the body to
+ * `executeClickHouseCountRows` and reports the resulting count. Any thrown error is
+ * logged and answered with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse count rows attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseCountRowsContract, request, { logger })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    const count = await executeClickHouseCountRows(body, body.table, body.where)
+    const message = `Table contains ${count} row(s).`
+
+    return NextResponse.json({ message, count })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse count rows failed:`, error)
+
+    const failure = { error: `ClickHouse count rows failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/create-database/route.ts b/apps/sim/app/api/tools/clickhouse/create-database/route.ts
new file mode 100644
index 00000000000..b748a20595e
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/create-database/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseCreateDatabaseContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseCreateDatabase } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseCreateDatabaseAPI')
+
+/**
+ * POST handler for the ClickHouse create-database tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise passes the body and
+ * its `name` to `executeClickHouseCreateDatabase`. Any thrown error is logged and
+ * answered with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse create database attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseCreateDatabaseContract, request, {
+      logger,
+    })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    await executeClickHouseCreateDatabase(body, body.name)
+
+    const message = `Database '${body.name}' created.`
+    return NextResponse.json({ message, rows: [], rowCount: 0 })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse create database failed:`, error)
+
+    const failure = { error: `ClickHouse create database failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/create-table/route.ts b/apps/sim/app/api/tools/clickhouse/create-table/route.ts
new file mode 100644
index 00000000000..47cc3ff5f7f
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/create-table/route.ts
@@ -0,0 +1,50 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseCreateTableContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseCreateTable } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseCreateTableAPI')
+
+/**
+ * POST handler for the ClickHouse create-table tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise forwards the body's
+ * `table`, `columns`, `engine`, `orderBy` and `partitionBy` to
+ * `executeClickHouseCreateTable`. Any thrown error is logged and answered with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse create table attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseCreateTableContract, request, { logger })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    const { table, columns, engine, orderBy, partitionBy } = body
+    await executeClickHouseCreateTable(body, table, columns, engine, orderBy, partitionBy)
+
+    const message = `Table '${table}' created.`
+    return NextResponse.json({ message, rows: [], rowCount: 0 })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse create table failed:`, error)
+
+    const failure = { error: `ClickHouse create table failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/delete/route.ts b/apps/sim/app/api/tools/clickhouse/delete/route.ts
new file mode 100644
index 00000000000..f773aabba4a
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/delete/route.ts
@@ -0,0 +1,49 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseDeleteContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseDelete } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseDeleteAPI')
+
+/**
+ * POST handler for the ClickHouse delete tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise passes the body's
+ * `table` and `where` to `executeClickHouseDelete`, echoing the returned rows and row
+ * count. Any thrown error is logged and answered with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse delete attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseDeleteContract, request, { logger })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    logger.info(
+      `[${requestId}] Deleting data from ${body.table} on ${body.host}:${body.port}/${body.database}`
+    )
+
+    const outcome = await executeClickHouseDelete(body, body.table, body.where)
+    logger.info(`[${requestId}] Delete mutation submitted, ${outcome.rowCount} row(s) affected`)
+
+    const message = `Delete mutation submitted. ClickHouse mutations run asynchronously. ${outcome.rowCount} row(s) affected.`
+    return NextResponse.json({ message, rows: outcome.rows, rowCount: outcome.rowCount })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse delete failed:`, error)
+
+    const failure = { error: `ClickHouse delete failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/describe-table/route.ts b/apps/sim/app/api/tools/clickhouse/describe-table/route.ts
new file mode 100644
index 00000000000..e258d781bc1
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/describe-table/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseDescribeTableContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseDescribeTable } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseDescribeTableAPI')
+
+/**
+ * POST handler for the ClickHouse describe-table tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise passes the body's
+ * `table` to `executeClickHouseDescribeTable`, echoing the returned rows and row count.
+ * Any thrown error is logged and answered with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse describe table attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseDescribeTableContract, request, {
+      logger,
+    })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    const outcome = await executeClickHouseDescribeTable(body, body.table)
+
+    const message = `Described table with ${outcome.rowCount} column(s).`
+    return NextResponse.json({ message, rows: outcome.rows, rowCount: outcome.rowCount })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse describe table failed:`, error)
+
+    const failure = { error: `ClickHouse describe table failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/drop-database/route.ts b/apps/sim/app/api/tools/clickhouse/drop-database/route.ts
new file mode 100644
index 00000000000..e06f897b337
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/drop-database/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseDropDatabaseContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseDropDatabase } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseDropDatabaseAPI')
+
+/**
+ * POST handler for the ClickHouse drop-database tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise passes the body and
+ * its `name` to `executeClickHouseDropDatabase`. Any thrown error is logged and answered
+ * with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse drop database attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseDropDatabaseContract, request, {
+      logger,
+    })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    await executeClickHouseDropDatabase(body, body.name)
+
+    const message = `Database '${body.name}' dropped.`
+    return NextResponse.json({ message, rows: [], rowCount: 0 })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse drop database failed:`, error)
+
+    const failure = { error: `ClickHouse drop database failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/drop-partition/route.ts b/apps/sim/app/api/tools/clickhouse/drop-partition/route.ts
new file mode 100644
index 00000000000..790526586ba
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/drop-partition/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseDropPartitionContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseDropPartition } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseDropPartitionAPI')
+
+/**
+ * POST handler for the ClickHouse drop-partition tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise passes the body's
+ * `table` and `partition` to `executeClickHouseDropPartition`. Any thrown error is
+ * logged and answered with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse drop partition attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseDropPartitionContract, request, {
+      logger,
+    })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    await executeClickHouseDropPartition(body, body.table, body.partition)
+
+    const message = `Dropped partition from table '${body.table}'.`
+    return NextResponse.json({ message, rows: [], rowCount: 0 })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse drop partition failed:`, error)
+
+    const failure = { error: `ClickHouse drop partition failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/drop-table/route.ts b/apps/sim/app/api/tools/clickhouse/drop-table/route.ts
new file mode 100644
index 00000000000..1ae9f6832a8
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/drop-table/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseDropTableContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseDropTable } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseDropTableAPI')
+
+/**
+ * POST handler for the ClickHouse drop-table tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise passes the body and
+ * its `table` to `executeClickHouseDropTable`. Any thrown error is logged and answered
+ * with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse drop table attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseDropTableContract, request, { logger })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    await executeClickHouseDropTable(body, body.table)
+
+    const message = `Table '${body.table}' dropped.`
+    return NextResponse.json({ message, rows: [], rowCount: 0 })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse drop table failed:`, error)
+
+    const failure = { error: `ClickHouse drop table failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/execute/route.ts b/apps/sim/app/api/tools/clickhouse/execute/route.ts
new file mode 100644
index 00000000000..3e2c4baacf6
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/execute/route.ts
@@ -0,0 +1,49 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseExecuteContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseQuery } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseExecuteAPI')
+
+/**
+ * POST handler for the ClickHouse execute tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise passes the body's
+ * `query` to `executeClickHouseQuery`, echoing the returned rows and row count. Any
+ * thrown error is logged and answered with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse execute attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseExecuteContract, request, { logger })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    logger.info(
+      `[${requestId}] Executing ClickHouse statement on ${body.host}:${body.port}/${body.database}`
+    )
+
+    const outcome = await executeClickHouseQuery(body, body.query)
+    logger.info(`[${requestId}] Statement executed successfully, ${outcome.rowCount} row(s)`)
+
+    const message = `Statement executed successfully. ${outcome.rowCount} row(s) returned or affected.`
+    return NextResponse.json({ message, rows: outcome.rows, rowCount: outcome.rowCount })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse execute failed:`, error)
+
+    const failure = { error: `ClickHouse execute failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/insert-rows/route.ts b/apps/sim/app/api/tools/clickhouse/insert-rows/route.ts
new file mode 100644
index 00000000000..fb4f90b8634
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/insert-rows/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseInsertRowsContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseInsertRows } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseInsertRowsAPI')
+
+/**
+ * POST handler for the ClickHouse insert-rows tool.
+ *
+ * Answers 401 when `checkInternalAuth` fails, returns the response prepared by
+ * `parseToolRequest` when the request does not parse, and otherwise passes the body's
+ * `table` and `rows` to `executeClickHouseInsertRows`, echoing the returned rows and row
+ * count. Any thrown error is logged and answered with a 500.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    const authResult = await checkInternalAuth(request)
+    if (!(authResult.success && authResult.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse insert rows attempt`)
+      const unauthorized = { error: authResult.error || 'Unauthorized' }
+      return NextResponse.json(unauthorized, { status: 401 })
+    }
+
+    const toolRequest = await parseToolRequest(clickhouseInsertRowsContract, request, { logger })
+    if (!toolRequest.success) {
+      return toolRequest.response
+    }
+
+    const { body } = toolRequest.data
+    const outcome = await executeClickHouseInsertRows(body, body.table, body.rows)
+
+    const message = `Inserted ${outcome.rowCount} row(s) into '${body.table}'.`
+    return NextResponse.json({ message, rows: outcome.rows, rowCount: outcome.rowCount })
+  } catch (error) {
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse insert rows failed:`, error)
+
+    const failure = { error: `ClickHouse insert rows failed: ${reason}` }
+    return NextResponse.json(failure, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/insert/route.ts b/apps/sim/app/api/tools/clickhouse/insert/route.ts
new file mode 100644
index 00000000000..a7cc4ed908f
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/insert/route.ts
@@ -0,0 +1,49 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseInsertContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseInsert } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseInsertAPI')
+
+/**
+ * POST handler for ClickHouse inserts: checks internal auth, parses the request against
+ * `clickhouseInsertContract`, then delegates to `executeClickHouseInsert`. The target table and
+ * connection coordinates are logged before the call, and the row count after it.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse insert attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseInsertContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const target = `${body.host}:${body.port}/${body.database}`
+    logger.info(`[${requestId}] Inserting data into ${body.table} on ${target}`)
+
+    const { rows, rowCount } = await executeClickHouseInsert(body, body.table, body.data)
+    logger.info(`[${requestId}] Insert executed successfully, ${rowCount} row(s) inserted`)
+
+    const message = `Data inserted successfully. ${rowCount} row(s) affected.`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse insert failed:`, error)
+    const payload = { error: `ClickHouse insert failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/introspect/route.ts b/apps/sim/app/api/tools/clickhouse/introspect/route.ts
new file mode 100644
index 00000000000..cd3257c6275
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/introspect/route.ts
@@ -0,0 +1,50 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseIntrospectContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseIntrospect } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseIntrospectAPI')
+
+/**
+ * POST handler for ClickHouse schema introspection: checks internal auth, parses the request
+ * against `clickhouseIntrospectContract`, then returns the tables from `executeClickHouseIntrospect`.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse introspect attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseIntrospectContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const target = `${body.host}:${body.port}/${body.database}`
+    logger.info(`[${requestId}] Introspecting ClickHouse schema on ${target}`)
+
+    const { tables } = await executeClickHouseIntrospect(body)
+    logger.info(
+      `[${requestId}] Introspection completed successfully, found ${tables.length} tables`
+    )
+
+    const message = `Schema introspection completed. Found ${tables.length} table(s) in database '${body.database}'.`
+    return NextResponse.json({ message, tables })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse introspection failed:`, error)
+    const payload = { error: `ClickHouse introspection failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/kill-query/route.ts b/apps/sim/app/api/tools/clickhouse/kill-query/route.ts
new file mode 100644
index 00000000000..c46f6d1393c
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/kill-query/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseKillQueryContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseKillQuery } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseKillQueryAPI')
+
+/**
+ * POST handler for killing a ClickHouse query: checks internal auth, parses the request against
+ * `clickhouseKillQueryContract`, then delegates to `executeClickHouseKillQuery` with `queryId`.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse kill query attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseKillQueryContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const { rows, rowCount } = await executeClickHouseKillQuery(body, body.queryId)
+    const message = `Kill command executed for query '${body.queryId}'.`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse kill query failed:`, error)
+    const payload = { error: `ClickHouse kill query failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/list-clusters/route.ts b/apps/sim/app/api/tools/clickhouse/list-clusters/route.ts
new file mode 100644
index 00000000000..643c7be9621
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/list-clusters/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseListClustersContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseListClusters } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseListClustersAPI')
+
+/**
+ * POST handler for listing ClickHouse clusters: checks internal auth, parses the request against
+ * `clickhouseListClustersContract`, then returns the rows from `executeClickHouseListClusters`.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse list clusters attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseListClustersContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const { rows, rowCount } = await executeClickHouseListClusters(body)
+    const message = `Found ${rowCount} cluster node(s).`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse list clusters failed:`, error)
+    const payload = { error: `ClickHouse list clusters failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/list-databases/route.ts b/apps/sim/app/api/tools/clickhouse/list-databases/route.ts
new file mode 100644
index 00000000000..c524b162474
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/list-databases/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseListDatabasesContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseListDatabases } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseListDatabasesAPI')
+
+/**
+ * POST handler for listing ClickHouse databases: checks internal auth, parses the request against
+ * `clickhouseListDatabasesContract`, then returns the rows from `executeClickHouseListDatabases`.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse list databases attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseListDatabasesContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const { rows, rowCount } = await executeClickHouseListDatabases(body)
+    const message = `Found ${rowCount} database(s).`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse list databases failed:`, error)
+    const payload = { error: `ClickHouse list databases failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/list-mutations/route.ts b/apps/sim/app/api/tools/clickhouse/list-mutations/route.ts
new file mode 100644
index 00000000000..84034b42436
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/list-mutations/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseListMutationsContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseListMutations } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseListMutationsAPI')
+
+/**
+ * POST handler for listing ClickHouse mutations: checks internal auth, parses the request against
+ * `clickhouseListMutationsContract`, then forwards `table` and `onlyRunning` to the list helper.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse list mutations attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseListMutationsContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const { rows, rowCount } = await executeClickHouseListMutations(body, body.table, body.onlyRunning)
+    const message = `Found ${rowCount} mutation(s).`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse list mutations failed:`, error)
+    const payload = { error: `ClickHouse list mutations failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/list-partitions/route.ts b/apps/sim/app/api/tools/clickhouse/list-partitions/route.ts
new file mode 100644
index 00000000000..d064850ad1f
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/list-partitions/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseListPartitionsContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseListPartitions } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseListPartitionsAPI')
+
+/**
+ * POST handler for listing ClickHouse partitions: checks internal auth, parses the request against
+ * `clickhouseListPartitionsContract`, then forwards `table` to `executeClickHouseListPartitions`.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse list partitions attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseListPartitionsContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const { rows, rowCount } = await executeClickHouseListPartitions(body, body.table)
+    const message = `Found ${rowCount} partition(s).`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse list partitions failed:`, error)
+    const payload = { error: `ClickHouse list partitions failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/list-running-queries/route.ts b/apps/sim/app/api/tools/clickhouse/list-running-queries/route.ts
new file mode 100644
index 00000000000..d542966d5d0
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/list-running-queries/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseListRunningQueriesContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseListRunningQueries } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseListRunningQueriesAPI')
+
+/**
+ * POST handler for listing running ClickHouse queries: checks internal auth, parses the request
+ * against `clickhouseListRunningQueriesContract`, then returns the rows from the list helper.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse list running queries attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseListRunningQueriesContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const { rows, rowCount } = await executeClickHouseListRunningQueries(body)
+    const message = `Found ${rowCount} running query(ies).`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse list running queries failed:`, error)
+    const payload = { error: `ClickHouse list running queries failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/list-tables/route.ts b/apps/sim/app/api/tools/clickhouse/list-tables/route.ts
new file mode 100644
index 00000000000..4d9df7a2dc7
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/list-tables/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseListTablesContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseListTables } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseListTablesAPI')
+
+/**
+ * POST handler for listing ClickHouse tables: checks internal auth, parses the request against
+ * `clickhouseListTablesContract`, then returns the rows from `executeClickHouseListTables`.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse list tables attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseListTablesContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const { rows, rowCount } = await executeClickHouseListTables(body)
+    const message = `Found ${rowCount} table(s).`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse list tables failed:`, error)
+    const payload = { error: `ClickHouse list tables failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/optimize-table/route.ts b/apps/sim/app/api/tools/clickhouse/optimize-table/route.ts
new file mode 100644
index 00000000000..3d22b8b3788
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/optimize-table/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseOptimizeTableContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseOptimizeTable } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseOptimizeTableAPI')
+
+/**
+ * POST handler for optimizing a ClickHouse table: checks internal auth, parses the request against
+ * `clickhouseOptimizeTableContract`, then forwards `table` and `final` to the optimize helper.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse optimize table attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseOptimizeTableContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    await executeClickHouseOptimizeTable(body, body.table, body.final)
+    const message = `Optimize submitted for table '${body.table}'.`
+    return NextResponse.json({ message, rows: [], rowCount: 0 })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse optimize table failed:`, error)
+    const payload = { error: `ClickHouse optimize table failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/query/route.ts b/apps/sim/app/api/tools/clickhouse/query/route.ts
new file mode 100644
index 00000000000..4d70b48b55b
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/query/route.ts
@@ -0,0 +1,46 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseQueryContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseQuery } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseQueryAPI')
+
+/** POST handler: runs the request's ClickHouse query with `enforceReadOnly: true`. */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse query attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseQueryContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const target = `${body.host}:${body.port}/${body.database}`
+    logger.info(`[${requestId}] Executing ClickHouse query on ${target}`)
+
+    const result = await executeClickHouseQuery(body, body.query, { enforceReadOnly: true })
+    const { rows, rowCount } = result
+    logger.info(`[${requestId}] Query executed successfully, returned ${rowCount} rows`)
+
+    const message = `Query executed successfully. ${rowCount} row(s) returned.`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse query failed:`, error)
+    const payload = { error: `ClickHouse query failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/rename-table/route.ts b/apps/sim/app/api/tools/clickhouse/rename-table/route.ts
new file mode 100644
index 00000000000..eec1f7ec436
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/rename-table/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseRenameTableContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseRenameTable } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseRenameTableAPI')
+
+/**
+ * POST handler for renaming a ClickHouse table: checks internal auth, parses the request against
+ * `clickhouseRenameTableContract`, then forwards `table` and `newTable` to the rename helper.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse rename table attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseRenameTableContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    await executeClickHouseRenameTable(body, body.table, body.newTable)
+    const message = `Renamed table '${body.table}' to '${body.newTable}'.`
+    return NextResponse.json({ message, rows: [], rowCount: 0 })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse rename table failed:`, error)
+    const payload = { error: `ClickHouse rename table failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/show-create-table/route.ts b/apps/sim/app/api/tools/clickhouse/show-create-table/route.ts
new file mode 100644
index 00000000000..8c93d402803
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/show-create-table/route.ts
@@ -0,0 +1,42 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseShowCreateTableContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseShowCreateTable } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseShowCreateTableAPI')
+
+/**
+ * POST handler for SHOW CREATE TABLE: checks internal auth, parses the request against
+ * `clickhouseShowCreateTableContract`, then returns the `ddl` from the show-create helper.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse show create table attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseShowCreateTableContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const ddl = await executeClickHouseShowCreateTable(body, body.table)
+    return NextResponse.json({ message: 'Retrieved CREATE statement.', ddl })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse show create table failed:`, error)
+    const payload = { error: `ClickHouse show create table failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/table-stats/route.ts b/apps/sim/app/api/tools/clickhouse/table-stats/route.ts
new file mode 100644
index 00000000000..405fbaf06cc
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/table-stats/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseTableStatsContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseTableStats } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseTableStatsAPI')
+
+/**
+ * POST handler for ClickHouse table stats: checks internal auth, parses the request against
+ * `clickhouseTableStatsContract`, then forwards `table` to `executeClickHouseTableStats`.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse table stats attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseTableStatsContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const { rows, rowCount } = await executeClickHouseTableStats(body, body.table)
+    const message = `Retrieved stats for ${rowCount} table(s).`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse table stats failed:`, error)
+    const payload = { error: `ClickHouse table stats failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/truncate-table/route.ts b/apps/sim/app/api/tools/clickhouse/truncate-table/route.ts
new file mode 100644
index 00000000000..27452eb9849
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/truncate-table/route.ts
@@ -0,0 +1,43 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseTruncateTableContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseTruncateTable } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseTruncateTableAPI')
+
+/**
+ * POST handler for truncating a ClickHouse table: checks internal auth, parses the request against
+ * `clickhouseTruncateTableContract`, then forwards `table` to `executeClickHouseTruncateTable`.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse truncate table attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseTruncateTableContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    await executeClickHouseTruncateTable(body, body.table)
+    const message = `Table '${body.table}' truncated.`
+    return NextResponse.json({ message, rows: [], rowCount: 0 })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse truncate table failed:`, error)
+    const payload = { error: `ClickHouse truncate table failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/update/route.ts b/apps/sim/app/api/tools/clickhouse/update/route.ts
new file mode 100644
index 00000000000..9d43755da4c
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/update/route.ts
@@ -0,0 +1,49 @@
+import { createLogger } from '@sim/logger'
+import { getErrorMessage } from '@sim/utils/errors'
+import { generateId } from '@sim/utils/id'
+import { type NextRequest, NextResponse } from 'next/server'
+import { clickhouseUpdateContract } from '@/lib/api/contracts/tools/databases/clickhouse'
+import { parseToolRequest } from '@/lib/api/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { executeClickHouseUpdate } from '@/app/api/tools/clickhouse/utils'
+
+const logger = createLogger('ClickHouseUpdateAPI')
+
+/**
+ * POST handler for ClickHouse updates: checks internal auth, parses the request against
+ * `clickhouseUpdateContract`, then forwards `table`, `data` and `where` to `executeClickHouseUpdate`.
+ */
+export const POST = withRouteHandler(async (request: NextRequest) => {
+  const requestId = generateId().slice(0, 8)
+
+  try {
+    // Reject with 401 unless the auth check succeeded and resolved a user id.
+    const caller = await checkInternalAuth(request)
+    if (!(caller.success && caller.userId)) {
+      logger.warn(`[${requestId}] Unauthorized ClickHouse update attempt`)
+      return NextResponse.json({ error: caller.error || 'Unauthorized' }, { status: 401 })
+    }
+
+    // When parsing fails, parseToolRequest supplies the response to return.
+    const input = await parseToolRequest(clickhouseUpdateContract, request, { logger })
+    if (!input.success) return input.response
+    const { body } = input.data
+
+    const target = `${body.host}:${body.port}/${body.database}`
+    logger.info(`[${requestId}] Updating data in ${body.table} on ${target}`)
+
+    const result = await executeClickHouseUpdate(body, body.table, body.data, body.where)
+    const { rows, rowCount } = result
+    logger.info(`[${requestId}] Update mutation submitted, ${rowCount} row(s) written`)
+
+    const message = `Update mutation submitted. ClickHouse mutations run asynchronously. ${rowCount} row(s) written.`
+    return NextResponse.json({ message, rows, rowCount })
+  } catch (error) {
+    // Anything thrown above is logged and reported to the caller as a 500.
+    const reason = getErrorMessage(error, 'Unknown error occurred')
+    logger.error(`[${requestId}] ClickHouse update failed:`, error)
+    const payload = { error: `ClickHouse update failed: ${reason}` }
+    return NextResponse.json(payload, { status: 500 })
+  }
+})
diff --git a/apps/sim/app/api/tools/clickhouse/utils.ts b/apps/sim/app/api/tools/clickhouse/utils.ts
new file mode 100644
index 00000000000..ce4ad3afcbd
--- /dev/null
+++ b/apps/sim/app/api/tools/clickhouse/utils.ts
@@ -0,0 +1,852 @@
+import {
+ validateDatabaseHost,
+ validateSqlWhereClause,
+} from '@/lib/core/security/input-validation.server'
+import type { ClickHouseConnectionConfig } from '@/tools/clickhouse/types'
+
+/** Delay, in milliseconds, after which the abort timer armed around each ClickHouse HTTP `fetch` call fires. */
+const REQUEST_TIMEOUT_MS = 30_000
+
+/**
+ * Row counters parsed from the `X-ClickHouse-Summary` response header.
+ * Every counter is optional and typed as a string; consumers convert with `Number()`.
+ */
+interface ClickHouseSummary {
+ read_rows?: string
+ written_rows?: string
+ result_rows?: string
+}
+
+/** Raw outcome of one HTTP call: the response body text plus the parsed summary header (`null` when absent or unparseable). */
+interface ClickHouseHttpResult {
+ text: string
+ summary: ClickHouseSummary | null
+}
+
+/**
+ * Normalised result returned by the row-oriented helpers in this module.
+ * `rows` is empty for statements that return no result set; `rowCount` then carries
+ * a count derived from the summary header (or a helper-specific fallback).
+ */
+export interface ClickHouseRowsResult {
+ rows: unknown[]
+ rowCount: number
+}
+
+/**
+ * Shape of one row of the `system.columns` query issued by `executeClickHouseIntrospect`.
+ * Numeric fields are typed `number | string` because either form is accepted downstream
+ * (see `toBoolean`).
+ */
+interface ClickHouseColumnRow {
+ table: string
+ name: string
+ type: string
+ default_kind?: string
+ default_expression?: string
+ is_in_primary_key?: number | string
+ is_in_sorting_key?: number | string
+ position?: number | string
+}
+
+/** Shape of one row of the `system.tables` query issued by `executeClickHouseIntrospect`. */
+interface ClickHouseTableRow {
+ name: string
+ engine?: string
+ total_rows?: number | string | null
+}
+
+/**
+ * Schema snapshot produced by `executeClickHouseIntrospect`: one entry per table of the
+ * configured database, each carrying its column list.
+ */
+export interface ClickHouseIntrospectionResult {
+ tables: Array<{
+ name: string
+ database: string
+ engine: string
+ // Omitted when `system.tables.total_rows` is null/absent.
+ totalRows?: number
+ columns: Array<{
+ name: string
+ type: string
+ defaultKind?: string
+ defaultExpression?: string
+ isInPrimaryKey: boolean
+ isInSortingKey: boolean
+ }>
+ }>
+}
+
+/**
+ * Sends a single statement to the ClickHouse HTTP interface and returns the raw
+ * response body alongside the parsed `X-ClickHouse-Summary` header.
+ *
+ * The host is checked with `validateDatabaseHost` before any request is made and the
+ * credentials travel in the `X-ClickHouse-User` / `X-ClickHouse-Key` headers. The
+ * whole exchange — receiving the response and reading its body — is bounded by
+ * `REQUEST_TIMEOUT_MS`.
+ *
+ * @param config - Connection settings (host, port, database, credentials, TLS flag).
+ * @param statement - SQL text sent verbatim as the POST body.
+ * @param options - `readOnly: true` adds `readonly=1` to the request URL.
+ * @returns The response body text and the parsed summary header (`null` if missing).
+ * @throws Error when host validation fails, when the request exceeds the timeout, or
+ *   when the response status is not OK (the response body becomes the message).
+ * @see https://clickhouse.com/docs/interfaces/http
+ */
+async function clickhouseRequest(
+  config: ClickHouseConnectionConfig,
+  statement: string,
+  options: { readOnly?: boolean } = {}
+): Promise<ClickHouseHttpResult> {
+  const hostValidation = await validateDatabaseHost(config.host, 'host')
+  if (!hostValidation.isValid) {
+    throw new Error(hostValidation.error)
+  }
+
+  const protocol = config.secure ? 'https' : 'http'
+  const url = new URL(`${protocol}://${config.host}:${config.port}/`)
+  url.searchParams.set('database', config.database)
+  if (options.readOnly) {
+    // Server-enforced read-only: ClickHouse rejects any write/DDL and forbids the
+    // query from re-enabling writes via `SET readonly=0`. This is the real boundary
+    // for the query operation; the SQL-shape checks below are defense-in-depth.
+    url.searchParams.set('readonly', '1')
+  }
+
+  const controller = new AbortController()
+  const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS)
+
+  let response: Response
+  let text: string
+  try {
+    response = await fetch(url.toString(), {
+      method: 'POST',
+      headers: {
+        'X-ClickHouse-User': config.username,
+        'X-ClickHouse-Key': config.password,
+        'Content-Type': 'text/plain; charset=utf-8',
+      },
+      body: statement,
+      signal: controller.signal,
+    })
+    // Read the body while the abort timer is still armed: `fetch` resolves as soon as
+    // the headers arrive, so a stalled body stream would otherwise never time out.
+    text = await response.text()
+  } catch (error) {
+    if (controller.signal.aborted) {
+      // Only our own timer aborts this controller, so an abort here means a timeout.
+      throw new Error(`ClickHouse request timed out after ${REQUEST_TIMEOUT_MS}ms`)
+    }
+    throw error
+  } finally {
+    clearTimeout(timeout)
+  }
+
+  if (!response.ok) {
+    throw new Error(text.trim() || `ClickHouse request failed with status ${response.status}`)
+  }
+
+  return { text, summary: parseSummary(response.headers.get('x-clickhouse-summary')) }
+}
+
+/**
+ * Decodes the `X-ClickHouse-Summary` header value.
+ *
+ * @param header - Raw header value, or `null` when the header is absent.
+ * @returns The JSON-decoded summary, or `null` when the header is missing, empty,
+ *   or not valid JSON.
+ */
+function parseSummary(header: string | null): ClickHouseSummary | null {
+  if (header === null || header === '') {
+    return null
+  }
+  let summary: ClickHouseSummary | null
+  try {
+    summary = JSON.parse(header) as ClickHouseSummary
+  } catch {
+    // A malformed header is treated the same as a missing one.
+    summary = null
+  }
+  return summary
+}
+
+/**
+ * Converts an HTTP result into rows.
+ *
+ * When the body is a JSON document with a `data` array (the `FORMAT JSON` layout) the
+ * array is returned as-is, with `rowCount` taken from the document's numeric `rows`
+ * field or, failing that, the array length. Otherwise no rows are returned and
+ * `rowCount` falls back to the summary header: `written_rows`, then `read_rows`, then 0.
+ *
+ * @param result - Body text and summary as returned by the HTTP helper.
+ */
+function parseRowsResult(result: ClickHouseHttpResult): ClickHouseRowsResult {
+  const body = result.text.trim()
+  if (body.length > 0) {
+    let payload: { data?: unknown[]; rows?: number } | null
+    try {
+      payload = JSON.parse(body) as { data?: unknown[]; rows?: number }
+    } catch {
+      // Body was not JSON (e.g. a non-SELECT statement); use the summary instead.
+      payload = null
+    }
+    if (payload && Array.isArray(payload.data)) {
+      const reported = payload.rows
+      return {
+        rows: payload.data,
+        rowCount: typeof reported === 'number' ? reported : payload.data.length,
+      }
+    }
+  }
+
+  const { summary } = result
+  const written = Number(summary?.written_rows ?? 0)
+  const read = Number(summary?.read_rows ?? 0)
+  return { rows: [], rowCount: written || read || 0 }
+}
+
+/**
+ * Read-only statement leaders that return a result set and never mutate data.
+ * The pattern is anchored at the start of the input and case-insensitive, so it only
+ * matches when the keyword is the very first token of the string being tested.
+ */
+const READ_ONLY_STATEMENT = /^(select|with|show|describe|desc|explain|exists)\b/i
+
+/**
+ * Normalizes the output format of a read statement to JSON so the HTTP response
+ * can always be parsed into rows. Strips every `FORMAT <name>` clause — wherever
+ * it sits relative to a trailing `SETTINGS` clause — and appends a single canonical
+ * `FORMAT JSON`. The `format()` function and `FORMAT`/format names appearing inside
+ * strings or comments are ignored (the scan runs on comment/string-masked SQL).
+ * Non-read statements are returned untouched (their own FORMAT, e.g. JSONEachRow
+ * for inserts, is preserved).
+ *
+ * Whether a statement is a read is decided on its first real keyword, i.e. after
+ * leading comments and opening parentheses have been skipped, so a commented or
+ * parenthesised SELECT is normalized as well.
+ *
+ * NOTE(review): the clause scan is lexical — an unquoted column named `format`
+ * followed by another word would also be treated as a FORMAT clause.
+ *
+ * @param query - Statement as supplied by the caller.
+ * @returns The statement without trailing semicolons, with `FORMAT JSON` appended
+ *   when it is a read statement.
+ */
+function ensureJsonFormat(query: string): string {
+  const trimmed = query.trim().replace(/;+\s*$/, '')
+  if (!READ_ONLY_STATEMENT.test(stripLeadingNoise(trimmed))) {
+    return trimmed
+  }
+
+  // Offsets found in the masked text are applied to the original text, which relies
+  // on `maskSqlNoise` keeping every character at the same index.
+  const masked = maskSqlNoise(trimmed)
+  const formatClause = /\bformat\s+[a-z0-9_]+\b/gi
+  let kept = ''
+  let cursor = 0
+  for (let match = formatClause.exec(masked); match !== null; match = formatClause.exec(masked)) {
+    kept += trimmed.slice(cursor, match.index)
+    cursor = match.index + match[0].length
+  }
+  kept += trimmed.slice(cursor)
+
+  // The newline keeps FORMAT out of a trailing `--` line comment.
+  return `${kept.replace(/\s+$/, '')}\nFORMAT JSON`
+}
+
+/**
+ * Replaces string literals ('...'), quoted identifiers ("..." and backtick-quoted),
+ * line comments (`--` to end of line) and block comments (slash-star to star-slash)
+ * with spaces so that structural scans (e.g. for statement-chaining semicolons) only
+ * see actual SQL code, not data or comments.
+ *
+ * The output always has exactly the same length as the input, so an index found in
+ * the masked text addresses the same character in the original text. Newlines that
+ * terminate a line comment are preserved.
+ *
+ * @param sql - SQL text to mask.
+ * @returns The text with every quoted/commented character replaced by a space.
+ */
+function maskSqlNoise(sql: string): string {
+  let out = ''
+  let i = 0
+  while (i < sql.length) {
+    const ch = sql[i]
+    if (ch === "'" || ch === '"' || ch === '`') {
+      const start = i
+      i++
+      while (i < sql.length && sql[i] !== ch) {
+        // A backslash escapes the next character inside '...' and "..." (not inside
+        // backticks), so both characters are skipped together.
+        i += ch !== '`' && sql[i] === '\\' ? 2 : 1
+      }
+      if (i < sql.length) {
+        i++ // closing quote
+      }
+      // A trailing backslash can push the cursor one past the end; clamp so the
+      // padding below never exceeds the input.
+      i = Math.min(i, sql.length)
+      out += ' '.repeat(i - start)
+      continue
+    }
+    if (ch === '-' && sql[i + 1] === '-') {
+      const newline = sql.indexOf('\n', i + 2)
+      const end = newline === -1 ? sql.length : newline
+      out += ' '.repeat(end - i)
+      i = end
+      continue
+    }
+    if (ch === '/' && sql[i + 1] === '*') {
+      const close = sql.indexOf('*/', i + 2)
+      const end = close === -1 ? sql.length : close + 2
+      out += ' '.repeat(end - i)
+      i = end
+      continue
+    }
+    out += ch
+    i++
+  }
+  return out
+}
+
+/**
+ * Reports whether any non-whitespace code follows a `;` in the statement, i.e.
+ * whether a second statement is chained after the first. Semicolons inside string
+ * literals, quoted identifiers and comments are ignored because the check runs on
+ * the masked SQL; a trailing semicolon followed only by whitespace/comments passes.
+ *
+ * @param sql - Statement to inspect.
+ */
+function hasChainedStatement(sql: string): boolean {
+  const code = maskSqlNoise(sql)
+  return code.search(/;\s*\S/) !== -1
+}
+
+/**
+ * Write/DDL statement shapes that must never run under the read-only query
+ * operation, even when wrapped by a leading `WITH` CTE (e.g. `WITH … INSERT INTO …`).
+ * Patterns require the keyword's statement context (e.g. `insert into`, `alter table`)
+ * so SQL functions/columns like `truncate(x)` or `created_at` are not false-positives.
+ * All patterns are case-insensitive and unanchored, so they match anywhere in the text
+ * they are tested against.
+ */
+const MUTATING_STATEMENT = [
+ /\binsert\s+into\b/i,
+ /\bdelete\s+from\b/i,
+ /\bupdate\s+[\w.`"]+\s+set\b/i,
+ /\balter\s+table\b/i,
+ /\b(?:create|attach)\s+(?:or\s+replace\s+)?(?:temporary\s+)?(?:table|database|dictionary|view|materialized\s+view|live\s+view|function|user|role)\b/i,
+ /\bdrop\s+(?:table|database|dictionary|view|column|partition|index|function|user|role)\b/i,
+ /\btruncate\s+table\b/i,
+ /\brename\s+(?:table|database|dictionary)\b/i,
+ /\bdetach\s+(?:table|database|dictionary|view|permanently)\b/i,
+ /\b(?:grant|revoke)\b/i,
+ /\boptimize\s+table\b/i,
+]
+
+/**
+ * Tells whether any write/DDL shape from `MUTATING_STATEMENT` occurs in the
+ * statement. Comments, string literals and quoted identifiers are masked out first,
+ * so keywords that only appear inside them do not count.
+ *
+ * @param sql - Statement to inspect.
+ */
+function isMutatingStatement(sql: string): boolean {
+  const code = maskSqlNoise(sql)
+  for (const shape of MUTATING_STATEMENT) {
+    if (shape.test(code)) {
+      return true
+    }
+  }
+  return false
+}
+
+/**
+ * Strips leading whitespace, `--` line comments, block comments (slash-star to
+ * star-slash) and opening parens from a statement so the read-only leader keyword
+ * can be detected even when a query starts with a comment (e.g. `-- note\nSELECT …`)
+ * or wrapping parens.
+ *
+ * @param sql - Statement to inspect.
+ * @returns The remainder starting at the first character that is none of the above;
+ *   an empty string when the input consists only of noise or ends inside an
+ *   unterminated comment.
+ */
+function stripLeadingNoise(sql: string): string {
+  let rest = sql.trim()
+  while (true) {
+    if (rest.startsWith('--')) {
+      const newline = rest.indexOf('\n')
+      rest = newline === -1 ? '' : rest.slice(newline + 1).trim()
+    } else if (rest.startsWith('/*')) {
+      const close = rest.indexOf('*/')
+      rest = close === -1 ? '' : rest.slice(close + 2).trim()
+    } else if (rest.startsWith('(')) {
+      rest = rest.slice(1).trim()
+    } else {
+      return rest
+    }
+  }
+}
+
+/**
+ * Runs a caller-supplied statement and returns its rows.
+ *
+ * With `enforceReadOnly` the statement is vetted before it is sent: its first keyword
+ * (after leading comments/parens) must be a read-only leader, it must not chain a
+ * second statement, and it must not contain a write/DDL shape. The request is then
+ * also sent with the server-side read-only flag. Read statements are normalized to
+ * `FORMAT JSON` in either mode.
+ *
+ * @param config - Connection settings.
+ * @param query - SQL statement to execute.
+ * @param options - `enforceReadOnly: true` enables the checks described above.
+ * @returns Parsed rows and row count.
+ * @throws Error when a read-only check fails or the request itself fails.
+ */
+export async function executeClickHouseQuery(
+  config: ClickHouseConnectionConfig,
+  query: string,
+  options: { enforceReadOnly?: boolean } = {}
+): Promise<ClickHouseRowsResult> {
+  if (options.enforceReadOnly) {
+    // Strip leading comments/parens so wrapped or commented selects still validate.
+    const leader = stripLeadingNoise(query)
+    if (!READ_ONLY_STATEMENT.test(leader)) {
+      throw new Error(
+        'The query operation only allows read-only statements (SELECT, WITH, SHOW, DESCRIBE, EXPLAIN, EXISTS). Use the Execute Raw SQL operation to run writes or DDL.'
+      )
+    }
+    if (hasChainedStatement(query)) {
+      throw new Error(
+        'The query operation only allows a single statement; chained statements separated by ";" are not allowed. Use the Execute Raw SQL operation to run multiple statements.'
+      )
+    }
+    if (isMutatingStatement(query)) {
+      throw new Error(
+        'The query operation only allows read-only statements; a write or DDL statement (e.g. INSERT/ALTER/DROP, including after a WITH clause) was detected. Use the Execute Raw SQL operation instead.'
+      )
+    }
+  }
+  const result = await clickhouseRequest(config, ensureJsonFormat(query), {
+    readOnly: options.enforceReadOnly,
+  })
+  return parseRowsResult(result)
+}
+
+/**
+ * Inserts one row using `INSERT … FORMAT JSONEachRow`.
+ *
+ * @param config - Connection settings.
+ * @param table - Target table; validated and backtick-quoted by `sanitizeIdentifier`.
+ * @param data - Column/value map serialised as a single JSON line.
+ * @returns No rows; `rowCount` is the summary header's `written_rows`, or 1 when the
+ *   header is missing or reports zero.
+ * @throws Error when the table name is invalid or the request fails.
+ */
+export async function executeClickHouseInsert(
+  config: ClickHouseConnectionConfig,
+  table: string,
+  data: Record<string, unknown>
+): Promise<ClickHouseRowsResult> {
+  const sanitizedTable = sanitizeIdentifier(table)
+  const statement = `INSERT INTO ${sanitizedTable} FORMAT JSONEachRow\n${JSON.stringify(data)}`
+  const result = await clickhouseRequest(config, statement)
+  const written = Number(result.summary?.written_rows ?? 0)
+  return { rows: [], rowCount: written || 1 }
+}
+
+/**
+ * Submits an `ALTER TABLE … UPDATE … WHERE …` statement.
+ *
+ * Column names are validated and quoted, values are rendered as inline literals by
+ * `formatValue`, and the WHERE clause must pass `validateWhereClause` before it is
+ * spliced into the statement.
+ *
+ * @param config - Connection settings.
+ * @param table - Target table.
+ * @param data - Column/value assignments; must contain at least one entry.
+ * @param where - Condition selecting the rows to update.
+ * @returns No rows; `rowCount` is the summary header's `written_rows` (0 if absent).
+ * @throws Error when the WHERE clause, an identifier, or an empty `data` is rejected,
+ *   or when the request fails.
+ */
+export async function executeClickHouseUpdate(
+  config: ClickHouseConnectionConfig,
+  table: string,
+  data: Record<string, unknown>,
+  where: string
+): Promise<ClickHouseRowsResult> {
+  validateWhereClause(where)
+  const sanitizedTable = sanitizeIdentifier(table)
+  const assignments = Object.entries(data)
+    .map(([column, value]) => `${sanitizeIdentifier(column)} = ${formatValue(value)}`)
+    .join(', ')
+
+  if (!assignments) {
+    throw new Error('Update data object cannot be empty')
+  }
+
+  const statement = `ALTER TABLE ${sanitizedTable} UPDATE ${assignments} WHERE ${where}`
+  const result = await clickhouseRequest(config, statement)
+  return { rows: [], rowCount: Number(result.summary?.written_rows ?? 0) }
+}
+
+/**
+ * Submits an `ALTER TABLE … DELETE WHERE …` statement.
+ *
+ * @param config - Connection settings.
+ * @param table - Target table; validated and backtick-quoted.
+ * @param where - Condition selecting the rows to delete; must pass `validateWhereClause`.
+ * @returns No rows; `rowCount` is the summary header's `written_rows` (0 if absent).
+ * @throws Error when the WHERE clause or table name is rejected, or the request fails.
+ */
+export async function executeClickHouseDelete(
+  config: ClickHouseConnectionConfig,
+  table: string,
+  where: string
+): Promise<ClickHouseRowsResult> {
+  validateWhereClause(where)
+  const sanitizedTable = sanitizeIdentifier(table)
+  const statement = `ALTER TABLE ${sanitizedTable} DELETE WHERE ${where}`
+  const result = await clickhouseRequest(config, statement)
+  return { rows: [], rowCount: Number(result.summary?.written_rows ?? 0) }
+}
+
+/**
+ * Builds a schema snapshot of `config.database` from `system.tables` and
+ * `system.columns`.
+ *
+ * Tables are ordered by name and columns by their position within each table. A table
+ * with no column rows gets an empty `columns` array.
+ *
+ * @param config - Connection settings; `config.database` selects the database.
+ * @returns One entry per table with engine, optional row total and column list.
+ * @throws Error when either request fails.
+ */
+export async function executeClickHouseIntrospect(
+  config: ClickHouseConnectionConfig
+): Promise<ClickHouseIntrospectionResult> {
+  const database = quoteString(config.database)
+
+  const tablesResult = await clickhouseRequest(
+    config,
+    `SELECT name, engine, total_rows FROM system.tables WHERE database = ${database} ORDER BY name FORMAT JSON`
+  )
+  const tableRows: ClickHouseTableRow[] = parseDataArray(tablesResult.text)
+
+  const columnsResult = await clickhouseRequest(
+    config,
+    `SELECT table, name, type, default_kind, default_expression, is_in_primary_key, is_in_sorting_key, position FROM system.columns WHERE database = ${database} ORDER BY table, position FORMAT JSON`
+  )
+  const columnRows: ClickHouseColumnRow[] = parseDataArray(columnsResult.text)
+
+  // Group column rows by owning table, preserving the query's position order.
+  const columnsByTable = new Map<
+    string,
+    ClickHouseIntrospectionResult['tables'][number]['columns']
+  >()
+  for (const column of columnRows) {
+    const columns = columnsByTable.get(column.table) ?? []
+    columns.push({
+      name: column.name,
+      type: column.type,
+      // Empty strings are reported as "not set".
+      defaultKind: column.default_kind || undefined,
+      defaultExpression: column.default_expression || undefined,
+      isInPrimaryKey: toBoolean(column.is_in_primary_key),
+      isInSortingKey: toBoolean(column.is_in_sorting_key),
+    })
+    columnsByTable.set(column.table, columns)
+  }
+
+  const tables = tableRows.map((table) => ({
+    name: table.name,
+    database: config.database,
+    engine: table.engine ?? '',
+    totalRows: table.total_rows != null ? Number(table.total_rows) : undefined,
+    columns: columnsByTable.get(table.name) ?? [],
+  }))
+
+  return { tables }
+}
+
+/**
+ * Extracts the `data` array from a `FORMAT JSON` response body.
+ *
+ * This is a deliberate best-effort parser: an empty body, invalid JSON, a JSON value
+ * that is not an object, or a `data` field that is not an array all yield `[]`.
+ *
+ * @typeParam T - Row shape the caller expects; it is asserted, not validated.
+ * @param text - Raw response body.
+ * @returns The rows, or an empty array when none can be extracted.
+ */
+function parseDataArray<T>(text: string): T[] {
+  const trimmed = text.trim()
+  if (!trimmed) return []
+  try {
+    const parsed = JSON.parse(trimmed) as { data?: T[] } | null
+    const rows = parsed?.data
+    return Array.isArray(rows) ? rows : []
+  } catch {
+    return []
+  }
+}
+
+/**
+ * Interprets a ClickHouse 0/1 flag that may arrive as a number or a string.
+ *
+ * @returns `true` only for the number `1` or the string `'1'`; `false` for anything else.
+ */
+function toBoolean(value: number | string | undefined): boolean {
+  if (value === 1) {
+    return true
+  }
+  return value === '1'
+}
+
+/**
+ * Renders a JavaScript value as an inline literal for a ClickHouse statement.
+ *
+ * - `null` / `undefined` and non-finite numbers become `NULL`
+ * - finite numbers are written as-is
+ * - booleans become `1` / `0`
+ * - objects (including arrays) are JSON-serialised and quoted as a string
+ * - everything else is converted with `String()` and quoted
+ *
+ * Quoting is delegated to `quoteString`.
+ */
+function formatValue(value: unknown): string {
+  if (value == null) {
+    return 'NULL'
+  }
+  switch (typeof value) {
+    case 'number':
+      return Number.isFinite(value) ? String(value) : 'NULL'
+    case 'boolean':
+      return value ? '1' : '0'
+    case 'object':
+      return quoteString(JSON.stringify(value))
+    default:
+      return quoteString(String(value))
+  }
+}
+
+/**
+ * Wraps a string in single quotes for inline use in a statement, prefixing every
+ * backslash and single quote in it with a backslash.
+ */
+function quoteString(value: string): string {
+  // One pass: each `\` or `'` is emitted as a backslash followed by itself.
+  const escaped = value.replace(/[\\']/g, '\\$&')
+  return "'" + escaped + "'"
+}
+
+/**
+ * Validates and backtick-quotes a ClickHouse identifier. Dotted names such as
+ * `database.table` are split on `.` and every segment is validated and quoted on
+ * its own, then re-joined with `.`.
+ *
+ * @param identifier - Plain or dot-qualified identifier.
+ * @returns The quoted identifier.
+ * @throws Error when any segment is not a valid identifier.
+ */
+export function sanitizeIdentifier(identifier: string): string {
+  // An identifier without a dot is simply a one-segment list.
+  const segments = identifier.split('.')
+  const quoted = segments.map((segment) => sanitizeSingleIdentifier(segment))
+  return quoted.join('.')
+}
+
+/**
+ * Validates one identifier segment and returns it wrapped in backticks.
+ *
+ * Backticks already present in the input are removed first; what remains must start
+ * with a letter or underscore and contain only letters, digits and underscores.
+ *
+ * @throws Error naming the offending identifier when validation fails.
+ */
+function sanitizeSingleIdentifier(identifier: string): string {
+  const bare = identifier.split('`').join('')
+  const isValid = /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(bare)
+  if (isValid) {
+    return '`' + bare + '`'
+  }
+  throw new Error(
+    `Invalid identifier: ${identifier}. Identifiers must start with a letter or underscore and contain only letters, numbers, and underscores.`
+  )
+}
+
+/**
+ * Guards a user-supplied WHERE clause before it is spliced into a statement.
+ * The actual checks live in the shared {@link validateSqlWhereClause} helper
+ * (defense-in-depth); this wrapper only turns a failed result into a thrown error.
+ *
+ * @param where - Condition text supplied by the caller.
+ * @throws Error carrying the helper's message when the clause is rejected.
+ */
+function validateWhereClause(where: string): void {
+  const verdict = validateSqlWhereClause(where, 'WHERE clause')
+  if (verdict.isValid) {
+    return
+  }
+  throw new Error(verdict.error)
+}
+
+/**
+ * Runs a SELECT statement (which must already include `FORMAT JSON`) and returns
+ * the parsed rows and row count.
+ *
+ * @param config - Connection settings.
+ * @param statement - Complete statement text, sent as-is.
+ * @throws Error when the request fails.
+ */
+async function runSelect(
+  config: ClickHouseConnectionConfig,
+  statement: string
+): Promise<ClickHouseRowsResult> {
+  const result = await clickhouseRequest(config, statement)
+  return parseRowsResult(result)
+}
+
+/**
+ * Runs a statement that does not return a result set (DDL or mutation) and
+ * returns the number of written rows reported by the summary header.
+ *
+ * @param config - Connection settings.
+ * @param statement - Complete statement text, sent as-is.
+ * @returns `written_rows` as a number, or 0 when the header or field is absent.
+ * @throws Error when the request fails.
+ */
+async function runStatement(
+  config: ClickHouseConnectionConfig,
+  statement: string
+): Promise<number> {
+  const result = await clickhouseRequest(config, statement)
+  return Number(result.summary?.written_rows ?? 0)
+}
+
+/**
+ * Rejects a free-form SQL fragment that contains a statement terminator (`;`) or
+ * any comment marker: a double dash, or either half of a block-comment delimiter.
+ *
+ * @param expression - Fragment to check.
+ * @param label - Name used at the start of the error message.
+ * @throws Error when a forbidden sequence is present.
+ */
+function validateExpression(expression: string, label: string): void {
+  const forbidden = [';', '--', '/*', '*/']
+  const hasForbidden = forbidden.some((sequence) => expression.includes(sequence))
+  if (hasForbidden) {
+    throw new Error(`${label} contains a disallowed character`)
+  }
+}
+
+/**
+ * Validates an ORDER BY / PARTITION BY expression that is spliced inside wrapping
+ * parentheses in the generated DDL. In addition to rejecting terminators/comments,
+ * it requires balanced parentheses so the expression cannot close the wrapping
+ * `(...)` early and append extra clauses (e.g. `id) SETTINGS …`).
+ *
+ * The balance check is quote-aware for all three quoting styles: single-quoted
+ * strings, double-quoted identifiers and backtick-quoted identifiers. Parentheses
+ * inside any of them are data, not structure, and are not counted. Backslash escapes
+ * are honoured inside '...' and "..." but not inside backticks, matching
+ * `maskSqlNoise`.
+ *
+ * @param expression - Expression text supplied by the caller.
+ * @param label - Clause name used at the start of error messages.
+ * @throws Error when the expression is empty, contains `;` or a comment marker, or
+ *   has unbalanced parentheses or an unterminated quote.
+ */
+function validateClauseExpression(expression: string, label: string): void {
+  const trimmed = expression.trim()
+  if (!trimmed) {
+    throw new Error(`${label} is required`)
+  }
+  if (/;|--|\/\*|\*\//.test(trimmed)) {
+    throw new Error(`${label} contains a disallowed sequence`)
+  }
+  let depth = 0
+  // The quote character that opened the current quoted run, or null outside quotes.
+  let quote: string | null = null
+  for (let i = 0; i < trimmed.length; i++) {
+    const ch = trimmed[i]
+    if (quote !== null) {
+      if (ch === '\\' && quote !== '`') i++
+      else if (ch === quote) quote = null
+      continue
+    }
+    if (ch === "'" || ch === '"' || ch === '`') quote = ch
+    else if (ch === '(') depth++
+    else if (ch === ')') {
+      depth--
+      if (depth < 0) {
+        // A closer with no opener would terminate the wrapping parenthesis.
+        throw new Error(`${label} has unbalanced parentheses`)
+      }
+    }
+  }
+  if (quote !== null || depth !== 0) {
+    throw new Error(`${label} has unbalanced parentheses or quotes`)
+  }
+}
+
+/**
+ * Validates a partition value for `DROP PARTITION`. ClickHouse partition values
+ * are literals (signed numbers or single-quoted strings) or a parenthesised tuple
+ * of such literals, so anything else is rejected — barewords like `ALL`, function
+ * calls, operators, and extra tokens that could broaden the statement beyond
+ * dropping a single partition.
+ *
+ * Parentheses must come as a pair: a bare value is exactly one literal, and a
+ * comma-separated list is only accepted inside `( … )`.
+ *
+ * @param partition - Partition expression supplied by the caller.
+ * @throws Error when the value is not a literal or a tuple of literals.
+ */
+function validatePartitionExpression(partition: string): void {
+  // One literal: a single-quoted string (backslash escapes allowed) or a signed number.
+  const literal = /'(?:[^'\\]|\\.)*'|-?\d+(?:\.\d+)?/.source
+  const partitionPattern = new RegExp(
+    `^(?:(?:${literal})|\\(\\s*(?:${literal})(?:\\s*,\\s*(?:${literal}))*\\s*\\))$`
+  )
+  if (!partitionPattern.test(partition.trim())) {
+    throw new Error(
+      "Partition must be a literal value or a tuple of literals (number or single-quoted string), e.g. 202401, '2024-01', or (2024, 'EU')"
+    )
+  }
+}
+
+/**
+ * Lists the databases recorded in `system.databases`, ordered by name.
+ *
+ * @param config - Connection settings.
+ * @returns Rows with `name`, `engine` and `comment`.
+ */
+export function executeClickHouseListDatabases(
+  config: ClickHouseConnectionConfig
+): Promise<ClickHouseRowsResult> {
+  return runSelect(
+    config,
+    'SELECT name, engine, comment FROM system.databases ORDER BY name FORMAT JSON'
+  )
+}
+
+/**
+ * Lists the tables of `config.database` from `system.tables`, ordered by name.
+ *
+ * @param config - Connection settings; `config.database` is the filter.
+ * @returns Rows with `name`, `engine`, `totalRows`, `totalBytes` and `comment`.
+ */
+export function executeClickHouseListTables(
+  config: ClickHouseConnectionConfig
+): Promise<ClickHouseRowsResult> {
+  return runSelect(
+    config,
+    `SELECT name, engine, total_rows AS totalRows, total_bytes AS totalBytes, comment FROM system.tables WHERE database = ${quoteString(config.database)} ORDER BY name FORMAT JSON`
+  )
+}
+
+/**
+ * Describes the columns of a table using `system.columns`, ordered by position.
+ *
+ * Any `db.` qualifier on `table` is discarded: the lookup always targets
+ * `config.database`.
+ *
+ * @param config - Connection settings.
+ * @param table - Table name, optionally qualified and/or backtick-quoted.
+ * @returns Rows with `name`, `type`, `defaultKind`, `defaultExpression`, `comment`,
+ *   `isInPrimaryKey` and `isInSortingKey`.
+ */
+export function executeClickHouseDescribeTable(
+  config: ClickHouseConnectionConfig,
+  table: string
+): Promise<ClickHouseRowsResult> {
+  const tableName = stripDatabasePrefix(table)
+  return runSelect(
+    config,
+    `SELECT name, type, default_kind AS defaultKind, default_expression AS defaultExpression, comment, is_in_primary_key AS isInPrimaryKey, is_in_sorting_key AS isInSortingKey FROM system.columns WHERE database = ${quoteString(config.database)} AND table = ${quoteString(tableName)} ORDER BY position FORMAT JSON`
+  )
+}
+
+/**
+ * Returns the DDL of a table as reported by `SHOW CREATE TABLE`.
+ *
+ * @param config - Connection settings.
+ * @param table - Table name; validated and backtick-quoted.
+ * @returns The DDL text, or an empty string when no row comes back or the value is
+ *   not a string.
+ * @throws Error when the table name is invalid or the request fails.
+ */
+export async function executeClickHouseShowCreateTable(
+  config: ClickHouseConnectionConfig,
+  table: string
+): Promise<string> {
+  const result = await runSelect(
+    config,
+    `SHOW CREATE TABLE ${sanitizeIdentifier(table)} FORMAT JSON`
+  )
+  const firstRow = result.rows[0] as Record<string, unknown> | undefined
+  if (!firstRow) {
+    return ''
+  }
+  // ClickHouse returns the DDL in a single String column (named `statement`);
+  // fall back to the first column value to stay robust to column-name changes.
+  const value = firstRow.statement ?? Object.values(firstRow)[0]
+  return typeof value === 'string' ? value : ''
+}
+
+/**
+ * Counts the rows of a table, optionally restricted by a WHERE clause.
+ *
+ * @param config - Connection settings.
+ * @param table - Table name; validated and backtick-quoted.
+ * @param where - Optional condition; ignored when blank, otherwise it must pass
+ *   `validateWhereClause`.
+ * @returns The count as a number (the value may arrive as a string), or 0 when the
+ *   response holds no row.
+ * @throws Error when the table name or WHERE clause is rejected, or the request fails.
+ */
+export async function executeClickHouseCountRows(
+  config: ClickHouseConnectionConfig,
+  table: string,
+  where?: string
+): Promise<number> {
+  let statement = `SELECT count() AS count FROM ${sanitizeIdentifier(table)}`
+  if (where?.trim()) {
+    validateWhereClause(where)
+    statement += ` WHERE ${where}`
+  }
+  const result = await runSelect(config, `${statement} FORMAT JSON`)
+  const firstRow = result.rows[0] as { count?: number | string } | undefined
+  return firstRow?.count != null ? Number(firstRow.count) : 0
+}
+
+/**
+ * Summarises the active parts of a table per partition, using `system.parts`.
+ *
+ * Any `db.` qualifier on `table` is discarded: the lookup always targets
+ * `config.database`.
+ *
+ * @param config - Connection settings.
+ * @param table - Table name, optionally qualified and/or backtick-quoted.
+ * @returns Rows with `partition`, `parts`, `rows` and `bytesOnDisk`, ordered by partition.
+ */
+export function executeClickHouseListPartitions(
+  config: ClickHouseConnectionConfig,
+  table: string
+): Promise<ClickHouseRowsResult> {
+  const tableName = stripDatabasePrefix(table)
+  return runSelect(
+    config,
+    `SELECT partition, count() AS parts, sum(rows) AS rows, sum(bytes_on_disk) AS bytesOnDisk FROM system.parts WHERE database = ${quoteString(config.database)} AND table = ${quoteString(tableName)} AND active GROUP BY partition ORDER BY partition FORMAT JSON`
+  )
+}
+
+/**
+ * Lists mutations of `config.database` from `system.mutations`, newest first.
+ *
+ * @param config - Connection settings.
+ * @param table - Optional table filter; a `db.` qualifier is discarded. Blank means
+ *   all tables.
+ * @param onlyRunning - When `true`, only mutations with `is_done = 0` are returned.
+ * @returns Rows with `table`, `mutationId`, `command`, `createTime`, `isDone`,
+ *   `partsToDo` and `latestFailReason`.
+ */
+export function executeClickHouseListMutations(
+  config: ClickHouseConnectionConfig,
+  table?: string,
+  onlyRunning = false
+): Promise<ClickHouseRowsResult> {
+  const filters = [`database = ${quoteString(config.database)}`]
+  if (table?.trim()) {
+    filters.push(`table = ${quoteString(stripDatabasePrefix(table))}`)
+  }
+  if (onlyRunning) {
+    filters.push('is_done = 0')
+  }
+  return runSelect(
+    config,
+    `SELECT table, mutation_id AS mutationId, command, create_time AS createTime, is_done AS isDone, parts_to_do AS partsToDo, latest_fail_reason AS latestFailReason FROM system.mutations WHERE ${filters.join(' AND ')} ORDER BY create_time DESC FORMAT JSON`
+  )
+}
+
+/**
+ * Lists the entries of `system.processes`, longest-running first.
+ *
+ * @param config - Connection settings.
+ * @returns Rows with `queryId`, `user`, `elapsedSeconds`, `memoryUsage` (human-readable
+ *   size) and `query`.
+ */
+export function executeClickHouseListRunningQueries(
+  config: ClickHouseConnectionConfig
+): Promise<ClickHouseRowsResult> {
+  return runSelect(
+    config,
+    'SELECT query_id AS queryId, user, toFloat64(elapsed) AS elapsedSeconds, formatReadableSize(memory_usage) AS memoryUsage, query FROM system.processes ORDER BY elapsed DESC FORMAT JSON'
+  )
+}
+
+/**
+ * Aggregates storage statistics over the active parts of `config.database`, one row
+ * per table, largest on-disk size first.
+ *
+ * @param config - Connection settings.
+ * @param table - Optional table filter; a `db.` qualifier is discarded. Blank means
+ *   all tables.
+ * @returns Rows with `database`, `table`, `rows`, `bytesOnDisk`, `sizeOnDisk`
+ *   (human-readable) and `parts`.
+ */
+export function executeClickHouseTableStats(
+  config: ClickHouseConnectionConfig,
+  table?: string
+): Promise<ClickHouseRowsResult> {
+  const filters = ['active', `database = ${quoteString(config.database)}`]
+  if (table?.trim()) {
+    filters.push(`table = ${quoteString(stripDatabasePrefix(table))}`)
+  }
+  return runSelect(
+    config,
+    `SELECT database, table, sum(rows) AS rows, sum(bytes_on_disk) AS bytesOnDisk, formatReadableSize(sum(bytes_on_disk)) AS sizeOnDisk, count() AS parts FROM system.parts WHERE ${filters.join(' AND ')} GROUP BY database, table ORDER BY sum(bytes_on_disk) DESC FORMAT JSON`
+  )
+}
+
+/**
+ * Lists the entries of `system.clusters`, ordered by cluster, shard and replica.
+ *
+ * @param config - Connection settings.
+ * @returns Rows with `cluster`, `shardNum`, `replicaNum`, `hostName`, `port` and `isLocal`.
+ */
+export function executeClickHouseListClusters(
+  config: ClickHouseConnectionConfig
+): Promise<ClickHouseRowsResult> {
+  return runSelect(
+    config,
+    'SELECT cluster, shard_num AS shardNum, replica_num AS replicaNum, host_name AS hostName, port, is_local AS isLocal FROM system.clusters ORDER BY cluster, shard_num, replica_num FORMAT JSON'
+  )
+}
+
+/**
+ * Creates a database if it does not already exist.
+ *
+ * @param config - Connection settings.
+ * @param name - Database name; validated and backtick-quoted.
+ * @throws Error when the name is invalid or the request fails.
+ */
+export async function executeClickHouseCreateDatabase(
+  config: ClickHouseConnectionConfig,
+  name: string
+): Promise<void> {
+  await clickhouseRequest(config, `CREATE DATABASE IF NOT EXISTS ${sanitizeIdentifier(name)}`)
+}
+
+/**
+ * Drops a database if it exists.
+ *
+ * @param config - Connection settings.
+ * @param name - Database name; validated and backtick-quoted.
+ * @throws Error when the name is invalid or the request fails.
+ */
+export async function executeClickHouseDropDatabase(
+  config: ClickHouseConnectionConfig,
+  name: string
+): Promise<void> {
+  await clickhouseRequest(config, `DROP DATABASE IF EXISTS ${sanitizeIdentifier(name)}`)
+}
+
+/**
+ * Validates a single ClickHouse column type. Types may legitimately contain
+ * commas, single-quoted strings, `=`, and `-` inside their parameter parentheses
+ * (e.g. `Decimal(10, 2)`, `Enum8('a' = 1, 'b' = -2)`, `Map(String, UInt64)`,
+ * `Array(Tuple(a UInt8, b String))`). We allow those but reject anything that
+ * could break out of the single type literal and inject another column or SQL:
+ * comment/terminator sequences, a top-level (unparenthesised) comma, or an
+ * unbalanced closing paren.
+ *
+ * Both quote characters permitted by the whitelist are tracked: parentheses and
+ * commas inside a single-quoted string or a double-quoted identifier are data and
+ * do not count towards the balance.
+ *
+ * @param type - Column type text supplied by the caller.
+ * @throws Error (`Invalid column type: …`) when any check fails.
+ */
+function validateColumnType(type: string): void {
+  const trimmed = type.trim()
+  if (!trimmed || !/^[A-Za-z_]/.test(trimmed)) {
+    throw new Error(`Invalid column type: ${type}`)
+  }
+  if (!/^[A-Za-z0-9_(),.\s'"=-]+$/.test(trimmed) || /--|;/.test(trimmed)) {
+    throw new Error(`Invalid column type: ${type}`)
+  }
+  let depth = 0
+  // The quote character that opened the current quoted run, or null outside quotes.
+  let quote: string | null = null
+  for (let i = 0; i < trimmed.length; i++) {
+    const ch = trimmed[i]
+    if (quote !== null) {
+      if (ch === '\\') i++
+      else if (ch === quote) quote = null
+      continue
+    }
+    if (ch === "'" || ch === '"') quote = ch
+    else if (ch === '(') depth++
+    else if (ch === ')') {
+      depth--
+      if (depth < 0) throw new Error(`Invalid column type: ${type}`)
+    } else if (ch === ',' && depth === 0) {
+      // A top-level comma would start another column definition.
+      throw new Error(`Invalid column type: ${type}`)
+    }
+  }
+  if (quote !== null || depth !== 0) {
+    throw new Error(`Invalid column type: ${type}`)
+  }
+}
+
+/**
+ * Creates a table (`CREATE TABLE IF NOT EXISTS`) from validated pieces.
+ *
+ * Every caller-supplied fragment is checked before it is spliced into the DDL:
+ * column names are quoted identifiers, column types go through `validateColumnType`,
+ * the engine must be a name optionally followed by one argument list, and
+ * ORDER BY / PARTITION BY go through `validateClauseExpression`. Engine arguments,
+ * when present, must be paren-balanced on their own so the argument list cannot be
+ * closed early to smuggle in further clauses.
+ *
+ * @param config - Connection settings.
+ * @param table - Table name; validated and backtick-quoted.
+ * @param columns - Column definitions; at least one, each with a name and a type.
+ * @param engine - Table engine, e.g. `MergeTree` or `ReplacingMergeTree(version)`.
+ * @param orderBy - ORDER BY expression (wrapped in parentheses in the DDL).
+ * @param partitionBy - Optional PARTITION BY expression (also wrapped).
+ * @throws Error when any fragment fails validation or the request fails.
+ */
+export async function executeClickHouseCreateTable(
+  config: ClickHouseConnectionConfig,
+  table: string,
+  columns: Array<{ name: string; type: string }>,
+  engine: string,
+  orderBy: string,
+  partitionBy?: string
+): Promise<void> {
+  if (!Array.isArray(columns) || columns.length === 0) {
+    throw new Error('At least one column definition is required')
+  }
+
+  const columnDefs = columns.map((column) => {
+    if (!column?.name || !column?.type) {
+      throw new Error('Each column requires a name and type')
+    }
+    validateColumnType(column.type)
+    return `${sanitizeIdentifier(column.name)} ${column.type.trim()}`
+  })
+
+  const engineMatch = /^[A-Za-z][A-Za-z0-9]*(?:\((.*)\))?$/.exec(engine.trim())
+  if (!engineMatch) {
+    throw new Error(`Invalid table engine: ${engine}`)
+  }
+  validateExpression(engine, 'Engine')
+  const engineArgs = engineMatch[1]
+  if (engineArgs?.trim()) {
+    // The shape regex is greedy, so `Name() … (x)` also matches it. Requiring the
+    // captured arguments to be balanced rejects a `)` that closes the list early.
+    validateClauseExpression(engineArgs, 'Engine')
+  }
+
+  if (!orderBy?.trim()) {
+    throw new Error('ORDER BY expression is required')
+  }
+  validateClauseExpression(orderBy, 'ORDER BY')
+
+  let statement = `CREATE TABLE IF NOT EXISTS ${sanitizeIdentifier(table)} (${columnDefs.join(', ')}) ENGINE = ${engine.trim()}`
+  if (partitionBy?.trim()) {
+    validateClauseExpression(partitionBy, 'PARTITION BY')
+    statement += ` PARTITION BY (${partitionBy.trim()})`
+  }
+  statement += ` ORDER BY (${orderBy.trim()})`
+
+  await clickhouseRequest(config, statement)
+}
+
+/**
+ * Drops a table if it exists.
+ *
+ * @param config - Connection settings.
+ * @param table - Table name; validated and backtick-quoted.
+ * @throws Error when the name is invalid or the request fails.
+ */
+export async function executeClickHouseDropTable(
+  config: ClickHouseConnectionConfig,
+  table: string
+): Promise<void> {
+  await clickhouseRequest(config, `DROP TABLE IF EXISTS ${sanitizeIdentifier(table)}`)
+}
+
+/**
+ * Removes all rows of a table if the table exists.
+ *
+ * @param config - Connection settings.
+ * @param table - Table name; validated and backtick-quoted.
+ * @throws Error when the name is invalid or the request fails.
+ */
+export async function executeClickHouseTruncateTable(
+  config: ClickHouseConnectionConfig,
+  table: string
+): Promise<void> {
+  await clickhouseRequest(config, `TRUNCATE TABLE IF EXISTS ${sanitizeIdentifier(table)}`)
+}
+
+/**
+ * Renames a table.
+ *
+ * @param config - Connection settings.
+ * @param fromTable - Current table name; validated and backtick-quoted.
+ * @param toTable - New table name; validated and backtick-quoted.
+ * @throws Error when either name is invalid or the request fails.
+ */
+export async function executeClickHouseRenameTable(
+  config: ClickHouseConnectionConfig,
+  fromTable: string,
+  toTable: string
+): Promise<void> {
+  await clickhouseRequest(
+    config,
+    `RENAME TABLE ${sanitizeIdentifier(fromTable)} TO ${sanitizeIdentifier(toTable)}`
+  )
+}
+
+/**
+ * Issues `OPTIMIZE TABLE` for a table.
+ *
+ * @param config - Connection settings.
+ * @param table - Table name; validated and backtick-quoted.
+ * @param final - When `true`, the `FINAL` modifier is appended.
+ * @throws Error when the name is invalid or the request fails.
+ */
+export async function executeClickHouseOptimizeTable(
+  config: ClickHouseConnectionConfig,
+  table: string,
+  final: boolean
+): Promise<void> {
+  await clickhouseRequest(
+    config,
+    `OPTIMIZE TABLE ${sanitizeIdentifier(table)}${final ? ' FINAL' : ''}`
+  )
+}
+
+/**
+ * Drops one partition of a table (`ALTER TABLE … DROP PARTITION …`).
+ *
+ * @param config - Connection settings.
+ * @param table - Table name; validated and backtick-quoted.
+ * @param partition - Partition value; must pass `validatePartitionExpression`.
+ * @throws Error when the partition or table name is rejected, or the request fails.
+ */
+export async function executeClickHouseDropPartition(
+  config: ClickHouseConnectionConfig,
+  table: string,
+  partition: string
+): Promise<void> {
+  validatePartitionExpression(partition)
+  await clickhouseRequest(
+    config,
+    `ALTER TABLE ${sanitizeIdentifier(table)} DROP PARTITION ${partition.trim()}`
+  )
+}
+
+/**
+ * Issues `KILL QUERY … SYNC` for the query with the given id and returns whatever
+ * rows the server reports for the command.
+ *
+ * @param config - Connection settings.
+ * @param queryId - Id of the query to kill; embedded as a quoted string literal.
+ */
+export function executeClickHouseKillQuery(
+  config: ClickHouseConnectionConfig,
+  queryId: string
+): Promise<ClickHouseRowsResult> {
+  return runSelect(config, `KILL QUERY WHERE query_id = ${quoteString(queryId)} SYNC FORMAT JSON`)
+}
+
+/**
+ * Inserts several rows in one `INSERT … FORMAT JSONEachRow` statement, one JSON
+ * document per line.
+ *
+ * @param config - Connection settings.
+ * @param table - Target table; validated and backtick-quoted.
+ * @param rows - Rows to insert; must be a non-empty array.
+ * @returns No rows; `rowCount` is the summary header's `written_rows`, or the number
+ *   of submitted rows when the header is missing or reports zero.
+ * @throws Error when `rows` is empty, the table name is invalid, or the request fails.
+ */
+export async function executeClickHouseInsertRows(
+  config: ClickHouseConnectionConfig,
+  table: string,
+  rows: Array<Record<string, unknown>>
+): Promise<ClickHouseRowsResult> {
+  if (!Array.isArray(rows) || rows.length === 0) {
+    throw new Error('At least one row is required')
+  }
+  const sanitizedTable = sanitizeIdentifier(table)
+  const payload = rows.map((row) => JSON.stringify(row)).join('\n')
+  const statement = `INSERT INTO ${sanitizedTable} FORMAT JSONEachRow\n${payload}`
+  const written = await runStatement(config, statement)
+  return { rows: [], rowCount: written || rows.length }
+}
+
+/**
+ * Returns the unqualified table name: everything after the last `.` of the input,
+ * with all backticks removed. An input without a dot is returned whole (minus
+ * backticks).
+ */
+function stripDatabasePrefix(table: string): string {
+  // lastIndexOf yields -1 when there is no dot, which makes the slice start at 0.
+  const lastSegment = table.slice(table.lastIndexOf('.') + 1)
+  return lastSegment.split('`').join('')
+}
diff --git a/apps/sim/app/api/tools/image/route.ts b/apps/sim/app/api/tools/image/route.ts
index 6476b53f5c9..e6560c71f64 100644
--- a/apps/sim/app/api/tools/image/route.ts
+++ b/apps/sim/app/api/tools/image/route.ts
@@ -39,7 +39,12 @@ const MAX_IMAGE_BYTES = 25 * 1024 * 1024
const MAX_IMAGE_JSON_BYTES = Math.ceil((MAX_IMAGE_BYTES * 4) / 3) + 256 * 1024
export const dynamic = 'force-dynamic'
-export const maxDuration = 600
+/**
+ * Mirrors the maximum plan execution timeout (enterprise async, 90 minutes = 5400 s)
+ * used by `getMaxExecutionTimeout()` for the provider polling loop below. Next.js requires
+ * a static literal for `maxDuration`, so this value must be kept in sync with that source.
+ */
+export const maxDuration = 5400
type ImageProvider = (typeof imageProviders)[number]
diff --git a/apps/sim/app/api/tools/stt/route.ts b/apps/sim/app/api/tools/stt/route.ts
index a320bcce008..fd9a6dc12d7 100644
--- a/apps/sim/app/api/tools/stt/route.ts
+++ b/apps/sim/app/api/tools/stt/route.ts
@@ -7,7 +7,7 @@ import { sttToolContract } from '@/lib/api/contracts/tools/media/stt'
import { getValidationErrorMessage, parseRequest, validationErrorResponse } from '@/lib/api/server'
import { extractAudioFromVideo, isVideoFile } from '@/lib/audio/extractor'
import { checkInternalAuth } from '@/lib/auth/hybrid'
-import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/core/execution-limits'
+import { getMaxExecutionTimeout } from '@/lib/core/execution-limits'
import {
secureFetchWithPinnedIP,
validateUrlWithDNS,
@@ -25,7 +25,12 @@ const logger = createLogger('SttProxyAPI')
const ELEVENLABS_STT_MODEL = 'scribe_v2'
export const dynamic = 'force-dynamic'
-export const maxDuration = 300 // 5 minutes for large files
+/**
+ * Mirrors the maximum plan execution timeout (enterprise async, 90 minutes = 5400 s)
+ * used by `getMaxExecutionTimeout()` for the transcript polling loop below. Next.js requires
+ * a static literal for `maxDuration`, so this value must be kept in sync with that source.
+ */
+export const maxDuration = 5400
export const POST = withRouteHandler(async (request: NextRequest) => {
const requestId = generateId()
@@ -629,7 +634,7 @@ async function transcribeWithAssemblyAI(
let transcript: any
let attempts = 0
const pollIntervalMs = 5000
- const maxAttempts = Math.ceil(DEFAULT_EXECUTION_TIMEOUT_MS / pollIntervalMs)
+ const maxAttempts = Math.ceil(getMaxExecutionTimeout() / pollIntervalMs)
while (attempts < maxAttempts) {
const statusResponse = await fetch(`https://api.assemblyai.com/v2/transcript/${id}`, {
diff --git a/apps/sim/app/api/tools/textract/parse/route.ts b/apps/sim/app/api/tools/textract/parse/route.ts
index 48e6f07899f..b93cbbed4d9 100644
--- a/apps/sim/app/api/tools/textract/parse/route.ts
+++ b/apps/sim/app/api/tools/textract/parse/route.ts
@@ -6,7 +6,7 @@ import { type NextRequest, NextResponse } from 'next/server'
import { textractParseContract } from '@/lib/api/contracts/tools/media/document-parse'
import { getValidationErrorMessage, parseRequest } from '@/lib/api/server'
import { checkInternalAuth } from '@/lib/auth/hybrid'
-import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/core/execution-limits'
+import { getMaxExecutionTimeout } from '@/lib/core/execution-limits'
import { validateS3BucketName } from '@/lib/core/security/input-validation'
import {
secureFetchWithPinnedIP,
@@ -22,7 +22,12 @@ import {
import { assertToolFileAccess } from '@/app/api/files/authorization'
export const dynamic = 'force-dynamic'
-export const maxDuration = 300 // 5 minutes for large multi-page PDF processing
+/**
+ * Mirrors the maximum plan execution timeout (enterprise async, 90 minutes = 5400 s)
+ * used by `getMaxExecutionTimeout()` for the job polling loop below. Next.js requires a
+ * static literal for `maxDuration`, so this value must be kept in sync with that source.
+ */
+export const maxDuration = 5400
const logger = createLogger('TextractParseAPI')
@@ -184,7 +189,7 @@ async function pollForJobCompletion(
requestId: string
): Promise> {
const pollIntervalMs = 5000
- const maxPollTimeMs = DEFAULT_EXECUTION_TIMEOUT_MS
+ const maxPollTimeMs = getMaxExecutionTimeout()
const maxAttempts = Math.ceil(maxPollTimeMs / pollIntervalMs)
const getTarget = useAnalyzeDocument
diff --git a/apps/sim/app/api/tools/video/route.ts b/apps/sim/app/api/tools/video/route.ts
index 1110432a473..9980121ed4c 100644
--- a/apps/sim/app/api/tools/video/route.ts
+++ b/apps/sim/app/api/tools/video/route.ts
@@ -28,7 +28,12 @@ const MAX_VIDEO_REFERENCE_IMAGE_BYTES = 25 * 1024 * 1024
const MAX_VIDEO_JSON_BYTES = 2 * 1024 * 1024
export const dynamic = 'force-dynamic'
-export const maxDuration = 600 // 10 minutes for video generation
+/**
+ * Mirrors the maximum plan execution timeout (enterprise async, 90 minutes) used by
+ * `getMaxExecutionTimeout()` for the provider polling loops below. Next.js requires a
+ * static literal for `maxDuration`, so this value must be kept in sync with that source.
+ */
+export const maxDuration = 5400
async function readVideoResponseBuffer(response: Response, label: string): Promise {
return readResponseToBufferWithLimit(response, {
diff --git a/apps/sim/app/api/v1/tables/[tableId]/rows/route.ts b/apps/sim/app/api/v1/tables/[tableId]/rows/route.ts
index e736a859eaa..55cf776dc7b 100644
--- a/apps/sim/app/api/v1/tables/[tableId]/rows/route.ts
+++ b/apps/sim/app/api/v1/tables/[tableId]/rows/route.ts
@@ -1,8 +1,5 @@
-import { db } from '@sim/db'
-import { userTableRows } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
-import { and, eq, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import {
type V1BatchInsertTableRowsBody,
@@ -24,13 +21,13 @@ import {
deleteRowsByFilter,
deleteRowsByIds,
insertRow,
- USER_TABLE_ROWS_SQL_NAME,
updateRowsByFilter,
validateBatchRows,
validateRowData,
validateRowSize,
} from '@/lib/table'
-import { buildFilterClause, buildSortClause, TableQueryValidationError } from '@/lib/table/sql'
+import { queryRows } from '@/lib/table/service'
+import { TableQueryValidationError } from '@/lib/table/sql'
import { accessError, checkAccess } from '@/app/api/table/utils'
import {
checkRateLimit,
@@ -153,92 +150,33 @@ export const GET = withRouteHandler(async (request: NextRequest, context: TableR
return NextResponse.json({ error: 'Invalid workspace ID' }, { status: 400 })
}
- const baseConditions = [
- eq(userTableRows.tableId, tableId),
- eq(userTableRows.workspaceId, validated.workspaceId),
- ]
-
- const schema = table.schema as TableSchema
-
- if (validated.filter) {
- const filterClause = buildFilterClause(
- validated.filter as Filter,
- USER_TABLE_ROWS_SQL_NAME,
- schema.columns
- )
- if (filterClause) {
- baseConditions.push(filterClause)
- }
- }
-
- let query = db
- .select({
- id: userTableRows.id,
- data: userTableRows.data,
- position: userTableRows.position,
- createdAt: userTableRows.createdAt,
- updatedAt: userTableRows.updatedAt,
- })
- .from(userTableRows)
- .where(and(...baseConditions))
-
- if (validated.sort) {
- const sortClause = buildSortClause(validated.sort, USER_TABLE_ROWS_SQL_NAME, schema.columns)
- if (sortClause) {
- query = query.orderBy(sortClause) as typeof query
- } else {
- query = query.orderBy(userTableRows.position) as typeof query
- }
- } else {
- query = query.orderBy(userTableRows.position) as typeof query
- }
-
- const rowsPromise = query.limit(validated.limit).offset(validated.offset)
-
- let totalCount: number | null = null
- if (validated.includeTotal) {
- const countQuery = db
- .select({ count: sql`count(*)` })
- .from(userTableRows)
- .where(and(...baseConditions))
- const [countResult, rows] = await Promise.all([countQuery, rowsPromise])
- totalCount = Number(countResult[0].count)
- return NextResponse.json({
- success: true,
- data: {
- rows: rows.map((r) => ({
- id: r.id,
- data: r.data,
- position: r.position,
- createdAt:
- r.createdAt instanceof Date ? r.createdAt.toISOString() : String(r.createdAt),
- updatedAt:
- r.updatedAt instanceof Date ? r.updatedAt.toISOString() : String(r.updatedAt),
- })),
- rowCount: rows.length,
- totalCount,
- limit: validated.limit,
- offset: validated.offset,
- },
- })
- }
-
- const rows = await rowsPromise
+ const result = await queryRows(
+ table,
+ {
+ filter: validated.filter as Filter | undefined,
+ sort: validated.sort,
+ limit: validated.limit,
+ offset: validated.offset,
+ includeTotal: validated.includeTotal,
+ withExecutions: false,
+ },
+ requestId
+ )
return NextResponse.json({
success: true,
data: {
- rows: rows.map((r) => ({
+ rows: result.rows.map((r) => ({
id: r.id,
data: r.data,
position: r.position,
createdAt: r.createdAt instanceof Date ? r.createdAt.toISOString() : String(r.createdAt),
updatedAt: r.updatedAt instanceof Date ? r.updatedAt.toISOString() : String(r.updatedAt),
})),
- rowCount: rows.length,
- totalCount,
- limit: validated.limit,
- offset: validated.offset,
+ rowCount: result.rowCount,
+ totalCount: result.totalCount,
+ limit: result.limit,
+ offset: result.offset,
},
})
} catch (error) {
diff --git a/apps/sim/app/chat/components/message/message.tsx b/apps/sim/app/chat/components/message/message.tsx
index 5bd6ac264bf..eb2f0e5c3e7 100644
--- a/apps/sim/app/chat/components/message/message.tsx
+++ b/apps/sim/app/chat/components/message/message.tsx
@@ -38,6 +38,49 @@ export interface ChatMessage {
files?: ChatFile[]
}
+const HTML_ESCAPES: Record<string, string> = {
+ '&': '&amp;',
+ '<': '&lt;',
+ '>': '&gt;',
+ '"': '&quot;',
+ "'": '&#39;',
+} as const
+
+/**
+ * Escapes HTML entities so untrusted strings are safe to interpolate into markup.
+ */
+function escapeHtml(value: string): string {
+ return value.replace(/[&<>"']/g, (c) => HTML_ESCAPES[c] || c)
+}
+
+/**
+ * Opens an image attachment preview in a new tab via a blob URL,
+ * escaping the user-controlled filename and data URL to prevent XSS.
+ */
+function openAttachmentPreview(name: string, dataUrl: string): void {
+ const safeName = escapeHtml(name)
+ const safeUrl = escapeHtml(dataUrl)
+ const html = `
+
+
+
+ ${safeName}
+
+
+
+
+
+
+ `
+ const blob = new Blob([html], { type: 'text/html' })
+ const blobUrl = URL.createObjectURL(blob)
+ window.open(blobUrl, '_blank', 'noopener,noreferrer')
+ setTimeout(() => URL.revokeObjectURL(blobUrl), 60_000)
+}
+
export const ClientChatMessage = memo(
function ClientChatMessage({ message }: { message: ChatMessage }) {
const [isCopied, setIsCopied] = useState(false)
@@ -103,25 +146,7 @@ export const ClientChatMessage = memo(
if (validDataUrl?.startsWith('data:')) {
e.preventDefault()
e.stopPropagation()
- const newWindow = window.open('', '_blank')
- if (newWindow) {
- newWindow.document.write(`
-
-
-
- ${attachment.name}
-
-
-
-
-
-
- `)
- newWindow.document.close()
- }
+ openAttachmentPreview(attachment.name, validDataUrl)
}
}}
onKeyDown={(event) => {
@@ -129,17 +154,7 @@ export const ClientChatMessage = memo(
if (!validDataUrl?.startsWith('data:')) return
if (event.key === 'Enter' || event.key === ' ') {
event.preventDefault()
- const newWindow = window.open('', '_blank')
- if (newWindow) {
- newWindow.document.write(`
-
- ${attachment.name}
-
-
-
-
- `)
- }
+ openAttachmentPreview(attachment.name, validDataUrl)
}
}}
>
diff --git a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/cells/cell-render.tsx b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/cells/cell-render.tsx
index 065385a9f05..c4c7904a711 100644
--- a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/cells/cell-render.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/cells/cell-render.tsx
@@ -22,6 +22,7 @@ export type CellRenderKind =
| { kind: 'error' }
| { kind: 'waiting'; labels: string[] }
| { kind: 'not-found' }
+ | { kind: 'no-output' }
// Plain typed cells
| { kind: 'boolean'; checked: boolean }
| { kind: 'json'; text: string }
@@ -106,6 +107,9 @@ export function resolveCellRender({
if (exec?.status === 'error') return { kind: 'error' }
// Enrichment ran to completion but matched nothing → "Not found".
if (isEnrichmentOutput && exec?.status === 'completed') return { kind: 'not-found' }
+ // Workflow output: the group's run completed but this block produced no
+ // value for the cell → grey "No output" (distinct from a never-run blank).
+ if (exec?.status === 'completed') return { kind: 'no-output' }
return { kind: 'empty' }
}
@@ -394,6 +398,15 @@ export function CellRender({ kind, isEditing }: CellRenderProps): React.ReactEle
)
+ case 'no-output':
+ return (
+
+
+ No output
+
+
+ )
+
case 'empty':
return null
diff --git a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/data-row.tsx b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/data-row.tsx
index 219a3376e78..c6b1703d0f8 100644
--- a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/data-row.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/data-row.tsx
@@ -194,10 +194,28 @@ export const DataRow = React.memo(function DataRow({
}, [workflowGroups, row])
const isMultiCell = sel !== null && (sel.startRow !== sel.endRow || sel.startCol !== sel.endCol)
const isRowSelected = isRowChecked
+ /**
+ * Whether the selection's left edge sits at column 0 for this row. The blue
+ * edge is drawn inside the sticky checkbox cell — over its gray right
+ * border — rather than as the col-0 overlay's `border-l`, so the sticky
+ * cell can never paint over it and the gray/blue lines never double up at
+ * the column boundary. The strip overlaps the row gridlines (`-top-px` /
+ * `-bottom-px`) so consecutive selected rows form one continuous line.
+ */
+ const rowInRange = sel !== null && rowIndex >= sel.startRow && rowIndex <= sel.endRow
+ const isLeftEdgeSelected = isRowChecked || (isMultiCell && rowInRange && sel!.startCol === 0)
return (
onContextMenu(e, row)}>
+ {isLeftEdgeSelected && (
+
+ )}
diff --git a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/headers/column-header-menu.tsx b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/headers/column-header-menu.tsx
index 463927819f7..f6218e2bac2 100644
--- a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/headers/column-header-menu.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/headers/column-header-menu.tsx
@@ -237,6 +237,10 @@ export const ColumnHeaderMenu = React.memo(function ColumnHeaderMenu({
setMenuOpen(true)
}
+ // Column whose workflow source block was deleted — the header icon swaps to
+ // `WorkflowX` with an explanatory tooltip.
+ const blockMissing = Boolean(sourceInfo?.blockMissing)
+
return (
{column.workflowGroupId ? column.headerLabel : column.name}
@@ -305,6 +311,7 @@ export const ColumnHeaderMenu = React.memo(function ColumnHeaderMenu({
type={column.type}
isWorkflowColumn={!!column.workflowGroupId && ownGroup?.type !== 'enrichment'}
blockIconInfo={sourceInfo?.blockIconInfo}
+ blockMissing={blockMissing}
/>
{column.workflowGroupId ? column.headerLabel : column.name}
diff --git a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/headers/column-type-icon.tsx b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/headers/column-type-icon.tsx
index e6a5a015f30..d8c7bbded1e 100644
--- a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/headers/column-type-icon.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/headers/column-type-icon.tsx
@@ -1,6 +1,7 @@
'use client'
import type React from 'react'
+import { Tooltip } from '@/components/emcn'
import {
Calendar as CalendarIcon,
PlayOutline,
@@ -8,6 +9,7 @@ import {
TypeJson,
TypeNumber,
TypeText,
+ WorkflowX,
} from '@/components/emcn/icons'
import type { BlockIconInfo } from '../types'
@@ -32,16 +34,39 @@ interface ColumnTypeIconProps {
* ignored — icons render in the plain `text-[var(--text-icon)]` tone like
* every other column-type icon, no per-block tint. */
blockIconInfo?: BlockIconInfo
+ /** Workflow-output column whose source block no longer exists in the
+ * workflow — renders the `WorkflowX` "not found" icon with a tooltip. */
+ blockMissing?: boolean
}
/**
* Tiny icon shown next to a column header. Workflow-output columns get the
* producing block's icon (falling back to `PlayOutline`); plain columns get
* their scalar type icon. Both render in the same `text-[var(--text-icon)]`
- * tone — no per-workflow color, no colored swatch.
+ * tone — no per-workflow color, no colored swatch. A workflow column whose
+ * source block was deleted renders a `WorkflowX` with an explanatory tooltip.
*/
-export function ColumnTypeIcon({ type, isWorkflowColumn, blockIconInfo }: ColumnTypeIconProps) {
+export function ColumnTypeIcon({
+ type,
+ isWorkflowColumn,
+ blockIconInfo,
+ blockMissing,
+}: ColumnTypeIconProps) {
if (isWorkflowColumn) {
+ if (blockMissing) {
+ return (
+
+
+
+
+
+
+
+ This column's source block no longer exists in the workflow.
+
+
+ )
+ }
const Icon = blockIconInfo?.icon ?? PlayOutline
return
}
diff --git a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/table-grid.tsx b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/table-grid.tsx
index de9888b539c..f5057e602df 100644
--- a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/table-grid.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/table-grid.tsx
@@ -837,9 +837,12 @@ export function TableGrid({
function handleInsertRow(offset: 0 | 1) {
if (!contextMenu.row) return
+ const anchorId = contextMenu.row.id
+ // Fractional ordering: express intent by neighbor id, not integer position.
+ const intent = offset === 0 ? { beforeRowId: anchorId } : { afterRowId: anchorId }
const position = contextMenu.row.position + offset
createRef.current(
- { data: {}, position },
+ { data: {}, ...intent },
{
onSuccess: (response: Record) => {
const newRowId = extractCreatedRowId(response)
@@ -904,7 +907,7 @@ export function TableGrid({
const sourceArrayIndex = rowsRef.current.findIndex((r) => r.id === contextRow.id)
closeContextMenu()
createRef.current(
- { data: rowData, position },
+ { data: rowData, afterRowId: contextRow.id },
{
onSuccess: (response: Record) => {
const newRowId = extractCreatedRowId(response)
diff --git a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/types.ts b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/types.ts
index 431cfc48789..af5cceea88c 100644
--- a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/types.ts
+++ b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/types.ts
@@ -9,6 +9,9 @@ export interface BlockIconInfo {
export interface ColumnSourceInfo {
blockIconInfo?: BlockIconInfo
blockName?: string
+ /** Workflow loaded but the column's source block no longer exists — the
+ * header icon swaps to `WorkflowX` with a tooltip. Only set for loaded states. */
+ blockMissing?: boolean
}
/**
diff --git a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/utils.ts b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/utils.ts
index 6a7f53b2185..46d3bcac739 100644
--- a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/utils.ts
+++ b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/table-grid/utils.ts
@@ -302,7 +302,12 @@ export function computeNormalizedSelection(
export function collectRowSnapshots(rows: Iterable): DeletedRowSnapshot[] {
const snapshots: DeletedRowSnapshot[] = []
for (const row of rows) {
- snapshots.push({ rowId: row.id, data: { ...row.data }, position: row.position })
+ snapshots.push({
+ rowId: row.id,
+ data: { ...row.data },
+ position: row.position,
+ orderKey: row.orderKey,
+ })
}
return snapshots
}
diff --git a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/workflow-sidebar/input-mapping-section.tsx b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/workflow-sidebar/input-mapping-section.tsx
index c667fc04c08..e255938c0be 100644
--- a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/workflow-sidebar/input-mapping-section.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/workflow-sidebar/input-mapping-section.tsx
@@ -39,7 +39,6 @@ export function InputMappingSection({
{namedFields.length === 0 ? (
diff --git a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/workflow-sidebar/workflow-sidebar.tsx b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/workflow-sidebar/workflow-sidebar.tsx
index f36cb0ac0ae..fa7c0f8cc30 100644
--- a/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/workflow-sidebar/workflow-sidebar.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/tables/[tableId]/components/workflow-sidebar/workflow-sidebar.tsx
@@ -8,6 +8,8 @@ import { useMutation, useQueryClient } from '@tanstack/react-query'
import { ExternalLink, RepeatIcon, SplitIcon, X } from 'lucide-react'
import {
Button,
+ ButtonGroup,
+ ButtonGroupItem,
Combobox,
type ComboboxOptionGroup,
FieldDivider,
@@ -34,6 +36,7 @@ import type {
ColumnDefinition,
WorkflowGroup,
WorkflowGroupDependencies,
+ WorkflowGroupDeploymentMode,
WorkflowGroupInputMapping,
WorkflowGroupOutput,
} from '@/lib/table'
@@ -347,6 +350,11 @@ export function WorkflowSidebarBody({
const [autoRun, setAutoRun] = useState(() =>
existingGroup ? existingGroup.autoRun !== false : false
)
+ // Which workflow state per-cell runs execute against. Defaults to `'live'`
+ // (the editable draft) for both new and pre-feature groups.
+ const [deploymentMode, setDeploymentMode] = useState(
+ () => existingGroup?.deploymentMode ?? 'live'
+ )
// Deps default to none selected. With auto-run on, at least one is required
// (enforced via `depsValid` below); a legacy group with empty deps will
// surface the error on first open until the user picks at least one column.
@@ -709,6 +717,7 @@ export function WorkflowSidebarBody({
outputs: fullOutputs,
...(newOutputColumns.length > 0 ? { newOutputColumns } : {}),
inputMappings: inputMappingsList,
+ deploymentMode,
autoRun,
})
toast.success(`Saved "${existingGroup.name ?? 'Workflow'}"`)
@@ -740,6 +749,7 @@ export function WorkflowSidebarBody({
dependencies,
outputs: groupOutputs,
inputMappings: inputMappingsList,
+ deploymentMode,
autoRun,
}
await addWorkflowGroup.mutateAsync({ group, outputColumns: newOutputColumns })
@@ -1027,12 +1037,31 @@ export function WorkflowSidebarBody({