Streaming that actually works
text, loading, error — all reactive. Abort on unmount, retry on error, SSE under the hood. None of that is your problem.
Learn more
Production-ready streaming, agents, and chat history for SvelteKit, Next.js, Nuxt, SolidStart, and more — all from one library.
<!-- Example: minimal SvelteKit client. stream.text in the markup updates reactively as tokens arrive. -->
<script lang="ts">
import { Stream } from "@aibind/sveltekit";
// "fast" is a model key defined server-side via defineModels (see the server setup example).
const stream = new Stream({ model: "fast" });
</script>
<!-- NOTE(review): the trailing `"use client";` on the next line is an extraction artifact — it belongs to the Next.js example that follows. -->
<button onclick={() => stream.send("Hello!")}>Send</button><p>{stream.text}</p>"use client";
// Example: Next.js client component (its "use client" directive was fused onto the previous example by extraction).
import { useStream } from "@aibind/nextjs";
// Hook returns reactive `text` plus a `send` function; same "fast" model key as the other framework examples.
export default function Chat() {
const { text, send } = useStream({ model: "fast" });
return <button onClick={() => send("Hello!")}>{text || "Send"}</button>;
// NOTE(review): the `<script setup ...>` fused onto the closing brace below starts the Nuxt example.
}<script setup lang="ts">
// Example: Nuxt composable — same useStream surface as the Next.js version above.
import { useStream } from "@aibind/nuxt";
const { text, send } = useStream({ model: "fast" });
</script>
<template>
<button @click="send('Hello!')">Send</button>
<p>{{ text }}</p>
<!-- NOTE(review): the import fused after </template> below begins the SolidStart example. -->
</template>import { useStream } from "@aibind/solidstart";
// Example: SolidStart — `text` is a signal accessor here, hence `text()` in JSX (cf. plain `text` in the React version).
export default function Chat() {
const { text, send } = useStream({ model: "fast" });
return <button onClick={() => send("Hello!")}>{text() || "Send"}</button>;
}One function call. Your framework's native reactivity. That's the whole API.
// hooks.server.ts (SvelteKit) | app/api/ai/[...path]/route.ts (Next.js)
// Server setup: map short model keys to concrete OpenRouter models, then mount the stream handler.
import { createStreamHandler } from "@aibind/sveltekit/server";
import { defineModels } from "@aibind/core";
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
// Non-null assertion assumes deployment config always provides the key — it will throw nowhere; a missing key fails later at request time.
const openrouter = createOpenRouter({
apiKey: process.env.OPENROUTER_API_KEY!,
});
// These keys ("fast" | "smart" | "reason") are what clients reference in new Stream({ model: ... }).
const models = defineModels({
fast: openrouter("google/gemini-3.1-flash-lite-preview"),
smart: openrouter("anthropic/claude-sonnet-4-6"),
reason: openrouter("google/gemini-2.5-pro"),
});
export const handle = createStreamHandler({ models });
// ↑ Routes /stream, /structured, /compact — done.// Server: one line enables memory
// Conversation memory: supplying a store makes the handler keep per-session history server-side.
const handler = createStreamHandler({
models,
// NOTE(review): MemoryConversationStore is presumably in-process / non-persistent — confirm against library docs before production use.
conversation: { store: new MemoryConversationStore() },
});<!-- Client: one field enables it -->
<script lang="ts">
import { Stream } from "@aibind/sveltekit";
const stream = new Stream({
model: "fast",
// Client mints the session key; message history is associated with it on the server.
sessionId: crypto.randomUUID(), // server remembers the rest
});
</script>
<button onclick={() => stream.send("What did I just say?")}>Ask</button>
<p>{stream.text}</p>
<!-- NOTE(review): a client-minted sessionId is fine for demos; the auth example at the bottom of this page derives it from the server session instead, which is the secure pattern. -->
<!-- Server has the full history. Client sends only the new message. -->import { z } from "zod";
// Schema for structured streaming — imported as $lib/schemas by the StructuredStream example below.
const ProductSchema = z.object({
name: z.string(),
price: z.number(),
description: z.string(),
tags: z.array(z.string()),
});<script lang="ts">
import { StructuredStream } from "@aibind/sveltekit";
import { ProductSchema } from "$lib/schemas";
const stream = new StructuredStream({
model: "fast",
// Explicit endpoint here; the plain Stream examples rely on a default route instead.
endpoint: "/__aibind__/structured",
schema: ProductSchema,
});
</script>
<!-- Partial renders as tokens arrive — name shows before description is done -->
<!-- While streaming, `partial` may be missing any field — hence the per-field ?? fallbacks below. -->
{#if stream.partial}
<h2>{stream.partial.name ?? "…"}</h2>
<p>${stream.partial.price ?? "…"}</p>
<p>{stream.partial.description ?? "…"}</p>
{/if}<script lang="ts">
import { Stream } from "@aibind/sveltekit";
import { ChatHistory } from "@aibind/core";
const chat = new ChatHistory();
const stream = new Stream({ model: "smart", sessionId: crypto.randomUUID() });
</script>
<button
onclick={async () => {
// compact() is async and reports how many tokens the summarization reclaimed.
const { tokensSaved } = await stream.compact(chat);
// History replaced with AI summary on both client and server
console.log(`${tokensSaved.toLocaleString()} tokens freed`);
}}
>
Compact history
</button>Chat is the wrong shape for writing assistants, search boxes, and code inputs. Completion is built for that:
<script lang="ts">
import { Completion } from "@aibind/sveltekit";
const completion = new Completion({ model: "fast" });
let input = $state("");
</script>
<!-- update() fires on every keystroke; per the copy below, debounce/cancellation is handled inside the library. -->
<input
bind:value={input}
oninput={() => completion.update(input)}
onkeydown={(e) => {
// Tab commits the ghost suggestion; preventDefault keeps focus from leaving the input.
if (e.key === "Tab" && completion.suggestion) {
input = completion.accept(); // input + ghost text
e.preventDefault();
}
}}
/>
<!-- Ghost text: input value + dimmed continuation -->
<span class="ghost"
>{input}<span class="dim">{completion.suggestion}</span></span
>Debounced. Cancels automatically on each keystroke. Tab to accept, Escape to dismiss. No timer management, no AbortController, no state juggling.
Pass `diff: defaultDiff` once and every regenerate emits a word-level diff — no extra code per send:
<script lang="ts">
import { Stream, defaultDiff } from "@aibind/sveltekit";
const stream = new Stream({ model: "smart", diff: defaultDiff });
</script>
<!-- Three chunk kinds: "add" → <ins>, "remove" → <del>, anything else renders as unchanged text. -->
{#if stream.diff}
{#each stream.diff as chunk}
{#if chunk.type === "add"}
<ins>{chunk.text}</ins>
{:else if chunk.type === "remove"}
<del>{chunk.text}</del>
{:else}
<span>{chunk.text}</span>
{/if}
{/each}
{:else}
{stream.text}
{/if}Bring your own diff library — diff, fast-diff, diff-match-patch — with a one-liner adapter. The built-in defaultDiff is a zero-dependency LCS word diff.
<StreamMarkdown> recovers unterminated syntax mid-stream — no bold flicker, no broken code blocks, no split links:
<script lang="ts">
import { Stream } from "@aibind/sveltekit";
import { StreamMarkdown } from "@aibind/svelte/markdown";
const stream = new Stream({ model: "smart" });
</script>
<button onclick={() => stream.send("Explain async/await")}>Ask</button>
<!-- `streaming` signals that tokens are still arriving, so the renderer can recover unterminated markdown (per the copy above). -->
<StreamMarkdown text={stream.text} streaming={stream.loading} />Zero dependencies. 1M ops/s parser. Works with any framework via the raw StreamParser + HtmlRenderer from @aibind/markdown.
<script lang="ts">
import { Stream, UsageTracker } from "@aibind/sveltekit";
// Per-million-token USD rates, keyed by the same model names used throughout the page.
const tracker = new UsageTracker({
pricing: {
fast: { inputPerMillion: 0.15, outputPerMillion: 0.6 },
smart: { inputPerMillion: 3.0, outputPerMillion: 15.0 },
},
});
// Attach at construction; per the copy below, one tracker can be shared by several streams.
const stream = new Stream({ model: "fast", tracker });
</script>
<p>
{tracker.inputTokens + tracker.outputTokens} tokens — ${tracker.cost.toFixed(
4,
)}
</p>Accumulates across every send. Reactive. Pass the same tracker to multiple streams.
<script lang="ts">
import { Stream } from "@aibind/sveltekit";
import { routeByLength } from "@aibind/core";
const stream = new Stream({
// Tiered routing — presumably by prompt character length (TODO confirm); the last argument ("reason") is the fallback tier.
routeModel: routeByLength(
[
{ maxLength: 200, model: "fast" },
{ maxLength: 800, model: "smart" },
],
"reason",
),
});
</script>
<!-- Short prompt → fast, long analysis → reason. Zero per-send logic. -->
<!-- NOTE(review): `prompt` is not declared in this snippet — in a browser it would resolve to window.prompt. Presumably bound elsewhere in the real component; verify. -->
<button onclick={() => stream.send(prompt)}>Send</button>Async routers work too — check user tier, A/B flags, anything. Explicit model on send() always overrides.
<script lang="ts">
import { Race } from "@aibind/sveltekit";
const race = new Race({
models: ["fast", "smart"],
endpoint: "/__aibind__/stream",
strategy: "first-token", // stream whoever responds first
});
</script>
<button onclick={() => race.send("Summarize this doc")}>Race</button>
<!-- `winner` identifies which model won; `text` streams from that winner (per the copy below). -->
{#if race.winner}<small>won by {race.winner}</small>{/if}
<p>{race.text}</p>Both models start simultaneously. "first-token" streams the winner live; "complete" waits for whoever finishes first. Losers are cancelled automatically.
// Need auth, rate limiting, or a custom framework? Use StreamHandler directly.
import { StreamHandler } from "@aibind/core";
// One handler instance shared by every route below.
const ai = new StreamHandler({ models });
// Hono
// Fix: Hono's c.req.json() returns a Promise — await it so ai.stream receives the
// parsed body, consistent with the Next.js branch below which awaits request.json().
app.post("/__aibind__/stream", async (c) => ai.stream(await c.req.json()));
// Next.js with auth injection — session ID comes from server, not client
export async function POST(request: Request) {
const session = await getSession(request);
if (!session) return new Response("Unauthorized", { status: 401 });
const body = await request.json();
// Server-supplied sessionId overrides anything the client sent, so a client
// cannot read another user's conversation history by forging the field.
return ai.stream({ ...body, sessionId: session.userId });
}