
AI Integration

Build AI-powered applications with OpenAI-compatible models in your Ploy workers.

Ploy provides built-in AI integration for workers: build AI-powered applications with env.AI.run(), or make direct OpenAI-compatible SDK calls when you need them.

Enabling AI

To use AI in your worker, add ai: true to your ploy.yaml:

ploy.yaml
kind: dynamic
ai: true

This injects an env.AI binding plus the raw escape-hatch environment variables:

  • AI - AI binding with env.AI.run(model, inputs, options?)
  • PLOY_AI_URL - OpenAI-compatible base URL for direct SDK or HTTP requests
  • PLOY_AI_TOKEN - Authentication token (automatically managed per deployment)

Local Development

When you run ploy dev, AI calls from your local worker are routed through an organization-scoped local development AI sandbox in Ploy.

  • If your checkout matches a deployed project, Ploy uses that project to choose the organization, then routes local AI usage through the organization's sandbox.
  • If your checkout does not match a deployed project and you belong to multiple organizations, set PLOY_DEV_AI_PROJECT_ID to any project in the target organization to choose the correct sandbox.
  • For full manual control, set both PLOY_DEV_AI_URL and PLOY_DEV_AI_TOKEN.

Example:

PLOY_DEV_AI_PROJECT_ID=proj_123 pnpm exec ploy dev
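For the fully manual case, both variables can be set inline the same way; the URL and token below are placeholders, not real endpoints:

```shell
# Route local dev through an explicit gateway (placeholder values).
PLOY_DEV_AI_URL="https://your-gateway.example.com/v1" \
PLOY_DEV_AI_TOKEN="your-token" \
pnpm exec ploy dev
```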

Basic AI Example

Here's a simple worker that calls an AI model:

src/index.ts
interface Env {
	AI: Ai;
	PLOY_AI_URL: string;
	PLOY_AI_TOKEN: string;
}

export default {
	async fetch(request: Request, env: Env): Promise<Response> {
		const url = new URL(request.url);

		// Health check endpoint
		if (url.pathname === "/health") {
			return new Response("ok");
		}

		// AI endpoint
		if (url.pathname === "/ai") {
			try {
				// Get parameters from query string
				const model = url.searchParams.get("model") || "glm-4.6v-flash";
				const prompt = url.searchParams.get("prompt") || "just reply 'OK'";

				const data = await env.AI.run(model, { prompt });
				return new Response(JSON.stringify(data), {
					status: 200,
					headers: { "Content-Type": "application/json" },
				});
			} catch (error) {
				return new Response(
					JSON.stringify({
						error: error instanceof Error ? error.message : String(error),
					}),
					{
						status: 500,
						headers: { "Content-Type": "application/json" },
					},
				);
			}
		}

		return new Response("hi!");
	},
};

Usage

# Basic request
curl https://your-deployment.ploy.app/ai

# Custom prompt
curl "https://your-deployment.ploy.app/ai?prompt=What is TypeScript?"

# Different model
curl "https://your-deployment.ploy.app/ai?model=gpt-4&prompt=Hello"

env.AI.run()

Ploy exposes env.AI.run(model, inputs, options?):

const response = await env.AI.run("auto", {
	prompt: "Hello, World",
});

env.AI.run() is the primary worker API. The raw PLOY_AI_URL and PLOY_AI_TOKEN variables remain available when you want to call the OpenAI-compatible gateway directly with your own client.

Examples

  • examples/ai-simple uses env.AI.run() for the basic request path.
  • examples/ai-streaming uses env.AI.run() with stream: true and returns SSE.
  • examples/ai-openai uses the OpenAI SDK with PLOY_AI_URL and PLOY_AI_TOKEN.

OpenAI-Compatible API

Ploy's AI integration uses the OpenAI chat completions format:

Request Format

{
  model: string;           // Model name (e.g., "glm-4.6v-flash", "gpt-4")
  messages: Array<{        // Conversation messages
    role: "system" | "user" | "assistant";
    content: string;
  }>;
  temperature?: number;    // Randomness (0-2, default: 1)
  max_tokens?: number;     // Maximum response length
  top_p?: number;         // Nucleus sampling (0-1, default: 1)
  stream?: boolean;       // Enable streaming responses
}
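As a sketch, a request body matching this shape can be assembled with a small helper. The helper name and defaults here are illustrative, not part of Ploy's API:

```typescript
interface ChatMessage {
	role: "system" | "user" | "assistant";
	content: string;
}

interface ChatRequest {
	model: string;
	messages: ChatMessage[];
	temperature?: number;
	max_tokens?: number;
	top_p?: number;
	stream?: boolean;
}

// Build a minimal single-turn request; callers can override the model.
function buildChatRequest(prompt: string, model = "glm-4.6v-flash"): ChatRequest {
	return {
		model,
		messages: [{ role: "user", content: prompt }],
		max_tokens: 256,
	};
}
```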

Response Format

{
	id: string;
	object: "chat.completion";
	created: number;
	model: string;
	choices: Array<{
		index: number;
		message: {
			role: "assistant";
			content: string;
		};
		finish_reason: string;
	}>;
	usage: {
		prompt_tokens: number;
		completion_tokens: number;
		total_tokens: number;
	}
}
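A response in this shape can be unpacked defensively. This sketch mirrors the fields above and nothing more; the function name is hypothetical:

```typescript
interface ChatCompletion {
	id: string;
	object: "chat.completion";
	created: number;
	model: string;
	choices: Array<{
		index: number;
		message: { role: "assistant"; content: string };
		finish_reason: string;
	}>;
	usage: {
		prompt_tokens: number;
		completion_tokens: number;
		total_tokens: number;
	};
}

// Pull out the assistant text and total token count, tolerating empty choices.
function extractReply(data: ChatCompletion): { content: string; totalTokens: number } {
	return {
		content: data.choices[0]?.message.content ?? "",
		totalTokens: data.usage.total_tokens,
	};
}
```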

Conversation Context

Build conversational applications by maintaining message history:

interface Message {
	role: "system" | "user" | "assistant";
	content: string;
}

export default {
	async fetch(request, env) {
		if (request.method !== "POST") {
			return new Response("Method Not Allowed", { status: 405 });
		}

		try {
			const { messages, model = "glm-4.6v-flash" } = await request.json();

			// Add system prompt
			const fullMessages: Message[] = [
				{
					role: "system",
					content: "You are a helpful assistant that provides concise answers.",
				},
				...messages,
			];

			const data = await env.AI.run(model, {
				messages: fullMessages,
			});
			return new Response(JSON.stringify(data), {
				headers: { "Content-Type": "application/json" },
			});
		} catch (error) {
			return new Response(
				JSON.stringify({
					error: error instanceof Error ? error.message : String(error),
				}),
				{
					status: 500,
					headers: { "Content-Type": "application/json" },
				},
			);
		}
	},
};
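Message history grows with every turn and can eventually exceed the model's context window. One simple mitigation (a sketch, not a Ploy feature) is to keep the system prompt plus only the most recent messages:

```typescript
interface HistoryMessage {
	role: "system" | "user" | "assistant";
	content: string;
}

// Keep a leading system message (if present) plus the last `maxMessages` turns.
function trimHistory(messages: HistoryMessage[], maxMessages: number): HistoryMessage[] {
	const system = messages[0]?.role === "system" ? [messages[0]] : [];
	const rest = messages.slice(system.length);
	return [...system, ...rest.slice(-maxMessages)];
}
```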

Direct SDK Usage

If you need a library that expects an OpenAI-compatible baseURL and apiKey, use the injected raw variables:

import OpenAI from "openai";

const client = new OpenAI({
	apiKey: env.PLOY_AI_TOKEN,
	baseURL: env.PLOY_AI_URL,
});

const response = await client.chat.completions.create({
	model: "auto",
	messages: [{ role: "user", content: "Hello" }],
});

Streaming

env.AI.run() also supports streaming:

const stream = await env.AI.run("auto", {
	prompt: "Write one short sentence.",
	stream: true,
});

return new Response(stream, {
	headers: {
		"Content-Type": "text/event-stream; charset=utf-8",
	},
});
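On the consuming side, the event stream arrives as `data:` lines terminated by `data: [DONE]`, the usual OpenAI-compatible convention. A minimal parser sketch, assuming each chunk carries `choices[0].delta.content`:

```typescript
// Concatenate content deltas from a complete SSE transcript.
// Assumes OpenAI-style chunks: {"choices":[{"delta":{"content":"..."}}]}.
function collectStreamedContent(transcript: string): string {
	let out = "";
	for (const line of transcript.split("\n")) {
		if (!line.startsWith("data:")) continue;
		const payload = line.slice("data:".length).trim();
		if (payload === "[DONE]") break;
		const chunk = JSON.parse(payload) as {
			choices?: Array<{ delta?: { content?: string } }>;
		};
		out += chunk.choices?.[0]?.delta?.content ?? "";
	}
	return out;
}
```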

Usage

This request exercises the Conversation Context handler shown earlier:

curl -X POST https://your-deployment.ploy.app/ai \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {"role": "user", "content": "What is JavaScript?"},
      {"role": "assistant", "content": "JavaScript is a programming language..."},
      {"role": "user", "content": "How do I use async/await?"}
    ]
  }'

LangChain Integration

Use LangChain for advanced AI features like agents and tools:

Installation

package.json
{
	"dependencies": {
		"@langchain/core": "^1.1.0",
		"@langchain/openai": "^1.1.3",
		"langchain": "^1.1.1",
		"zod": "^3.24.1"
	}
}

Creating Tools

Define tools that the AI can use:

src/index.ts
import { ChatOpenAI } from "@langchain/openai";
import { createAgent, tool } from "langchain";
import * as z from "zod";

interface Env {
	PLOY_AI_URL: string;
	PLOY_AI_TOKEN: string;
}

// Define a tool for getting weather information
const getWeather = tool((input) => `It's always sunny in ${input.city}!`, {
	name: "get_weather",
	description: "Get the weather for a given city",
	schema: z.object({
		city: z.string().describe("The city to get the weather for"),
	}),
});

export default {
	async fetch(request: Request, env: Env): Promise<Response> {
		const url = new URL(request.url);

		if (url.pathname === "/health") {
			return new Response("ok");
		}

		if (url.pathname === "/ai") {
			try {
				const prompt = url.searchParams.get("prompt") || "just reply 'OK'";

				// Initialize OpenAI-compatible model
				const model = new ChatOpenAI({
					model: "glm-4.6v-flash",
					apiKey: env.PLOY_AI_TOKEN,
					configuration: {
						baseURL: env.PLOY_AI_URL,
					},
				});

				// Create agent with tools
				const agent = createAgent({
					model,
					tools: [getWeather],
				});

				// Invoke agent
				const result = await agent.invoke({
					messages: [{ role: "user", content: prompt }],
				});

				// Extract response
				const lastMessage = result.messages[result.messages.length - 1];
				const content =
					typeof lastMessage.content === "string"
						? lastMessage.content
						: JSON.stringify(lastMessage.content);

				// Return in OpenAI format
				return new Response(
					JSON.stringify({
						choices: [
							{
								message: {
									role: "assistant",
									content: content,
								},
							},
						],
					}),
					{
						status: 200,
						headers: { "Content-Type": "application/json" },
					},
				);
			} catch (error) {
				return new Response(
					JSON.stringify({
						error: error instanceof Error ? error.message : String(error),
					}),
					{
						status: 500,
						headers: { "Content-Type": "application/json" },
					},
				);
			}
		}

		return new Response("LangChain agent example!");
	},
};

Token Usage and Billing

Monitor token consumption in API responses:

const data = await response.json();

// Extract token usage
const usage = data.usage;
console.log(`Prompt tokens: ${usage.prompt_tokens}`);
console.log(`Completion tokens: ${usage.completion_tokens}`);
console.log(`Total tokens: ${usage.total_tokens}`);

// Track costs (example rates)
const cost = (
	usage.prompt_tokens * 0.00001 +
	usage.completion_tokens * 0.00002
).toFixed(4);
console.log(`Estimated cost: $${cost}`);

Token usage varies by model. Larger models (like GPT-5) cost more per token than smaller models (like GPT-4o-mini).
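The inline arithmetic above can be factored into a helper. The rates here are hypothetical examples, not Ploy's actual pricing:

```typescript
interface TokenUsage {
	prompt_tokens: number;
	completion_tokens: number;
	total_tokens: number;
}

// Hypothetical example rates in USD per token; substitute your model's real pricing.
const PROMPT_RATE_USD = 0.00001;
const COMPLETION_RATE_USD = 0.00002;

function estimateCostUSD(usage: TokenUsage): number {
	return (
		usage.prompt_tokens * PROMPT_RATE_USD +
		usage.completion_tokens * COMPLETION_RATE_USD
	);
}
```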

Examples

Summarization Service

export default {
	async fetch(request, env) {
		if (request.method !== "POST") {
			return new Response("Method Not Allowed", { status: 405 });
		}

		const { text } = await request.json();

		const response = await fetch(`${env.PLOY_AI_URL}/chat/completions`, {
			method: "POST",
			headers: {
				"Content-Type": "application/json",
				Authorization: `Bearer ${env.PLOY_AI_TOKEN}`,
			},
			body: JSON.stringify({
				model: "glm-4.6v-flash",
				messages: [
					{
						role: "system",
						content:
							"You are a summarization assistant. Provide concise summaries.",
					},
					{
						role: "user",
						content: `Summarize the following text:\n\n${text}`,
					},
				],
				max_tokens: 150,
			}),
		});

		const data = await response.json();
		const summary = data.choices[0].message.content;

		return new Response(JSON.stringify({ summary }), {
			headers: { "Content-Type": "application/json" },
		});
	},
};
