haixunMaster/lib/ai/vision.ts

84 lines
2.5 KiB
TypeScript

import { generateText } from "ai";
import type { ProviderApiKeys, ProviderId } from "./keys";
import { resolveApiKey } from "./keys";
import { withAgentSystem } from "./agent";
import { getModel } from "./provider";
/**
 * Provider/model pairs that `resolveVisionModel` walks in array order; the
 * first entry with a resolvable API key whose model can be constructed wins,
 * so position in this list expresses preference.
 *
 * NOTE(review): presumably every model listed here accepts image input —
 * confirm against the provider's documentation when editing this list.
 */
const VISION_PROVIDERS: Array<{ provider: ProviderId; model: string }> = [
{ provider: "google", model: "gemini-2.0-flash" },
{ provider: "openai", model: "gpt-4o" },
{ provider: "anthropic", model: "claude-sonnet-4-20250514" },
{ provider: "xai", model: "grok-2-vision-1212" },
];
/**
 * Downloads an image and returns its bytes together with a bare media type.
 *
 * This is best-effort: a network error, the 15-second timeout, a non-2xx
 * status, a textual response (e.g. an HTML error page served with 200), or an
 * empty body all yield `null` instead of throwing.
 *
 * @param url - URL of the image to download.
 * @returns The raw bytes and the lowercased media type without parameters
 *   (`"image/jpeg"` when the server sends no usable Content-Type), or `null`
 *   when the image could not be obtained.
 */
async function fetchImageBuffer(url: string): Promise<{ data: Uint8Array; mimeType: string } | null> {
  try {
    const res = await fetch(url, {
      headers: { "User-Agent": "Mozilla/5.0 (compatible; ThreadTools/1.0)" },
      signal: AbortSignal.timeout(15000),
    });
    if (!res.ok) return null;

    // Content-Type may carry parameters ("image/jpeg; charset=binary") and any
    // letter case; downstream consumers expect just the bare media type.
    const rawType = res.headers.get("content-type") ?? "";
    const mimeType = (rawType.split(";")[0] ?? "").trim().toLowerCase() || "image/jpeg";

    // A textual body is never an image (typically an HTML error/login page).
    if (mimeType.startsWith("text/")) return null;

    const data = new Uint8Array(await res.arrayBuffer());
    // A zero-byte payload cannot be decoded as an image.
    if (data.byteLength === 0) return null;

    return { data, mimeType };
  } catch {
    // Deliberate best-effort: callers treat null as "skip this image".
    return null;
  }
}
/**
 * Picks the first usable entry from `VISION_PROVIDERS`.
 *
 * An entry is usable when `resolveApiKey` returns a truthy value for its
 * provider and `getModel` constructs the model without throwing. Entries that
 * fail either check are skipped silently.
 *
 * @param apiKeys - Provider API keys forwarded to `resolveApiKey` and `getModel`.
 * @returns The model returned by `getModel`, or `null` if no entry is usable.
 */
function resolveVisionModel(apiKeys: ProviderApiKeys) {
  for (const { provider, model: modelId } of VISION_PROVIDERS) {
    const hasKey = Boolean(resolveApiKey(provider, apiKeys));
    if (hasKey) {
      try {
        return getModel(provider, modelId, apiKeys);
      } catch {
        // Construction failed for this provider; fall through to the next one.
      }
    }
  }
  return null;
}
/**
 * Asks a vision model to describe the visual design of a post's attached images.
 *
 * Only the first four URLs are considered and only the first 300 characters of
 * `postText` are put into the prompt. Images that cannot be downloaded are
 * skipped; the remaining ones are sent in their original order.
 *
 * @param imageUrls - URLs of the images attached to the post.
 * @param postText - Text of the post, supplied to the model as context.
 * @param apiKeys - Provider API keys used to select a vision model.
 * @returns The trimmed model output, or `null` when there are no URLs, no
 *   vision model could be resolved, no image could be downloaded, or the
 *   output is empty after trimming.
 * @throws Whatever `generateText` rejects with; it is not caught here.
 */
export async function describePostImages(
  imageUrls: string[],
  postText: string,
  apiKeys: ProviderApiKeys
): Promise<string | null> {
  if (imageUrls.length === 0) return null;

  const model = resolveVisionModel(apiKeys);
  if (!model) return null;

  // Download concurrently: the fetches are independent and fetchImageBuffer
  // resolves to null on failure instead of rejecting, so Promise.all cannot
  // short-circuit. Result order matches the input order.
  const downloads = await Promise.all(
    imageUrls.slice(0, 4).map((url) => fetchImageBuffer(url))
  );

  const imageParts: Array<{ type: "image"; image: Uint8Array; mimeType?: string }> = [];
  for (const img of downloads) {
    if (img) {
      imageParts.push({ type: "image", image: img.data, mimeType: img.mimeType });
    }
  }
  if (imageParts.length === 0) return null;

  // Same three-line prompt as before, assembled line by line.
  const prompt = [
    "分析這篇 Threads 貼文附圖的視覺設計:",
    `貼文文字:${postText.slice(0, 300)}`,
    "請描述:版面配置、配色氛圍、是否有圖上文字、字體風格、視覺 hook、為什麼這張圖能吸引點擊。",
  ].join("\n");

  const { text } = await generateText({
    model,
    system: withAgentSystem(
      "你是 Threads 貼文視覺分析師。直接分析附圖的視覺設計,繁體中文台灣用語。"
    ),
    messages: [
      {
        role: "user",
        content: [{ type: "text", text: prompt }, ...imageParts],
      },
    ],
  });

  return text.trim() || null;
}