84 lines
2.5 KiB
TypeScript
84 lines
2.5 KiB
TypeScript
|
|
import { generateText } from "ai";
|
|||
|
|
import type { ProviderApiKeys, ProviderId } from "./keys";
|
|||
|
|
import { resolveApiKey } from "./keys";
|
|||
|
|
import { withAgentSystem } from "./agent";
|
|||
|
|
import { getModel } from "./provider";
|
|||
|
|
|
|||
|
|
/** One provider/model pairing that can be offered to the image-description call. */
type VisionCandidate = {
  provider: ProviderId;
  model: string;
};

/**
 * Provider/model candidates for image analysis.
 *
 * `resolveVisionModel` walks this list from first to last, so the array order
 * is the preference order.
 */
const VISION_PROVIDERS: VisionCandidate[] = [
  {
    provider: "google",
    model: "gemini-2.0-flash",
  },
  {
    provider: "openai",
    model: "gpt-4o",
  },
  {
    provider: "anthropic",
    model: "claude-sonnet-4-20250514",
  },
  {
    provider: "xai",
    model: "grok-2-vision-1212",
  },
];
|
|||
|
|
|
|||
|
|
/**
 * Identifies a common raster image format from its leading magic bytes.
 *
 * @param bytes - Raw file contents.
 * @returns The matching media type (JPEG, PNG, GIF or WebP), or `null` when
 *   the signature is not recognized.
 */
function sniffImageMimeType(bytes: Uint8Array): string | null {
  const hasSignature = (signature: readonly number[], offset = 0): boolean =>
    bytes.length >= offset + signature.length &&
    signature.every((byte, i) => bytes[offset + i] === byte);

  if (hasSignature([0xff, 0xd8, 0xff])) return "image/jpeg";
  if (hasSignature([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])) return "image/png";
  if (hasSignature([0x47, 0x49, 0x46, 0x38])) return "image/gif";
  // WebP is a RIFF container: "RIFF" <4-byte size> "WEBP".
  if (hasSignature([0x52, 0x49, 0x46, 0x46]) && hasSignature([0x57, 0x45, 0x42, 0x50], 8)) {
    return "image/webp";
  }
  return null;
}

/**
 * Downloads an image and reports its bytes together with a bare media type.
 *
 * The request is aborted after 15 seconds. Every failure mode (network error,
 * timeout, non-2xx status, empty body, payload that is not an image) yields
 * `null` instead of throwing, so callers can simply skip the image.
 *
 * Media type resolution:
 * 1. A `Content-Type` of `image/*` is used, lower-cased and with any
 *    parameters (`; charset=...`) removed.
 * 2. Otherwise the payload's magic bytes decide (JPEG/PNG/GIF/WebP).
 * 3. If the server sent no `Content-Type` at all and the bytes are not
 *    recognized, `image/jpeg` is assumed.
 * 4. A non-image `Content-Type` with unrecognized bytes is rejected.
 *
 * @param url - Absolute URL of the image to download.
 * @returns The image bytes and media type, or `null` when no usable image
 *   could be obtained.
 */
async function fetchImageBuffer(url: string): Promise<{ data: Uint8Array; mimeType: string } | null> {
  try {
    const res = await fetch(url, {
      headers: { "User-Agent": "Mozilla/5.0 (compatible; ThreadTools/1.0)" },
      signal: AbortSignal.timeout(15000),
    });
    if (!res.ok) return null;

    const contentType = res.headers.get("content-type");
    const data = new Uint8Array(await res.arrayBuffer());
    // A zero-byte body can never be decoded as an image.
    if (data.length === 0) return null;

    // Keep only the bare media type: drop parameters and normalize the case.
    const declared = contentType?.split(";")[0]?.trim().toLowerCase() ?? "";
    if (declared.startsWith("image/")) return { data, mimeType: declared };

    // Header is missing or generic (e.g. application/octet-stream): trust the bytes.
    const sniffed = sniffImageMimeType(data);
    if (sniffed) return { data, mimeType: sniffed };

    // No header at all keeps the image/jpeg default; an explicit non-image
    // type (such as an HTML error page) is not forwarded as an image.
    return declared === "" ? { data, mimeType: "image/jpeg" } : null;
  } catch {
    // Best effort by design: the caller treats a failed download as "no image".
    return null;
  }
}
|
|||
|
|
|
|||
|
|
/**
 * Picks the first usable entry from `VISION_PROVIDERS`.
 *
 * A candidate is usable when `resolveApiKey` yields a truthy value for its
 * provider and `getModel` returns without throwing. Candidates are examined
 * lazily, in list order, and the first success wins.
 *
 * @param apiKeys - Caller-supplied provider keys, passed through to
 *   `resolveApiKey` and `getModel`.
 * @returns Whatever `getModel` returned for the winning candidate, or `null`
 *   when no candidate could be used.
 */
function resolveVisionModel(apiKeys: ProviderApiKeys) {
  for (const { provider, model } of VISION_PROVIDERS) {
    const hasKey = Boolean(resolveApiKey(provider, apiKeys));
    if (hasKey) {
      try {
        return getModel(provider, model, apiKeys);
      } catch {
        // This provider could not be set up; fall through to the next one.
      }
    }
  }

  return null;
}
|
|||
|
|
|
|||
|
|
/**
 * Asks a vision model to describe the visual design of a post's images.
 *
 * At most the first four URLs are used. They are downloaded concurrently and
 * any that fail to download are skipped; the remaining images are sent, in
 * their original order, together with the first 300 UTF-16 code units of the
 * post text.
 *
 * @param imageUrls - Image URLs attached to the post.
 * @param postText - Post text included in the prompt as context.
 * @param apiKeys - Provider keys used to select a vision model.
 * @returns The model's trimmed answer, or `null` when there are no URLs, no
 *   vision model can be resolved, no image could be downloaded, or the answer
 *   is empty.
 * @throws Whatever `generateText` throws; errors from the model call are not
 *   caught here.
 */
export async function describePostImages(
  imageUrls: string[],
  postText: string,
  apiKeys: ProviderApiKeys
): Promise<string | null> {
  if (imageUrls.length === 0) return null;

  const model = resolveVisionModel(apiKeys);
  if (!model) return null;

  // Download in parallel so the worst case is one timeout, not one per image.
  // fetchImageBuffer resolves to null on failure instead of rejecting, so
  // Promise.all cannot short-circuit, and it keeps the results in URL order.
  const downloads = await Promise.all(
    imageUrls.slice(0, 4).map((url) => fetchImageBuffer(url))
  );

  const imageParts: Array<{ type: "image"; image: Uint8Array; mimeType?: string }> = [];
  for (const img of downloads) {
    if (img) {
      imageParts.push({ type: "image", image: img.data, mimeType: img.mimeType });
    }
  }

  if (imageParts.length === 0) return null;

  // Cutting at a fixed UTF-16 offset can split an emoji's surrogate pair;
  // drop a dangling high surrogate so the prompt stays well-formed.
  const excerpt = postText.slice(0, 300).replace(/[\uD800-\uDBFF]$/, "");

  const { text } = await generateText({
    model,
    system: withAgentSystem(
      "你是 Threads 貼文視覺分析師。直接分析附圖的視覺設計,繁體中文台灣用語。"
    ),
    messages: [
      {
        role: "user",
        content: [
          {
            type: "text",
            text: `分析這篇 Threads 貼文附圖的視覺設計:
貼文文字:${excerpt}

請描述:版面配置、配色氛圍、是否有圖上文字、字體風格、視覺 hook、為什麼這張圖能吸引點擊。`,
          },
          ...imageParts,
        ],
      },
    ],
  });

  // An empty or whitespace-only answer is reported as "no description".
  return text.trim() || null;
}
|