haixunMaster/lib/ai/filter-discover-relevance.ts

110 lines
3.7 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { z } from "zod";
import type { ProviderApiKeys } from "./keys";
import { generateStructuredObject } from "./generate-structured";
import { getModel } from "./provider";
/**
 * Verdict the model must produce for a single search result:
 * the item's id, a relevance flag, a score within [0, 1] and a free-text reason.
 */
const filterRowSchema = z.object({
  id: z.string(),
  relevant: z.boolean(),
  score: z.number().min(0).max(1),
  reason: z.string(),
});

/** Structured-output schema for the whole review: `{ items: [...] }`, one row per result. */
const filterSchema = z.object({ items: z.array(filterRowSchema) });
/** One search result submitted to the AI relevance filter. */
export interface DiscoverFilterItem {
// Identifier echoed to the model and used as the key of the returned result map.
id: string;
// Post content; only a leading excerpt (280 characters) is included in the prompt.
text: string;
// Optional account handle; when present it is shown to the model prefixed with "@".
username?: string;
// Label describing where the result came from; included verbatim in the prompt.
source: string;
// Optional tags; when non-empty they are joined with "、" and included in the prompt.
tags?: string[];
}
/** Relevance verdict for a single item, as stored in the map returned by the filter. */
export interface DiscoverFilterResult {
// Whether the item is considered related to the requested topic.
relevant: boolean;
// Relevance score; AI-produced values are validated to lie within [0, 1].
score: number;
// Short explanation of the verdict (AI-produced reasons are capped at 120 characters).
reason: string;
}
/**
 * Truncates `text` to at most `maxChars` Unicode code points.
 *
 * `String.prototype.slice` counts UTF-16 code units and can cut an emoji in
 * half, leaving a lone surrogate in the output; iterating the string with
 * `Array.from` walks whole code points instead.
 */
function truncateByCodePoint(text: string, maxChars: number): string {
  // `length` counts code units, which is never smaller than the code point count.
  if (text.length <= maxChars) return text;
  return Array.from(text).slice(0, maxChars).join("");
}

/** Renders one search result as the text block shown to the model. */
function formatDiscoverItemForPrompt(item: DiscoverFilterItem): string {
  const user = item.username ? `\n帳號：@${item.username}` : "";
  const tags = item.tags?.length ? `\n標籤：${item.tags.join("、")}` : "";
  return `[${item.id}]（來源：${item.source}）${user}${tags}\n內容：${truncateByCodePoint(item.text, 280)}`;
}

/**
 * Asks the configured AI model to judge whether each search result is truly
 * relevant to the given topic.
 *
 * @param input - Topic description (`label`, `query`, optional `brief`,
 *   `pillars`, `requiredConcepts`, `exclusions`), the `items` to review, and
 *   the provider / model / API keys used to build the model.
 * @returns A map keyed by item id containing exactly one verdict per input item:
 *   - empty input: an empty map, without calling the model;
 *   - AI call succeeded: the model's verdict for each input id (reason capped
 *     at 120 characters); input ids the model skipped are marked not relevant
 *     with score 0; ids the model returned that were never submitted are dropped;
 *   - AI call failed (any thrown error): every item is passed through as
 *     relevant with score 0.55, and the error is logged with `console.warn`.
 *     This function does not throw for AI failures.
 */
export async function filterDiscoverItemsWithAi(input: {
  label: string;
  query: string;
  brief?: string | null;
  exclusions?: string[];
  pillars?: string[];
  requiredConcepts?: string[];
  items: DiscoverFilterItem[];
  aiProvider: string;
  aiModel: string;
  apiKeys?: ProviderApiKeys;
}): Promise<Map<string, DiscoverFilterResult>> {
  // Rule-only pass-through, used when there is nothing to review or the AI call fails.
  const fallback = new Map<string, DiscoverFilterResult>();
  for (const item of input.items) {
    fallback.set(item.id, { relevant: true, score: 0.55, reason: "規則通過（AI 未審核）" });
  }
  if (input.items.length === 0) return fallback;

  try {
    const model = getModel(input.aiProvider, input.aiModel, input.apiKeys ?? {});
    const listBlock = input.items.map(formatDiscoverItemForPrompt).join("\n\n");

    // Optional sections are dropped entirely when absent so the prompt has no stray blank lines.
    const prompt = [
      `【主題】${input.label}`,
      `【核心查詢】${input.query}`,
      input.brief ? `【受眾簡述】${input.brief}` : "",
      input.pillars?.length ? `【內容支柱】${input.pillars.join("、")}` : "",
      input.requiredConcepts?.length
        ? `【必備概念（需同時符合）】${input.requiredConcepts.join(" + ")}`
        : "",
      input.exclusions?.length ? `【明確排除】${input.exclusions.join("、")}` : "",
      `請審核以下 ${input.items.length} 筆搜尋結果：`,
      listBlock,
    ]
      .filter((section) => section !== "")
      .join("\n");

    const result = await generateStructuredObject({
      model,
      provider: input.aiProvider,
      modelId: input.aiModel,
      schema: filterSchema,
      system: `你是 Threads 主題研究助理。任務：判斷搜尋結果是否與「指定主題」真正相關，用於找相似創作者帳號。
審核要嚴格、扣緊主題：
- 複合主題要同時符合核心概念，不能只命中其中一個字
- 易混淆要剔除（例如「寵物洗毛精」vs「洗碗精」、「寵物」vs「人類掉髮洗髮」）
- 太寬泛的泛用詞（只有「寵物」「洗毛精」其中一項而無法確認是目標情境）→ relevant=false
- 明顯不同產品類別、不同受眾、不同痛點 → relevant=false
- 只有稍微相關但無法當相似帳號參考 → score ≤ 0.4
每筆都要輸出 id、relevant、score（0-1）、reason（繁體中文一句話）`,
      prompt,
      jsonPromptSuffix:
        '\n\n只回傳 JSON：{"items":[{"id":"...","relevant":true,"score":0.8,"reason":"..."}]}',
    });

    // Index the model's rows by id; if an id is repeated, the last row wins.
    const verdictById = new Map<string, DiscoverFilterResult>();
    for (const row of result.items) {
      verdictById.set(row.id, {
        relevant: row.relevant,
        score: row.score,
        reason: truncateByCodePoint(row.reason, 120),
      });
    }

    // Emit exactly one entry per input item, in input order. Ids the model
    // invented are ignored; ids it skipped are treated as not relevant.
    const reviewed = new Map<string, DiscoverFilterResult>();
    for (const item of input.items) {
      reviewed.set(
        item.id,
        verdictById.get(item.id) ?? { relevant: false, score: 0, reason: "AI 未回傳審核結果" }
      );
    }
    return reviewed;
  } catch (error) {
    // Deliberate best-effort: an AI failure must not block discovery.
    console.warn("[filter-discover-relevance] AI filter failed, using rules only:", error);
    return fallback;
  }
}