haixunMaster/lib/ai/analyze-topic.ts

371 lines
14 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { generateObject, generateText } from "ai";
import { z } from "zod";
import {
coerceResearchMapRaw,
extractJsonFromText,
researchMapLooseSchema,
} from "./coerce-research-map";
import type { ProviderApiKeys } from "./keys";
import { getModel } from "./provider";
import {
buildResearchMapSystemPrompt,
buildResearchMapUserPrompt,
} from "./prompts/research-map";
import {
buildPlacementResearchMapSystemPrompt,
buildPlacementResearchMapUserPrompt,
} from "./prompts/research-map-placement";
import type { ResearchMap, SuggestedTag } from "@/lib/types/research";
import type { TopicGoal } from "@/lib/types/topic-goal";
import { isPlacementGoal } from "@/lib/types/topic-goal";
import {
explainProviderApiError,
getOpenCodeGenerationSettings,
prefersOpenCodeTextFirst,
} from "./opencode-go-settings";
import { formatProductContextForPrompt } from "@/lib/types/product-context";
import { filterSuggestedTags } from "./normalize-suggested-tags";
import { generateStructuredObject } from "./generate-structured";
/**
 * Input shared by the topic-analysis entry points in this module
 * (`analyzeTopicIntent` and `regenerateSearchTags`).
 */
export interface AnalyzeTopicInput {
/** Topic label; interpolated into the generation prompts. */
label: string;
/** Seed keyword; also used to build a default brief when `brief` is blank. */
query: string;
/** Optional free-text brief; a default derived from `query` is used when missing or blank. */
brief?: string | null;
/** Optional product context; only read when the topic goal is a placement goal. */
productContext?: string | null;
/** Topic goal; `isPlacementGoal` decides which prompt set and tag rules apply. */
topicGoal?: TopicGoal | string | null;
/** Optional persona description; a built-in default is used when missing or blank. */
persona?: string | null;
/** Provider identifier handed to `getModel` and the generation-settings helpers. */
aiProvider: string;
/** Model identifier handed to `getModel` and the generation-settings helpers. */
aiModel: string;
/** Optional provider API keys; an empty object is passed to `getModel` when omitted. */
apiKeys?: ProviderApiKeys;
}
// Fallback audience questions; normalizeResearchMap pads the model's
// `questions` list with these until it reaches the minimum of five.
const DEFAULT_QUESTIONS = [
"這個主題下,受眾最常感到焦慮的是什麼?",
"有哪些實用資訊是大家都在找的?",
"同溫層想聽什麼樣的親身經驗?",
"哪些迷思需要被釐清?",
"什麼情境下會特別想搜這個主題?",
];
// Fallback content pillars; used to pad `pillars` up to the minimum of four.
const DEFAULT_PILLARS = ["實用知識", "同溫經驗", "語錄故事", "迷思破解"];
// Fallback exclusions; used to pad `exclusions` up to the minimum of three.
const DEFAULT_EXCLUSIONS = ["業配硬銷", "未查證偏方", "純情緒宣洩"];
/**
 * Trims and de-blanks `items`, then pads the list up to `minimum` entries and
 * caps it at `maximum`.
 *
 * Padding draws from `defaults` in order, skipping any default that is already
 * in the list so the result never repeats an entry the model already returned.
 * When the defaults run out, numbered placeholder entries are appended.
 *
 * @param items    Raw strings (typically model output); not mutated.
 * @param minimum  Minimum number of entries to return (before the cap).
 * @param maximum  Maximum number of entries to return.
 * @param defaults Fallback entries used for padding, in priority order.
 * @returns A new array with between `min(minimum, maximum)` and `maximum` entries.
 */
function padStringList(items: string[], minimum: number, maximum: number, defaults: string[]) {
  const out = items.map((s) => s.trim()).filter(Boolean);
  const present = new Set(out);

  for (const candidate of defaults) {
    if (out.length >= minimum) break;
    // Skip defaults that are already present to avoid duplicated entries.
    if (present.has(candidate)) continue;
    present.add(candidate);
    out.push(candidate);
  }

  // Defaults exhausted: fill the remainder with numbered placeholders.
  while (out.length < minimum) {
    out.push(`待補充項目 ${out.length + 1}`);
  }

  return out.slice(0, maximum);
}
/**
 * Runs the model's suggested tags through `filterSuggestedTags`, supplying the
 * topic fields (label, query, brief, topic goal) as filtering context.
 *
 * NOTE(review): despite the name, nothing is padded here; the result is
 * whatever `filterSuggestedTags` returns.
 */
function padSuggestedTags(
  tags: SuggestedTag[],
  input: AnalyzeTopicInput
): SuggestedTag[] {
  const { label, query, brief, topicGoal } = input;
  return filterSuggestedTags(tags, { label, query, brief, topicGoal });
}
/**
 * Turns a loosely validated research map into its final shape: text fields are
 * trimmed, list fields are padded/capped (questions 5–8, pillars 4–6,
 * exclusions 3–8) and suggested tags are filtered.
 *
 * @param raw   Loose research map as produced by the generation step.
 * @param input Original analysis input, used as context for tag filtering.
 * @returns The research map without `similarAccounts`.
 */
function normalizeResearchMap(
  raw: z.infer<typeof researchMapLooseSchema>,
  input: AnalyzeTopicInput
): Omit<ResearchMap, "similarAccounts"> {
  const paddedQuestions = padStringList(raw.questions, 5, 8, DEFAULT_QUESTIONS);
  const paddedPillars = padStringList(raw.pillars, 4, 6, DEFAULT_PILLARS);
  const audienceSummary = raw.audienceSummary.trim();
  const contentGoal = raw.contentGoal.trim();
  const suggestedTags = padSuggestedTags(raw.suggestedTags, input);
  const exclusions = padStringList(raw.exclusions, 3, 8, DEFAULT_EXCLUSIONS);

  return {
    audienceSummary,
    contentGoal,
    questions: paddedQuestions,
    pillars: paddedPillars,
    suggestedTags,
    exclusions,
  };
}
// Shape of a single rewritten search tag: a 2–10 character tag, a short
// reason (1–60 characters) and two classification enums.
const searchTagItemSchema = z.object({
  tag: z.string().min(2).max(10),
  reason: z.string().min(1).max(60),
  searchIntent: z.enum(["痛點", "知識", "經驗", "對比", "工具", "語錄", "需求", "求助"]),
  searchType: z.enum(["短詞", "情境", "語錄"]),
});

// Structured-output schema for the tag rewrite step: between 6 and 8 tags.
const searchTagBatchSchema = z.object({
  tags: z.array(searchTagItemSchema).min(6).max(8),
});
// Matches tags that do not look like something a person would type into a
// search box: punctuation, known truncated fragments, placeholder text such as
// "搜尋詞1", or a leading question stem.
const UNNATURAL_SEARCH_TAG =
  /[,。!?、:;()()「」【】]|一步步|過程中的|記錄改|的搞|舒適圈的|怎麼交到新|搜尋詞\d|^(這個|哪些|什麼情境)/;

// Matches tags that end on a dangling particle/adverb, i.e. a cut-off phrase.
const DANGLING_TAG_ENDING = /的$|才$|要$|會$|怎麼$|如何$/;

/**
 * Keeps only natural-looking, unique search tags.
 *
 * Each tag has a leading `#` removed and is trimmed; it is dropped when it is
 * shorter than 2 or longer than 10 characters, matches
 * `UNNATURAL_SEARCH_TAG`, ends on a dangling particle, or duplicates an
 * earlier tag (case-insensitive).
 *
 * The input objects are left untouched: survivors are returned as shallow
 * copies carrying the normalized `tag`.
 *
 * @param tags Candidate tags, in priority order.
 * @returns A new array of normalized tags in their original order.
 */
function naturalSearchTags(tags: SuggestedTag[]): SuggestedTag[] {
  const seen = new Set<string>();
  const kept: SuggestedTag[] = [];

  for (const item of tags) {
    const tag = item.tag.replace(/^#/, "").trim();
    if (tag.length < 2 || tag.length > 10) continue;
    if (UNNATURAL_SEARCH_TAG.test(tag) || DANGLING_TAG_ENDING.test(tag)) continue;

    const key = tag.toLowerCase();
    if (seen.has(key)) continue;
    seen.add(key);

    // Copy instead of assigning to item.tag so callers' objects are not mutated.
    kept.push({ ...item, tag });
  }

  return kept;
}
/**
 * Asks the model to rewrite the research map into short search terms that a
 * real user would type, then filters the result with `naturalSearchTags`.
 *
 * In placement mode (see `isPlacementGoal`) the prompt asks for demand-driven
 * terms and any tag classified as "語錄" is removed from the result.
 *
 * List fields from the map are joined with explicit delimiters so that
 * individual questions, pillars and exclusions stay distinguishable in the
 * prompt.
 *
 * @param model Model instance obtained from `getModel`.
 * @param input Original analysis input (label, query, brief, provider info).
 * @param map   Normalized research map used as rewrite context.
 * @returns The filtered tags; may contain fewer than 6 entries, callers decide
 *          whether that is acceptable.
 */
async function generateNaturalSearchTags(
  model: ReturnType<typeof getModel>,
  input: AnalyzeTopicInput,
  map: Omit<ResearchMap, "similarAccounts">
): Promise<SuggestedTag[]> {
  const placementMode = isPlacementGoal(input.topicGoal);
  const modeRules = placementMode
    ? `你正在替「找 TA」生成需求型搜尋詞
- 搜尋目標是正在遇到問題、求助、求推薦或準備選購的人,不是泛知識流量
- 至少 5 個詞要帶具體痛點、症狀、使用情境或選購意圖
- 好例子:狗狗皮膚癢、洗毛精推薦、敏感肌洗毛精、在家洗狗、洗完還是臭
- 壞例子:寵物生活、毛孩日常、產品需求、解決方案、內容分享
- 不產語錄、口號或品牌視角文案;每個詞都要能對應一個當下需求
- searchIntent 優先使用「痛點、需求、求助」,不可把品牌行銷詞當成受眾用語`
    : `你正在替「拷貝忍者」生成內容型搜尋詞:
- 組合2 個核心主題、2 個具體痛點、2 個經驗行動、1 個受眾用語、最多 1 個語錄詞
- 以「宅男變身」為例,好詞是:宅男改造、男生穿搭、體味改善、內向社交、變帥過程、宅男自救`;

  // Joined up front with visible separators; an empty separator would fuse
  // the entries into one unreadable run of text.
  const questionList = map.questions.join(";");
  const pillarList = map.pillars.join("、");
  const exclusionList = map.exclusions.join("、");

  const result = await generateStructuredObject({
    model,
    provider: input.aiProvider,
    modelId: input.aiModel,
    schema: searchTagBatchSchema,
    system: `你是台灣 Threads 站內搜尋策略師。你只負責把研究地圖改寫成真人會輸入搜尋框的詞。
硬性規則:
- 產出 8 個不重複搜尋詞,每個 2~8 個中文字為佳,最長 10 字
- 它是「搜尋詞」,不是文章標題、內容企劃、完整問句或 hashtag
- 禁止把內容支柱直接截斷;必須先理解語意,再壓縮成自然詞組
- 禁止「一步步記錄改」「穿搭的搞」「過程中的黑色」這類殘句
- 除非種子關鍵字或 Brief 明確指定品牌,否則搜尋詞不可自行加入品牌名
- 不要只換同義字湊數;每個詞應能找到不同但仍相關的貼文
- 想像一位台灣使用者真的會在 Threads 搜尋框輸入它
${modeRules}
壞詞範例:變身實戰記錄一步步記錄改。`,
    prompt: `主題:${input.label}
種子關鍵字:${input.query}
Brief:${input.brief ?? ""}
受眾:${map.audienceSummary}
受眾問題:${questionList}
內容支柱:${pillarList}
排除:${exclusionList}
請重新產出自然、可搜尋的 Threads 搜尋詞。`,
    jsonPromptSuffix: `\n\n只回傳 JSON不要 markdown。tags 必須恰好 8 筆且 tag 不可重複:\n{"tags":[{"tag":"2到8字搜尋詞1","reason":"搜尋意圖","searchIntent":"經驗","searchType":"短詞"},{"tag":"2到8字搜尋詞2","reason":"搜尋意圖","searchIntent":"痛點","searchType":"情境"},{"tag":"2到8字搜尋詞3","reason":"搜尋意圖","searchIntent":"知識","searchType":"短詞"},{"tag":"2到8字搜尋詞4","reason":"搜尋意圖","searchIntent":"工具","searchType":"情境"},{"tag":"2到8字搜尋詞5","reason":"搜尋意圖","searchIntent":"經驗","searchType":"情境"},{"tag":"2到8字搜尋詞6","reason":"搜尋意圖","searchIntent":"對比","searchType":"短詞"},{"tag":"2到8字搜尋詞7","reason":"搜尋意圖","searchIntent":"痛點","searchType":"情境"},{"tag":"2到8字搜尋詞8","reason":"搜尋意圖","searchIntent":"語錄","searchType":"語錄"}]}`,
  });

  const natural = naturalSearchTags(result.tags);
  // Placement mode targets concrete needs, so quote-style tags are dropped.
  return placementMode
    ? natural.filter((tag) => tag.searchIntent !== "語錄" && tag.searchType !== "語錄")
    : natural;
}
/**
 * Regenerates the search tags for an existing research map.
 *
 * @param input Analysis input; its provider, model and API keys select the model.
 * @param map   Research map used as context for the rewrite.
 * @returns Up to 8 natural search tags.
 * @throws Error when fewer than 6 usable tags come back from the model.
 */
export async function regenerateSearchTags(
  input: AnalyzeTopicInput,
  map: Omit<ResearchMap, "similarAccounts">
): Promise<SuggestedTag[]> {
  const { aiProvider, aiModel, apiKeys } = input;
  const usable = await generateNaturalSearchTags(
    getModel(aiProvider, aiModel, apiKeys ?? {}),
    input,
    map
  );

  if (usable.length >= 6) {
    return usable.slice(0, 8);
  }
  throw new Error(`AI 只產出 ${usable.length} 個可用搜尋詞,未達 6 個,請重試或更換研究模型`);
}
/**
 * Converts anything thrown during topic analysis into a user-facing message.
 *
 * - Strings are returned unchanged (already formatted).
 * - `ZodError`s list up to four issues as `field:message`, separated by ";".
 * - Other `Error`s go through `explainProviderApiError` (with the optional
 *   `responseBody` attached to the error); the raw message is the fallback.
 * - Anything else yields a generic failure message.
 *
 * @param error The caught value.
 * @returns A message suitable for display.
 */
export function formatAnalyzeError(error: unknown): string {
  if (typeof error === "string") return error;

  if (error instanceof z.ZodError) {
    const detail = error.issues
      .slice(0, 4)
      .map((issue) => {
        const field = issue.path.join(".") || "回傳內容";
        // Separate field and message so the two do not run together.
        return `${field}:${issue.message}`;
      })
      .join(";");
    return `AI 回傳的研究地圖格式不完整(${detail})。請再按一次「AI 分析主題」重試。`;
  }

  if (error instanceof Error) {
    const extra = error as Error & { responseBody?: string };
    const explained = explainProviderApiError(
      error.message,
      typeof extra.responseBody === "string" ? extra.responseBody : undefined
    );
    if (explained) return explained;
    return error.message;
  }

  return "AI 分析失敗";
}
/**
 * Builds the instruction block appended to research-map prompts. It tells the
 * model to answer with bare JSON, shows the expected object shape, and states
 * the per-field count requirements (suggestedTags as a 10~14 range).
 *
 * @returns The suffix, starting with a newline so it can be concatenated
 *          directly after a prompt.
 */
function buildJsonPromptSuffix(): string {
  return `
請只回傳 JSON,不要 markdown,格式:
{
"audienceSummary": "string",
"contentGoal": "string",
"questions": ["string", "..."],
"pillars": ["string", "..."],
"suggestedTags": [
{"tag":"2-4字短詞","reason":"為什麼搜","searchIntent":"痛點|知識|經驗|對比|工具|語錄","searchType":"短詞|情境|語錄"}
],
"exclusions": ["string", "..."]
}
questions 至少 5 個、pillars 至少 4 個、suggestedTags 10~14 個、exclusions 至少 3 個。不要輸出 similarAccounts。`;
}
/**
 * Picks the system/user prompt pair for the research-map generation.
 *
 * Placement goals (see `isPlacementGoal`) use the placement prompt builders
 * and include the formatted product context; every other goal uses the
 * regular research-map builders. A blank `brief` or `persona` is replaced by a
 * mode-specific default.
 *
 * @param input Analysis input.
 * @returns `{ system, prompt }` ready to hand to the generation helpers.
 */
function resolvePrompts(input: AnalyzeTopicInput) {
  if (isPlacementGoal(input.topicGoal)) {
    const brief = input.brief?.trim() || `主題是「${input.query}」。`;
    const productContext = formatProductContextForPrompt(input.productContext);
    const persona =
      input.persona?.trim() ||
      "專業、願意提供實用建議的品牌代表,語氣自然不硬銷。";
    return {
      system: buildPlacementResearchMapSystemPrompt(),
      prompt: buildPlacementResearchMapUserPrompt({
        label: input.label,
        query: input.query,
        brief,
        productContext,
        persona,
      }),
    };
  }

  // Default audience is an age range (25~40), spelled with an explicit separator.
  const brief =
    input.brief?.trim() ||
    `主題是「${input.query}」。請假設受眾是台灣 25~40 歲、會在 Threads 找資訊與同溫感的一般使用者。`;
  const persona =
    input.persona?.trim() ||
    "專業、有觀點、願意分享實用資訊的 Threads 創作者,語氣自然不說教。";
  return {
    system: buildResearchMapSystemPrompt(),
    prompt: buildResearchMapUserPrompt({
      label: input.label,
      query: input.query,
      brief,
      persona,
    }),
  };
}
/**
 * Free-text generation path: the JSON format instructions are appended to the
 * system prompt, the model's text answer is parsed with `extractJsonFromText`
 * and the result is coerced into the loose research-map shape.
 *
 * @param model    Model instance from `getModel`.
 * @param prompt   User prompt.
 * @param system   System prompt (the JSON suffix is appended here).
 * @param fallback Topic fields handed to `coerceResearchMapRaw`.
 * @param provider Provider id used to look up generation settings.
 * @param modelId  Model id used to look up generation settings.
 */
async function generateWithText(
  model: ReturnType<typeof getModel>,
  prompt: string,
  system: string,
  fallback: { label: string; query: string; brief: string },
  provider: string,
  modelId: string
) {
  const generationSettings = getOpenCodeGenerationSettings(provider, modelId);
  const systemWithFormat = `${system}${buildJsonPromptSuffix()}`;
  const response = await generateText({
    model,
    system: systemWithFormat,
    prompt,
    ...generationSettings,
  });
  return coerceResearchMapRaw(extractJsonFromText(response.text), fallback);
}
/**
 * Structured-output generation path: the JSON format instructions are appended
 * to the user prompt, the model is asked for an object matching
 * `researchMapLooseSchema`, and the result is coerced into the loose
 * research-map shape.
 *
 * @param model    Model instance from `getModel`.
 * @param prompt   User prompt (the JSON suffix is appended here).
 * @param system   System prompt, passed through unchanged.
 * @param fallback Topic fields handed to `coerceResearchMapRaw`.
 * @param provider Provider id used to look up generation settings.
 * @param modelId  Model id used to look up generation settings.
 */
async function generateWithObject(
  model: ReturnType<typeof getModel>,
  prompt: string,
  system: string,
  fallback: { label: string; query: string; brief: string },
  provider: string,
  modelId: string
) {
  const generationSettings = getOpenCodeGenerationSettings(provider, modelId);
  const promptWithFormat = `${prompt}${buildJsonPromptSuffix()}`;
  const { object } = await generateObject({
    model,
    schema: researchMapLooseSchema,
    system,
    prompt: promptWithFormat,
    ...generationSettings,
  });
  return coerceResearchMapRaw(object, fallback);
}
/**
 * Generates a research map for a topic.
 *
 * Up to three generation attempts are made, stopping at the first success:
 * text-first or object-first depending on `prefersOpenCodeTextFirst`, then the
 * other path, then a text retry with an extra "fill in every field" nudge.
 * The winning result is normalized, after which a best-effort rewrite of the
 * suggested tags is tried; the rewrite is only adopted when it yields at least
 * 6 tags (capped at 8), and a rewrite failure is logged and ignored.
 *
 * @param input Analysis input.
 * @returns The normalized research map without `similarAccounts`.
 * @throws A formatted message *string* (from `formatAnalyzeError`) when every
 *         attempt fails or normalization throws. Callers should not assume an
 *         `Error` instance.
 */
export async function analyzeTopicIntent(
  input: AnalyzeTopicInput
): Promise<Omit<ResearchMap, "similarAccounts">> {
  type LooseMap = z.infer<typeof researchMapLooseSchema>;

  const model = getModel(input.aiProvider, input.aiModel, input.apiKeys ?? {});
  const { system, prompt } = resolvePrompts(input);
  const fallback = {
    label: input.label,
    query: input.query,
    brief: input.brief?.trim() || `主題是「${input.query}」。`,
  };
  const textFirst = prefersOpenCodeTextFirst(input.aiProvider, input.aiModel);

  // The three generation strategies; only their order depends on the provider.
  const viaText = () =>
    generateWithText(model, prompt, system, fallback, input.aiProvider, input.aiModel);
  const viaObject = () =>
    generateWithObject(model, prompt, system, fallback, input.aiProvider, input.aiModel);
  const viaTextRetry = () =>
    generateWithText(
      model,
      `${prompt}\n\n上次格式不完整請務必補齊所有欄位。`,
      system,
      fallback,
      input.aiProvider,
      input.aiModel
    );
  const attempts: Array<() => Promise<LooseMap>> = textFirst
    ? [viaText, viaObject, viaTextRetry]
    : [viaObject, viaText, viaTextRetry];

  let raw: LooseMap | null = null;
  let lastError: unknown;
  for (const runAttempt of attempts) {
    try {
      raw = await runAttempt();
      break;
    } catch (error) {
      // Remember the most recent failure; it is reported if nothing succeeds.
      lastError = error;
    }
  }
  if (!raw) {
    throw formatAnalyzeError(lastError);
  }

  try {
    const normalized = normalizeResearchMap(raw, input);
    try {
      const rewritten = await generateNaturalSearchTags(model, input, normalized);
      if (rewritten.length >= 6) {
        return { ...normalized, suggestedTags: rewritten.slice(0, 8) };
      }
    } catch (error) {
      // Best effort only: keep the originally filtered tags on failure.
      console.warn("[analyze-topic] natural search tag rewrite failed:", error);
    }
    return normalized;
  } catch (error) {
    throw formatAnalyzeError(error);
  }
}