// haixunMaster/lib/ai/generate-matrix.ts
//
// NOTE: This file contains Unicode characters that might be confused with
// ASCII look-alikes (for example the full-width punctuation used inside the
// Chinese prompt strings). Reveal/escape them in your editor before changing
// any string literal so the prompts are not altered by accident.

import { generateObject, generateText } from "ai";
import { z } from "zod";
import type { ProviderApiKeys } from "./keys";
import type { MatrixRow, ResearchMap } from "@/lib/types/research";
import { withAgentSystem } from "./agent";
import { buildPersonaPromptBlock } from "./persona";
import { getModel } from "./provider";
import { extractJsonFromText } from "./coerce-research-map";
import {
getOpenCodeGenerationSettings,
prefersOpenCodeTextFirst,
} from "./opencode-go-settings";
import {
coerceMatrixRaw,
formatMatrixError,
matrixLooseSchema,
} from "./coerce-matrix";
import { HASHTAG_USER_REMINDER, HASHTAG_WRITING_RULES } from "./hashtag-rules";
import { sanitizePromptText, THREADS_MAX_CHARS } from "@/lib/utils";
/**
 * Strict shape of one matrix row as requested through `generateObject`.
 *
 * `sortOrder` must be an integer >= 1 and `text` may not be longer than
 * `THREADS_MAX_CHARS`; every other field is an unconstrained string (or, for
 * `sourcePermalinks`, an array of strings).
 */
const matrixRowSchema = z.object({
  sortOrder: z.number().int().min(1),
  searchTag: z.string(),
  angle: z.string(),
  hook: z.string(),
  text: z.string().max(THREADS_MAX_CHARS),
  referenceNotes: z.string(),
  sourcePermalinks: z.array(z.string()),
  rationale: z.string(),
});

/** Structured-output schema: an object whose `rows` is a list of matrix rows. */
const matrixSchema = z.object({
  rows: z.array(matrixRowSchema),
});
// Model ids for which generateContentMatrix sets `preferText`, i.e. plain-text
// generation is attempted before structured-object generation.
const TEXT_FIRST_MODELS = new Set(["grok-3", "grok-3-fast"]);
// Batch count used by generateContentMatrix when splitting the source posts
// into groups that are generated in parallel.
const BATCH_COUNT = 3;
/** A reply attached to a reference post; only the first three are rendered into the prompt. */
interface MatrixSourceReply {
  text: string;
  authorName?: string | null;
  likeCount?: number | null;
}

/** A reference post used as source material for the matrix prompt. */
interface MatrixSourcePost {
  /** Post body; truncated to 300 characters when rendered into the prompt. */
  text: string;
  /** Author handle; a placeholder name is used when missing. */
  authorName?: string | null;
  permalink?: string | null;
  /** Tag the post was found under; the seed query is used when missing. */
  searchTag?: string | null;
  likeCount?: number | null;
  replyCount?: number | null;
  /** Optional quality note rendered under the post. */
  qualityReason?: string | null;
  replies?: MatrixSourceReply[];
}

/** Everything `generateContentMatrix` needs to build prompts and pick a model. */
export interface GenerateMatrixInput {
  /** Topic name shown at the top of each batch prompt. */
  topicLabel: string;
  /** Seed keyword; also the fallback tag for posts without `searchTag`. */
  query: string;
  /** Optional free-form brief appended to the batch prompt. */
  brief?: string | null;
  /** Optional persona description forwarded to `buildPersonaPromptBlock`. */
  persona?: string | null;
  /** Optional research context rendered by `buildResearchBlock`. */
  researchMap?: ResearchMap | null;
  /** Provider id, model id and keys passed to `getModel`. */
  aiProvider: string;
  aiModel: string;
  apiKeys?: ProviderApiKeys;
  /** Maximum number of rows returned by `generateContentMatrix`. */
  count: number;
  /** Reference posts; `generateContentMatrix` throws when this is empty. */
  posts: MatrixSourcePost[];
}
/**
 * Deals `posts` round-robin into `n` buckets.
 *
 * Item `i` lands in bucket `i % n`, so bucket sizes differ by at most one and
 * the relative order of items inside each bucket is preserved.
 *
 * @param posts Items to distribute.
 * @param n Number of buckets to create.
 * @returns An array of `n` arrays (some may be empty when `posts.length < n`).
 */
function distributePosts<T>(posts: T[], n: number): T[][] {
  const buckets: T[][] = Array.from({ length: n }, () => []);
  return posts.reduce((filled, post, position) => {
    filled[position % n].push(post);
    return filled;
  }, buckets);
}
/**
 * Renders the reference posts as a numbered, blank-line-separated text block
 * for the generation prompt.
 *
 * Each entry shows the tag (falling back to `defaultQuery`), the author, the
 * like count, up to 300 characters of the post text, the permalink, up to
 * three replies (80 characters each) and the optional quality note.
 *
 * Fixes relative to the previous version: the like count is now wrapped in a
 * balanced pair of parentheses (the opening one was missing) and a reply's
 * author is separated from the reply text by a colon instead of being fused
 * to it.
 *
 * @param posts Reference posts to render.
 * @param defaultQuery Tag used for posts that carry no `searchTag`.
 * @returns The rendered block; entries are joined with a blank line.
 */
function buildMaterialsBlock(posts: GenerateMatrixInput["posts"], defaultQuery: string): string {
  return posts
    .map((post, i) => {
      const replies = post.replies ?? [];
      const repliesBlock =
        replies.length > 0
          ? `\n 留言:${replies
              .slice(0, 3)
              .map(
                (r) =>
                  `@${sanitizePromptText(r.authorName) || "匿名"}:${sanitizePromptText(r.text).slice(0, 80)}`
              )
              .join(" | ")}`
          : "";
      const tag = sanitizePromptText(post.searchTag) || sanitizePromptText(defaultQuery);
      const author = sanitizePromptText(post.authorName) || "匿名";
      const qualityBlock = post.qualityReason
        ? `\n 品質說明:${sanitizePromptText(post.qualityReason)}`
        : "";
      // Three physical lines per post: header, body excerpt, link (+ extras).
      return [
        `${i + 1}. [${tag}] @${author}(${post.likeCount ?? 0}讚)`,
        sanitizePromptText(post.text).slice(0, 300),
        `連結:${sanitizePromptText(post.permalink) || "無"}${repliesBlock}${qualityBlock}`,
      ].join("\n");
    })
    .join("\n\n");
}
/**
 * Renders the optional research map (audience, goal, questions, pillars and up
 * to ten similar accounts) as a prompt section.
 *
 * Changes relative to the previous version:
 * - an account's username and its reason are separated by a colon instead of
 *   being concatenated directly;
 * - `questions` and `pillars` fall back to an empty list, matching the guard
 *   that already existed for `similarAccounts`;
 * - `sanitizePromptText` is called with the value only, so the index/array
 *   arguments supplied by `Array.prototype.map` can never reach it.
 *
 * @param researchMap Research context, or nothing.
 * @returns The rendered section (starting and ending with a newline), or an
 *   empty string when no research map is given.
 */
function buildResearchBlock(researchMap?: ResearchMap | null): string {
  if (!researchMap) return "";
  const similarAccounts = (researchMap.similarAccounts ?? [])
    .slice(0, 10)
    .map((a) => {
      const reason = a.reason ? `:${sanitizePromptText(a.reason)}` : "";
      return ` @${sanitizePromptText(a.username)}${reason}`;
    })
    .join("\n");
  const questions = (researchMap.questions ?? []).map((q) => sanitizePromptText(q)).join("、");
  const pillars = (researchMap.pillars ?? []).map((p) => sanitizePromptText(p)).join("、");
  // Leading and trailing "" reproduce the newline at both ends of the section.
  return [
    "",
    `受眾:${sanitizePromptText(researchMap.audienceSummary)}`,
    `內容目標:${sanitizePromptText(researchMap.contentGoal)}`,
    `想回答的問題:${questions}`,
    `內容支柱:${pillars}`,
    similarAccounts ? `同領域參考帳號:\n${similarAccounts}` : "",
    "",
  ].join("\n");
}
/**
 * Builds the system prompt for matrix generation: role description, persona
 * block, and the writing rules, wrapped by `withAgentSystem`.
 *
 * Changes relative to the previous version:
 * - the text-length rule interpolates `THREADS_MAX_CHARS` instead of a
 *   hard-coded `500`, so it cannot drift from the schema and the JSON suffix,
 *   which already use the constant;
 * - the parenthesis around the length rule is balanced again, and the
 *   `searchTag` / `rationale` rules regain the separators that were missing.
 *
 * @param persona Optional persona description forwarded to
 *   `buildPersonaPromptBlock`.
 * @returns Whatever `withAgentSystem` returns for the assembled prompt.
 */
function buildSystemPrompt(persona?: string | null) {
  const lines = [
    "你是 Threads 內容企劃師。根據參考素材(含優質與中等品質),產出「內容矩陣」——一週可發的貼文企劃表。",
    `${buildPersonaPromptBlock(persona)}`,
    "規則:",
    "- 每篇必須原創,不可抄襲參考貼文",
    "- hook 是一句吸引人的開頭(可獨立於正文),風格符合人設開場習慣",
    `- text 是完整 Threads 貼文(≤${THREADS_MAX_CHARS}字含 #標籤),語感要像創作者本人親筆`,
    `${HASHTAG_WRITING_RULES}`,
    "- referenceNotes 是從參考素材摘出的 2-3 個重點(簡短條列感,用換行分隔)",
    "- 每篇對應一個 searchTag(主題標籤)",
    "- sortOrder 從 1 開始遞增",
    "- 繁體中文台灣用語",
    "- 知識型貼文不寫未查證的醫療數據斷言;rationale 標註需網路查證的關鍵句",
    "- 部分素材附有[品質說明]:品質較低的仍有參考價值(角度、切點可用),產出時不要直接沿用其內容",
  ];
  return withAgentSystem(lines.join("\n"));
}
/**
 * Builds the user prompt for one batch: topic, seed keyword, optional brief,
 * research section, batch header, the batch's materials and writing reminders.
 *
 * Changes relative to the previous version: the batch header bracket is closed
 * (`【批次 i/n】`) and the brief is separated from its `Brief` label by a colon.
 *
 * @param input Generation input; `topicLabel`, `query` and `brief` are read.
 * @param researchBlock Pre-rendered research section (may be empty).
 * @param materials Pre-rendered materials for this batch.
 * @param batchIndex Zero-based batch index; displayed one-based.
 * @param totalBatches Total number of batches shown in the header.
 * @param thisBatchCount Number of rows requested from this batch.
 * @returns The assembled prompt text.
 */
function buildBatchPrompt(
  input: GenerateMatrixInput,
  researchBlock: string,
  materials: string,
  batchIndex: number,
  totalBatches: number,
  thisBatchCount: number
) {
  const batchNumber = batchIndex + 1;
  // The post count is estimated from the blank-line separator between entries.
  // NOTE(review): a post whose sanitized text still contains a blank line would
  // inflate this figure — confirm sanitizePromptText collapses blank lines.
  const materialCount = materials.split("\n\n").length;
  return [
    `主題:${sanitizePromptText(input.topicLabel)}`,
    `種子關鍵字:${sanitizePromptText(input.query)}`,
    input.brief ? `Brief:${sanitizePromptText(input.brief)}` : "",
    `${researchBlock}`,
    `【批次 ${batchNumber}/${totalBatches}】`,
    `以下為第 ${batchNumber} 批素材(共 ${materialCount} 篇),請產出 ${thisBatchCount} 篇不同切角,注意與其他批次做出區隔,避免重複。`,
    "參考素材:",
    `${materials}`,
    `寫作提醒:矩陣裡的 text 必須像創作者親筆,有代表句範例時語感要向範例靠攏。${HASHTAG_USER_REMINDER}`,
  ].join("\n");
}
/**
 * Builds the "answer with JSON only" suffix that shows the expected row shape
 * and states how many rows to produce.
 *
 * Changes relative to the previous version, all repairing garbled instruction
 * text: the "no markdown" aside is parenthesised again, the hashtag hint reads
 * `1-3` instead of `13`, and the requested row count is followed by its unit
 * and a comma instead of running straight into `sortOrder`.
 *
 * @param count Number of rows the model is asked to produce.
 * @returns The suffix, beginning with a newline so it can be appended directly.
 */
function buildJsonPromptSuffix(count: number) {
  const lines = [
    "", // leading newline separates the suffix from whatever precedes it
    "請只回傳 JSON(不要 markdown),格式:",
    "{",
    '"rows": [',
    "{",
    '"sortOrder": 1,',
    '"searchTag": "標籤",',
    '"angle": "切角說明",',
    '"hook": "開頭一句",',
    '"text": "完整貼文(結尾含 1-3 個 #話題標籤)",',
    '"referenceNotes": "參考重點(多行以換行分隔)",',
    '"sourcePermalinks": ["https://..."],',
    '"rationale": "為什麼這篇有效"',
    "}",
    "]",
    "}",
    `請產出 ${count} 篇,sortOrder 從 1 遞增,每篇 text 不超過 ${THREADS_MAX_CHARS} 字。`,
  ];
  return lines.join("\n");
}
/**
 * Returns the rows ordered by ascending `sortOrder`, with each `text` cut to
 * at most `THREADS_MAX_CHARS` characters.
 *
 * The input array is copied before sorting: `Array.prototype.sort` works in
 * place, and the previous version therefore reordered the caller's array as a
 * side effect. Row objects are shallow-copied, never mutated.
 *
 * @param rows Rows to normalise; left untouched.
 * @returns A new array of new row objects.
 */
function normalizeRows(rows: MatrixRow[]): MatrixRow[] {
  return [...rows]
    .sort((a, b) => a.sortOrder - b.sortOrder)
    .map((row) => ({
      ...row,
      text: row.text.slice(0, THREADS_MAX_CHARS),
    }));
}
/**
 * Flattens the per-batch results into one list, drops rows whose angle
 * duplicates an earlier row, and renumbers `sortOrder` from 1.
 *
 * Rows are first ordered by their batch-local `sortOrder` (the sort is stable,
 * so batches interleave in batch order). Two rows count as duplicates when the
 * first 30 characters of `angle`, lower-cased and with whitespace removed, are
 * equal.
 *
 * Fix: rows whose angle is empty or whitespace-only all produced the key `""`
 * and every one but the first was silently discarded. Such rows carry no angle
 * to compare, so they are now always kept.
 *
 * @param batches Rows produced by each batch.
 * @returns A new, de-duplicated list with consecutive `sortOrder` values.
 */
function mergeBatches(batches: MatrixRow[][]): MatrixRow[] {
  // flat() returns a fresh array, so sorting it in place touches no input.
  const ordered = batches.flat().sort((a, b) => a.sortOrder - b.sortOrder);
  const seenKeys = new Set<string>();
  const unique = ordered.filter((row) => {
    const key = row.angle.slice(0, 30).toLowerCase().replace(/\s+/g, "");
    if (key === "") return true; // nothing to compare — never a duplicate
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
  return unique.map((row, index) => ({ ...row, sortOrder: index + 1 }));
}
/**
 * Generates rows through plain-text completion.
 *
 * The JSON format suffix is appended to the system prompt, the reply text is
 * run through `extractJsonFromText`, checked with `matrixLooseSchema.parse`
 * (which throws on mismatch) and finally converted by `coerceMatrixRaw`.
 *
 * @param model Model handle obtained from `getModel`.
 * @param system System prompt (without the JSON suffix).
 * @param prompt User prompt.
 * @param fallback Seed query and row count; `count` sizes the JSON suffix and
 *   the whole object is handed to `coerceMatrixRaw`.
 * @param provider Provider id used to look up generation settings.
 * @param modelId Model id used to look up generation settings.
 * @returns The result of `coerceMatrixRaw`.
 */
async function generateWithText(
  model: ReturnType<typeof getModel>,
  system: string,
  prompt: string,
  fallback: { query: string; count: number },
  provider: string,
  modelId: string
) {
  const generationSettings = getOpenCodeGenerationSettings(provider, modelId);
  const systemWithFormat = sanitizePromptText(`${system}${buildJsonPromptSuffix(fallback.count)}`);
  const userPrompt = sanitizePromptText(prompt);
  const completion = await generateText({
    model,
    system: systemWithFormat,
    prompt: userPrompt,
    ...generationSettings,
  });
  const candidate = extractJsonFromText(completion.text);
  // Validation only: the parsed value is discarded, a mismatch throws.
  matrixLooseSchema.parse(candidate);
  return coerceMatrixRaw(candidate, fallback);
}
/**
 * Generates rows through structured-object generation against `matrixSchema`.
 *
 * Here the JSON format suffix is appended to the user prompt (not the system
 * prompt), and the returned rows are passed through `normalizeRows`.
 *
 * @param model Model handle obtained from `getModel`.
 * @param system System prompt.
 * @param prompt User prompt (without the JSON suffix).
 * @param fallback Seed query and row count; only `count` is read here.
 * @param provider Provider id used to look up generation settings.
 * @param modelId Model id used to look up generation settings.
 * @returns The normalised rows.
 */
async function generateWithObject(
  model: ReturnType<typeof getModel>,
  system: string,
  prompt: string,
  fallback: { query: string; count: number },
  provider: string,
  modelId: string
) {
  const generationSettings = getOpenCodeGenerationSettings(provider, modelId);
  const systemPrompt = sanitizePromptText(system);
  const promptWithFormat = sanitizePromptText(`${prompt}${buildJsonPromptSuffix(fallback.count)}`);
  const result = await generateObject({
    model,
    schema: matrixSchema,
    system: systemPrompt,
    prompt: promptWithFormat,
    ...generationSettings,
  });
  return normalizeRows(result.object.rows);
}
/**
 * Runs up to three generation strategies for one batch and returns the first
 * result that does not throw.
 *
 * Order: the preferred mode (text or object), then the other mode, then a
 * final text attempt whose prompt carries an extra "format was incomplete"
 * reminder. Errors from failed strategies are swallowed; only the last one is
 * reported.
 *
 * @param model Model handle obtained from `getModel`.
 * @param system System prompt.
 * @param prompt User prompt for this batch.
 * @param fallback Seed query and requested row count for this batch.
 * @param provider Provider id forwarded to the generators.
 * @param modelId Model id forwarded to the generators.
 * @param preferText When true, text generation is tried before object generation.
 * @returns The rows of the first successful strategy, passed through `normalizeRows`.
 * @throws Error built from `formatMatrixError(lastError)` when no strategy yields rows.
 */
async function attemptBatch(
  model: ReturnType<typeof getModel>,
  system: string,
  prompt: string,
  fallback: { query: string; count: number },
  provider: string,
  modelId: string,
  preferText: boolean
): Promise<MatrixRow[]> {
  const viaText = (userPrompt: string) => () =>
    generateWithText(model, system, userPrompt, fallback, provider, modelId);
  const viaObject = () => generateWithObject(model, system, prompt, fallback, provider, modelId);
  const retryPrompt = `${prompt}\n\n上次格式不完整請務必補齊 ${fallback.count} 篇 rows。`;

  const firstTwo: Array<() => Promise<MatrixRow[]>> = preferText
    ? [viaText(prompt), viaObject]
    : [viaObject, viaText(prompt)];
  const strategies: Array<() => Promise<MatrixRow[]>> = [...firstTwo, viaText(retryPrompt)];

  let produced: MatrixRow[] | null = null;
  let lastError: unknown;
  for (const run of strategies) {
    try {
      produced = await run();
    } catch (error) {
      lastError = error;
      continue;
    }
    break;
  }

  if (!produced) {
    throw new Error(formatMatrixError(lastError));
  }
  // Kept outside the try/catch so a failure here is not mistaken for a
  // generation failure and does not trigger another strategy.
  return normalizeRows(produced);
}
/**
 * Generates the content matrix: splits the reference posts into batches, runs
 * one generation per batch in parallel, merges and de-duplicates the results,
 * and returns at most `input.count` rows.
 *
 * Changes relative to the previous version:
 * - The number of batches is capped at the number of posts. Previously the
 *   posts were always dealt into `BATCH_COUNT` batches while the per-batch row
 *   quota was still `count / BATCH_COUNT`; with one or two posts the empty
 *   batches were skipped, so fewer than `count` rows were ever requested, and
 *   the prompt announced a batch total that did not match reality.
 * - A `count` below 1 returns an empty list without calling the model
 *   (previously the model was still invoked and the result sliced away).
 *
 * @param input Topic, model selection, requested row count and source posts.
 * @returns Up to `input.count` rows with consecutive `sortOrder` values.
 * @throws Error when `input.posts` is empty.
 * @throws Error from `attemptBatch` when any batch fails all of its attempts
 *   (batches run under `Promise.all`, so one failure rejects the whole call).
 */
export async function generateContentMatrix(input: GenerateMatrixInput): Promise<MatrixRow[]> {
  if (input.posts.length === 0) {
    throw new Error("此海巡沒有素材,請重新海巡");
  }
  if (input.count < 1) {
    return [];
  }

  const model = getModel(input.aiProvider, input.aiModel, input.apiKeys ?? {});
  const researchBlock = buildResearchBlock(input.researchMap);
  const system = buildSystemPrompt(input.persona);
  const preferText =
    prefersOpenCodeTextFirst(input.aiProvider, input.aiModel) ||
    TEXT_FIRST_MODELS.has(input.aiModel);

  // Never create more batches than posts: round-robin distribution then
  // guarantees every batch holds at least one post.
  const batchCount = Math.min(BATCH_COUNT, input.posts.length);
  const batches = distributePosts(input.posts, batchCount);
  // Rounded up so the batches together request at least `count` rows; the
  // surplus is trimmed after merging.
  const perBatch = Math.ceil(input.count / batchCount);

  const results = await Promise.all(
    batches.map((batchPosts, batchIndex) => {
      const materials = buildMaterialsBlock(batchPosts, input.query);
      const prompt = buildBatchPrompt(input, researchBlock, materials, batchIndex, batchCount, perBatch);
      const fallback = { query: input.query, count: perBatch };
      return attemptBatch(model, system, prompt, fallback, input.aiProvider, input.aiModel, preferText);
    })
  );

  const merged = mergeBatches(results);
  return normalizeRows(merged).slice(0, input.count);
}