// haixunMaster/lib/ai/generate-matrix.ts

import { generateObject, generateText } from "ai";
import { z } from "zod";
import type { ProviderApiKeys } from "./keys";
import type { MatrixRow, ResearchMap } from "@/lib/types/research";
import { withAgentSystem } from "./agent";
import { buildPersonaPromptBlock } from "./persona";
import { getModel } from "./provider";
import { extractJsonFromText } from "./coerce-research-map";
import {
  getOpenCodeGenerationSettings,
  prefersOpenCodeTextFirst,
} from "./opencode-go-settings";
import {
  coerceMatrixRaw,
  formatMatrixError,
  matrixLooseSchema,
} from "./coerce-matrix";
import { HASHTAG_USER_REMINDER, HASHTAG_WRITING_RULES } from "./hashtag-rules";
import { sanitizePromptText, THREADS_MAX_CHARS } from "@/lib/utils";

/**
 * Strict shape of one generated matrix row, used for structured-object
 * generation. `sortOrder` must be an integer >= 1 and `text` may not exceed
 * THREADS_MAX_CHARS characters.
 */
const matrixRowSchema = z.object({
  sortOrder: z.number().int().min(1),
  searchTag: z.string(),
  angle: z.string(),
  hook: z.string(),
  text: z.string().max(THREADS_MAX_CHARS),
  referenceNotes: z.string(),
  sourcePermalinks: z.array(z.string()),
  rationale: z.string(),
});

/** Top-level structured-output schema: an object wrapping the list of rows. */
const matrixSchema = z.object({ rows: z.array(matrixRowSchema) });

// Model ids for which the free-text generation path is attempted before the
// structured-object path (see the `preferText` flag in generateContentMatrix).
const TEXT_FIRST_MODELS = new Set(["grok-3", "grok-3-fast"]);

// Number of batches the reference posts are distributed into; each non-empty
// batch results in its own generation request.
const BATCH_COUNT = 3;

/** Everything `generateContentMatrix` needs to produce a content matrix. */
export interface GenerateMatrixInput {
  /** Topic label, rendered at the top of every batch prompt. */
  topicLabel: string;
  /** Seed keyword; also used as the tag label for posts that have no `searchTag`. */
  query: string;
  /** Optional brief; when present it is added to the batch prompt. */
  brief?: string | null;
  /** Optional persona, forwarded to `buildPersonaPromptBlock` for the system prompt. */
  persona?: string | null;
  /** Optional research map; when present its fields are rendered into the prompt. */
  researchMap?: ResearchMap | null;
  /** Provider id, passed to `getModel` and the generation-settings lookup. */
  aiProvider: string;
  /** Model id, passed to `getModel`; also decides whether the text path is tried first. */
  aiModel: string;
  /** Optional API keys passed to `getModel` (an empty object is used when omitted). */
  apiKeys?: ProviderApiKeys;
  /** Maximum number of rows returned; the merged result is cut to this length. */
  count: number;
  /** Reference posts used as source material; must be non-empty. */
  posts: Array<{
    /** Post body; only the first 300 characters are placed in the prompt. */
    text: string;
    authorName?: string | null;
    permalink?: string | null;
    searchTag?: string | null;
    /** Rendered in the prompt as the like count (0 when missing). */
    likeCount?: number | null;
    replyCount?: number | null;
    /** Optional quality note, appended to the post's prompt entry when set. */
    qualityReason?: string | null;
    /** Replies to the post; only the first three are used, each cut to 80 characters. */
    replies?: Array<{ text: string; authorName?: string | null; likeCount?: number | null }>;
  }>;
}

/**
 * Deals `posts` round-robin into `n` buckets: item `i` lands in bucket `i % n`.
 *
 * @param posts Items to distribute; the array itself is not modified.
 * @param n Number of buckets to create.
 * @returns `n` arrays; buckets stay empty when there are fewer items than buckets.
 */
function distributePosts<T>(posts: T[], n: number): T[][] {
  const buckets = Array.from({ length: n }, (): T[] => []);
  return posts.reduce((acc, item, index) => {
    acc[index % n].push(item);
    return acc;
  }, buckets);
}

/**
 * Renders the reference posts as a numbered, blank-line-separated text block
 * for the prompt.
 *
 * Each entry shows the tag (falling back to `defaultQuery`), the author, the
 * like count, the first 300 characters of the post, its link, up to three
 * replies (80 characters each) and, when present, the quality note.
 *
 * @param posts Reference posts to render.
 * @param defaultQuery Tag label used for posts without a `searchTag`.
 * @returns The entries joined by a blank line.
 */
function buildMaterialsBlock(posts: GenerateMatrixInput["posts"], defaultQuery: string): string {
  const entries = posts.map((post, index) => {
    const tag = sanitizePromptText(post.searchTag) || sanitizePromptText(defaultQuery);
    const author = sanitizePromptText(post.authorName) || "匿名";

    const lines = [
      `${index + 1}. [${tag}] @${author}（${post.likeCount ?? 0}讚）`,
      `  ${sanitizePromptText(post.text).slice(0, 300)}`,
      `  連結：${sanitizePromptText(post.permalink) || "無"}`,
    ];

    if (post.replies && post.replies.length > 0) {
      const replySummaries = post.replies.slice(0, 3).map((reply) => {
        const replyAuthor = sanitizePromptText(reply.authorName) || "匿名";
        const replyText = sanitizePromptText(reply.text).slice(0, 80);
        return `@${replyAuthor}：${replyText}`;
      });
      lines.push(`  留言：${replySummaries.join(" | ")}`);
    }

    if (post.qualityReason) {
      lines.push(`  品質說明：${sanitizePromptText(post.qualityReason)}`);
    }

    return lines.join("\n");
  });

  return entries.join("\n\n");
}

/**
 * Renders the research map as a prompt section (audience, content goal,
 * questions, pillars and up to ten similar accounts).
 *
 * @param researchMap Research map to render; `null`/`undefined` yields "".
 * @returns The section text, starting and ending with a newline, or "" when
 *   no research map was given.
 */
function buildResearchBlock(researchMap?: ResearchMap | null): string {
  if (!researchMap) return "";

  // List fields are guarded the same way as `similarAccounts`, so a partially
  // filled research map renders empty lists instead of throwing. The explicit
  // one-argument callbacks keep Array#map's index/array arguments from being
  // forwarded to sanitizePromptText.
  const questions = (researchMap.questions ?? []).map((q) => sanitizePromptText(q)).join("、");
  const pillars = (researchMap.pillars ?? []).map((p) => sanitizePromptText(p)).join("、");

  const similarAccounts = (researchMap.similarAccounts ?? [])
    .slice(0, 10)
    .map((a) => `  @${sanitizePromptText(a.username)}${a.reason ? `（${sanitizePromptText(a.reason)}）` : ""}`)
    .join("\n");

  return `
受眾：${sanitizePromptText(researchMap.audienceSummary)}
內容目標：${sanitizePromptText(researchMap.contentGoal)}
想回答的問題：${questions}
內容支柱：${pillars}
${similarAccounts ? `同領域參考帳號：\n${similarAccounts}` : ""}
`;
}

/**
 * Builds the system prompt: role statement, persona block, then the writing
 * rules (with the shared hashtag rules inserted after the `text` rule). The
 * assembled text is passed through `withAgentSystem`.
 *
 * @param persona Optional persona, forwarded to `buildPersonaPromptBlock`.
 */
function buildSystemPrompt(persona?: string | null) {
  const intro =
    "你是 Threads 內容企劃師。根據參考素材（含優質與中等品質），產出「內容矩陣」——一週可發的貼文企劃表。";
  const personaBlock = buildPersonaPromptBlock(persona);

  const rules = [
    "- 每篇必須原創，不可抄襲參考貼文",
    "- hook 是一句吸引人的開頭（可獨立於正文），風格符合人設開場習慣",
    "- text 是完整 Threads 貼文（≤500字，含 #標籤），語感要像創作者本人親筆",
    HASHTAG_WRITING_RULES,
    "- referenceNotes 是從參考素材摘出的 2-3 個重點（簡短條列感，用換行分隔）",
    "- 每篇對應一個 searchTag（主題標籤）",
    "- sortOrder 從 1 開始遞增",
    "- 繁體中文台灣用語",
    "- 知識型貼文：不寫未查證的醫療／數據斷言，rationale 標註需網路查證的關鍵句",
    "- 部分素材附有［品質說明］：品質較低的仍有參考價值（角度、切點可用），產出時不要直接沿用其內容",
  ];

  const body = [intro, "", personaBlock, "", "規則：", ...rules].join("\n");
  return withAgentSystem(body);
}

/**
 * Builds the user prompt for one batch: topic header, optional brief, research
 * section, batch instructions, the batch's materials and a writing reminder.
 *
 * The number of posts quoted in the instructions is derived by splitting
 * `materials` on blank lines, so it matches the real post count only when no
 * rendered entry contains a blank line itself.
 *
 * @param input Generation input (topic label, query and brief are used).
 * @param researchBlock Pre-rendered research section (may be empty).
 * @param materials Pre-rendered materials for this batch.
 * @param batchIndex Zero-based batch index; shown one-based in the prompt.
 * @param totalBatches Batch total shown in the prompt.
 * @param thisBatchCount Number of rows requested from this batch.
 */
function buildBatchPrompt(
  input: GenerateMatrixInput,
  researchBlock: string,
  materials: string,
  batchIndex: number,
  totalBatches: number,
  thisBatchCount: number
) {
  const batchNumber = batchIndex + 1;
  const briefLine = input.brief ? `Brief：${sanitizePromptText(input.brief)}` : "";
  const materialCount = materials.split("\n\n").length;

  const sections = [
    `主題：${sanitizePromptText(input.topicLabel)}`,
    `種子關鍵字：${sanitizePromptText(input.query)}`,
    briefLine,
    researchBlock,
    "",
    `【批次 ${batchNumber}/${totalBatches}】`,
    `以下為第 ${batchNumber} 批素材（共 ${materialCount} 篇），請產出 ${thisBatchCount} 篇不同切角，注意與其他批次做出區隔，避免重複。`,
    "",
    "參考素材：",
    materials,
    "",
    `寫作提醒：矩陣裡的 text 必須像創作者親筆，有代表句範例時語感要向範例靠攏。${HASHTAG_USER_REMINDER}`,
  ];

  return sections.join("\n");
}

/**
 * Builds the trailing instruction that asks for a raw JSON reply: a blank
 * line, the format header, an example object with one row, and a closing line
 * stating the requested row count and the per-row character limit.
 *
 * @param count Number of rows the model is asked to produce.
 */
function buildJsonPromptSuffix(count: number) {
  const header = "請只回傳 JSON（不要 markdown），格式：";

  const exampleLines = [
    "{",
    '  "rows": [',
    "    {",
    '      "sortOrder": 1,',
    '      "searchTag": "標籤",',
    '      "angle": "切角說明",',
    '      "hook": "開頭一句",',
    '      "text": "完整貼文（結尾含 1～3 個 #話題標籤）",',
    '      "referenceNotes": "參考重點（多行以換行分隔）",',
    '      "sourcePermalinks": ["https://..."],',
    '      "rationale": "為什麼這篇有效"',
    "    }",
    "  ]",
    "}",
  ];

  const footer = `請產出 ${count} 篇，sortOrder 從 1 遞增，每篇 text 不超過 ${THREADS_MAX_CHARS} 字。`;

  // The two leading empty entries reproduce the blank line that separates the
  // suffix from whatever text precedes it.
  return ["", "", header, ...exampleLines, footer].join("\n");
}

/**
 * Returns the rows ordered by ascending `sortOrder`, each with its `text`
 * limited to THREADS_MAX_CHARS UTF-16 code units.
 *
 * The input array is left untouched: sorting happens on a copy and every
 * returned row is a fresh object.
 *
 * @param rows Rows to normalise.
 * @returns A new, sorted array of new row objects.
 */
function normalizeRows(rows: MatrixRow[]): MatrixRow[] {
  // Array#sort works in place, so copy first to avoid reordering the caller's array.
  return [...rows]
    .sort((a, b) => a.sortOrder - b.sortOrder)
    .map((row) => ({
      ...row,
      text: truncateAtCodePointBoundary(row.text, THREADS_MAX_CHARS),
    }));
}

/**
 * Cuts `text` to at most `maxLength` UTF-16 code units without leaving half of
 * a surrogate pair (e.g. an emoji) dangling at the end.
 */
function truncateAtCodePointBoundary(text: string, maxLength: number): string {
  if (text.length <= maxLength) return text;

  const cut = text.slice(0, maxLength);
  const lastUnit = cut.charCodeAt(cut.length - 1);
  // A trailing high surrogate means the cut landed inside a pair; drop it.
  const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsWithHighSurrogate ? cut.slice(0, -1) : cut;
}

/**
 * Flattens the per-batch rows into one list, drops rows whose angle duplicates
 * an earlier one, and renumbers `sortOrder` from 1.
 *
 * Rows are ordered by their original `sortOrder` before de-duplication. Two
 * angles count as duplicates when their first 30 characters match after
 * lower-casing and removing whitespace. Rows with a blank angle are never
 * treated as duplicates of each other: a blank angle says nothing about the
 * row's content, so collapsing them would silently discard distinct posts.
 *
 * @param batches Rows produced by each batch.
 * @returns A new array of new row objects with consecutive `sortOrder` values.
 */
function mergeBatches(batches: MatrixRow[][]): MatrixRow[] {
  const seen = new Set<string>();
  const merged: MatrixRow[] = [];

  // flat() already yields a fresh array, so sorting it leaves the inputs untouched.
  const ordered = batches.flat().sort((a, b) => a.sortOrder - b.sortOrder);

  for (const row of ordered) {
    const key = row.angle.slice(0, 30).toLowerCase().replace(/\s+/g, "");
    if (key !== "") {
      if (seen.has(key)) continue;
      seen.add(key);
    }
    merged.push(row);
  }

  return merged.map((row, i) => ({ ...row, sortOrder: i + 1 }));
}

/**
 * Text path: asks the model for free text, extracts JSON from the reply and
 * coerces it into matrix rows.
 *
 * The JSON format instructions are appended to the system prompt. The
 * extracted value is run through `matrixLooseSchema.parse` (result unused, so
 * the call serves as a validation gate) before `coerceMatrixRaw` builds the
 * rows from it and `fallback`.
 *
 * @param model Model handle obtained from `getModel`.
 * @param system System prompt.
 * @param prompt User prompt.
 * @param fallback Query and row count forwarded to the coercion step; `count`
 *   is also quoted in the format instructions.
 * @param provider Provider id used to look up generation settings.
 * @param modelId Model id used to look up generation settings.
 */
async function generateWithText(
  model: ReturnType<typeof getModel>,
  system: string,
  prompt: string,
  fallback: { query: string; count: number },
  provider: string,
  modelId: string
) {
  const generationSettings = getOpenCodeGenerationSettings(provider, modelId);
  const systemWithFormat = sanitizePromptText(`${system}${buildJsonPromptSuffix(fallback.count)}`);
  const userPrompt = sanitizePromptText(prompt);

  const response = await generateText({
    model,
    system: systemWithFormat,
    prompt: userPrompt,
    ...generationSettings,
  });

  const candidate = extractJsonFromText(response.text);
  matrixLooseSchema.parse(candidate);
  return coerceMatrixRaw(candidate, fallback);
}

/**
 * Structured path: asks the model for an object matching `matrixSchema` and
 * returns its rows after normalisation.
 *
 * Here the JSON format instructions are appended to the user prompt rather
 * than to the system prompt.
 *
 * @param model Model handle obtained from `getModel`.
 * @param system System prompt.
 * @param prompt User prompt.
 * @param fallback Only `count` is used, for the format instructions.
 * @param provider Provider id used to look up generation settings.
 * @param modelId Model id used to look up generation settings.
 */
async function generateWithObject(
  model: ReturnType<typeof getModel>,
  system: string,
  prompt: string,
  fallback: { query: string; count: number },
  provider: string,
  modelId: string
) {
  const generationSettings = getOpenCodeGenerationSettings(provider, modelId);
  const promptWithFormat = sanitizePromptText(`${prompt}${buildJsonPromptSuffix(fallback.count)}`);

  const result = await generateObject({
    model,
    schema: matrixSchema,
    system: sanitizePromptText(system),
    prompt: promptWithFormat,
    ...generationSettings,
  });

  return normalizeRows(result.object.rows);
}

/**
 * Generates the rows for one batch, trying up to three strategies in order and
 * stopping at the first one that does not throw.
 *
 * Order when `preferText` is true: text, structured object, text retry.
 * Otherwise: structured object, text, text retry. The retry repeats the text
 * path with an extra reminder to supply all requested rows.
 *
 * @param model Model handle obtained from `getModel`.
 * @param system System prompt.
 * @param prompt User prompt for this batch.
 * @param fallback Query and requested row count for this batch.
 * @param provider Provider id.
 * @param modelId Model id.
 * @param preferText Whether the text path is tried before the structured path.
 * @returns The normalised rows of the first successful strategy.
 * @throws Error built from the last failure (via `formatMatrixError`) when no
 *   strategy yields rows.
 */
async function attemptBatch(
  model: ReturnType<typeof getModel>,
  system: string,
  prompt: string,
  fallback: { query: string; count: number },
  provider: string,
  modelId: string,
  preferText: boolean
): Promise<MatrixRow[]> {
  const viaObject = () => generateWithObject(model, system, prompt, fallback, provider, modelId);
  const viaText = () => generateWithText(model, system, prompt, fallback, provider, modelId);
  const viaTextRetry = () =>
    generateWithText(
      model,
      system,
      `${prompt}\n\n上次格式不完整，請務必補齊 ${fallback.count} 篇 rows。`,
      fallback,
      provider,
      modelId
    );

  const strategies: Array<() => Promise<MatrixRow[]>> = preferText
    ? [viaText, viaObject, viaTextRetry]
    : [viaObject, viaText, viaTextRetry];

  let rows: MatrixRow[] | null = null;
  let lastError: unknown;

  for (const run of strategies) {
    try {
      rows = await run();
    } catch (error) {
      // Remember the failure and move on to the next strategy.
      lastError = error;
      continue;
    }
    break;
  }

  if (rows) {
    return normalizeRows(rows);
  }
  throw new Error(formatMatrixError(lastError));
}

/**
 * Generates a content matrix of at most `input.count` rows from the reference
 * posts.
 *
 * The posts are dealt round-robin into batches, one generation request runs
 * per batch (all concurrently), and the results are merged, de-duplicated,
 * renumbered and cut to `input.count`.
 *
 * The number of batches is capped by BATCH_COUNT, by the number of posts and
 * by the requested row count. This way every batch has material, the
 * per-batch quota adds up to at least `count` even when only one or two posts
 * are available, and no request is issued for rows that would be thrown away.
 *
 * @param input Topic, model selection, requested row count and reference posts.
 * @returns The generated rows; empty when `input.count` is not positive.
 * @throws Error when `input.posts` is empty, or when any batch fails on all of
 *   its generation strategies.
 */
export async function generateContentMatrix(input: GenerateMatrixInput): Promise<MatrixRow[]> {
  if (input.posts.length === 0) {
    throw new Error("此海巡沒有素材，請重新海巡");
  }

  // Nothing was requested (0, negative or NaN): skip the model calls entirely.
  if (!(input.count > 0)) {
    return [];
  }

  const model = getModel(input.aiProvider, input.aiModel, input.apiKeys ?? {});
  const researchBlock = buildResearchBlock(input.researchMap);
  const system = buildSystemPrompt(input.persona);
  const preferText =
    prefersOpenCodeTextFirst(input.aiProvider, input.aiModel) ||
    TEXT_FIRST_MODELS.has(input.aiModel);

  // With batchCount <= posts.length the round-robin split leaves no batch empty.
  const batchCount = Math.min(BATCH_COUNT, input.posts.length, Math.ceil(input.count));
  const batches = distributePosts(input.posts, batchCount);
  const perBatch = Math.ceil(input.count / batchCount);

  const results = await Promise.all(
    batches.map((batchPosts, batchIndex) => {
      const materials = buildMaterialsBlock(batchPosts, input.query);
      const prompt = buildBatchPrompt(input, researchBlock, materials, batchIndex, batchCount, perBatch);
      const fallback = { query: input.query, count: perBatch };
      return attemptBatch(model, system, prompt, fallback, input.aiProvider, input.aiModel, preferText);
    })
  );

  const merged = mergeBatches(results);
  return normalizeRows(merged).slice(0, input.count);
}