273 lines
8.3 KiB
TypeScript
273 lines
8.3 KiB
TypeScript
import { isOnTopicTag } from "@/lib/topic-anchor";
|
||
import { isPlacementGoal } from "@/lib/types/topic-goal";
|
||
import {
|
||
isAccountTag,
|
||
normalizeUsername,
|
||
type ResearchMap,
|
||
} from "@/lib/types/research";
|
||
|
||
/** The two kinds of scan: a keyword search or a crawl of one account. */
export type ScanTaskKind = "keyword" | "account";

/** One unit of scanning work. */
export interface ScanTask {
  /** Dedupe key; tasks built in this module use `${kind}:${query}`. */
  id: string;
  /** Whether `query` is a search phrase or an account username. */
  kind: ScanTaskKind;
  /** Search phrase for keyword tasks, normalized username for account tasks. */
  query: string;
  /** Human-readable label; account tasks built here use `@username`. */
  label: string;
  /** Per-task item cap (presumably the number of posts to collect; confirm with the consumer). */
  limit: number;
}
|
||
|
||
import { getBrowserConcurrency } from "@/lib/threads-browser/human-behavior";
|
||
|
||
/** Hard upper bound on the concurrency returned by `resolveScanConcurrency`. */
const MAX_CONCURRENCY = 2;
|
||
|
||
/**
 * Partitions tasks by kind, preserving their relative order.
 *
 * @param tasks Tasks to partition; the array is not modified.
 * @returns Keyword tasks and account tasks in two separate arrays.
 */
export function splitScanTasks(tasks: ScanTask[]): {
  keywordTasks: ScanTask[];
  accountTasks: ScanTask[];
} {
  const keywordTasks: ScanTask[] = [];
  const accountTasks: ScanTask[] = [];

  for (const task of tasks) {
    if (task.kind === "keyword") {
      keywordTasks.push(task);
    } else if (task.kind === "account") {
      accountTasks.push(task);
    }
  }

  return { keywordTasks, accountTasks };
}
|
||
|
||
/**
 * Usernames of the @account tags the user explicitly ticked. Similar accounts
 * from the research map are not added automatically.
 *
 * @param selectedTags Every tag the user selected, keywords and accounts alike.
 * @returns Normalized usernames, with empty results dropped.
 */
export function getSelectedAccountUsernames(selectedTags: string[]): string[] {
  const usernames: string[] = [];

  for (const tag of selectedTags) {
    if (!isAccountTag(tag)) continue;
    const username = normalizeUsername(tag);
    // Skip tags that normalize to nothing.
    if (username) usernames.push(username);
  }

  return usernames;
}
|
||
|
||
/**
 * Tells whether the selection contains at least one usable account tag.
 *
 * @param selectedTags Every tag the user selected.
 * @returns `true` when `getSelectedAccountUsernames` yields at least one username.
 */
export function hasSelectedAccountTags(selectedTags: string[]): boolean {
  const usernames = getSelectedAccountUsernames(selectedTags);
  return usernames.length > 0;
}
|
||
|
||
/**
 * Bounded parallelism, single-threaded by default, to lower the risk of Meta
 * flagging the scan as a bot.
 *
 * @param taskCount Number of tasks about to run.
 * @returns An integer of at least 1. For valid inputs it is `taskCount`
 *   limited by both `MAX_CONCURRENCY` and `getBrowserConcurrency()`.
 */
export function resolveScanConcurrency(taskCount: number): number {
  const cap = Math.min(MAX_CONCURRENCY, getBrowserConcurrency());
  const wanted = Math.min(Math.max(taskCount, 1), cap);

  // A zero, negative, fractional or NaN cap (or a NaN task count) would
  // otherwise pass straight through; callers always get at least one worker.
  if (!Number.isFinite(wanted)) return 1;
  return Math.max(1, Math.floor(wanted));
}
|
||
|
||
/**
 * Appends a task unless one with the same kind and query was already added.
 *
 * @param tasks Output list; receives the new task.
 * @param seen Keys of tasks added so far; updated in place.
 * @param task Task without an id. The id is set to `${kind}:${query}`.
 */
function addTask(
  tasks: ScanTask[],
  seen: Set<string>,
  task: Omit<ScanTask, "id">
): void {
  const key = `${task.kind}:${task.query}`;
  if (seen.has(key)) return;

  seen.add(key);
  tasks.push({ ...task, id: key });
}
|
||
|
||
/** Leading filler words ("may I ask", "anyone", "help", ...) removed from a search phrase. */
const PLACEMENT_QUERY_PREFIX_RE = /^(請問|想問|有人|大家|想知道|想請教|求助|請益)/;

/** Maximum number of search phrases returned by `resolvePlacementSearchQueries`. */
const PLACEMENT_QUERY_MAX = 8;

/** Longest phrase kept, measured in UTF-16 code units. */
const PLACEMENT_PHRASE_MAX_LENGTH = 28;

/**
 * Punctuation removed from phrases: the ASCII marks, CJK marks, and the
 * full-width / curly variants of ? ! , ; : " ' ( ). The variants are written
 * as escapes so that width or quote normalization of this file cannot
 * silently collapse them back into their ASCII twins.
 */
const PLACEMENT_PUNCTUATION_RE =
  /[?!。.、,;:"'「」【】()\uFF1F\uFF01\uFF0C\uFF1B\uFF1A\u201C\u201D\u2018\u2019\uFF08\uFF09]/g;

/**
 * Turns a research-map question or pillar into a keyword that can be searched
 * on Threads.
 *
 * @param text Raw question, pillar, tag or seed query.
 * @returns The text without punctuation, whitespace or a leading filler
 *   word, at most 28 UTF-16 code units long. May be empty.
 */
export function normalizePlacementSearchPhrase(text: string): string {
  const stripped = text
    .replace(PLACEMENT_PUNCTUATION_RE, "")
    // Whitespace goes first: the prefix pattern is anchored at the start, so a
    // leading or embedded space would otherwise keep the filler word.
    .replace(/\s+/g, "")
    .replace(PLACEMENT_QUERY_PREFIX_RE, "");

  const clipped = stripped.slice(0, PLACEMENT_PHRASE_MAX_LENGTH);

  // Never end on the first half of a surrogate pair (for example a cut emoji).
  const lastUnit = clipped.charCodeAt(clipped.length - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsOnHighSurrogate ? clipped.slice(0, -1) : clipped;
}
|
||
|
||
/**
 * Ranks a normalized phrase as a placement search query.
 *
 * @param phrase Normalized candidate phrase.
 * @param seedQuery The topic seed query, compared as given (not normalized).
 * @returns `-1` when the phrase is shorter than 3 or longer than 28 code
 *   units. Otherwise its length, plus 6 when phrase and seed contain one
 *   another, plus 4 when it contains a need, help or pain-point keyword.
 */
function placementPhraseScore(phrase: string, seedQuery: string): number {
  if (phrase.length < 3 || phrase.length > 28) return -1;

  const overlapsSeed =
    seedQuery !== "" && (phrase.includes(seedQuery) || seedQuery.includes(phrase));
  const soundsLikeNeed = /推薦|請益|求助|請問|怎麼|用什麼|哪款|困擾|煩惱|怕|癢|過敏/.test(phrase);

  return phrase.length + (overlapsSeed ? 6 : 0) + (soundsLikeNeed ? 4 : 0);
}
|
||
|
||
/**
 * Placement mode: builds search phrases automatically from audience questions
 * and content pillars, so no tag has to be ticked by hand.
 *
 * Candidates come from three sources, in this order:
 * 1. keyword tags: the user's selection, or the research map's suggested
 *    tags when nothing is selected (score bonus +30);
 * 2. the research map's questions and pillars (no bonus);
 * 3. the seed query itself (bonus +2).
 *
 * Account tags are never used as keywords. The suggested-tag fallback already
 * excluded them; the same now applies to user-selected tags, which previously
 * became searches such as `@user`.
 *
 * @param researchMap Research map for the topic, if one exists.
 * @param seedQuery The topic seed query.
 * @param selectedTags Tags the user selected; defaults to none.
 * @returns Up to `PLACEMENT_QUERY_MAX` distinct phrases (compared
 *   case-insensitively), best score first. Without a research map the result
 *   is the normalized seed alone, or empty when it is shorter than 3.
 */
export function resolvePlacementSearchQueries(
  researchMap: ResearchMap | null | undefined,
  seedQuery: string,
  selectedTags: string[] = []
): string[] {
  const seed = normalizePlacementSearchPhrase(seedQuery);
  const seedUsable = seed.length >= 3;

  if (!researchMap) {
    return seedUsable ? [seed] : [];
  }

  const suggestedTags = researchMap.suggestedTags ?? [];
  // Tags the research map itself classifies as accounts, even without "@".
  const accountTypeTags = new Set(
    suggestedTags.filter((t) => t.searchType === "帳號").map((t) => t.tag)
  );

  const preferredTags =
    selectedTags.length > 0
      ? selectedTags.filter((tag) => !isAccountTag(tag) && !accountTypeTags.has(tag))
      : suggestedTags
          .filter((t) => t.searchType !== "帳號" && !isAccountTag(t.tag))
          .map((t) => t.tag);

  const candidates: Array<{ phrase: string; score: number }> = [];
  const consider = (raw: string, bonus: number): void => {
    const phrase = normalizePlacementSearchPhrase(raw);
    const score = placementPhraseScore(phrase, seedQuery);
    if (score >= 0) candidates.push({ phrase, score: score + bonus });
  };

  for (const tag of preferredTags) consider(tag, 30);
  for (const item of [...(researchMap.questions ?? []), ...(researchMap.pillars ?? [])]) {
    consider(item, 0);
  }
  if (seedUsable) consider(seedQuery, 2);

  // Sort first so that, among duplicates, the best-scored entry survives.
  const seen = new Set<string>();
  return [...candidates]
    .sort((a, b) => b.score - a.score)
    .filter(({ phrase }) => {
      const key = phrase.toLowerCase();
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    })
    .slice(0, PLACEMENT_QUERY_MAX)
    .map((candidate) => candidate.phrase);
}
|
||
|
||
/**
 * Tells whether a placement scan would have anything to search for.
 *
 * @param researchMap Research map for the topic, if one exists.
 * @param seedQuery The topic seed query; defaults to an empty string.
 * @returns `true` when `resolvePlacementSearchQueries`, called without
 *   selected tags, yields at least one phrase.
 */
export function hasPlacementSearchSources(
  researchMap: ResearchMap | null | undefined,
  seedQuery = ""
): boolean {
  const queries = resolvePlacementSearchQueries(researchMap, seedQuery);
  return queries.length > 0;
}
|
||
|
||
/**
 * Builds the keyword tasks for a placement scan.
 *
 * @param params.researchMap Research map for the topic, if one exists.
 * @param params.seedQuery The topic seed query.
 * @param params.selectedTags Tags the user selected, if any.
 * @returns One keyword task with `limit: 14` per phrase resolved by
 *   `resolvePlacementSearchQueries`, in the same order.
 */
export function buildPlacementScanTasks(params: {
  researchMap?: ResearchMap | null;
  seedQuery: string;
  selectedTags?: string[];
}): ScanTask[] {
  const { researchMap, seedQuery, selectedTags } = params;
  const tasks: ScanTask[] = [];
  const seen = new Set<string>();

  for (const query of resolvePlacementSearchQueries(researchMap, seedQuery, selectedTags)) {
    addTask(tasks, seen, { kind: "keyword", query, label: query, limit: 14 });
  }

  return tasks;
}
|
||
|
||
/**
 * Turns the user's selected tags into scan tasks.
 *
 * Account tags:
 * - are skipped entirely in placement mode;
 * - otherwise become account tasks only when the research map vouches for the
 *   account, either as a suggested tag of type "帳號" or as a similar account.
 *   The match is on the normalized, lower-cased username, so `@name` is
 *   approved by a research-map entry stored as `name`.
 *
 * Keyword tags:
 * - tags present in the research map are accepted as they are;
 * - any other tag must pass `isOnTopicTag` against the topic label and seed.
 *
 * @param params.selectedTags Tags the user selected.
 * @param params.researchMap Research map for the topic, if one exists.
 * @param params.seedQuery The topic seed query.
 * @param params.topicGoal Topic goal; decides placement mode via `isPlacementGoal`.
 * @param params.topicLabel Topic label; defaults to `seedQuery`.
 * @returns Tasks in selection order, deduplicated by kind and query.
 */
export function buildScanTasks(params: {
  selectedTags: string[];
  researchMap?: ResearchMap | null;
  seedQuery: string;
  topicGoal?: string | null;
  topicLabel?: string;
}): ScanTask[] {
  const { selectedTags, researchMap, seedQuery, topicGoal, topicLabel = seedQuery } = params;
  const placementMode = isPlacementGoal(topicGoal);
  const suggestedTags = researchMap?.suggestedTags ?? [];
  const tagMeta = new Map(suggestedTags.map((t) => [t.tag, t] as const));

  // Accounts the research map vouches for, keyed by normalized lower-case
  // username so the lookup does not depend on a leading "@".
  const approvedAccounts = new Set<string>();
  for (const t of suggestedTags) {
    if (t.searchType === "帳號") approvedAccounts.add(normalizeUsername(t.tag).toLowerCase());
  }
  for (const account of researchMap?.similarAccounts ?? []) {
    approvedAccounts.add(normalizeUsername(account.username).toLowerCase());
  }

  const tasks: ScanTask[] = [];
  const seen = new Set<string>();

  for (const tag of selectedTags) {
    const meta = tagMeta.get(tag);

    if (isAccountTag(tag) || meta?.searchType === "帳號") {
      if (placementMode) continue;

      const username = normalizeUsername(tag);
      // A tag that normalizes to nothing would create a task with an empty query.
      if (!username) continue;
      // Manually entered accounts that the research map has not confirmed as
      // belonging to the same field are not crawled page by page.
      if (!approvedAccounts.has(username.toLowerCase())) continue;

      addTask(tasks, seen, {
        kind: "account",
        query: username,
        label: `@${username}`,
        limit: 15,
      });
      continue;
    }

    // Research-map tags were already validated by the dedicated AI step and by
    // natural search terms, so they need not literally contain the seed query.
    // Only manual tags outside the research map get the strict topic-anchor
    // check that keeps the scan on topic.
    if (!meta && !isOnTopicTag(tag, { label: topicLabel, query: seedQuery })) {
      continue;
    }

    const isShort = meta?.searchType === "短詞" || tag.length <= 4;
    const isNeedTag =
      meta?.searchIntent === "需求" ||
      meta?.searchIntent === "求助" ||
      meta?.searchIntent === "痛點";

    let limit: number;
    if (placementMode) {
      limit = isNeedTag ? 16 : isShort ? 14 : 12;
    } else {
      limit = isShort ? 15 : 12;
    }

    addTask(tasks, seen, { kind: "keyword", query: tag, label: tag, limit });
  }

  return tasks;
}
|
||
|
||
/**
 * Picks the tags that are ticked by default for a topic.
 *
 * The selection starts from a fixed mix per mode:
 * - placement: up to 6 need/help/pain-point tags, 5 scene tags, 4 short
 *   terms and 1 quote;
 * - otherwise: up to 4 short terms, 2 scene tags, 1 quote and 1 account.
 *
 * It is then deduplicated and topped up from the remaining keyword tags. In
 * non-placement mode up to 2 account tags may also be used as filler;
 * placement mode never adds accounts, because `buildScanTasks` discards
 * account tags in that mode.
 *
 * @param researchMap Research map whose suggested tags are drawn from.
 * @param topicGoal Topic goal; decides placement mode via `isPlacementGoal`,
 *   the same helper `buildScanTasks` uses.
 * @returns At most 10 tags in placement mode, 8 otherwise. Account tags are
 *   returned with a leading `@`.
 */
export function pickDefaultSelectedTags(
  researchMap: ResearchMap,
  topicGoal?: string | null
): string[] {
  // Guarded like every other reader of suggestedTags in this module.
  const tags = researchMap.suggestedTags ?? [];
  const shorts = tags.filter((t) => t.searchType === "短詞").map((t) => t.tag);
  const quotes = tags.filter((t) => t.searchType === "語錄").map((t) => t.tag);
  const scenes = tags.filter((t) => t.searchType === "情境").map((t) => t.tag);
  const needTags = tags
    .filter(
      (t) => t.searchIntent === "需求" || t.searchIntent === "求助" || t.searchIntent === "痛點"
    )
    .map((t) => t.tag);
  const acctTags = tags
    .filter((t) => t.searchType === "帳號")
    .map((t) => (t.tag.startsWith("@") ? t.tag : `@${t.tag}`));
  const keywordTags = tags
    .filter((t) => t.searchType !== "帳號" && !isAccountTag(t.tag))
    .map((t) => t.tag);

  const isPlacement = isPlacementGoal(topicGoal);
  const target = isPlacement ? 10 : 8;

  const picked = isPlacement
    ? [
        ...needTags.slice(0, 6),
        ...scenes.slice(0, 5),
        ...shorts.slice(0, 4),
        ...quotes.slice(0, 1),
      ]
    : [
        ...shorts.slice(0, 4),
        ...scenes.slice(0, 2),
        ...quotes.slice(0, 1),
        ...acctTags.slice(0, 1),
      ];

  // A Set keeps insertion order, so it dedupes and gives O(1) membership.
  const balanced = new Set(picked);
  const fallbackOrder = isPlacement ? keywordTags : [...keywordTags, ...acctTags.slice(0, 2)];

  for (const tag of fallbackOrder) {
    if (balanced.size >= target) break;
    balanced.add(tag);
  }

  return [...balanced].slice(0, target);
}
|