// haixunMaster/lib/topic-anchor.ts (236 lines, 7.0 KiB, TypeScript)

/** Topic anchor: does a tag / account / patrol ("海巡") result stay tightly on the core topic? */

/**
 * Vocabulary that is searched for inside a topic phrase when it is broken
 * into concepts. Entries are grouped here by length for readability; the
 * relative order is kept as-is because consumers may sort this list stably.
 */
const CONCEPT_PARTS = [
  // Compound phrases.
  "寵物洗毛精", "寵物沐浴", "寵物美容", "寵物用品", "貓咪洗澡", "狗狗洗澡",
  // Product words.
  "洗毛精", "沐浴乳", "沐浴精", "洗髮精", "洗碗精",
  // Two-character animal words.
  "寵物", "毛孩", "貓咪", "狗狗",
  // Single-character animal words.
  "貓", "狗", "犬",
];
/**
 * Alias table: for a concept listed here, a text counts as mentioning the
 * concept when it contains any of the listed aliases. Concepts without an
 * entry are matched literally by `conceptMatched`.
 *
 * NOTE(review): the property keys were missing from the reviewed copy of this
 * file (every entry began with a bare `:`, which does not parse). They are
 * reconstructed as the concept that leads each alias list — confirm against
 * the upstream source.
 */
const CONCEPT_ALIASES: Record<string, string[]> = {
  "寵物": ["寵物", "毛孩", "毛小孩", "毛寶", "貓咪", "狗狗", "貓", "狗", "犬"],
  "洗毛精": ["洗毛精", "沐浴乳", "沐浴精", "洗澡", "沐浴", "梳毛"],
  "沐浴": ["沐浴", "沐浴乳", "洗澡", "洗毛精"],
  "洗澡": ["洗澡", "沐浴", "洗毛精"],
  "貓咪": ["貓咪", "貓", "毛孩"],
  "狗狗": ["狗狗", "狗", "犬", "毛孩"],
};
/**
 * Exact-match shortcuts: a whitespace-stripped topic phrase equal to one of
 * these keys is split directly into the listed concepts.
 *
 * NOTE(review): the property keys were missing from the reviewed copy of this
 * file (every entry began with a bare `:`, which does not parse). They are
 * reconstructed as the concatenation of each entry's concepts — confirm
 * against the upstream source.
 */
const COMPOUND_CONCEPTS: Record<string, string[]> = {
  "寵物洗毛精": ["寵物", "洗毛精"],
  "寵物沐浴": ["寵物", "沐浴"],
  "寵物洗澡": ["寵物", "洗澡"],
  "貓咪洗澡": ["貓咪", "洗澡"],
  "狗狗洗澡": ["狗狗", "洗澡"],
};
/**
 * Single-word tags that are too broad and drift off-topic easily.
 * `isOnTopicTag` rejects these outright unless the caller passes `allowBroad`.
 */
export const BROAD_TAG_BLOCKLIST = new Set<string>([
  // Bare topic words.
  "寵物", "洗毛精", "沐浴", "貓", "狗", "犬", "貓咪", "狗狗", "毛孩",
  // Look-alike product and human hair-loss words.
  "洗碗精", "掉髮", "落髮",
  // Generic social-post words.
  "語錄", "心情", "日常", "分享", "推薦", "求助", "請益",
]);
/** Caller-supplied description of a topic, from which a `TopicAnchor` is built. */
export interface TopicAnchorInput {
  /** Topic label; used as the core phrase when `query` is empty. */
  label: string;

  /** Search query; preferred over `label` as the core phrase. */
  query: string;

  // The next three fields are not read by any function visible in this file.
  brief?: string | null;
  pillars?: string[];
  suggestedTags?: string[];

  /**
   * Extra off-topic terms. Each term is trimmed, and terms of two or more
   * characters become reject checks that fire when a text contains the term.
   */
  exclusions?: string[];
}
/** Pre-computed matching data for one topic, as produced by `buildTopicAnchor`. */
export interface TopicAnchor {
  /** The topic phrase that texts are compared against. */
  corePhrase: string;

  /** Concepts extracted from `corePhrase`. */
  requiredConcepts: string[];

  /** Off-topic predicates; a text for which any `test` returns true is scored as rejected. */
  rejects: { test: (text: string) => boolean }[];
}
/** Drops every entry that is a substring of another, different entry in the same list. */
function dropSubsumedConcepts(concepts: readonly string[]): string[] {
  return concepts.filter(
    (concept) => !concepts.some((other) => other !== concept && other.includes(concept))
  );
}

/**
 * Breaks a topic phrase into the concepts a text must mention to be on-topic.
 *
 * Resolution order:
 * 1. Whitespace is removed; an empty result yields `[]`.
 * 2. A phrase that is exactly a `COMPOUND_CONCEPTS` key yields a copy of that entry.
 * 3. Otherwise `CONCEPT_PARTS` entries are searched longest-first, each hit
 *    being removed from the working string, and hits contained in a longer
 *    hit are dropped.
 * 4. A single hit of four or more characters is decomposed into the atomic
 *    words it contains when there are at least two independent ones.
 * 5. Two or more hits are returned as-is; anything else yields the
 *    whitespace-stripped phrase as the only concept.
 *
 * @param phrase Topic phrase, e.g. a search query.
 * @returns A freshly allocated list of concepts (never a shared table array).
 */
export function extractTopicConcepts(phrase: string): string[] {
  // Removing every whitespace run also covers leading/trailing whitespace.
  const normalized = phrase.replace(/\s+/g, "");
  if (!normalized) return [];

  // Own-property check: a bare `COMPOUND_CONCEPTS[normalized]` lookup would
  // also return inherited members for phrases such as "constructor".
  const compound = Object.prototype.hasOwnProperty.call(COMPOUND_CONCEPTS, normalized)
    ? COMPOUND_CONCEPTS[normalized]
    : undefined;
  // Copy so callers cannot mutate the shared lookup table.
  if (compound) return [...compound];

  const longestFirst = [...CONCEPT_PARTS].sort((a, b) => b.length - a.length);
  const found: string[] = [];
  let remaining = normalized;
  for (const part of longestFirst) {
    if (remaining.includes(part)) {
      found.push(part);
      // Remove the hit so its shorter sub-words are not counted again.
      remaining = remaining.replace(part, "");
    }
  }

  const pruned = dropSubsumedConcepts(found);
  const [only] = pruned;
  if (pruned.length === 1 && only !== undefined && only.length >= 4) {
    const atoms = ["寵物", "毛孩", "貓咪", "狗狗", "貓", "狗", "洗毛精", "沐浴", "洗澡", "梳毛"];
    // Prune here as well: otherwise "貓咪洗澡…" would require both "貓咪" and
    // the redundant "貓", unlike the exact compound "貓咪洗澡".
    const inner = dropSubsumedConcepts(atoms.filter((atom) => only.includes(atom)));
    if (inner.length >= 2) return inner;
  }
  if (pruned.length >= 2) return pruned;
  return [normalized];
}
/**
 * Reports whether `text` mentions `concept`.
 *
 * When `CONCEPT_ALIASES` has an entry for the concept, any alias from that
 * entry occurring in `text` counts as a match; otherwise the concept itself
 * must occur in `text`.
 */
function conceptMatched(text: string, concept: string): boolean {
  // Own-property lookup only: a bare `CONCEPT_ALIASES[concept]` would return
  // inherited members (e.g. for "constructor"), and `.some` would then throw.
  const aliases = Object.prototype.hasOwnProperty.call(CONCEPT_ALIASES, concept)
    ? CONCEPT_ALIASES[concept]
    : undefined;
  return (aliases ?? [concept]).some((alias) => text.includes(alias));
}
/**
 * Assembles the off-topic predicates for a core phrase.
 *
 * Built-in checks are added in a fixed order, depending on what the core phrase mentions:
 * 1. pet wash products (and no "洗碗") -> reject dish-washing / kitchen-cleaning texts;
 * 2. any pet word -> reject human hair-loss texts that contain no pet word;
 * 3. "寵物" plus a washing/grooming word -> reject human shampoo texts that
 *    contain no pet word and no "洗毛精".
 * Caller exclusions follow: each is trimmed and, when at least two characters
 * long, rejects any text containing it.
 *
 * @param corePhrase Topic phrase the checks are derived from.
 * @param exclusions Optional extra terms to reject.
 * @returns The reject checks, built-ins first.
 */
function buildRejectChecks(corePhrase: string, exclusions?: string[]): TopicAnchor["rejects"] {
  const isPetWashProductTopic =
    /洗毛精|寵物沐浴|寵物洗澡/.test(corePhrase) && !/洗碗/.test(corePhrase);
  const mentionsPet = /寵物|毛孩|貓|狗|犬/.test(corePhrase);
  const isPetGroomingTopic =
    /寵物/.test(corePhrase) && /洗毛精|沐浴|洗澡|梳毛/.test(corePhrase);

  const checks: TopicAnchor["rejects"] = [];

  if (isPetWashProductTopic) {
    const dishCleaning = /洗碗精|洗碗乳|碗盤清潔|餐具清潔|廚房清潔/;
    checks.push({ test: (candidate) => dishCleaning.test(candidate) });
  }

  if (mentionsPet) {
    const humanHairLoss = /掉髮|落髮|植髮|生髮|禿頭|髮量減少|頭皮療程|人髮/;
    const petContext = /寵物|毛孩|貓|狗|犬|毛小孩/;
    checks.push({
      test: (candidate) => humanHairLoss.test(candidate) && !petContext.test(candidate),
    });
  }

  if (isPetGroomingTopic) {
    const humanShampoo = /洗髮精|護髮|頭皮屑|控油洗髮/;
    const petWashContext = /寵物|毛孩|貓|狗|犬|毛小孩|洗毛精/;
    checks.push({
      test: (candidate) => humanShampoo.test(candidate) && !petWashContext.test(candidate),
    });
  }

  const customTerms = (exclusions ?? [])
    .map((raw) => raw.trim())
    .filter((term) => term.length >= 2);
  for (const term of customTerms) {
    checks.push({ test: (candidate) => candidate.includes(term) });
  }

  return checks;
}
/**
 * Builds the matching data for a topic.
 *
 * The core phrase is the trimmed `query`, or the trimmed `label` when the
 * query is missing, empty, or whitespace-only.
 *
 * @param input Topic description; only `query`, `label` and `exclusions` are read here.
 * @returns The anchor holding the core phrase, its concepts and its reject checks.
 */
export function buildTopicAnchor(input: TopicAnchorInput): TopicAnchor {
  // Trim each candidate before choosing: a whitespace-only query is truthy
  // and would otherwise win the `||` and then trim down to an empty phrase.
  const corePhrase = (input.query ?? "").trim() || (input.label ?? "").trim();
  return {
    corePhrase,
    requiredConcepts: extractTopicConcepts(corePhrase),
    rejects: buildRejectChecks(corePhrase, input.exclusions),
  };
}
/**
 * Scores how tightly `text` sticks to the anchor's topic. A score of 3 or
 * more counts as on-topic; -100 means definitely off-topic.
 *
 * Scores, checked in this order:
 * - `-10`  the text is empty after trimming;
 * - `-100` any reject check fires;
 * - `12`   the core phrase (three or more characters once whitespace is
 *          removed) occurs in the text, ignoring whitespace on both sides;
 * - `10`   the core phrase occurs in the text verbatim;
 * - with two or more required concepts: `8` when all are matched, `-8` when
 *   only some are, `-12` when none is;
 * - `-5`   otherwise.
 *
 * @param text Candidate text (tag, account name, post, ...).
 * @param anchor Topic anchor to score against.
 * @returns The relevance score.
 */
export function scoreTopicRelevance(text: string, anchor: TopicAnchor): number {
  const t = text.trim();
  if (!t) return -10;
  if (anchor.rejects.some((reject) => reject.test(t))) return -100;

  const compact = t.replace(/\s+/g, "");
  const coreCompact = anchor.corePhrase.replace(/\s+/g, "");
  if (coreCompact.length >= 3 && compact.includes(coreCompact)) return 12;
  // Require a non-blank core phrase: `includes("")` is true for every string,
  // so an empty anchor would otherwise rate every text as on-topic.
  if (coreCompact.length > 0 && t.includes(anchor.corePhrase)) return 10;

  const required = anchor.requiredConcepts;
  if (required.length >= 2) {
    const matchedCount = required.filter((concept) => conceptMatched(t, concept)).length;
    if (matchedCount >= required.length) return 8;
    // Any partial match scores -8, so matching two of three concepts is not
    // rated the same as matching none.
    return matchedCount > 0 ? -8 : -12;
  }

  // No verbatim-phrase check is needed here: that case already returned above.
  return -5;
}
/**
 * Tells whether `text` is on-topic for the given topic description.
 *
 * @param text Candidate text.
 * @param input Topic description; an anchor is built from it on every call.
 * @param minScore Lowest relevance score still accepted (default 3).
 * @returns True when the relevance score reaches `minScore`.
 */
export function isOnTopicText(text: string, input: TopicAnchorInput, minScore = 3): boolean {
  const anchor = buildTopicAnchor(input);
  const score = scoreTopicRelevance(text, anchor);
  return score >= minScore;
}
/**
 * Tells whether a tag or account handle is on-topic.
 *
 * Surrounding whitespace and one leading "@" are stripped first. Empty
 * results are rejected, as are entries of `BROAD_TAG_BLOCKLIST` unless
 * `options.allowBroad` is set; everything else is scored like regular text
 * with a minimum score of 3.
 *
 * @param tag Raw tag or handle.
 * @param input Topic description.
 * @param options `allowBroad` skips the broad-tag blocklist.
 * @returns True when the cleaned tag is on-topic.
 */
export function isOnTopicTag(
  tag: string,
  input: TopicAnchorInput,
  options?: { allowBroad?: boolean }
): boolean {
  // Trim before stripping "@": with the opposite order, a tag such as
  // " @寵物" kept its "@" and slipped past the blocklist.
  const clean = tag.trim().replace(/^@/, "").trim();
  if (!clean) return false;
  if (!options?.allowBroad && BROAD_TAG_BLOCKLIST.has(clean)) return false;
  return isOnTopicText(clean, input, 3);
}
/**
 * Renders the topic anchor as a three-line prompt block: the core phrase,
 * the required concepts (an empty line when there are fewer than two), and a
 * fixed list of common off-topic pitfalls.
 *
 * @param input Topic description.
 * @returns The prompt text, lines joined with "\n".
 */
export function buildTopicAnchorPromptBlock(input: TopicAnchorInput): string {
  const { corePhrase, requiredConcepts } = buildTopicAnchor(input);

  const headerLine = `【主題錨點】核心:${corePhrase}`;

  let conceptLine = "";
  if (requiredConcepts.length >= 2) {
    conceptLine = `必備概念(需同時符合,不可只命中其一):${requiredConcepts.join(" + ")}`;
  }

  const pitfallsLine =
    "常見跑題(務必排除):只沾邊單字、易混淆產品(如洗碗精≠洗毛精)、人類掉髮≠寵物洗澡、泛用詞(心情/日常/分享)";

  return [headerLine, conceptLine, pitfallsLine].join("\n");
}
/**
 * Placement mode: does the post read like a request for recommendations or
 * help, or like a stated pain point?
 *
 * @param text Post text.
 * @returns True when the text contains any of the request / trouble keywords.
 */
export function looksLikeRecommendationPost(text: string): boolean {
  const requestSignals =
    /推薦|求助|請益|請問|哪裡買|有沒有|求分享|困擾|煩惱|怎麼辦|怎麼選|不知道|有推|拜託|求救|卡關/;
  return requestSignals.test(text);
}
/**
 * Placement mode: does the text carry a product-need or trouble signal?
 * This includes pain-point descriptions that never explicitly ask for a
 * recommendation.
 *
 * @param text Post text.
 * @returns True when the text looks like a recommendation request or
 *   contains any of the need / pain-point keywords.
 */
export function hasPlacementIntent(text: string): boolean {
  const needSignals =
    /用什麼洗|哪款|哪牌|哪一牌|洗什麼|買什麼|在家洗|自己洗|洗澡怕|洗不乾淨|味道重|皮膚癢|皮膚紅|一直抓|掉毛多|敏感肌|過敏|紅腫|抓癢|異味|不敢洗|第一次洗|洗完還是|越洗越/;
  return looksLikeRecommendationPost(text) || needSignals.test(text);
}
/**
 * Pure small talk / photo sharing, with no need signal a product could be
 * placed against.
 *
 * @param text Post text.
 * @returns True only when the text has no placement intent and contains one
 *   of the casual-chat keywords.
 */
export function looksLikeCasualChat(text: string): boolean {
  const casualSignals =
    /好可愛|太萌|晒照|日常分享|隨便發|廢文|路過|笑死|哈哈哈|哈囉|早安|晚安|按讚|追蹤我|純分享|沒有要問/;
  return !hasPlacementIntent(text) && casualSignals.test(text);
}