/** Topic anchor: checks whether tags, accounts, and sweep results stay tightly on the core topic. */

/**
 * Vocabulary that `extractTopicConcepts` scans for when splitting a topic
 * phrase into concepts. The scan sorts a copy longest-first; entries of equal
 * length keep the relative order they have here.
 */
const CONCEPT_PARTS: readonly string[] = [
  // Compound product / activity terms.
  "寵物洗毛精",
  "寵物沐浴",
  "寵物美容",
  "寵物用品",
  "貓咪洗澡",
  "狗狗洗澡",
  // Product terms.
  "洗毛精",
  "沐浴乳",
  "沐浴精",
  "洗髮精",
  "洗碗精",
  // Pet words.
  "寵物",
  "毛孩",
  "貓咪",
  "狗狗",
  "貓",
  "狗",
  "犬",
];

/**
 * Maps a concept to the substrings that count as a mention of it.
 * `conceptMatched` treats a concept without an entry here as matching only
 * its own literal text.
 */
const CONCEPT_ALIASES: Record<string, string[]> = {
  寵物: ["寵物", "毛孩", "毛小孩", "毛寶", "貓咪", "狗狗", "貓", "狗", "犬"],
  洗毛精: ["洗毛精", "沐浴乳", "沐浴精", "洗澡", "沐浴", "梳毛"],
  沐浴: ["沐浴", "沐浴乳", "洗澡", "洗毛精"],
  洗澡: ["洗澡", "沐浴", "洗毛精"],
  貓咪: ["貓咪", "貓", "毛孩"],
  狗狗: ["狗狗", "狗", "犬", "毛孩"],
};

/**
 * Predefined decomposition for topic phrases that exactly equal a known
 * compound: each key maps to the two concepts a text has to cover.
 */
const COMPOUND_CONCEPTS: Record<string, string[]> = {
  寵物洗毛精: ["寵物", "洗毛精"],
  寵物沐浴: ["寵物", "沐浴"],
  寵物洗澡: ["寵物", "洗澡"],
  貓咪洗澡: ["貓咪", "洗澡"],
  狗狗洗澡: ["狗狗", "洗澡"],
};

/** Single-word tags that are too broad and easily drift off-topic. */
export const BROAD_TAG_BLOCKLIST = new Set<string>([
  // Bare pet / product words.
  "寵物",
  "洗毛精",
  "沐浴",
  "貓",
  "狗",
  "犬",
  "貓咪",
  "狗狗",
  "毛孩",
  // Look-alike or unrelated product / symptom words.
  "洗碗精",
  "掉髮",
  "落髮",
  // Generic social-post words.
  "語錄",
  "心情",
  "日常",
  "分享",
  "推薦",
  "求助",
  "請益",
]);

/** Caller-supplied description of a topic, from which a `TopicAnchor` is built. */
export interface TopicAnchorInput {
  /** Display label of the topic; used as the core phrase when `query` is empty. */
  label: string;
  /** Search query for the topic; preferred over `label` as the core phrase. */
  query: string;
  /** Not read by the code visible alongside this interface — presumably consumed elsewhere; TODO confirm. */
  brief?: string | null;
  /** Not read by the code visible alongside this interface — presumably consumed elsewhere; TODO confirm. */
  pillars?: string[];
  /** Not read by the code visible alongside this interface — presumably consumed elsewhere; TODO confirm. */
  suggestedTags?: string[];
  /** Terms whose presence marks a text as off-topic; entries shorter than 2 characters after trimming are ignored. */
  exclusions?: string[];
}

/** Precomputed matching data for one topic. */
export interface TopicAnchor {
  /** Trimmed query (or label) the topic is centred on. */
  corePhrase: string;
  /** Concepts derived from `corePhrase`; when there are two or more, a text has to cover all of them. */
  requiredConcepts: string[];
  /** Predicates that flag a text as clearly off-topic when any of them returns true. */
  rejects: Array<{ test: (text: string) => boolean }>;
}

/**
 * Breaks a topic phrase into the concepts a text has to cover to be on-topic.
 *
 * Resolution order:
 * 1. An exact key of `COMPOUND_CONCEPTS` yields its predefined pair.
 * 2. Otherwise the phrase is scanned with `CONCEPT_PARTS`, longest part first;
 *    each hit is removed from a working copy so a shorter part cannot re-match
 *    the same characters.
 * 3. A single surviving compound part (4+ characters) is expanded into atoms.
 * 4. Two or more surviving parts are returned as-is; anything else falls back
 *    to the whitespace-free phrase as one concept.
 *
 * @param phrase Topic phrase; all whitespace inside it is ignored.
 * @returns A fresh array of concepts, empty when the phrase is blank.
 */
export function extractTopicConcepts(phrase: string): string[] {
  const normalized = phrase.replace(/\s+/g, "");
  if (!normalized) return [];

  // Own-property check: a phrase such as "constructor" must not resolve to a
  // member inherited from Object.prototype. Return a copy so callers cannot
  // mutate the shared table.
  if (Object.prototype.hasOwnProperty.call(COMPOUND_CONCEPTS, normalized)) {
    return [...COMPOUND_CONCEPTS[normalized]];
  }

  // Drops every entry that is a substring of another entry in the same list.
  const dropContained = (items: string[]): string[] =>
    items.filter((item) => !items.some((other) => other !== item && other.includes(item)));

  const longestFirst = [...CONCEPT_PARTS].sort((a, b) => b.length - a.length);
  const found: string[] = [];
  let remaining = normalized;
  for (const part of longestFirst) {
    if (remaining.includes(part)) {
      found.push(part);
      remaining = remaining.replace(part, "");
    }
  }

  const pruned = dropContained(found);

  if (pruned.length === 1 && pruned[0].length >= 4) {
    const atoms = ["寵物", "毛孩", "貓咪", "狗狗", "貓", "狗", "洗毛精", "沐浴", "洗澡", "梳毛"];
    // Without pruning, "貓咪洗澡…" would require both "貓咪" and the bare "貓",
    // which disagrees with the exact-match table above.
    const inner = dropContained(atoms.filter((atom) => pruned[0].includes(atom)));
    if (inner.length >= 2) return [...new Set(inner)];
  }

  if (pruned.length >= 2) return [...new Set(pruned)];
  return [normalized];
}

/**
 * Tells whether `text` mentions `concept`, either literally or through one of
 * the aliases registered for it in `CONCEPT_ALIASES`.
 *
 * @param text Text to search.
 * @param concept Concept name; one without an alias entry matches only itself.
 * @returns True when any alias occurs in `text` as a substring.
 */
function conceptMatched(text: string, concept: string): boolean {
  // Own-property check: a concept named like an Object.prototype member
  // ("toString", "constructor") would otherwise yield a function, and calling
  // `.some` on it throws.
  const aliases = Object.prototype.hasOwnProperty.call(CONCEPT_ALIASES, concept)
    ? CONCEPT_ALIASES[concept]
    : [concept];
  return aliases.some((alias) => text.includes(alias));
}

/**
 * Builds the predicates that mark a text as clearly off-topic for a core phrase.
 *
 * Built-in rules, each added only when the core phrase calls for it:
 * - pet-wash phrase that does not mention dish washing → reject dish-cleaning texts;
 * - phrase mentioning pets → reject human hair-loss texts that mention no pet;
 * - phrase mentioning "寵物" plus a washing/grooming word → reject human shampoo
 *   texts that mention neither a pet nor pet shampoo.
 * After those, one substring rule is added per distinct exclusion term.
 *
 * @param corePhrase Core phrase of the topic.
 * @param exclusions Optional caller terms; trimmed, and ignored when shorter
 *   than 2 characters or already seen.
 * @returns The predicates, built-in rules first.
 */
function buildRejectChecks(corePhrase: string, exclusions?: string[]): TopicAnchor["rejects"] {
  const rejects: TopicAnchor["rejects"] = [];

  // The regexes have no `g` flag, so sharing one instance across calls is
  // stateless; they are compiled once per anchor instead of once per test call.
  if (/洗毛精|寵物沐浴|寵物洗澡/.test(corePhrase) && !/洗碗/.test(corePhrase)) {
    const dishCleaning = /洗碗精|洗碗乳|碗盤清潔|餐具清潔|廚房清潔/;
    rejects.push({ test: (t) => dishCleaning.test(t) });
  }

  if (/寵物|毛孩|貓|狗|犬/.test(corePhrase)) {
    const humanHairLoss = /掉髮|落髮|植髮|生髮|禿頭|髮量減少|頭皮療程|人髮/;
    const petMention = /寵物|毛孩|貓|狗|犬|毛小孩/;
    rejects.push({ test: (t) => humanHairLoss.test(t) && !petMention.test(t) });
  }

  if (/寵物/.test(corePhrase) && /洗毛精|沐浴|洗澡|梳毛/.test(corePhrase)) {
    const humanShampoo = /洗髮精|護髮|頭皮屑|控油洗髮/;
    const petOrPetShampoo = /寵物|毛孩|貓|狗|犬|毛小孩|洗毛精/;
    rejects.push({ test: (t) => humanShampoo.test(t) && !petOrPetShampoo.test(t) });
  }

  const seenTerms = new Set<string>();
  for (const rawTerm of exclusions ?? []) {
    const term = rawTerm.trim();
    if (term.length < 2 || seenTerms.has(term)) continue;
    seenTerms.add(term);
    rejects.push({ test: (t) => t.includes(term) });
  }

  return rejects;
}

/**
 * Builds the matching data for a topic from caller input.
 *
 * @param input Topic description; `query` is used as the core phrase, with
 *   `label` as the fallback when `query` is empty or whitespace-only.
 * @returns Anchor holding the trimmed core phrase, its required concepts, and
 *   the reject predicates (including `input.exclusions`).
 */
export function buildTopicAnchor(input: TopicAnchorInput): TopicAnchor {
  // Trim before choosing: a whitespace-only query is truthy and would
  // otherwise block the fallback, leaving an empty core phrase.
  const corePhrase = input.query.trim() || input.label.trim();
  return {
    corePhrase,
    requiredConcepts: extractTopicConcepts(corePhrase),
    rejects: buildRejectChecks(corePhrase, input.exclusions),
  };
}

/**
 * Scores how tightly a text sticks to a topic. A score of 3 or more counts as
 * on-topic; -100 means clearly off-topic.
 *
 * Score ladder, first match wins:
 * - -10  text is blank;
 * - -100 any reject predicate fires;
 * - 12   whitespace-free text contains the whitespace-free core phrase (3+ characters);
 * - 10   text contains the non-empty core phrase verbatim;
 * - with two or more required concepts: 8 when all are matched, -8 when only
 *   some are, -12 when none is;
 * - -5   otherwise.
 *
 * @param text Candidate text; trimmed before scoring.
 * @param anchor Anchor of the topic to score against.
 * @returns The relevance score.
 */
export function scoreTopicRelevance(text: string, anchor: TopicAnchor): number {
  const t = text.trim();
  if (!t) return -10;

  if (anchor.rejects.some((reject) => reject.test(t))) return -100;

  const compact = t.replace(/\s+/g, "");
  const coreCompact = anchor.corePhrase.replace(/\s+/g, "");

  if (coreCompact.length >= 3 && compact.includes(coreCompact)) return 12;
  // Guard against an empty or blank core phrase: `includes("")` is always
  // true and would score every text as on-topic.
  if (coreCompact.length > 0 && t.includes(anchor.corePhrase)) return 10;

  const required = anchor.requiredConcepts;
  if (required.length >= 2) {
    const matchedCount = required.filter((concept) => conceptMatched(t, concept)).length;
    if (matchedCount === required.length) return 8;
    // Any partial match gets the milder penalty, so covering 2 of 3 concepts
    // never scores below covering 1 of 3.
    return matchedCount > 0 ? -8 : -12;
  }

  // A verbatim core-phrase hit was already handled above, so nothing is left
  // to check here.
  return -5;
}

/**
 * Tells whether a text is on-topic for the given topic input.
 *
 * @param text Candidate text.
 * @param input Topic description; an anchor is built from it on every call.
 * @param minScore Lowest relevance score that still counts as on-topic (default 3).
 * @returns True when the relevance score reaches `minScore`.
 */
export function isOnTopicText(text: string, input: TopicAnchorInput, minScore = 3): boolean {
  const anchor = buildTopicAnchor(input);
  return scoreTopicRelevance(text, anchor) >= minScore;
}

/**
 * Tells whether a tag or account handle is on-topic for the given topic input.
 *
 * @param tag Raw tag; surrounding whitespace and leading `@` / `#` markers
 *   (half- or full-width) are stripped before checking.
 * @param input Topic description.
 * @param options `allowBroad: true` skips the `BROAD_TAG_BLOCKLIST` check.
 * @returns False for an empty tag or a blocklisted broad tag; otherwise
 *   whether the cleaned tag scores at least 3 against the topic.
 */
export function isOnTopicTag(
  tag: string,
  input: TopicAnchorInput,
  options?: { allowBroad?: boolean }
): boolean {
  // Trim first so " @寵物" loses its marker, and strip hashtag markers too so
  // "#寵物" is caught by the blocklist like the bare word is.
  const clean = tag.trim().replace(/^[@#＠＃]+/, "").trim();
  if (!clean) return false;
  if (!options?.allowBroad && BROAD_TAG_BLOCKLIST.has(clean)) return false;
  return isOnTopicText(clean, input, 3);
}

/**
 * Renders the topic anchor as a three-line prompt block: the core phrase, the
 * required concepts (an empty line when there are fewer than two), and a fixed
 * list of common off-topic patterns to exclude.
 *
 * @param input Topic description the anchor is built from.
 * @returns The prompt text, lines separated by "\n".
 */
export function buildTopicAnchorPromptBlock(input: TopicAnchorInput): string {
  const { corePhrase, requiredConcepts } = buildTopicAnchor(input);

  // Kept as an empty line rather than dropped, so the block always has three lines.
  const conceptsLine =
    requiredConcepts.length >= 2
      ? `必備概念(需同時符合,不可只命中其一):${requiredConcepts.join(" + ")}`
      : "";

  return [
    `【主題錨點】核心:${corePhrase}`,
    conceptsLine,
    "常見跑題(務必排除):只沾邊單字、易混淆產品(如洗碗精≠洗毛精)、人類掉髮≠寵物洗澡、泛用詞(心情/日常/分享)",
  ].join("\n");
}

/**
 * Placement mode: tells whether a post reads like a request for
 * recommendations, a call for help, or a description of a pain point.
 *
 * @param text Post text.
 * @returns True when the text contains any of the request/pain-point keywords.
 */
export function looksLikeRecommendationPost(text: string): boolean {
  return /推薦|求助|請益|請問|哪裡買|有沒有|求分享|困擾|煩惱|怎麼辦|怎麼選|不知道|有推|拜託|求救|卡關/.test(
    text
  );
}

/**
 * Placement mode: tells whether a post signals a product need or a problem,
 * including pain-point descriptions that never explicitly ask for a
 * recommendation.
 *
 * @param text Post text.
 * @returns True when the post looks like a recommendation request or contains
 *   any of the need/symptom keywords.
 */
export function hasPlacementIntent(text: string): boolean {
  return (
    looksLikeRecommendationPost(text) ||
    /用什麼洗|哪款|哪牌|哪一牌|洗什麼|買什麼|在家洗|自己洗|洗澡怕|洗不乾淨|味道重|皮膚癢|皮膚紅|一直抓|掉毛多|敏感肌|過敏|紅腫|抓癢|異味|不敢洗|第一次洗|洗完還是|越洗越/.test(
      text
    )
  );
}

/**
 * Tells whether a post is pure chit-chat or photo sharing with no need signal
 * to attach a placement to.
 *
 * @param text Post text.
 * @returns False whenever the post shows placement intent; otherwise true
 *   when it contains any of the casual-chat keywords.
 */
export function looksLikeCasualChat(text: string): boolean {
  // A need signal wins over casual wording.
  if (hasPlacementIntent(text)) return false;
  return /好可愛|太萌|晒照|日常分享|隨便發|廢文|路過|笑死|哈哈哈|哈囉|早安|晚安|按讚|追蹤我|純分享|沒有要問/.test(
    text
  );
}