package knowledge

import (
	"strings"
	"unicode/utf8"
)

// Intent labels passed to ensurePatrolIntent to pick the suffix it appends.
const (
	patrolIntentRelevance = "relevance"
	patrolIntentRecency   = "recency"
)

// patrolTopicAnchors holds the topic keywords compressPatrolKeywords scans
// for, in slice order, when shortening a phrase.
var patrolTopicAnchors = []string{
	"化療", "乳癌", "荷爾蒙", "標靶", "康復", "癌症", "病友",
	"敏感", "無香", "抗敏", "香料", "香精", "皮膚", "乾癢",
	"沐浴", "洗髮", "洗面", "卸妝", "防曬", "懷孕", "換季",
	"屏障", "過敏", "搔癢", "紅腫",
}

// patrolFillers holds the chunks isPatrolFiller rejects outright.
var patrolFillers = []string{
	"要", "什麼", "嗎", "怎麼", "請問", "有人", "適合", "用的",
	"分享", "經驗", "挑選", "可以", "不能", "需要", "應該", "到底",
	"真的", "覺得", "知道", "告訴", "請益", "推薦嗎", "好用嗎", "用過",
	"想", "還是", "會不會", "是不是", "有沒有", "如何", "為什麼",
}

// PatrolTagFromQuestion turns a research-map question into a patrol tag.
//
// The question is whitespace-normalized first. When its rune count lies within
// [minPatrolTagRunes, maxPatrolTagRunes] it is returned unchanged if
// looksLikeThreadsSearch accepts it, or with an intent suffix if it is at
// least 8 runes long and either contains a space or has a product category
// hint. Every other case is delegated to humanizePatrolPhrase. An empty or
// whitespace-only question yields "".
func PatrolTagFromQuestion(raw string) string {
	question := strings.Join(strings.Fields(strings.TrimSpace(raw)), " ")
	if question == "" {
		return ""
	}

	length := utf8.RuneCountInString(question)
	if length < minPatrolTagRunes || length > maxPatrolTagRunes {
		return humanizePatrolPhrase(question, patrolIntentRelevance)
	}
	if looksLikeThreadsSearch(question) {
		return question
	}

	// Only longer questions that are spaced or name a product category are
	// kept whole; the category lookup runs only when the cheaper checks pass.
	keepWhole := length >= 8 &&
		(strings.Contains(question, " ") || productCategoryHint(question, "") != "")
	if keepWhole {
		tagged := ensurePatrolIntent(question, patrolIntentRelevance)
		if utf8.RuneCountInString(tagged) <= maxPatrolTagRunes && !isMechanicalTag(tagged) {
			return tagged
		}
	}
	return humanizePatrolPhrase(question, patrolIntentRelevance)
}

// PatrolTagFromPillar compresses pillar phrases but keeps more context than generic labels.
func PatrolTagFromPillar(raw string) string { raw = strings.TrimSpace(raw) if raw == "" { return "" } runes := utf8.RuneCountInString(raw) if runes >= minPatrolTagRunes && runes <= maxPatrolTagRunes && strings.Contains(raw, " ") { return ensurePatrolIntent(raw, patrolIntentRelevance) } return humanizePatrolPhrase(raw, patrolIntentRelevance) } func humanizePatrolPhrase(raw, intent string) string { raw = strings.TrimSpace(raw) raw = strings.Join(strings.Fields(raw), " ") if raw == "" { return "" } runes := utf8.RuneCountInString(raw) if runes <= maxPatrolTagRunes && runes >= minPatrolTagRunes && looksLikeThreadsSearch(raw) { return raw } phrase := compressPatrolKeywords(raw) if phrase == "" { phrase = truncateRunes(raw, maxPatrolTagRunes) } phrase = ensurePatrolIntent(phrase, intent) if utf8.RuneCountInString(phrase) > maxPatrolTagRunes { phrase = truncateRunes(phrase, maxPatrolTagRunes) } if utf8.RuneCountInString(phrase) < minPatrolTagRunes { return "" } if isMechanicalTag(phrase) { return "" } return phrase } func looksLikeThreadsSearch(text string) bool { if strings.Contains(text, " ") { return true } for _, suffix := range []string{"推薦", "請問", "怎麼辦", "好用嗎", "有人", "求助"} { if strings.Contains(text, suffix) { return true } } return false } func compressPatrolKeywords(text string) string { category := productCategoryHint(text, "") anchors := []string{} seen := map[string]struct{}{} for _, anchor := range patrolTopicAnchors { if !strings.Contains(text, anchor) { continue } if _, ok := seen[anchor]; ok { continue } seen[anchor] = struct{}{} anchors = append(anchors, anchor) if len(anchors) >= 2 { break } } parts := append([]string{}, anchors...) 
if category != "" { parts = append(parts, category) } if len(parts) == 0 { for _, chunk := range splitPatrolChunks(text) { if isPatrolFiller(chunk) { continue } parts = append(parts, chunk) if len(parts) >= 2 { break } } } if len(parts) == 0 { return "" } phrase := strings.Join(parts, " ") if utf8.RuneCountInString(phrase) > maxPatrolTagRunes { return truncateRunes(phrase, maxPatrolTagRunes) } return phrase } func splitPatrolChunks(text string) []string { text = strings.TrimSpace(text) if text == "" { return nil } if strings.Contains(text, " ") { return strings.Fields(text) } runes := []rune(text) if len(runes) <= 6 { return []string{text} } // Long continuous Chinese: take leading topic chunk + trailing product-ish chunk. head := string(runes[:minInt(4, len(runes))]) tail := string(runes[maxInt(0, len(runes)-4):]) if head == tail { return []string{head} } return []string{head, tail} } func ensurePatrolIntent(phrase, intent string) string { phrase = strings.TrimSpace(phrase) if phrase == "" { return "" } if strings.ContainsAny(phrase, "推薦請問怎麼辦好用嗎有人求助") { return phrase } suffix := " 推薦" if intent == patrolIntentRecency { suffix = " 請問" } if utf8.RuneCountInString(phrase+suffix) <= maxPatrolTagRunes { return phrase + suffix } return phrase } func isPatrolFiller(chunk string) bool { chunk = strings.TrimSpace(chunk) if chunk == "" || utf8.RuneCountInString(chunk) < 2 { return true } for _, filler := range patrolFillers { if chunk == filler { return true } } return false } func truncateRunes(text string, max int) string { runes := []rune(strings.TrimSpace(text)) if len(runes) <= max { return string(runes) } return string(runes[:max]) } func maxInt(values ...int) int { max := values[0] for _, v := range values[1:] { if v > max { max = v } } return max }