thread-master/internal/library/knowledge/patrol_phrase.go

213 lines
5.2 KiB
Go
Raw Normal View History

2026-06-26 08:37:04 +00:00
package knowledge
import (
"strings"
"unicode/utf8"
)
// Search-intent labels accepted by the patrol phrase helpers in this file.
const (
	// patrolIntentRelevance is the intent both exported tag builders pass on;
	// ensurePatrolIntent answers it with the " 推薦" marker.
	patrolIntentRelevance = "relevance"

	// patrolIntentRecency makes ensurePatrolIntent use the " 請問" marker.
	patrolIntentRecency = "recency"
)
// Keyword tables consulted while a raw phrase is compressed into a patrol tag.
var (
	// patrolTopicAnchors lists topic keywords in scan order;
	// compressPatrolKeywords keeps at most the first two found in its input.
	patrolTopicAnchors = []string{
		"化療", "乳癌", "荷爾蒙", "標靶", "康復",
		"癌症", "病友", "敏感", "無香", "抗敏",
		"香料", "香精", "皮膚", "乾癢", "沐浴",
		"洗髮", "洗面", "卸妝", "防曬", "懷孕",
		"換季", "屏障", "過敏", "搔癢", "紅腫",
	}

	// patrolFillers lists chunks that isPatrolFiller rejects on exact match.
	patrolFillers = []string{
		"要", "什麼", "嗎", "怎麼", "請問", "有人",
		"適合", "用的", "分享", "經驗", "挑選",
		"可以", "不能", "需要", "應該", "到底",
		"真的", "覺得", "知道", "告訴", "請益",
		"推薦嗎", "好用嗎", "用過", "想", "還是",
		"會不會", "是不是", "有沒有", "如何", "為什麼",
	}
)
// PatrolTagFromQuestion turns a research-map question into a patrol search tag.
//
// Whitespace is trimmed and collapsed to single spaces first; empty input
// yields "". A question whose rune count lies within
// [minPatrolTagRunes, maxPatrolTagRunes] is returned unchanged when
// looksLikeThreadsSearch accepts it. Failing that, a question of at least 8
// runes that contains a space or has a product category hint is returned with
// an intent marker, provided the result still fits and isMechanicalTag does
// not reject it. Everything else is delegated to humanizePatrolPhrase.
func PatrolTagFromQuestion(raw string) string {
	// strings.Fields already ignores leading and trailing whitespace.
	question := strings.Join(strings.Fields(raw), " ")
	if question == "" {
		return ""
	}

	n := utf8.RuneCountInString(question)
	if n < minPatrolTagRunes || n > maxPatrolTagRunes {
		return humanizePatrolPhrase(question, patrolIntentRelevance)
	}
	if looksLikeThreadsSearch(question) {
		return question
	}
	if n >= 8 && (strings.Contains(question, " ") || productCategoryHint(question, "") != "") {
		tagged := ensurePatrolIntent(question, patrolIntentRelevance)
		if utf8.RuneCountInString(tagged) <= maxPatrolTagRunes && !isMechanicalTag(tagged) {
			return tagged
		}
	}
	return humanizePatrolPhrase(question, patrolIntentRelevance)
}
// PatrolTagFromPillar compresses a pillar phrase into a patrol search tag.
//
// Whitespace is trimmed and every internal whitespace run is collapsed to a
// single space, matching what PatrolTagFromQuestion and humanizePatrolPhrase
// do, so tabs or doubled spaces neither leak into the tag nor change which
// path is taken. Empty input yields "". A multi-word phrase whose rune count
// lies within [minPatrolTagRunes, maxPatrolTagRunes] is kept whole and passed
// through ensurePatrolIntent; every other phrase is delegated to
// humanizePatrolPhrase.
func PatrolTagFromPillar(raw string) string {
	// strings.Fields splits on any Unicode whitespace and drops the outer
	// padding, so this both trims and normalises separators.
	raw = strings.Join(strings.Fields(raw), " ")
	if raw == "" {
		return ""
	}
	runes := utf8.RuneCountInString(raw)
	if runes >= minPatrolTagRunes && runes <= maxPatrolTagRunes && strings.Contains(raw, " ") {
		return ensurePatrolIntent(raw, patrolIntentRelevance)
	}
	return humanizePatrolPhrase(raw, patrolIntentRelevance)
}
func humanizePatrolPhrase(raw, intent string) string {
raw = strings.TrimSpace(raw)
raw = strings.Join(strings.Fields(raw), " ")
if raw == "" {
return ""
}
runes := utf8.RuneCountInString(raw)
if runes <= maxPatrolTagRunes && runes >= minPatrolTagRunes && looksLikeThreadsSearch(raw) {
return raw
}
phrase := compressPatrolKeywords(raw)
if phrase == "" {
phrase = truncateRunes(raw, maxPatrolTagRunes)
}
phrase = ensurePatrolIntent(phrase, intent)
if utf8.RuneCountInString(phrase) > maxPatrolTagRunes {
phrase = truncateRunes(phrase, maxPatrolTagRunes)
}
if utf8.RuneCountInString(phrase) < minPatrolTagRunes {
return ""
}
if isMechanicalTag(phrase) {
return ""
}
return phrase
}
// looksLikeThreadsSearch reports whether text contains a space or any of the
// intent markers 推薦, 請問, 怎麼辦, 好用嗎, 有人 or 求助, anywhere in the string.
func looksLikeThreadsSearch(text string) bool {
	signals := [...]string{" ", "推薦", "請問", "怎麼辦", "好用嗎", "有人", "求助"}
	for i := range signals {
		if strings.Contains(text, signals[i]) {
			return true
		}
	}
	return false
}
// compressPatrolKeywords condenses text into a short space-separated phrase.
//
// The phrase is made of up to two entries of patrolTopicAnchors that occur in
// text (in table order) followed by the product category hint, when there is
// one. If neither source contributes anything, up to two non-filler chunks
// from splitPatrolChunks are used instead. The result is cut to
// maxPatrolTagRunes; "" means nothing usable was found.
func compressPatrolKeywords(text string) string {
	category := productCategoryHint(text, "")

	picked := make([]string, 0, 3)
scan:
	for _, anchor := range patrolTopicAnchors {
		if !strings.Contains(text, anchor) {
			continue
		}
		// Skip an anchor that is already selected.
		for _, prev := range picked {
			if prev == anchor {
				continue scan
			}
		}
		picked = append(picked, anchor)
		if len(picked) == 2 {
			break
		}
	}
	if category != "" {
		picked = append(picked, category)
	}

	// No anchor and no category: fall back to the raw chunks of the text.
	if len(picked) == 0 {
		for _, chunk := range splitPatrolChunks(text) {
			if isPatrolFiller(chunk) {
				continue
			}
			picked = append(picked, chunk)
			if len(picked) == 2 {
				break
			}
		}
	}
	if len(picked) == 0 {
		return ""
	}

	joined := strings.Join(picked, " ")
	if utf8.RuneCountInString(joined) <= maxPatrolTagRunes {
		return joined
	}
	return truncateRunes(joined, maxPatrolTagRunes)
}
// splitPatrolChunks breaks text into candidate keyword chunks.
//
// Blank input gives nil. Text containing a space is split on whitespace.
// Unspaced text of at most 6 runes is returned as a single chunk. Longer
// unspaced text gives its first 4 and last 4 runes, or just one chunk when
// those two are identical.
func splitPatrolChunks(text string) []string {
	trimmed := strings.TrimSpace(text)
	switch {
	case trimmed == "":
		return nil
	case strings.Contains(trimmed, " "):
		return strings.Fields(trimmed)
	}

	const edge = 4
	rs := []rune(trimmed)
	if len(rs) <= 6 {
		return []string{trimmed}
	}

	// len(rs) > 6 here, so both slices below are always in range.
	leading := string(rs[:edge])
	trailing := string(rs[len(rs)-edge:])
	if leading == trailing {
		return []string{leading}
	}
	return []string{leading, trailing}
}
// ensurePatrolIntent makes sure phrase carries a search-intent marker.
//
// The phrase is trimmed; blank input yields "". A phrase that already
// contains one of the markers 推薦, 請問, 怎麼辦, 好用嗎, 有人 or 求助 is returned as
// is. Otherwise " 請問" (for patrolIntentRecency) or " 推薦" (for any other
// intent) is appended, unless that would push the phrase past
// maxPatrolTagRunes, in which case the trimmed phrase is returned without a
// marker.
func ensurePatrolIntent(phrase, intent string) string {
	phrase = strings.TrimSpace(phrase)
	if phrase == "" {
		return ""
	}

	// Match whole marker words. strings.ContainsAny would treat the markers as
	// a set of single runes, so any phrase containing a lone 好, 用, 人, 有 or 問
	// would be mistaken for one that already states its intent.
	for _, marker := range []string{"推薦", "請問", "怎麼辦", "好用嗎", "有人", "求助"} {
		if strings.Contains(phrase, marker) {
			return phrase
		}
	}

	suffix := " 推薦"
	if intent == patrolIntentRecency {
		suffix = " 請問"
	}
	if tagged := phrase + suffix; utf8.RuneCountInString(tagged) <= maxPatrolTagRunes {
		return tagged
	}
	return phrase
}
// isPatrolFiller reports whether chunk is useless as a keyword: after
// trimming it is shorter than two runes (which includes the empty string) or
// it equals an entry of patrolFillers.
func isPatrolFiller(chunk string) bool {
	word := strings.TrimSpace(chunk)
	if utf8.RuneCountInString(word) < 2 {
		return true
	}
	for i := range patrolFillers {
		if patrolFillers[i] == word {
			return true
		}
	}
	return false
}
// truncateRunes trims text and cuts it to at most max runes.
//
// The cut is made on rune boundaries, so multi-byte characters are never
// split. A non-positive max yields "" instead of panicking on a negative
// slice bound. Whitespace left dangling at the cut point is removed, so the
// result may be shorter than max.
func truncateRunes(text string, max int) string {
	if max <= 0 {
		return ""
	}
	runes := []rune(strings.TrimSpace(text))
	if len(runes) <= max {
		return string(runes)
	}
	// The input was trimmed above, so only the freshly cut end can carry
	// whitespace (for example when the cut lands right after a separator).
	return strings.TrimSpace(string(runes[:max]))
}
// maxInt returns the largest of values.
//
// Called with no arguments it returns 0 rather than panicking on an
// out-of-range index.
func maxInt(values ...int) int {
	if len(values) == 0 {
		return 0
	}
	// Named "largest" so the Go 1.21 builtin max is not shadowed.
	largest := values[0]
	for _, v := range values[1:] {
		if v > largest {
			largest = v
		}
	}
	return largest
}