213 lines
5.2 KiB
Go
213 lines
5.2 KiB
Go
|
|
package knowledge
|
||
|
|
|
||
|
|
import (
|
||
|
|
"strings"
|
||
|
|
"unicode/utf8"
|
||
|
|
)
|
||
|
|
|
||
|
|
// Patrol intents accepted by ensurePatrolIntent and humanizePatrolPhrase.
const (
	// patrolIntentRelevance is the default intent; ensurePatrolIntent appends
	// " 推薦" for any intent other than patrolIntentRecency.
	patrolIntentRelevance = "relevance"

	// patrolIntentRecency makes ensurePatrolIntent append " 請問" instead.
	patrolIntentRecency = "recency"
)
|
||
|
|
|
||
|
|
// patrolTopicAnchors lists the topic keywords compressPatrolKeywords looks for
// inside a phrase. Order matters: the slice is scanned front to back and only
// the first two entries found in the text are kept.
var patrolTopicAnchors = []string{
	"化療", "乳癌", "荷爾蒙", "標靶", "康復",
	"癌症", "病友", "敏感", "無香", "抗敏",
	"香料", "香精", "皮膚", "乾癢", "沐浴",
	"洗髮", "洗面", "卸妝", "防曬", "懷孕",
	"換季", "屏障", "過敏", "搔癢", "紅腫",
}
|
||
|
|
|
||
|
|
// patrolFillers lists chunks that isPatrolFiller rejects by exact match, so
// they never become part of a compressed patrol phrase.
var patrolFillers = []string{
	"要", "什麼", "嗎", "怎麼", "請問", "有人",
	"適合", "用的", "分享", "經驗", "挑選",
	"可以", "不能", "需要", "應該", "到底",
	"真的", "覺得", "知道", "告訴", "請益",
	"推薦嗎", "好用嗎", "用過", "想", "還是",
	"會不會", "是不是", "有沒有", "如何", "為什麼",
}
|
||
|
|
|
||
|
|
// PatrolTagFromQuestion keeps research-map questions when already search-shaped.
|
||
|
|
func PatrolTagFromQuestion(raw string) string {
|
||
|
|
raw = strings.TrimSpace(raw)
|
||
|
|
raw = strings.Join(strings.Fields(raw), " ")
|
||
|
|
if raw == "" {
|
||
|
|
return ""
|
||
|
|
}
|
||
|
|
runes := utf8.RuneCountInString(raw)
|
||
|
|
if runes >= minPatrolTagRunes && runes <= maxPatrolTagRunes {
|
||
|
|
if looksLikeThreadsSearch(raw) {
|
||
|
|
return raw
|
||
|
|
}
|
||
|
|
if runes >= 8 && (strings.Contains(raw, " ") || productCategoryHint(raw, "") != "") {
|
||
|
|
phrase := ensurePatrolIntent(raw, patrolIntentRelevance)
|
||
|
|
if utf8.RuneCountInString(phrase) <= maxPatrolTagRunes && !isMechanicalTag(phrase) {
|
||
|
|
return phrase
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return humanizePatrolPhrase(raw, patrolIntentRelevance)
|
||
|
|
}
|
||
|
|
|
||
|
|
// PatrolTagFromPillar compresses pillar phrases but keeps more context than generic labels.
|
||
|
|
func PatrolTagFromPillar(raw string) string {
|
||
|
|
raw = strings.TrimSpace(raw)
|
||
|
|
if raw == "" {
|
||
|
|
return ""
|
||
|
|
}
|
||
|
|
runes := utf8.RuneCountInString(raw)
|
||
|
|
if runes >= minPatrolTagRunes && runes <= maxPatrolTagRunes && strings.Contains(raw, " ") {
|
||
|
|
return ensurePatrolIntent(raw, patrolIntentRelevance)
|
||
|
|
}
|
||
|
|
return humanizePatrolPhrase(raw, patrolIntentRelevance)
|
||
|
|
}
|
||
|
|
|
||
|
|
func humanizePatrolPhrase(raw, intent string) string {
|
||
|
|
raw = strings.TrimSpace(raw)
|
||
|
|
raw = strings.Join(strings.Fields(raw), " ")
|
||
|
|
if raw == "" {
|
||
|
|
return ""
|
||
|
|
}
|
||
|
|
|
||
|
|
runes := utf8.RuneCountInString(raw)
|
||
|
|
if runes <= maxPatrolTagRunes && runes >= minPatrolTagRunes && looksLikeThreadsSearch(raw) {
|
||
|
|
return raw
|
||
|
|
}
|
||
|
|
|
||
|
|
phrase := compressPatrolKeywords(raw)
|
||
|
|
if phrase == "" {
|
||
|
|
phrase = truncateRunes(raw, maxPatrolTagRunes)
|
||
|
|
}
|
||
|
|
phrase = ensurePatrolIntent(phrase, intent)
|
||
|
|
if utf8.RuneCountInString(phrase) > maxPatrolTagRunes {
|
||
|
|
phrase = truncateRunes(phrase, maxPatrolTagRunes)
|
||
|
|
}
|
||
|
|
if utf8.RuneCountInString(phrase) < minPatrolTagRunes {
|
||
|
|
return ""
|
||
|
|
}
|
||
|
|
if isMechanicalTag(phrase) {
|
||
|
|
return ""
|
||
|
|
}
|
||
|
|
return phrase
|
||
|
|
}
|
||
|
|
|
||
|
|
// looksLikeThreadsSearch reports whether text already reads like a search
// query: it either contains a space or contains one of the question/request
// marker words anywhere in the string.
func looksLikeThreadsSearch(text string) bool {
	markers := [...]string{" ", "推薦", "請問", "怎麼辦", "好用嗎", "有人", "求助"}
	for i := range markers {
		if strings.Contains(text, markers[i]) {
			return true
		}
	}
	return false
}
|
||
|
|
|
||
|
|
func compressPatrolKeywords(text string) string {
|
||
|
|
category := productCategoryHint(text, "")
|
||
|
|
anchors := []string{}
|
||
|
|
seen := map[string]struct{}{}
|
||
|
|
for _, anchor := range patrolTopicAnchors {
|
||
|
|
if !strings.Contains(text, anchor) {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
if _, ok := seen[anchor]; ok {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
seen[anchor] = struct{}{}
|
||
|
|
anchors = append(anchors, anchor)
|
||
|
|
if len(anchors) >= 2 {
|
||
|
|
break
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
parts := append([]string{}, anchors...)
|
||
|
|
if category != "" {
|
||
|
|
parts = append(parts, category)
|
||
|
|
}
|
||
|
|
if len(parts) == 0 {
|
||
|
|
for _, chunk := range splitPatrolChunks(text) {
|
||
|
|
if isPatrolFiller(chunk) {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
parts = append(parts, chunk)
|
||
|
|
if len(parts) >= 2 {
|
||
|
|
break
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if len(parts) == 0 {
|
||
|
|
return ""
|
||
|
|
}
|
||
|
|
phrase := strings.Join(parts, " ")
|
||
|
|
if utf8.RuneCountInString(phrase) > maxPatrolTagRunes {
|
||
|
|
return truncateRunes(phrase, maxPatrolTagRunes)
|
||
|
|
}
|
||
|
|
return phrase
|
||
|
|
}
|
||
|
|
|
||
|
|
// splitPatrolChunks breaks text into candidate keyword chunks.
//
// Blank text yields nil. Text containing a space is split on whitespace.
// Unspaced text of at most six runes is returned as a single chunk. Longer
// unspaced text (typically continuous Chinese) yields its first four and last
// four runes, collapsed into one chunk when the two are identical.
func splitPatrolChunks(text string) []string {
	trimmed := strings.TrimSpace(text)
	switch {
	case trimmed == "":
		return nil
	case strings.Contains(trimmed, " "):
		return strings.Fields(trimmed)
	}

	rs := []rune(trimmed)
	if len(rs) <= 6 {
		return []string{trimmed}
	}

	// len(rs) > 6 here, so both fixed-size windows are always in range.
	const window = 4
	lead := string(rs[:window])
	trail := string(rs[len(rs)-window:])
	if lead == trail {
		return []string{lead}
	}
	return []string{lead, trail}
}
|
||
|
|
|
||
|
|
func ensurePatrolIntent(phrase, intent string) string {
|
||
|
|
phrase = strings.TrimSpace(phrase)
|
||
|
|
if phrase == "" {
|
||
|
|
return ""
|
||
|
|
}
|
||
|
|
if strings.ContainsAny(phrase, "推薦請問怎麼辦好用嗎有人求助") {
|
||
|
|
return phrase
|
||
|
|
}
|
||
|
|
suffix := " 推薦"
|
||
|
|
if intent == patrolIntentRecency {
|
||
|
|
suffix = " 請問"
|
||
|
|
}
|
||
|
|
if utf8.RuneCountInString(phrase+suffix) <= maxPatrolTagRunes {
|
||
|
|
return phrase + suffix
|
||
|
|
}
|
||
|
|
return phrase
|
||
|
|
}
|
||
|
|
|
||
|
|
func isPatrolFiller(chunk string) bool {
|
||
|
|
chunk = strings.TrimSpace(chunk)
|
||
|
|
if chunk == "" || utf8.RuneCountInString(chunk) < 2 {
|
||
|
|
return true
|
||
|
|
}
|
||
|
|
for _, filler := range patrolFillers {
|
||
|
|
if chunk == filler {
|
||
|
|
return true
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return false
|
||
|
|
}
|
||
|
|
|
||
|
|
// truncateRunes trims surrounding whitespace from text and returns at most
// limit runes of it.
//
// A limit of zero or less yields "" (a negative limit previously panicked on
// the slice expression). When a cut is made, whitespace left dangling at the
// cut point is trimmed too, so the result never ends in a space.
func truncateRunes(text string, limit int) string {
	if limit <= 0 {
		return ""
	}
	runes := []rune(strings.TrimSpace(text))
	if len(runes) <= limit {
		return string(runes)
	}
	// The cut may land right after a separator space; drop it.
	return strings.TrimSpace(string(runes[:limit]))
}
|
||
|
|
|
||
|
|
// maxInt returns the largest of values. Called with no arguments it returns 0
// instead of panicking on an out-of-range index.
func maxInt(values ...int) int {
	if len(values) == 0 {
		return 0
	}
	largest := values[0]
	for _, v := range values[1:] {
		if v > largest {
			largest = v
		}
	}
	return largest
}
|