377 lines
9.7 KiB
Go
377 lines
9.7 KiB
Go
|
|
package knowledge
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"encoding/json"
|
|||
|
|
"fmt"
|
|||
|
|
"regexp"
|
|||
|
|
"strings"
|
|||
|
|
|
|||
|
|
libprompt "haixun-backend/internal/library/prompt"
|
|||
|
|
|
|||
|
|
"github.com/google/uuid"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
type SynthInput struct {
|
|||
|
|
BrandDisplayName string
|
|||
|
|
TopicName string
|
|||
|
|
ProductLabel string
|
|||
|
|
Goals string
|
|||
|
|
Seed string
|
|||
|
|
ProductBrief string
|
|||
|
|
TargetAudience string
|
|||
|
|
Persona string
|
|||
|
|
ResearchPillars []string
|
|||
|
|
ResearchQuestions []string
|
|||
|
|
Sources []BraveSource
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// rawSynthNode is the decoding target for a single node of the model's JSON
// output. Several values are accepted under both a camelCase and a snake_case
// key; ParseSynthOutput reconciles each pair.
type rawSynthNode struct {
	Label string `json:"label"`

	// Node kind: "nodeKind" or "node_kind".
	NodeKind      string `json:"nodeKind"`
	NodeKindSnake string `json:"node_kind"`

	Type     string `json:"type"`
	Layer    int    `json:"layer"`
	Relation string `json:"relation"`

	// Placement value: "placementValue" or "placement_value".
	PlacementValue    string `json:"placementValue"`
	PlacementValueAlt string `json:"placement_value"`

	// Product fit score: "productFitScore" or "product_fit_score".
	ProductFitScore    int `json:"productFitScore"`
	ProductFitScoreAlt int `json:"product_fit_score"`

	// Evidence URLs: "evidenceUrls" or "evidence_urls".
	EvidenceURLs    []string `json:"evidenceUrls"`
	EvidenceURLsAlt []string `json:"evidence_urls"`

	// Relevance queries: "relevanceQueries" or "relevance_queries".
	RelevanceQueries    []string `json:"relevanceQueries"`
	RelevanceQueriesAlt []string `json:"relevance_queries"`

	// Recency queries: "recencyQueries" or "recency_queries".
	RecencyQueries    []string `json:"recencyQueries"`
	RecencyQueriesAlt []string `json:"recency_queries"`
}
|
|||
|
|
|
|||
|
|
type rawSynthOutput struct {
|
|||
|
|
Nodes []rawSynthNode `json:"nodes"`
|
|||
|
|
Edges []struct {
|
|||
|
|
From string `json:"from"`
|
|||
|
|
To string `json:"to"`
|
|||
|
|
Relation string `json:"relation"`
|
|||
|
|
} `json:"edges"`
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// codeFenceRE matches the first Markdown code fence (optionally tagged
// "json") and captures the text between the opening and closing fences.
var codeFenceRE = regexp.MustCompile("(?s)```(?:json)?\\s*([\\s\\S]*?)```")
|
|||
|
|
|
|||
|
|
func BuildUserPrompt(in SynthInput) (string, error) {
|
|||
|
|
var sources strings.Builder
|
|||
|
|
limit := len(in.Sources)
|
|||
|
|
if limit > 30 {
|
|||
|
|
limit = 30
|
|||
|
|
}
|
|||
|
|
for i := 0; i < limit; i++ {
|
|||
|
|
src := in.Sources[i]
|
|||
|
|
fmt.Fprintf(&sources, "[%d] query=%s\nurl=%s\ntitle=%s\nsnippet=%s\n\n",
|
|||
|
|
i+1, src.Query, src.URL, src.Title, src.Snippet)
|
|||
|
|
}
|
|||
|
|
vars := map[string]string{
|
|||
|
|
"brand_line": optionalLine("品牌", in.BrandDisplayName),
|
|||
|
|
"topic_line": optionalLine("主題名稱", in.TopicName),
|
|||
|
|
"product_line": optionalLine("置入產品", in.ProductLabel),
|
|||
|
|
"goals_line": optionalLine("置入目標", in.Goals),
|
|||
|
|
"seed": strings.TrimSpace(in.Seed),
|
|||
|
|
"product_brief_line": optionalLine("產品簡述", in.ProductBrief),
|
|||
|
|
"target_audience_line": optionalLine("目標受眾", in.TargetAudience),
|
|||
|
|
"persona_line": optionalLine("主題目標", in.Persona),
|
|||
|
|
"research_pillars_line": bulletLine("內容支柱(延伸知識要往這些方向廣泛展開)", in.ResearchPillars),
|
|||
|
|
"research_questions_line": bulletLine("受眾提問方向(可衍生成更多周邊節點)", in.ResearchQuestions),
|
|||
|
|
"sources": strings.TrimSpace(sources.String()),
|
|||
|
|
}
|
|||
|
|
return libprompt.KnowledgeGraphUser(vars)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func optionalLine(label, value string) string {
|
|||
|
|
return OptionalPromptLine(label, value)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func BulletPromptLine(title string, items []string) string {
|
|||
|
|
return bulletLine(title, items)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// bulletLine renders title as a heading line followed by one "- item" line
// per entry of items. Entries are trimmed and blank ones are skipped; when
// nothing remains the result is the empty string (no heading either).
func bulletLine(title string, items []string) string {
	var body strings.Builder
	for _, raw := range items {
		if entry := strings.TrimSpace(raw); entry != "" {
			body.WriteString("- ")
			body.WriteString(entry)
			body.WriteByte('\n')
		}
	}
	if body.Len() == 0 {
		return ""
	}
	return title + ":\n" + body.String()
}
|
|||
|
|
|
|||
|
|
// OptionalPromptLine renders "label:value\n" using the trimmed value, or the
// empty string when value is blank so the line disappears from the prompt.
func OptionalPromptLine(label, value string) string {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return label + ":" + trimmed + "\n"
	}
	return ""
}
|
|||
|
|
|
|||
|
|
func ParseSynthOutput(raw string, in SynthInput, sources []BraveSource) (Graph, error) {
|
|||
|
|
payload, err := extractJSONObject(raw)
|
|||
|
|
if err != nil {
|
|||
|
|
return Graph{}, err
|
|||
|
|
}
|
|||
|
|
var out rawSynthOutput
|
|||
|
|
if err := json.Unmarshal(payload, &out); err != nil {
|
|||
|
|
return Graph{}, fmt.Errorf("parse knowledge graph json: %w", err)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
seed := strings.TrimSpace(in.Seed)
|
|||
|
|
graph := Graph{
|
|||
|
|
Seed: seed,
|
|||
|
|
BraveSources: sources,
|
|||
|
|
Nodes: []Node{},
|
|||
|
|
Edges: []Edge{},
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
sourceByURL := map[string]BraveSource{}
|
|||
|
|
for _, src := range sources {
|
|||
|
|
if src.URL != "" {
|
|||
|
|
sourceByURL[src.URL] = src
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
hasCore := false
|
|||
|
|
for _, item := range out.Nodes {
|
|||
|
|
label := strings.TrimSpace(item.Label)
|
|||
|
|
if label == "" {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
layer := item.Layer
|
|||
|
|
nodeType := strings.TrimSpace(item.Type)
|
|||
|
|
nodeKind := firstNonEmpty(item.NodeKind, item.NodeKindSnake)
|
|||
|
|
if layer == 0 || nodeType == "core" {
|
|||
|
|
layer = 0
|
|||
|
|
nodeType = "core"
|
|||
|
|
if nodeKind == "" {
|
|||
|
|
nodeKind = "pain"
|
|||
|
|
}
|
|||
|
|
hasCore = true
|
|||
|
|
}
|
|||
|
|
if nodeKind == "" {
|
|||
|
|
if layer >= 2 {
|
|||
|
|
nodeKind = "cause"
|
|||
|
|
} else if layer == 1 {
|
|||
|
|
nodeKind = "symptom"
|
|||
|
|
} else {
|
|||
|
|
nodeKind = "knowledge"
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
evidenceURLs := item.EvidenceURLs
|
|||
|
|
if len(evidenceURLs) == 0 {
|
|||
|
|
evidenceURLs = item.EvidenceURLsAlt
|
|||
|
|
}
|
|||
|
|
evidence := make([]Evidence, 0, len(evidenceURLs))
|
|||
|
|
for _, u := range evidenceURLs {
|
|||
|
|
u = strings.TrimSpace(u)
|
|||
|
|
if u == "" {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
ev := Evidence{URL: u}
|
|||
|
|
if src, ok := sourceByURL[u]; ok {
|
|||
|
|
ev.Snippet = src.Snippet
|
|||
|
|
ev.Query = src.Query
|
|||
|
|
}
|
|||
|
|
evidence = append(evidence, ev)
|
|||
|
|
}
|
|||
|
|
fit := item.ProductFitScore
|
|||
|
|
if fit <= 0 {
|
|||
|
|
fit = item.ProductFitScoreAlt
|
|||
|
|
}
|
|||
|
|
if fit <= 0 {
|
|||
|
|
fit = defaultProductFit(nodeKind, layer)
|
|||
|
|
}
|
|||
|
|
placementValue := firstNonEmpty(item.PlacementValue, item.PlacementValueAlt)
|
|||
|
|
graph.Nodes = append(graph.Nodes, Node{
|
|||
|
|
ID: uuid.NewString(),
|
|||
|
|
Label: label,
|
|||
|
|
NodeKind: nodeKind,
|
|||
|
|
Type: nodeType,
|
|||
|
|
Layer: layer,
|
|||
|
|
Relation: strings.TrimSpace(item.Relation),
|
|||
|
|
PlacementValue: normalizePlacementReason(placementValue, item.Relation, nodeKind, fit),
|
|||
|
|
ProductFitScore: fit,
|
|||
|
|
PatrolRelevance: mergeStringLists(item.RelevanceQueries, item.RelevanceQueriesAlt),
|
|||
|
|
PatrolRecency: mergeStringLists(item.RecencyQueries, item.RecencyQueriesAlt),
|
|||
|
|
Evidence: evidence,
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if !hasCore && seed != "" {
|
|||
|
|
graph.Nodes = append([]Node{{
|
|||
|
|
ID: uuid.NewString(),
|
|||
|
|
Label: seed,
|
|||
|
|
NodeKind: "pain",
|
|||
|
|
Type: "core",
|
|||
|
|
Layer: 0,
|
|||
|
|
Relation: "核心種子主題",
|
|||
|
|
PlacementValue: "核心痛點帖最常求推薦,適合以產品使用經驗自然回覆",
|
|||
|
|
ProductFitScore: 90,
|
|||
|
|
}}, graph.Nodes...)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
labelToID := map[string]string{}
|
|||
|
|
for _, node := range graph.Nodes {
|
|||
|
|
labelToID[strings.ToLower(strings.TrimSpace(node.Label))] = node.ID
|
|||
|
|
}
|
|||
|
|
for _, edge := range out.Edges {
|
|||
|
|
from := resolveNodeRef(edge.From, labelToID, graph.Nodes)
|
|||
|
|
to := resolveNodeRef(edge.To, labelToID, graph.Nodes)
|
|||
|
|
if from == "" || to == "" || from == to {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
graph.Edges = append(graph.Edges, Edge{
|
|||
|
|
From: from,
|
|||
|
|
To: to,
|
|||
|
|
Relation: strings.TrimSpace(edge.Relation),
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return graph, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// firstNonEmpty returns the first argument that is non-blank after trimming
// surrounding whitespace, in its trimmed form, or "" when there is none.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
|
|||
|
|
|
|||
|
|
// mergeStringLists concatenates groups in order, trimming every entry and
// dropping blanks and repeats (first occurrence wins). The result is always a
// non-nil slice, even when it is empty.
func mergeStringLists(groups ...[]string) []string {
	merged := make([]string, 0)
	known := make(map[string]bool)
	for _, group := range groups {
		for _, raw := range group {
			entry := strings.TrimSpace(raw)
			if entry == "" || known[entry] {
				continue
			}
			known[entry] = true
			merged = append(merged, entry)
		}
	}
	return merged
}
|
|||
|
|
|
|||
|
|
// defaultProductFit supplies the product-fit score used when the model gave
// none: 90 for a layer-0 "pain" node, 80 for any other "pain" node, 70 for
// "symptom" and "cause" nodes, and 50 for everything else.
func defaultProductFit(nodeKind string, layer int) int {
	switch {
	case nodeKind == "pain" && layer == 0:
		return 90
	case nodeKind == "pain":
		return 80
	case nodeKind == "symptom" || nodeKind == "cause":
		return 70
	}
	return 50
}
|
|||
|
|
|
|||
|
|
func normalizePlacementReason(value, relation, nodeKind string, productFit int) string {
|
|||
|
|
value = strings.TrimSpace(value)
|
|||
|
|
if value != "" {
|
|||
|
|
switch strings.ToLower(value) {
|
|||
|
|
case "high":
|
|||
|
|
return "受眾在此情境常有明確產品需求,適合自然分享使用經驗"
|
|||
|
|
case "medium":
|
|||
|
|
return "與產品使用情境相關,可輕量帶入經驗而不硬推"
|
|||
|
|
case "low":
|
|||
|
|
return "多為背景脈絡,置入需非常克制"
|
|||
|
|
default:
|
|||
|
|
return value
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
relation = strings.TrimSpace(relation)
|
|||
|
|
if relation != "" && productFit >= 70 && IsPainNode(Node{NodeKind: nodeKind}) {
|
|||
|
|
return "與「" + relation + "」相關的求助帖,有機會自然帶入產品經驗"
|
|||
|
|
}
|
|||
|
|
if IsPainNode(Node{NodeKind: nodeKind}) {
|
|||
|
|
return "痛點類討論串,可視情境分享產品使用心得"
|
|||
|
|
}
|
|||
|
|
if productFit >= 60 {
|
|||
|
|
return "與產品使用情境相關,可作輕量經驗分享"
|
|||
|
|
}
|
|||
|
|
return ""
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func resolveNodeRef(ref string, labelToID map[string]string, nodes []Node) string {
|
|||
|
|
ref = strings.TrimSpace(ref)
|
|||
|
|
if ref == "" {
|
|||
|
|
return ""
|
|||
|
|
}
|
|||
|
|
for _, node := range nodes {
|
|||
|
|
if node.ID == ref {
|
|||
|
|
return node.ID
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
if id, ok := labelToID[strings.ToLower(ref)]; ok {
|
|||
|
|
return id
|
|||
|
|
}
|
|||
|
|
return ""
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func extractJSONObject(raw string) ([]byte, error) {
|
|||
|
|
text := strings.TrimSpace(raw)
|
|||
|
|
if text == "" {
|
|||
|
|
return nil, fmt.Errorf("empty LLM response")
|
|||
|
|
}
|
|||
|
|
if m := codeFenceRE.FindStringSubmatch(text); len(m) == 2 {
|
|||
|
|
text = strings.TrimSpace(m[1])
|
|||
|
|
}
|
|||
|
|
start := strings.Index(text, "{")
|
|||
|
|
if start < 0 {
|
|||
|
|
return nil, fmt.Errorf("LLM response does not contain JSON object")
|
|||
|
|
}
|
|||
|
|
end, ok := matchJSONObjectEnd(text, start)
|
|||
|
|
if !ok {
|
|||
|
|
return nil, fmt.Errorf("LLM response does not contain complete JSON object")
|
|||
|
|
}
|
|||
|
|
return []byte(text[start : end+1]), nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// matchJSONObjectEnd scans text from byte offset start and returns the index
// of the '}' that brings the brace nesting back to zero, with true. Braces
// inside double-quoted strings are ignored, and a backslash inside a string
// escapes the following byte. It returns (0, false) when the end of text is
// reached without closing the object.
func matchJSONObjectEnd(text string, start int) (int, bool) {
	var (
		nesting  int
		quoted   bool // currently inside a string literal
		skipNext bool // previous byte was a backslash inside a string
	)
	for pos := start; pos < len(text); pos++ {
		c := text[pos]
		switch {
		case quoted && skipNext:
			skipNext = false
		case quoted && c == '\\':
			skipNext = true
		case quoted && c == '"':
			quoted = false
		case quoted:
			// Ordinary string content: nothing to track.
		case c == '"':
			quoted = true
		case c == '{':
			nesting++
		case c == '}':
			nesting--
			if nesting == 0 {
				return pos, true
			}
		}
	}
	return 0, false
}
|