thread-master/backend/internal/library/knowledge/synth.go

377 lines
9.7 KiB
Go
Raw Normal View History

2026-06-26 08:37:04 +00:00
package knowledge
import (
"encoding/json"
"fmt"
"regexp"
"strings"
libprompt "haixun-backend/internal/library/prompt"
"github.com/google/uuid"
)
// SynthInput carries the campaign context and search results used to build
// the knowledge-graph prompt (BuildUserPrompt) and to post-process the model's
// reply (ParseSynthOutput).
type SynthInput struct {
// BrandDisplayName, TopicName, ProductLabel and Goals are each rendered by
// BuildUserPrompt as an optional labelled line; blank values are omitted.
BrandDisplayName string
TopicName string
ProductLabel string
Goals string
// Seed is whitespace-trimmed before use. ParseSynthOutput stores it as the
// graph seed and uses it as the label of the fallback core node.
Seed string
// ProductBrief, TargetAudience and Persona are rendered as optional
// labelled lines, like the fields above.
ProductBrief string
TargetAudience string
Persona string
// ResearchPillars and ResearchQuestions are rendered as titled bullet
// lists; blank entries are skipped.
ResearchPillars []string
ResearchQuestions []string
// Sources are the search results listed in the prompt; BuildUserPrompt
// includes at most the first 30.
Sources []BraveSource
}
// rawSynthNode is the decoding target for one element of the "nodes" array in
// the model's JSON reply. Several properties are accepted under both a
// camelCase and a snake_case key; each spelling decodes into its own field and
// ParseSynthOutput reconciles the pair afterwards.
type rawSynthNode struct {
	// Label is the node's display text.
	Label string `json:"label"`

	// Node kind, under either key spelling.
	NodeKind      string `json:"nodeKind"`
	NodeKindSnake string `json:"node_kind"`

	// Type, Layer and Relation are accepted under a single key each.
	// Layer decodes to 0 when the key is absent.
	Type     string `json:"type"`
	Layer    int    `json:"layer"`
	Relation string `json:"relation"`

	// Placement value, under either key spelling.
	PlacementValue    string `json:"placementValue"`
	PlacementValueAlt string `json:"placement_value"`

	// Product-fit score, under either key spelling.
	ProductFitScore    int `json:"productFitScore"`
	ProductFitScoreAlt int `json:"product_fit_score"`

	// Evidence URLs, under either key spelling.
	EvidenceURLs    []string `json:"evidenceUrls"`
	EvidenceURLsAlt []string `json:"evidence_urls"`

	// Relevance queries, under either key spelling.
	RelevanceQueries    []string `json:"relevanceQueries"`
	RelevanceQueriesAlt []string `json:"relevance_queries"`

	// Recency queries, under either key spelling.
	RecencyQueries    []string `json:"recencyQueries"`
	RecencyQueriesAlt []string `json:"recency_queries"`
}
// rawSynthEdge is the decoding target for one element of the "edges" array in
// the model's JSON reply. From and To are node references that
// ParseSynthOutput resolves to node IDs.
type rawSynthEdge struct {
	From     string `json:"from"`
	To       string `json:"to"`
	Relation string `json:"relation"`
}

// rawSynthOutput is the top-level shape decoded from the model's JSON reply:
// a list of nodes and a list of edges between them.
type rawSynthOutput struct {
	Nodes []rawSynthNode `json:"nodes"`
	Edges []rawSynthEdge `json:"edges"`
}
// codeFenceRE matches a Markdown code block delimited by triple backticks,
// optionally tagged "json" right after the opening fence. Whitespace following
// the opening fence is skipped; capture group 1 holds the remaining text up to
// the first closing fence (lazy match, may span lines).
var codeFenceRE = regexp.MustCompile("(?s)```(?:json)?\\s*([\\s\\S]*?)```")
// BuildUserPrompt assembles the template variables for the knowledge-graph
// user prompt from in and returns whatever libprompt.KnowledgeGraphUser
// produces for them.
//
// Scalar fields become optional labelled lines (empty when the field is
// blank), the research lists become titled bullet lists, and the sources are
// rendered as numbered query/url/title/snippet records. Only the first 30
// sources are included.
func BuildUserPrompt(in SynthInput) (string, error) {
	// Cap the listing at the leading 30 sources.
	listed := in.Sources
	if len(listed) > 30 {
		listed = listed[:30]
	}

	var listing strings.Builder
	for idx, src := range listed {
		// Records are numbered from 1.
		fmt.Fprintf(&listing, "[%d] query=%s\nurl=%s\ntitle=%s\nsnippet=%s\n\n",
			idx+1, src.Query, src.URL, src.Title, src.Snippet)
	}

	vars := make(map[string]string, 11)
	vars["brand_line"] = optionalLine("品牌", in.BrandDisplayName)
	vars["topic_line"] = optionalLine("主題名稱", in.TopicName)
	vars["product_line"] = optionalLine("置入產品", in.ProductLabel)
	vars["goals_line"] = optionalLine("置入目標", in.Goals)
	vars["seed"] = strings.TrimSpace(in.Seed)
	vars["product_brief_line"] = optionalLine("產品簡述", in.ProductBrief)
	vars["target_audience_line"] = optionalLine("目標受眾", in.TargetAudience)
	vars["persona_line"] = optionalLine("主題目標", in.Persona)
	vars["research_pillars_line"] = bulletLine("內容支柱(延伸知識要往這些方向廣泛展開)", in.ResearchPillars)
	vars["research_questions_line"] = bulletLine("受眾提問方向(可衍生成更多周邊節點)", in.ResearchQuestions)
	vars["sources"] = strings.TrimSpace(listing.String())

	return libprompt.KnowledgeGraphUser(vars)
}
// optionalLine is the package-internal shorthand for OptionalPromptLine: it
// forwards label and value unchanged and returns that function's result.
func optionalLine(label, value string) string {
return OptionalPromptLine(label, value)
}
// BulletPromptLine is the exported entry point for bulletLine: it forwards
// title and items unchanged and returns that function's result.
func BulletPromptLine(title string, items []string) string {
return bulletLine(title, items)
}
// bulletLine renders title followed by one "- item" line per non-blank entry
// of items, each line (including the last) terminated by a newline. Entries
// are whitespace-trimmed first; if none remain the result is the empty string
// and the title is not emitted. The title itself is used verbatim.
func bulletLine(title string, items []string) string {
	var bullets strings.Builder
	for _, raw := range items {
		entry := strings.TrimSpace(raw)
		if entry == "" {
			continue
		}
		bullets.WriteString("- ")
		bullets.WriteString(entry)
		bullets.WriteByte('\n')
	}
	if bullets.Len() == 0 {
		return ""
	}
	return title + "\n" + bullets.String()
}
// OptionalPromptLine renders a single "label:value" prompt line terminated by
// a newline, or the empty string when value is blank after trimming.
//
// A full-width colon is inserted between label and value so the two do not run
// together. No separator is added when label is empty or already ends with a
// colon (ASCII or full-width, optionally followed by spaces or tabs), so
// callers that pass a pre-punctuated label get their text back unchanged.
func OptionalPromptLine(label, value string) string {
	value = strings.TrimSpace(value)
	if value == "" {
		return ""
	}
	// Inspect the label without trailing blanks so "Label: " counts as
	// already punctuated; the label itself is emitted verbatim.
	head := strings.TrimRight(label, " \t")
	if head == "" || strings.HasSuffix(head, ":") || strings.HasSuffix(head, ":") {
		return label + value + "\n"
	}
	return label + ":" + value + "\n"
}
// ParseSynthOutput turns the model's raw reply into a Graph.
//
// The JSON object is located with extractJSONObject and decoded into
// rawSynthOutput; if either step fails an empty Graph and the error are
// returned. Otherwise:
//
//   - Nodes with a blank label are dropped. A node is treated as the core node
//     when its layer is 0 or its type is "core"; because Layer decodes to 0
//     when the key is absent, a node without a "layer" key is also treated as
//     core. Missing node kinds, product-fit scores and placement reasons are
//     filled with defaults.
//   - Evidence URLs are enriched with the snippet and query of the matching
//     entry in sources (exact URL match).
//   - If no core node was produced and the seed is non-blank, a core node
//     labelled with the seed is prepended.
//   - Edges are kept only when both endpoints resolve to distinct nodes,
//     either by node ID or by case-insensitive label.
//
// sources is stored on the returned graph as-is.
func ParseSynthOutput(raw string, in SynthInput, sources []BraveSource) (Graph, error) {
	payload, err := extractJSONObject(raw)
	if err != nil {
		return Graph{}, err
	}
	var decoded rawSynthOutput
	if err := json.Unmarshal(payload, &decoded); err != nil {
		return Graph{}, fmt.Errorf("parse knowledge graph json: %w", err)
	}

	seed := strings.TrimSpace(in.Seed)
	graph := Graph{
		Seed:         seed,
		BraveSources: sources,
		Nodes:        []Node{},
		Edges:        []Edge{},
	}

	// Index sources by URL so evidence entries can pick up snippet and query.
	sourceByURL := make(map[string]BraveSource)
	for _, src := range sources {
		if src.URL == "" {
			continue
		}
		sourceByURL[src.URL] = src
	}

	coreSeen := false
	for _, rawNode := range decoded.Nodes {
		label := strings.TrimSpace(rawNode.Label)
		if label == "" {
			continue
		}

		layer := rawNode.Layer
		nodeType := strings.TrimSpace(rawNode.Type)
		kind := firstNonEmpty(rawNode.NodeKind, rawNode.NodeKindSnake)

		// Either signal marks the core node; normalise both fields so they agree.
		isCore := layer == 0 || nodeType == "core"
		if isCore {
			layer, nodeType = 0, "core"
			coreSeen = true
		}
		if kind == "" {
			switch {
			case isCore:
				kind = "pain"
			case layer >= 2:
				kind = "cause"
			case layer == 1:
				kind = "symptom"
			default:
				kind = "knowledge"
			}
		}

		// The snake_case list is used only when the camelCase one is empty.
		urls := rawNode.EvidenceURLs
		if len(urls) == 0 {
			urls = rawNode.EvidenceURLsAlt
		}
		evidence := make([]Evidence, 0, len(urls))
		for _, u := range urls {
			u = strings.TrimSpace(u)
			if u == "" {
				continue
			}
			entry := Evidence{URL: u}
			if src, found := sourceByURL[u]; found {
				entry.Snippet = src.Snippet
				entry.Query = src.Query
			}
			evidence = append(evidence, entry)
		}

		// Non-positive scores count as "not provided".
		fit := rawNode.ProductFitScore
		if fit <= 0 {
			fit = rawNode.ProductFitScoreAlt
		}
		if fit <= 0 {
			fit = defaultProductFit(kind, layer)
		}

		placement := firstNonEmpty(rawNode.PlacementValue, rawNode.PlacementValueAlt)
		node := Node{
			ID:              uuid.NewString(),
			Label:           label,
			NodeKind:        kind,
			Type:            nodeType,
			Layer:           layer,
			Relation:        strings.TrimSpace(rawNode.Relation),
			PlacementValue:  normalizePlacementReason(placement, rawNode.Relation, kind, fit),
			ProductFitScore: fit,
			PatrolRelevance: mergeStringLists(rawNode.RelevanceQueries, rawNode.RelevanceQueriesAlt),
			PatrolRecency:   mergeStringLists(rawNode.RecencyQueries, rawNode.RecencyQueriesAlt),
			Evidence:        evidence,
		}
		graph.Nodes = append(graph.Nodes, node)
	}

	// Guarantee a core node by synthesising one from the seed, placed first.
	if !coreSeen && seed != "" {
		seedNode := Node{
			ID:              uuid.NewString(),
			Label:           seed,
			NodeKind:        "pain",
			Type:            "core",
			Layer:           0,
			Relation:        "核心種子主題",
			PlacementValue:  "核心痛點帖最常求推薦,適合以產品使用經驗自然回覆",
			ProductFitScore: 90,
		}
		graph.Nodes = append([]Node{seedNode}, graph.Nodes...)
	}

	// Later nodes win when two labels collide case-insensitively.
	labelToID := make(map[string]string, len(graph.Nodes))
	for _, node := range graph.Nodes {
		key := strings.ToLower(strings.TrimSpace(node.Label))
		labelToID[key] = node.ID
	}

	for _, rawEdge := range decoded.Edges {
		from := resolveNodeRef(rawEdge.From, labelToID, graph.Nodes)
		to := resolveNodeRef(rawEdge.To, labelToID, graph.Nodes)
		// Skip dangling references and self-loops.
		if from == "" || to == "" || from == to {
			continue
		}
		graph.Edges = append(graph.Edges, Edge{
			From:     from,
			To:       to,
			Relation: strings.TrimSpace(rawEdge.Relation),
		})
	}
	return graph, nil
}
// firstNonEmpty returns the first argument that is non-blank after trimming
// surrounding whitespace, in trimmed form. It returns the empty string when
// every argument is blank or none are given.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
// mergeStringLists concatenates groups in argument order, trimming each entry,
// dropping blank entries and keeping only the first occurrence of each trimmed
// value. The result is always a non-nil slice, even when it is empty.
func mergeStringLists(groups ...[]string) []string {
	merged := make([]string, 0)
	known := make(map[string]bool)
	for g := range groups {
		for _, raw := range groups[g] {
			entry := strings.TrimSpace(raw)
			if entry == "" || known[entry] {
				continue
			}
			known[entry] = true
			merged = append(merged, entry)
		}
	}
	return merged
}
// defaultProductFit returns the fallback product-fit score for a node that
// came without one: 90 for a "pain" node on layer 0, 80 for any other "pain"
// node, 70 for "symptom" and "cause" nodes, and 50 for every other kind.
func defaultProductFit(nodeKind string, layer int) int {
	if nodeKind == "pain" {
		if layer == 0 {
			return 90
		}
		return 80
	}
	if nodeKind == "symptom" || nodeKind == "cause" {
		return 70
	}
	return 50
}
func normalizePlacementReason(value, relation, nodeKind string, productFit int) string {
value = strings.TrimSpace(value)
if value != "" {
switch strings.ToLower(value) {
case "high":
return "受眾在此情境常有明確產品需求,適合自然分享使用經驗"
case "medium":
return "與產品使用情境相關,可輕量帶入經驗而不硬推"
case "low":
return "多為背景脈絡,置入需非常克制"
default:
return value
}
}
relation = strings.TrimSpace(relation)
if relation != "" && productFit >= 70 && IsPainNode(Node{NodeKind: nodeKind}) {
return "與「" + relation + "」相關的求助帖,有機會自然帶入產品經驗"
}
if IsPainNode(Node{NodeKind: nodeKind}) {
return "痛點類討論串,可視情境分享產品使用心得"
}
if productFit >= 60 {
return "與產品使用情境相關,可作輕量經驗分享"
}
return ""
}
// resolveNodeRef maps an edge endpoint reference to a node ID. The trimmed
// reference is first compared against the IDs in nodes; failing that, its
// lower-cased form is looked up in labelToID. It returns the empty string when
// ref is blank or nothing matches.
func resolveNodeRef(ref string, labelToID map[string]string, nodes []Node) string {
	key := strings.TrimSpace(ref)
	if key == "" {
		return ""
	}
	// An exact ID match takes precedence over a label match.
	for i := range nodes {
		if nodes[i].ID == key {
			return key
		}
	}
	// A missing key yields the zero value "", which is the not-found result.
	return labelToID[strings.ToLower(key)]
}
func extractJSONObject(raw string) ([]byte, error) {
text := strings.TrimSpace(raw)
if text == "" {
return nil, fmt.Errorf("empty LLM response")
}
if m := codeFenceRE.FindStringSubmatch(text); len(m) == 2 {
text = strings.TrimSpace(m[1])
}
start := strings.Index(text, "{")
if start < 0 {
return nil, fmt.Errorf("LLM response does not contain JSON object")
}
end, ok := matchJSONObjectEnd(text, start)
if !ok {
return nil, fmt.Errorf("LLM response does not contain complete JSON object")
}
return []byte(text[start : end+1]), nil
}
// matchJSONObjectEnd scans text from byte offset start and returns the index
// of the "}" that brings the brace nesting back to zero, with ok == true.
// Braces inside double-quoted strings are ignored, and a backslash inside a
// string causes the following byte to be skipped so that an escaped quote does
// not end the string. If the text ends before the nesting returns to zero the
// result is (0, false).
//
// The caller is expected to pass the offset of an opening "{".
func matchJSONObjectEnd(text string, start int) (int, bool) {
	var (
		nesting  int
		quoted   bool
		skipNext bool
	)
	for pos := start; pos < len(text); pos++ {
		c := text[pos]
		switch {
		case skipNext:
			// Byte following a backslash inside a string: consume it blindly.
			skipNext = false
		case quoted && c == '\\':
			skipNext = true
		case quoted && c == '"':
			quoted = false
		case quoted:
			// Ordinary string content carries no structure.
		case c == '"':
			quoted = true
		case c == '{':
			nesting++
		case c == '}':
			nesting--
			if nesting == 0 {
				return pos, true
			}
		}
	}
	return 0, false
}