package knowledge

import (
	"encoding/json"
	"fmt"
	"regexp"
	"strings"

	libprompt "haixun-backend/internal/library/prompt"

	"github.com/google/uuid"
)

// maxSynthPromptSources caps how many search sources are rendered into the
// user prompt by BuildUserPrompt.
const maxSynthPromptSources = 30

// SynthInput carries the values that are substituted into the knowledge-graph
// user prompt and the seed that ParseSynthOutput uses for the core node.
type SynthInput struct {
	BrandDisplayName  string
	TopicName         string
	ProductLabel      string
	Goals             string
	Seed              string
	ProductBrief      string
	TargetAudience    string
	Persona           string
	ResearchPillars   []string
	ResearchQuestions []string
	Sources           []BraveSource
}

// rawSynthNode is the wire shape of one node in the model's JSON answer.
// Several fields exist twice so that both the camelCase and the snake_case
// spelling of the same key are accepted.
type rawSynthNode struct {
	Label               string   `json:"label"`
	NodeKind            string   `json:"nodeKind"`
	NodeKindSnake       string   `json:"node_kind"`
	Type                string   `json:"type"`
	Layer               int      `json:"layer"`
	Relation            string   `json:"relation"`
	PlacementValue      string   `json:"placementValue"`
	PlacementValueAlt   string   `json:"placement_value"`
	ProductFitScore     int      `json:"productFitScore"`
	ProductFitScoreAlt  int      `json:"product_fit_score"`
	EvidenceURLs        []string `json:"evidenceUrls"`
	EvidenceURLsAlt     []string `json:"evidence_urls"`
	RelevanceQueries    []string `json:"relevanceQueries"`
	RelevanceQueriesAlt []string `json:"relevance_queries"`
	RecencyQueries      []string `json:"recencyQueries"`
	RecencyQueriesAlt   []string `json:"recency_queries"`
}

// rawSynthOutput is the top-level JSON object expected from the model: a list
// of nodes plus edges that reference nodes by label (or ID).
type rawSynthOutput struct {
	Nodes []rawSynthNode `json:"nodes"`
	Edges []struct {
		From     string `json:"from"`
		To       string `json:"to"`
		Relation string `json:"relation"`
	} `json:"edges"`
}

// codeFenceRE captures the body of the first Markdown code fence, with an
// optional "json" language tag.
var codeFenceRE = regexp.MustCompile("(?s)```(?:json)?\\s*([\\s\\S]*?)```")

// BuildUserPrompt renders the knowledge-graph user prompt for in. At most
// maxSynthPromptSources sources are included, numbered from 1. Empty optional
// fields produce empty template variables. The final rendering (and any
// error) comes from libprompt.KnowledgeGraphUser.
func BuildUserPrompt(in SynthInput) (string, error) {
	limit := len(in.Sources)
	if limit > maxSynthPromptSources {
		limit = maxSynthPromptSources
	}

	var sources strings.Builder
	for i, src := range in.Sources[:limit] {
		fmt.Fprintf(&sources, "[%d] query=%s\nurl=%s\ntitle=%s\nsnippet=%s\n\n",
			i+1, src.Query, src.URL, src.Title, src.Snippet)
	}

	vars := map[string]string{
		"brand_line":              optionalLine("品牌", in.BrandDisplayName),
		"topic_line":              optionalLine("主題名稱", in.TopicName),
		"product_line":            optionalLine("置入產品", in.ProductLabel),
		"goals_line":              optionalLine("置入目標", in.Goals),
		"seed":                    strings.TrimSpace(in.Seed),
		"product_brief_line":      optionalLine("產品簡述", in.ProductBrief),
		"target_audience_line":    optionalLine("目標受眾", in.TargetAudience),
		"persona_line":            optionalLine("主題目標", in.Persona),
		"research_pillars_line":   bulletLine("內容支柱(延伸知識要往這些方向廣泛展開)", in.ResearchPillars),
		"research_questions_line": bulletLine("受眾提問方向(可衍生成更多周邊節點)", in.ResearchQuestions),
		"sources":                 strings.TrimSpace(sources.String()),
	}
	return libprompt.KnowledgeGraphUser(vars)
}

// optionalLine is the package-internal alias of OptionalPromptLine.
func optionalLine(label, value string) string {
	return OptionalPromptLine(label, value)
}

// BulletPromptLine formats items as a titled bullet list for use in a prompt.
// Blank items are dropped; it returns "" when no item remains.
func BulletPromptLine(title string, items []string) string {
	return bulletLine(title, items)
}

// bulletLine renders "title:\n- a\n- b\n", skipping blank items, or "" when
// every item is blank.
func bulletLine(title string, items []string) string {
	lines := make([]string, 0, len(items))
	for _, item := range items {
		item = strings.TrimSpace(item)
		if item == "" {
			continue
		}
		lines = append(lines, "- "+item)
	}
	if len(lines) == 0 {
		return ""
	}
	return title + ":\n" + strings.Join(lines, "\n") + "\n"
}

// OptionalPromptLine renders "label:value\n" with value trimmed, or "" when
// value is blank.
func OptionalPromptLine(label, value string) string {
	value = strings.TrimSpace(value)
	if value == "" {
		return ""
	}
	return label + ":" + value + "\n"
}

// ParseSynthOutput converts the model's raw answer into a Graph.
//
// The JSON object is located with extractJSONObject and decoded into
// rawSynthOutput. Nodes without a label are skipped; every kept node gets a
// fresh UUID. A node with layer 0 or type "core" is normalised to a core node.
// When the answer contains no core node and in.Seed is non-blank, a core node
// labelled with the seed is prepended. Edges are resolved by node ID or
// case-insensitive label; unresolved edges and self-loops are dropped.
//
// It returns an error when no complete JSON object can be found or when the
// object cannot be unmarshalled.
func ParseSynthOutput(raw string, in SynthInput, sources []BraveSource) (Graph, error) {
	payload, err := extractJSONObject(raw)
	if err != nil {
		return Graph{}, err
	}
	var out rawSynthOutput
	if err := json.Unmarshal(payload, &out); err != nil {
		return Graph{}, fmt.Errorf("parse knowledge graph json: %w", err)
	}

	seed := strings.TrimSpace(in.Seed)
	graph := Graph{
		Seed:         seed,
		BraveSources: sources,
		Nodes:        []Node{},
		Edges:        []Edge{},
	}

	// Key on the trimmed URL: evidence URLs are trimmed before lookup, so an
	// untrimmed key would never match.
	sourceByURL := make(map[string]BraveSource, len(sources))
	for _, src := range sources {
		if u := strings.TrimSpace(src.URL); u != "" {
			sourceByURL[u] = src
		}
	}

	hasCore := false
	for _, item := range out.Nodes {
		label := strings.TrimSpace(item.Label)
		if label == "" {
			continue
		}

		layer := item.Layer
		nodeType := strings.TrimSpace(item.Type)
		nodeKind := firstNonEmpty(item.NodeKind, item.NodeKindSnake)
		if layer == 0 || nodeType == "core" {
			layer = 0
			nodeType = "core"
			if nodeKind == "" {
				nodeKind = "pain"
			}
			hasCore = true
		}
		if nodeKind == "" {
			switch {
			case layer >= 2:
				nodeKind = "cause"
			case layer == 1:
				nodeKind = "symptom"
			default:
				nodeKind = "knowledge"
			}
		}

		// Accept both key spellings, trimmed and de-duplicated, the same way
		// the relevance/recency query lists are handled below.
		evidenceURLs := mergeStringLists(item.EvidenceURLs, item.EvidenceURLsAlt)
		evidence := make([]Evidence, 0, len(evidenceURLs))
		for _, u := range evidenceURLs {
			ev := Evidence{URL: u}
			if src, ok := sourceByURL[u]; ok {
				ev.Snippet = src.Snippet
				ev.Query = src.Query
			}
			evidence = append(evidence, ev)
		}

		// Non-positive scores count as "not provided".
		fit := item.ProductFitScore
		if fit <= 0 {
			fit = item.ProductFitScoreAlt
		}
		if fit <= 0 {
			fit = defaultProductFit(nodeKind, layer)
		}

		placementValue := firstNonEmpty(item.PlacementValue, item.PlacementValueAlt)
		graph.Nodes = append(graph.Nodes, Node{
			ID:              uuid.NewString(),
			Label:           label,
			NodeKind:        nodeKind,
			Type:            nodeType,
			Layer:           layer,
			Relation:        strings.TrimSpace(item.Relation),
			PlacementValue:  normalizePlacementReason(placementValue, item.Relation, nodeKind, fit),
			ProductFitScore: fit,
			PatrolRelevance: mergeStringLists(item.RelevanceQueries, item.RelevanceQueriesAlt),
			PatrolRecency:   mergeStringLists(item.RecencyQueries, item.RecencyQueriesAlt),
			Evidence:        evidence,
		})
	}

	if !hasCore && seed != "" {
		core := Node{
			ID:              uuid.NewString(),
			Label:           seed,
			NodeKind:        "pain",
			Type:            "core",
			Layer:           0,
			Relation:        "核心種子主題",
			PlacementValue:  "核心痛點帖最常求推薦,適合以產品使用經驗自然回覆",
			ProductFitScore: 90,
		}
		graph.Nodes = append([]Node{core}, graph.Nodes...)
	}

	// When two nodes share a label (case-insensitively) the later one wins.
	labelToID := make(map[string]string, len(graph.Nodes))
	for _, node := range graph.Nodes {
		labelToID[strings.ToLower(strings.TrimSpace(node.Label))] = node.ID
	}

	for _, edge := range out.Edges {
		from := resolveNodeRef(edge.From, labelToID, graph.Nodes)
		to := resolveNodeRef(edge.To, labelToID, graph.Nodes)
		if from == "" || to == "" || from == to {
			continue
		}
		graph.Edges = append(graph.Edges, Edge{
			From:     from,
			To:       to,
			Relation: strings.TrimSpace(edge.Relation),
		})
	}
	return graph, nil
}

// firstNonEmpty returns the first value that is non-blank after trimming, in
// trimmed form, or "" when all values are blank.
func firstNonEmpty(values ...string) string {
	for _, value := range values {
		if trimmed := strings.TrimSpace(value); trimmed != "" {
			return trimmed
		}
	}
	return ""
}

// mergeStringLists concatenates groups in order, trimming each item and
// dropping blanks and repeats. The result is never nil.
func mergeStringLists(groups ...[]string) []string {
	out := []string{}
	seen := map[string]struct{}{}
	for _, group := range groups {
		for _, item := range group {
			item = strings.TrimSpace(item)
			if item == "" {
				continue
			}
			if _, ok := seen[item]; ok {
				continue
			}
			seen[item] = struct{}{}
			out = append(out, item)
		}
	}
	return out
}

// defaultProductFit is the fallback product-fit score used when the model
// supplied none: 90 for a layer-0 "pain" node, 80 for other "pain" nodes,
// 70 for "symptom"/"cause", and 50 for everything else.
func defaultProductFit(nodeKind string, layer int) int {
	switch nodeKind {
	case "pain":
		if layer == 0 {
			return 90
		}
		return 80
	case "symptom", "cause":
		return 70
	default:
		return 50
	}
}

// normalizePlacementReason turns the model's placement value into a
// human-readable reason. The shorthand values "high", "medium" and "low"
// (case-insensitive) map to canned sentences; any other non-blank value is
// returned trimmed. When value is blank, a reason is derived from relation,
// nodeKind (via IsPainNode) and productFit, or "" if none applies.
func normalizePlacementReason(value, relation, nodeKind string, productFit int) string {
	value = strings.TrimSpace(value)
	if value != "" {
		switch strings.ToLower(value) {
		case "high":
			return "受眾在此情境常有明確產品需求,適合自然分享使用經驗"
		case "medium":
			return "與產品使用情境相關,可輕量帶入經驗而不硬推"
		case "low":
			return "多為背景脈絡,置入需非常克制"
		default:
			return value
		}
	}

	isPain := IsPainNode(Node{NodeKind: nodeKind})
	relation = strings.TrimSpace(relation)
	if relation != "" && productFit >= 70 && isPain {
		return "與「" + relation + "」相關的求助帖,有機會自然帶入產品經驗"
	}
	if isPain {
		return "痛點類討論串,可視情境分享產品使用心得"
	}
	if productFit >= 60 {
		return "與產品使用情境相關,可作輕量經驗分享"
	}
	return ""
}

// resolveNodeRef maps an edge endpoint to a node ID. An exact node ID match is
// tried first, then a case-insensitive label lookup in labelToID. It returns
// "" when ref is blank or matches nothing.
func resolveNodeRef(ref string, labelToID map[string]string, nodes []Node) string {
	ref = strings.TrimSpace(ref)
	if ref == "" {
		return ""
	}
	for _, node := range nodes {
		if node.ID == ref {
			return node.ID
		}
	}
	if id, ok := labelToID[strings.ToLower(ref)]; ok {
		return id
	}
	return ""
}

// extractJSONObject returns the first complete, brace-balanced JSON object
// found in raw.
//
// The body of the first Markdown code fence is searched first. If it holds no
// complete object (for example the first fence is not JSON, or the lazy fence
// match was cut short by "```" inside a JSON string), the whole response is
// searched instead. The returned bytes are not validated as JSON.
func extractJSONObject(raw string) ([]byte, error) {
	text := strings.TrimSpace(raw)
	if text == "" {
		return nil, fmt.Errorf("empty LLM response")
	}

	candidates := make([]string, 0, 2)
	if m := codeFenceRE.FindStringSubmatch(text); len(m) == 2 {
		candidates = append(candidates, strings.TrimSpace(m[1]))
	}
	candidates = append(candidates, text)

	sawBrace := false
	for _, candidate := range candidates {
		start := strings.Index(candidate, "{")
		if start < 0 {
			continue
		}
		sawBrace = true
		if end, ok := matchJSONObjectEnd(candidate, start); ok {
			return []byte(candidate[start : end+1]), nil
		}
	}
	if !sawBrace {
		return nil, fmt.Errorf("LLM response does not contain JSON object")
	}
	return nil, fmt.Errorf("LLM response does not contain complete JSON object")
}

// matchJSONObjectEnd scans text from start (expected to index a '{') and
// returns the index of the '}' that closes that object. Braces inside
// double-quoted strings are ignored and backslash escapes are honoured.
// ok is false when the object is never closed.
func matchJSONObjectEnd(text string, start int) (int, bool) {
	depth := 0
	inString := false
	escaped := false
	for i := start; i < len(text); i++ {
		ch := text[i]
		if inString {
			if escaped {
				// The escaped byte is consumed without interpretation.
				escaped = false
				continue
			}
			switch ch {
			case '\\':
				escaped = true
			case '"':
				inString = false
			}
			continue
		}
		switch ch {
		case '"':
			inString = true
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return i, true
			}
		}
	}
	return 0, false
}