haixunMaster/haixun-backend/internal/library/knowledge/synth.go

377 lines
9.7 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package knowledge
import (
"encoding/json"
"fmt"
"regexp"
"strings"
libprompt "haixun-backend/internal/library/prompt"
"github.com/google/uuid"
)
// SynthInput carries the campaign context and search results that are
// rendered into the knowledge-graph user prompt by BuildUserPrompt and
// consulted again by ParseSynthOutput when interpreting the model's reply.
type SynthInput struct {
// BrandDisplayName is rendered on the optional "品牌" prompt line.
BrandDisplayName string
// TopicName is rendered on the optional "主題名稱" prompt line.
TopicName string
// ProductLabel is rendered on the optional "置入產品" prompt line.
ProductLabel string
// Goals is rendered on the optional "置入目標" prompt line.
Goals string
// Seed is trimmed and passed as the "seed" prompt variable; ParseSynthOutput
// also stores it on the graph and uses it as the label of the fallback core
// node.
Seed string
// ProductBrief is rendered on the optional "產品簡述" prompt line.
ProductBrief string
// TargetAudience is rendered on the optional "目標受眾" prompt line.
TargetAudience string
// Persona is rendered on the optional "主題目標" prompt line.
Persona string
// ResearchPillars is rendered as a bulleted list; blank entries are dropped.
ResearchPillars []string
// ResearchQuestions is rendered as a bulleted list; blank entries are dropped.
ResearchQuestions []string
// Sources are the search results listed in the prompt; BuildUserPrompt
// renders at most the first 30 of them.
Sources []BraveSource
}
// rawSynthNode is the JSON shape of one node as emitted by the model.
// Several values are accepted under both a camelCase and a snake_case key;
// each spelling decodes into its own field and ParseSynthOutput reconciles
// them.
type rawSynthNode struct {
// Label is the node's display text; nodes whose trimmed label is empty are
// skipped by ParseSynthOutput.
Label string `json:"label"`
// NodeKind / NodeKindSnake: the first non-blank of the two is used.
NodeKind string `json:"nodeKind"`
NodeKindSnake string `json:"node_kind"`
// Type equal to "core" marks the node as the core node.
Type string `json:"type"`
// Layer is 0 when the key is absent; ParseSynthOutput treats layer 0 as core.
Layer int `json:"layer"`
Relation string `json:"relation"`
// PlacementValue / PlacementValueAlt: the first non-blank of the two is used.
PlacementValue string `json:"placementValue"`
PlacementValueAlt string `json:"placement_value"`
// ProductFitScore / ProductFitScoreAlt: the camelCase value is used when it
// is positive, otherwise the snake_case one.
ProductFitScore int `json:"productFitScore"`
ProductFitScoreAlt int `json:"product_fit_score"`
// EvidenceURLs / EvidenceURLsAlt: the snake_case list is used only when the
// camelCase list is empty.
EvidenceURLs []string `json:"evidenceUrls"`
EvidenceURLsAlt []string `json:"evidence_urls"`
// RelevanceQueries / RelevanceQueriesAlt are merged and de-duplicated.
RelevanceQueries []string `json:"relevanceQueries"`
RelevanceQueriesAlt []string `json:"relevance_queries"`
// RecencyQueries / RecencyQueriesAlt are merged and de-duplicated.
RecencyQueries []string `json:"recencyQueries"`
RecencyQueriesAlt []string `json:"recency_queries"`
}
// rawSynthOutput is the top-level JSON object expected from the model: a
// list of nodes plus a list of edges between them.
type rawSynthOutput struct {
Nodes []rawSynthNode `json:"nodes"`
// Edges reference their endpoints by From/To; ParseSynthOutput resolves each
// reference against node IDs first and then against lower-cased node labels.
Edges []struct {
From string `json:"from"`
To string `json:"to"`
Relation string `json:"relation"`
} `json:"edges"`
}
// codeFenceRE matches a Markdown code fence, optionally tagged "json", and
// captures its contents (non-greedy, so the match ends at the nearest closing
// fence). extractJSONObject uses it to unwrap fenced model replies.
var codeFenceRE = regexp.MustCompile("(?s)```(?:json)?\\s*([\\s\\S]*?)```")
// BuildUserPrompt renders the knowledge-graph user prompt for the given
// input. Optional fields become labelled lines (or empty strings when blank),
// list fields become bulleted sections, and the leading search sources are
// listed as numbered entries. The assembled variables are handed to
// libprompt.KnowledgeGraphUser, whose result and error are returned as-is.
func BuildUserPrompt(in SynthInput) (string, error) {
	// Only the leading sources are rendered into the prompt.
	const maxSources = 30
	shown := in.Sources
	if len(shown) > maxSources {
		shown = shown[:maxSources]
	}

	var listing strings.Builder
	for idx, s := range shown {
		// Entries are numbered from 1.
		fmt.Fprintf(&listing, "[%d] query=%s\nurl=%s\ntitle=%s\nsnippet=%s\n\n",
			idx+1, s.Query, s.URL, s.Title, s.Snippet)
	}

	vars := make(map[string]string, 11)
	vars["brand_line"] = optionalLine("品牌", in.BrandDisplayName)
	vars["topic_line"] = optionalLine("主題名稱", in.TopicName)
	vars["product_line"] = optionalLine("置入產品", in.ProductLabel)
	vars["goals_line"] = optionalLine("置入目標", in.Goals)
	vars["seed"] = strings.TrimSpace(in.Seed)
	vars["product_brief_line"] = optionalLine("產品簡述", in.ProductBrief)
	vars["target_audience_line"] = optionalLine("目標受眾", in.TargetAudience)
	vars["persona_line"] = optionalLine("主題目標", in.Persona)
	vars["research_pillars_line"] = bulletLine("內容支柱(延伸知識要往這些方向廣泛展開)", in.ResearchPillars)
	vars["research_questions_line"] = bulletLine("受眾提問方向(可衍生成更多周邊節點)", in.ResearchQuestions)
	vars["sources"] = strings.TrimSpace(listing.String())

	return libprompt.KnowledgeGraphUser(vars)
}
// optionalLine is the package-internal alias for OptionalPromptLine; it
// forwards both arguments unchanged.
func optionalLine(label, value string) string {
return OptionalPromptLine(label, value)
}
// BulletPromptLine is the exported entry point for bulletLine: it returns
// title followed by one "- item" line per non-blank item, or "" when no
// non-blank item exists.
func BulletPromptLine(title string, items []string) string {
return bulletLine(title, items)
}
// bulletLine formats items as a titled bullet list. Each item is trimmed and
// blank items are dropped; the result is title, a newline, then one
// "- item" line per remaining entry (each newline-terminated). When nothing
// remains, the empty string is returned so the section vanishes from the
// prompt.
func bulletLine(title string, items []string) string {
	var body strings.Builder
	for _, raw := range items {
		if entry := strings.TrimSpace(raw); entry != "" {
			body.WriteString("- ")
			body.WriteString(entry)
			body.WriteString("\n")
		}
	}
	if body.Len() == 0 {
		return ""
	}
	return title + "\n" + body.String()
}
// OptionalPromptLine renders a single "label:value" prompt line terminated
// by a newline. The value is trimmed first; when it is blank the empty string
// is returned so the line is omitted from the prompt entirely.
//
// A full-width colon separates the label from the value. The separator
// literal was previously empty, which fused the two (e.g. "品牌Acme").
func OptionalPromptLine(label, value string) string {
	value = strings.TrimSpace(value)
	if value == "" {
		return ""
	}
	return label + ":" + value + "\n"
}
// ParseSynthOutput converts the model's raw reply into a Graph.
//
// The reply is reduced to its JSON object via extractJSONObject and decoded
// as rawSynthOutput; either step failing yields an empty Graph and the error.
// For every node with a non-blank label:
//   - layer 0 or type "core" marks the node as core (layer forced to 0, type
//     forced to "core", kind defaulting to "pain");
//   - otherwise a missing kind defaults by layer: >=2 "cause", 1 "symptom",
//     anything else "knowledge";
//   - evidence URLs are matched against sources by exact URL to attach the
//     snippet and query;
//   - the product-fit score falls back from the camelCase key to the
//     snake_case key to defaultProductFit whenever the value is not positive.
//
// When no core node was produced and the seed is non-blank, a synthetic core
// node labelled with the seed is placed first. Edges are kept only when both
// endpoints resolve to distinct nodes.
func ParseSynthOutput(raw string, in SynthInput, sources []BraveSource) (Graph, error) {
	body, err := extractJSONObject(raw)
	if err != nil {
		return Graph{}, err
	}
	var parsed rawSynthOutput
	if err = json.Unmarshal(body, &parsed); err != nil {
		return Graph{}, fmt.Errorf("parse knowledge graph json: %w", err)
	}

	seedLabel := strings.TrimSpace(in.Seed)
	result := Graph{
		Seed:         seedLabel,
		BraveSources: sources,
		Nodes:        []Node{},
		Edges:        []Edge{},
	}

	// Index sources by URL so evidence entries can borrow snippet and query.
	byURL := make(map[string]BraveSource, len(sources))
	for _, s := range sources {
		if s.URL == "" {
			continue
		}
		byURL[s.URL] = s
	}

	coreSeen := false
	for _, rawNode := range parsed.Nodes {
		label := strings.TrimSpace(rawNode.Label)
		if label == "" {
			continue
		}

		depth := rawNode.Layer
		typ := strings.TrimSpace(rawNode.Type)
		kind := firstNonEmpty(rawNode.NodeKind, rawNode.NodeKindSnake)
		switch {
		case depth == 0 || typ == "core":
			// Core node: normalise layer/type and default the kind.
			depth, typ, coreSeen = 0, "core", true
			if kind == "" {
				kind = "pain"
			}
		case kind != "":
			// The model supplied a kind; keep it.
		case depth >= 2:
			kind = "cause"
		case depth == 1:
			kind = "symptom"
		default:
			kind = "knowledge"
		}

		// The snake_case list is consulted only when the camelCase one is empty.
		urls := rawNode.EvidenceURLs
		if len(urls) == 0 {
			urls = rawNode.EvidenceURLsAlt
		}
		evidence := make([]Evidence, 0, len(urls))
		for _, link := range urls {
			link = strings.TrimSpace(link)
			if link == "" {
				continue
			}
			entry := Evidence{URL: link}
			if hit, found := byURL[link]; found {
				entry.Snippet, entry.Query = hit.Snippet, hit.Query
			}
			evidence = append(evidence, entry)
		}

		score := rawNode.ProductFitScore
		if score <= 0 {
			score = rawNode.ProductFitScoreAlt
		}
		if score <= 0 {
			score = defaultProductFit(kind, depth)
		}

		placement := firstNonEmpty(rawNode.PlacementValue, rawNode.PlacementValueAlt)
		result.Nodes = append(result.Nodes, Node{
			ID:              uuid.NewString(),
			Label:           label,
			NodeKind:        kind,
			Type:            typ,
			Layer:           depth,
			Relation:        strings.TrimSpace(rawNode.Relation),
			PlacementValue:  normalizePlacementReason(placement, rawNode.Relation, kind, score),
			ProductFitScore: score,
			PatrolRelevance: mergeStringLists(rawNode.RelevanceQueries, rawNode.RelevanceQueriesAlt),
			PatrolRecency:   mergeStringLists(rawNode.RecencyQueries, rawNode.RecencyQueriesAlt),
			Evidence:        evidence,
		})
	}

	// Guarantee a core node when the model did not supply one.
	if !coreSeen && seedLabel != "" {
		core := Node{
			ID:              uuid.NewString(),
			Label:           seedLabel,
			NodeKind:        "pain",
			Type:            "core",
			Layer:           0,
			Relation:        "核心種子主題",
			PlacementValue:  "核心痛點帖最常求推薦,適合以產品使用經驗自然回覆",
			ProductFitScore: 90,
		}
		withCore := make([]Node, 0, len(result.Nodes)+1)
		withCore = append(withCore, core)
		withCore = append(withCore, result.Nodes...)
		result.Nodes = withCore
	}

	// Later nodes win when two labels collide after lower-casing.
	labelToID := make(map[string]string, len(result.Nodes))
	for _, n := range result.Nodes {
		labelToID[strings.ToLower(strings.TrimSpace(n.Label))] = n.ID
	}

	for _, link := range parsed.Edges {
		src := resolveNodeRef(link.From, labelToID, result.Nodes)
		dst := resolveNodeRef(link.To, labelToID, result.Nodes)
		// Drop edges with an unresolved endpoint as well as self-loops.
		if src == "" || dst == "" || src == dst {
			continue
		}
		result.Edges = append(result.Edges, Edge{
			From:     src,
			To:       dst,
			Relation: strings.TrimSpace(link.Relation),
		})
	}
	return result, nil
}
// firstNonEmpty returns the first argument that is non-blank after trimming
// surrounding whitespace, in its trimmed form. It returns "" when every
// argument is blank or when no arguments are given.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
// mergeStringLists concatenates the given lists into one, trimming every
// entry, dropping blanks, and keeping only the first occurrence of each
// value. Order of first appearance is preserved. The result is always a
// non-nil slice, even when it is empty.
func mergeStringLists(groups ...[]string) []string {
	merged := make([]string, 0)
	seen := make(map[string]bool)
	for g := range groups {
		for _, raw := range groups[g] {
			entry := strings.TrimSpace(raw)
			if entry == "" || seen[entry] {
				continue
			}
			seen[entry] = true
			merged = append(merged, entry)
		}
	}
	return merged
}
// defaultProductFit supplies the product-fit score used when the model gave
// none: "pain" nodes score 90 at layer 0 and 80 elsewhere, "symptom" and
// "cause" nodes score 70, and every other kind scores 50.
func defaultProductFit(nodeKind string, layer int) int {
	if nodeKind == "pain" {
		if layer == 0 {
			return 90
		}
		return 80
	}
	if nodeKind == "symptom" || nodeKind == "cause" {
		return 70
	}
	return 50
}
func normalizePlacementReason(value, relation, nodeKind string, productFit int) string {
value = strings.TrimSpace(value)
if value != "" {
switch strings.ToLower(value) {
case "high":
return "受眾在此情境常有明確產品需求,適合自然分享使用經驗"
case "medium":
return "與產品使用情境相關,可輕量帶入經驗而不硬推"
case "low":
return "多為背景脈絡,置入需非常克制"
default:
return value
}
}
relation = strings.TrimSpace(relation)
if relation != "" && productFit >= 70 && IsPainNode(Node{NodeKind: nodeKind}) {
return "與「" + relation + "」相關的求助帖,有機會自然帶入產品經驗"
}
if IsPainNode(Node{NodeKind: nodeKind}) {
return "痛點類討論串,可視情境分享產品使用心得"
}
if productFit >= 60 {
return "與產品使用情境相關,可作輕量經驗分享"
}
return ""
}
// resolveNodeRef maps an edge endpoint reference to a node ID. The trimmed
// reference is first compared against every node's ID; failing that, its
// lower-cased form is looked up in labelToID. It returns "" when the
// reference is blank or matches nothing.
func resolveNodeRef(ref string, labelToID map[string]string, nodes []Node) string {
	key := strings.TrimSpace(ref)
	if key == "" {
		return ""
	}
	// An exact ID match takes precedence over a label match.
	for i := range nodes {
		if nodes[i].ID == key {
			return key
		}
	}
	// A missing key yields the zero value "", which is the not-found result.
	return labelToID[strings.ToLower(key)]
}
// extractJSONObject pulls the first brace-balanced JSON object out of an LLM
// reply. When the reply contains a Markdown code fence, only the fence's
// contents are searched. An error is returned when the reply is blank,
// contains no '{', or the object opened by the first '{' is never closed.
func extractJSONObject(raw string) ([]byte, error) {
	candidate := strings.TrimSpace(raw)
	if len(candidate) == 0 {
		return nil, fmt.Errorf("empty LLM response")
	}

	// Prefer the contents of a code fence when one is present.
	if groups := codeFenceRE.FindStringSubmatch(candidate); len(groups) == 2 {
		candidate = strings.TrimSpace(groups[1])
	}

	open := strings.IndexByte(candidate, '{')
	if open < 0 {
		return nil, fmt.Errorf("LLM response does not contain JSON object")
	}
	last, found := matchJSONObjectEnd(candidate, open)
	if !found {
		return nil, fmt.Errorf("LLM response does not contain complete JSON object")
	}
	return []byte(candidate[open : last+1]), nil
}
// matchJSONObjectEnd scans text from index start and returns the index of the
// '}' that brings the brace depth back to zero, together with true. Braces
// inside double-quoted strings are ignored, and a backslash inside a string
// escapes the following byte. It returns (0, false) when no such closing
// brace exists.
func matchJSONObjectEnd(text string, start int) (int, bool) {
	var (
		depth    int
		quoted   bool // currently inside a string literal
		skipNext bool // previous byte was a backslash inside a string
	)
	for pos := start; pos < len(text); pos++ {
		c := text[pos]
		switch {
		case skipNext:
			// This byte is escaped; consume it without interpretation.
			skipNext = false
		case quoted && c == '\\':
			skipNext = true
		case quoted && c == '"':
			quoted = false
		case quoted:
			// Ordinary string content.
		case c == '"':
			quoted = true
		case c == '{':
			depth++
		case c == '}':
			depth--
			if depth == 0 {
				return pos, true
			}
		}
	}
	return 0, false
}