thread-master/backend/internal/library/knowledge/queries.go

415 lines
9.4 KiB
Go
Raw Permalink Normal View History

2026-06-26 08:37:04 +00:00
package knowledge
import (
"encoding/json"
"fmt"
"strings"
"sync"
libprompt "haixun-backend/internal/library/prompt"
)
// queryConfig holds the JSON-decoded tuning values for query planning.
// loadQueryConfig fills it once per process. The readers in this file treat a
// non-positive numeric limit as "unset" and substitute a built-in default,
// except where noted below.
type queryConfig struct {
// Overall budgets chosen by queryBudget. The Hybrid* variants apply when the
// strategy is ExpandStrategyHybrid. A non-positive HybridMaxSupplemental
// yields a budget of 0, i.e. no supplemental queries for hybrid runs.
MaxPlanQueries int `json:"max_plan_queries"`
HybridMaxPlanQueries int `json:"hybrid_max_plan_queries"`
MaxSupplemental int `json:"max_supplemental_queries"`
HybridMaxSupplemental int `json:"hybrid_max_supplemental_queries"`
// The next four fields are not read anywhere in this section of the file.
// NOTE(review): presumably consumed by the search/collection code elsewhere
// in the package - confirm against callers.
ResultsPerQuery int `json:"results_per_query"`
MinSourcesBeforeStop int `json:"min_sources_before_stop"`
MaxSourcesCap int `json:"max_sources_cap"`
BraveCollectConcurrency int `json:"brave_collect_concurrency"`
// Per-source caps used by planPrimaryQueries: how many entries of each
// input slice / template list are considered.
MaxPatrolKeywordQueries int `json:"max_patrol_keyword_queries"`
MaxQuestionQueries int `json:"max_question_queries"`
MaxPillarQueries int `json:"max_pillar_queries"`
MaxPlanBaseQueries int `json:"max_plan_base_queries"`
MaxPeripheralQueries int `json:"max_peripheral_queries"`
MaxL1Labels int `json:"max_l1_labels"`
// MinPainTagCandidates is exposed through MinPainTagCandidates().
// MinTotalTagCandidates is not read in this section of the file.
MinPainTagCandidates int `json:"min_pain_tag_candidates"`
MinTotalTagCandidates int `json:"min_total_tag_candidates"`
// Templates for primary planning, rendered by renderQueryTemplate with
// {{name}} placeholders. PlanBase, PlanPeripheral and PlanAudience receive
// "seed" and "audience"; PlanL1Pain receives "seed" and "label"; PlanPillar
// receives "pillar"; PlanQuestion receives "question". PlanL1Cause is not
// read in this section of the file.
PlanBase []string `json:"plan_base"`
PlanPeripheral []string `json:"plan_peripheral"`
PlanAudience string `json:"plan_audience"`
PlanL1Cause string `json:"plan_l1_cause"`
PlanL1Pain string `json:"plan_l1_pain"`
PlanPillar string `json:"plan_pillar"`
PlanQuestion string `json:"plan_question"`
// Templates for supplemental planning. Supplemental receives "seed";
// SupplementalL1 receives "seed" and "label"; SupplementalPillar receives
// "pillar".
Supplemental []string `json:"supplemental"`
SupplementalL1 string `json:"supplemental_l1"`
SupplementalPillar string `json:"supplemental_pillar"`
// RecencySuffix is appended to a label by BuildRecencyQuery.
// RecencyHelpMarkers is passed to strings.ContainsAny there, so it is
// interpreted as a set of individual characters, not as a list of words.
RecencySuffix string `json:"recency_suffix"`
RecencyHelpMarkers string `json:"recency_help_markers"`
}
// Package-level cache for the query configuration. loadQueryConfig writes
// queryCfg and queryCfgErr from inside queryCfgOnce.Do, so the load is
// attempted at most once and both values are reused by every later call.
var (
queryCfgOnce sync.Once // runs the one-time load in loadQueryConfig
queryCfg queryConfig // decoded configuration returned by loadQueryConfig
queryCfgErr error // error recorded by the one-time load, if any
)
// loadQueryConfig returns the process-wide query configuration.
//
// The configuration is fetched from libprompt.KnowledgeGraphQueryConfig,
// re-encoded as JSON and decoded into a queryConfig. This happens at most
// once (guarded by queryCfgOnce); the outcome of that single attempt,
// success or failure, is returned by every subsequent call and is never
// retried.
//
// On failure the returned queryConfig is the zero value: the decode targets a
// local variable and is published only when it succeeds, so callers never
// observe a half-populated configuration alongside a non-nil error. Errors
// are wrapped with %w so the underlying cause stays reachable via errors.Is
// and errors.As.
func loadQueryConfig() (queryConfig, error) {
	queryCfgOnce.Do(func() {
		raw, err := libprompt.KnowledgeGraphQueryConfig()
		if err != nil {
			queryCfgErr = fmt.Errorf("knowledge: load query config: %w", err)
			return
		}
		// Round-trip through JSON to map the raw value onto the tagged struct.
		payload, err := json.Marshal(raw)
		if err != nil {
			queryCfgErr = fmt.Errorf("knowledge: encode query config: %w", err)
			return
		}
		// json.Unmarshal keeps decoding after a type mismatch and fills the
		// remaining fields, so decode into a scratch value first.
		var decoded queryConfig
		if err := json.Unmarshal(payload, &decoded); err != nil {
			queryCfgErr = fmt.Errorf("knowledge: decode query config: %w", err)
			return
		}
		queryCfg = decoded
	})
	return queryCfg, queryCfgErr
}
// MaxPlanQueriesPerRound reports the configured max_plan_queries value.
// It falls back to 15 when the configuration cannot be loaded or the value is
// not positive.
//
// NOTE(review): queryBudget falls back to 10 for the same field; confirm
// whether the two defaults are meant to differ.
func MaxPlanQueriesPerRound() int {
	const fallback = 15
	cfg, err := loadQueryConfig()
	if err == nil && cfg.MaxPlanQueries > 0 {
		return cfg.MaxPlanQueries
	}
	return fallback
}
// MaxSupplementalQueries reports the configured max_supplemental_queries
// value. It falls back to 5 when the configuration cannot be loaded or the
// value is not positive.
//
// NOTE(review): queryBudget falls back to 4 for the same field; confirm
// whether the two defaults are meant to differ.
func MaxSupplementalQueries() int {
	const fallback = 5
	cfg, err := loadQueryConfig()
	if err == nil && cfg.MaxSupplemental > 0 {
		return cfg.MaxSupplemental
	}
	return fallback
}
// MinPainTagCandidates reports the configured min_pain_tag_candidates value.
// It falls back to 8 when the configuration cannot be loaded or the value is
// not positive.
func MinPainTagCandidates() int {
	const fallback = 8
	cfg, err := loadQueryConfig()
	if err == nil && cfg.MinPainTagCandidates > 0 {
		return cfg.MinPainTagCandidates
	}
	return fallback
}
// PlanInput carries everything the query planners in this file read when
// building a list of search queries.
type PlanInput struct {
// Seed is the root topic. It is trimmed before use and PlanQueries returns
// nil when it is blank.
Seed string
// TargetAudience is trimmed and exposed to templates as "audience"; the
// audience template is only rendered when it is non-blank.
TargetAudience string
// ProductBrief is not read by any function in this section of the file.
ProductBrief string
// Pillars, Questions, PatrolKeywords and L1Labels are candidate sources.
// Primary planning reads all four; supplemental planning reads only
// Pillars and L1Labels. Blank entries are skipped.
Pillars []string
Questions []string
PatrolKeywords []string
L1Labels []string
// Supplemental selects supplementalQueries instead of planPrimaryQueries
// in PlanQueries.
Supplemental bool
// Strategy is passed to queryBudget; ExpandStrategyHybrid selects the
// hybrid budgets and limits primary planning to one peripheral query.
Strategy ExpandStrategy
}
// PlanQueries builds the list of search queries for in.
//
// It returns nil when the query configuration cannot be loaded or when
// in.Seed is blank. Otherwise it delegates to supplementalQueries when
// in.Supplemental is set and to planPrimaryQueries when it is not.
func PlanQueries(in PlanInput) []string {
	cfg, err := loadQueryConfig()
	switch {
	case err != nil:
		return nil
	case strings.TrimSpace(in.Seed) == "":
		return nil
	case in.Supplemental:
		return supplementalQueries(cfg, in)
	default:
		return planPrimaryQueries(cfg, in)
	}
}
// queryBudget returns the maximum number of queries one planning call may
// produce.
//
// The configured value is used when it is positive; otherwise the fallback
// for that combination applies:
//
//	supplemental + hybrid : HybridMaxSupplemental, fallback 0 (disabled)
//	supplemental          : MaxSupplemental,       fallback 4
//	primary + hybrid      : HybridMaxPlanQueries,  fallback 5
//	primary               : MaxPlanQueries,        fallback 10
func queryBudget(cfg queryConfig, strategy ExpandStrategy, supplemental bool) int {
	hybrid := strategy == ExpandStrategyHybrid

	var configured, fallback int
	switch {
	case supplemental && hybrid:
		configured, fallback = cfg.HybridMaxSupplemental, 0
	case supplemental:
		configured, fallback = cfg.MaxSupplemental, 4
	case hybrid:
		configured, fallback = cfg.HybridMaxPlanQueries, 5
	default:
		configured, fallback = cfg.MaxPlanQueries, 10
	}

	if configured > 0 {
		return configured
	}
	return fallback
}
// planPrimaryQueries builds the first-round query list for in.Seed.
//
// Candidates are appended in a fixed priority order - patrol keywords,
// questions, pillars, base templates, the audience template, peripheral
// templates, then L1 labels - and the function returns as soon as the budget
// from queryBudget is reached. Every candidate is trimmed; blank and repeated
// candidates are skipped and do not consume budget. Each source additionally
// has its own cap, which applies to the position in the input slice, so blank
// or duplicate entries still use up a slot of that per-source cap.
//
// It returns nil when the budget is not positive. The seed is not validated
// here; PlanQueries rejects a blank seed before calling.
func planPrimaryQueries(cfg queryConfig, in PlanInput) []string {
	seed := strings.TrimSpace(in.Seed)
	budget := queryBudget(cfg, in.Strategy, false)
	if budget <= 0 {
		return nil
	}

	seen := make(map[string]struct{}, budget)
	out := make([]string, 0, budget)
	// add appends q when it is non-blank and new, and reports whether the
	// budget is exhausted after the append.
	add := func(q string) bool {
		q = strings.TrimSpace(q)
		if q == "" {
			return false
		}
		if _, ok := seen[q]; ok {
			return false
		}
		seen[q] = struct{}{}
		out = append(out, q)
		return len(out) >= budget
	}
	// limitOr substitutes fallback for a non-positive configured limit.
	limitOr := func(configured, fallback int) int {
		if configured <= 0 {
			return fallback
		}
		return configured
	}

	vars := map[string]string{"seed": seed, "audience": strings.TrimSpace(in.TargetAudience)}

	// Patrol keywords are used verbatim.
	patrolLimit := limitOr(cfg.MaxPatrolKeywordQueries, 4)
	for i, keyword := range in.PatrolKeywords {
		if i >= patrolLimit {
			break
		}
		if add(keyword) {
			return out
		}
	}

	// Questions go through the question template when one is configured,
	// otherwise they are used as-is.
	questionLimit := limitOr(cfg.MaxQuestionQueries, 3)
	questionTpl := strings.TrimSpace(cfg.PlanQuestion)
	for i, question := range in.Questions {
		if i >= questionLimit {
			break
		}
		question = strings.TrimSpace(question)
		if question == "" {
			continue
		}
		query := question
		if questionTpl != "" {
			query = renderQueryTemplate(questionTpl, map[string]string{"question": question})
		}
		if add(query) {
			return out
		}
	}

	// Pillars go through the pillar template when one is configured,
	// otherwise a fixed suffix is appended.
	pillarLimit := limitOr(cfg.MaxPillarQueries, 2)
	pillarTpl := strings.TrimSpace(cfg.PlanPillar)
	for i, pillar := range in.Pillars {
		if i >= pillarLimit {
			break
		}
		pillar = strings.TrimSpace(pillar)
		if pillar == "" {
			continue
		}
		query := pillar + " 請問"
		if pillarTpl != "" {
			query = renderQueryTemplate(pillarTpl, map[string]string{"pillar": pillar})
		}
		if add(query) {
			return out
		}
	}

	// Base templates see both "seed" and "audience".
	baseLimit := limitOr(cfg.MaxPlanBaseQueries, 3)
	for i, tpl := range cfg.PlanBase {
		if i >= baseLimit {
			break
		}
		if add(renderQueryTemplate(tpl, vars)) {
			return out
		}
	}

	// The audience template is only meaningful when an audience was given.
	if vars["audience"] != "" && strings.TrimSpace(cfg.PlanAudience) != "" {
		if add(renderQueryTemplate(cfg.PlanAudience, vars)) {
			return out
		}
	}

	// Resolve the default BEFORE clamping for hybrid runs. Clamping first let
	// an unconfigured limit (0) skip the clamp and then pick up the default of
	// 2, so hybrid runs got more peripheral queries with no configuration than
	// with an explicit one.
	peripheralLimit := limitOr(cfg.MaxPeripheralQueries, 2)
	if in.Strategy == ExpandStrategyHybrid && peripheralLimit > 1 {
		peripheralLimit = 1
	}
	for i, tpl := range cfg.PlanPeripheral {
		if i >= peripheralLimit {
			break
		}
		if add(renderQueryTemplate(tpl, vars)) {
			return out
		}
	}

	// L1 labels are combined with the seed; a label equal to the seed would
	// add nothing and is skipped.
	l1Limit := limitOr(cfg.MaxL1Labels, 2)
	for i, label := range in.L1Labels {
		if i >= l1Limit {
			break
		}
		label = strings.TrimSpace(label)
		if label == "" || label == seed {
			continue
		}
		l1vars := map[string]string{"seed": seed, "label": label}
		if add(renderQueryTemplate(cfg.PlanL1Pain, l1vars)) {
			return out
		}
	}
	return out
}
// supplementalQueries builds the follow-up query list for in.Seed.
//
// Candidates come, in order, from the supplemental templates (rendered with
// "seed"), the pillars (rendered with the supplemental pillar template, when
// one is configured) and the L1 labels (rendered with the supplemental L1
// template using "seed" and "label"). Blank and repeated candidates are
// dropped, and the result is truncated to the budget from queryBudget.
//
// It returns nil when the seed is blank or the budget is not positive.
func supplementalQueries(cfg queryConfig, in PlanInput) []string {
	seed := strings.TrimSpace(in.Seed)
	if seed == "" {
		return nil
	}
	limit := queryBudget(cfg, in.Strategy, true)
	if limit <= 0 {
		return nil
	}

	known := map[string]struct{}{}
	queries := make([]string, 0, limit)
	// push records a candidate unless it is blank or already present.
	push := func(candidate string) {
		candidate = strings.TrimSpace(candidate)
		if candidate == "" {
			return
		}
		if _, dup := known[candidate]; dup {
			return
		}
		known[candidate] = struct{}{}
		queries = append(queries, candidate)
	}

	// All supplemental templates are rendered; the final cap trims any excess.
	seedVars := map[string]string{"seed": seed}
	for _, tpl := range cfg.Supplemental {
		push(renderQueryTemplate(tpl, seedVars))
	}

	pillarTpl := strings.TrimSpace(cfg.SupplementalPillar)
	for _, raw := range in.Pillars {
		pillar := strings.TrimSpace(raw)
		if pillar == "" {
			continue
		}
		if pillarTpl != "" {
			push(renderQueryTemplate(pillarTpl, map[string]string{"pillar": pillar}))
		}
		if len(queries) >= limit {
			return capQueries(queries, limit)
		}
	}

	for _, raw := range in.L1Labels {
		label := strings.TrimSpace(raw)
		if label == "" {
			continue
		}
		push(renderQueryTemplate(cfg.SupplementalL1, map[string]string{"seed": seed, "label": label}))
		if len(queries) >= limit {
			break
		}
	}
	return capQueries(queries, limit)
}
// PlanBreadthQueries plans queries in supplemental mode: it forces
// Supplemental to true on its own copy of in and delegates to PlanQueries.
func PlanBreadthQueries(in PlanInput) []string {
	breadth := in
	breadth.Supplemental = true
	return PlanQueries(breadth)
}
// PlanBootstrapQueries plans primary queries that do not depend on a
// generated research map: pillars, questions and L1 labels are cleared and
// supplemental mode is switched off before delegating to PlanQueries. The
// caller's PlanInput is not modified because in is received by value.
func PlanBootstrapQueries(in PlanInput) []string {
	in.Pillars, in.Questions, in.L1Labels = nil, nil, nil
	in.Supplemental = false
	return PlanQueries(in)
}
// QueriesExcept filters planned down to the queries that do not appear in
// executed. Both sides are compared after trimming surrounding whitespace,
// blank entries are ignored, and the surviving queries are returned trimmed
// and in their original order. Repeats within planned are kept. The result is
// always a non-nil slice.
func QueriesExcept(planned, executed []string) []string {
	ran := make(map[string]struct{}, len(executed))
	for _, raw := range executed {
		if key := strings.TrimSpace(raw); key != "" {
			ran[key] = struct{}{}
		}
	}

	pending := make([]string, 0, len(planned))
	for _, raw := range planned {
		candidate := strings.TrimSpace(raw)
		if candidate == "" {
			continue
		}
		if _, already := ran[candidate]; !already {
			pending = append(pending, candidate)
		}
	}
	return pending
}
// BuildRecencyQuery turns label into a recency-oriented query.
//
// It returns "" when the query configuration cannot be loaded or label is
// blank. A label that already contains any character from the configured
// recency_help_markers set is returned trimmed and otherwise unchanged. In
// every other case the configured recency_suffix (or "請問" when that is
// blank) is appended after a single space.
//
// NOTE(review): the markers are matched with strings.ContainsAny, i.e. as
// individual characters; confirm the configured value is meant as a character
// set rather than a list of words.
func BuildRecencyQuery(label string) string {
	cfg, err := loadQueryConfig()
	if err != nil {
		return ""
	}

	trimmed := strings.TrimSpace(label)
	switch {
	case trimmed == "":
		return ""
	case strings.ContainsAny(trimmed, cfg.RecencyHelpMarkers):
		return trimmed
	}

	suffix := "請問"
	if configured := strings.TrimSpace(cfg.RecencySuffix); configured != "" {
		suffix = configured
	}
	return fmt.Sprintf("%s %s", trimmed, suffix)
}
// renderQueryTemplate substitutes every "{{key}}" placeholder in tpl with the
// matching value from vars and returns the result with surrounding whitespace
// trimmed. Placeholders whose key is absent from vars are left untouched.
//
// Substitution is done in a single pass with strings.NewReplacer, so text
// that came from a value is never scanned again. Replacing key by key while
// ranging over the map made the output depend on Go's randomized map
// iteration order whenever a value (for example a user-supplied seed or
// label) itself contained a placeholder such as "{{label}}".
func renderQueryTemplate(tpl string, vars map[string]string) string {
	if len(vars) == 0 {
		return strings.TrimSpace(tpl)
	}
	pairs := make([]string, 0, 2*len(vars))
	for key, value := range vars {
		// Pair order only matters if one placeholder is a prefix of another,
		// which would require a key that itself contains "}}".
		pairs = append(pairs, "{{"+key+"}}", value)
	}
	return strings.TrimSpace(strings.NewReplacer(pairs...).Replace(tpl))
}
// capQueries truncates items to at most max entries. A non-positive max means
// "no cap" and returns items unchanged. The truncated result shares its
// backing array with items.
func capQueries(items []string, max int) []string {
	if max > 0 && len(items) > max {
		return items[:max]
	}
	return items
}
// L1LabelsFromNodes collects the trimmed, non-blank labels of every node
// whose Layer is 1, preserving input order. The result is always a non-nil
// slice.
func L1LabelsFromNodes(nodes []Node) []string {
	labels := make([]string, 0, len(nodes))
	for i := range nodes {
		if nodes[i].Layer != 1 {
			continue
		}
		if trimmed := strings.TrimSpace(nodes[i].Label); trimmed != "" {
			labels = append(labels, trimmed)
		}
	}
	return labels
}