// File: haixunMaster/lib/search/orchestrator.ts
// (115 lines, 3.7 KiB, TypeScript; revision dated 2026-06-21 12:50:31 +00:00)
import type { ThreadsApiCredentials } from "@/lib/threads-api/types";
import { dedupeKeyForResult, filterNotifyDuplicates, markNotified } from "./dedupe";
import { logSearchEvent } from "./logger";
import { filterNotifiableResults, type NotifyRuleContext } from "./notify-rules";
import { getSearchConfig } from "./config";
import { runOrchestratedSearch, type OrchestratedSearchResult } from "./orchestrator-search";
import { BraveSearchProvider } from "./providers/brave-provider";
import { CrawlerSearchProvider } from "./providers/crawler-provider";
import { ThreadsApiSearchProvider } from "./providers/threads-provider";
import type { SearchSourceMode } from "./source-mode";
import type { KeywordInput, SearchProvider, SearchResult } from "./types";
export type { OrchestratedSearchResult };
/**
 * Construction options for `SearchOrchestrator`.
 *
 * Every field is optional; the orchestrator's constructor supplies a default
 * for anything that is omitted.
 */
export interface SearchOrchestratorOptions {
/** Credentials passed to the default Threads API provider; `null`/omitted is forwarded as `null`. */
threadsCredentials?: ThreadsApiCredentials | null;
/** Storage state passed to the default crawler provider; `null`/omitted is forwarded as `null`. */
crawlerStorageState?: string | null;
/** Do not trigger the crawler again when crawler results already exist. Defaults to `false`. */
skipCrawler?: boolean;
/** Source mode for the patrol ("haixun") search. Defaults to `"mixed"`. */
sourceMode?: SearchSourceMode;
/** For tests: inject mock providers in place of the default ones. */
providers?: {
threads?: SearchProvider;
brave?: SearchProvider;
crawler?: SearchProvider;
};
}
/**
 * Bundles the Threads API, Brave and crawler search providers behind a single
 * entry point and post-filters results for notification.
 *
 * Providers can be injected through `options.providers` (intended for tests);
 * any provider that is not injected is constructed with its default
 * implementation.
 */
export class SearchOrchestrator {
  private readonly threads: SearchProvider;
  private readonly brave: SearchProvider;
  private readonly crawler: SearchProvider;
  private readonly skipCrawler: boolean;
  private readonly sourceMode: SearchSourceMode;

  /**
   * @param options Optional overrides. `skipCrawler` defaults to `false` and
   *   `sourceMode` defaults to `"mixed"`.
   */
  constructor(options: SearchOrchestratorOptions = {}) {
    this.threads =
      options.providers?.threads ??
      new ThreadsApiSearchProvider(options.threadsCredentials ?? null);
    this.brave = options.providers?.brave ?? new BraveSearchProvider();
    this.crawler =
      options.providers?.crawler ??
      new CrawlerSearchProvider(options.crawlerStorageState ?? null);
    this.skipCrawler = options.skipCrawler ?? false;
    this.sourceMode = options.sourceMode ?? "mixed";
  }

  /**
   * Runs the orchestrated search for one keyword by delegating to
   * `runOrchestratedSearch` with this instance's providers and settings.
   *
   * @param keyword Keyword to search for.
   * @returns Whatever `runOrchestratedSearch` resolves to.
   */
  async search(keyword: KeywordInput): Promise<OrchestratedSearchResult> {
    return runOrchestratedSearch(
      {
        threads: this.threads,
        brave: this.brave,
        crawler: this.crawler,
      },
      keyword,
      { skipCrawler: this.skipCrawler, sourceMode: this.sourceMode }
    );
  }

  /**
   * Selects the results that should trigger a notification: they must match
   * the notify rules and must not be duplicates.
   *
   * A result is treated as a duplicate when `filterNotifyDuplicates` rejects
   * it, or when an earlier result in the same batch has the same dedupe key.
   * Every surviving result is marked as notified with the configured
   * `dedupe.notifyTtlMs`, and a search event is logged for each skipped or
   * sent result.
   *
   * @param results Candidate search results.
   * @param ctx Context forwarded to the notify-rule matcher.
   * @returns Rule-matched, de-duplicated results, at most one per dedupe key.
   */
  filterForNotify(
    results: SearchResult[],
    ctx: NotifyRuleContext
  ): Array<SearchResult & { matchedRules: string[] }> {
    const cfg = getSearchConfig();
    const matched = filterNotifiableResults(results, ctx);
    const matchedKeys = new Set<string>();
    const freshKeys = new Set<string>();

    const fresh = matched.filter((r) => {
      const key = dedupeKeyForResult(r);
      matchedKeys.add(key);
      // markNotified only runs after this pass, so the notify store cannot see
      // repeats inside the current batch; freshKeys covers that gap.
      const duplicated =
        freshKeys.has(key) || filterNotifyDuplicates([r]).length === 0;
      if (duplicated) {
        logSearchEvent({
          kind: "provider",
          provider: "dedupe",
          status: "skipped",
          reason: key.startsWith("thread:") ? "duplicated thread_id" : "duplicated url_hash",
          keyword: key,
        });
        return false;
      }
      freshKeys.add(key);
      return true;
    });

    // Report every input whose dedupe key did not match any notify rule.
    for (const r of results) {
      if (matchedKeys.has(dedupeKeyForResult(r))) continue;
      logSearchEvent({
        kind: "provider",
        provider: "notify",
        status: "skipped",
        reason: "rule_not_matched",
        keyword: r.url,
      });
    }

    for (const r of fresh) {
      markNotified(dedupeKeyForResult(r), cfg.dedupe.notifyTtlMs);
      logSearchEvent({
        kind: "provider",
        provider: "notify",
        status: "sent",
        keyword: r.url,
        reason: r.matchedRules.join(","),
      });
    }
    return fresh;
  }
}