115 lines
3.7 KiB
TypeScript
115 lines
3.7 KiB
TypeScript
import type { ThreadsApiCredentials } from "@/lib/threads-api/types";
|
|
import { dedupeKeyForResult, filterNotifyDuplicates, markNotified } from "./dedupe";
|
|
import { logSearchEvent } from "./logger";
|
|
import { filterNotifiableResults, type NotifyRuleContext } from "./notify-rules";
|
|
import { getSearchConfig } from "./config";
|
|
import { runOrchestratedSearch, type OrchestratedSearchResult } from "./orchestrator-search";
|
|
import { BraveSearchProvider } from "./providers/brave-provider";
|
|
import { CrawlerSearchProvider } from "./providers/crawler-provider";
|
|
import { ThreadsApiSearchProvider } from "./providers/threads-provider";
|
|
import type { SearchSourceMode } from "./source-mode";
|
|
import type { KeywordInput, SearchProvider, SearchResult } from "./types";
|
|
|
|
/** Re-exports the result type imported from `./orchestrator-search` so it is available from this module too. */
export type { OrchestratedSearchResult };
|
|
|
|
/**
 * Options accepted by the `SearchOrchestrator` constructor.
 *
 * Every field is optional; omitted fields fall back to the defaults applied
 * in the constructor.
 */
export interface SearchOrchestratorOptions {
  /**
   * Credentials handed to the default Threads API provider.
   * Omitted or `null` results in `null` being passed to the provider.
   */
  threadsCredentials?: ThreadsApiCredentials | null;

  /**
   * Storage state handed to the default crawler provider.
   * Omitted or `null` results in `null` being passed to the provider.
   */
  crawlerStorageState?: string | null;

  /** When crawler results already exist, do not trigger the crawler again. Defaults to `false`. */
  skipCrawler?: boolean;

  /** Source mode for the patrol search. Defaults to `"mixed"`. */
  sourceMode?: SearchSourceMode;

  /** For tests: inject mock providers in place of the default implementations. */
  providers?: {
    threads?: SearchProvider;
    brave?: SearchProvider;
    crawler?: SearchProvider;
  };
}
|
|
|
|
/**
 * Bundles the three search providers (Threads API, Brave, crawler) together
 * with the search options, and exposes the orchestrated search plus the
 * notification filtering step.
 */
export class SearchOrchestrator {
  private readonly threads: SearchProvider;
  private readonly brave: SearchProvider;
  private readonly crawler: SearchProvider;
  private readonly skipCrawler: boolean;
  private readonly sourceMode: SearchSourceMode;

  /**
   * @param options Optional configuration. Injected `providers` take
   *   precedence; otherwise the default provider implementations are built
   *   from `threadsCredentials` / `crawlerStorageState`. `skipCrawler`
   *   defaults to `false` and `sourceMode` to `"mixed"`.
   */
  constructor(options: SearchOrchestratorOptions = {}) {
    const { providers, threadsCredentials, crawlerStorageState } = options;

    this.threads =
      providers?.threads ?? new ThreadsApiSearchProvider(threadsCredentials ?? null);
    this.brave = providers?.brave ?? new BraveSearchProvider();
    this.crawler =
      providers?.crawler ?? new CrawlerSearchProvider(crawlerStorageState ?? null);
    this.skipCrawler = options.skipCrawler ?? false;
    this.sourceMode = options.sourceMode ?? "mixed";
  }

  /**
   * Runs the orchestrated search for one keyword by delegating to
   * `runOrchestratedSearch` with this instance's providers and options.
   *
   * @param keyword Keyword to search for.
   * @returns The result produced by `runOrchestratedSearch`.
   */
  async search(keyword: KeywordInput): Promise<OrchestratedSearchResult> {
    return runOrchestratedSearch(
      {
        threads: this.threads,
        brave: this.brave,
        crawler: this.crawler,
      },
      keyword,
      { skipCrawler: this.skipCrawler, sourceMode: this.sourceMode }
    );
  }

  /**
   * Selects the results that should be notified: rule matching followed by
   * de-duplication.
   *
   * A rule-matched result is dropped when `filterNotifyDuplicates` reports it
   * as a duplicate, or when another result with the same dedupe key has
   * already been accepted earlier in this same call. Every skipped result and
   * every accepted result is logged through `logSearchEvent`.
   *
   * Note: accepted results are marked as notified inside this method (TTL
   * from `dedupe.notifyTtlMs` in the search config) and logged with status
   * `"sent"`, i.e. before the caller has actually delivered anything.
   *
   * @param results Candidate search results.
   * @param ctx Context passed through to `filterNotifiableResults`.
   * @returns The rule-matched, de-duplicated results, in their original order.
   */
  filterForNotify(
    results: SearchResult[],
    ctx: NotifyRuleContext
  ): Array<SearchResult & { matchedRules: string[] }> {
    const cfg = getSearchConfig();
    const matched = filterNotifiableResults(results, ctx);
    // Precomputed so the "rule_not_matched" pass below is a Set lookup
    // instead of a linear scan of `matched` per result.
    const matchedKeys = new Set<string>(matched.map((m) => dedupeKeyForResult(m)));
    const freshKeys = new Set<string>();

    const fresh = matched.filter((r) => {
      const key = dedupeKeyForResult(r);
      // `markNotified` only runs after this filter, so the notified store
      // cannot catch two results sharing a key within one batch; `freshKeys`
      // covers that case.
      const isDuplicate =
        freshKeys.has(key) || filterNotifyDuplicates([r]).length === 0;
      if (isDuplicate) {
        logSearchEvent({
          kind: "provider",
          provider: "dedupe",
          status: "skipped",
          reason: key.startsWith("thread:") ? "duplicated thread_id" : "duplicated url_hash",
          keyword: key,
        });
        return false;
      }
      freshKeys.add(key);
      return true;
    });

    // Log results that were rejected by the notify rules. Duplicates were
    // already logged above, and anything sharing a key with an accepted
    // result is skipped.
    for (const r of results) {
      const key = dedupeKeyForResult(r);
      if (freshKeys.has(key) || matchedKeys.has(key)) continue;
      logSearchEvent({
        kind: "provider",
        provider: "notify",
        status: "skipped",
        reason: "rule_not_matched",
        keyword: r.url,
      });
    }

    for (const r of fresh) {
      markNotified(dedupeKeyForResult(r), cfg.dedupe.notifyTtlMs);
      logSearchEvent({
        kind: "provider",
        provider: "notify",
        status: "sent",
        keyword: r.url,
        reason: r.matchedRules.join(","),
      });
    }

    return fresh;
  }
}
|