haixunMaster/lib/search/providers/crawler-provider.ts

63 lines
2.0 KiB
TypeScript
Raw Normal View History

2026-06-21 12:50:31 +00:00
import { search as browserSearch } from "@/lib/threads-browser/search";
import { cacheGet, cacheSet } from "../cache";
import { getSearchConfig } from "../config";
import { logSearchEvent } from "../logger";
import type { SearchProvider, SearchRequest, SearchResponse, SearchResult } from "../types";
/**
 * Search provider that answers queries through the Threads browser search
 * helper, with results cached per keyword/limit pair.
 *
 * The provider never throws from `search`: every failure path resolves to a
 * response with status "unavailable" and an empty result list.
 */
export class CrawlerSearchProvider implements SearchProvider {
  /**
   * @param storageState Value forwarded as the first argument to the browser
   *   search helper. NOTE(review): presumably a serialized browser session —
   *   confirm against the threads-browser module. `null` or a blank string
   *   disables the provider.
   */
  constructor(private storageState: string | null) {}

  /** Identifier of this provider. */
  name() {
    return "crawler" as const;
  }

  /**
   * Reports whether the provider can run: the crawler must be switched on in
   * the search config and a non-blank storage state must be present.
   */
  enabled(): boolean {
    const { crawler } = getSearchConfig();
    return crawler.enabled && Boolean(this.storageState?.trim());
  }

  /**
   * Runs a crawler-backed search for `req.keyword`, returning at most what the
   * browser search helper yields for `req.limit`.
   *
   * Flow: availability guard, then cache lookup, then live browser search,
   * then cache fill and a provider log event.
   *
   * @param req Search request; only `keyword` and `limit` are read here.
   * @returns A "success" response (flagged with `skipReason: "cache_hit"` when
   *   served from cache) or an "unavailable" response with no results.
   */
  async search(req: SearchRequest): Promise<SearchResponse> {
    if (!this.enabled()) {
      return { provider: "crawler", results: [], status: "unavailable" };
    }
    const state = this.storageState;
    if (!state) {
      return { provider: "crawler", results: [], status: "unavailable" };
    }

    const config = getSearchConfig();
    const { keyword, limit } = req;
    const key = `crawler:threads:${keyword}:${limit}`;

    // Any truthy cached value (including an empty list) counts as a hit.
    const hit = cacheGet<SearchResult[]>("crawler", key);
    if (hit) {
      return { provider: "crawler", results: hit, status: "success", skipReason: "cache_hit" };
    }

    try {
      const posts = await browserSearch(state, keyword, limit);
      const results: SearchResult[] = posts.map((post) => {
        const body = post.text;
        return {
          // Titles are the first 120 characters of the post text.
          title: body.slice(0, 120),
          url: post.permalink ?? "",
          snippet: body,
          author: post.authorName ?? "",
          publishedAt: post.postedAt,
          source: "crawler" as const,
          threadId: post.externalId,
        };
      });

      // Cache write and logging stay inside the try block, so a failure in
      // either one is reported as "unavailable" as well.
      cacheSet("crawler", key, results, config.crawler.cacheTtlMs);
      logSearchEvent({
        kind: "provider",
        provider: "crawler",
        status: "success",
        keyword,
        count: results.length,
      });

      return { provider: "crawler", results, status: "success" };
    } catch {
      // The error itself is discarded; only the outcome is recorded.
      logSearchEvent({
        kind: "provider",
        provider: "crawler",
        status: "unavailable",
        keyword,
      });
      return { provider: "crawler", results: [], status: "unavailable" };
    }
  }
}