61 lines
1.7 KiB
TypeScript
61 lines
1.7 KiB
TypeScript
|
|
import crypto from "crypto";
|
||
|
|
import { cacheGet, cacheSet } from "./cache";
|
||
|
|
import type { SearchResult } from "./types";
|
||
|
|
|
||
|
|
// Passed as the first argument to cacheGet/cacheSet by the notify-dedupe
// helpers in this module (presumably the cache namespace — confirm in ./cache).
const DEDUPE_NS = "dedupe";
|
||
|
|
|
||
|
|
/**
 * Produces a canonical form of a URL so equivalent links compare equal.
 *
 * For input that parses as an absolute URL, a leading `www.` is dropped from
 * the host, the query string and fragment are removed, and trailing slashes
 * are stripped from the path (an empty path becomes `/`).
 *
 * @param url - The raw URL string to canonicalise.
 * @returns The canonical URL string, or the whitespace-trimmed input when it
 *   cannot be parsed as a URL.
 */
export function normalizeThreadsUrl(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Unparseable input: fall back to the trimmed original string.
    return url.trim();
  }

  const bareHost = parsed.hostname.replace(/^www\./, "");
  const trimmedPath = parsed.pathname.replace(/\/+$/, "");

  parsed.hostname = bareHost;
  parsed.search = "";
  parsed.hash = "";
  // Stripping every trailing slash from the root path leaves "", so restore "/".
  parsed.pathname = trimmedPath === "" ? "/" : trimmedPath;

  return parsed.toString();
}
|
||
|
|
|
||
|
|
/**
 * Pulls the post identifier out of a Threads permalink of the form
 * `threads.com/@<user>/post/<id>` (the `threads.net` host is accepted too).
 *
 * The host must start at the beginning of the string or follow a character
 * that cannot be part of a host label, so look-alike hosts such as
 * `notthreads.com` are not mistaken for Threads links. Matching is
 * case-insensitive.
 *
 * @param url - Any string that may contain a Threads post URL.
 * @returns The `<id>` path segment (up to the next `/`, `?` or `#`), or
 *   `undefined` when the string holds no such permalink.
 */
export function extractThreadId(url: string): string | undefined {
  const match = url.match(
    /(?:^|[^a-z0-9-])threads\.(?:com|net)\/@[^/]+\/post\/([^/?#]+)/i,
  );
  return match?.[1];
}
|
||
|
|
|
||
|
|
/**
 * Builds the key under which a search result is de-duplicated.
 *
 * Prefers a thread identifier: `result.threadId` when it is non-empty,
 * otherwise whatever `extractThreadId` finds in `result.url`. When neither
 * yields an ID, the key is derived from the normalised URL instead.
 *
 * @param result - The search result to key.
 * @returns `thread:<id>` when a thread ID is available, otherwise
 *   `url_hash:<first 16 hex chars of the SHA-256 of the normalised URL>`.
 */
export function dedupeKeyForResult(result: SearchResult): string {
  // Deliberately `||`, not `??`: an empty-string threadId identifies nothing,
  // so it must not stop us from trying to recover the ID from the URL.
  const threadId = result.threadId || extractThreadId(result.url);
  if (threadId) return `thread:${threadId}`;

  const normalized = normalizeThreadsUrl(result.url);
  const digest = crypto.createHash("sha256").update(normalized).digest("hex");
  // A 16-hex-character prefix keeps the key short.
  return `url_hash:${digest.slice(0, 16)}`;
}
|
||
|
|
|
||
|
|
/**
 * Removes results that share a dedupe key with an earlier entry.
 *
 * The first result seen for each key (as computed by `dedupeKeyForResult`)
 * is kept, and the relative order of the kept results is unchanged. The
 * input array is not modified.
 *
 * @param results - Results to de-duplicate.
 * @returns A new array containing one result per distinct key.
 */
export function dedupeResults(results: SearchResult[]): SearchResult[] {
  // Map iteration follows insertion order, so first-seen order is preserved.
  const firstByKey = new Map<string, SearchResult>();
  for (const candidate of results) {
    const dedupeKey = dedupeKeyForResult(candidate);
    if (!firstByKey.has(dedupeKey)) {
      firstByKey.set(dedupeKey, candidate);
    }
  }
  return Array.from(firstByKey.values());
}
|
||
|
|
|
||
|
|
/**
 * Reports whether a dedupe key is currently flagged as already notified.
 *
 * Reads the `notify:<key>` entry through `cacheGet` using `DEDUPE_NS`; only
 * a value strictly equal to `true` counts as a duplicate.
 *
 * @param key - Dedupe key to look up.
 * @returns `true` if the cache lookup yields exactly `true`, else `false`.
 */
export function isNotifyDuplicate(key: string): boolean {
  const cacheKey = `notify:${key}`;
  const flagged = cacheGet<boolean>(DEDUPE_NS, cacheKey);
  return flagged === true;
}
|
||
|
|
|
||
|
|
/**
 * Flags a dedupe key as notified by storing `true` under `notify:<key>`
 * through `cacheSet` using `DEDUPE_NS`.
 *
 * @param key - Dedupe key to flag.
 * @param ttlMs - Forwarded unchanged to `cacheSet` (presumably the entry's
 *   time-to-live in milliseconds — confirm in ./cache).
 */
export function markNotified(key: string, ttlMs: number): void {
  const cacheKey = `notify:${key}`;
  cacheSet(DEDUPE_NS, cacheKey, true, ttlMs);
}
|
||
|
|
|
||
|
|
/**
 * Drops every result whose dedupe key is already flagged as notified.
 *
 * Each result's key comes from `dedupeKeyForResult` and is checked with
 * `isNotifyDuplicate`. The input array is not modified and nothing is marked
 * as notified here.
 *
 * @param results - Candidate results.
 * @returns A new array with only the results not flagged as notified, in
 *   their original order.
 */
export function filterNotifyDuplicates(results: SearchResult[]): SearchResult[] {
  const notYetNotified = (candidate: SearchResult): boolean =>
    !isNotifyDuplicate(dedupeKeyForResult(candidate));

  return results.filter(notYetNotified);
}
|