96 lines
3.6 KiB
TypeScript
96 lines
3.6 KiB
TypeScript
import { canUseDailyQuota, getDailyQuotaUsed, incrementDailyQuota } from "@/lib/search/quota";
|
||
import type { ScanTask } from "@/lib/services/scan-tasks";
|
||
import type { Page } from "playwright";
|
||
import { captureDebugStep, createDebugRun } from "./debug";
|
||
|
||
// Epoch-millisecond timestamp before which crawler page requests are refused.
// Starts at 0 (no cooldown recorded) and lives only in this module's memory.
let crawlerBlockedUntil = 0;
|
||
|
||
/**
 * Length of the protective cooldown, in milliseconds.
 *
 * The duration is configured in minutes through `CRAWLER_BLOCK_COOLDOWN_MINUTES`
 * and falls back to 180 minutes when that variable is missing or not a positive integer.
 *
 * @returns The cooldown duration converted to milliseconds.
 */
function blockCooldownMs(): number {
  const msPerMinute = 60_000;
  const cooldownMinutes = envInt("CRAWLER_BLOCK_COOLDOWN_MINUTES", 180);
  return cooldownMinutes * msPerMinute;
}
|
||
|
||
/**
 * Refuses to continue while the crawler cooldown is still running.
 *
 * @throws Error with the remaining wait (rounded up, never less than one minute)
 *   when the current time is earlier than `crawlerBlockedUntil`.
 */
function assertCrawlerCooldown(): void {
  const coolingDown = Date.now() < crawlerBlockedUntil;
  if (!coolingDown) {
    return;
  }

  const remainingMs = crawlerBlockedUntil - Date.now();
  // Clamp to 1 so the message never reports "0 minutes" right at the boundary.
  const minutes = Math.max(1, Math.ceil(remainingMs / 60_000));
  throw new Error(`Threads 爬蟲已進入保護冷卻,約 ${minutes} 分鐘後再試。期間可改用官方 API 或 Brave。`);
}
|
||
|
||
/**
 * Reads a positive integer setting from the process environment.
 *
 * @param key - Name of the environment variable to read.
 * @param fallback - Value returned when the variable is unset, does not start
 *   with a base-10 integer, or parses to zero or a negative number.
 * @returns The parsed integer, or `fallback`.
 */
function envInt(key: string, fallback: number): number {
  const raw = process.env[key] ?? "";
  const parsed = Number.parseInt(raw, 10);
  // NaN fails the finiteness check; zero and negatives are treated as "not configured".
  if (!Number.isFinite(parsed) || !(parsed > 0)) {
    return fallback;
  }
  return parsed;
}
|
||
|
||
/**
 * Resolves the crawler's safety limits from the environment.
 *
 * Each limit has a default that applies when its variable is absent or invalid,
 * and a hard ceiling that configuration cannot exceed:
 * - `CRAWLER_MAX_TASKS_PER_SCAN`: default 4, ceiling 6
 * - `CRAWLER_MAX_POSTS_PER_TASK`: default 12, ceiling 15
 * - `CRAWLER_DAILY_PAGE_LIMIT`: default 40, ceiling 60
 *
 * @returns The effective limits, re-read from the environment on every call.
 */
export function getCrawlerSafetyLimits() {
  const maxTasksPerScan = Math.min(envInt("CRAWLER_MAX_TASKS_PER_SCAN", 4), 6);
  const maxPostsPerTask = Math.min(envInt("CRAWLER_MAX_POSTS_PER_TASK", 12), 15);
  const dailyPageLimit = Math.min(envInt("CRAWLER_DAILY_PAGE_LIMIT", 40), 60);

  return { maxTasksPerScan, maxPostsPerTask, dailyPageLimit };
}
|
||
|
||
/**
 * Trims a scan plan so it stays within the crawler safety limits.
 *
 * Keeps only the first `maxTasksPerScan` tasks and caps each kept task's `limit`
 * at `maxPostsPerTask`. The input array and its task objects are not modified.
 *
 * @param tasks - Scan tasks in the order they should be considered.
 * @returns New task objects for the tasks that fit within the limits.
 */
export function limitCrawlerTasks(tasks: ScanTask[]): ScanTask[] {
  const { maxTasksPerScan, maxPostsPerTask } = getCrawlerSafetyLimits();
  const selected = tasks.slice(0, maxTasksPerScan);

  return selected.map((task) => {
    return { ...task, limit: Math.min(task.limit, maxPostsPerTask) };
  });
}
|
||
|
||
/**
 * Spends one page from today's crawler quota for the given account.
 *
 * The cooldown is checked first, then the daily quota; the counter is only
 * incremented when both checks pass.
 *
 * @param accountId - Account whose quota is charged; a missing or empty id
 *   uses the shared "default" bucket.
 * @throws Error when the crawler is cooling down or the daily page limit is reached.
 */
export function consumeCrawlerPageQuota(accountId?: string): void {
  assertCrawlerCooldown();

  const dailyPageLimit = getCrawlerSafetyLimits().dailyPageLimit;
  const quotaKey = `threads-crawler-${accountId || "default"}`;

  if (canUseDailyQuota(quotaKey, dailyPageLimit)) {
    incrementDailyQuota(quotaKey);
    return;
  }

  throw new Error(
    `今日瀏覽器海巡已達安全上限(${dailyPageLimit} 頁)。請明天再繼續,或改用 Brave 補漏。`
  );
}
|
||
|
||
/**
 * Reports today's crawler page usage for an account without consuming quota.
 *
 * @param accountId - Account to look up; a missing or empty id uses the shared
 *   "default" bucket.
 * @returns `used` (pages counted so far today) and `limit` (the effective daily cap).
 */
export function crawlerQuotaStatus(accountId?: string) {
  const limit = getCrawlerSafetyLimits().dailyPageLimit;
  const quotaKey = `threads-crawler-${accountId || "default"}`;
  const used = getDailyQuotaUsed(quotaKey);

  return { used, limit };
}
|
||
|
||
/**
 * Case-insensitive matcher for page text that signals rate limiting or an
 * account verification challenge, in English and Traditional Chinese.
 * The phrases contain no regex metacharacters, so joining them with `|`
 * yields a plain alternation.
 */
const BLOCKED_TEXT = new RegExp(
  [
    "try again later",
    "rate limit",
    "too many requests",
    "suspicious activity",
    "checkpoint",
    "請稍後再試",
    "操作太頻繁",
    "異常活動",
    "暫時限制",
    "驗證你的身分",
  ].join("|"),
  "i"
);
|
||
|
||
/**
 * Aborts the crawl when the current Threads page looks rate-limited, challenged,
 * or logged out.
 *
 * The page counts as "blocked" when the HTTP status is 429 or 403, when
 * `bodyText` matches `BLOCKED_TEXT`, or when the URL contains "checkpoint".
 * It counts as "login required" when the URL contains a `/login` path segment.
 * If neither applies, the function resolves without doing anything.
 *
 * Otherwise it tries to record a forced debug run with a capture of the page
 * (best effort: failures are logged, not rethrown) and then throws. A blocked
 * page also extends the module-wide crawler cooldown.
 *
 * @param page - Playwright page whose URL is inspected and which is passed to the debug capture.
 * @param status - HTTP status associated with the page load, if known.
 * @param bodyText - Page text to scan for block phrases; its first 500 characters
 *   are attached to the debug record.
 * @throws Error when the page is blocked (takes precedence) or requires login.
 */
export async function assertThreadsPageSafe(
  page: Page,
  status: number | undefined,
  bodyText: string
): Promise<void> {
  const url = page.url();
  const loginRequired = /\/login(?:[/?#]|$)/i.test(url);
  const blocked = status === 429 || status === 403 || BLOCKED_TEXT.test(bodyText) || /checkpoint/i.test(url);
  if (!blocked && !loginRequired) return;

  // Record the cooldown before the first await: other in-flight crawl tasks must
  // be refused right away, not only after the debug capture below has finished.
  // Math.max keeps an already-longer cooldown from being shortened.
  if (blocked) {
    crawlerBlockedUntil = Math.max(crawlerBlockedUntil, Date.now() + blockCooldownMs());
  }

  let runId: string | null = null;
  try {
    const run = await createDebugRun(
      loginRequired ? "threads-login-required" : "threads-blocked-or-verification",
      { force: true }
    );
    await captureDebugStep(page, run, loginRequired ? "login-required" : "blocked-or-verification", {
      status,
      bodyPreview: bodyText.slice(0, 500),
    });
    // Publish the run id only after the capture succeeded, so the error message
    // never claims a screenshot exists when saving it actually failed.
    runId = run?.id ?? null;
  } catch (error) {
    // Diagnostics are best effort; the safety error below must still be thrown.
    console.warn("[threads-safety] failure screenshot could not be saved", error);
  }

  const screenshotHint = runId
    ? ` 已自動截圖,請到「瀏覽器 Debug」查看最新紀錄(${runId})。`
    : " 截圖儲存失敗,請手動開啟 Threads 確認狀態。";

  if (blocked) {
    throw new Error(`Threads 顯示限流或帳號驗證,已立即停止海巡並進入保護冷卻。${screenshotHint}`);
  }
  throw new Error(`Session 已失效,請到設定頁重新登入 Threads。${screenshotHint}`);
}
|