96 lines
3.6 KiB
TypeScript
96 lines
3.6 KiB
TypeScript
|
|
import { canUseDailyQuota, getDailyQuotaUsed, incrementDailyQuota } from "@/lib/search/quota";
|
|||
|
|
import type { ScanTask } from "@/lib/services/scan-tasks";
|
|||
|
|
import type { Page } from "playwright";
|
|||
|
|
import { captureDebugStep, createDebugRun } from "./debug";
|
|||
|
|
|
|||
|
|
// Millisecond timestamp (compared against Date.now()) until which the crawler refuses to run.
// Starts at 0, i.e. no cooldown; assertThreadsPageSafe pushes it forward when a block is detected.
let crawlerBlockedUntil = 0;
|
|||
|
|
|
|||
|
|
/**
 * Length of the protective cooldown in milliseconds.
 *
 * The duration is configured in minutes through `CRAWLER_BLOCK_COOLDOWN_MINUTES`
 * (180 when the variable is missing or invalid, as decided by `envInt`).
 *
 * @returns The cooldown duration converted from minutes to milliseconds.
 */
function blockCooldownMs(): number {
  const cooldownMinutes = envInt("CRAWLER_BLOCK_COOLDOWN_MINUTES", 180);
  return cooldownMinutes * 60_000;
}
|
|||
|
|
|
|||
|
|
/**
 * Refuses to continue while the crawler cooldown is still running.
 *
 * @throws Error with the remaining wait, rounded up to whole minutes (never
 *   less than 1), when the current time is before `crawlerBlockedUntil`.
 */
function assertCrawlerCooldown(): void {
  // Guard clause: nothing to do once the cooldown deadline has passed.
  if (Date.now() >= crawlerBlockedUntil) return;

  const remainingMs = crawlerBlockedUntil - Date.now();
  // Clamp to at least 1 so the message never reports "0 minutes".
  const minutes = Math.max(1, Math.ceil(remainingMs / 60_000));
  throw new Error(`Threads 爬蟲已進入保護冷卻,約 ${minutes} 分鐘後再試。期間可改用官方 API 或 Brave。`);
}
|
|||
|
|
|
|||
|
|
/**
 * Reads a strictly positive integer from an environment variable.
 *
 * Parsing uses `Number.parseInt` in base 10, so a value with a leading
 * numeric prefix such as "12abc" is read as 12.
 *
 * @param key - Name of the environment variable to read.
 * @param fallback - Value returned when the variable is unset, not parseable,
 *   non-finite, or not greater than zero.
 * @returns The parsed integer, or `fallback`.
 */
function envInt(key: string, fallback: number): number {
  const raw = process.env[key];
  const parsed = Number.parseInt(raw ?? "", 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return fallback;
  }
  return parsed;
}
|
|||
|
|
|
|||
|
|
/**
 * Resolves the crawler's safety limits from the environment.
 *
 * Each limit has a default used when its variable is missing or invalid, and
 * a hard ceiling that configuration cannot exceed:
 * - `CRAWLER_MAX_TASKS_PER_SCAN`: default 4, ceiling 6
 * - `CRAWLER_MAX_POSTS_PER_TASK`: default 12, ceiling 15
 * - `CRAWLER_DAILY_PAGE_LIMIT`: default 40, ceiling 60
 *
 * @returns An object with `maxTasksPerScan`, `maxPostsPerTask` and `dailyPageLimit`.
 */
export function getCrawlerSafetyLimits() {
  const maxTasksPerScan = Math.min(envInt("CRAWLER_MAX_TASKS_PER_SCAN", 4), 6);
  const maxPostsPerTask = Math.min(envInt("CRAWLER_MAX_POSTS_PER_TASK", 12), 15);
  const dailyPageLimit = Math.min(envInt("CRAWLER_DAILY_PAGE_LIMIT", 40), 60);
  return { maxTasksPerScan, maxPostsPerTask, dailyPageLimit };
}
|
|||
|
|
|
|||
|
|
/**
 * Trims a scan's task list to the configured safety limits.
 *
 * Only the first `maxTasksPerScan` tasks are kept, and each kept task is
 * copied with its `limit` lowered to at most `maxPostsPerTask`. The input
 * array and its task objects are not modified.
 *
 * @param tasks - Tasks requested for a scan.
 * @returns A new array of capped task copies.
 */
export function limitCrawlerTasks(tasks: ScanTask[]): ScanTask[] {
  const { maxTasksPerScan, maxPostsPerTask } = getCrawlerSafetyLimits();
  const allowedTasks = tasks.slice(0, maxTasksPerScan);
  return allowedTasks.map((task) => {
    const cappedLimit = Math.min(task.limit, maxPostsPerTask);
    return { ...task, limit: cappedLimit };
  });
}
|
|||
|
|
|
|||
|
|
/**
 * Spends one unit of the daily page quota for a crawler account.
 *
 * The cooldown is checked first. The quota is then tracked under the key
 * `threads-crawler-<accountId>`, with "default" standing in for a missing or
 * empty account id.
 *
 * @param accountId - Optional account whose quota is consumed.
 * @throws Error when the crawler is in cooldown, or when the daily page limit
 *   has already been reached (the counter is not incremented in that case).
 */
export function consumeCrawlerPageQuota(accountId?: string): void {
  assertCrawlerCooldown();

  const dailyPageLimit = getCrawlerSafetyLimits().dailyPageLimit;
  const quotaKey = `threads-crawler-${accountId || "default"}`;

  if (canUseDailyQuota(quotaKey, dailyPageLimit)) {
    incrementDailyQuota(quotaKey);
    return;
  }

  throw new Error(
    `今日瀏覽器海巡已達安全上限(${dailyPageLimit} 頁)。請明天再繼續,或改用 Brave 補漏。`
  );
}
|
|||
|
|
|
|||
|
|
/**
 * Reports how much of the daily page quota an account has used.
 *
 * Uses the same `threads-crawler-<accountId>` key as quota consumption, with
 * "default" for a missing or empty account id.
 *
 * @param accountId - Optional account to look up.
 * @returns `used` (as reported by `getDailyQuotaUsed`) and `limit` (the daily page limit).
 */
export function crawlerQuotaStatus(accountId?: string) {
  const limit = getCrawlerSafetyLimits().dailyPageLimit;
  const quotaKey = `threads-crawler-${accountId || "default"}`;
  const used = getDailyQuotaUsed(quotaKey);
  return { used, limit };
}
|
|||
|
|
|
|||
|
|
// Case-insensitive English and Chinese phrases that, when found in a page's body text,
// are treated as a rate-limit or account-verification screen.
const BLOCKED_TEXT: RegExp = /try again later|rate limit|too many requests|suspicious activity|checkpoint|請稍後再試|操作太頻繁|異常活動|暫時限制|驗證你的身分/i;
|
|||
|
|
|
|||
|
|
/**
 * Verifies that a freshly loaded Threads page is neither a login wall nor a
 * rate-limit / verification screen, and aborts the crawl if it is.
 *
 * A page counts as "login required" when its URL contains a `/login` path
 * segment. It counts as "blocked" when the HTTP status is 429 or 403, the
 * body text matches `BLOCKED_TEXT`, or the URL contains "checkpoint".
 *
 * On a problem page a debug run and screenshot are attempted on a best-effort
 * basis; a failure there is only logged and never hides the real error.
 *
 * @param page - Playwright page that was just navigated.
 * @param status - HTTP status of the navigation response, if known.
 * @param bodyText - Text content of the page used for phrase detection.
 * @throws Error when the page is blocked (the crawler cooldown is armed as
 *   well) or when the session needs a fresh login. Blocked takes precedence
 *   when both conditions hold.
 */
export async function assertThreadsPageSafe(
  page: Page,
  status: number | undefined,
  bodyText: string
): Promise<void> {
  const url = page.url();
  const loginRequired = /\/login(?:[/?#]|$)/i.test(url);
  const blocked = status === 429 || status === 403 || BLOCKED_TEXT.test(bodyText) || /checkpoint/i.test(url);
  if (!blocked && !loginRequired) return;

  // Arm the cooldown synchronously, before the first await. The debug capture
  // below yields to the event loop (and may stall on an unresponsive page);
  // setting the deadline first ensures any other crawl call made in the
  // meantime is already rejected by the cooldown check.
  if (blocked) {
    crawlerBlockedUntil = Math.max(crawlerBlockedUntil, Date.now() + blockCooldownMs());
  }

  // Best-effort evidence capture. Note that the debug labels prefer
  // "login-required" when both conditions are true, while the thrown error
  // below prefers the blocked message.
  let runId: string | null = null;
  try {
    const run = await createDebugRun(
      loginRequired ? "threads-login-required" : "threads-blocked-or-verification",
      { force: true }
    );
    runId = run?.id ?? null;
    await captureDebugStep(page, run, loginRequired ? "login-required" : "blocked-or-verification", {
      status,
      // Keep the stored preview short; only the first 500 characters are recorded.
      bodyPreview: bodyText.slice(0, 500),
    });
  } catch (error) {
    console.warn("[threads-safety] failure screenshot could not be saved", error);
  }

  const screenshotHint = runId
    ? ` 已自動截圖,請到「瀏覽器 Debug」查看最新紀錄(${runId})。`
    : " 截圖儲存失敗,請手動開啟 Threads 確認狀態。";

  if (blocked) {
    throw new Error(`Threads 顯示限流或帳號驗證,已立即停止海巡並進入保護冷卻。${screenshotHint}`);
  }
  throw new Error(`Session 已失效,請到設定頁重新登入 Threads。${screenshotHint}`);
}
|