// Helpers for extracting Threads posts and replies from JSON payloads embedded in page scripts.
import type { ThreadsPost, ThreadsReply } from "./types";
/**
 * Walks an arbitrary JSON-like value depth-first and calls `visit` for every
 * non-array object it contains.
 *
 * Objects are visited before their children, and children are processed in
 * array order / `Object.values` order. Primitives and `null` are ignored;
 * arrays are descended into but are never passed to `visit` themselves.
 *
 * The traversal keeps its own stack instead of recursing, so deeply nested
 * input cannot overflow the call stack. A reference back to an object that is
 * currently being traversed (a cycle) is skipped; an object that is merely
 * shared between siblings is still visited once per occurrence.
 *
 * @param data - Value to traverse (for example the result of `JSON.parse`).
 * @param visit - Callback invoked once per occurrence of a non-array object.
 */
function walkJson(data: unknown, visit: (obj: Record<string, unknown>) => void): void {
  type Frame = { node: object; leaving: boolean };

  const isContainer = (value: unknown): value is object =>
    typeof value === "object" && value !== null;

  if (!isContainer(data)) return;

  // Objects/arrays on the path from the root to the node being processed.
  const ancestors = new Set<object>();
  const stack: Frame[] = [{ node: data, leaving: false }];

  let frame: Frame | undefined;
  while ((frame = stack.pop()) !== undefined) {
    const { node } = frame;

    if (frame.leaving) {
      // The whole subtree below `node` has been processed.
      ancestors.delete(node);
      continue;
    }

    // Back-edge to an ancestor: descending again would never terminate.
    if (ancestors.has(node)) continue;

    ancestors.add(node);
    stack.push({ node, leaving: true });

    let children: unknown[];
    if (Array.isArray(node)) {
      children = node;
    } else {
      const obj = node as Record<string, unknown>;
      visit(obj);
      children = Object.values(obj);
    }

    // Push in reverse so the first child is popped, and therefore visited, first.
    for (let i = children.length - 1; i >= 0; i--) {
      const child = children[i];
      if (isContainer(child)) stack.push({ node: child, leaving: false });
    }
  }
}
/**
 * Returns the text carried by a post-like object, or `undefined` if it has none.
 *
 * Candidates are checked in this order and the first one that is actually a
 * string wins:
 *   1. `obj.caption.text`
 *   2. `obj.text_post_app_info.text`
 *   3. `obj.text`
 *
 * Values of any other type are ignored rather than returned, so callers can
 * rely on the declared `string | undefined` result at runtime.
 *
 * @param obj - Object taken from an untyped JSON payload.
 * @returns The first string candidate (possibly empty), otherwise `undefined`.
 */
function getPostText(obj: Record<string, unknown>): string | undefined {
  // Reads `.text` from a nested holder only when the holder is a real object.
  const nestedText = (holder: unknown): unknown =>
    typeof holder === "object" && holder !== null
      ? (holder as { text?: unknown }).text
      : undefined;

  const candidates: unknown[] = [
    nestedText(obj.caption),
    nestedText(obj.text_post_app_info),
    obj.text,
  ];

  return candidates.find((value): value is string => typeof value === "string");
}
/**
 * Scans a JSON-like payload for post-like objects and appends one entry per
 * match to `posts`.
 *
 * An object counts as a post when `getPostText` yields a string of at least
 * five characters. The `caption` / `text_post_app_info` sub-objects of an
 * object are not reported again when the walker reaches them; without that,
 * each would be emitted a second time as a fragment.
 *
 * All fields are validated at runtime because the payload is untyped: values
 * of an unexpected type become `undefined` instead of leaking into the result.
 *
 * @param data - Untyped payload to scan (for example the result of `JSON.parse`).
 * @param posts - Output array; matches are appended in traversal order.
 */
export function extractPostsFromJson(data: unknown, posts: ThreadsPost[]): void {
  const isObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  const nonEmptyString = (value: unknown): string | undefined =>
    typeof value === "string" && value.length > 0 ? value : undefined;

  const finiteNumber = (value: unknown): number | undefined =>
    typeof value === "number" && Number.isFinite(value) ? value : undefined;

  const usernameOf = (holder: unknown): string | undefined =>
    isObject(holder) ? nonEmptyString(holder.username) : undefined;

  // The value is multiplied by 1000 to get milliseconds for `Date`; numeric
  // strings are accepted as well, zero and unparseable values are rejected.
  const toDate = (value: unknown): Date | undefined => {
    const seconds =
      typeof value === "string" && value.trim() !== "" ? Number(value) : finiteNumber(value);
    if (seconds === undefined || !Number.isFinite(seconds) || seconds === 0) return undefined;
    const date = new Date(seconds * 1000);
    return Number.isNaN(date.getTime()) ? undefined : date;
  };

  // Sub-objects already accounted for by their parent.
  const consumed = new WeakSet<object>();

  walkJson(data, (obj) => {
    if (consumed.has(obj)) return;
    // Parents are visited before children, so marking here is early enough.
    for (const holder of [obj.caption, obj.text_post_app_info]) {
      if (isObject(holder)) consumed.add(holder);
    }

    const text = getPostText(obj);
    if (typeof text !== "string" || text.length < 5) return;

    // First usable identifier among code, pk and id (string or finite number).
    const rawId = [obj.code, obj.pk, obj.id].find(
      (value) => nonEmptyString(value) !== undefined || finiteNumber(value) !== undefined,
    );
    const externalId = rawId === undefined ? undefined : String(rawId);

    const username = usernameOf(obj.user) ?? usernameOf(obj.owner);
    const appInfo = isObject(obj.text_post_app_info) ? obj.text_post_app_info : undefined;

    // NOTE(review): the permalink uses the same identifier as `externalId`, which
    // may have come from `pk`/`id` rather than `code` — confirm such URLs resolve.
    const permalink =
      externalId !== undefined && username !== undefined
        ? `https://www.threads.com/@${encodeURIComponent(username)}/post/${encodeURIComponent(externalId)}`
        : undefined;

    posts.push({
      externalId,
      text,
      permalink,
      authorName: username,
      likeCount: finiteNumber(obj.like_count),
      replyCount: finiteNumber(appInfo?.direct_reply_count) ?? finiteNumber(obj.reply_count),
      postedAt: toDate(obj.taken_at),
    });
  });
}
/**
 * Scans a JSON-like payload for reply-like objects and appends one entry per
 * match to `replies`.
 *
 * An object is considered when `getPostText` yields a string of at least two
 * characters. It is then skipped only if it is not marked as a reply (no
 * non-null `reply_to_author` on the object or on its `text_post_app_info`)
 * and it carries a `reply_count` field; everything else is recorded.
 *
 * The `caption` / `text_post_app_info` sub-objects of an object are not
 * reported again when the walker reaches them. They have no `reply_count`,
 * so they would otherwise pass the filter and be emitted as replies.
 *
 * @param data - Untyped payload to scan (for example the result of `JSON.parse`).
 * @param replies - Output array; matches are appended in traversal order.
 */
export function extractRepliesFromJson(data: unknown, replies: ThreadsReply[]): void {
  const isObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  const isPresent = (value: unknown): boolean => value !== undefined && value !== null;

  const usernameOf = (holder: unknown): string | undefined =>
    isObject(holder) && typeof holder.username === "string" ? holder.username : undefined;

  // The value is multiplied by 1000 to get milliseconds for `Date`; numeric
  // strings are accepted as well, zero and unparseable values are rejected.
  const toDate = (value: unknown): Date | undefined => {
    const seconds =
      typeof value === "string" && value.trim() !== ""
        ? Number(value)
        : typeof value === "number"
          ? value
          : undefined;
    if (seconds === undefined || !Number.isFinite(seconds) || seconds === 0) return undefined;
    const date = new Date(seconds * 1000);
    return Number.isNaN(date.getTime()) ? undefined : date;
  };

  // Sub-objects already accounted for by their parent.
  const consumed = new WeakSet<object>();

  walkJson(data, (obj) => {
    if (consumed.has(obj)) return;
    // Parents are visited before children, so marking here is early enough.
    for (const holder of [obj.caption, obj.text_post_app_info]) {
      if (isObject(holder)) consumed.add(holder);
    }

    const text = getPostText(obj);
    if (typeof text !== "string" || text.length < 2) return;

    const appInfo = isObject(obj.text_post_app_info) ? obj.text_post_app_info : undefined;

    // An explicit `null` is treated like a missing field: there is no author replied to.
    const isReply = isPresent(obj.reply_to_author) || isPresent(appInfo?.reply_to_author);

    // Not marked as a reply but carrying its own reply counter: skip it.
    if (!isReply && obj.reply_count !== undefined) return;

    const likes = obj.like_count;

    replies.push({
      text,
      authorName: usernameOf(obj.user) ?? usernameOf(obj.owner),
      likeCount: typeof likes === "number" && Number.isFinite(likes) ? likes : 0,
      postedAt: toDate(obj.taken_at),
    });
  });
}
/**
 * Collects posts and replies from the JSON `<script>` tags of a page.
 *
 * Every `script[type="application/json"][data-sjs]` element is read; scripts
 * whose text does not contain `"thread_items"` are ignored. Each remaining
 * script is parsed and passed to `extractPostsFromJson` and
 * `extractRepliesFromJson`. Extraction is best-effort: a script that cannot
 * be read, parsed or processed is skipped and the others are still handled.
 *
 * Script contents are fetched concurrently and then processed in document
 * order, so the result order matches a sequential read.
 *
 * @param page - Playwright page to inspect.
 * @returns The posts and replies found across all matching scripts.
 */
export async function extractFromPageScripts(page: import("playwright").Page): Promise<{
  posts: ThreadsPost[];
  replies: ThreadsReply[];
}> {
  const posts: ThreadsPost[] = [];
  const replies: ThreadsReply[] = [];

  const scripts = await page.locator('script[type="application/json"][data-sjs]').all();

  // Read all scripts at once; a failed read yields `null` and is skipped below.
  const contents = await Promise.all(
    scripts.map(async (script) => {
      try {
        return await script.textContent();
      } catch {
        return null;
      }
    }),
  );

  for (const raw of contents) {
    // Cheap substring filter before paying for a full JSON parse.
    if (!raw || !raw.includes("thread_items")) continue;

    try {
      const json: unknown = JSON.parse(raw);
      extractPostsFromJson(json, posts);
      extractRepliesFromJson(json, replies);
    } catch {
      // skip malformed script
    }
  }

  return { posts, replies };
}