diff --git a/haixun-backend/web/src/types/api.ts b/haixun-backend/web/src/types/api.ts
index 5e0151a..9653a2a 100644
--- a/haixun-backend/web/src/types/api.ts
+++ b/haixun-backend/web/src/types/api.ts
@@ -124,16 +124,30 @@ export interface JobTemplateData {
}>
}
+export interface CopyResearchMapData { // Shape of PersonaData.copy_research_map; every field is optional.
+ audience_summary?: string // A non-empty value makes hasCopyResearchMap() return true.
+ content_goal?: string
+ questions?: string[]
+ pillars?: string[]
+ exclusions?: string[]
+ suggested_tags?: string[] // A non-empty list also makes hasCopyResearchMap() return true.
+ benchmark_notes?: string
+}
+
+/**
+ * Reports whether a persona carries a usable copy research map.
+ *
+ * The map counts as present when `audience_summary` is a non-empty string or
+ * `suggested_tags` holds at least one entry. A missing persona or a missing
+ * map yields `false`.
+ */
+export function hasCopyResearchMap(persona: PersonaData | null | undefined): boolean {
+  const researchMap = persona?.copy_research_map
+  if (researchMap?.audience_summary) return true
+  const tagCount = researchMap?.suggested_tags?.length ?? 0
+  return tagCount > 0
+}
+
export interface PersonaData {
id: string
display_name?: string
persona?: string
brief?: string
- product_brief?: string
- target_audience?: string
- goals?: string
style_profile?: string
style_benchmark?: string
+ seed_query?: string // NOTE(review): semantics are not visible in this file — confirm against the backend.
+ copy_research_map?: CopyResearchMapData // Optional research map; hasCopyResearchMap() reports true when it has an audience summary or suggested tags.
create_at: number
update_at: number
}
@@ -148,6 +162,45 @@ export interface StartPersonaStyleAnalysisData {
message?: string
}
+export interface ViralScanPostData { // One scanned post record; only `replies` is optional.
+ id: string
+ search_tag: string
+ permalink: string
+ author: string
+ text: string
+ like_count: number
+ reply_count: number
+ engagement_score: number // NOTE(review): how this score is derived is not visible here — confirm with the backend.
+ source: string
+ scan_job_id: string
+ replies?: ScanReplyData[] // Reply records in the ScanReplyData shape, when included.
+ create_at: number // NOTE(review): timestamp unit (seconds vs milliseconds) is not shown here — confirm.
+}
+
+export interface CopyDraftData { // One copy-draft record; id, persona_id, draft_type, text and create_at are required.
+ id: string
+ persona_id: string // presumably references PersonaData.id — verify against the backend.
+ scan_post_id?: string // presumably references ViralScanPostData.id when set — verify against the backend.
+ draft_type: string // NOTE(review): the allowed values are not enumerated in this file.
+ text: string
+ angle?: string
+ hook?: string
+ rationale?: string
+ reference_notes?: string
+ sources?: string[]
+ status?: string // NOTE(review): the allowed values are not enumerated in this file.
+ create_at: number // NOTE(review): timestamp unit (seconds vs milliseconds) is not shown here — confirm.
+}
+
+export interface ScanReplyData { // Element type of ViralScanPostData.replies; `text` is the only required field.
+ external_id?: string
+ author?: string
+ text: string
+ permalink?: string
+ like_count?: number
+ posted_at?: string // NOTE(review): date format is not visible here — confirm with the backend.
+}
+
export interface ThreadsAccountData {
id: string
display_name?: string
diff --git a/haixun-backend/web/src/types/brand.ts b/haixun-backend/web/src/types/brand.ts
new file mode 100644
index 0000000..019fb11
--- /dev/null
+++ b/haixun-backend/web/src/types/brand.ts
@@ -0,0 +1,37 @@
+export interface ResearchMapData { // Shape of BrandData.research_map; every field is optional.
+ audience_summary?: string // A non-empty value makes hasResearchMap() return true.
+ content_goal?: string
+ questions?: string[] // A non-empty list also makes hasResearchMap() return true.
+ pillars?: string[]
+ exclusions?: string[]
+}
+
+export interface BrandData { // One brand record; id, create_at and update_at are required, the rest optional.
+ id: string
+ display_name?: string
+ seed_query?: string // NOTE(review): semantics are not visible in this file — confirm against the backend.
+ brief?: string
+ product_brief?: string
+ product_context?: string
+ target_audience?: string
+ goals?: string
+ research_map?: ResearchMapData // Optional research map inspected by hasResearchMap().
+ create_at: number // NOTE(review): timestamp unit (seconds vs milliseconds) is not shown here — confirm.
+ update_at: number
+}
+
+export interface ListBrandsData { // Wrapper object holding an array of brands under `list`.
+ list: BrandData[]
+}
+
+export interface CreatePlacementTopicData { // Pairs a brand record with a job id and status; only `message` is optional.
+ brand: BrandData
+ job_id: string
+ status: string // NOTE(review): the allowed values are not enumerated in this file.
+ message?: string
+}
+
+/**
+ * Reports whether a brand carries a usable research map.
+ *
+ * The map counts as present when `audience_summary` is a non-empty string or
+ * `questions` holds at least one entry. A missing brand or a missing map
+ * yields `false`.
+ */
+export function hasResearchMap(brand: BrandData | null | undefined): boolean {
+  const researchMap = brand?.research_map
+  if (researchMap?.audience_summary) return true
+  const questionCount = researchMap?.questions?.length ?? 0
+  return questionCount > 0
+}
\ No newline at end of file
diff --git a/haixun-backend/worker/style-8d-worker.ts b/haixun-backend/worker/style-8d-worker.ts
index 1c12316..89738d6 100644
--- a/haixun-backend/worker/style-8d-worker.ts
+++ b/haixun-backend/worker/style-8d-worker.ts
@@ -240,7 +240,7 @@ async function analyzeStyle8DWithGo(job: JobData, username: string, posts: Scrap
style_profile: string
style_benchmark: string
}>(
- `/api/v1/internal/workers/jobs/${encodeURIComponent(job.id)}/analyze-style-8d`,
+ `/api/v1/internal/workers/jobs/${encodeURIComponent(job.id)}/analyze-style8d`,
{
worker_id: WORKER_ID,
tenant_id: tenantID,
diff --git a/haixun-backend/worker/threads-keyword-search-cli.ts b/haixun-backend/worker/threads-keyword-search-cli.ts
new file mode 100644
index 0000000..9df12a8
--- /dev/null
+++ b/haixun-backend/worker/threads-keyword-search-cli.ts
@@ -0,0 +1,32 @@
+import { searchKeywords } from './threads-keyword-search'
+
+type CliInput = { // JSON request that main() reads from stdin.
+ storage_state: string // Forwarded to searchKeywords() as its storageState argument; main() substitutes '' when absent.
+ query: string // Forwarded to searchKeywords() as the query; main() substitutes '' when absent.
+ limit?: number // Forwarded as the result limit; main() defaults it to 12.
+}
+
+/**
+ * Reads all of standard input and returns it decoded as UTF-8.
+ *
+ * Chunks are gathered as Buffers and decoded once at the end, so a multi-byte
+ * character split across two chunks is still decoded correctly.
+ *
+ * @returns The complete stdin contents as a string ('' when stdin is empty).
+ */
+async function readStdin(): Promise<string> {
+  const chunks: Buffer[] = []
+  for await (const chunk of process.stdin) {
+    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))
+  }
+  return Buffer.concat(chunks).toString('utf8')
+}
+
+/**
+ * CLI entry point: reads a JSON request from stdin, runs the keyword search,
+ * and writes `{ "posts": [...] }` as JSON to stdout.
+ *
+ * Missing fields fall back to an empty session, an empty query, and a limit
+ * of 12. Parse and search errors propagate to the caller.
+ */
+async function main() {
+  const request = JSON.parse(await readStdin()) as CliInput
+  const storageState = request.storage_state ?? ''
+  const query = request.query ?? ''
+  const limit = request.limit ?? 12
+  const posts = await searchKeywords(storageState, query, limit)
+  process.stdout.write(JSON.stringify({ posts }))
+}
+
+// Run the CLI; on any failure print the error message to stderr and exit with status 1.
+main().catch((error) => {
+  process.stderr.write(error instanceof Error ? error.message : String(error))
+  process.exit(1)
+})
\ No newline at end of file
diff --git a/haixun-backend/worker/threads-keyword-search.ts b/haixun-backend/worker/threads-keyword-search.ts
new file mode 100644
index 0000000..5dad9fc
--- /dev/null
+++ b/haixun-backend/worker/threads-keyword-search.ts
@@ -0,0 +1,249 @@
+import { chromium, type BrowserContext, type BrowserContextOptions, type Page } from 'playwright'
+
+export type KeywordSearchPost = { // One post returned by searchKeywords(); only `text` is guaranteed.
+ text: string
+ permalink?: string // Absolute post URL when one could be built.
+ externalId?: string // Taken from the JSON code/pk/id field or from the /post/<id> segment of the link.
+ authorName?: string // Username from the JSON payload or from the @<name> segment of the link.
+ likeCount?: number // Filled by JSON extraction (like_count); the DOM scraper does not set it.
+ replyCount?: number // Filled by JSON extraction (direct_reply_count or reply_count); the DOM scraper does not set it.
+}
+
+const USER_AGENT = // User-agent string applied to the browser context in createBrowserContext().
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
+
+const BROWSER_ARGS = [ // Command-line flags passed to chromium.launch().
+ '--disable-blink-features=AutomationControlled',
+ '--no-first-run',
+ '--no-default-browser-check',
+]
+
+const STEALTH_INIT_SCRIPT = () => { // Registered via context.addInitScript(); makes navigator.webdriver read as false.
+ Object.defineProperty(navigator, 'webdriver', { get: () => false })
+}
+
+type ThreadsPost = KeywordSearchPost // Internal alias used by the extraction helpers in this module.
+
+/**
+ * Depth-first walk over a parsed JSON value, calling `visit` on every
+ * non-array object encountered. Arrays are traversed but not themselves
+ * passed to `visit`; primitives and null are ignored.
+ *
+ * @param data  Any parsed JSON value.
+ * @param visit Callback invoked once per object, parent before its children.
+ */
+function walkJson(data: unknown, visit: (obj: Record<string, unknown>) => void): void {
+  if (!data || typeof data !== 'object') return
+  if (Array.isArray(data)) {
+    for (const item of data) walkJson(item, visit)
+    return
+  }
+  const obj = data as Record<string, unknown>
+  visit(obj)
+  for (const value of Object.values(obj)) {
+    if (value && typeof value === 'object') walkJson(value, visit)
+  }
+}
+
+/**
+ * Pulls the post body out of a JSON object, trying `caption.text`, then
+ * `text_post_app_info.text`, then a top-level `text`.
+ *
+ * Only string values are accepted at each step, so a non-string `text`
+ * property can never be returned under the `string` return type.
+ *
+ * @returns The first string found, or `undefined` when none of the fields hold one.
+ */
+function getPostText(obj: Record<string, unknown>): string | undefined {
+  const captionText = (obj.caption as { text?: unknown } | null | undefined)?.text
+  if (typeof captionText === 'string') return captionText
+  const appInfoText = (obj.text_post_app_info as { text?: unknown } | null | undefined)?.text
+  if (typeof appInfoText === 'string') return appInfoText
+  return typeof obj.text === 'string' ? obj.text : undefined
+}
+
+/**
+ * Walks a parsed JSON payload and appends every post-like object to `posts`.
+ *
+ * An object qualifies when getPostText() finds at least 5 characters of text.
+ * The external id is the first of `code`, `pk`, `id` that is present.
+ * The permalink is built only from `code`: the `/post/` links this module
+ * reads from the page carry that value, so a `pk`/`id` fallback is kept as an
+ * id but never turned into a URL.
+ * NOTE(review): confirm that `pk`/`id` values do not resolve as post URLs.
+ * Counts and usernames are copied only when they have the expected type.
+ *
+ * @param data  Parsed JSON of unknown shape.
+ * @param posts Output array; matches are pushed onto it in traversal order.
+ */
+function extractPostsFromJson(data: unknown, posts: ThreadsPost[]): void {
+  walkJson(data, (obj) => {
+    const text = getPostText(obj)
+    if (typeof text !== 'string' || text.length < 5) return
+
+    const rawId = obj.code ?? obj.pk ?? obj.id
+    const externalId =
+      (typeof rawId === 'string' || typeof rawId === 'number') && rawId ? String(rawId) : undefined
+    const shortcode = typeof obj.code === 'string' && obj.code ? obj.code : undefined
+
+    const rawUsername =
+      (obj.user as { username?: unknown } | null | undefined)?.username ??
+      (obj.owner as { username?: unknown } | null | undefined)?.username
+    const username = typeof rawUsername === 'string' ? rawUsername : undefined
+
+    const directReplies = (obj.text_post_app_info as { direct_reply_count?: unknown } | null | undefined)
+      ?.direct_reply_count
+    const replyCount =
+      typeof directReplies === 'number'
+        ? directReplies
+        : typeof obj.reply_count === 'number'
+          ? obj.reply_count
+          : undefined
+
+    posts.push({
+      externalId,
+      text,
+      permalink:
+        shortcode && username ? `https://www.threads.com/@${username}/post/${shortcode}` : undefined,
+      authorName: username,
+      likeCount: typeof obj.like_count === 'number' ? obj.like_count : undefined,
+      replyCount,
+    })
+  })
+}
+
+/**
+ * Extracts posts from the JSON blobs embedded in the page's
+ * `script[type="application/json"][data-sjs]` tags.
+ *
+ * Only scripts whose text contains `thread_items` are parsed; scripts that
+ * fail to read or parse are skipped so one bad blob cannot abort the scan.
+ *
+ * @param page Page whose embedded scripts should be inspected.
+ * @returns Posts found across all matching scripts (may contain duplicates).
+ */
+async function extractFromPageScripts(page: Page): Promise<ThreadsPost[]> {
+  const posts: ThreadsPost[] = []
+  const scripts = await page.locator('script[type="application/json"][data-sjs]').all()
+  for (const script of scripts) {
+    try {
+      const raw = await script.textContent()
+      if (!raw || !raw.includes('thread_items')) continue
+      extractPostsFromJson(JSON.parse(raw), posts)
+    } catch {
+      // skip malformed script
+    }
+  }
+  return posts
+}
+
+/**
+ * Subscribes to the page's network responses and feeds every JSON body from
+ * a graphql/threads/instagram URL through extractPostsFromJson(), appending
+ * any posts found to `collected`.
+ *
+ * Collection is best-effort: responses that cannot be read or parsed are
+ * silently ignored.
+ */
+function attachCollector(page: Page, collected: ThreadsPost[]) {
+  const urlMarkers = ['graphql', 'threads', 'instagram']
+  page.on('response', async (response) => {
+    const url = response.url()
+    if (!urlMarkers.some((marker) => url.includes(marker))) return
+    try {
+      const contentType = response.headers()['content-type'] ?? ''
+      if (contentType.includes('json')) {
+        extractPostsFromJson(await response.json(), collected)
+      }
+    } catch {
+      // ignore
+    }
+  })
+}
+
+/**
+ * Fallback scraper that reads posts straight from the rendered search page.
+ *
+ * Inspects at most the first 40 `/post/` anchors, skipping repeated hrefs.
+ * For each anchor it reads the first `dir="auto"` div/span inside the nearest
+ * pressable container, or inside a nearby ancestor div when no such container
+ * exists. Anchors with missing text, text shorter than 5 characters, or that
+ * throw while being read are skipped.
+ *
+ * @param page Page already showing search results.
+ * @returns Posts with text, permalink, author and external id (no counts).
+ */
+async function scrapeSearchDom(page: Page): Promise<ThreadsPost[]> {
+  const authorPattern = /@([^/]+)\/post/
+  const postIdPattern = /\/post\/([^/?]+)/
+  const posts: ThreadsPost[] = []
+  const seen = new Set<string>()
+  const links = page.locator('a[href*="/post/"]')
+  const count = await links.count()
+
+  for (let i = 0; i < Math.min(count, 40); i++) {
+    const link = links.nth(i)
+    try {
+      const href = await link.getAttribute('href', { timeout: 1200 })
+      if (!href || seen.has(href)) continue
+      seen.add(href)
+      const permalink = href.startsWith('http') ? href : `https://www.threads.com${href}`
+      const authorName = href.match(authorPattern)?.[1]
+      const container = link.locator("xpath=ancestor::*[contains(@data-pressable-container,'true')][1]")
+      const scope =
+        (await container.count()) > 0 ? container : link.locator('xpath=ancestor::div[position()<=6]').first()
+      const text = await scope
+        .locator('div[dir="auto"], span[dir="auto"]')
+        .first()
+        .innerText({ timeout: 1200 })
+        .catch(() => '')
+      if (!text || text.length < 5) continue
+      posts.push({
+        text: text.trim(),
+        permalink,
+        authorName,
+        externalId: href.match(postIdPattern)?.[1],
+      })
+    } catch {
+      // skip
+    }
+  }
+  return posts
+}
+
+/** Waits a randomized 1200–2999 ms after a navigation before continuing. */
+async function humanLandingPause(page: Page) {
+  const jitterMs = Math.floor(Math.random() * 1800)
+  await page.waitForTimeout(1200 + jitterMs)
+}
+
+/**
+ * Scrolls the page 2–4 times with the mouse wheel, moving 500–1199 px per
+ * pass and pausing 800–1999 ms after each pass.
+ */
+async function humanScrollPage(page: Page) {
+  let remaining = 2 + Math.floor(Math.random() * 3)
+  while (remaining-- > 0) {
+    const deltaY = 500 + Math.floor(Math.random() * 700)
+    await page.mouse.wheel(0, deltaY)
+    const pauseMs = 800 + Math.floor(Math.random() * 1200)
+    await page.waitForTimeout(pauseMs)
+  }
+}
+
+/**
+ * Removes duplicate posts and caps the result at `limit` entries.
+ *
+ * Two posts are duplicates when they share a key, where the key is the
+ * external id, else the permalink, else the first 120 characters of text.
+ * The first occurrence wins and input order is preserved.
+ *
+ * @param posts Candidate posts, possibly with repeats.
+ * @param limit Maximum number of posts to return; zero or negative yields [].
+ * @returns Fresh post objects (the inputs are not returned by reference).
+ */
+function dedupePosts(posts: ThreadsPost[], limit: number): KeywordSearchPost[] {
+  const out: KeywordSearchPost[] = []
+  // Guard up front: the in-loop cap is only checked after a push, so without
+  // this a limit of 0 would still return one post.
+  if (limit <= 0) return out
+
+  const seen = new Set<string>()
+  for (const post of posts) {
+    const key = post.externalId ?? post.permalink ?? post.text.slice(0, 120)
+    if (seen.has(key)) continue
+    seen.add(key)
+    out.push({
+      text: post.text,
+      permalink: post.permalink,
+      externalId: post.externalId,
+      authorName: post.authorName,
+      likeCount: post.likeCount,
+      replyCount: post.replyCount,
+    })
+    if (out.length >= limit) break
+  }
+  return out
+}
+
+/**
+ * Launches Chromium and opens a browser context restored from a serialized
+ * Playwright storage state.
+ *
+ * The JSON is parsed before the browser is launched, so a corrupt session
+ * fails fast without starting a process. If context creation fails after
+ * launch, the browser is closed before the error is rethrown.
+ *
+ * @param storageState JSON text of a Playwright storage state.
+ * @returns The context plus a `close` function that shuts down both the
+ *          context and the browser (the browser is closed even when closing
+ *          the context throws).
+ * @throws Error when `storageState` is not valid JSON, or when launching the
+ *         browser or creating the context fails.
+ */
+async function createBrowserContext(
+  storageState: string,
+): Promise<{ context: BrowserContext; close: () => Promise<void> }> {
+  let parsedState: unknown
+  try {
+    parsedState = JSON.parse(storageState)
+  } catch {
+    throw new Error('瀏覽器 session 資料損毀,請到連線設定重新同步 Chrome extension')
+  }
+
+  const browser = await chromium.launch({
+    headless: process.env.PLAYWRIGHT_HEADLESS !== 'false',
+    args: BROWSER_ARGS,
+  })
+
+  try {
+    const context = await browser.newContext({
+      storageState: parsedState as BrowserContextOptions['storageState'],
+      userAgent: USER_AGENT,
+      viewport: { width: 1280, height: 900 },
+      locale: 'zh-TW',
+      timezoneId: 'Asia/Taipei',
+    })
+    await context.addInitScript(STEALTH_INIT_SCRIPT)
+    return {
+      context,
+      close: async () => {
+        try {
+          await context.close()
+        } finally {
+          await browser.close()
+        }
+      },
+    }
+  } catch (error) {
+    // Valid JSON with the wrong shape makes newContext() throw; do not leak the browser.
+    await browser.close().catch(() => undefined)
+    throw error
+  }
+}
+
+/**
+ * Playwright keyword search on threads.com (dev_mode crawler).
+ *
+ * Opens the home page first to verify the session is still logged in, then
+ * loads the search results for `query` and gathers posts from three sources
+ * in order: JSON network responses, JSON embedded in page scripts, and the
+ * rendered DOM. Each later source is consulted only while fewer than 3 posts
+ * have been collected. The browser is always closed before returning.
+ *
+ * @param storageState JSON text of the Playwright storage state to restore.
+ * @param query        Keyword to search for; blank input returns [].
+ * @param limit        Maximum number of posts to return (default 12).
+ * @returns De-duplicated posts, at most `limit` of them; [] when the search
+ *          page reports that it does not exist.
+ * @throws Error when the session is missing, corrupt, or no longer logged in.
+ */
+export async function searchKeywords(
+  storageState: string,
+  query: string,
+  limit = 12,
+): Promise<KeywordSearchPost[]> {
+  const keyword = query.trim()
+  if (!keyword) return []
+  if (!storageState.trim()) {
+    throw new Error('找不到 Chrome session,請先到連線頁同步 Threads 登入態')
+  }
+
+  const { context, close } = await createBrowserContext(storageState)
+  try {
+    const page = await context.newPage()
+
+    await page.goto('https://www.threads.com/', { waitUntil: 'domcontentloaded', timeout: 45_000 })
+    await humanLandingPause(page)
+    const homeText = await page.locator('body').innerText().catch(() => '')
+    if (page.url().includes('/login') || homeText.includes('登入')) {
+      throw new Error('Session 已失效,請到連線頁重新同步 Chrome Session')
+    }
+
+    // Start collecting only after the home-page check: responses loaded for
+    // the home feed are unrelated to the keyword and would otherwise be
+    // returned as search results (and suppress the fallbacks below).
+    const collected: ThreadsPost[] = []
+    attachCollector(page, collected)
+
+    const searchUrl = `https://www.threads.com/search?q=${encodeURIComponent(keyword)}&serp_type=default`
+    await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 45_000 })
+    await humanLandingPause(page)
+
+    const bodyText = await page.locator('body').innerText()
+    if (bodyText.includes('走丟') || bodyText.includes('頁面不存在')) {
+      return []
+    }
+
+    // Results may legitimately be empty, so a selector timeout is not an error.
+    await page.waitForSelector('a[href*="/post/"]', { timeout: 12_000 }).catch(() => undefined)
+    await humanScrollPage(page)
+
+    if (collected.length < 3) {
+      collected.push(...(await extractFromPageScripts(page)))
+    }
+    if (collected.length < 3) {
+      collected.push(...(await scrapeSearchDom(page)))
+    }
+
+    return dedupePosts(collected, limit)
+  } finally {
+    await close()
+  }
+}
\ No newline at end of file