// Company research: multi-source fetching (Taiwan / international news, company profile,
// 10-K supply-chain hints, management updates).
import { yahooQuoteSummary, yahooFinanceSearchNews } from './yahoo-session.js';
import {
  cleanNewsPlain,
  cleanGoogleNewsTitle,
  parseGoogleRssDescription,
  normalizeNewsItem,
} from './news-text.js';

// Browser-like User-Agent sent by default on every request made through `text()`.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36';
// User-Agent that overrides the default one for all sec.gov requests in this module.
const SEC_UA = 'EmmyInvestDashboard/1.0 (personal learning tool; contact@example.com)';

/**
 * GET a URL and return the response body as a string.
 *
 * @param {string} url - Address to fetch.
 * @param {Object<string, string>} [headers] - Extra headers; they override the default User-Agent.
 * @param {number} [ms=14000] - Milliseconds after which the request is aborted.
 * @returns {Promise<string>} Response body text.
 * @throws {Error} `HTTP <status>` on a non-2xx response; fetch/abort errors propagate unchanged.
 */
async function text(url, headers = {}, ms = 14000) {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), ms);
  try {
    const res = await fetch(url, { headers: { 'User-Agent': UA, ...headers }, signal: ctrl.signal });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    return await res.text();
  } finally {
    // Always clear the timer so a finished request never leaves a pending abort behind.
    clearTimeout(timer);
  }
}

/**
 * GET a URL via `text()` and parse the body as JSON.
 *
 * @param {string} url - Address to fetch.
 * @param {Object<string, string>} [headers] - Extra headers; they override the default Accept header.
 * @param {number} [ms=14000] - Timeout in milliseconds, forwarded to `text()`.
 * @returns {Promise<any>} Parsed JSON value.
 * @throws {Error|SyntaxError} Errors from `text()`, or a SyntaxError if the body is not JSON.
 */
async function json(url, headers = {}, ms = 14000) {
  return JSON.parse(await text(url, { Accept: 'application/json,text/plain,*/*', ...headers }, ms));
}

// Local alias for `cleanNewsPlain` (imported helper; presumably reduces markup to plain text — confirm in news-text.js).
const strip = (s) => cleanNewsPlain(s);

/**
 * Return the trimmed inner content of the first `<name …>…</name>` element in `block`,
 * or '' when there is no such element (case-insensitive).
 */
const tag = (block, name) =>
  block.match(new RegExp(`<${name}[^>]*>([\\s\\S]*?)<\\/${name}>`, 'i'))?.[1]?.trim() || '';

/**
 * Convert a date-like value to `YYYY-MM-DD` (UTC).
 *
 * `Date.prototype.toISOString` throws a RangeError on an invalid date; returning null
 * instead keeps one malformed timestamp from discarding a whole batch of news items.
 *
 * @param {string|number|Date} value - Anything accepted by the `Date` constructor.
 * @returns {string|null} ISO calendar day, or null when the value cannot be parsed.
 */
function toIsoDay(value) {
  const d = new Date(value);
  return Number.isNaN(d.getTime()) ? null : d.toISOString().slice(0, 10);
}

/**
 * Parse a Google News RSS document into normalized news items.
 *
 * @param {string} xml - Raw RSS XML (null/undefined is treated as an empty document).
 * @param {string} region - 'tw' selects the Taiwan source label; any other value selects the international one.
 * @param {number} [limit=12] - Maximum number of `<item>` elements to read.
 * @returns {Array<Object>} Results of `normalizeNewsItem`, keeping only entries with a truthy `titleZh` and `url`.
 */
function parseGoogleRss(xml, region, limit = 12) {
  // The opening <item> tag must be part of the pattern; without it the first match starts at
  // the beginning of the document and the channel's own <title>/<link> leak into item #1.
  const blocks = [...String(xml || '').matchAll(/<item\b[^>]*>([\s\S]*?)<\/item>/gi)]
    .map((m) => m[1])
    .slice(0, limit);

  return blocks
    .map((block) => {
      const title = cleanGoogleNewsTitle(tag(block, 'title'));
      const link = tag(block, 'link')
        || (block.match(/<link[^>]*>([^<]+)<\/link>/i)?.[1] || '').trim();
      const pub = tag(block, 'pubDate');
      const { anchorText, fontPub } = parseGoogleRssDescription(tag(block, 'description'));
      const sourceName = cleanNewsPlain(tag(block, 'source'));
      const publisher = sourceName || fontPub || 'Google 新聞';

      // Use the description's anchor text as a summary only when it adds something beyond
      // the title and is not just a news.google.com link.
      let description = '';
      if (
        anchorText
        && anchorText !== title
        && anchorText.length > 6
        && !/news\.google\.com/i.test(anchorText)
      ) {
        description = anchorText;
      }

      return normalizeNewsItem({
        title,
        titleZh: title,
        description: description.slice(0, 400),
        descriptionZh: description.slice(0, 400),
        url: link,
        publisher,
        created: pub ? toIsoDay(pub) : null,
        region,
        source: region === 'tw' ? 'Google 新聞(台灣)' : 'Google 新聞(國際)',
      });
    })
    .filter((n) => n.titleZh && n.url);
}

/**
 * Collect Taiwan-locale (zh-TW) Google News results for a symbol.
 *
 * Queries run one after another and stop once 12 unique items (deduplicated by URL) have
 * been gathered. A failing query is skipped so the remaining ones still run.
 *
 * @param {string} symbol - Ticker symbol.
 * @param {string|null} [companyName] - Used as an extra query only when it contains CJK characters.
 * @returns {Promise<Array<Object>>} Up to 12 normalized news items.
 */
export async function fetchTaiwanNews(symbol, companyName) {
  const queries = [
    /NVDA/i.test(symbol) ? '輝達' : null,
    `${symbol} 台股`,
    `${symbol} 美股`,
    companyName && /[\u4e00-\u9fff]/.test(companyName) ? companyName : null,
  ].filter(Boolean);

  const seen = new Set();
  const out = [];
  for (const q of queries) {
    try {
      const url = `https://news.google.com/rss/search?q=${encodeURIComponent(q)}&hl=zh-TW&gl=TW&ceid=TW:zh-Hant`;
      const xml = await text(url, { Accept: 'application/rss+xml, application/xml, text/xml, */*' }, 10000);
      for (const item of parseGoogleRss(xml, 'tw', 15)) {
        const key = item.url;
        if (seen.has(key)) continue;
        seen.add(key);
        out.push(item);
      }
    } catch {
      /* best-effort: move on to the next query */
    }
    if (out.length >= 12) break;
  }
  return out.slice(0, 12);
}

/**
 * Normalize Yahoo-style news records and append the ones whose URL has not been seen yet.
 *
 * @param {Array<Object>|null|undefined} list - Records exposing title/summary/link/publisher/providerPublishTime.
 * @param {Array<Object>} out - Accumulator, mutated in place.
 * @param {Set<string>} seen - URLs already collected, mutated in place.
 */
function appendYahooNews(list, out, seen) {
  for (const n of list || []) {
    const item = normalizeNewsItem({
      title: n.title,
      titleZh: n.title,
      description: strip(n.summary || ''),
      descriptionZh: strip(n.summary || ''),
      url: n.link,
      publisher: n.publisher || 'Yahoo Finance',
      // providerPublishTime is multiplied by 1000 before conversion, i.e. treated as epoch seconds.
      created: n.providerPublishTime ? toIsoDay(n.providerPublishTime * 1000) : null,
      region: 'global',
      source: 'Yahoo Finance',
    });
    if (item.url && !seen.has(item.url)) {
      seen.add(item.url);
      out.push(item);
    }
  }
}

/**
 * Collect international news for a symbol from, in order: the Yahoo session helper, the
 * public Yahoo search endpoint, Google News (en-US) and the Nasdaq news API.
 *
 * Each source is best-effort: a failure in one never prevents the others from being tried.
 * Items are deduplicated by URL.
 *
 * @param {string} symbol - Ticker symbol.
 * @returns {Promise<Array<Object>>} Up to 14 normalized news items.
 */
export async function fetchGlobalNews(symbol) {
  const out = [];
  const seen = new Set();

  try {
    appendYahooNews(await yahooFinanceSearchNews(symbol, 14), out, seen);
  } catch {
    /* best-effort source */
  }

  try {
    const y = await json(`https://query1.finance.yahoo.com/v1/finance/search?q=${encodeURIComponent(symbol)}&newsCount=12&quotesCount=0`);
    appendYahooNews(y.news, out, seen);
  } catch {
    /* best-effort source */
  }

  for (const q of [`${symbol} stock`, `${symbol} earnings CEO`]) {
    try {
      const url = `https://news.google.com/rss/search?q=${encodeURIComponent(q)}&hl=en-US&gl=US&ceid=US:en`;
      const xml = await text(url, {}, 10000);
      for (const item of parseGoogleRss(xml, 'global', 10)) {
        if (seen.has(item.url)) continue;
        seen.add(item.url);
        out.push(item);
      }
    } catch {
      /* best-effort source */
    }
    if (out.length >= 14) break;
  }

  try {
    const d = await json(
      `https://api.nasdaq.com/api/news/topic/articlebysymbol?q=${encodeURIComponent(symbol)}|stocks&offset=0&limit=8&fallback=true`,
      {
        Accept: 'application/json',
        Origin: 'https://www.nasdaq.com',
        Referer: 'https://www.nasdaq.com/',
      },
    );
    for (const r of d?.data?.rows || []) {
      // Row URLs that do not start with "http" are resolved against www.nasdaq.com.
      const url = r.url
        ? (r.url.startsWith('http') ? r.url : `https://www.nasdaq.com${r.url}`)
        : null;
      if (!url || seen.has(url)) continue;
      seen.add(url);
      out.push(normalizeNewsItem({
        title: r.title,
        titleZh: r.title,
        description: strip(r.description || ''),
        descriptionZh: strip(r.description || ''),
        url,
        publisher: r.publisher || 'Nasdaq',
        created: r.created || r.ago,
        region: 'global',
        source: 'Nasdaq',
      }));
    }
  } catch {
    /* best-effort source */
  }

  return out.slice(0, 14);
}

// Ticker -> { cik, name } lookup built from SEC's company_tickers.json; filled on first use.
let _tickerMap = null;
// In-flight load shared by concurrent callers so the SEC file is downloaded only once.
let _tickerMapLoading = null;

/** Download SEC's ticker list and index it by upper-cased ticker. */
async function loadTickerMap() {
  const d = await json('https://www.sec.gov/files/company_tickers.json', { 'User-Agent': SEC_UA });
  // Null-prototype object: a symbol such as "CONSTRUCTOR" must not resolve to an inherited member.
  const map = Object.create(null);
  for (const entry of Object.values(d)) {
    map[String(entry.ticker).toUpperCase()] = {
      cik: String(entry.cik_str).padStart(10, '0'),
      name: entry.title,
    };
  }
  return map;
}

/**
 * Resolve a ticker symbol to its SEC CIK (zero-padded to 10 digits) and company title.
 *
 * @param {string} symbol - Ticker symbol; matched case-insensitively.
 * @returns {Promise<{cik: string, name: string}|null>} Match, or null when the ticker is not listed.
 * @throws {Error} When the ticker list cannot be downloaded or parsed (nothing is cached in that case).
 */
async function tickerToCik(symbol) {
  if (!_tickerMap) {
    if (!_tickerMapLoading) _tickerMapLoading = loadTickerMap();
    try {
      _tickerMap = await _tickerMapLoading;
    } finally {
      // Reset on both success and failure so a failed load can be retried by a later call.
      _tickerMapLoading = null;
    }
  }
  return _tickerMap[String(symbol || '').trim().toUpperCase()] || null;
}

/**
 * Build an extended company profile.
 *
 * When `seed` already carries both `longBusinessSummary` and `sector`, it is reshaped and
 * returned without any network call. Otherwise the Yahoo quoteSummary helper is queried;
 * if that fails, a profile whose fields are all null (and whose `peers` is empty) is returned.
 *
 * @param {string} symbol - Ticker symbol.
 * @param {Object} [seed] - Previously known profile fields.
 * @returns {Promise<Object>} Profile with symbol, longBusinessSummary, website, sector, industry,
 *   country, employees, peers and — except on the failure path — source.
 */
export async function fetchCompanyProfileExtended(symbol, seed = {}) {
  if (seed.longBusinessSummary && seed.sector) {
    return {
      symbol,
      longBusinessSummary: seed.longBusinessSummary,
      website: seed.website || null,
      sector: seed.sector,
      industry: seed.industry || null,
      country: seed.country || null,
      employees: seed.fullTimeEmployees ?? null,
      peers: seed.peers || [],
      source: seed.source || 'Yahoo assetProfile',
    };
  }

  let profile = {
    symbol,
    longBusinessSummary: null,
    website: null,
    sector: null,
    industry: null,
    country: null,
    employees: null,
    peers: [],
  };
  try {
    const d = await yahooQuoteSummary(symbol, 'assetProfile,summaryProfile,peer');
    const p = d?.assetProfile || {};
    const sp = d?.summaryProfile || {};
    // Keep only the segment after the last '.', upper-cased, and drop the company itself.
    const peers = (d?.peer?.symbols || [])
      .map((s) => String(s).split('.').pop()?.toUpperCase())
      .filter((s) => s && s !== symbol);
    profile = {
      symbol,
      longBusinessSummary: p.longBusinessSummary || sp.longBusinessSummary || null,
      website: p.website || sp.website || null,
      sector: p.sector || sp.sector || null,
      industry: p.industry || sp.industry || null,
      country: p.country || sp.country || null,
      employees: p.fullTimeEmployees ?? sp.fullTimeEmployees ?? null,
      peers: [...new Set(peers)].slice(0, 12),
      source: 'Yahoo quoteSummary',
    };
  } catch {
    /* best-effort: fall back to the empty profile */
  }
  return profile;
}

/**
 * Heuristically extract company-like names (capitalised phrases ending in Inc./Corp./
 * Corporation/Ltd./LLC/Co.) from a passage of text.
 *
 * @param {string} section - Plain text to scan.
 * @returns {string[]} Up to 15 unique names, in order of first appearance.
 */
function extractNamedEntities(section) {
  const names = new Set();
  const patterns = [
    // Relationship keyword followed by up to 400 characters of the same sentence.
    /(?:customers?|clients?|suppliers?|competitors?|partners?)[^.]{0,400}/gi,
    // A company-like name anywhere in the text.
    /\b([A-Z][A-Za-z0-9&.\- ]{2,40}(?:Inc\.|Corp\.|Corporation|Ltd\.|LLC|Co\.))/g,
  ];
  for (const re of patterns) {
    for (const m of section.matchAll(re)) {
      // The first pattern has no capture group, so fall back to the whole match.
      const chunk = m[1] || m[0];
      const hits = chunk.match(/\b([A-Z][A-Za-z0-9&.\- ]{2,35}(?:Inc\.|Corp\.|Corporation|Ltd\.|LLC|Co\.))/g) || [];
      for (const h of hits) {
        const n = h.trim();
        if (n.length > 3 && n.length < 50) names.add(n);
      }
    }
  }
  return [...names].slice(0, 15);
}

/**
 * Heuristically extract supplier names from 10-K plain text: company-like names plus a fixed
 * list of well-known semiconductor/manufacturing vendors found near supplier-related phrases.
 *
 * @param {string} plain - Plain-text filing content.
 * @returns {string[]} Up to 18 unique names.
 */
function extract10kSuppliers(plain) {
  const names = new Set();
  const chunks = [
    plain.match(/(?:suppliers?|supply\s+chain|sole\s+supplier|third[- ]party\s+manufactur)[^.]{0,2000}/gi) || [],
    plain.match(/(?:we\s+(?:rely|depend)\s+(?:on|upon)\s+)[^.]{0,800}/gi) || [],
    plain.match(/(?:contract\s+manufactur|foundry)[^.]{0,1200}/gi) || [],
  ].flat();
  for (const block of chunks) {
    for (const n of extractNamedEntities(block)) names.add(n);
    for (const m of block.matchAll(/\b(TSMC|Taiwan Semiconductor|Samsung|SK\s*Hynix|Micron|ASML|Synopsys|Cadence|Foxconn|Hon\s*Hai)\b/gi)) {
      names.add(m[1].trim());
    }
  }
  return [...names].slice(0, 18);
}

/**
 * Heuristically extract customer names from 10-K plain text: company-like names plus fixed
 * lists of well-known technology companies found near customer-related phrases.
 *
 * @param {string} plain - Plain-text filing content.
 * @returns {string[]} Up to 18 unique names.
 */
function extract10kCustomers(plain) {
  const names = new Set();
  const chunks = plain.match(/(?:major\s+customers?|principal\s+customers?|customers?\s+include|accounted\s+for\s+\d+%)[^.]{0,2000}/gi) || [];
  for (const block of chunks) {
    for (const n of extractNamedEntities(block)) names.add(n);
    for (const m of block.matchAll(/\b(Microsoft|Amazon|Google|Alphabet|Meta|Apple|Tesla|Oracle)\b/gi)) {
      names.add(m[1].trim());
    }
    for (const m of block.matchAll(/\b(Dell\s+Technologies|Hewlett[\s-]?Packard\s+Enterprise|Super\s*Micro\s+Computer|Lenovo|Cisco)\b/gi)) {
      names.add(m[1].trim());
    }
  }
  return [...names].slice(0, 18);
}

/** Result returned by `fetch10kChainHints` when no usable 10-K can be located. */
const emptyChainHints = () => ({ excerpt: null, customers: [], suppliers: [], competitors: [] });

/**
 * Download the first 10-K listed in the company's recent SEC filings and derive
 * supply-chain hints from its text.
 *
 * @param {string} symbol - Ticker symbol.
 * @returns {Promise<Object>} `{ excerpt, customers, suppliers, competitors, source, filingUrl, companyName }`,
 *   or `{ excerpt: null, customers: [], suppliers: [], competitors: [] }` when the ticker or a 10-K is not found.
 * @throws {Error} Network, HTTP and JSON errors from the SEC requests propagate to the caller.
 */
export async function fetch10kChainHints(symbol) {
  const hit = await tickerToCik(symbol);
  if (!hit) return emptyChainHints();

  const sub = await json(`https://data.sec.gov/submissions/CIK${hit.cik}.json`, { 'User-Agent': SEC_UA });
  const f = sub.filings?.recent || {};
  let accn = null;
  let primary = null;
  for (let i = 0; i < (f.form || []).length; i++) {
    if (f.form[i] === '10-K') {
      accn = f.accessionNumber[i];
      primary = f.primaryDocument?.[i];
      break;
    }
  }
  if (!accn || !primary) return emptyChainHints();

  // Archive path uses the CIK without leading zeros and the accession number without dashes.
  const accNo = accn.replace(/-/g, '');
  const url = `https://www.sec.gov/Archives/edgar/data/${Number(hit.cik)}/${accNo}/${primary}`;
  const html = await text(url, { 'User-Agent': SEC_UA }, 28000);
  // Only the first 180,000 characters of the cleaned text are scanned.
  const plain = strip(html).slice(0, 180000);

  const custSec = plain.match(/(?:major customers?|principal customers?|customers? include)[^.]{0,1200}/i)?.[0] || '';
  const supSec = plain.match(/(?:suppliers?|supply chain|manufacturing)[^.]{0,1200}/i)?.[0] || '';
  const compSec = plain.match(/(?:competition|competitors?)[^.]{0,1200}/i)?.[0] || '';
  const bizSec = plain.match(/(?:business overview|description of business)[^.]{0,2500}/i)?.[0] || plain.slice(0, 2500);

  const customers = [...new Set([...extractNamedEntities(custSec), ...extract10kCustomers(plain)])];
  const suppliers = [...new Set([...extractNamedEntities(supSec), ...extract10kSuppliers(plain)])];

  return {
    excerpt: bizSec.slice(0, 2000),
    customers,
    suppliers,
    competitors: extractNamedEntities(compSec),
    source: 'SEC 10-K',
    filingUrl: url,
    companyName: hit.name,
  };
}

// Keywords (English and Traditional Chinese) that mark a headline as management/governance related.
const MGMT_KW = /chief executive|ceo|cfo|coo|president|board|director|executive|resign|appoint|compensation|guidance|layoff|restructur|merger|acquisition|investigation|subpoena|執行長|財務長|董事|人事|裁員|併購|收購|指引|調查/i;

/**
 * Keep news items whose title or description matches a management-related keyword.
 *
 * @param {Array<Object>|null|undefined} news - Items exposing `title` and `description`.
 * @returns {Array<Object>} Up to 10 matching items, in their original order.
 */
export function filterManagementNews(news) {
  return (news || [])
    .filter((n) => MGMT_KW.test(`${n.title} ${n.description}`))
    .slice(0, 10);
}

/**
 * List the 8-K filings (form type starting with "8-K") found in the company's recent SEC filings.
 *
 * @param {string} symbol - Ticker symbol.
 * @returns {Promise<Array<{form: string, filedDate: string, description: string, accession: string, url: string}>>}
 *   Up to 8 entries; an empty array when the ticker is unknown. Every entry's `url` is the
 *   company's EDGAR 8-K listing page rather than a link to the individual filing.
 * @throws {Error} Network, HTTP and JSON errors from the SEC requests propagate to the caller.
 */
export async function fetchRecent8kHeadlines(symbol) {
  const hit = await tickerToCik(symbol);
  if (!hit) return [];

  const sub = await json(`https://data.sec.gov/submissions/CIK${hit.cik}.json`, { 'User-Agent': SEC_UA });
  const f = sub.filings?.recent || {};
  const out = [];
  for (let i = 0; i < (f.form || []).length && out.length < 8; i++) {
    if (!/^8-K/i.test(f.form[i])) continue;
    out.push({
      form: f.form[i],
      filedDate: f.filingDate[i],
      description: f.primaryDocDescription?.[i] || '',
      accession: f.accessionNumber[i],
      url: `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=${hit.cik}&type=8-K&dateb=&owner=include&count=40`,
    });
  }
  return out;
}

/**
 * Gather every research source for a symbol.
 *
 * The profile, 10-K hints and 8-K list are fetched in parallel first, because the company
 * name they may yield feeds the Taiwan news query; both news fetches then run in parallel.
 * Each source falls back to an empty value on failure, so this function does not reject
 * because of a single failing source.
 *
 * @param {string} symbol - Ticker symbol; trimmed and upper-cased before use.
 * @param {Object} [profile] - Known profile fields (`name` / `companyName` are used when present).
 * @returns {Promise<Object>} `{ symbol, gatheredAt, profileExt, hints, headlines8k, newsTw,
 *   newsGlobal, managementNewsRaw, companyName }`.
 */
export async function gatherIntelSources(symbol, profile = {}) {
  symbol = String(symbol || '').trim().toUpperCase();
  // The parameter default only covers `undefined`; also tolerate an explicit null.
  const seed = profile ?? {};

  const [profileExt, hints, headlines] = await Promise.all([
    fetchCompanyProfileExtended(symbol, seed).catch(() => ({})),
    fetch10kChainHints(symbol).catch(() => ({})),
    fetchRecent8kHeadlines(symbol).catch(() => []),
  ]);

  const companyName = seed.name || seed.companyName || hints?.companyName || null;

  const [newsTw, newsGlobal] = await Promise.all([
    fetchTaiwanNews(symbol, companyName).catch(() => []),
    fetchGlobalNews(symbol).catch(() => []),
  ]);

  const mgmtRaw = filterManagementNews([...newsTw, ...newsGlobal]);

  return {
    symbol,
    gatheredAt: new Date().toISOString(),
    profileExt,
    hints,
    headlines8k: headlines,
    newsTw,
    newsGlobal,
    managementNewsRaw: mgmtRaw,
    // `companyName` already includes the 10-K name, so the last resort is the profile's symbol.
    companyName: companyName || profileExt?.symbol,
  };
}