// ═══════════════════════════════════════════════════════════
// companyintel.js — company research data: management, insider trades, news, industry chain
// ═══════════════════════════════════════════════════════════
import { getCompanyIntelCustom, getCompanyIntelEnriched } from './db.js';
import {
  localizeIntel,
  mergeCustomIntel,
  sanitizeOfficers,
  isOfficerRow,
  looksLikePersonName,
  looksLikeExecutiveTitle,
} from './companyintel-i18n.js';
import { gatherIntelSources, fetch10kChainHints } from './companyintel-sources.js';
import { mergeIndustryChainWithHints, buildCompanyResources } from './companyintel-links.js';
import {
  mergeNewsIntoChain,
  finalizeIndustryChain,
  layoutPeersIntoGrid,
  sanitizeChainExcerpt,
  ensureDownstreamBuyers,
} from './companyintel-chain.js';
import { applyEnrichedToIntel, syncCompanyIntelEnriched, attachIntelSyncStatus } from './companyintel-ai.js';
import { normalizeNewsList } from './news-text.js';
import { yahooQuoteSummary, resetYahooAuth, sleep } from './yahoo-session.js';

/**
 * Re-normalizes the news fields of an intel payload. Intended to be applied even on an
 * API cache hit, because older cached payloads may still contain escaped HTML from Google RSS.
 *
 * @param {object} payload - Intel payload; non-objects are returned unchanged.
 * @returns {object} Copy of the payload with `newsTw`, `newsGlobal` and `news` (max 20) normalized.
 */
export function sanitizeIntelNewsPayload(payload) {
  if (!payload || typeof payload !== 'object') return payload;
  const newsTw = normalizeNewsList(payload.newsTw);
  const newsGlobal = normalizeNewsList(payload.newsGlobal);
  return {
    ...payload,
    newsTw,
    newsGlobal,
    news: normalizeNewsList(payload.news?.length ? payload.news : [...newsTw, ...newsGlobal]).slice(0, 20),
  };
}

const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36';
const SEC_UA = 'EmmyInvestDashboard/1.0 (personal learning tool; contact@example.com)';

/**
 * GETs a URL and returns the response body as text.
 *
 * @param {string} url
 * @param {object} [headers] - Extra headers; they override the default browser User-Agent.
 * @param {number} [ms] - Abort timeout in milliseconds.
 * @returns {Promise<string>}
 * @throws {Error} `HTTP <status>` on a non-2xx response; an abort error on timeout.
 */
async function text(url, headers = {}, ms = 12000) {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), ms);
  try {
    const res = await fetch(url, { headers: { 'User-Agent': UA, ...headers }, signal: ctrl.signal });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    return await res.text();
  } finally {
    clearTimeout(timer);
  }
}

/** Same as `text()` but sends a JSON-friendly Accept header and parses the body as JSON. */
async function json(url, headers = {}, ms = 12000) {
  return JSON.parse(await text(url, { Accept: 'application/json,text/plain,*/*', ...headers }, ms));
}

/** Removes markup tags and collapses whitespace. */
const strip = (s) => String(s || '').replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();

/**
 * Like `strip`, but also turns non-breaking-space entities into plain spaces.
 * NOTE(review): the entity pattern was blanked out in the copy under review (it read `/ /g`);
 * `&nbsp;` is the presumed original, the numeric forms are added because EDGAR HTML uses them.
 */
const stripHtml = (s) => String(s || '')
  .replace(/<[^>]+>/g, ' ')
  .replace(/&nbsp;|&#160;|&#xA0;/gi, ' ')
  .replace(/\s+/g, ' ')
  .trim();

/** Parses a number out of a string that may contain `$`, `,`, `%` or spaces; null when not numeric. */
const num = (s) => {
  if (s == null) return null;
  const cleaned = String(s).replace(/[$,%\s]/g, '');
  if (cleaned === '') return null; // Number('') is 0, which would invent a value
  const n = Number(cleaned);
  return Number.isFinite(n) ? n : null;
};

/**
 * Returns the trimmed inner text of the first `<name>…</name>` element in `src`, or null.
 * Both backslashes must be doubled inside the template literal: a lone `\S` is cooked to a
 * plain "S", which silently turned the class into "whitespace or the letter S".
 */
const tag = (src, name) =>
  String(src ?? '').match(new RegExp(`<${name}>([\\s\\S]*?)<\\/${name}>`, 'i'))?.[1]?.trim() || null;

/** Reads `<name><value>…</value></name>`, the wrapper shape Form 4 uses for most fields. */
const tagValue = (src, name) => tag(tag(src, name), 'value');

/** True for the two spellings of a set boolean flag ("1" or "true"). */
const isSetFlag = (v) => v === '1' || String(v ?? '').toLowerCase() === 'true';

let _tickerMap = null;

/**
 * Looks up the SEC CIK for a ticker, downloading and caching the SEC ticker list on first use.
 *
 * @param {string} symbol - Ticker; matched case-insensitively.
 * @returns {Promise<{cik: string, name: string} | null>} `cik` is zero-padded to 10 digits.
 */
async function tickerToCik(symbol) {
  if (!_tickerMap) {
    const d = await json('https://www.sec.gov/files/company_tickers.json', { 'User-Agent': SEC_UA });
    // Build locally and publish only when complete, so a failure mid-way cannot leave a
    // half-filled cache behind. A Map avoids accidental hits on Object.prototype keys.
    const built = new Map();
    for (const row of Object.values(d)) {
      built.set(String(row.ticker).toUpperCase(), {
        cik: String(row.cik_str).padStart(10, '0'),
        name: row.title,
      });
    }
    _tickerMap = built;
  }
  return _tickerMap.get(String(symbol || '').trim().toUpperCase()) || null;
}

/**
 * Fetches the company profile and officer list from the Yahoo `assetProfile` module.
 * Never throws: any failure yields `{ officers: [], source: null }`.
 */
async function fetchManagement(symbol) {
  try {
    const r = await yahooQuoteSummary(symbol, 'assetProfile');
    const p = r?.assetProfile || {};
    const officers = (p.companyOfficers || []).slice(0, 12).map((o) => ({
      name: o.name || '',
      title: o.title || '',
      age: o.age ?? null,
      fiscalYear: o.fiscalYear ?? null,
      totalPay: o.totalPay?.raw ?? null,
    }));
    return {
      sector: p.sector || null,
      industry: p.industry || null,
      website: p.website || null,
      fullTimeEmployees: p.fullTimeEmployees ?? null,
      longBusinessSummary: p.longBusinessSummary || null,
      officers: sanitizeOfficers(officers).filter((o) => o.name),
      source: 'Yahoo assetProfile',
    };
  } catch {
    // Best effort by design: callers fall back to SEC filings.
    return { officers: [], source: null };
  }
}

/**
 * Resolves management data with fallbacks: Yahoo, one Yahoo retry after a pause and an auth
 * reset (for when Yahoo is throttling), then the latest SEC 10-K, then the latest DEF 14A.
 * The SEC fallbacks only yield results for tickers present in the SEC ticker list.
 */
async function resolveManagement(symbol) {
  const countOf = (x) => (x.officers || []).length;
  let m = await fetchManagement(symbol);
  if (countOf(m) >= 2) return m;

  await sleep(800);
  resetYahooAuth();
  const retry = await fetchManagement(symbol);
  // Prefer the retry when it has more officers, and also when the first call failed outright
  // (no source) but the retry succeeded — otherwise sector/summary fields would be thrown away.
  if (countOf(retry) > countOf(m) || (!m.source && retry.source)) m = retry;
  if (countOf(m) >= 2) return m;

  const secOfficers = sanitizeOfficers(await fetchOfficersFromSec10k(symbol).catch(() => []));
  if (secOfficers.length) return { ...m, officers: secOfficers, source: 'SEC 10-K' };

  const defOfficers = await fetchOfficersFromDef14a(symbol).catch(() => []);
  if (defOfficers.length) return { ...m, officers: defOfficers, source: 'SEC DEF 14A' };

  return m;
}

/**
 * Loads the "recent filings" table for a ticker from the SEC submissions API.
 *
 * @returns {Promise<{cik: number, recent: object} | null>} null when the ticker has no CIK.
 */
async function fetchRecentFilings(symbol) {
  const hit = await tickerToCik(symbol);
  if (!hit) return null;
  const sub = await json(`https://data.sec.gov/submissions/CIK${hit.cik}.json`, { 'User-Agent': SEC_UA });
  return { cik: Number(hit.cik), recent: sub.filings?.recent || {} };
}

/**
 * Downloads the primary document of the first listed filing of the given form type.
 *
 * @returns {Promise<string | null>} Document HTML, or null when no such filing is listed.
 */
async function fetchLatestFilingHtml(symbol, formType, ms) {
  const filings = await fetchRecentFilings(symbol);
  if (!filings) return null;
  const { cik, recent } = filings;
  const i = (recent.form || []).indexOf(formType);
  if (i < 0) return null;
  const accn = recent.accessionNumber[i];
  const primary = recent.primaryDocument?.[i];
  if (!accn || !primary) return null;
  const accNo = accn.replace(/-/g, '');
  return text(`https://www.sec.gov/Archives/edgar/data/${cik}/${accNo}/${primary}`, { 'User-Agent': SEC_UA }, ms);
}

/**
 * Extracts officers from the latest proxy statement (DEF 14A). Used when both Yahoo and the
 * 10-K fail to produce a list (the original comment cites AAPL as an example).
 *
 * @returns {Promise<Array<{name: string, title: string, source: string}>>}
 */
async function fetchOfficersFromDef14a(symbol) {
  const html = await fetchLatestFilingHtml(symbol, 'DEF 14A', 28000);
  if (html == null) return [];

  const uniq = new Map(); // lower-cased name -> officer; a later hit overwrites an earlier one
  const addPair = (name, title) => {
    if (!isOfficerRow(name, title)) return;
    uniq.set(name.toLowerCase(), { name, title: stripHtml(title), source: 'SEC DEF 14A' });
  };

  // Pass 1: the comma-separated nominee list after "Election of Directors:".
  const election = html.match(/Election of Directors:\s*([^<]{20,400})/i)?.[1];
  if (election) {
    for (const name of election.split(',').map((s) => stripHtml(s)).filter(Boolean)) {
      if (looksLikePersonName(name)) addPair(name, 'Director');
    }
  }

  // Pass 2: for each known title label, take the person-like name that ends the 160
  // characters immediately before each occurrence of the label.
  const labels = [
    'Chief Executive Officer',
    'Chief Financial Officer',
    'Chief Operating Officer',
    'Senior Vice President',
    'General Counsel',
  ];
  for (const label of labels) {
    let idx = 0;
    while (uniq.size < 14) {
      idx = html.indexOf(label, idx);
      if (idx < 0) break;
      const before = stripHtml(html.slice(Math.max(0, idx - 160), idx));
      const nameM = before.match(/([A-Z][a-z]+(?:\s+[A-Z]\.?)?\s+[A-Z][a-z]+)\s*$/);
      if (nameM) addPair(nameM[1], label);
      idx += label.length;
    }
  }

  // Pass 3: try to refine each title from the text following the first mention of the name.
  // NOTE(review): in the copy under review everything between the "<" of this regex's
  // lookahead and the "=>" of the final filter had been stripped. The lookahead terminator,
  // the title assignment and the `sanitizeOfficers(` wrapper below are a conservative
  // reconstruction (looksLikeExecutiveTitle is imported and used nowhere else in the file) —
  // confirm against version control.
  for (const entry of uniq.values()) {
    const pos = html.indexOf(entry.name);
    if (pos < 0) continue;
    const chunk = html.slice(pos, pos + 520);
    const titleM = chunk.match(/((?:Former\s+)?(?:Senior|Executive|Chief|General)[\s\S]{8,120}?)(?=\s*<)/);
    const refined = titleM ? stripHtml(titleM[1]) : '';
    if (refined && looksLikeExecutiveTitle(refined)) entry.title = refined;
  }

  // Plain directors are kept only when the whole list is short (4 entries or fewer).
  return sanitizeOfficers(
    [...uniq.values()].filter((o) => o.title !== 'Director' || uniq.size <= 4).slice(0, 12),
  );
}

/**
 * Extracts officers from HTML tables in the latest 10-K, preferring the region that starts
 * at "Item 10 … Executive Officers/Directors" and otherwise the first 120k characters.
 *
 * @returns {Promise<Array<{name: string, title: string, source: string}>>} At most 12, deduped by name.
 */
async function fetchOfficersFromSec10k(symbol) {
  const html = await fetchLatestFilingHtml(symbol, '10-K', 25000);
  if (html == null) return [];

  const item10 = html.search(/Item\s*10[\s\S]{0,120}(Executive Officers|Directors)/i);
  const slice = item10 >= 0 ? html.slice(item10, item10 + 120000) : html.slice(0, 120000);

  // NOTE(review): the opening-tag halves of both table patterns were stripped in the copy
  // under review; restored from their closing halves. `\b` keeps <thead>/<track> from
  // being read as a cell/row.
  const rows = [...slice.matchAll(/<tr\b[^>]*>([\s\S]*?)<\/tr>/gi)].map((m) => m[1]);
  const uniq = new Map();
  for (const row of rows) {
    const cells = [...row.matchAll(/<t[dh]\b[^>]*>([\s\S]*?)<\/t[dh]>/gi)].map((m) => stripHtml(m[1]));
    if (cells.length < 2) continue;
    const title = cells.find(
      (c) => /Chief|President|Officer|Counsel|Operations|Financial|Accounting|Field/i.test(c) && c.length < 140,
    );
    const name = cells.find(
      (c) => c.length > 3
        && c.length < 70
        && !/Chief|President|Officer|Director|Age|Name|Title|NVIDIA|Common|Stock|Item|Action/i.test(c)
        && /[A-Za-z]/.test(c),
    );
    if (!isOfficerRow(name, title)) continue;
    const key = name.toLowerCase();
    if (!uniq.has(key)) uniq.set(key, { name, title, source: 'SEC 10-K' }); // first mention wins
  }
  return [...uniq.values()].slice(0, 12);
}

/**
 * Parses one Form 4 full-text submission into a summary of its non-derivative transactions.
 *
 * @param {string} txt - Raw `.txt` submission.
 * @param {{date: string, url: string}} filing - Index entry the submission came from.
 * @returns {object} Owner/issuer info, up to 8 transactions, share totals and a
 *   `signal` of 'acquire' | 'dispose' | 'mixed'.
 */
function parseForm4(txt, filing) {
  // NOTE(review): this statement and the opening tags of the four patterns below were stripped
  // in the copy under review; reconstructed from the surviving closing tags. When the marker is
  // missing the whole text is scanned (a bare slice(-1) would keep only the last character).
  const start = txt.indexOf('<ownershipDocument');
  const xml = start >= 0 ? txt.slice(start) : txt;

  const ownerBlock = xml.match(/<reportingOwner>([\s\S]*?)<\/reportingOwner>/i)?.[1] || '';
  const issuerBlock = xml.match(/<issuer>([\s\S]*?)<\/issuer>/i)?.[1] || '';
  const relBlock = ownerBlock.match(/<reportingOwnerRelationship>([\s\S]*?)<\/reportingOwnerRelationship>/i)?.[1] || '';
  const txBlocks = [...xml.matchAll(/<nonDerivativeTransaction>([\s\S]*?)<\/nonDerivativeTransaction>/gi)]
    .map((m) => m[1]);

  const transactions = txBlocks.slice(0, 8)
    .map((b) => ({
      date: tagValue(b, 'transactionDate'),
      code: tag(b, 'transactionCode') || null,
      acquiredDisposed: tagValue(b, 'transactionAcquiredDisposedCode'),
      shares: num(tagValue(b, 'transactionShares')),
      price: num(tagValue(b, 'transactionPricePerShare')),
      ownedAfter: num(tagValue(b, 'sharesOwnedFollowingTransaction')),
    }))
    .filter((t) => t.shares != null || t.code);

  const sumShares = (side) => transactions
    .filter((t) => t.acquiredDisposed === side)
    .reduce((acc, t) => acc + (t.shares || 0), 0);
  const acquired = sumShares('A');
  const disposed = sumShares('D');

  // Accept both boolean spellings ("1" and "true").
  const isDirector = isSetFlag(tag(relBlock, 'isDirector'));
  const isOfficer = isSetFlag(tag(relBlock, 'isOfficer'));

  let signal = 'mixed';
  if (acquired > disposed) signal = 'acquire';
  else if (disposed > acquired) signal = 'dispose';

  return {
    filingDate: filing.date,
    reportDate: tag(xml, 'periodOfReport'),
    owner: tag(ownerBlock, 'rptOwnerName') || strip(txt.match(/COMPANY CONFORMED NAME:\s*([^\n]+)/)?.[1]),
    issuer: tag(issuerBlock, 'issuerName'),
    title: tag(relBlock, 'officerTitle') || (isDirector ? 'Director' : ''),
    isDirector,
    isOfficer,
    acquired,
    disposed,
    signal,
    transactions,
    url: filing.url,
  };
}

/**
 * Fetches and parses the first 5 Form 4 filings listed for a ticker.
 * Filings that fail to download or parse are skipped.
 *
 * @returns {Promise<object[]>} Parsed filings in listing order; [] when the ticker has no CIK.
 */
async function fetchInsiderTransactions(symbol) {
  const MAX_FILINGS = 5;
  const listing = await fetchRecentFilings(symbol);
  if (!listing) return [];
  const { cik, recent } = listing;

  const filings = [];
  const forms = recent.form || [];
  for (let i = 0; i < forms.length && filings.length < MAX_FILINGS; i++) {
    if (forms[i] !== '4') continue;
    const accn = recent.accessionNumber[i];
    const accNo = accn.replace(/-/g, '');
    filings.push({
      date: recent.filingDate[i],
      accn,
      url: `https://www.sec.gov/Archives/edgar/data/${cik}/${accNo}/${accn}.txt`,
    });
  }

  const out = [];
  // Deliberately one request at a time.
  for (const filing of filings) {
    try {
      out.push(parseForm4(await text(filing.url, { 'User-Agent': SEC_UA }), filing));
    } catch {
      /* keep going: one bad filing must not drop the rest */
    }
  }
  return out;
}

/**
 * Overlays management fields on the caller's profile without letting null/undefined
 * management values erase what the caller already knows. Returns a fresh object each call.
 */
function mergeProfile(profile, management) {
  const merged = { ...profile };
  for (const [key, value] of Object.entries(management || {})) {
    if (value != null) merged[key] = value;
  }
  return merged;
}

/**
 * Builds a template industry chain from the profile's industry/sector wording.
 *
 * @param {string} symbol - Removed from the returned peer list.
 * @param {{industry?: string, sector?: string}} [profile]
 * @returns {{upstream: string[], upstreamDetail: object[], downstream: string[],
 *   downstreamDetail: object[], peers: string[]}}
 */
function industryChainFallback(symbol, profile = {}) {
  const industryOnly = String(profile.industry || '').toLowerCase();
  const combined = `${profile.industry || ''} ${profile.sector || ''}`.toLowerCase();
  const maps = [
    {
      match: /semiconductor|chip|accelerated|technology/,
      upstream: ['EDA/IP 軟體', '晶圓代工', '先進封裝', 'HBM/記憶體', '半導體設備', 'ABF/載板'],
      upstreamNamed: ['TSM', 'ASML', 'AMAT', 'LRCX', 'KLAC', 'MU', 'SNPS', 'CDNS'],
      peers: ['AMD', 'AVGO', 'QCOM', 'MRVL', 'TSM', 'ASML', 'MU'],
      downstream: ['雲端資料中心', '企業 AI 軟體', '自駕車/機器人', '遊戲與工作站'],
      downstreamNamed: [
        { label: 'AI 伺服器 OEM', entities: ['DELL', 'HPE', 'SMCI'], note: '採購 GPU 組裝銷售' },
        { label: '雲端與大型企業', entities: ['MSFT', 'AMZN', 'GOOGL', 'META'], note: '資料中心 GPU 需求' },
      ],
      midstream: { role: '晶片設計/GPU 平台', segments: ['資料中心 GPU', '遊戲 GPU', '軟體 CUDA'] },
    },
    {
      match: /software|internet|communication|media/,
      upstream: ['雲端基礎設施', '資料中心', '廣告技術', '內容/資料供應商'],
      peers: ['MSFT', 'GOOGL', 'META', 'AMZN', 'CRM', 'ORCL'],
      downstream: ['企業客戶', '消費者流量', '開發者生態', '廣告主'],
      midstream: { role: '軟體/平台', segments: ['訂閱', '廣告', '雲端服務'] },
    },
  ];
  const generic = {
    upstream: ['原物料/零組件', '設備與服務供應商'],
    upstreamNamed: [],
    peers: [],
    downstream: ['終端客戶', '企業採購', '通路夥伴'],
    downstreamNamed: [],
    midstream: { role: profile.industry || '核心業務', segments: [] },
  };
  // Match the specific industry first. Testing industry+sector together let the sector word
  // "technology" send every software company to the semiconductor template.
  const hit = maps.find((m) => m.match.test(industryOnly))
    || maps.find((m) => m.match.test(combined))
    || generic;

  const asDetail = (label) => ({ label, entities: [label], note: '' });
  const upstreamDetail = hit.upstream.map(asDetail);
  if (hit.upstreamNamed?.length) {
    upstreamDetail.unshift({ label: '供應商', entities: hit.upstreamNamed, note: '產業鏈慣例' });
  }
  const namedBuyers = (hit.downstreamNamed || []).map((d) => ({
    label: d.label || '購買方',
    entities: d.entities || [],
    note: d.note || '產業鏈慣例',
  }));

  // NOTE(review): `midstream` is defined on every template but is not part of the result.
  return {
    upstream: hit.upstream,
    upstreamDetail,
    downstream: hit.downstream,
    downstreamDetail: [...namedBuyers, ...hit.downstream.map(asDetail)],
    peers: hit.peers.filter((s) => s !== symbol),
  };
}

/**
 * Full sync: resolves management and delegates to `syncCompanyIntelEnriched`
 * (per the original note: multi-source news + AI structuring + DB write).
 *
 * @param {string} symbol
 * @param {object} [profile]
 * @param {{force?: boolean, useAI?: boolean, management?: object}} [opts] - `useAI` defaults to
 *   true; pass `management` to reuse an already resolved result instead of fetching it again.
 * @returns {Promise<object>} Whatever `syncCompanyIntelEnriched` resolves to.
 */
export async function runCompanyIntelSync(symbol, profile = {}, opts = {}) {
  const management = opts.management ?? await resolveManagement(symbol);
  return syncCompanyIntelEnriched(symbol, mergeProfile(profile, management), {
    force: opts.force === true,
    useAI: opts.useAI !== false,
    management,
  });
}

/**
 * Turns per-section availability flags into `{ ...fields, notes }`, where `notes` holds one
 * user-facing message for every section that is missing.
 */
function buildDataHealth(fields) {
  const notes = [];
  if (!fields.officers) notes.push('管理層名單未取得(可按「強制更新」重試)');
  if (!fields.newsTw && !fields.newsGlobal) notes.push('新聞來源暫時無回應');
  if (!fields.insiders && fields.usListing) notes.push('近期無 SEC Form 4 或 CIK 對應失敗');
  if (!fields.insiders && !fields.usListing) notes.push('非美股標的,無 SEC 內部人申報');
  if (!fields.profileDesc) notes.push('公司簡介待同步後整理為中文');
  return { ...fields, notes };
}

/**
 * Assembles the full company-intel payload: management, insider filings, news, industry
 * chain, resources and a data-health summary.
 *
 * @param {string} symbol - Ticker; trimmed and upper-cased before use.
 * @param {object} [profile] - Caller-known profile fields (name, industry, website, …).
 * @param {{sync?: boolean, force?: boolean, useAI?: boolean}} [opts] - `sync` runs the full
 *   sync first; `force`/`useAI` are forwarded to it.
 * @returns {Promise<object>} Intel payload with news fields normalized.
 */
export async function getCompanyIntel(symbol, profile = {}, opts = {}) {
  const sym = String(symbol || '').trim().toUpperCase();
  const management = await resolveManagement(sym);
  // Treated as a US listing when it has no exchange suffix (no "."), e.g. "BRK-B" but not "2330.TW".
  const usListing = /^[A-Z][A-Z0-9.\-]{0,7}$/.test(sym) && !sym.includes('.');

  let bundle = null;
  let enrichedRow = getCompanyIntelEnriched(sym);
  if (opts.sync) {
    // Hand over the management we already resolved so the sync does not hit Yahoo/SEC again.
    const sync = await runCompanyIntelSync(sym, mergeProfile(profile, management), {
      force: opts.force,
      useAI: opts.useAI,
      management,
    });
    bundle = sync.bundle;
    enrichedRow = { data: sync.enriched, sources: sync.sources, updatedAt: Date.now() };
  } else if (!enrichedRow) {
    bundle = await gatherIntelSources(sym, mergeProfile(profile, management)).catch(() => null);
  }

  const insiders = usListing ? await fetchInsiderTransactions(sym).catch(() => []) : [];

  let newsTw = bundle?.newsTw || [];
  let newsGlobal = bundle?.newsGlobal || [];
  if (!newsTw.length && !newsGlobal.length && !opts.sync) {
    // No news yet (cached enrichment without a bundle, or an empty first gather): try once more.
    const b = await gatherIntelSources(sym, mergeProfile(profile, management)).catch(() => null);
    if (b) {
      newsTw = b.newsTw || [];
      newsGlobal = b.newsGlobal || [];
      bundle = b;
    }
  }

  const custom = getCompanyIntelCustom(sym);
  let industryChain = industryChainFallback(sym, mergeProfile(profile, management));
  const hints = bundle?.hints
    || (usListing && !opts.sync ? await fetch10kChainHints(sym).catch(() => ({})) : {});
  const hasHints = Boolean(hints && Object.keys(hints).length);
  if (hasHints) {
    industryChain = mergeIndustryChainWithHints(
      sym,
      industryChain,
      hints,
      bundle?.profileExt || {},
      mergeProfile(profile, management),
    );
  }

  let profileZh = null;
  if (management.longBusinessSummary) {
    profileZh = {
      description: management.longBusinessSummary.slice(0, 500),
      businessModel: management.industry || profile.industry || '',
    };
  } else if (bundle?.profileExt?.longBusinessSummary) {
    profileZh = {
      description: bundle.profileExt.longBusinessSummary.slice(0, 500),
      businessModel: bundle.profileExt.industry || '',
    };
  }

  const raw = {
    symbol: sym,
    updatedAt: new Date().toISOString(),
    profileZh,
    management: { ...management, searches: [] },
    insiders,
    news: normalizeNewsList([...newsTw, ...newsGlobal]).slice(0, 20),
    newsTw: normalizeNewsList(newsTw),
    newsGlobal: normalizeNewsList(newsGlobal),
    managementBrief: (bundle?.managementNewsRaw || []).slice(0, 6).map((n) => ({
      date: n.created,
      headline: n.titleZh || n.title,
      summary: (n.descriptionZh || n.description || '').slice(0, 160),
      impact: 'neutral',
      source: n.publisher,
      url: n.url,
    })),
    industryChain,
    sources: [
      management.source || 'Yahoo assetProfile',
      'SEC Form 4',
      'Google 新聞(台灣)',
      'Google 新聞(國際)',
      'Nasdaq / Yahoo Finance',
      ...(enrichedRow?.sources || []),
      ...(custom ? ['本機自訂'] : []),
    ].filter(Boolean),
    customUpdatedAt: custom?.updatedAt ? new Date(custom.updatedAt).toISOString() : null,
    enrichedAt: enrichedRow?.updatedAt ? new Date(enrichedRow.updatedAt).toISOString() : null,
    aiEnriched: enrichedRow?.data?.aiUsed || false,
    enrichSources: enrichedRow?.sources || [],
  };

  let intel = mergeCustomIntel(localizeIntel(raw), custom?.data);
  if (enrichedRow?.data) {
    intel = applyEnrichedToIntel(intel, { ...enrichedRow.data, sources: enrichedRow.sources });
  }
  intel.newsTw = normalizeNewsList(intel.newsTw);
  intel.newsGlobal = normalizeNewsList(intel.newsGlobal);
  intel.news = normalizeNewsList(
    intel.news?.length ? intel.news : [...intel.newsTw, ...intel.newsGlobal],
  ).slice(0, 20);

  // The custom/enriched layers above may have replaced the chain, so the hints are merged again.
  if (hasHints) {
    intel.industryChain = mergeIndustryChainWithHints(
      sym,
      intel.industryChain,
      hints,
      bundle?.profileExt || {},
      mergeProfile(profile, management),
    );
  }
  const allNews = [...(intel.newsTw || []), ...(intel.newsGlobal || []), ...(intel.news || [])];
  const chainWithNews = mergeNewsIntoChain(intel.industryChain, allNews, sym);
  const laidOut = layoutPeersIntoGrid(finalizeIndustryChain(chainWithNews, sym), sym);
  intel.industryChain = ensureDownstreamBuyers(laidOut, sym, mergeProfile(profile, management));
  if (intel.industryChain.tenKExcerpt) {
    intel.industryChain.tenKExcerpt = sanitizeChainExcerpt(intel.industryChain.tenKExcerpt);
  }

  const fetchedResources = usListing
    ? await buildCompanyResources(
      sym,
      { ...profile, website: management.website ?? profile.website },
      management,
    ).catch(() => [])
    : [];
  // Work on a copy so the array returned by the helper is never mutated.
  const resources = Array.isArray(fetchedResources) ? [...fetchedResources] : [];
  if (hints?.filingUrl) {
    resources.unshift({ labelZh: '10-K 年報全文', url: hints.filingUrl, source: 'SEC' });
  }
  const seenUrl = new Set();
  intel.resources = resources.filter((l) => {
    if (!l?.url || seenUrl.has(l.url)) return false;
    seenUrl.add(l.url);
    return true;
  });
  intel.management = { ...intel.management, searches: [], resources };
  intel.chainLayout = enrichedRow?.data?.chainLayout || 'upstream_downstream_v2';
  intel = attachIntelSyncStatus(intel, sym);
  intel.dataHealth = buildDataHealth({
    officers: (intel.management?.officers || []).length > 0,
    newsTw: (intel.newsTw || []).length > 0,
    newsGlobal: (intel.newsGlobal || []).length > 0,
    insiders: insiders.length > 0,
    profileDesc: !!(intel.profileZh?.description?.length > 40),
    enriched: !!(intel.enrichedAt || intel.aiEnriched),
    usListing,
  });
  return sanitizeIntelNewsPayload(intel);
}