// finance-dashboard/lib/companyintel.js
// ═══════════════════════════════════════════════════════════
// companyintel.js — Company research data: management, insider transactions, news, industry chain
// ═══════════════════════════════════════════════════════════
import { getCompanyIntelCustom, getCompanyIntelEnriched } from './db.js';
import { localizeIntel, mergeCustomIntel, sanitizeOfficers, isOfficerRow, looksLikePersonName, looksLikeExecutiveTitle } from './companyintel-i18n.js';
import { gatherIntelSources, fetch10kChainHints } from './companyintel-sources.js';
import { mergeIndustryChainWithHints, buildCompanyResources } from './companyintel-links.js';
import {
mergeNewsIntoChain, finalizeIndustryChain, layoutPeersIntoGrid, sanitizeChainExcerpt, ensureDownstreamBuyers,
} from './companyintel-chain.js';
import { applyEnrichedToIntel, syncCompanyIntelEnriched, attachIntelSyncStatus } from './companyintel-ai.js';
import { normalizeNewsList } from './news-text.js';
/**
 * Re-normalizes the news fields of an intel payload. Intended to run even on API
 * cache hits, since older cache entries may still hold HTML-escaped Google RSS text.
 *
 * @param {object} payload - Intel payload; falsy or non-object values are returned untouched.
 * @returns {object} Shallow copy of `payload` whose `newsTw`, `newsGlobal` and `news`
 *   went through `normalizeNewsList`; `news` is capped at 20 entries.
 */
export function sanitizeIntelNewsPayload(payload) {
  const isObjectLike = Boolean(payload) && typeof payload === 'object';
  if (!isObjectLike) return payload;

  const cleanedTw = normalizeNewsList(payload.newsTw);
  const cleanedGlobal = normalizeNewsList(payload.newsGlobal);
  // Prefer the payload's own combined list; otherwise rebuild it from the two regional lists.
  const combinedInput = payload.news?.length ? payload.news : [...cleanedTw, ...cleanedGlobal];
  const cleanedAll = normalizeNewsList(combinedInput).slice(0, 20);

  return {
    ...payload,
    newsTw: cleanedTw,
    newsGlobal: cleanedGlobal,
    news: cleanedAll,
  };
}
import { yahooQuoteSummary, resetYahooAuth, sleep } from './yahoo-session.js';
// Browser-like User-Agent applied by default to requests issued through text().
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36';
// User-Agent sent on the sec.gov / data.sec.gov requests in this file.
// NOTE(review): contact@example.com looks like a placeholder — presumably SEC expects a
// reachable contact address here; confirm against SEC's fair-access guidance.
const SEC_UA = 'EmmyInvestDashboard/1.0 (personal learning tool; contact@example.com)';
/**
 * Fetches `url` and resolves with the response body as text.
 *
 * @param {string} url - Request URL.
 * @param {object} [headers] - Extra headers; they override the default `User-Agent`.
 * @param {number} [ms] - Milliseconds after which the request is aborted.
 * @returns {Promise<string>} Response body.
 * @throws {Error} `HTTP <status>` when the response is not ok; fetch/abort errors propagate.
 */
async function text(url, headers = {}, ms = 12000) {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => {
    controller.abort();
  }, ms);
  try {
    const requestHeaders = { 'User-Agent': UA, ...headers };
    const response = await fetch(url, { headers: requestHeaders, signal: controller.signal });
    if (response.ok) {
      return await response.text();
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    // Always release the timer, whether the request succeeded, failed or was aborted.
    clearTimeout(abortTimer);
  }
}
/**
 * Fetches `url` through text() and parses the body as JSON.
 *
 * @param {string} url - Request URL.
 * @param {object} [headers] - Extra headers; they override the default `Accept` value.
 * @param {number} [ms] - Timeout in milliseconds, forwarded to text().
 * @returns {Promise<any>} Parsed JSON value.
 * @throws {SyntaxError} When the body is not valid JSON; errors from text() propagate.
 */
async function json(url, headers = {}, ms = 12000) {
  const mergedHeaders = { Accept: 'application/json,text/plain,*/*', ...headers };
  const body = await text(url, mergedHeaders, ms);
  return JSON.parse(body);
}
/**
 * Replaces HTML-like tags with spaces, collapses whitespace runs and trims.
 * Falsy input yields an empty string.
 */
const strip = (s) => {
  const source = String(s || '');
  const withoutTags = source.replace(/<[^>]+>/g, ' ');
  return withoutTags.replace(/\s+/g, ' ').trim();
};
/**
 * Parses a loosely formatted numeric string (currency sign, thousands separators,
 * percent sign, whitespace) into a number.
 *
 * @param {*} s - Value to parse.
 * @returns {number|null} The finite number, or null when the input is nullish, empty
 *   after cleanup, or not numeric.
 */
const num = (s) => {
  if (s == null) return null;
  const cleaned = String(s).replace(/[$,%\s]/g, '');
  // Number('') is 0, which would report a missing value as a real zero.
  if (cleaned === '') return null;
  const n = Number(cleaned);
  return Number.isFinite(n) ? n : null;
};
/**
 * Returns the trimmed inner content of the first `<name>…</name>` element in `src`
 * (case-insensitive; the opening tag must have no attributes).
 *
 * @param {string} src - Markup to search.
 * @param {string} name - Element name.
 * @returns {string|null} Inner content, or null when the element is absent or empty.
 */
const tag = (src, name) => {
  // Escape the name so it is always matched literally inside the dynamic pattern.
  const safeName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Both backslashes must be doubled inside the template literal: a lone `\S` collapses to
  // a plain "S", which turns the class into [\sS] and makes nearly every lookup fail.
  const pattern = new RegExp(`<${safeName}>([\\s\\S]*?)<\\/${safeName}>`, 'i');
  return String(src ?? '').match(pattern)?.[1]?.trim() || null;
};
// Ticker → { cik, name } lookup built from SEC's company_tickers.json on first use.
let _tickerMap = null;

/**
 * Resolves a ticker to its zero-padded 10-digit CIK and company title.
 *
 * @param {string} symbol - Ticker; matched case-insensitively after trimming.
 * @returns {Promise<{cik: string, name: string}|null>} Entry, or null when unknown.
 * @throws Errors from json() propagate when the ticker list cannot be loaded.
 */
async function tickerToCik(symbol) {
  if (!_tickerMap) {
    const listing = await json('https://www.sec.gov/files/company_tickers.json', { 'User-Agent': SEC_UA });
    // Prototype-less map so tickers can never collide with inherited Object members.
    const map = Object.create(null);
    for (const row of Object.values(listing)) {
      map[String(row.ticker).toUpperCase()] = {
        cik: String(row.cik_str).padStart(10, '0'),
        name: row.title,
      };
    }
    // Publish only after the whole list was processed, so a failure never caches a partial map.
    _tickerMap = map;
  }
  // Keys are stored upper-cased, so normalize the lookup the same way.
  const key = String(symbol ?? '').trim().toUpperCase();
  return _tickerMap[key] || null;
}
/**
 * Loads company profile fields and up to 12 officers from Yahoo's `assetProfile` module.
 *
 * @param {string} symbol - Ticker passed to yahooQuoteSummary.
 * @returns {Promise<object>} Profile fields plus `officers` and `source`; on any failure
 *   resolves to `{ officers: [], source: null }` instead of throwing.
 */
async function fetchManagement(symbol) {
  try {
    const r = await yahooQuoteSummary(symbol, 'assetProfile');
    const p = r?.assetProfile || {};
    const officers = sanitizeOfficers((p.companyOfficers || []).slice(0, 12).map((o) => ({
      name: o.name || '',
      title: o.title || '',
      age: o.age ?? null,
      fiscalYear: o.fiscalYear ?? null,
      totalPay: o.totalPay?.raw ?? null,
    }))).filter((o) => o.name);
    return {
      sector: p.sector || null,
      industry: p.industry || null,
      website: p.website || null,
      fullTimeEmployees: p.fullTimeEmployees ?? null,
      longBusinessSummary: p.longBusinessSummary || null,
      officers,
      source: 'Yahoo assetProfile',
    };
  } catch {
    // Deliberate best effort: callers treat an empty officer list as "try another source".
    return { officers: [], source: null };
  }
}
/**
 * Resolves the management block with fallbacks: Yahoo, one Yahoo retry after an 800 ms
 * pause and an auth reset (e.g. when rate-limited), then SEC 10-K, then SEC DEF 14A.
 * The SEC fallbacks only yield data for tickers present in SEC's list (US listings).
 *
 * @param {string} symbol - Ticker.
 * @returns {Promise<object>} Management object; `officers`/`source` are replaced when an
 *   SEC fallback supplied the list.
 */
async function resolveManagement(symbol) {
  const officerCount = (result) => (result.officers || []).length;

  let best = await fetchManagement(symbol);
  if (officerCount(best) >= 2) return best;

  await sleep(800);
  resetYahooAuth();
  const second = await fetchManagement(symbol);
  // Keep whichever Yahoo attempt returned more officers.
  if (officerCount(second) > officerCount(best)) best = second;
  if (officerCount(best) >= 2) return best;

  const fromTenK = sanitizeOfficers(await fetchOfficersFromSec10k(symbol).catch(() => []));
  if (fromTenK.length) {
    return { ...best, officers: fromTenK, source: 'SEC 10-K' };
  }

  const fromProxy = await fetchOfficersFromDef14a(symbol).catch(() => []);
  if (fromProxy.length) {
    return { ...best, officers: fromProxy, source: 'SEC DEF 14A' };
  }

  return best;
}
/**
 * Pulls executives from the company's proxy statement (DEF 14A); used when both Yahoo
 * and the 10-K lookup fail (e.g. AAPL).
 *
 * @param {string} symbol - Ticker passed to tickerToCik.
 * @returns {Promise<Array>} Up to 12 officer rows passed through sanitizeOfficers; an empty
 *   array when the ticker has no CIK or no DEF 14A filing with a primary document is listed.
 * @throws Errors from json()/text() propagate (there is no try/catch in this function).
 */
async function fetchOfficersFromDef14a(symbol) {
const hit = await tickerToCik(symbol);
if (!hit) return [];
const sub = await json(`https://data.sec.gov/submissions/CIK${hit.cik}.json`, { 'User-Agent': SEC_UA });
const f = sub.filings?.recent || {};
let accn = null;
let primary = null;
// Take the first DEF 14A entry in the "recent" filings arrays.
// presumably the arrays are newest-first, making this the latest proxy — verify against the SEC feed.
for (let i = 0; i < (f.form || []).length; i++) {
if (f.form[i] === 'DEF 14A') {
accn = f.accessionNumber[i];
primary = f.primaryDocument?.[i];
break;
}
}
if (!accn || !primary) return [];
// The archive path uses the accession number without dashes and the CIK without leading zeros.
const accNo = accn.replace(/-/g, '');
const html = await text(`https://www.sec.gov/Archives/edgar/data/${Number(hit.cik)}/${accNo}/${primary}`, { 'User-Agent': SEC_UA }, 28000);
// Keyed by lower-cased name; a later addPair for the same person overwrites the earlier entry.
const uniq = new Map();
const addPair = (name, title) => {
if (!isOfficerRow(name, title)) return;
uniq.set(name.toLowerCase(), { name, title: stripHtml(title), source: 'SEC DEF 14A' });
};
// Pass 1: comma-separated names following an "Election of Directors:" label become directors.
const election = html.match(/Election of Directors:\s*([^<]{20,400})/i)?.[1];
if (election) {
for (const name of election.split(',').map(s => stripHtml(s)).filter(Boolean)) {
if (!looksLikePersonName(name)) continue;
addPair(name, 'Director');
}
}
// Pass 2: for each occurrence of a known title, look for a "First [M.] Last" name at the end
// of the 160 characters that precede it. Stops once 14 people have been collected.
for (const label of ['Chief Executive Officer', 'Chief Financial Officer', 'Chief Operating Officer', 'Senior Vice President', 'General Counsel']) {
let idx = 0;
while (uniq.size < 14) {
idx = html.indexOf(label, idx);
if (idx < 0) break;
const before = stripHtml(html.slice(Math.max(0, idx - 160), idx));
const nameM = before.match(/([A-Z][a-z]+(?:\s+[A-Z]\.?)?\s+[A-Z][a-z]+)\s*$/);
if (nameM) addPair(nameM[1], label);
// Always advance past this occurrence so the scan terminates even when nothing was added.
idx += label.length;
}
}
// Pass 3: try to upgrade each title using the 520 characters after the first occurrence of the name.
for (const name of [...uniq.keys()]) {
const display = uniq.get(name).name;
const pos = html.indexOf(display);
if (pos < 0) continue;
const chunk = html.slice(pos, pos + 520);
const titleM = chunk.match(/((?:Former\s+)?(?:Senior|Executive|Chief|General)[\s\S]{8,120}?)(?=\s*<p|\s*<td|\s*<div|$)/i);
const better = stripHtml(titleM?.[1] || '');
if (better && looksLikeExecutiveTitle(better)) {
uniq.set(name, { name: display, title: better, source: 'SEC DEF 14A' });
}
}
// Rows whose title is exactly 'Director' are dropped unless the map holds four or fewer people.
return sanitizeOfficers([...uniq.values()].filter(o => o.title !== 'Director' || uniq.size <= 4).slice(0, 12));
}
/**
 * Replaces HTML-like tags and `&#160;` entities with spaces, collapses whitespace
 * runs and trims. Falsy input yields an empty string.
 */
const stripHtml = (s) => {
  const source = String(s || '');
  const withoutTags = source.replace(/<[^>]+>/g, ' ');
  const withoutNbsp = withoutTags.replace(/&#160;/g, ' ');
  return withoutNbsp.replace(/\s+/g, ' ').trim();
};
/**
 * Scrapes executive officers from table rows of the first 10-K listed in the company's
 * recent SEC filings.
 *
 * @param {string} symbol - Ticker passed to tickerToCik.
 * @returns {Promise<Array<{name: string, title: string, source: string}>>} Up to 12 rows,
 *   de-duplicated by lower-cased name (first occurrence wins); an empty array when the ticker
 *   has no CIK or no 10-K with a primary document is listed.
 * @throws Errors from json()/text() propagate.
 */
async function fetchOfficersFromSec10k(symbol) {
  const hit = await tickerToCik(symbol);
  if (!hit) return [];

  const sub = await json(`https://data.sec.gov/submissions/CIK${hit.cik}.json`, { 'User-Agent': SEC_UA });
  const recent = sub.filings?.recent || {};
  const at = (recent.form || []).findIndex((form) => form === '10-K');
  if (at < 0) return [];
  const accn = recent.accessionNumber[at];
  const primary = recent.primaryDocument?.[at];
  if (!accn || !primary) return [];

  // The archive path uses the accession number without dashes and the CIK without leading zeros.
  const url = `https://www.sec.gov/Archives/edgar/data/${Number(hit.cik)}/${accn.replace(/-/g, '')}/${primary}`;
  const html = await text(url, { 'User-Agent': SEC_UA }, 25000);

  // Scan 120000 characters starting at the "Item 10" heading, or at the top when it is absent.
  const item10 = html.search(/Item\s*10[\s\S]{0,120}(Executive Officers|Directors)/i);
  const start = item10 >= 0 ? item10 : 0;
  const slice = html.slice(start, start + 120000);

  const titleWords = /Chief|President|Officer|Counsel|Operations|Financial|Accounting|Field/i;
  // NOTE(review): the exclusion list hard-codes one issuer name ("NVIDIA").
  const notAName = /Chief|President|Officer|Director|Age|Name|Title|NVIDIA|Common|Stock|Item|Action/i;
  const looksLikeTitle = (cell) => titleWords.test(cell) && cell.length < 140;
  const looksLikeName = (cell) => cell.length > 3 && cell.length < 70 && !notAName.test(cell) && /[A-Za-z]/.test(cell);

  const byName = new Map();
  for (const [, rowHtml] of slice.matchAll(/<tr[^>]*>([\s\S]*?)<\/tr>/gi)) {
    const cells = Array.from(rowHtml.matchAll(/<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi), (m) => stripHtml(m[1]));
    if (cells.length < 2) continue;
    const title = cells.find(looksLikeTitle);
    const name = cells.find(looksLikeName);
    if (!isOfficerRow(name, title)) continue;
    const key = name.toLowerCase();
    if (!byName.has(key)) byName.set(key, { name, title, source: 'SEC 10-K' });
  }
  return [...byName.values()].slice(0, 12);
}
/**
 * Extracts owner, issuer, relationship and non-derivative transactions from the text
 * of an SEC Form 4 submission.
 *
 * @param {string} txt - Full submission text containing an `<ownershipDocument>` element.
 * @param {{date: string, url: string}} filing - Filing metadata copied into the result.
 * @returns {object} Summary with `acquired`/`disposed` share totals, a `signal`
 *   ('acquire' | 'dispose' | 'mixed') and the parsed `transactions`. Only the first 8
 *   non-derivative transactions are considered.
 */
function parseForm4(txt, filing) {
  const raw = String(txt ?? '');
  // Without the guard, indexOf() === -1 would make slice(-1) return the last character.
  const docStart = raw.indexOf('<ownershipDocument');
  const xml = docStart >= 0 ? raw.slice(docStart) : '';

  const ownerBlock = xml.match(/<reportingOwner>([\s\S]*?)<\/reportingOwner>/i)?.[1] || '';
  const issuerBlock = xml.match(/<issuer>([\s\S]*?)<\/issuer>/i)?.[1] || '';
  const relBlock = ownerBlock.match(/<reportingOwnerRelationship>([\s\S]*?)<\/reportingOwnerRelationship>/i)?.[1] || '';
  const txBlocks = [...xml.matchAll(/<nonDerivativeTransaction>([\s\S]*?)<\/nonDerivativeTransaction>/gi)].map((m) => m[1]);

  // Most fields are wrapped as <field><value>…</value></field>.
  const valueOf = (block, name) => tag(tag(block, name) || '', 'value');
  // Relationship flags are accepted as "1" or "true".
  const isTrue = (v) => /^(1|true)$/i.test(v || '');

  const transactions = txBlocks.slice(0, 8).map((b) => ({
    date: valueOf(b, 'transactionDate'),
    code: tag(b, 'transactionCode') || null,
    acquiredDisposed: valueOf(b, 'transactionAcquiredDisposedCode'),
    shares: num(valueOf(b, 'transactionShares')),
    price: num(valueOf(b, 'transactionPricePerShare')),
    ownedAfter: num(valueOf(b, 'sharesOwnedFollowingTransaction')),
  })).filter((t) => t.shares != null || t.code);

  const sharesFor = (side) => transactions
    .filter((t) => t.acquiredDisposed === side)
    .reduce((sum, t) => sum + (t.shares || 0), 0);
  const acquired = sharesFor('A');
  const disposed = sharesFor('D');

  const isDirector = isTrue(tag(relBlock, 'isDirector'));
  const isOfficer = isTrue(tag(relBlock, 'isOfficer'));

  return {
    filingDate: filing.date,
    reportDate: tag(xml, 'periodOfReport'),
    // Fall back to the SGML header when the XML carries no owner name.
    owner: tag(ownerBlock, 'rptOwnerName') || strip(raw.match(/COMPANY CONFORMED NAME:\s*([^\n]+)/)?.[1]),
    issuer: tag(issuerBlock, 'issuerName'),
    title: tag(relBlock, 'officerTitle') || (isDirector ? 'Director' : ''),
    isDirector,
    isOfficer,
    acquired,
    disposed,
    signal: acquired > disposed ? 'acquire' : disposed > acquired ? 'dispose' : 'mixed',
    transactions,
    url: filing.url,
  };
}
/**
 * Downloads and parses the first Form 4 filings listed in the company's recent SEC filings.
 *
 * @param {string} symbol - Ticker passed to tickerToCik.
 * @returns {Promise<Array>} parseForm4 results for at most 5 filings; filings whose download
 *   or parse fails are skipped. Empty when the ticker has no CIK.
 * @throws Errors from tickerToCik()/json() propagate.
 */
async function fetchInsiderTransactions(symbol) {
  const hit = await tickerToCik(symbol);
  if (!hit) return [];

  const sub = await json(`https://data.sec.gov/submissions/CIK${hit.cik}.json`, { 'User-Agent': SEC_UA });
  const recent = sub.filings?.recent || {};
  const forms = recent.form || [];
  const cikPath = Number(hit.cik);

  // Collect up to 8 candidates; only the first 5 are downloaded below.
  const candidates = [];
  for (const [i, form] of forms.entries()) {
    if (candidates.length >= 8) break;
    if (form !== '4') continue;
    const accn = recent.accessionNumber[i];
    const accNo = accn.replace(/-/g, '');
    candidates.push({
      date: recent.filingDate[i],
      accn,
      url: `https://www.sec.gov/Archives/edgar/data/${cikPath}/${accNo}/${accn}.txt`,
    });
  }

  // Downloads run one after another; a failing filing does not abort the rest.
  const parsed = [];
  for (const filing of candidates.slice(0, 5)) {
    try {
      const body = await text(filing.url, { 'User-Agent': SEC_UA });
      parsed.push(parseForm4(body, filing));
    } catch { /* keep going */ }
  }
  return parsed;
}
/**
 * Builds a template industry chain (upstream, downstream, peers) from the profile's
 * industry and sector text, for use when no richer chain data is available.
 *
 * @param {string} symbol - Ticker; removed from the returned `peers`.
 * @param {{industry?: string, sector?: string}} [profile] - Company profile.
 * @returns {{upstream: string[], upstreamDetail: object[], downstream: string[],
 *   downstreamDetail: object[], peers: string[]}} Template chain.
 */
function industryChainFallback(symbol, profile = {}) {
  const maps = [
    {
      match: /semiconductor|chip|accelerated|technology/,
      upstream: ['EDA/IP 軟體', '晶圓代工', '先進封裝', 'HBM/記憶體', '半導體設備', 'ABF/載板'],
      upstreamNamed: ['TSM', 'ASML', 'AMAT', 'LRCX', 'KLAC', 'MU', 'SNPS', 'CDNS'],
      peers: ['AMD', 'AVGO', 'QCOM', 'MRVL', 'TSM', 'ASML', 'MU'],
      downstream: ['雲端資料中心', '企業 AI 軟體', '自駕車/機器人', '遊戲與工作站'],
      downstreamNamed: [
        { label: 'AI 伺服器 OEM', entities: ['DELL', 'HPE', 'SMCI'], note: '採購 GPU 組裝銷售' },
        { label: '雲端與大型企業', entities: ['MSFT', 'AMZN', 'GOOGL', 'META'], note: '資料中心 GPU 需求' },
      ],
      midstream: { role: '晶片設計GPU 平台', segments: ['資料中心 GPU', '遊戲 GPU', '軟體 CUDA'] },
    },
    {
      match: /software|internet|communication|media/,
      upstream: ['雲端基礎設施', '資料中心', '廣告技術', '內容/資料供應商'],
      peers: ['MSFT', 'GOOGL', 'META', 'AMZN', 'CRM', 'ORCL'],
      downstream: ['企業客戶', '消費者流量', '開發者生態', '廣告主'],
      midstream: { role: '軟體/平台', segments: ['訂閱', '廣告', '雲端服務'] },
    },
  ];
  const generic = {
    upstream: ['原物料/零組件', '設備與服務供應商'],
    upstreamNamed: [],
    peers: [],
    downstream: ['終端客戶', '企業採購', '通路夥伴'],
    downstreamNamed: [],
    midstream: { role: profile.industry || '核心業務', segments: [] },
  };

  // Match the specific industry first and only then the broad sector. Testing the combined
  // "industry sector" string let the sector word "technology" send software companies to the
  // semiconductor template, because that template is checked first.
  const industryText = String(profile.industry || '').toLowerCase();
  const sectorText = String(profile.sector || '').toLowerCase();
  const hit = maps.find((m) => m.match.test(industryText))
    || maps.find((m) => m.match.test(sectorText))
    || generic;

  const asPlainDetail = (label) => ({ label, entities: [label], note: '' });
  const upstreamDetail = hit.upstreamNamed?.length
    ? [{ label: '供應商', entities: hit.upstreamNamed, note: '產業鏈慣例' }, ...hit.upstream.map(asPlainDetail)]
    : hit.upstream.map(asPlainDetail);
  const namedBuyers = (hit.downstreamNamed || []).map((d) => ({
    label: d.label || '購買方',
    entities: d.entities || [],
    note: d.note || '產業鏈慣例',
  }));

  // NOTE(review): each template declares `midstream`, but it is not part of the returned object.
  return {
    upstream: hit.upstream,
    upstreamDetail,
    downstream: hit.downstream,
    downstreamDetail: [...namedBuyers, ...hit.downstream.map(asPlainDetail)],
    peers: hit.peers.filter((s) => s !== symbol),
  };
}
/**
 * Full sync: resolves management, then hands off to syncCompanyIntelEnriched
 * (multi-source news + AI structuring + DB write, per the original note).
 *
 * @param {string} symbol - Ticker.
 * @param {object} [profile] - Base profile; management fields override it.
 * @param {{force?: boolean, useAI?: boolean}} [opts] - `force` is on only when exactly
 *   `true`; `useAI` is on unless exactly `false`.
 * @returns {Promise<object>} Whatever syncCompanyIntelEnriched resolves to.
 */
export async function runCompanyIntelSync(symbol, profile = {}, opts = {}) {
  const management = await resolveManagement(symbol);
  const mergedProfile = { ...profile, ...management };
  const syncOptions = {
    force: opts.force === true,
    useAI: opts.useAI !== false,
    management,
  };
  return syncCompanyIntelEnriched(symbol, mergedProfile, syncOptions);
}
/**
 * Adds human-readable notes describing which parts of the intel payload are missing.
 *
 * @param {object} fields - Availability flags: `officers`, `newsTw`, `newsGlobal`,
 *   `insiders`, `profileDesc`, `usListing` (other keys are passed through).
 * @returns {object} Copy of `fields` with a `notes` array of user-facing messages.
 */
function buildDataHealth(fields) {
  const noNews = !fields.newsTw && !fields.newsGlobal;
  const noInsiders = !fields.insiders;
  // [condition, message] pairs, evaluated in display order.
  const rules = [
    [!fields.officers, '管理層名單未取得(可按「強制更新」重試)'],
    [noNews, '新聞來源暫時無回應'],
    [noInsiders && fields.usListing, '近期無 SEC Form 4 或 CIK 對應失敗'],
    [noInsiders && !fields.usListing, '非美股標的,無 SEC 內部人申報'],
    [!fields.profileDesc, '公司簡介待同步後整理為中文'],
  ];
  const notes = rules.filter(([applies]) => applies).map(([, message]) => message);
  return { ...fields, notes };
}
/**
 * Assembles the full company-intel payload for a ticker: management, insider filings,
 * news, industry chain, resources and a data-health summary.
 *
 * @param {string} symbol - Ticker; trimmed and upper-cased before use.
 * @param {object} [profile] - Known profile fields; management fields override them.
 * @param {{sync?: boolean, force?: boolean, useAI?: boolean}} [opts] - `sync` runs the
 *   enrichment sync first; `force`/`useAI` are forwarded to it (`force` only when exactly
 *   `true`, `useAI` unless exactly `false`).
 * @returns {Promise<object>} Intel payload passed through sanitizeIntelNewsPayload.
 * @throws Errors from resolveManagement and, when `opts.sync` is set, from the sync propagate;
 *   the other remote lookups are awaited with a `.catch` fallback.
 */
export async function getCompanyIntel(symbol, profile = {}, opts = {}) {
  symbol = String(symbol || '').trim().toUpperCase();
  const management = await resolveManagement(symbol);
  // Symbols containing a dot (exchange suffix or share class) are treated as non-US here.
  const usListing = /^[A-Z][A-Z0-9.\-]{0,7}$/.test(symbol) && !symbol.includes('.');
  let bundle = null;
  let enrichedRow = getCompanyIntelEnriched(symbol);
  if (opts.sync) {
    // Call the sync directly with the management resolved above. Going through
    // runCompanyIntelSync would resolve management a second time.
    const sync = await syncCompanyIntelEnriched(symbol, { ...profile, ...management }, {
      force: opts.force === true,
      useAI: opts.useAI !== false,
      management,
    });
    bundle = sync.bundle;
    enrichedRow = { data: sync.enriched, sources: sync.sources, updatedAt: Date.now() };
  } else if (!enrichedRow) {
    bundle = await gatherIntelSources(symbol, { ...profile, name: profile.name, ...management }).catch(() => null);
  }

  const insiders = usListing
    ? await fetchInsiderTransactions(symbol).catch(() => [])
    : [];

  let newsTw = bundle?.newsTw || [];
  let newsGlobal = bundle?.newsGlobal || [];
  // No news yet and no sync requested: gather the sources (again) to fill the news lists.
  if (!newsTw.length && !newsGlobal.length && !opts.sync) {
    const b = await gatherIntelSources(symbol, { ...profile, ...management }).catch(() => null);
    if (b) {
      newsTw = b.newsTw || [];
      newsGlobal = b.newsGlobal || [];
      bundle = b;
    }
  }

  const custom = getCompanyIntelCustom(symbol);
  let industryChain = industryChainFallback(symbol, { ...profile, ...management });
  const hints = bundle?.hints || (usListing && !opts.sync
    ? await fetch10kChainHints(symbol).catch(() => ({}))
    : {});
  const hasHints = Boolean(hints) && Object.keys(hints).length > 0;
  if (hasHints) {
    industryChain = mergeIndustryChainWithHints(
      symbol,
      industryChain,
      hints,
      bundle?.profileExt || {},
      { ...profile, ...management },
    );
  }

  const profileZh = management.longBusinessSummary
    ? { description: management.longBusinessSummary.slice(0, 500), businessModel: management.industry || profile.industry || '' }
    : (bundle?.profileExt?.longBusinessSummary
      ? { description: bundle.profileExt.longBusinessSummary.slice(0, 500), businessModel: bundle.profileExt.industry || '' }
      : null);

  const raw = {
    symbol,
    updatedAt: new Date().toISOString(),
    profileZh,
    management: { ...management, searches: [] },
    insiders,
    news: normalizeNewsList([...newsTw, ...newsGlobal]).slice(0, 20),
    newsTw: normalizeNewsList(newsTw),
    newsGlobal: normalizeNewsList(newsGlobal),
    managementBrief: (bundle?.managementNewsRaw || []).slice(0, 6).map((n) => ({
      date: n.created,
      headline: n.titleZh || n.title,
      summary: (n.descriptionZh || n.description || '').slice(0, 160),
      impact: 'neutral',
      source: n.publisher,
      url: n.url,
    })),
    industryChain,
    sources: [
      management.source || 'Yahoo assetProfile',
      'SEC Form 4',
      'Google 新聞(台灣)',
      'Google 新聞(國際)',
      'Nasdaq / Yahoo Finance',
      ...(enrichedRow?.sources || []),
      ...(custom ? ['本機自訂'] : []),
    ].filter(Boolean),
    customUpdatedAt: custom?.updatedAt ? new Date(custom.updatedAt).toISOString() : null,
    enrichedAt: enrichedRow?.updatedAt ? new Date(enrichedRow.updatedAt).toISOString() : null,
    aiEnriched: enrichedRow?.data?.aiUsed || false,
    enrichSources: enrichedRow?.sources || [],
  };

  let intel = mergeCustomIntel(localizeIntel(raw), custom?.data);
  if (enrichedRow?.data) {
    intel = applyEnrichedToIntel(intel, { ...enrichedRow.data, sources: enrichedRow.sources });
  }
  intel.newsTw = normalizeNewsList(intel.newsTw);
  intel.newsGlobal = normalizeNewsList(intel.newsGlobal);
  intel.news = normalizeNewsList(intel.news?.length ? intel.news : [...intel.newsTw, ...intel.newsGlobal]).slice(0, 20);

  // Hints are merged a second time because custom/enriched data may have replaced the chain.
  if (hasHints) {
    intel.industryChain = mergeIndustryChainWithHints(
      symbol,
      intel.industryChain,
      hints,
      bundle?.profileExt || {},
      { ...profile, ...management },
    );
  }
  const allNews = [...(intel.newsTw || []), ...(intel.newsGlobal || []), ...(intel.news || [])];
  intel.industryChain = ensureDownstreamBuyers(
    layoutPeersIntoGrid(
      finalizeIndustryChain(mergeNewsIntoChain(intel.industryChain, allNews, symbol), symbol),
      symbol,
    ),
    symbol,
    { ...profile, ...management },
  );
  if (intel.industryChain.tenKExcerpt) {
    intel.industryChain.tenKExcerpt = sanitizeChainExcerpt(intel.industryChain.tenKExcerpt);
  }

  const resources = usListing
    ? await buildCompanyResources(symbol, { ...profile, website: management.website }, management).catch(() => [])
    : [];
  if (hints?.filingUrl) {
    resources.unshift({ labelZh: '10-K 年報全文', url: hints.filingUrl, source: 'SEC' });
  }
  // De-duplicate by URL (first occurrence wins) and expose the same list in both places.
  const seenUrl = new Set();
  const uniqueResources = resources.filter((l) => {
    if (!l?.url || seenUrl.has(l.url)) return false;
    seenUrl.add(l.url);
    return true;
  });
  intel.resources = uniqueResources;
  intel.management = { ...intel.management, searches: [], resources: uniqueResources };
  intel.chainLayout = enrichedRow?.data?.chainLayout || 'upstream_downstream_v2';
  intel = attachIntelSyncStatus(intel, symbol);
  intel.dataHealth = buildDataHealth({
    officers: (intel.management?.officers || []).length > 0,
    newsTw: (intel.newsTw || []).length > 0,
    newsGlobal: (intel.newsGlobal || []).length > 0,
    insiders: insiders.length > 0,
    profileDesc: !!(intel.profileZh?.description?.length > 40),
    enriched: !!(intel.enrichedAt || intel.aiEnriched),
    usListing,
  });
  return sanitizeIntelNewsPayload(intel);
}