finance-dashboard/lib/companyintel.js

465 lines
20 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ═══════════════════════════════════════════════════════════
// companyintel.js — 公司研究資料:管理層、內部人交易、新聞、產業鏈
// ═══════════════════════════════════════════════════════════
import { getCompanyIntelCustom, getCompanyIntelEnriched } from './db.js';
import { localizeIntel, mergeCustomIntel, sanitizeOfficers, isOfficerRow, looksLikePersonName, looksLikeExecutiveTitle } from './companyintel-i18n.js';
import { gatherIntelSources, fetch10kChainHints } from './companyintel-sources.js';
import { mergeIndustryChainWithHints, buildCompanyResources } from './companyintel-links.js';
import {
mergeNewsIntoChain, finalizeIndustryChain, layoutPeersIntoGrid, sanitizeChainExcerpt, ensureDownstreamBuyers,
} from './companyintel-chain.js';
import { applyEnrichedToIntel, syncCompanyIntelEnriched, attachIntelSyncStatus } from './companyintel-ai.js';
import { normalizeNewsList } from './news-text.js';
/**
 * Re-normalizes the news fields of an intel payload.
 *
 * Meant to run even when the API response comes from cache, because older
 * cached payloads may still carry escaped HTML from Google RSS.
 *
 * @param {object} payload - Intel payload; non-objects are returned unchanged.
 * @returns {object} Shallow copy of `payload` with `newsTw`, `newsGlobal` and
 *   `news` (at most 20 items) passed through `normalizeNewsList`.
 */
export function sanitizeIntelNewsPayload(payload) {
  const isObject = Boolean(payload) && typeof payload === 'object';
  if (!isObject) {
    return payload;
  }

  const cleanedTw = normalizeNewsList(payload.newsTw);
  const cleanedGlobal = normalizeNewsList(payload.newsGlobal);

  // Prefer the payload's own combined list; otherwise rebuild it from the two feeds.
  let combinedSource;
  if (payload.news?.length) {
    combinedSource = payload.news;
  } else {
    combinedSource = [...cleanedTw, ...cleanedGlobal];
  }
  const combined = normalizeNewsList(combinedSource).slice(0, 20);

  return {
    ...payload,
    newsTw: cleanedTw,
    newsGlobal: cleanedGlobal,
    news: combined,
  };
}
import { yahooQuoteSummary, resetYahooAuth, sleep } from './yahoo-session.js';
// Browser-like User-Agent that text() sends by default (callers may override it via `headers`).
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36';
// User-Agent passed explicitly on the sec.gov / data.sec.gov requests in this file.
// NOTE(review): the contact address is an example.com placeholder — confirm whether a real contact is required.
const SEC_UA = 'EmmyInvestDashboard/1.0 (personal learning tool; contact@example.com)';
/**
 * Fetches a URL and resolves with the response body as text.
 *
 * @param {string} url - Address to request.
 * @param {object} [headers={}] - Extra request headers; they override the default User-Agent.
 * @param {number} [ms=12000] - Milliseconds before the request is aborted.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `HTTP <status>` when the response is not ok; fetch/abort errors propagate.
 */
async function text(url, headers = {}, ms = 12000) {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => {
    controller.abort();
  }, ms);

  try {
    const requestHeaders = { 'User-Agent': UA, ...headers };
    const response = await fetch(url, { headers: requestHeaders, signal: controller.signal });
    if (response.ok) {
      return await response.text();
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    // Always cancel the timer so it cannot fire after the request has settled.
    clearTimeout(abortTimer);
  }
}
/**
 * Fetches a URL through text() and parses the body as JSON.
 *
 * @param {string} url - Address to request.
 * @param {object} [headers={}] - Extra request headers; they override the default Accept header.
 * @param {number} [ms=12000] - Timeout in milliseconds, forwarded to text().
 * @returns {Promise<*>} The parsed JSON value.
 * @throws {SyntaxError} When the body is not valid JSON; errors from text() propagate.
 */
async function json(url, headers = {}, ms = 12000) {
  const requestHeaders = { Accept: 'application/json,text/plain,*/*', ...headers };
  const body = await text(url, requestHeaders, ms);
  return JSON.parse(body);
}
/**
 * Removes HTML-like tags from a value and collapses whitespace.
 *
 * @param {*} s - Value to clean; falsy values are treated as an empty string.
 * @returns {string} Text with tags replaced by spaces, whitespace runs collapsed, ends trimmed.
 */
const strip = (s) => {
  const raw = String(s || '');
  const withoutTags = raw.replace(/<[^>]+>/g, ' ');
  const collapsed = withoutTags.replace(/\s+/g, ' ');
  return collapsed.trim();
};
/**
 * Parses a loosely formatted numeric string (e.g. "$1,234.50", "12 %").
 *
 * Dollar signs, commas, percent signs and whitespace are removed before parsing.
 *
 * @param {*} s - Value to parse.
 * @returns {number|null} The finite number, or null when the input is null/undefined,
 *   contains nothing but formatting characters, or is not a finite number.
 */
const num = (s) => {
  if (s == null) return null;
  const cleaned = String(s).replace(/[$,%\s]/g, '');
  // Number('') is 0, so an empty or symbol-only input must be rejected explicitly.
  if (cleaned === '') return null;
  const n = Number(cleaned);
  return Number.isFinite(n) ? n : null;
};
/**
 * Returns the trimmed inner text of the first `<name>…</name>` element in `src`.
 *
 * Matching is case-insensitive and only recognises opening tags without attributes.
 *
 * @param {*} src - Markup to search; null/undefined is treated as an empty string.
 * @param {string} name - Element name; regex metacharacters in it are matched literally.
 * @returns {string|null} The inner text, or null when the element is missing or empty.
 */
const tag = (src, name) => {
  const safeName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Inside a template literal both backslashes must be doubled: `\\s\\S` yields the
  // regex class [\s\S] ("any character"). A single `\S` would cook to a literal "S".
  const pattern = new RegExp(`<${safeName}>([\\s\\S]*?)<\\/${safeName}>`, 'i');
  return String(src ?? '').match(pattern)?.[1]?.trim() || null;
};
// Lazily built lookup from upper-cased ticker to { cik, name }; null until the first successful load.
let _tickerMap = null;

/**
 * Resolves a ticker symbol to its SEC CIK using SEC's company_tickers.json.
 *
 * The ticker list is downloaded once and cached in `_tickerMap` for later calls.
 *
 * @param {string} symbol - Ticker symbol; surrounding whitespace and letter case are ignored.
 * @returns {Promise<{cik: string, name: string}|null>} CIK zero-padded to 10 digits plus the
 *   company title, or null when the ticker is not in the list.
 * @throws Propagates network/parse errors from json() while loading the list.
 */
async function tickerToCik(symbol) {
  if (!_tickerMap) {
    const d = await json('https://www.sec.gov/files/company_tickers.json', { 'User-Agent': SEC_UA });
    // Build into a local, prototype-less object and publish it only once complete, so a
    // malformed entry cannot leave a half-filled cache behind and names such as
    // "constructor" cannot resolve to inherited members.
    const map = Object.create(null);
    for (const entry of Object.values(d)) {
      map[String(entry.ticker).toUpperCase()] = {
        cik: String(entry.cik_str).padStart(10, '0'),
        name: entry.title,
      };
    }
    _tickerMap = map;
  }
  // Keys are stored upper-cased, so the lookup key is normalised the same way.
  const key = String(symbol ?? '').trim().toUpperCase();
  return _tickerMap[key] || null;
}
/**
 * Loads company profile and officer data from Yahoo's `assetProfile` quote-summary module.
 *
 * Best effort: any failure yields `{ officers: [], source: null }` instead of throwing.
 *
 * @param {string} symbol - Ticker symbol passed to yahooQuoteSummary.
 * @returns {Promise<object>} Profile fields (sector, industry, website, fullTimeEmployees,
 *   longBusinessSummary), up to 12 sanitized officers, and the source label.
 */
async function fetchManagement(symbol) {
  try {
    const summary = await yahooQuoteSummary(symbol, 'assetProfile');
    const profile = summary?.assetProfile || {};

    // Reduce each Yahoo officer record to the fields this module exposes.
    const toOfficer = (o) => ({
      name: o.name || '',
      title: o.title || '',
      age: o.age ?? null,
      fiscalYear: o.fiscalYear ?? null,
      totalPay: o.totalPay?.raw ?? null,
    });
    const firstTwelve = (profile.companyOfficers || []).slice(0, 12);
    const officers = sanitizeOfficers(firstTwelve.map((o) => toOfficer(o))).filter((o) => o.name);

    return {
      sector: profile.sector || null,
      industry: profile.industry || null,
      website: profile.website || null,
      fullTimeEmployees: profile.fullTimeEmployees ?? null,
      longBusinessSummary: profile.longBusinessSummary || null,
      officers,
      source: 'Yahoo assetProfile',
    };
  } catch {
    return { officers: [], source: null };
  }
}
/**
 * Resolves the management roster for a symbol, with a retry and SEC fallbacks.
 *
 * Order of attempts: Yahoo assetProfile; one retry after a short pause and an auth
 * reset (for when Yahoo is rate limiting); officers parsed from the SEC 10-K; then
 * officers parsed from the SEC DEF 14A proxy statement. A result with at least two
 * officers is accepted immediately.
 *
 * @param {string} symbol - Ticker symbol.
 * @returns {Promise<object>} Management object in the shape produced by fetchManagement;
 *   `officers`/`source` are replaced when an SEC fallback supplied the roster.
 */
async function resolveManagement(symbol) {
  const officerCount = (result) => (result.officers || []).length;

  let m = await fetchManagement(symbol);
  if (officerCount(m) >= 2) return m;

  await sleep(800);
  resetYahooAuth();
  const retry = await fetchManagement(symbol);
  // fetchManagement reports failure with `source: null`. If only the retry succeeded,
  // keep it even when it lists no more officers, so its profile fields
  // (sector, industry, summary, …) are not thrown away.
  const firstAttemptFailed = !m.source && Boolean(retry.source);
  if (officerCount(retry) > officerCount(m) || firstAttemptFailed) m = retry;
  if (officerCount(m) >= 2) return m;

  const secOfficers = sanitizeOfficers(await fetchOfficersFromSec10k(symbol).catch(() => []));
  if (secOfficers.length) {
    return { ...m, officers: secOfficers, source: 'SEC 10-K' };
  }

  const defOfficers = await fetchOfficersFromDef14a(symbol).catch(() => []);
  if (defOfficers.length) {
    return { ...m, officers: defOfficers, source: 'SEC DEF 14A' };
  }
  return m;
}
/**
 * Extracts executives/directors from a company's DEF 14A proxy statement.
 * Used by resolveManagement as the last fallback when both Yahoo and the 10-K
 * parser returned too few officers (the original note cites AAPL as an example).
 *
 * @param {string} symbol - Ticker symbol, resolved to a CIK via tickerToCik.
 * @returns {Promise<Array<{name: string, title: string, source: string}>>} Up to 12
 *   entries passed through sanitizeOfficers; [] when no CIK or no DEF 14A is found.
 * @throws Propagates network/parse errors from json() and text().
 */
async function fetchOfficersFromDef14a(symbol) {
const hit = await tickerToCik(symbol);
if (!hit) return [];
const sub = await json(`https://data.sec.gov/submissions/CIK${hit.cik}.json`, { 'User-Agent': SEC_UA });
const f = sub.filings?.recent || {};
let accn = null;
let primary = null;
// Take the first 'DEF 14A' entry in the recent-filings arrays.
// NOTE(review): presumably the feed is newest-first, making this the latest proxy — confirm.
for (let i = 0; i < (f.form || []).length; i++) {
if (f.form[i] === 'DEF 14A') {
accn = f.accessionNumber[i];
primary = f.primaryDocument?.[i];
break;
}
}
if (!accn || !primary) return [];
// The archive path uses the accession number without dashes and the CIK without leading zeros.
const accNo = accn.replace(/-/g, '');
const html = await text(`https://www.sec.gov/Archives/edgar/data/${Number(hit.cik)}/${accNo}/${primary}`, { 'User-Agent': SEC_UA }, 28000);
// Results keyed by lower-cased name; a later pair for the same name overwrites the earlier one.
const uniq = new Map();
const addPair = (name, title) => {
if (!isOfficerRow(name, title)) return;
uniq.set(name.toLowerCase(), { name, title: stripHtml(title), source: 'SEC DEF 14A' });
};
// Pass 1: a comma-separated nominee list following "Election of Directors:".
const election = html.match(/Election of Directors:\s*([^<]{20,400})/i)?.[1];
if (election) {
for (const name of election.split(',').map(s => stripHtml(s)).filter(Boolean)) {
if (!looksLikePersonName(name)) continue;
addPair(name, 'Director');
}
}
// Pass 2: for every occurrence of a known title label, look at the 160 characters
// before it and accept a trailing "First [M.] Last"-shaped name. Scanning for a label
// stops once 14 entries have been collected.
for (const label of ['Chief Executive Officer', 'Chief Financial Officer', 'Chief Operating Officer', 'Senior Vice President', 'General Counsel']) {
let idx = 0;
while (uniq.size < 14) {
idx = html.indexOf(label, idx);
if (idx < 0) break;
const before = stripHtml(html.slice(Math.max(0, idx - 160), idx));
const nameM = before.match(/([A-Z][a-z]+(?:\s+[A-Z]\.?)?\s+[A-Z][a-z]+)\s*$/);
if (nameM) addPair(nameM[1], label);
idx += label.length;
}
}
// Pass 3: try to upgrade each entry's title using the 520 characters that follow the
// first occurrence of the name in the document. Iterates over a snapshot of the keys.
for (const name of [...uniq.keys()]) {
const display = uniq.get(name).name;
const pos = html.indexOf(display);
if (pos < 0) continue;
const chunk = html.slice(pos, pos + 520);
const titleM = chunk.match(/((?:Former\s+)?(?:Senior|Executive|Chief|General)[\s\S]{8,120}?)(?=\s*<p|\s*<td|\s*<div|$)/i);
const better = stripHtml(titleM?.[1] || '');
if (better && looksLikeExecutiveTitle(better)) {
uniq.set(name, { name: display, title: better, source: 'SEC DEF 14A' });
}
}
// Entries whose title is still plain 'Director' are dropped unless the whole roster has
// at most 4 entries; the remainder is capped at 12 and sanitized.
return sanitizeOfficers([...uniq.values()].filter(o => o.title !== 'Director' || uniq.size <= 4).slice(0, 12));
}
/**
 * Converts an HTML fragment to plain text: tags and `&#160;` entities become spaces,
 * whitespace runs collapse to a single space, and the result is trimmed.
 *
 * @param {*} s - Value to clean; falsy values are treated as an empty string.
 * @returns {string} The cleaned text.
 */
const stripHtml = (s) => {
  let out = String(s || '');
  out = out.replace(/<[^>]+>/g, ' ');
  out = out.replace(/&#160;/g, ' ');
  out = out.replace(/\s+/g, ' ');
  return out.trim();
};
/**
 * Extracts executive officers from table rows of a company's 10-K filing.
 *
 * Picks the first '10-K' entry in the recent-filings arrays, downloads its primary
 * document, and scans up to 120,000 characters starting at the "Item 10 … Executive
 * Officers/Directors" heading (or at the top of the document when no heading is found).
 *
 * @param {string} symbol - Ticker symbol, resolved to a CIK via tickerToCik.
 * @returns {Promise<Array<{name: string, title: string, source: string}>>} Up to 12
 *   officers, de-duplicated by lower-cased name (first occurrence wins); [] when no CIK
 *   or no 10-K is found.
 * @throws Propagates network/parse errors from json() and text().
 */
async function fetchOfficersFromSec10k(symbol) {
  const company = await tickerToCik(symbol);
  if (!company) return [];

  const submissions = await json(`https://data.sec.gov/submissions/CIK${company.cik}.json`, { 'User-Agent': SEC_UA });
  const recent = submissions.filings?.recent || {};
  const at = (recent.form || []).findIndex((form) => form === '10-K');
  const accession = at >= 0 ? recent.accessionNumber[at] : null;
  const primaryDoc = at >= 0 ? recent.primaryDocument?.[at] : null;
  if (!accession || !primaryDoc) return [];

  // The archive path uses the accession number without dashes and the CIK without leading zeros.
  const folder = accession.replace(/-/g, '');
  const html = await text(
    `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${folder}/${primaryDoc}`,
    { 'User-Agent': SEC_UA },
    25000,
  );

  const headingAt = html.search(/Item\s*10[\s\S]{0,120}(Executive Officers|Directors)/i);
  const from = headingAt >= 0 ? headingAt : 0;
  const section = html.slice(from, from + 120000);

  // Words that mark a cell as a title, and words that rule a cell out as a person's name.
  // NOTE(review): "NVIDIA" is a hard-coded issuer name in the exclusion list — looks
  // company-specific; confirm whether other issuers need the same treatment.
  const titleHint = /Chief|President|Officer|Counsel|Operations|Financial|Accounting|Field/i;
  const notAName = /Chief|President|Officer|Director|Age|Name|Title|NVIDIA|Common|Stock|Item|Action/i;

  const byName = new Map();
  for (const [, rowHtml] of section.matchAll(/<tr[^>]*>([\s\S]*?)<\/tr>/gi)) {
    const cells = Array.from(
      rowHtml.matchAll(/<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi),
      (cell) => stripHtml(cell[1]),
    );
    if (cells.length < 2) continue;

    const title = cells.find((c) => titleHint.test(c) && c.length < 140);
    const name = cells.find((c) => (
      c.length > 3 && c.length < 70
      && !notAName.test(c)
      && /[A-Za-z]/.test(c)
    ));
    if (!isOfficerRow(name, title)) continue;

    const key = name.toLowerCase();
    if (!byName.has(key)) {
      byName.set(key, { name, title, source: 'SEC 10-K' });
    }
  }
  return [...byName.values()].slice(0, 12);
}
/**
 * Parses the ownership XML embedded in an SEC Form 4 full-text submission.
 *
 * Reads the first reporting owner, the issuer, and up to 8 non-derivative
 * transactions, then totals acquired ('A') versus disposed ('D') shares.
 *
 * @param {string} txt - Full submission text containing an `<ownershipDocument>` element.
 * @param {{date: string, url: string}} filing - Filing metadata copied into the result.
 * @returns {object} Summary with filingDate, reportDate, owner, issuer, title, isDirector,
 *   isOfficer, acquired, disposed, signal ('acquire' | 'dispose' | 'mixed'),
 *   transactions and url.
 */
function parseForm4(txt, filing) {
  // indexOf returns -1 when the element is missing; slicing from -1 would keep the
  // last character of the submission, so treat that case as "no XML".
  const docStart = txt.indexOf('<ownershipDocument');
  const xml = docStart >= 0 ? txt.slice(docStart) : '';

  const ownerBlock = xml.match(/<reportingOwner>([\s\S]*?)<\/reportingOwner>/i)?.[1] || '';
  const issuerBlock = xml.match(/<issuer>([\s\S]*?)<\/issuer>/i)?.[1] || '';
  const relBlock = ownerBlock.match(/<reportingOwnerRelationship>([\s\S]*?)<\/reportingOwnerRelationship>/i)?.[1] || '';

  // Most numeric/date fields are wrapped as <field><value>…</value></field>.
  const valueOf = (block, field) => tag(tag(block, field) || '', 'value');
  // XML boolean flags may be written either as 1/0 or as true/false.
  const isSet = (flag) => flag === '1' || String(flag).toLowerCase() === 'true';

  const txBlocks = [...xml.matchAll(/<nonDerivativeTransaction>([\s\S]*?)<\/nonDerivativeTransaction>/gi)].map((m) => m[1]);
  const transactions = txBlocks.slice(0, 8).map((b) => ({
    date: valueOf(b, 'transactionDate'),
    code: tag(b, 'transactionCode') || null,
    acquiredDisposed: valueOf(b, 'transactionAcquiredDisposedCode'),
    shares: num(valueOf(b, 'transactionShares')),
    price: num(valueOf(b, 'transactionPricePerShare')),
    ownedAfter: num(valueOf(b, 'sharesOwnedFollowingTransaction')),
  })).filter((t) => t.shares != null || t.code);

  const sharesFor = (side) => transactions
    .filter((t) => t.acquiredDisposed === side)
    .reduce((total, t) => total + (t.shares || 0), 0);
  const acquired = sharesFor('A');
  const disposed = sharesFor('D');

  const isDirector = isSet(tag(relBlock, 'isDirector'));
  const isOfficer = isSet(tag(relBlock, 'isOfficer'));

  let signal = 'mixed';
  if (acquired > disposed) signal = 'acquire';
  else if (disposed > acquired) signal = 'dispose';

  return {
    filingDate: filing.date,
    reportDate: tag(xml, 'periodOfReport'),
    // Fall back to the submission header when the XML carries no owner name.
    owner: tag(ownerBlock, 'rptOwnerName') || strip(txt.match(/COMPANY CONFORMED NAME:\s*([^\n]+)/)?.[1]),
    issuer: tag(issuerBlock, 'issuerName'),
    title: tag(relBlock, 'officerTitle') || (isDirector ? 'Director' : ''),
    isDirector,
    isOfficer,
    acquired,
    disposed,
    signal,
    transactions,
    url: filing.url,
  };
}
/**
 * Loads and parses a company's recent SEC Form 4 (insider transaction) filings.
 *
 * Collects up to 8 Form 4 entries from the recent-filings arrays, then downloads and
 * parses the first 5 one after another. A filing that fails to download or parse is
 * skipped.
 *
 * @param {string} symbol - Ticker symbol, resolved to a CIK via tickerToCik.
 * @returns {Promise<object[]>} Parsed filings as produced by parseForm4; [] when no CIK matches.
 * @throws Propagates errors from tickerToCik() and from loading the submissions index.
 */
async function fetchInsiderTransactions(symbol) {
  const company = await tickerToCik(symbol);
  if (!company) return [];

  const submissions = await json(`https://data.sec.gov/submissions/CIK${company.cik}.json`, { 'User-Agent': SEC_UA });
  const recent = submissions.filings?.recent || {};

  const form4s = [];
  for (const [i, formType] of (recent.form || []).entries()) {
    if (form4s.length >= 8) break;
    if (formType !== '4') continue;
    const accn = recent.accessionNumber[i];
    // The archive folder is the accession number without dashes; the file keeps them.
    const folder = accn.replace(/-/g, '');
    form4s.push({
      date: recent.filingDate[i],
      accn,
      url: `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${folder}/${accn}.txt`,
    });
  }

  const parsed = [];
  for (const filing of form4s.slice(0, 5)) {
    try {
      const body = await text(filing.url, { 'User-Agent': SEC_UA });
      parsed.push(parseForm4(body, filing));
    } catch {
      /* keep going */
    }
  }
  return parsed;
}
/**
 * Builds a generic industry-chain skeleton from a company's industry and sector labels.
 *
 * The industry label is matched against the templates first and the broader sector
 * label only afterwards. Otherwise a software company in the "Technology" sector would
 * be caught by the semiconductor template's catch-all `technology` keyword.
 *
 * @param {string} symbol - Ticker of the company itself; removed from the peer list.
 * @param {{industry?: string, sector?: string}} [profile={}] - Profile labels.
 * @returns {{upstream: string[], upstreamDetail: object[], downstream: string[],
 *   downstreamDetail: object[], peers: string[]}} Chain skeleton; detail entries have
 *   the shape `{ label, entities, note }`.
 */
function industryChainFallback(symbol, profile = {}) {
  const industry = String(profile.industry || '').toLowerCase();
  const sector = String(profile.sector || '').toLowerCase();

  // NOTE: `midstream` is declared on the templates but is not part of the returned object.
  const maps = [
    {
      match: /semiconductor|chip|accelerated|technology/,
      upstream: ['EDA/IP 軟體', '晶圓代工', '先進封裝', 'HBM/記憶體', '半導體設備', 'ABF/載板'],
      upstreamNamed: ['TSM', 'ASML', 'AMAT', 'LRCX', 'KLAC', 'MU', 'SNPS', 'CDNS'],
      peers: ['AMD', 'AVGO', 'QCOM', 'MRVL', 'TSM', 'ASML', 'MU'],
      downstream: ['雲端資料中心', '企業 AI 軟體', '自駕車/機器人', '遊戲與工作站'],
      downstreamNamed: [
        { label: 'AI 伺服器 OEM', entities: ['DELL', 'HPE', 'SMCI'], note: '採購 GPU 組裝銷售' },
        { label: '雲端與大型企業', entities: ['MSFT', 'AMZN', 'GOOGL', 'META'], note: '資料中心 GPU 需求' },
      ],
      midstream: { role: '晶片設計GPU 平台', segments: ['資料中心 GPU', '遊戲 GPU', '軟體 CUDA'] },
    },
    {
      match: /software|internet|communication|media/,
      upstream: ['雲端基礎設施', '資料中心', '廣告技術', '內容/資料供應商'],
      peers: ['MSFT', 'GOOGL', 'META', 'AMZN', 'CRM', 'ORCL'],
      downstream: ['企業客戶', '消費者流量', '開發者生態', '廣告主'],
      midstream: { role: '軟體/平台', segments: ['訂閱', '廣告', '雲端服務'] },
    },
  ];
  const generic = {
    upstream: ['原物料/零組件', '設備與服務供應商'],
    upstreamNamed: [],
    peers: [],
    downstream: ['終端客戶', '企業採購', '通路夥伴'],
    downstreamNamed: [],
    midstream: { role: profile.industry || '核心業務', segments: [] },
  };

  // The specific industry label wins; the sector is only a fallback.
  const hit = maps.find((m) => m.match.test(industry))
    || maps.find((m) => m.match.test(sector))
    || generic;

  // A category without named companies is listed as its own single entity.
  const asPlainDetail = (label) => ({ label, entities: [label], note: '' });

  const namedUpstream = hit.upstreamNamed?.length
    ? [{ label: '供應商', entities: hit.upstreamNamed, note: '產業鏈慣例' }]
    : [];
  const namedDownstream = hit.downstreamNamed?.length
    ? hit.downstreamNamed.map((d) => ({
      label: d.label || '購買方',
      entities: d.entities || [],
      note: d.note || '產業鏈慣例',
    }))
    : [];

  return {
    upstream: hit.upstream,
    upstreamDetail: [...namedUpstream, ...hit.upstream.map((u) => asPlainDetail(u))],
    downstream: hit.downstream,
    downstreamDetail: [...namedDownstream, ...hit.downstream.map((d) => asPlainDetail(d))],
    peers: hit.peers.filter((s) => s !== symbol),
  };
}
/**
 * Full sync: multi-source news + AI structuring + write to DB (per the original note;
 * the work itself is delegated to syncCompanyIntelEnriched).
 *
 * @param {string} symbol - Ticker symbol.
 * @param {object} [profile={}] - Known profile fields; resolved management fields override them.
 * @param {{force?: boolean, useAI?: boolean}} [opts={}] - `force` is on only when exactly
 *   `true`; `useAI` is on unless exactly `false`.
 * @returns {Promise<*>} Whatever syncCompanyIntelEnriched resolves with.
 */
export async function runCompanyIntelSync(symbol, profile = {}, opts = {}) {
  const management = await resolveManagement(symbol);
  const mergedProfile = { ...profile, ...management };
  const syncOptions = {
    force: opts.force === true,
    useAI: opts.useAI !== false,
    management,
  };
  return syncCompanyIntelEnriched(symbol, mergedProfile, syncOptions);
}
/**
 * Adds human-readable notes about missing data to a set of availability flags.
 *
 * @param {object} fields - Boolean flags: officers, newsTw, newsGlobal, insiders,
 *   profileDesc, usListing (any other keys are passed through).
 * @returns {object} Copy of `fields` plus `notes`, one message per missing item.
 */
function buildDataHealth(fields) {
  // [condition, message] pairs, in the order the notes are reported.
  const checks = [
    [!fields.officers, '管理層名單未取得(可按「強制更新」重試)'],
    [!fields.newsTw && !fields.newsGlobal, '新聞來源暫時無回應'],
    [!fields.insiders && fields.usListing, '近期無 SEC Form 4 或 CIK 對應失敗'],
    [!fields.insiders && !fields.usListing, '非美股標的,無 SEC 內部人申報'],
    [!fields.profileDesc, '公司簡介待同步後整理為中文'],
  ];
  const notes = checks
    .filter(([applies]) => applies)
    .map(([, message]) => message);
  return { ...fields, notes };
}
/**
 * Assembles the full company-intel payload for one symbol: management, insider
 * filings, news, industry chain, resource links and a data-health summary.
 *
 * @param {string} symbol - Ticker symbol; trimmed and upper-cased before use.
 * @param {object} [profile={}] - Already-known profile fields (e.g. name, industry);
 *   resolved management fields are spread over them wherever the two are merged.
 * @param {{sync?: boolean, force?: boolean, useAI?: boolean}} [opts={}] - When `sync`
 *   is truthy a full sync runs first; `force`/`useAI` are forwarded to it.
 * @returns {Promise<object>} The intel payload after sanitizeIntelNewsPayload.
 */
export async function getCompanyIntel(symbol, profile = {}, opts = {}) {
symbol = String(symbol || '').trim().toUpperCase();
// NOTE(review): when opts.sync is set, runCompanyIntelSync resolves management again
// internally, so resolveManagement runs twice on that path.
const management = await resolveManagement(symbol);
// Treated as a US listing when the symbol matches the pattern below and contains no dot.
const usListing = /^[A-Z][A-Z0-9.\-]{0,7}$/.test(symbol) && !symbol.includes('.');
let bundle = null;
let enrichedRow = getCompanyIntelEnriched(symbol);
if (opts.sync) {
// Full sync: replace the stored enriched row with the freshly synced result.
const sync = await runCompanyIntelSync(symbol, { ...profile, ...management }, { force: opts.force, useAI: opts.useAI });
bundle = sync.bundle;
enrichedRow = { data: sync.enriched, sources: sync.sources, updatedAt: Date.now() };
} else if (!enrichedRow) {
// No stored enrichment: gather raw sources directly; a failure leaves bundle null.
bundle = await gatherIntelSources(symbol, { ...profile, name: profile.name, ...management }).catch(() => null);
}
// SEC Form 4 filings are only looked up for US listings; errors degrade to an empty list.
const insiders = usListing
? await fetchInsiderTransactions(symbol).catch(() => [])
: [];
let newsTw = bundle?.newsTw || [];
let newsGlobal = bundle?.newsGlobal || [];
// Still no news and not syncing: gather sources (again, if the branch above already ran).
if (!newsTw.length && !newsGlobal.length && !opts.sync) {
const b = await gatherIntelSources(symbol, { ...profile, ...management }).catch(() => null);
if (b) {
newsTw = b.newsTw || [];
newsGlobal = b.newsGlobal || [];
bundle = b;
}
}
const custom = getCompanyIntelCustom(symbol);
// Start from the generic template, then refine it with hints from the bundle or,
// for US listings outside a sync, from fetch10kChainHints.
let industryChain = industryChainFallback(symbol, { ...profile, ...management });
const hints = bundle?.hints || (usListing && !opts.sync
? await fetch10kChainHints(symbol).catch(() => ({}))
: {});
if (hints && Object.keys(hints).length) {
industryChain = mergeIndustryChainWithHints(
symbol,
industryChain,
hints,
bundle?.profileExt || {},
{ ...profile, ...management },
);
}
// Profile description: the management summary first, then the bundle's extended
// profile; either is cut to 500 characters. null when neither exists.
let profileZh = management.longBusinessSummary
? { description: management.longBusinessSummary.slice(0, 500), businessModel: management.industry || profile.industry || '' }
: (bundle?.profileExt?.longBusinessSummary
? { description: bundle.profileExt.longBusinessSummary.slice(0, 500), businessModel: bundle.profileExt.industry || '' }
: null);
// Raw (pre-localisation) payload.
const raw = {
symbol,
updatedAt: new Date().toISOString(),
profileZh,
management: { ...management, searches: [] },
insiders,
news: normalizeNewsList([...newsTw, ...newsGlobal]).slice(0, 20),
newsTw: normalizeNewsList(newsTw),
newsGlobal: normalizeNewsList(newsGlobal),
// Up to 6 management-related news items; impact is always 'neutral' at this stage.
managementBrief: (bundle?.managementNewsRaw || []).slice(0, 6).map(n => ({
date: n.created,
headline: n.titleZh || n.title,
summary: (n.descriptionZh || n.description || '').slice(0, 160),
impact: 'neutral',
source: n.publisher,
url: n.url,
})),
industryChain,
sources: [
management.source || 'Yahoo assetProfile',
'SEC Form 4',
'Google 新聞(台灣)',
'Google 新聞(國際)',
'Nasdaq / Yahoo Finance',
...(enrichedRow?.sources || []),
...(custom ? ['本機自訂'] : []),
].filter(Boolean),
customUpdatedAt: custom?.updatedAt ? new Date(custom.updatedAt).toISOString() : null,
enrichedAt: enrichedRow?.updatedAt ? new Date(enrichedRow.updatedAt).toISOString() : null,
aiEnriched: enrichedRow?.data?.aiUsed || false,
enrichSources: enrichedRow?.sources || [],
};
// Layering order: localise, merge the user's custom data, then apply enrichment if any.
let intel = mergeCustomIntel(localizeIntel(raw), custom?.data);
if (enrichedRow?.data) {
intel = applyEnrichedToIntel(intel, { ...enrichedRow.data, sources: enrichedRow.sources });
}
// Re-normalise the news lists after the merge steps above.
intel.newsTw = normalizeNewsList(intel.newsTw);
intel.newsGlobal = normalizeNewsList(intel.newsGlobal);
intel.news = normalizeNewsList(intel.news?.length ? intel.news : [...intel.newsTw, ...intel.newsGlobal]).slice(0, 20);
// Merge the hints a second time, now into the post-merge chain.
// NOTE(review): presumably because the merge/enrich steps can replace industryChain — confirm.
if (hints && Object.keys(hints).length) {
intel.industryChain = mergeIndustryChainWithHints(
symbol,
intel.industryChain,
hints,
bundle?.profileExt || {},
{ ...profile, ...management },
);
}
// Chain post-processing, innermost call first: mergeNewsIntoChain, finalizeIndustryChain,
// layoutPeersIntoGrid, ensureDownstreamBuyers.
const allNews = [...(intel.newsTw || []), ...(intel.newsGlobal || []), ...(intel.news || [])];
intel.industryChain = ensureDownstreamBuyers(
layoutPeersIntoGrid(
finalizeIndustryChain(mergeNewsIntoChain(intel.industryChain, allNews, symbol), symbol),
symbol,
),
symbol,
{ ...profile, ...management },
);
if (intel.industryChain.tenKExcerpt) {
intel.industryChain.tenKExcerpt = sanitizeChainExcerpt(intel.industryChain.tenKExcerpt);
}
// Resource links are built for US listings only; a failure degrades to an empty list.
const resources = usListing
? await buildCompanyResources(symbol, { ...profile, website: management.website }, management).catch(() => [])
: [];
if (hints?.filingUrl) {
resources.unshift({ labelZh: '10-K 年報全文', url: hints.filingUrl, source: 'SEC' });
}
// intel.resources is de-duplicated by URL; intel.management.resources below keeps the full list.
const seenUrl = new Set();
intel.resources = resources.filter(l => {
if (!l?.url || seenUrl.has(l.url)) return false;
seenUrl.add(l.url);
return true;
});
intel.management = { ...intel.management, searches: [], resources };
intel.chainLayout = enrichedRow?.data?.chainLayout || 'upstream_downstream_v2';
intel = attachIntelSyncStatus(intel, symbol);
// Availability flags plus notes describing what is missing.
intel.dataHealth = buildDataHealth({
officers: (intel.management?.officers || []).length > 0,
newsTw: (intel.newsTw || []).length > 0,
newsGlobal: (intel.newsGlobal || []).length > 0,
insiders: insiders.length > 0,
profileDesc: !!(intel.profileZh?.description?.length > 40),
enriched: !!(intel.enrichedAt || intel.aiEnriched),
usListing,
});
return sanitizeIntelNewsPayload(intel);
}