finance-dashboard/lib/companyintel-chain.js

463 lines
18 KiB
JavaScript
Raw Permalink Normal View History

2026-06-04 09:32:28 +00:00
// 產業鏈:新聞萃取、代號解析、實體可點擊結構
// Keyword heuristics used to classify a news text relative to the focal company.
// All patterns are case-insensitive. Short Latin acronyms carry \b anchors so they
// no longer fire inside ordinary words ("IP" in "chip"/"shipment"/"equipment",
// "rack" in "track", "OEM" in "poem"). \b also holds between a Latin letter and a
// CJK character, so mixed text such as "矽智財IP授權" still matches.

// Wording that suggests the mentioned company is a supplier (upstream).
const UP_KW = /供應商|供應|上游|代工|材料|零件|設備|晶圓|封裝|\bHBM|\bEDA\b|\bIPs?\b|vendor|supplier|supply|manufactur|foundry|TSMC/i;
// Wording that suggests the mentioned company is a customer/buyer (downstream).
const DOWN_KW = /客戶|下游|訂單|採購|部署|採用|合作|需求|customer|deploy|adopt|partner|cloud|data\s*center|hyperscale|server|伺服器|\bOEMs?\b|\bODMs?\b|\bracks?\b/i;
// Server / OEM purchasing context.
const OEM_BUYER_CTX = /server|伺服器|\bOEMs?\b|\bODMs?\b|\bracks?\b|AI\s*server|GPU\s*server|AI\s*infrastructure|資料中心/i;
// GPU / accelerator news context.
const GPU_NEWS_CTX = /NVIDIA|NVDA|輝達|英偉達|GPU|Blackwell|H100|B200|accelerator/i;
// Symbols treated as known downstream buyers when they appear in buyer context.
const DOWNSTREAM_BUYER_SYMS = new Set([
  'DELL', 'HPE', 'HPQ', 'SMCI', 'CSCO', 'MSFT', 'AMZN', 'GOOGL', 'META', 'ORCL', 'AAPL', 'TSLA',
  '2317.TW', '2382.TW', 'LENOVO',
]);
/**
 * Company name / Chinese short name -> switchable ticker symbol.
 *
 * The table has a null prototype so that indexing it with arbitrary text
 * (e.g. `NAME_ALIASES['constructor']`) yields `undefined` instead of an
 * inherited `Object.prototype` member. `Object.entries` / `Object.keys`
 * are unaffected. Insertion order is preserved because substring scans
 * over this table are order-sensitive.
 *
 * NOTE(review): a few mappings look questionable and should be confirmed
 * against the intended listings: 中石化 and 中油 both point at 6505.TW, and
 * 惠普 points at HPE while `HP` points at HPQ.
 */
const NAME_ALIASES = {
  __proto__: null,
  台積電: 'TSM', 台积电: 'TSM', TSMC: 'TSM', 'Taiwan Semiconductor': 'TSM',
  輝達: 'NVDA', 英偉達: 'NVDA', NVIDIA: 'NVDA',
  超微: 'AMD', AMD: 'AMD',
  高通: 'QCOM', Qualcomm: 'QCOM',
  博通: 'AVGO', Broadcom: 'AVGO',
  聯發科: '2454.TW', MediaTek: '2454.TW',
  日月光: '3711.TW', ASE: '3711.TW',
  鴻海: '2317.TW', Foxconn: '2317.TW', 富士康: '2317.TW',
  廣達: '2382.TW', Quanta: '2382.TW',
  聯電: 'UMC', 'United Microelectronics': 'UMC',
  台塑: '1301.TW', 台塑化: '6505.TW', 中石化: '6505.TW',
  中油: '6505.TW',
  微軟: 'MSFT', Microsoft: 'MSFT',
  谷歌: 'GOOGL', Google: 'GOOGL', Alphabet: 'GOOGL',
  亞馬遜: 'AMZN', Amazon: 'AMZN',
  蘋果: 'AAPL', Apple: 'AAPL',
  Meta: 'META', 臉書: 'META', Facebook: 'META',
  特斯拉: 'TSLA', Tesla: 'TSLA',
  Synopsys: 'SNPS', Cadence: 'CDNS',
  ASML: 'ASML', 'Applied Materials': 'AMAT', Lam: 'LRCX', KLA: 'KLAC',
  美光: 'MU', Micron: 'MU',
  三星: '005930.KS', Samsung: '005930.KS',
  英特爾: 'INTC', Intel: 'INTC',
  甲骨文: 'ORCL', Oracle: 'ORCL',
  思科: 'CSCO', Cisco: 'CSCO',
  戴爾: 'DELL', Dell: 'DELL',
  惠普: 'HPE', HP: 'HPQ',
  'Hewlett Packard Enterprise': 'HPE', 'Hewlett-Packard Enterprise': 'HPE',
  超微電腦: 'SMCI', 'Super Micro': 'SMCI', Supermicro: 'SMCI',
  'Dell Technologies': 'DELL',
  亞馬遜雲: 'AMZN', AWS: 'AMZN',
  微軟Azure: 'MSFT', Azure: 'MSFT',
};
/**
 * Reports whether `s` has the shape of a bare US ticker:
 * one to five uppercase ASCII letters and nothing else.
 * @param {string} s - candidate text (tested as-is, no trimming or case folding)
 * @returns {boolean}
 */
function isUsTicker(s) {
  const usTickerShape = /^[A-Z]{1,5}$/;
  return usTickerShape.test(s);
}
/**
 * Reports whether `s` has the shape of a Taiwan listing code:
 * exactly four digits, optionally followed by ".TW" (suffix is case-insensitive).
 * @param {string} s - candidate text (tested as-is, no trimming)
 * @returns {boolean}
 */
function isTwTicker(s) {
  const twTickerShape = /^\d{4}(\.TW)?$/i;
  return twTickerShape.test(s);
}
// Uppercase tokens that fit the US ticker shape but are rejected by
// isTradableSymbol (common abbreviations, currency codes, stop words).
const TICKER_BLOCKLIST = new Set([
  'AI', 'IT', 'US', 'EU', 'UK', 'CEO', 'CFO', 'COO', 'GPU', 'CPU', 'CSP', 'API', 'EPS', 'SEC', 'IPO',
  'ETF', 'USD', 'EUR', 'GBP', 'JPY', 'CNY', 'TWD', 'FY', 'QOQ', 'YOY', 'AND', 'THE', 'FOR', 'INC',
]);

/**
 * Reports whether `sym` is a symbol the UI can switch to.
 *
 * The input is upper-cased and trimmed first. It is accepted when it is not
 * blocklisted and has the shape of a US ticker, a Taiwan code (with or
 * without ".TW"), or a six-digit ".KS" code.
 *
 * @param {*} sym - candidate symbol; falsy values are treated as empty
 * @returns {boolean}
 */
export function isTradableSymbol(sym) {
  const candidate = String(sym || '').toUpperCase().trim();
  if (candidate === '' || TICKER_BLOCKLIST.has(candidate)) {
    return false;
  }
  const isKoreanCode = /^\d{6}\.KS$/i.test(candidate);
  return isUsTicker(candidate) || isTwTicker(candidate) || isKoreanCode;
}
/**
 * Resolves a free-form entity label (company name, ticker, "Name (TICK)", ...)
 * to a switchable symbol, or `null` when nothing can be resolved.
 *
 * Resolution order:
 *  1. "(TICK)" then "$TICK" tokens (mapped through NAME_ALIASES when the token
 *     is itself an alias such as TSMC or AWS); skipped when the result is the
 *     focal symbol or a blocklisted abbreviation.
 *  2. Exact alias match. This runs before the bare-ticker check so alias keys
 *     that look like tickers (TSMC, AWS, KLA, HP, ASE) reach their mapping.
 *  3. Bare US ticker (not focal, not blocklisted), then Taiwan code
 *     (normalised to "NNNN.TW").
 *  4. Alias match after stripping a trailing corporate suffix.
 *  5. Alias contained in the text, in table order, ignoring the focal symbol
 *     and any alias that is only a fragment of a longer alias also present
 *     (so "超微電腦…" resolves to SMCI rather than AMD via "超微").
 *  6. "X Inc." / "X Corp.": alias match on X or on its last word.
 *
 * Alias lookups use own properties only, so labels such as "constructor"
 * cannot pick up inherited `Object.prototype` members.
 *
 * @param {*} raw - entity label; falsy values resolve to null
 * @param {string} [focalSymbol=''] - symbol of the company being viewed
 * @returns {string|null}
 */
export function resolveEntitySymbol(raw, focalSymbol = '') {
  const text = String(raw || '').trim();
  // NOTE(review): `^` binds only to 原物料; the other alternatives match anywhere
  // in the text. Left unchanged — confirm whether full anchoring was intended.
  if (!text || text === '待查證' || /^原物料|終端|通路|待查/i.test(text)) return null;
  const focal = String(focalSymbol || '').toUpperCase();

  const aliasOf = (key) =>
    (Object.prototype.hasOwnProperty.call(NAME_ALIASES, key) ? NAME_ALIASES[key] : null);
  // Turns a captured ticker token into a symbol, or null when it is unusable.
  const fromTickerToken = (token) => {
    const sym = aliasOf(token) || token;
    return sym !== focal && !TICKER_BLOCKLIST.has(sym) ? sym : null;
  };

  const paren = text.match(/\(([A-Z]{1,5})\)/);
  const parenSym = paren ? fromTickerToken(paren[1]) : null;
  if (parenSym) return parenSym;

  const dollar = text.match(/\$([A-Z]{1,5})\b/);
  const dollarSym = dollar ? fromTickerToken(dollar[1]) : null;
  if (dollarSym) return dollarSym;

  const exact = aliasOf(text);
  if (exact) return exact;

  if (isUsTicker(text) && text !== focal && !TICKER_BLOCKLIST.has(text)) return text;
  if (isTwTicker(text)) {
    return `${text.replace(/\.tw$/i, '')}.TW`;
  }

  const stripped = text.replace(/\s+(Inc\.|Corp\.|Corporation|Ltd\.|LLC|Co\.|公司|股份|集團)/gi, '').trim();
  const strippedAlias = aliasOf(stripped);
  if (strippedAlias) return strippedAlias;

  const hits = Object.entries(NAME_ALIASES)
    .filter(([name]) => name.length >= 2 && text.includes(name));
  for (const [name, sym] of hits) {
    if (sym === focal) continue;
    // A hit that is merely part of a longer alias also found in the text is
    // not the company being named.
    const shadowed = hits.some(([other]) => other.length > name.length && other.includes(name));
    if (!shadowed) return sym;
  }

  const inc = text.match(/^([A-Za-z][A-Za-z0-9&.\- ]{1,30})(?:\s+Inc\.|\s+Corp\.)/);
  if (inc) {
    const base = inc[1].trim();
    const baseAlias = aliasOf(base);
    if (baseAlias) return baseAlias;
    const words = base.split(/\s+/);
    const lastAlias = aliasOf(words[words.length - 1]);
    if (lastAlias) return lastAlias;
  }
  return null;
}
/**
 * Normalises an entity (plain label or `{ name, label, symbol }` object) into
 * `{ name, symbol }`.
 *
 * For objects, an explicit `symbol` wins; it is coerced to a trimmed string so
 * later `.toUpperCase()` calls on the result cannot fail on numeric or padded
 * values. Otherwise the symbol is resolved from the name. Only `name` and
 * `symbol` are returned — other properties of the input are not carried over.
 *
 * @param {*} raw - label string or entity-like object
 * @param {string} [focalSymbol=''] - forwarded to resolveEntitySymbol
 * @returns {{name: string, symbol: (string|null)}}
 */
export function normalizeEntityItem(raw, focalSymbol = '') {
  if (raw && typeof raw === 'object') {
    const given = raw.symbol ? String(raw.symbol).trim() : '';
    const name = String(raw.name || raw.label || given || '').trim();
    const symbol = given || resolveEntitySymbol(name, focalSymbol) || null;
    // '—' is the placeholder name for an entity with neither name nor symbol.
    return { name: name || symbol || '—', symbol };
  }
  const name = String(raw || '').trim();
  return { name, symbol: resolveEntitySymbol(name, focalSymbol) };
}
/**
 * Returns a copy of each detail group with its `entities` passed through
 * normalizeEntityItem. A missing `groups` yields `[]`; a group without
 * `entities` gets an empty list. All other group properties are kept.
 * @param {Array<object>|null|undefined} groups
 * @param {string} focalSymbol - forwarded to normalizeEntityItem
 * @returns {Array<object>}
 */
function normalizeDetailGroups(groups, focalSymbol) {
  const toEntity = (entity) => normalizeEntityItem(entity, focalSymbol);
  const normalizeGroup = (group) => {
    const rawEntities = group.entities || [];
    return { ...group, entities: rawEntities.map(toEntity) };
  };
  return (groups || []).map(normalizeGroup);
}
/**
 * Builds a `{ label, entities, note }` group from the items that have a
 * truthy `name`, keeping at most the first ten. Returns `null` when no
 * item qualifies.
 * @param {Array<{name?: string}>} items
 * @param {string} label
 * @param {string} note
 * @returns {{label: string, entities: Array<object>, note: string}|null}
 */
function groupFromItems(items, label, note) {
  const entities = items.filter((item) => Boolean(item.name)).slice(0, 10);
  return entities.length > 0 ? { label, entities, note } : null;
}
/**
 * Extracts companies mentioned in recent news titles/summaries and sorts them
 * into upstream, downstream and related buckets relative to the focal symbol.
 *
 * Each item's title/titleZh/description/descriptionZh are joined into one text.
 *  1. Keyword pass: "$TICK"/"(TICK)" tokens and NAME_ALIASES names found in the
 *     text go to `upstream` when only UP_KW matches, `downstream` when only
 *     DOWN_KW matches, and `related` otherwise. Tokens must pass
 *     isTradableSymbol, so abbreviations such as "(CEO)" are not emitted.
 *  2. Supplier pass: in texts with explicit supplier wording, aliases that are
 *     new or still in `related` are placed in `upstream`.
 *  3. Buyer pass: in server/OEM buyer context, aliases of DOWNSTREAM_BUYER_SYMS
 *     that are new or still in `related` are placed in `downstream`.
 *
 * Passes 2 and 3 only reclassify ambiguous (`related`) entities; an entity that
 * pass 1 placed in upstream/downstream stays there. Previously these passes
 * could never take effect because pass 1 had already marked every alias as
 * seen. Each entity appears at most once across the three buckets and the
 * focal symbol is never emitted.
 *
 * @param {Array<object>} [newsList=[]] - news items; null is treated as empty
 * @param {string} [focalSymbol=''] - symbol of the company being viewed
 * @returns {{upstream: Array<object>, downstream: Array<object>, related: Array<object>}}
 */
export function extractChainFromNews(newsList = [], focalSymbol = '') {
  const focal = String(focalSymbol || '').toUpperCase();
  const upstream = [];
  const downstream = [];
  const related = [];
  // Dedupe key -> bucket currently holding it. The focal symbol is
  // pre-registered with no bucket so it can never be added or promoted.
  const placed = new Map([[focal, null]]);
  const keyOf = (item) => String(item.symbol || item.name || '').toUpperCase();

  const add = (bucket, item) => {
    const key = keyOf(item);
    if (!key || placed.has(key)) return;
    placed.set(key, bucket);
    bucket.push(item);
  };
  // Like add(), but also moves an entity out of `related` into `bucket`.
  const promote = (bucket, item) => {
    const key = keyOf(item);
    if (!key) return;
    if (!placed.has(key)) {
      add(bucket, item);
      return;
    }
    if (placed.get(key) !== related) return;
    const at = related.findIndex((entry) => keyOf(entry) === key);
    const moved = at === -1 ? item : related.splice(at, 1)[0];
    placed.set(key, bucket);
    bucket.push(moved);
  };

  const aliases = Object.entries(NAME_ALIASES);
  // The alias table decides the symbol, regardless of how the name alone
  // would resolve (e.g. the name "TSMC" must map to TSM).
  const aliasItem = (name, sym) => ({ ...normalizeEntityItem(name, focal), symbol: sym });
  const texts = (newsList || []).map(
    (n) => `${n?.title || ''} ${n?.titleZh || ''} ${n?.description || ''} ${n?.descriptionZh || ''}`,
  );

  // Pass 1: keyword-based classification.
  for (const text of texts) {
    if (!text.trim()) continue;
    const up = UP_KW.test(text);
    const down = DOWN_KW.test(text);
    let bucket = related;
    if (up && !down) bucket = upstream;
    else if (down && !up) bucket = downstream;

    for (const m of text.matchAll(/\$([A-Z]{1,5})\b|\(([A-Z]{1,5})\)/g)) {
      const sym = m[1] || m[2];
      if (!isTradableSymbol(sym) || sym === focal) continue;
      add(bucket, normalizeEntityItem(sym, focal));
    }
    for (const [name, sym] of aliases) {
      if (sym === focal || !text.includes(name)) continue;
      add(bucket, aliasItem(name, sym));
    }
  }

  // Pass 2: explicit supplier wording.
  for (const text of texts) {
    if (!/供應商|供货商|supplier|vendor|foundry|代工/i.test(text)) continue;
    for (const [name, sym] of aliases) {
      if (sym === focal || !text.includes(name)) continue;
      promote(upstream, aliasItem(name, sym));
    }
  }

  // Pass 3: known buyers mentioned in server/OEM context.
  for (const text of texts) {
    const buyerCtx = OEM_BUYER_CTX.test(text)
      || (GPU_NEWS_CTX.test(text) && /Dell|HPE|Super|伺服器|server|OEM/i.test(text));
    if (!buyerCtx) continue;
    for (const [name, sym] of aliases) {
      if (!DOWNSTREAM_BUYER_SYMS.has(sym) || sym === focal || !text.includes(name)) continue;
      promote(downstream, aliasItem(name, sym));
    }
  }

  return { upstream, downstream, related };
}
// Supplier tickers listed per sector key.
export const SECTOR_SUPPLIER_TICKERS = {
  semiconductor: [
    'TSM', 'ASML', 'AMAT', 'LRCX', 'KLAC', 'MU', 'SNPS', 'CDNS',
  ],
  software: [
    'MSFT', 'AMZN', 'GOOGL',
  ],
};

/** Common downstream buyers of GPU/accelerator chips: OEM server makers plus cloud buyers. */
export const SECTOR_DOWNSTREAM_BUYERS = {
  semiconductor: [
    {
      label: 'AI 伺服器 OEM',
      tickers: ['DELL', 'HPE', 'SMCI', 'CSCO'],
      note: '採購 GPU 組裝 AI 伺服器再銷售',
    },
    {
      label: '雲端與大型企業',
      tickers: ['MSFT', 'AMZN', 'GOOGL', 'META', 'ORCL'],
      note: '資料中心與 AI 工作負載',
    },
  ],
};
// Symbols mapped directly to a SECTOR_DOWNSTREAM_BUYERS key, bypassing the
// profile-based heuristic below.
const SYMBOL_DOWNSTREAM_SECTOR = {
  NVDA: 'semiconductor',
  AMD: 'semiconductor',
  INTC: 'semiconductor',
  MRVL: 'semiconductor',
};

/**
 * Reports whether the company should be treated as a GPU/compute
 * semiconductor vendor: either its symbol is listed in
 * SYMBOL_DOWNSTREAM_SECTOR, or its profile's industry/sector text mentions
 * both a chip term and a graphics/compute term.
 *
 * @param {string} symbol - ticker, compared case-insensitively
 * @param {{industry?: string, sector?: string}|null} [profile={}] - company
 *   profile; `null` is tolerated and treated like an empty profile
 * @returns {boolean}
 */
function isGpuSemiconductor(symbol, profile = {}) {
  const sym = String(symbol || '').toUpperCase();
  if (SYMBOL_DOWNSTREAM_SECTOR[sym]) return true;
  // Optional chaining: the default parameter only covers `undefined`, so an
  // explicit `null` profile would otherwise throw here.
  const ind = `${profile?.industry || ''} ${profile?.sector || ''}`.toLowerCase();
  return /semiconductor|chip/i.test(ind) && /graphic|gpu|accelerat|comput|processor|display/i.test(ind);
}
/**
 * Builds default downstream buyer groups for a company from sector tables.
 *
 * The sector key comes from SYMBOL_DOWNSTREAM_SECTOR when the symbol is
 * listed there, otherwise 'semiconductor' when isGpuSemiconductor says so.
 * Returns `[]` when no key applies or SECTOR_DOWNSTREAM_BUYERS has no entry
 * for it. Every group and entity is tagged with confidence 'medium', and
 * entities carry source 'sector_downstream'.
 *
 * @param {string} symbol - ticker, compared case-insensitively
 * @param {object} [profile={}] - forwarded to isGpuSemiconductor
 * @returns {Array<{label: string, entities: Array<object>, note: string, confidence: string}>}
 */
export function inferDownstreamGroups(symbol, profile = {}) {
  let sectorKey = SYMBOL_DOWNSTREAM_SECTOR[String(symbol || '').toUpperCase()];
  if (!sectorKey) {
    sectorKey = isGpuSemiconductor(symbol, profile) ? 'semiconductor' : null;
  }
  const buyers = sectorKey ? SECTOR_DOWNSTREAM_BUYERS[sectorKey] : null;
  if (!buyers) return [];

  const toEntity = (code) => ({
    name: code,
    symbol: code,
    confidence: 'medium',
    source: 'sector_downstream',
  });
  return buyers.map((buyer) => ({
    label: buyer.label,
    entities: buyer.tickers.map((code) => toEntity(code)),
    note: buyer.note,
    confidence: 'medium',
  }));
}
/**
 * Reports whether any entity in any downstream group resolves to a tradable
 * symbol other than the focal one.
 * @param {Array<object>|null|undefined} detail - downstream detail groups
 * @param {string} focalSymbol - compared in upper case
 * @returns {boolean}
 */
function downstreamHasTradableBuyers(detail, focalSymbol) {
  const focal = String(focalSymbol || '').toUpperCase();
  const isOtherTradable = (entity) => {
    const { symbol } = normalizeEntityItem(entity, focal);
    return symbol && isTradableSymbol(symbol) && symbol !== focal;
  };
  return (detail || []).some((group) => (group.entities || []).some((entity) => isOtherTradable(entity)));
}
/**
 * Reports whether a downstream group is "generic": it has no entities, or
 * none of its entities resolves to a tradable symbol.
 * @param {object|null|undefined} g - detail group
 * @param {string} focalSymbol - forwarded to normalizeEntityItem
 * @returns {boolean}
 */
function isGenericDownstreamGroup(g, focalSymbol) {
  const entities = g?.entities || [];
  if (!entities.length) return true;
  const hasTradable = entities.some((entity) => {
    const { symbol } = normalizeEntityItem(entity, focalSymbol);
    return symbol && isTradableSymbol(symbol);
  });
  return !hasTradable;
}
/**
 * Makes sure the chain's downstream side names at least one tradable buyer
 * when sector defaults are available, then finalises the chain.
 *
 * 1. If no downstream group has a tradable buyer, the groups from
 *    inferDownstreamGroups are placed in front of the existing ones
 *    (deduplicated by label) and '產業常見購買方' is recorded in `chainSources`.
 * 2. If tradable buyers are present after that, groups without any tradable
 *    entity are removed.
 *
 * @param {object|null|undefined} chain - industry chain; falsy is treated as `{}`
 * @param {string} symbol - focal symbol
 * @param {object} [profile={}] - company profile forwarded to inferDownstreamGroups
 * @returns {object} result of finalizeIndustryChain
 */
export function ensureDownstreamBuyers(chain, symbol, profile = {}) {
  let working = chain || {};

  const inferred = downstreamHasTradableBuyers(working.downstreamDetail, symbol)
    ? []
    : inferDownstreamGroups(symbol, profile);
  if (inferred.length) {
    const combined = [...inferred, ...(working.downstreamDetail || [])];
    const sources = [...(working.chainSources || []), '產業常見購買方'];
    working = {
      ...working,
      downstreamDetail: dedupeGroups(combined),
      chainSources: [...new Set(sources)],
    };
  }

  if (downstreamHasTradableBuyers(working.downstreamDetail, symbol)) {
    const named = (working.downstreamDetail || []).filter((group) => !isGenericDownstreamGroup(group, symbol));
    working = { ...working, downstreamDetail: named };
  }

  return finalizeIndustryChain(working, symbol);
}
/**
 * Appends a list of supplier/customer names to the detail groups.
 * Names are normalised with normalizeEntityItem and handed to
 * mergeDetailGroups, which deduplicates them and leaves existing groups
 * in place.
 *
 * @param {Array<object>|null|undefined} detail - existing detail groups
 * @param {Array<*>|null|undefined} names - names or entity objects to add
 * @param {string} label - label for the newly added group
 * @param {string} note - note for the newly added group
 * @param {string} [focalSymbol=''] - forwarded to normalizeEntityItem
 * @returns {Array<object>} merged detail groups
 */
export function appendDetailNames(detail, names, label, note, focalSymbol = '') {
  const candidates = names || [];
  const entities = candidates.map((entry) => normalizeEntityItem(entry, focalSymbol));
  return mergeDetailGroups(detail, entities, label, note);
}
/**
 * Adds `incoming` entities to the detail groups as one new leading group,
 * skipping anything already present.
 *
 * Entities are compared by upper-cased `symbol`, falling back to `name`;
 * plain-string entities in existing groups are compared by their text.
 * Duplicates inside `incoming` itself are also collapsed (first one wins).
 * The input array is not modified. When a group is added, the result is
 * capped at 8 groups; when nothing is added, the existing groups are
 * returned as a copy without capping.
 *
 * @param {Array<object>|null|undefined} existing - current detail groups
 * @param {Array<object>|null|undefined} incoming - normalised entities to add
 * @param {string} label - label of the new group
 * @param {string} note - note of the new group
 * @returns {Array<object>}
 */
function mergeDetailGroups(existing, incoming, label, note) {
  const out = [...(existing || [])];
  const candidates = incoming || [];
  if (!candidates.length) return out;

  const keyOf = (entity) => {
    const raw = typeof entity === 'string' ? entity : entity?.symbol || entity?.name;
    return String(raw || '').toUpperCase();
  };
  const have = new Set(out.flatMap((group) => group?.entities || []).map(keyOf));

  const fresh = [];
  for (const entity of candidates) {
    const key = keyOf(entity);
    if (!key || have.has(key)) continue;
    have.add(key); // also guards against repeats later in `incoming`
    fresh.push(entity);
  }
  if (!fresh.length) return out;

  out.unshift({ label, entities: fresh, note });
  return out.slice(0, 8);
}
/**
 * Merges companies extracted from news into an industry chain.
 *
 * Upstream/downstream hits from extractChainFromNews are added as new leading
 * groups; `related` hits are appended to `peers`. The focal symbol is
 * compared in upper case, so a lowercase `focalSymbol` no longer leaks into
 * the peer list or takes one of its 14 slots. '近期新聞' is recorded in
 * `chainSources` when the news contributed any upstream or downstream entity.
 *
 * @param {object|null|undefined} chain - existing chain; falsy is treated as `{}`
 * @param {Array<object>} newsList - news items
 * @param {string} focalSymbol - symbol of the company being viewed
 * @returns {object} result of finalizeIndustryChain
 */
export function mergeNewsIntoChain(chain, newsList, focalSymbol) {
  const base = chain || {};
  const focal = String(focalSymbol || '').toUpperCase();
  const { upstream, downstream, related } = extractChainFromNews(newsList, focalSymbol);

  const upstreamDetail = mergeDetailGroups(base.upstreamDetail, upstream, '供應商/合作(新聞)', '近期公開新聞');
  const downstreamDetail = mergeDetailGroups(base.downstreamDetail, downstream, '購買方(新聞)', '近期公開新聞');

  const collected = [...(base.peers || [])];
  for (const { symbol } of related) {
    if (!symbol || collected.includes(symbol)) continue;
    if (String(symbol).toUpperCase() === focal) continue;
    collected.push(symbol);
  }
  const peers = collected
    .filter((peer) => String(peer).toUpperCase() !== focal)
    .slice(0, 14);

  const newsSource = upstream.length || downstream.length ? ['近期新聞'] : [];
  return finalizeIndustryChain({
    ...base,
    upstreamDetail,
    downstreamDetail,
    peers,
    chainSources: [...new Set([...(base.chainSources || []), ...newsSource])],
  }, focalSymbol);
}
/**
 * Brings a chain into its final shape: uniform entities, resolved symbols,
 * and recomputed flat name lists.
 *
 * - Detail groups are normalised and groups without entities are dropped.
 * - When a side has no detail groups, a single fallback group ('上游' /
 *   '下游') is built from the flat `upstream` / `downstream` array — but only
 *   if that array is non-empty, so no empty group is produced.
 * - `peers` is reduced to unique, upper-cased tradable symbols other than the
 *   focal one, capped at 14.
 * - `upstream` / `downstream` are rebuilt from the detail entity names when
 *   any exist; otherwise the incoming values are kept.
 * - `searches` is always reset to `[]`.
 *
 * @param {object|null|undefined} chain - chain to finalise; falsy is treated as `{}`
 * @param {string} [focalSymbol=''] - symbol of the company being viewed
 * @returns {object} new chain object; the input is not modified
 */
export function finalizeIndustryChain(chain, focalSymbol = '') {
  const source = chain || {};
  const focal = String(focalSymbol || '').toUpperCase();

  const hasEntities = (group) => (group.entities || []).length > 0;
  const fallbackGroups = (label, names) => {
    if (!Array.isArray(names) || names.length === 0) return [];
    return [{ label, entities: names.map((entry) => normalizeEntityItem(entry, focal)), note: '' }];
  };

  let upstreamDetail = normalizeDetailGroups(source.upstreamDetail, focal).filter(hasEntities);
  if (!upstreamDetail.length) upstreamDetail = fallbackGroups('上游', source.upstream);

  let downstreamDetail = normalizeDetailGroups(source.downstreamDetail, focal).filter(hasEntities);
  if (!downstreamDetail.length) downstreamDetail = fallbackGroups('下游', source.downstream);

  const peerSymbols = (source.peers || []).map((peer) => {
    const candidate = normalizeEntityItem(peer, focal).symbol || String(peer);
    // Upper-case so dedupe and the focal filter are case-insensitive.
    return isTradableSymbol(candidate) ? String(candidate).toUpperCase().trim() : null;
  });
  const peers = [...new Set(peerSymbols.filter(Boolean))]
    .filter((sym) => sym !== focal)
    .slice(0, 14);

  const namesOf = (groups) => groups
    .flatMap((group) => (group.entities || []).map((entity) => entity.name))
    .filter(Boolean);
  const flatUp = namesOf(upstreamDetail);
  const flatDown = namesOf(downstreamDetail);

  return {
    ...source,
    upstream: flatUp.length ? flatUp : source.upstream,
    downstream: flatDown.length ? flatDown : source.downstream,
    upstreamDetail,
    downstreamDetail,
    peers,
    searches: [],
  };
}
// Label fragments (case-insensitive) that mark a detail group as a named
// supplier group or a named customer group.
const SUPPLIER_GROUP_RE = new RegExp(
  ['供應', '10-K', '新聞', '產業常見', '合作'].join('|'),
  'i',
);
const CUSTOMER_GROUP_RE = new RegExp(
  ['客戶', '購買', '買方', 'OEM', 'ODM', '伺服器', '雲端', 'hyperscale', '需求', '10-K', '新聞', '產業'].join('|'),
  'i',
);
/**
 * Returns the trimmed label of a detail group, or '' when the group or its
 * label is missing. Groups are identified by this key when deduplicating.
 * @param {object|null|undefined} g - detail group
 * @returns {string}
 */
function groupKey(g) {
  const label = g?.label || '';
  return String(label).trim();
}
/**
 * Reports whether at least one entity of the group resolves to a tradable
 * symbol. The focal symbol itself counts if it is tradable.
 * @param {object|null|undefined} g - detail group
 * @param {string} [focalSymbol=''] - forwarded to normalizeEntityItem
 * @returns {boolean}
 */
function hasTradableEntity(g, focalSymbol = '') {
  const entities = g?.entities || [];
  return entities.some((entity) => {
    const { symbol } = normalizeEntityItem(entity, focalSymbol);
    return symbol && isTradableSymbol(symbol);
  });
}
/**
 * Reports whether an upstream group is worth keeping as a named supplier
 * group: its label matches SUPPLIER_GROUP_RE, or it contains at least one
 * tradable entity.
 * @param {object|null|undefined} g - detail group
 * @param {string} [focalSymbol=''] - forwarded to hasTradableEntity
 * @returns {boolean}
 */
function isNamedSupplierGroup(g, focalSymbol = '') {
  if (SUPPLIER_GROUP_RE.test(g?.label || '')) return true;
  return hasTradableEntity(g, focalSymbol);
}
/**
 * Reports whether a downstream group is worth keeping as a named customer
 * group: its label matches CUSTOMER_GROUP_RE, or it contains at least one
 * tradable entity.
 * @param {object|null|undefined} g - detail group
 * @param {string} [focalSymbol=''] - forwarded to hasTradableEntity
 * @returns {boolean}
 */
function isNamedCustomerGroup(g, focalSymbol = '') {
  if (CUSTOMER_GROUP_RE.test(g?.label || '')) return true;
  return hasTradableEntity(g, focalSymbol);
}
/**
 * Keeps the first group for each distinct label (as given by groupKey), in
 * original order. Groups whose label is empty are dropped; later groups
 * with a repeated label are discarded, not merged.
 * @param {Iterable<object>|null|undefined} groups
 * @returns {Array<object>}
 */
function dedupeGroups(groups) {
  // Map preserves insertion order, so values() yields first occurrences in order.
  const firstByLabel = new Map();
  for (const group of groups || []) {
    const label = groupKey(group);
    if (label && !firstByLabel.has(label)) {
      firstByLabel.set(label, group);
    }
  }
  return [...firstByLabel.values()];
}
/**
 * Merges an AI-enriched chain into the base chain while keeping supplier and
 * customer groups that were already captured, so generic labels do not
 * overwrite them.
 *
 * For each side: base groups that qualify as named supplier/customer groups
 * come first, followed by enriched groups whose label is not already kept;
 * the result is deduplicated by label. If that leaves nothing but the
 * enriched list is non-empty, the enriched list is used as-is. Enriched
 * groups are read from `upstreamDetail`/`downstreamDetail`, falling back to
 * `upstream`/`downstream`; non-array values count as empty.
 *
 * Other fields: enriched properties override base ones, `peers` and
 * `chainSources` are unions, and `tenKExcerpt` is sanitised (enriched value
 * preferred). Peers are then laid out into the grid and the chain finalised.
 *
 * @param {object} [base={}] - chain built so far
 * @param {object} [enriched={}] - enriched chain to merge in
 * @param {string} [focalSymbol=''] - symbol of the company being viewed
 * @returns {object} result of finalizeIndustryChain
 */
export function mergeEnrichedChain(base = {}, enriched = {}, focalSymbol = '') {
  const asList = (value) => (Array.isArray(value) ? value : []);
  const enrichedUp = asList(enriched.upstreamDetail || enriched.upstream || []);
  const enrichedDown = asList(enriched.downstreamDetail || enriched.downstream || []);

  const keptSuppliers = (base.upstreamDetail || []).filter((group) => isNamedSupplierGroup(group, focalSymbol));
  const keptCustomers = (base.downstreamDetail || []).filter((group) => isNamedCustomerGroup(group, focalSymbol));

  // Kept groups first, then enriched groups with a label not already kept.
  const combine = (kept, extra) => {
    const keptLabels = kept.map((group) => groupKey(group));
    const additions = extra.filter((group) => !keptLabels.includes(groupKey(group)));
    const merged = dedupeGroups([...kept, ...additions]);
    return !merged.length && extra.length ? extra : merged;
  };

  const merged = {
    ...base,
    ...enriched,
    upstreamDetail: combine(keptSuppliers, enrichedUp),
    downstreamDetail: combine(keptCustomers, enrichedDown),
    peers: [...new Set([...(base.peers || []), ...(enriched.peers || [])])],
    tenKExcerpt: sanitizeChainExcerpt(enriched.tenKExcerpt || base.tenKExcerpt),
    chainSources: [...new Set([...(base.chainSources || []), ...(enriched.chainSources || [])])],
  };
  const laidOut = layoutPeersIntoGrid(merged, focalSymbol);
  return finalizeIndustryChain(laidOut, focalSymbol);
}
/**
 * Moves peer symbols into the upstream column as a '同業/競爭' group instead of
 * listing them separately below the grid.
 *
 * Peers are upper-cased, deduplicated, and limited to tradable symbols other
 * than the focal one; peers already present anywhere in the grid (matched by
 * symbol, name, or plain-string entity) are skipped. Remaining peers are
 * appended to the existing '同業/競爭' group, or a new group with note
 * '同業標的' is added at the end of the upstream groups. The returned chain
 * always has `peers: []`.
 *
 * The input chain and its groups are not modified: an existing peer group is
 * replaced by a copy rather than extended in place.
 *
 * @param {object|null|undefined} chain - chain to lay out; falsy is treated as `{}`
 * @param {string} [focalSymbol=''] - symbol of the company being viewed
 * @returns {object} new chain object
 */
export function layoutPeersIntoGrid(chain, focalSymbol = '') {
  const source = chain || {};
  const focal = String(focalSymbol || '').toUpperCase();
  const keyOf = (entity) => {
    const raw = typeof entity === 'string' ? entity : entity?.symbol || entity?.name;
    return String(raw || '').toUpperCase();
  };

  const peers = [...new Set((source.peers || []).map((peer) => String(peer).toUpperCase()))]
    .filter((peer) => isTradableSymbol(peer) && peer !== focal);
  if (!peers.length) return { ...source, peers: [] };

  const inGrid = new Set();
  for (const group of [...(source.upstreamDetail || []), ...(source.downstreamDetail || [])]) {
    for (const entity of group?.entities || []) {
      const key = keyOf(entity);
      if (key) inGrid.add(key);
    }
  }

  const peerEntities = peers
    .filter((sym) => !inGrid.has(sym))
    .map((sym) => normalizeEntityItem(sym, focal));
  if (!peerEntities.length) return { ...source, peers: [] };

  const peerLabel = '同業/競爭';
  const upstreamDetail = [...(source.upstreamDetail || [])];
  const at = upstreamDetail.findIndex((group) => groupKey(group) === peerLabel);
  if (at === -1) {
    upstreamDetail.push({ label: peerLabel, entities: peerEntities, note: '同業標的' });
  } else {
    // Copy-on-write: the group object is shared with the caller's chain.
    const current = upstreamDetail[at];
    upstreamDetail[at] = { ...current, entities: [...(current.entities || []), ...peerEntities] };
  }
  return { ...source, upstreamDetail, peers: [] };
}
/**
 * Cleans a 10-K excerpt for display.
 *
 * Returns `null` when the trimmed text is shorter than 50 characters or when
 * its first 120 characters look like filing metadata rather than prose;
 * otherwise returns the first 480 characters.
 *
 * Metadata markers checked at the head of the text:
 *  - a `<ticker>-<YYYYMMDD>` prefix such as "nvda-20240128" (previously only
 *    the literal "nvda-" prefix was recognised);
 *  - a zero-padded number of ten or more digits starting with "000";
 *  - the token pair "FY false".
 * NOTE(review): these presumably come from the filing's embedded document
 * header — confirm against real excerpts for non-NVDA symbols.
 *
 * @param {*} text - raw excerpt; falsy values are treated as empty
 * @returns {string|null}
 */
export function sanitizeChainExcerpt(text) {
  const t = String(text || '').trim();
  if (t.length < 50) return null;
  const head = t.slice(0, 120);
  const looksLikeFilingMetadata = /^[a-z]{1,5}-\d{8}|^nvda-\d|000\d{7,}|\bFY\s+false\b/i.test(head);
  return looksLikeFilingMetadata ? null : t.slice(0, 480);
}