finance-dashboard/lib/companyintel-chain.js

463 lines
18 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Industry chain: news extraction, ticker resolution, clickable entity structures.

/**
 * Keywords that mark an article as talking about the supply (upstream) side.
 * The Latin acronyms EDA and IP are matched as whole words only: with the `i`
 * flag a bare `IP` also matched inside "chip", "ship" and "partnership", which
 * tagged almost every semiconductor headline as upstream.
 */
const UP_KW = /供應商|供應|上游|代工|材料|零件|設備|晶圓|封裝|HBM|\bEDA\b|\bIPs?\b|vendor|supplier|supply|manufactur|foundry|TSMC/i;

/**
 * Keywords that mark an article as talking about the demand (downstream) side.
 * OEM / ODM / rack are anchored on word boundaries so they no longer fire on
 * words such as "Goodman" (contains "odm") or "track" (contains "rack").
 */
const DOWN_KW = /客戶|下游|訂單|採購|部署|採用|合作|需求|customer|deploy|adopt|partner|cloud|data\s*center|hyperscale|server|伺服器|\bOEMs?\b|\bODMs?\b|\brack/i;

/** Context in which a known buyer is treated as a server / infrastructure purchaser. */
const OEM_BUYER_CTX = /server|伺服器|\bOEMs?\b|\bODMs?\b|\brack|AI\s*server|GPU\s*server|AI\s*infrastructure|資料中心/i;

/** Context that identifies GPU / accelerator related news. */
const GPU_NEWS_CTX = /NVIDIA|NVDA|輝達|英偉達|GPU|Blackwell|H100|B200|accelerator/i;

/**
 * Symbols accepted as downstream buyers by the buyer-context pass.
 * NOTE(review): 'LENOVO' is not produced by any alias in NAME_ALIASES and does
 * not fit the 1-5 letter ticker shape used elsewhere in this file - confirm it is needed.
 */
const DOWNSTREAM_BUYER_SYMS = new Set([
  'DELL', 'HPE', 'HPQ', 'SMCI', 'CSCO', 'MSFT', 'AMZN', 'GOOGL', 'META', 'ORCL', 'AAPL', 'TSLA',
  '2317.TW', '2382.TW', 'LENOVO',
]);
/**
 * Company name / Chinese short name -> symbol the UI can switch to.
 *
 * Keys are matched case-sensitively. Besides exact lookups, the code in this
 * file also walks `Object.entries(NAME_ALIASES)` and tests `text.includes(name)`,
 * so the insertion order of the keys below is significant: when several names
 * occur in a text, the earliest entry is seen first.
 *
 * NOTE(review): 中石化 and 中油 are mapped to the same code as 台塑化 (6505.TW),
 * and 惠普 maps to HPE while HP maps to HPQ - confirm these are intended.
 */
const NAME_ALIASES = {
台積電: 'TSM', 台积电: 'TSM', TSMC: 'TSM', 'Taiwan Semiconductor': 'TSM',
輝達: 'NVDA', 英偉達: 'NVDA', NVIDIA: 'NVDA',
超微: 'AMD', AMD: 'AMD',
高通: 'QCOM', Qualcomm: 'QCOM',
博通: 'AVGO', Broadcom: 'AVGO',
聯發科: '2454.TW', MediaTek: '2454.TW',
日月光: '3711.TW', ASE: '3711.TW',
鴻海: '2317.TW', Foxconn: '2317.TW', 富士康: '2317.TW',
廣達: '2382.TW', Quanta: '2382.TW',
聯電: 'UMC', 'United Microelectronics': 'UMC',
台塑: '1301.TW', 台塑化: '6505.TW', 中石化: '6505.TW',
中油: '6505.TW',
微軟: 'MSFT', Microsoft: 'MSFT',
谷歌: 'GOOGL', Google: 'GOOGL', Alphabet: 'GOOGL',
亞馬遜: 'AMZN', Amazon: 'AMZN',
蘋果: 'AAPL', Apple: 'AAPL',
Meta: 'META', 臉書: 'META', Facebook: 'META',
特斯拉: 'TSLA', Tesla: 'TSLA',
Synopsys: 'SNPS', Cadence: 'CDNS',
ASML: 'ASML', 'Applied Materials': 'AMAT', Lam: 'LRCX', KLA: 'KLAC',
美光: 'MU', Micron: 'MU',
三星: '005930.KS', Samsung: '005930.KS',
英特爾: 'INTC', Intel: 'INTC',
甲骨文: 'ORCL', Oracle: 'ORCL',
思科: 'CSCO', Cisco: 'CSCO',
戴爾: 'DELL', Dell: 'DELL',
惠普: 'HPE', HP: 'HPQ',
'Hewlett Packard Enterprise': 'HPE', 'Hewlett-Packard Enterprise': 'HPE',
// Longer names that contain a shorter alias listed above (超微, Dell, 亞馬遜, 微軟).
超微電腦: 'SMCI', 'Super Micro': 'SMCI', Supermicro: 'SMCI',
'Dell Technologies': 'DELL',
亞馬遜雲: 'AMZN', AWS: 'AMZN',
微軟Azure: 'MSFT', Azure: 'MSFT',
};
/**
 * Reports whether `s` has the shape of a US ticker: one to five upper-case
 * ASCII letters and nothing else.
 * @param {string} s - Candidate symbol (already upper-cased by callers that need it).
 * @returns {boolean}
 */
function isUsTicker(s) {
  const usTickerShape = /^[A-Z]{1,5}$/;
  return usTickerShape.test(s);
}
/**
 * Reports whether `s` has the shape of a Taiwan listing code: exactly four
 * digits, optionally followed by a case-insensitive ".TW" suffix.
 * @param {string} s - Candidate symbol.
 * @returns {boolean}
 */
function isTwTicker(s) {
  const twTickerShape = /^\d{4}(\.TW)?$/i;
  return twTickerShape.test(s);
}
/**
 * Upper-case tokens that have the shape of a ticker but are rejected by
 * `isTradableSymbol`. Each row below is a space-separated group of tokens.
 */
const TICKER_BLOCKLIST = new Set(
  [
    'AI IT US EU UK',
    'CEO CFO COO',
    'GPU CPU CSP API',
    'EPS SEC IPO ETF',
    'USD EUR GBP JPY CNY TWD',
    'FY QOQ YOY',
    'AND THE FOR INC',
  ].flatMap((row) => row.split(' ')),
);
/**
 * Decides whether `sym` can be treated as a tradable symbol.
 *
 * The value is stringified, upper-cased and trimmed first. It is accepted when
 * it is not in TICKER_BLOCKLIST and has the shape of a US ticker, a Taiwan
 * code, or a six-digit code with a ".KS" suffix.
 * @param {*} sym - Candidate symbol; falsy values are rejected.
 * @returns {boolean}
 */
export function isTradableSymbol(sym) {
  const candidate = String(sym || '').toUpperCase().trim();
  if (candidate === '' || TICKER_BLOCKLIST.has(candidate)) return false;
  return isUsTicker(candidate) || isTwTicker(candidate) || /^\d{6}\.KS$/i.test(candidate);
}
/**
 * Resolves a free-form entity label to a symbol, or `null` when none is found.
 *
 * Resolution order:
 *   1. an explicit "(TICKER)" or "$TICKER" marker that is tradable and not the focal symbol;
 *   2. an exact NAME_ALIASES entry (checked before the bare-ticker rule so that
 *      alias names that look like tickers - TSMC, HP, KLA, AWS - map to their
 *      configured symbol instead of being returned verbatim);
 *   3. a bare US ticker, a Taiwan code (normalized to "NNNN.TW") or a ".KS" code;
 *   4. the label with a trailing corporate suffix removed;
 *   5. an alias mentioned inside the label;
 *   6. the "<Name> Inc./Corp." fallback on the base name or its last word.
 *
 * @param {*} raw - Entity label; falsy values resolve to `null`.
 * @param {string} [focalSymbol=''] - Symbol of the company being viewed; it is
 *   never returned from marker, bare-ticker or mention matching.
 * @returns {string|null}
 */
export function resolveEntitySymbol(raw, focalSymbol = '') {
  const text = String(raw || '').trim();
  // NOTE(review): the leading `^` binds only to 原物料; the other alternatives match
  // anywhere in the label (which also makes the '待查證' equality check redundant).
  // Confirm that is intended before tightening it.
  if (!text || text === '待查證' || /^原物料|終端|通路|待查/i.test(text)) return null;
  const focal = String(focalSymbol || '').toUpperCase();

  // Own-property lookup: a plain `NAME_ALIASES[key]` would also return inherited
  // members such as `constructor` or `toString` for labels with those names.
  const aliasFor = (key) =>
    (Object.prototype.hasOwnProperty.call(NAME_ALIASES, key) ? NAME_ALIASES[key] : null);

  // Explicit markers win, but acronyms in the blocklist ("(CEO)", "(GPU)") are not tickers.
  const paren = text.match(/\(([A-Z]{1,5})\)/);
  if (paren && paren[1] !== focal && isTradableSymbol(paren[1])) return paren[1];
  const dollar = text.match(/\$([A-Z]{1,5})\b/);
  if (dollar && dollar[1] !== focal && isTradableSymbol(dollar[1])) return dollar[1];

  const exact = aliasFor(text);
  if (exact) return exact;

  if (isUsTicker(text) && text !== focal && isTradableSymbol(text)) return text;
  if (isTwTicker(text)) return `${text.replace(/\.tw$/i, '')}.TW`;
  // Keep in step with isTradableSymbol, which accepts six-digit ".KS" codes.
  if (/^\d{6}\.KS$/i.test(text)) return text.toUpperCase();

  const stripped = text.replace(/\s+(Inc\.|Corp\.|Corporation|Ltd\.|LLC|Co\.|公司|股份|集團)/gi, '').trim();
  const strippedHit = aliasFor(stripped);
  if (strippedHit) return strippedHit;

  // An alias that starts/ends with an ASCII letter or digit must not be glued to
  // another ASCII letter or digit, so "Intel" does not match inside "Intelligence".
  const isAsciiWordChar = (ch) => ch !== undefined && /[A-Za-z0-9]/.test(ch);
  const mentions = (name) => {
    const guardLeft = isAsciiWordChar(name[0]);
    const guardRight = isAsciiWordChar(name[name.length - 1]);
    for (let at = text.indexOf(name); at !== -1; at = text.indexOf(name, at + 1)) {
      const leftOk = !guardLeft || !isAsciiWordChar(text[at - 1]);
      const rightOk = !guardRight || !isAsciiWordChar(text[at + name.length]);
      if (leftOk && rightOk) return true;
    }
    return false;
  };
  const mentioned = Object.entries(NAME_ALIASES)
    .filter(([name]) => name.length >= 2 && mentions(name));
  // Table order still decides between unrelated aliases, but a short alias is
  // ignored when a longer mentioned alias contains it (超微 inside 超微電腦).
  const pick = mentioned.find(([name, sym]) =>
    sym !== focal
    && !mentioned.some(([other]) => other.length > name.length && other.includes(name)));
  if (pick) return pick[1];

  const inc = text.match(/^([A-Za-z][A-Za-z0-9&.\- ]{1,30})(?:\s+Inc\.|\s+Corp\.)/);
  if (inc) {
    const base = inc[1].trim();
    const baseHit = aliasFor(base);
    if (baseHit) return baseHit;
    const words = base.split(/\s+/);
    const last = words[words.length - 1];
    const lastHit = last ? aliasFor(last) : null;
    if (lastHit) return lastHit;
  }
  return null;
}
/**
 * Converts an entity given as a string or as an object into `{ name, symbol }`.
 *
 * For objects, the name is taken from `name`, `label` or `symbol` (first truthy
 * one) and an existing `symbol` is kept as-is; otherwise the symbol is resolved
 * from the name. Only `name` and `symbol` are returned - other fields of the
 * input object are not carried over.
 *
 * @param {*} raw - String label or entity-like object.
 * @param {string} [focalSymbol=''] - Forwarded to `resolveEntitySymbol`.
 * @returns {{name: string, symbol: (string|null)}}
 */
export function normalizeEntityItem(raw, focalSymbol = '') {
  const isRecord = Boolean(raw) && typeof raw === 'object';
  if (!isRecord) {
    const label = String(raw || '').trim();
    return { name: label, symbol: resolveEntitySymbol(label, focalSymbol) };
  }
  const label = String(raw.name || raw.label || raw.symbol || '').trim();
  const ticker = raw.symbol
    || resolveEntitySymbol(label, focalSymbol)
    || resolveEntitySymbol(raw.symbol, focalSymbol);
  return {
    name: label || ticker || '—',
    symbol: ticker || null,
  };
}
/**
 * Returns a copy of every group with its `entities` run through
 * `normalizeEntityItem`. A missing list of groups yields `[]`; a group without
 * `entities` gets an empty array.
 * @param {Array<object>} [groups]
 * @param {string} focalSymbol
 * @returns {Array<object>}
 */
function normalizeDetailGroups(groups, focalSymbol) {
  const source = groups || [];
  return source.map((group) => {
    const members = group.entities || [];
    return {
      ...group,
      entities: members.map((member) => normalizeEntityItem(member, focalSymbol)),
    };
  });
}
/**
 * Builds a `{ label, entities, note }` group from the first ten items that
 * have a truthy `name`, or returns `null` when no item qualifies.
 * @param {Array<{name?: string}>} items
 * @param {string} label
 * @param {string} note
 * @returns {{label: string, entities: Array<object>, note: string}|null}
 */
function groupFromItems(items, label, note) {
  const entities = items.filter(({ name }) => name).slice(0, 10);
  return entities.length === 0 ? null : { label, entities, note };
}
/**
 * Extracts companies mentioned in recent news titles / summaries and sorts them
 * into upstream, downstream and related buckets.
 *
 * Each article's `title`, `titleZh`, `description` and `descriptionZh` are
 * joined into one text. Companies are detected from "$TICKER" / "(TICKER)"
 * markers and from NAME_ALIASES names. An article matching only UP_KW feeds
 * `upstream`, one matching only DOWN_KW feeds `downstream`, anything else feeds
 * `related`. Every company is recorded once, in the first bucket that claims it;
 * the focal symbol is never recorded.
 *
 * @param {Iterable<object>} [newsList=[]] - News items.
 * @param {string} [focalSymbol=''] - Symbol of the company being viewed.
 * @returns {{upstream: Array<{name: string, symbol: string}>,
 *            downstream: Array<{name: string, symbol: string}>,
 *            related: Array<{name: string, symbol: string}>}}
 */
export function extractChainFromNews(newsList = [], focalSymbol = '') {
  const focal = String(focalSymbol || '').toUpperCase();
  const upstream = [];
  const downstream = [];
  const related = [];
  // Seeding with the focal symbol keeps the company itself out of every bucket.
  const seen = new Set([focal]);
  const add = (bucket, item) => {
    const key = String(item.symbol || item.name || '').toUpperCase();
    if (!key || seen.has(key)) return;
    seen.add(key);
    bucket.push(item);
  };

  // An alias that starts/ends with an ASCII letter or digit must stand alone, so
  // "Intel" is not found inside "Intelligence" nor "HP" inside "HPC" / "HPE".
  const isAsciiWordChar = (ch) => ch !== undefined && /[A-Za-z0-9]/.test(ch);
  const occursAsWord = (text, name) => {
    const guardLeft = isAsciiWordChar(name[0]);
    const guardRight = isAsciiWordChar(name[name.length - 1]);
    for (let at = text.indexOf(name); at !== -1; at = text.indexOf(name, at + 1)) {
      const leftOk = !guardLeft || !isAsciiWordChar(text[at - 1]);
      const rightOk = !guardRight || !isAsciiWordChar(text[at + name.length]);
      if (leftOk && rightOk) return true;
    }
    return false;
  };
  const aliasEntries = Object.entries(NAME_ALIASES);
  // Aliases mentioned in `text`, in table order. A short alias is dropped when a
  // longer mentioned alias contains it (超微 inside 超微電腦); this is decided
  // before the focal filter so the focal company's long name still hides it.
  const mentionedAliases = (text) => {
    const hits = aliasEntries.filter(([name]) => occursAsWord(text, name));
    return hits.filter(([name, sym]) =>
      sym !== focal
      && !hits.some(([other]) => other.length > name.length && other.includes(name)));
  };

  // Build each article's text and alias list once; all passes reuse them.
  const articles = Array.from(newsList)
    .map((n) => `${n.title || ''} ${n.titleZh || ''} ${n.description || ''} ${n.descriptionZh || ''}`)
    .filter((text) => text.trim())
    .map((text) => ({ text, aliases: mentionedAliases(text) }));

  // Pass 1: bucket by the article's overall keyword direction.
  for (const { text, aliases } of articles) {
    const up = UP_KW.test(text);
    const down = DOWN_KW.test(text);
    let bucket = related;
    if (up && !down) bucket = upstream;
    else if (down && !up) bucket = downstream;

    for (const m of text.matchAll(/\$([A-Z]{1,5})\b|\(([A-Z]{1,5})\)/g)) {
      const sym = m[1] || m[2];
      // isTradableSymbol also rejects blocklisted acronyms such as "(GPU)" or "(CEO)".
      if (sym === focal || !isTradableSymbol(sym)) continue;
      add(bucket, { name: sym, symbol: sym });
    }
    for (const [name, sym] of aliases) add(bucket, { name, symbol: sym });
  }

  // NOTE(review): pass 1 already records every alias of every article, so the two
  // context passes below only meet entries that are already in `seen`. If they
  // are meant to move companies out of `related`, that needs an explicit
  // reclassification step; left as-is pending a product decision.

  // Pass 2: explicit supplier wording puts the mentioned companies upstream.
  for (const { text, aliases } of articles) {
    if (!/供應商|供货商|supplier|vendor|foundry|代工/i.test(text)) continue;
    // Use the table symbol (as the other passes do); resolving the name again
    // produced a second entry such as { name: 'TSMC', symbol: 'TSMC' }.
    for (const [name, sym] of aliases) add(upstream, { name, symbol: sym });
  }

  // Pass 3: known buyers in a server / GPU purchasing context go downstream.
  for (const { text, aliases } of articles) {
    const buyerCtx = OEM_BUYER_CTX.test(text)
      || (GPU_NEWS_CTX.test(text) && /Dell|HPE|Super|伺服器|server|OEM/i.test(text));
    if (!buyerCtx) continue;
    for (const [name, sym] of aliases) {
      if (DOWNSTREAM_BUYER_SYMS.has(sym)) add(downstream, { name, symbol: sym });
    }
  }
  return { upstream, downstream, related };
}
/** Supplier tickers listed per sector key. */
export const SECTOR_SUPPLIER_TICKERS = {
  semiconductor: [
    'TSM',
    'ASML', 'AMAT', 'LRCX', 'KLAC',
    'MU',
    'SNPS', 'CDNS',
  ],
  software: ['MSFT', 'AMZN', 'GOOGL'],
};

/** Common downstream buyers of GPU / accelerator chips: server OEMs plus cloud buyers. */
export const SECTOR_DOWNSTREAM_BUYERS = {
  semiconductor: [
    {
      label: 'AI 伺服器 OEM',
      tickers: ['DELL', 'HPE', 'SMCI', 'CSCO'],
      note: '採購 GPU 組裝 AI 伺服器再銷售',
    },
    {
      label: '雲端與大型企業',
      tickers: ['MSFT', 'AMZN', 'GOOGL', 'META', 'ORCL'],
      note: '資料中心與 AI 工作負載',
    },
  ],
};

/** Symbols that are mapped directly to a SECTOR_DOWNSTREAM_BUYERS key. */
const SYMBOL_DOWNSTREAM_SECTOR = {
  NVDA: 'semiconductor',
  AMD: 'semiconductor',
  INTC: 'semiconductor',
  MRVL: 'semiconductor',
};
/**
 * Reports whether a company counts as a GPU / compute chip maker: either its
 * symbol is listed in SYMBOL_DOWNSTREAM_SECTOR, or the profile's industry and
 * sector text mentions both a chip term and a compute / graphics term.
 * @param {string} symbol
 * @param {{industry?: string, sector?: string}} [profile={}]
 * @returns {boolean}
 */
function isGpuSemiconductor(symbol, profile = {}) {
  const ticker = String(symbol || '').toUpperCase();
  if (SYMBOL_DOWNSTREAM_SECTOR[ticker]) return true;

  const descriptor = `${profile.industry || ''} ${profile.sector || ''}`.toLowerCase();
  const isChipMaker = /semiconductor|chip/i.test(descriptor);
  const isComputeFocused = /graphic|gpu|accelerat|comput|processor|display/i.test(descriptor);
  return isChipMaker && isComputeFocused;
}
/**
 * Produces default downstream buyer groups for a symbol from
 * SECTOR_DOWNSTREAM_BUYERS. The sector key comes from SYMBOL_DOWNSTREAM_SECTOR
 * or, failing that, from `isGpuSemiconductor`. Returns `[]` when no sector or
 * no buyer list applies. Every group and entity is tagged `confidence: 'medium'`.
 * @param {string} symbol
 * @param {{industry?: string, sector?: string}} [profile={}]
 * @returns {Array<{label: string, entities: Array<object>, note: string, confidence: string}>}
 */
export function inferDownstreamGroups(symbol, profile = {}) {
  const ticker = String(symbol || '').toUpperCase();
  let sector = SYMBOL_DOWNSTREAM_SECTOR[ticker];
  if (!sector) {
    sector = isGpuSemiconductor(symbol, profile) ? 'semiconductor' : null;
  }
  const buyers = sector ? SECTOR_DOWNSTREAM_BUYERS[sector] : undefined;
  if (!buyers) return [];

  const toEntity = (code) => ({
    name: code,
    symbol: code,
    confidence: 'medium',
    source: 'sector_downstream',
  });
  return buyers.map(({ label, tickers, note }) => ({
    label,
    entities: tickers.map(toEntity),
    note,
    confidence: 'medium',
  }));
}
/**
 * Reports whether any entity in the given downstream groups normalizes to a
 * tradable symbol other than the focal symbol.
 * @param {Array<object>} [detail] - Downstream detail groups.
 * @param {string} focalSymbol
 * @returns {boolean}
 */
function downstreamHasTradableBuyers(detail, focalSymbol) {
  const focal = String(focalSymbol || '').toUpperCase();
  for (const group of detail || []) {
    for (const entity of group.entities || []) {
      const { symbol } = normalizeEntityItem(entity, focal);
      if (symbol && symbol !== focal && isTradableSymbol(symbol)) return true;
    }
  }
  return false;
}
/**
 * A group is "generic" when it has no entities, or when none of its entities
 * normalizes to a tradable symbol.
 * @param {object} g - Detail group.
 * @param {string} focalSymbol
 * @returns {boolean}
 */
function isGenericDownstreamGroup(g, focalSymbol) {
  const members = g?.entities || [];
  const hasTradableMember = members.some((member) => {
    const { symbol } = normalizeEntityItem(member, focalSymbol);
    return Boolean(symbol) && isTradableSymbol(symbol);
  });
  return !hasTradableMember;
}
/**
 * Makes sure the chain's downstream side names tradable buyers where possible.
 *
 * When no downstream entity is tradable, inferred sector buyer groups are put in
 * front of the existing groups (de-duplicated by label) and '產業常見購買方' is
 * added to `chainSources`. Once tradable buyers are present, groups without any
 * tradable entity are removed. The result is passed through `finalizeIndustryChain`.
 *
 * @param {object} [chain] - Industry chain; a falsy value is treated as `{}`.
 * @param {string} symbol - Focal symbol.
 * @param {object} [profile={}] - Company profile used for sector inference.
 * @returns {object} Finalized chain.
 */
export function ensureDownstreamBuyers(chain, symbol, profile = {}) {
  let working = chain || {};
  const hasBuyers = () => downstreamHasTradableBuyers(working.downstreamDetail, symbol);

  if (!hasBuyers()) {
    const inferred = inferDownstreamGroups(symbol, profile);
    if (inferred.length) {
      const existingGroups = working.downstreamDetail || [];
      const existingSources = working.chainSources || [];
      working = {
        ...working,
        downstreamDetail: dedupeGroups([...inferred, ...existingGroups]),
        chainSources: [...new Set([...existingSources, '產業常見購買方'])],
      };
    }
  }

  if (hasBuyers()) {
    const namedGroups = (working.downstreamDetail || [])
      .filter((group) => !isGenericDownstreamGroup(group, symbol));
    working = { ...working, downstreamDetail: namedGroups };
  }

  return finalizeIndustryChain(working, symbol);
}
/**
 * Adds a list of supplier / customer names to the detail groups without
 * overwriting existing groups. Each name is normalized to `{ name, symbol }`
 * and the merge itself is delegated to `mergeDetailGroups`.
 * @param {Array<object>} detail - Existing detail groups.
 * @param {Array<*>} names - Names or entity-like objects to add.
 * @param {string} label - Label for the group holding the new entities.
 * @param {string} note - Note for that group.
 * @param {string} [focalSymbol='']
 * @returns {Array<object>}
 */
export function appendDetailNames(detail, names, label, note, focalSymbol = '') {
  const source = names || [];
  const candidates = source.map((entry) => normalizeEntityItem(entry, focalSymbol));
  return mergeDetailGroups(detail, candidates, label, note);
}
/**
 * Puts the entities of `incoming` that are not yet present into a new group at
 * the front of the detail groups.
 *
 * Entities are compared by upper-cased `symbol` (or `name` when there is no
 * symbol). An entity is skipped when the key is empty, when an existing group
 * already holds it, or when an earlier entry of `incoming` already used the
 * same key. `existing` is not modified. When a group is added the result is
 * capped at eight groups.
 *
 * @param {Array<object>} [existing] - Current detail groups.
 * @param {Array<{name?: string, symbol?: string}>} [incoming] - Candidate entities.
 * @param {string} label - Label of the group created for new entities.
 * @param {string} note - Note of that group.
 * @returns {Array<object>} New array of groups.
 */
function mergeDetailGroups(existing, incoming, label, note) {
  const out = [...(existing || [])];
  const candidates = incoming || [];
  if (!candidates.length) return out;

  const keyOf = (entity) => String(entity?.symbol || entity?.name || '').toUpperCase();
  const have = new Set(out.flatMap((group) => group.entities || []).map(keyOf));

  const fresh = [];
  for (const entity of candidates) {
    const key = keyOf(entity);
    if (!key || have.has(key)) continue;
    // Remember the key so a later duplicate inside `incoming` is dropped as well.
    have.add(key);
    fresh.push(entity);
  }
  if (!fresh.length) return out;

  out.unshift({ label, entities: fresh, note });
  return out.slice(0, 8);
}
/**
 * Merges companies extracted from news into an industry chain.
 *
 * News-derived upstream / downstream entities are merged into the detail groups
 * under the labels '供應商/合作(新聞)' and '購買方(新聞)'. Symbols from the
 * ambiguous `related` bucket are appended to `peers` (at most 14 are kept, never
 * the focal symbol). '近期新聞' is added to `chainSources` when the news
 * contributed any upstream or downstream entity. The result is finalized with
 * `finalizeIndustryChain`.
 *
 * @param {object} [chain] - Existing chain; a falsy value is treated as `{}`.
 * @param {Array<object>} [newsList] - News items; a falsy value is treated as empty.
 * @param {string} focalSymbol - Symbol of the company being viewed (any case).
 * @returns {object} Finalized chain.
 */
export function mergeNewsIntoChain(chain, newsList, focalSymbol) {
  const base = chain || {};
  // Peers are compared upper-cased, so the focal symbol must be upper-cased too;
  // comparing against the raw argument let a lower-case focal slip into `peers`.
  const focal = String(focalSymbol || '').toUpperCase();
  const { upstream, downstream, related } = extractChainFromNews(newsList || [], focalSymbol);

  const upstreamDetail = mergeDetailGroups(base.upstreamDetail, upstream, '供應商/合作(新聞)', '近期公開新聞');
  const downstreamDetail = mergeDetailGroups(base.downstreamDetail, downstream, '購買方(新聞)', '近期公開新聞');

  const peers = [...(base.peers || [])];
  for (const { symbol } of related) {
    if (symbol && symbol !== focal && !peers.includes(symbol)) peers.push(symbol);
  }
  const trimmedPeers = peers
    .filter((peer) => String(peer).toUpperCase() !== focal)
    .slice(0, 14);

  const newsContributed = upstream.length > 0 || downstream.length > 0;
  const sources = [...(base.chainSources || []), newsContributed ? '近期新聞' : null].filter(Boolean);

  return finalizeIndustryChain({
    ...base,
    upstreamDetail,
    downstreamDetail,
    peers: trimmedPeers,
    chainSources: [...new Set(sources)],
  }, focalSymbol);
}
/**
 * Brings a chain into its canonical shape: normalizes every entity to
 * `{ name, symbol }`, fills in symbols, and recomputes the flat name lists.
 *
 * - Detail groups without entities are dropped.
 * - When a side has no detail groups but a non-empty flat list
 *   (`chain.upstream` / `chain.downstream`), a single group labelled
 *   '上游' / '下游' is built from that list.
 * - `peers` is reduced to unique, upper-cased tradable symbols other than the
 *   focal one, at most 14.
 * - `upstream` / `downstream` become the flat entity names of the detail groups
 *   (or keep their previous value when there are none).
 * - `searches` is always reset to `[]`.
 *
 * @param {object} [chain] - Chain to finalize; a falsy value is treated as `{}`.
 * @param {string} [focalSymbol=''] - Symbol of the company being viewed.
 * @returns {object} New chain object.
 */
export function finalizeIndustryChain(chain, focalSymbol = '') {
  const source = chain || {};
  const focal = String(focalSymbol || '').toUpperCase();
  const hasEntities = (group) => (group.entities || []).length > 0;
  const toEntity = (entry) => normalizeEntityItem(entry, focal);

  let upstreamDetail = normalizeDetailGroups(source.upstreamDetail, focal).filter(hasEntities);
  let downstreamDetail = normalizeDetailGroups(source.downstreamDetail, focal).filter(hasEntities);

  // Only fall back to the flat list when it has content; an empty list would
  // otherwise create the very kind of empty group that is filtered out above.
  if (!upstreamDetail.length && Array.isArray(source.upstream) && source.upstream.length) {
    upstreamDetail = [{ label: '上游', entities: source.upstream.map(toEntity), note: '' }];
  }
  if (!downstreamDetail.length && Array.isArray(source.downstream) && source.downstream.length) {
    downstreamDetail = [{ label: '下游', entities: source.downstream.map(toEntity), note: '' }];
  }

  const peerSymbols = (source.peers || []).map((peer) => {
    const { symbol } = toEntity(peer);
    const candidate = symbol || (isTradableSymbol(String(peer)) ? String(peer) : null);
    // Normalize the same way isTradableSymbol does, so 'nvda' and 'NVDA' collapse
    // into one peer and the focal filter below cannot be bypassed by casing.
    return isTradableSymbol(candidate) ? String(candidate).toUpperCase().trim() : null;
  }).filter(Boolean);
  const peers = [...new Set(peerSymbols)].filter((peer) => peer !== focal).slice(0, 14);

  const flatNames = (groups) =>
    groups.flatMap((group) => (group.entities || []).map((entity) => entity.name)).filter(Boolean);
  const flatUp = flatNames(upstreamDetail);
  const flatDown = flatNames(downstreamDetail);

  return {
    ...source,
    upstream: flatUp.length ? flatUp : source.upstream,
    downstream: flatDown.length ? flatDown : source.downstream,
    upstreamDetail,
    downstreamDetail,
    peers,
    searches: [],
  };
}
/** Label fragments that identify a group as a named supplier group (case-insensitive). */
const SUPPLIER_GROUP_RE = new RegExp(
  ['供應', '10-K', '新聞', '產業常見', '合作'].join('|'),
  'i',
);

/** Label fragments that identify a group as a named customer group (case-insensitive). */
const CUSTOMER_GROUP_RE = new RegExp(
  ['客戶', '購買', '買方', 'OEM', 'ODM', '伺服器', '雲端', 'hyperscale', '需求', '10-K', '新聞', '產業'].join('|'),
  'i',
);
/**
 * Returns the trimmed label of a group as its identity key, or `''` when the
 * group or its label is missing / falsy.
 * @param {{label?: *}} [g]
 * @returns {string}
 */
function groupKey(g) {
  const label = g?.label;
  return label ? String(label).trim() : '';
}
/**
 * Reports whether at least one entity of the group normalizes to a tradable symbol.
 * @param {{entities?: Array<*>}} [g]
 * @param {string} [focalSymbol='']
 * @returns {boolean}
 */
function hasTradableEntity(g, focalSymbol = '') {
  for (const entity of g?.entities || []) {
    const { symbol } = normalizeEntityItem(entity, focalSymbol);
    if (symbol && isTradableSymbol(symbol)) return true;
  }
  return false;
}
/**
 * A supplier group is "named" when its label matches SUPPLIER_GROUP_RE or it
 * contains at least one tradable entity.
 * @param {object} [g]
 * @param {string} [focalSymbol='']
 * @returns {boolean}
 */
function isNamedSupplierGroup(g, focalSymbol = '') {
  if (SUPPLIER_GROUP_RE.test(g?.label || '')) return true;
  return hasTradableEntity(g, focalSymbol);
}
/**
 * A customer group is "named" when its label matches CUSTOMER_GROUP_RE or it
 * contains at least one tradable entity.
 * @param {object} [g]
 * @param {string} [focalSymbol='']
 * @returns {boolean}
 */
function isNamedCustomerGroup(g, focalSymbol = '') {
  if (CUSTOMER_GROUP_RE.test(g?.label || '')) return true;
  return hasTradableEntity(g, focalSymbol);
}
/**
 * Keeps the first group for every label, in the original order. Groups whose
 * label key is empty are dropped.
 * @param {Array<object>} [groups]
 * @returns {Array<object>}
 */
function dedupeGroups(groups) {
  const firstByLabel = new Map();
  for (const group of groups || []) {
    const label = groupKey(group);
    if (label && !firstByLabel.has(label)) firstByLabel.set(label, group);
  }
  return [...firstByLabel.values()];
}
/**
 * Merges an AI-enriched chain into a base chain while keeping the supplier and
 * customer groups that were already collected, so generic enriched groups do
 * not overwrite them.
 *
 * Per side: base groups that count as "named" are kept first, followed by the
 * enriched groups whose label is not among the kept ones; the list is
 * de-duplicated by label. If that leaves nothing while the enriched side has
 * entries, the enriched list is used as-is. Other fields are merged as
 * `{ ...base, ...enriched }`, with peers and chainSources unioned and the 10-K
 * excerpt sanitized. Peers are then laid into the grid and the chain is finalized.
 *
 * @param {object} [base={}]
 * @param {object} [enriched={}]
 * @param {string} [focalSymbol='']
 * @returns {object} Finalized chain.
 */
export function mergeEnrichedChain(base = {}, enriched = {}, focalSymbol = '') {
  const asList = (value) => (Array.isArray(value) ? value : []);
  const combine = (kept, incoming) => {
    const keptLabels = kept.map(groupKey);
    const additions = incoming.filter((group) => !keptLabels.includes(groupKey(group)));
    const merged = dedupeGroups([...kept, ...additions]);
    return !merged.length && incoming.length ? incoming : merged;
  };

  const keptSuppliers = (base.upstreamDetail || [])
    .filter((group) => isNamedSupplierGroup(group, focalSymbol));
  const keptCustomers = (base.downstreamDetail || [])
    .filter((group) => isNamedCustomerGroup(group, focalSymbol));
  const enrichedUp = asList(enriched.upstreamDetail || enriched.upstream || []);
  const enrichedDown = asList(enriched.downstreamDetail || enriched.downstream || []);

  const merged = {
    ...base,
    ...enriched,
    upstreamDetail: combine(keptSuppliers, enrichedUp),
    downstreamDetail: combine(keptCustomers, enrichedDown),
    peers: [...new Set([...(base.peers || []), ...(enriched.peers || [])])],
    tenKExcerpt: sanitizeChainExcerpt(enriched.tenKExcerpt || base.tenKExcerpt),
    chainSources: [...new Set([...(base.chainSources || []), ...(enriched.chainSources || [])])],
  };
  return finalizeIndustryChain(layoutPeersIntoGrid(merged, focalSymbol), focalSymbol);
}
/**
 * Moves peer symbols into a '同業/競爭' group in the upstream column instead of
 * leaving them in the separate `peers` list.
 *
 * Peers are upper-cased, trimmed, de-duplicated, and kept only when tradable,
 * different from the focal symbol and not already present in any upstream or
 * downstream group. The returned chain always has `peers: []`. The input chain
 * and its group objects are not modified.
 *
 * @param {object} chain - Chain with optional `peers`, `upstreamDetail`, `downstreamDetail`.
 * @param {string} [focalSymbol='']
 * @returns {object} New chain object.
 */
export function layoutPeersIntoGrid(chain, focalSymbol = '') {
  const focal = String(focalSymbol || '').toUpperCase();
  const keyOf = (entity) => String(entity?.symbol || entity?.name || '').toUpperCase();

  const peers = [...new Set((chain.peers || []).map((peer) => String(peer).toUpperCase().trim()))]
    .filter((peer) => isTradableSymbol(peer) && peer !== focal);
  if (!peers.length) return { ...chain, peers: [] };

  const inGrid = new Set();
  for (const group of [...(chain.upstreamDetail || []), ...(chain.downstreamDetail || [])]) {
    for (const entity of group.entities || []) {
      const key = keyOf(entity);
      if (key) inGrid.add(key);
    }
  }

  const peerEntities = peers
    .filter((sym) => !inGrid.has(sym))
    .map((sym) => normalizeEntityItem(sym, focal));
  if (!peerEntities.length) return { ...chain, peers: [] };

  const upstreamDetail = [...(chain.upstreamDetail || [])];
  const peerLabel = '同業/競爭';
  const at = upstreamDetail.findIndex((group) => groupKey(group) === peerLabel);
  if (at === -1) {
    upstreamDetail.push({ label: peerLabel, entities: peerEntities, note: '同業標的' });
  } else {
    // Replace the group with a copy: pushing into `entities` in place would
    // modify the caller's chain, and fails when the group has no `entities` array.
    const current = upstreamDetail[at];
    const entities = [...(current.entities || [])];
    const have = new Set(entities.map(keyOf));
    for (const entity of peerEntities) {
      const key = keyOf(entity);
      if (!key || have.has(key)) continue;
      have.add(key);
      entities.push(entity);
    }
    upstreamDetail[at] = { ...current, entities };
  }
  return { ...chain, upstreamDetail, peers: [] };
}
/**
 * Cleans a 10-K excerpt for display.
 *
 * Returns `null` when the trimmed text is shorter than 50 characters or when
 * its first 120 characters look like filing metadata rather than prose: a
 * leading "<ticker>-<YYYYMMDD>" document id (for any ticker, not just nvda), a
 * zero-padded numeric id ("000" followed by seven or more digits), or the
 * "FY false" flag pair. Otherwise returns at most the first 480 characters.
 *
 * @param {*} text
 * @returns {string|null}
 */
export function sanitizeChainExcerpt(text) {
  const excerpt = String(text || '').trim();
  if (!excerpt || excerpt.length < 50) return null;
  const head = excerpt.slice(0, 120);
  // The eight-digit date keeps ordinary prose such as "COVID-19 ..." from matching
  // the generalized ticker-prefix rule.
  const looksLikeFilingMetadata = /^nvda-\d|^[a-z]{1,5}-\d{8}|000\d{7,}|\bFY\s+false\b/i;
  if (looksLikeFilingMetadata.test(head)) return null;
  return excerpt.slice(0, 480);
}