// Industry chain helpers: news extraction, ticker resolution, and clickable entity structures.

/**
 * Keywords hinting that an article describes a supplier-side (upstream) relationship.
 * `EDA` and `IP` are word-bounded so they do not fire inside words such as "chip" or "partnership".
 */
const UP_KW = /供應商|供應|上游|代工|材料|零件|設備|晶圓|封裝|HBM|\bEDA\b|\bIP\b|vendor|supplier|supply|manufactur|foundry|TSMC/i;

/** Keywords hinting at a customer-side (downstream) relationship; `rack` is word-bounded ("track"). */
const DOWN_KW = /客戶|下游|訂單|採購|部署|採用|合作|需求|customer|deploy|adopt|partner|cloud|data\s*center|hyperscale|server|伺服器|OEM|ODM|\brack/i;

/** Context suggesting the article is about server / OEM style buyers. */
const OEM_BUYER_CTX = /server|伺服器|OEM|ODM|\brack|AI\s*server|GPU\s*server|AI\s*infrastructure|資料中心/i;

/** Context suggesting the article is about GPUs / accelerators. */
const GPU_NEWS_CTX = /NVIDIA|NVDA|輝達|英偉達|GPU|Blackwell|H100|B200|accelerator/i;

/** Context used by the supplier pass of `extractChainFromNews`. */
const SUPPLIER_NEWS_CTX = /供應商|供货商|supplier|vendor|foundry|代工/i;

/** Symbols that the buyer pass of `extractChainFromNews` may place in the downstream bucket. */
const DOWNSTREAM_BUYER_SYMS = new Set([
  'DELL', 'HPE', 'HPQ', 'SMCI', 'CSCO', 'MSFT', 'AMZN', 'GOOGL', 'META', 'ORCL', 'AAPL', 'TSLA',
  '2317.TW', '2382.TW', 'LENOVO',
]);

/** Company name / Chinese short name -> switchable ticker symbol. */
const NAME_ALIASES = {
  台積電: 'TSM',
  台积电: 'TSM',
  TSMC: 'TSM',
  'Taiwan Semiconductor': 'TSM',
  輝達: 'NVDA',
  英偉達: 'NVDA',
  NVIDIA: 'NVDA',
  超微: 'AMD',
  AMD: 'AMD',
  高通: 'QCOM',
  Qualcomm: 'QCOM',
  博通: 'AVGO',
  Broadcom: 'AVGO',
  聯發科: '2454.TW',
  MediaTek: '2454.TW',
  日月光: '3711.TW',
  ASE: '3711.TW',
  鴻海: '2317.TW',
  Foxconn: '2317.TW',
  富士康: '2317.TW',
  廣達: '2382.TW',
  Quanta: '2382.TW',
  聯電: 'UMC',
  'United Microelectronics': 'UMC',
  台塑: '1301.TW',
  台塑化: '6505.TW',
  中石化: '6505.TW',
  中油: '6505.TW',
  微軟: 'MSFT',
  Microsoft: 'MSFT',
  谷歌: 'GOOGL',
  Google: 'GOOGL',
  Alphabet: 'GOOGL',
  亞馬遜: 'AMZN',
  Amazon: 'AMZN',
  蘋果: 'AAPL',
  Apple: 'AAPL',
  Meta: 'META',
  臉書: 'META',
  Facebook: 'META',
  特斯拉: 'TSLA',
  Tesla: 'TSLA',
  Synopsys: 'SNPS',
  Cadence: 'CDNS',
  ASML: 'ASML',
  'Applied Materials': 'AMAT',
  Lam: 'LRCX',
  KLA: 'KLAC',
  美光: 'MU',
  Micron: 'MU',
  三星: '005930.KS',
  Samsung: '005930.KS',
  英特爾: 'INTC',
  Intel: 'INTC',
  甲骨文: 'ORCL',
  Oracle: 'ORCL',
  思科: 'CSCO',
  Cisco: 'CSCO',
  戴爾: 'DELL',
  Dell: 'DELL',
  惠普: 'HPE',
  HP: 'HPQ',
  'Hewlett Packard Enterprise': 'HPE',
  'Hewlett-Packard Enterprise': 'HPE',
  超微電腦: 'SMCI',
  'Super Micro': 'SMCI',
  Supermicro: 'SMCI',
  'Dell Technologies': 'DELL',
  亞馬遜雲: 'AMZN',
  AWS: 'AMZN',
  微軟Azure: 'MSFT',
  Azure: 'MSFT',
};

/** Escapes regular-expression metacharacters so `s` can be embedded literally in a pattern. */
function escapeRegExp(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Looks up an exact alias using own properties only, so inherited keys such as
 * "constructor" or "toString" never resolve to a value.
 * @param {string} name
 * @returns {string|null} the mapped symbol, or null when `name` is not an alias
 */
function aliasSymbol(name) {
  return Object.prototype.hasOwnProperty.call(NAME_ALIASES, name) ? NAME_ALIASES[name] : null;
}

const ASCII_ALNUM = /[A-Za-z0-9]/;

/**
 * One global pattern matching every alias of two or more characters.
 * - Longer aliases come first, so "超微電腦" wins over "超微" and "台塑化" over "台塑".
 * - An alias edge that is an ASCII letter/digit must not touch another ASCII letter/digit,
 *   so "Intel" does not match inside "Intelligence" and "HP" does not match inside "HPE".
 * Matching stays case-sensitive.
 */
const ALIAS_PATTERN = new RegExp(
  Object.keys(NAME_ALIASES)
    .filter(name => name.length >= 2)
    .sort((a, b) => b.length - a.length)
    .map(name => {
      const lead = ASCII_ALNUM.test(name[0]) ? '(?<![A-Za-z0-9])' : '';
      const trail = ASCII_ALNUM.test(name[name.length - 1]) ? '(?![A-Za-z0-9])' : '';
      return `${lead}${escapeRegExp(name)}${trail}`;
    })
    .join('|'),
  'g',
);

/**
 * Finds the aliases mentioned in `text`.
 * @param {string} text
 * @returns {Array<[string, string]>} `[name, symbol]` pairs, in NAME_ALIASES declaration order
 */
function findAliasEntries(text) {
  const hits = new Set();
  // matchAll works on a copy of the regex, so the shared pattern's lastIndex is never touched.
  for (const m of String(text).matchAll(ALIAS_PATTERN)) hits.add(m[0]);
  if (!hits.size) return [];
  return Object.entries(NAME_ALIASES).filter(([name]) => hits.has(name));
}

/** True when `s` is 1-5 upper-case ASCII letters. */
function isUsTicker(s) {
  return /^[A-Z]{1,5}$/.test(s);
}

/** True when `s` is four digits, optionally followed by ".TW" (case-insensitive). */
function isTwTicker(s) {
  return /^\d{4}(\.TW)?$/i.test(s);
}

/** Upper-case abbreviations that look like tickers but must not be treated as one. */
const TICKER_BLOCKLIST = new Set([
  'AI', 'IT', 'US', 'EU', 'UK', 'CEO', 'CFO', 'COO', 'GPU', 'CPU', 'CSP', 'API', 'EPS', 'SEC', 'IPO',
  'ETF', 'USD', 'EUR', 'GBP', 'JPY', 'CNY', 'TWD', 'FY', 'QOQ', 'YOY', 'AND', 'THE', 'FOR', 'INC',
]);

/**
 * Checks whether a value has the shape of a symbol this module treats as tradable.
 * @param {*} sym
 * @returns {boolean} true for a non-blocklisted 1-5 letter code, a 4-digit code (optional ".TW"),
 *   or a 6-digit ".KS" code; comparison is done on the upper-cased, trimmed value
 */
export function isTradableSymbol(sym) {
  const s = String(sym || '').toUpperCase().trim();
  if (!s || TICKER_BLOCKLIST.has(s)) return false;
  return isUsTicker(s) || isTwTicker(s) || /^\d{6}\.KS$/i.test(s);
}

/**
 * Resolves a free-form entity label to a ticker symbol.
 *
 * Resolution order:
 *   1. an explicit "(CODE)" or "$CODE" in the text (blocklisted codes are ignored, codes that are
 *      themselves alias names are mapped through the alias table);
 *   2. an exact alias match (checked before the bare-ticker test so "TSMC" yields "TSM");
 *   3. a bare US-style or Taiwan-style ticker;
 *   4. an alias match after stripping company suffixes;
 *   5. any alias mentioned inside the text (longest alias wins, ASCII names need word boundaries);
 *   6. the "Xxx Inc." / "Xxx Corp." prefix, or its last word, as an alias.
 *
 * @param {*} raw - entity label
 * @param {string} [focalSymbol=''] - symbol of the company being viewed; steps 1, 3 and 5 never return it
 * @returns {string|null} the resolved symbol, or null when nothing matches
 */
export function resolveEntitySymbol(raw, focalSymbol = '') {
  const text = String(raw || '').trim();
  // NOTE(review): only the first alternative is anchored by ^; the rest match anywhere. Confirm intent.
  if (!text || text === '待查證' || /^原物料|終端|通路|待查/i.test(text)) return null;
  const focal = String(focalSymbol || '').toUpperCase();

  // 1. Explicit codes: parentheses first, then the dollar-sign form.
  const explicitMatches = [text.match(/\(([A-Z]{1,5})\)/), text.match(/\$([A-Z]{1,5})\b/)];
  for (const m of explicitMatches) {
    if (!m || TICKER_BLOCKLIST.has(m[1])) continue;
    const code = aliasSymbol(m[1]) || m[1];
    if (code !== focal) return code;
  }

  // 2. Exact alias.
  const exact = aliasSymbol(text);
  if (exact) return exact;

  // 3. Bare tickers.
  if (isUsTicker(text) && !TICKER_BLOCKLIST.has(text) && text !== focal) return text;
  if (isTwTicker(text)) return `${text.replace(/\.tw$/i, '')}.TW`;

  // 4. Alias after removing company suffixes.
  const stripped = text.replace(/\s+(Inc\.|Corp\.|Corporation|Ltd\.|LLC|Co\.|公司|股份|集團)/gi, '').trim();
  const strippedHit = aliasSymbol(stripped);
  if (strippedHit) return strippedHit;

  // 5. Alias mentioned somewhere in the text.
  for (const [, sym] of findAliasEntries(text)) {
    if (sym !== focal) return sym;
  }

  // 6. "Xxx Inc." / "Xxx Corp." prefix.
  const inc = text.match(/^([A-Za-z][A-Za-z0-9&.\- ]{1,30})(?:\s+Inc\.|\s+Corp\.)/);
  if (inc) {
    const base = inc[1].trim();
    const baseHit = aliasSymbol(base);
    if (baseHit) return baseHit;
    const lastWord = base.split(/\s+/).pop();
    const lastHit = lastWord ? aliasSymbol(lastWord) : null;
    if (lastHit) return lastHit;
  }
  return null;
}

/**
 * Normalises an entity (string or object) to `{ name, symbol }`.
 * For objects, `name` is taken from `name`, `label` or `symbol`, and an existing `symbol` is kept as is;
 * any other properties of the object are not carried over.
 * @param {*} raw
 * @param {string} [focalSymbol='']
 * @returns {{name: string, symbol: (string|null)}}
 */
export function normalizeEntityItem(raw, focalSymbol = '') {
  if (raw && typeof raw === 'object') {
    const name = String(raw.name || raw.label || raw.symbol || '').trim();
    const symbol = raw.symbol || resolveEntitySymbol(name, focalSymbol);
    return { name: name || symbol || '—', symbol: symbol || null };
  }
  const name = String(raw || '').trim();
  return { name, symbol: resolveEntitySymbol(name, focalSymbol) };
}

/** Dedupe key of an entity: its symbol, else its name (or the string itself), upper-cased. */
function entityKey(e) {
  if (typeof e === 'string') return e.trim().toUpperCase();
  return String(e?.symbol || e?.name || '').toUpperCase();
}

/** Returns copies of `groups` whose entities are all normalised via `normalizeEntityItem`. */
function normalizeDetailGroups(groups, focalSymbol) {
  return (groups || []).map(g => ({
    ...g,
    entities: (g?.entities || []).map(e => normalizeEntityItem(e, focalSymbol)),
  }));
}

/** Builds a group from at most ten named items; returns null when no item has a name. */
function groupFromItems(items, label, note) {
  const ents = items.filter(i => i.name).slice(0, 10);
  if (!ents.length) return null;
  return { label, entities: ents, note };
}

/**
 * Extracts companies mentioned in recent news titles / summaries and sorts them into
 * upstream, downstream and related buckets.
 *
 * Each symbol is placed at most once (first placement wins) and the focal symbol is never placed.
 *
 * @param {Iterable<object>} [newsList=[]] - items with optional `title`, `titleZh`, `description`, `descriptionZh`
 * @param {string} [focalSymbol='']
 * @returns {{upstream: Array, downstream: Array, related: Array}} buckets of `{ name, symbol }`
 */
export function extractChainFromNews(newsList = [], focalSymbol = '') {
  const focal = String(focalSymbol || '').toUpperCase();
  const upstream = [];
  const downstream = [];
  const related = [];
  const seen = new Set([focal]);

  const add = (bucket, item) => {
    const key = String(item.symbol || item.name || '').toUpperCase();
    if (!key || seen.has(key)) return;
    seen.add(key);
    bucket.push(item);
  };

  // Build each article's text and alias hits once; all three passes reuse them.
  const articles = Array.from(newsList || [])
    .map(n => `${n?.title || ''} ${n?.titleZh || ''} ${n?.description || ''} ${n?.descriptionZh || ''}`)
    .filter(text => text.trim())
    .map(text => ({
      text,
      aliases: findAliasEntries(text).filter(([, sym]) => sym !== focal),
    }));

  // Pass 1: classify every mention by the article's keyword context.
  for (const { text, aliases } of articles) {
    const up = UP_KW.test(text);
    const down = DOWN_KW.test(text);
    let bucket = related;
    if (up && !down) bucket = upstream;
    else if (down && !up) bucket = downstream;

    for (const m of text.matchAll(/\$([A-Z]{1,5})\b|\(([A-Z]{1,5})\)/g)) {
      const code = m[1] || m[2] || '';
      // isTradableSymbol applies TICKER_BLOCKLIST, so "(AI)" or "(GPU)" is not taken as a ticker.
      if (!isTradableSymbol(code) || code === focal) continue;
      add(bucket, normalizeEntityItem(code, focal));
    }
    for (const [name, sym] of aliases) {
      add(bucket, { ...normalizeEntityItem(name, focal), symbol: sym });
    }
  }

  // NOTE(review): pass 1 already registers every alias hit in `seen`, so the two passes below can
  // only add a company that has not been placed yet. If supplier / buyer context is meant to take
  // priority over the generic classification, these passes need to run first — confirm intent.

  // Pass 2: supplier context -> upstream.
  for (const { text, aliases } of articles) {
    if (!SUPPLIER_NEWS_CTX.test(text)) continue;
    for (const [name, sym] of aliases) {
      add(upstream, { ...normalizeEntityItem(name, focal), symbol: sym });
    }
  }

  // Pass 3: server / OEM buyer context -> downstream, restricted to known buyer symbols.
  for (const { text, aliases } of articles) {
    const buyerCtx = OEM_BUYER_CTX.test(text)
      || (GPU_NEWS_CTX.test(text) && /Dell|HPE|Super|伺服器|server|OEM/i.test(text));
    if (!buyerCtx) continue;
    for (const [name, sym] of aliases) {
      if (!DOWNSTREAM_BUYER_SYMS.has(sym)) continue;
      add(downstream, { ...normalizeEntityItem(name, focal), symbol: sym });
    }
  }

  return { upstream, downstream, related };
}

/** Supplier tickers keyed by sector. */
export const SECTOR_SUPPLIER_TICKERS = {
  semiconductor: ['TSM', 'ASML', 'AMAT', 'LRCX', 'KLAC', 'MU', 'SNPS', 'CDNS'],
  software: ['MSFT', 'AMZN', 'GOOGL'],
};

/** Common downstream buyers of GPU / accelerator chips: OEM server makers and cloud buyers. */
export const SECTOR_DOWNSTREAM_BUYERS = {
  semiconductor: [
    { label: 'AI 伺服器 OEM', tickers: ['DELL', 'HPE', 'SMCI', 'CSCO'], note: '採購 GPU 組裝 AI 伺服器再銷售' },
    { label: '雲端與大型企業', tickers: ['MSFT', 'AMZN', 'GOOGL', 'META', 'ORCL'], note: '資料中心與 AI 工作負載' },
  ],
};

/** Symbols with a fixed downstream sector key. */
const SYMBOL_DOWNSTREAM_SECTOR = {
  NVDA: 'semiconductor',
  AMD: 'semiconductor',
  INTC: 'semiconductor',
  MRVL: 'semiconductor',
};

/**
 * True when the symbol is listed in SYMBOL_DOWNSTREAM_SECTOR, or when the profile's
 * industry/sector text mentions both a semiconductor term and a compute/graphics term.
 */
function isGpuSemiconductor(symbol, profile = {}) {
  const sym = String(symbol || '').toUpperCase();
  if (Object.prototype.hasOwnProperty.call(SYMBOL_DOWNSTREAM_SECTOR, sym)) return true;
  const ind = `${profile?.industry || ''} ${profile?.sector || ''}`.toLowerCase();
  return /semiconductor|chip/i.test(ind) && /graphic|gpu|accelerat|comput|processor|display/i.test(ind);
}

/**
 * Builds downstream buyer groups from the sector table for GPU-like semiconductor names.
 * @param {string} symbol
 * @param {object} [profile={}] - optional `industry` / `sector` strings
 * @returns {Array<object>} groups with medium-confidence entities; empty when no sector applies
 */
export function inferDownstreamGroups(symbol, profile = {}) {
  const key = isGpuSemiconductor(symbol, profile) ? 'semiconductor' : null;
  const buyers = key ? SECTOR_DOWNSTREAM_BUYERS[key] : null;
  if (!buyers) return [];
  return buyers.map(b => ({
    label: b.label,
    entities: b.tickers.map(code => ({
      name: code,
      symbol: code,
      confidence: 'medium',
      source: 'sector_downstream',
    })),
    note: b.note,
    confidence: 'medium',
  }));
}

/** True when some group contains a tradable entity other than the focal symbol. */
function downstreamHasTradableBuyers(detail, focalSymbol) {
  const focal = String(focalSymbol || '').toUpperCase();
  return (detail || []).some(g => (g?.entities || []).some(e => {
    const item = normalizeEntityItem(e, focal);
    return item.symbol && isTradableSymbol(item.symbol) && item.symbol !== focal;
  }));
}

/** True when the group has no entities or none of them resolves to a tradable symbol. */
function isGenericDownstreamGroup(g, focalSymbol) {
  const ents = g?.entities || [];
  return ents.every(e => {
    const item = normalizeEntityItem(e, focalSymbol);
    return !item.symbol || !isTradableSymbol(item.symbol);
  });
}

/**
 * Ensures the downstream column lists tradable buyers: when none exist, sector-inferred groups are
 * prepended; once tradable buyers exist, groups without any tradable entity are dropped.
 * @param {object} chain
 * @param {string} symbol
 * @param {object} [profile={}]
 * @returns {object} the finalized chain
 */
export function ensureDownstreamBuyers(chain, symbol, profile = {}) {
  let next = chain || {};
  if (!downstreamHasTradableBuyers(next.downstreamDetail, symbol)) {
    const inferred = inferDownstreamGroups(symbol, profile);
    if (inferred.length) {
      next = {
        ...next,
        downstreamDetail: dedupeGroups([...inferred, ...(next.downstreamDetail || [])]),
        chainSources: [...new Set([...(next.chainSources || []), '產業常見購買方'])],
      };
    }
  }
  if (downstreamHasTradableBuyers(next.downstreamDetail, symbol)) {
    next = {
      ...next,
      downstreamDetail: (next.downstreamDetail || []).filter(g => !isGenericDownstreamGroup(g, symbol)),
    };
  }
  return finalizeIndustryChain(next, symbol);
}

/**
 * Appends a supplier / customer name list as a new group (deduplicated; existing groups are kept).
 * @param {Array} detail - existing groups
 * @param {Array} names - names or entity objects to add
 * @param {string} label
 * @param {string} note
 * @param {string} [focalSymbol='']
 * @returns {Array} merged groups
 */
export function appendDetailNames(detail, names, label, note, focalSymbol = '') {
  const incoming = (names || []).map(n => normalizeEntityItem(n, focalSymbol));
  return mergeDetailGroups(detail, incoming, label, note);
}

/**
 * Puts the entities of `incoming` that are not already present in any existing group into a new
 * group at the front. When a group is added the result is capped at eight groups.
 */
function mergeDetailGroups(existing, incoming, label, note) {
  const out = [...(existing || [])];
  if (!incoming?.length) return out;
  const have = new Set(out.flatMap(g => g?.entities || []).map(entityKey));
  const fresh = incoming.filter(e => {
    const k = entityKey(e);
    return k && !have.has(k);
  });
  if (!fresh.length) return out;
  out.unshift({ label, entities: fresh, note });
  return out.slice(0, 8);
}

/**
 * Merges companies extracted from news into the industry chain.
 * @param {object} chain
 * @param {Iterable<object>} newsList
 * @param {string} focalSymbol
 * @returns {object} the finalized chain
 */
export function mergeNewsIntoChain(chain, newsList, focalSymbol) {
  const base = chain || {};
  // Compare against the upper-cased focal symbol so a lower-case argument is still excluded.
  const focal = String(focalSymbol || '').toUpperCase();
  const { upstream, downstream, related } = extractChainFromNews(newsList, focalSymbol);

  const upstreamDetail = mergeDetailGroups(base.upstreamDetail, upstream, '供應商/合作(新聞)', '近期公開新聞');
  const downstreamDetail = mergeDetailGroups(base.downstreamDetail, downstream, '購買方(新聞)', '近期公開新聞');

  let peers = [...(base.peers || [])];
  for (const r of related) {
    const sym = r.symbol;
    if (sym && !peers.includes(sym) && sym !== focal) peers.push(sym);
  }
  peers = peers.filter(p => String(p).toUpperCase() !== focal).slice(0, 14);

  const newsSource = upstream.length || downstream.length ? '近期新聞' : null;
  return finalizeIndustryChain({
    ...base,
    upstreamDetail,
    downstreamDetail,
    peers,
    chainSources: [...new Set([...(base.chainSources || []), newsSource].filter(Boolean))],
  }, focalSymbol);
}

/**
 * Unifies entity format, fills in symbols, and recomputes the flat name lists.
 * Empty groups are dropped; `peers` keeps at most 14 unique tradable symbols other than the focal
 * one; `searches` is reset to an empty array.
 * @param {object} chain
 * @param {string} [focalSymbol='']
 * @returns {object} a new chain object
 */
export function finalizeIndustryChain(chain, focalSymbol = '') {
  const src = chain || {};
  const focal = String(focalSymbol || '').toUpperCase();
  const nonEmpty = g => (g.entities || []).length > 0;

  let upstreamDetail = normalizeDetailGroups(src.upstreamDetail, focal).filter(nonEmpty);
  let downstreamDetail = normalizeDetailGroups(src.downstreamDetail, focal).filter(nonEmpty);

  // Fall back to the flat lists when no detailed group survived.
  if (!upstreamDetail.length && Array.isArray(src.upstream)) {
    upstreamDetail = [{ label: '上游', entities: src.upstream.map(e => normalizeEntityItem(e, focal)), note: '' }];
  }
  if (!downstreamDetail.length && Array.isArray(src.downstream)) {
    downstreamDetail = [{ label: '下游', entities: src.downstream.map(e => normalizeEntityItem(e, focal)), note: '' }];
  }

  const peerSymbols = (src.peers || [])
    .map(p => {
      const item = normalizeEntityItem(p, focal);
      const sym = item.symbol || (isTradableSymbol(String(p)) ? String(p) : null);
      return isTradableSymbol(sym) ? String(sym).toUpperCase().trim() : null;
    })
    .filter(Boolean);
  const peers = [...new Set(peerSymbols)].filter(p => p !== focal).slice(0, 14);

  const flatNames = groups => groups.flatMap(g => (g.entities || []).map(e => e.name)).filter(Boolean);
  const flatUp = flatNames(upstreamDetail);
  const flatDown = flatNames(downstreamDetail);

  return {
    ...src,
    upstream: flatUp.length ? flatUp : src.upstream,
    downstream: flatDown.length ? flatDown : src.downstream,
    upstreamDetail,
    downstreamDetail,
    peers,
    searches: [],
  };
}

/** Labels that mark a group as a named supplier group. */
const SUPPLIER_GROUP_RE = /供應|10-K|新聞|產業常見|合作/i;

/** Labels that mark a group as a named customer group. */
const CUSTOMER_GROUP_RE = /客戶|購買|買方|OEM|ODM|伺服器|雲端|hyperscale|需求|10-K|新聞|產業/i;

/** Dedupe key of a group: its trimmed label. */
function groupKey(g) {
  return String(g?.label || '').trim();
}

/** True when at least one entity of the group resolves to a tradable symbol. */
function hasTradableEntity(g, focalSymbol = '') {
  return (g?.entities || []).some(e => {
    const item = normalizeEntityItem(e, focalSymbol);
    return item.symbol && isTradableSymbol(item.symbol);
  });
}

/** True when the label looks like a supplier group or the group holds a tradable entity. */
function isNamedSupplierGroup(g, focalSymbol = '') {
  return SUPPLIER_GROUP_RE.test(g?.label || '') || hasTradableEntity(g, focalSymbol);
}

/** True when the label looks like a customer group or the group holds a tradable entity. */
function isNamedCustomerGroup(g, focalSymbol = '') {
  return CUSTOMER_GROUP_RE.test(g?.label || '') || hasTradableEntity(g, focalSymbol);
}

/** Keeps the first group for each non-empty label; groups without a label are dropped. */
function dedupeGroups(groups) {
  const out = [];
  const seen = new Set();
  for (const g of groups || []) {
    const k = groupKey(g);
    if (!k || seen.has(k)) continue;
    seen.add(k);
    out.push(g);
  }
  return out;
}

/**
 * Merges an AI-enriched chain into the base chain while keeping supplier / customer groups that
 * were already collected, so they are not overwritten by generic groups.
 * @param {object} [base={}]
 * @param {object} [enriched={}]
 * @param {string} [focalSymbol='']
 * @returns {object} the finalized chain
 */
export function mergeEnrichedChain(base = {}, enriched = {}, focalSymbol = '') {
  const b = base || {};
  const e = enriched || {};
  const eUp = e.upstreamDetail || e.upstream || [];
  const eDown = e.downstreamDetail || e.downstream || [];
  const eUpList = Array.isArray(eUp) ? eUp : [];
  const eDownList = Array.isArray(eDown) ? eDown : [];

  const keepSuppliers = (b.upstreamDetail || []).filter(g => isNamedSupplierGroup(g, focalSymbol));
  const keepCustomers = (b.downstreamDetail || []).filter(g => isNamedCustomerGroup(g, focalSymbol));

  // Kept groups come first, so dedupeGroups drops enriched groups that reuse one of their labels.
  let upstreamDetail = dedupeGroups([...keepSuppliers, ...eUpList]);
  let downstreamDetail = dedupeGroups([...keepCustomers, ...eDownList]);
  if (!upstreamDetail.length && eUpList.length) upstreamDetail = eUpList;
  if (!downstreamDetail.length && eDownList.length) downstreamDetail = eDownList;

  const chain = layoutPeersIntoGrid({
    ...b,
    ...e,
    upstreamDetail,
    downstreamDetail,
    peers: [...new Set([...(b.peers || []), ...(e.peers || [])])],
    tenKExcerpt: sanitizeChainExcerpt(e.tenKExcerpt || b.tenKExcerpt),
    chainSources: [...new Set([...(b.chainSources || []), ...(e.chainSources || [])])],
  }, focalSymbol);
  return finalizeIndustryChain(chain, focalSymbol);
}

/**
 * Moves peer symbols into a "同業/競爭" group of the upstream column instead of a separate list.
 * Peers already present anywhere in the grid are skipped. The input chain and its groups are not
 * mutated. The returned chain always has `peers: []`.
 * @param {object} chain
 * @param {string} [focalSymbol='']
 * @returns {object} a new chain object
 */
export function layoutPeersIntoGrid(chain, focalSymbol = '') {
  const src = chain || {};
  const focal = String(focalSymbol || '').toUpperCase();
  const peers = (src.peers || [])
    .map(p => String(p).toUpperCase())
    .filter(p => isTradableSymbol(p) && p !== focal);
  if (!peers.length) return { ...src, peers: [] };

  const inGrid = new Set();
  for (const g of [...(src.upstreamDetail || []), ...(src.downstreamDetail || [])]) {
    for (const e of g?.entities || []) {
      const k = entityKey(e);
      if (k) inGrid.add(k);
    }
  }

  const peerEntities = peers
    .filter(sym => !inGrid.has(sym))
    .map(sym => normalizeEntityItem(sym, focal));
  if (!peerEntities.length) return { ...src, peers: [] };

  const upstreamDetail = [...(src.upstreamDetail || [])];
  const peerLabel = '同業/競爭';
  const existingIdx = upstreamDetail.findIndex(g => groupKey(g) === peerLabel);
  if (existingIdx === -1) {
    upstreamDetail.push({ label: peerLabel, entities: peerEntities, note: '同業標的' });
  } else {
    // Replace the group with a copy rather than pushing into the caller's array.
    const current = upstreamDetail[existingIdx];
    const entities = [...(current.entities || [])];
    const have = new Set(entities.map(entityKey));
    for (const e of peerEntities) {
      const k = entityKey(e);
      if (k && !have.has(k)) {
        entities.push(e);
        have.add(k);
      }
    }
    upstreamDetail[existingIdx] = { ...current, entities };
  }
  return { ...src, upstreamDetail, peers: [] };
}

/**
 * Cleans a filing excerpt for display.
 * @param {*} text
 * @returns {string|null} the first 480 characters, or null when the text is shorter than 50
 *   characters or its first 120 characters match the metadata-like pattern below
 */
export function sanitizeChainExcerpt(text) {
  const t = String(text || '').trim();
  if (!t || t.length < 50) return null;
  if (/^nvda-\d|000\d{7,}|\bFY\s+false\b/i.test(t.slice(0, 120))) return null;
  return t.slice(0, 480);
}