finance-dashboard/lib/companyintel-sources.js

338 lines
13 KiB
JavaScript
Raw Permalink Normal View History

2026-06-04 09:32:28 +00:00
// Multi-source company research fetchers (Taiwan and international news, company profile, 10-K supply-chain hints, management updates).
import { yahooQuoteSummary, yahooFinanceSearchNews } from './yahoo-session.js';
import {
cleanNewsPlain, cleanGoogleNewsTitle, parseGoogleRssDescription, normalizeNewsItem,
} from './news-text.js';
// Browser-like User-Agent that text() sends by default on every request (callers may override it via `headers`).
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36';
// Identifying User-Agent passed explicitly on the sec.gov / data.sec.gov requests in this file.
// NOTE(review): the contact address is an example.com placeholder — confirm a real contact before relying on it.
const SEC_UA = 'EmmyInvestDashboard/1.0 (personal learning tool; contact@example.com)';
/**
 * Fetch `url` and resolve with the response body as a string.
 *
 * @param {string} url - Address to request.
 * @param {Object} [headers] - Extra request headers; they are merged over the default User-Agent.
 * @param {number} [ms] - Milliseconds after which the request is aborted.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `HTTP <status>` when the response is not ok; fetch/abort errors propagate unchanged.
 */
async function text(url, headers = {}, ms = 14000) {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), ms);
  try {
    const requestHeaders = { 'User-Agent': UA, ...headers };
    const response = await fetch(url, { headers: requestHeaders, signal: controller.signal });
    if (response.ok) {
      // Awaited inside the try so the abort timer still covers reading the body.
      return await response.text();
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    clearTimeout(abortTimer);
  }
}
/**
 * Fetch `url` through text() and parse the body as JSON.
 *
 * @param {string} url - Address to request.
 * @param {Object} [headers] - Extra request headers; they override the default Accept header.
 * @param {number} [ms] - Timeout in milliseconds, forwarded to text().
 * @returns {Promise<*>} The parsed JSON value.
 * @throws {SyntaxError} When the body is not valid JSON; errors from text() propagate unchanged.
 */
async function json(url, headers = {}, ms = 14000) {
  const requestHeaders = { Accept: 'application/json,text/plain,*/*', ...headers };
  const body = await text(url, requestHeaders, ms);
  return JSON.parse(body);
}
// Plain-text cleanup applied to news summaries and filing HTML in this file; delegates to cleanNewsPlain.
const strip = (raw) => {
  return cleanNewsPlain(raw);
};
// Returns the trimmed inner text of the first <name ...>...</name> element found in `block`
// (case-insensitive), or '' when the element is missing or empty.
const tag = (block, name) => {
  const elementRe = new RegExp(`<${name}[^>]*>([\\s\\S]*?)<\\/${name}>`, 'i');
  const found = block.match(elementRe);
  return found?.[1]?.trim() || '';
};
/**
 * Convert an RSS pubDate string to a `YYYY-MM-DD` day (UTC).
 * Returns null for empty or unparseable input instead of throwing, so one
 * malformed date cannot discard an entire feed.
 */
function pubDateToIsoDay(pubDate) {
  if (!pubDate) return null;
  const parsed = new Date(pubDate);
  return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString().slice(0, 10);
}

/**
 * Parse a Google News RSS document into normalized news items.
 *
 * @param {string} xml - Raw RSS XML; null/undefined is treated as an empty document.
 * @param {string} region - Region tag stored on each item; 'tw' selects the Taiwan source label.
 * @param {number} [limit=12] - Maximum number of <item> elements read (applied before filtering).
 * @returns {Array<Object>} Items produced by normalizeNewsItem that have both a title and a URL.
 */
function parseGoogleRss(xml, region, limit = 12) {
  const blocks = [...String(xml || '').matchAll(/<item>([\s\S]*?)<\/item>/gi)]
    .map((m) => m[1])
    .slice(0, limit);
  return blocks
    .map((block) => {
      const title = cleanGoogleNewsTitle(tag(block, 'title'));
      // Fallback scans for a later non-empty <link> when the first one is empty.
      const link = tag(block, 'link') || (block.match(/<link[^>]*>([^<]+)<\/link>/i)?.[1] || '').trim();
      const pub = tag(block, 'pubDate');
      const { anchorText, fontPub } = parseGoogleRssDescription(tag(block, 'description'));
      const sourceName = cleanNewsPlain(tag(block, 'source'));
      const publisher = sourceName || fontPub || 'Google 新聞';
      // Use the description anchor text only when it adds something beyond the title.
      const isUsefulSummary = anchorText
        && anchorText !== title
        && anchorText.length > 6
        && !/news\.google\.com/i.test(anchorText);
      const description = isUsefulSummary ? anchorText.slice(0, 400) : '';
      return normalizeNewsItem({
        title,
        titleZh: title,
        description,
        descriptionZh: description,
        url: link,
        publisher,
        created: pubDateToIsoDay(pub),
        region,
        source: region === 'tw' ? 'Google 新聞(台灣)' : 'Google 新聞(國際)',
      });
    })
    .filter((n) => n.titleZh && n.url);
}
/**
 * Collect Taiwan-locale Google News items for a ticker.
 *
 * Queries run one after another and stop as soon as 12 unique items (by URL)
 * have been gathered; a failing query is skipped.
 *
 * @param {string} symbol - Ticker used to build the search queries.
 * @param {string} [companyName] - Added as an extra query only when it contains CJK characters.
 * @returns {Promise<Array<Object>>} At most 12 items from parseGoogleRss, de-duplicated by URL.
 */
export async function fetchTaiwanNews(symbol, companyName) {
  const searchTerms = [];
  if (/NVDA/i.test(symbol)) searchTerms.push('輝達');
  searchTerms.push(`${symbol} 台股`, `${symbol} 美股`);
  if (companyName && /[\u4e00-\u9fff]/.test(companyName)) searchTerms.push(companyName);

  const seenUrls = new Set();
  const collected = [];
  for (const term of searchTerms) {
    try {
      const feedUrl = `https://news.google.com/rss/search?q=${encodeURIComponent(term)}&hl=zh-TW&gl=TW&ceid=TW:zh-Hant`;
      const xml = await text(feedUrl, { Accept: 'application/rss+xml, application/xml, text/xml, */*' }, 10000);
      for (const entry of parseGoogleRss(xml, 'tw', 15)) {
        if (!seenUrls.has(entry.url)) {
          seenUrls.add(entry.url);
          collected.push(entry);
        }
      }
    } catch {
      // Best effort: move on to the next query.
    }
    if (collected.length >= 12) break;
  }
  return collected.slice(0, 12);
}
/** Maximum number of items fetchGlobalNews returns. */
const GLOBAL_NEWS_LIMIT = 14;

/**
 * Convert a Unix timestamp in seconds to a `YYYY-MM-DD` day (UTC).
 * Returns null for falsy or non-numeric input instead of throwing.
 */
function epochSecondsToIsoDay(seconds) {
  if (!seconds) return null;
  const date = new Date(seconds * 1000);
  return Number.isNaN(date.getTime()) ? null : date.toISOString().slice(0, 10);
}

/** Map one Yahoo Finance news row (title, summary, link, publisher, providerPublishTime) to a normalized item. */
function yahooNewsToItem(n) {
  const summary = strip(n.summary || '');
  return normalizeNewsItem({
    title: n.title,
    titleZh: n.title,
    description: summary,
    descriptionZh: summary,
    url: n.link,
    publisher: n.publisher || 'Yahoo Finance',
    created: epochSecondsToIsoDay(n.providerPublishTime),
    region: 'global',
    source: 'Yahoo Finance',
  });
}

/**
 * Collect international news for a ticker from several sources, in this order:
 * the Yahoo session news search, the public Yahoo search endpoint, two Google
 * News (en-US) queries, and the Nasdaq article API.
 *
 * Each source is best effort: a failure is ignored and the next source is tried.
 * Items are de-duplicated by URL. Once the cap is reached the remaining sources
 * are not queried, because their items would be cut off by the final slice anyway.
 *
 * @param {string} symbol - Ticker symbol to search for.
 * @returns {Promise<Array<Object>>} At most 14 normalized news items.
 */
export async function fetchGlobalNews(symbol) {
  const out = [];
  const seen = new Set();
  const isFull = () => out.length >= GLOBAL_NEWS_LIMIT;
  const addYahooRows = (rows) => {
    for (const n of rows || []) {
      const item = yahooNewsToItem(n);
      if (item.url && !seen.has(item.url)) {
        seen.add(item.url);
        out.push(item);
      }
    }
  };

  try {
    addYahooRows(await yahooFinanceSearchNews(symbol, 14));
  } catch {
    // Best effort: continue with the next source.
  }

  if (!isFull()) {
    try {
      const y = await json(`https://query1.finance.yahoo.com/v1/finance/search?q=${encodeURIComponent(symbol)}&newsCount=12&quotesCount=0`);
      addYahooRows(y.news);
    } catch {
      // Best effort: continue with the next source.
    }
  }

  for (const q of [`${symbol} stock`, `${symbol} earnings CEO`]) {
    if (isFull()) break;
    try {
      const url = `https://news.google.com/rss/search?q=${encodeURIComponent(q)}&hl=en-US&gl=US&ceid=US:en`;
      const xml = await text(url, {}, 10000);
      for (const item of parseGoogleRss(xml, 'global', 10)) {
        if (seen.has(item.url)) continue;
        seen.add(item.url);
        out.push(item);
      }
    } catch {
      // Best effort: continue with the next query.
    }
  }

  if (!isFull()) {
    try {
      const d = await json(`https://api.nasdaq.com/api/news/topic/articlebysymbol?q=${encodeURIComponent(symbol)}|stocks&offset=0&limit=8&fallback=true`, {
        Accept: 'application/json', Origin: 'https://www.nasdaq.com', Referer: 'https://www.nasdaq.com/',
      });
      for (const r of d?.data?.rows || []) {
        // Nasdaq may return site-relative paths; make them absolute.
        const url = r.url ? (r.url.startsWith('http') ? r.url : `https://www.nasdaq.com${r.url}`) : null;
        if (!url || seen.has(url)) continue;
        seen.add(url);
        const summary = strip(r.description || '');
        out.push(normalizeNewsItem({
          title: r.title,
          titleZh: r.title,
          description: summary,
          descriptionZh: summary,
          url,
          publisher: r.publisher || 'Nasdaq',
          created: r.created || r.ago,
          region: 'global',
          source: 'Nasdaq',
        }));
      }
    } catch {
      // Best effort: return whatever was gathered.
    }
  }

  return out.slice(0, GLOBAL_NEWS_LIMIT);
}
// Cached ticker -> { cik, name } lookup built from the SEC company_tickers.json file (null until loaded).
let _tickerMap = null;
// In-flight download shared by concurrent callers so the file is fetched only once.
let _tickerMapLoading = null;

/**
 * Resolve a ticker symbol to its SEC CIK and registered company title.
 *
 * The SEC ticker file is downloaded on first use and cached for the lifetime
 * of the module. A failed download is not cached, so a later call retries.
 *
 * @param {string} symbol - Ticker symbol; matched case-insensitively after trimming.
 * @returns {Promise<{cik: string, name: string}|null>} CIK zero-padded to 10 digits plus the title, or null when unknown.
 * @throws Errors from json() propagate when the ticker file cannot be fetched or parsed.
 */
async function tickerToCik(symbol) {
  if (!_tickerMap) {
    if (!_tickerMapLoading) {
      _tickerMapLoading = json('https://www.sec.gov/files/company_tickers.json', { 'User-Agent': SEC_UA })
        .then((d) => {
          // Null-prototype object: lookups can never hit inherited members such as `constructor`.
          const map = Object.create(null);
          for (const row of Object.values(d)) {
            map[String(row.ticker).toUpperCase()] = {
              cik: String(row.cik_str).padStart(10, '0'),
              name: row.title,
            };
          }
          _tickerMap = map;
          return map;
        })
        .finally(() => {
          _tickerMapLoading = null;
        });
    }
    await _tickerMapLoading;
  }
  const key = String(symbol ?? '').trim().toUpperCase();
  return _tickerMap[key] || null;
}
/**
 * Build an extended company profile for `symbol`.
 *
 * When `seed` already carries both a business summary and a sector, the profile
 * is assembled from the seed without any request. Otherwise the Yahoo
 * quoteSummary modules assetProfile, summaryProfile and peer are fetched; on any
 * failure an all-null profile (with no `source` field) is returned.
 *
 * @param {string} symbol - Ticker symbol; also excluded from the peers list.
 * @param {Object} [seed] - Previously fetched profile fields.
 * @returns {Promise<Object>} Profile with symbol, longBusinessSummary, website, sector,
 *   industry, country, employees, peers and (when data was found) source.
 */
export async function fetchCompanyProfileExtended(symbol, seed = {}) {
  const seedIsSufficient = Boolean(seed.longBusinessSummary && seed.sector);
  if (seedIsSufficient) {
    return {
      symbol,
      longBusinessSummary: seed.longBusinessSummary,
      website: seed.website || null,
      sector: seed.sector,
      industry: seed.industry || null,
      country: seed.country || null,
      employees: seed.fullTimeEmployees ?? null,
      peers: seed.peers || [],
      source: seed.source || 'Yahoo assetProfile',
    };
  }

  const blankProfile = {
    symbol,
    longBusinessSummary: null,
    website: null,
    sector: null,
    industry: null,
    country: null,
    employees: null,
    peers: [],
  };

  try {
    const summary = await yahooQuoteSummary(symbol, 'assetProfile,summaryProfile,peer');
    const asset = summary?.assetProfile || {};
    const brief = summary?.summaryProfile || {};
    // Prefer assetProfile and fall back to summaryProfile field by field.
    const pick = (field) => asset[field] || brief[field] || null;

    const rawPeers = summary?.peer?.symbols || [];
    // Keep the part after the last '.', upper-cased, and drop the company itself.
    const peerTickers = rawPeers
      .map((raw) => String(raw).split('.').pop()?.toUpperCase())
      .filter((ticker) => ticker && ticker !== symbol);

    return {
      symbol,
      longBusinessSummary: pick('longBusinessSummary'),
      website: pick('website'),
      sector: pick('sector'),
      industry: pick('industry'),
      country: pick('country'),
      employees: asset.fullTimeEmployees ?? brief.fullTimeEmployees ?? null,
      peers: Array.from(new Set(peerTickers)).slice(0, 12),
      source: 'Yahoo quoteSummary',
    };
  } catch {
    // Best effort: fall back to the empty profile.
    return blankProfile;
  }
}
/**
 * Pull company-like names (capitalised text ending in Inc., Corp., Corporation,
 * Ltd., LLC or Co.) out of a passage.
 *
 * Two passes are made over `section`: first over relationship phrases
 * (customer/client/supplier/competitor/partner ... up to the next period), then
 * over every company-suffixed span. Names keep first-seen order.
 *
 * @param {string} section - Plain text to scan.
 * @returns {string[]} Up to 15 unique names, each 4-49 characters long.
 */
function extractNamedEntities(section) {
  const relationshipPhrase = /(?:customers?|clients?|suppliers?|competitors?|partners?)[^.]{0,400}/gi;
  const companySpan = /\b([A-Z][A-Za-z0-9&.\- ]{2,40}(?:Inc\.|Corp\.|Corporation|Ltd\.|LLC|Co\.))/g;
  const found = new Set();

  for (const scanner of [relationshipPhrase, companySpan]) {
    for (const match of section.matchAll(scanner)) {
      // The first pattern has no capture group, so fall back to the whole match.
      const scope = match[1] || match[0];
      const candidates = scope.match(/\b([A-Z][A-Za-z0-9&.\- ]{2,35}(?:Inc\.|Corp\.|Corporation|Ltd\.|LLC|Co\.))/g) || [];
      candidates
        .map((candidate) => candidate.trim())
        .filter((candidate) => candidate.length > 3 && candidate.length < 50)
        .forEach((candidate) => found.add(candidate));
    }
  }

  return Array.from(found).slice(0, 15);
}
/**
 * Collect likely supplier names from 10-K plain text.
 *
 * Passages are gathered around supplier / supply-chain wording, "we rely/depend
 * on" phrasing and contract-manufacturer / foundry mentions (each running to the
 * next period). Every passage contributes the names found by
 * extractNamedEntities plus any hit from a fixed list of well-known vendors.
 *
 * @param {string} plain - Plain-text filing content.
 * @returns {string[]} Up to 18 unique names in first-seen order.
 */
function extract10kSuppliers(plain) {
  const passagePatterns = [
    /(?:suppliers?|supply\s+chain|sole\s+supplier|third[- ]party\s+manufactur)[^.]{0,2000}/gi,
    /(?:we\s+(?:rely|depend)\s+(?:on|upon)\s+)[^.]{0,800}/gi,
    /(?:contract\s+manufactur|foundry)[^.]{0,1200}/gi,
  ];
  const passages = passagePatterns.flatMap((pattern) => plain.match(pattern) || []);

  const found = new Set();
  for (const passage of passages) {
    extractNamedEntities(passage).forEach((name) => found.add(name));
    const vendorHits = passage.matchAll(/\b(TSMC|Taiwan Semiconductor|Samsung|SK\s*Hynix|Micron|ASML|Synopsys|Cadence|Foxconn|Hon\s*Hai)\b/gi);
    for (const hit of vendorHits) {
      found.add(hit[1].trim());
    }
  }
  return Array.from(found).slice(0, 18);
}
/**
 * Collect likely customer names from 10-K plain text.
 *
 * Passages start at major/principal-customer wording, "customers include" or
 * "accounted for N%" and run to the next period. Every passage contributes the
 * names found by extractNamedEntities, then hits from two fixed lists of
 * well-known companies (checked in that order).
 *
 * @param {string} plain - Plain-text filing content.
 * @returns {string[]} Up to 18 unique names in first-seen order.
 */
function extract10kCustomers(plain) {
  const passages = plain.match(/(?:major\s+customers?|principal\s+customers?|customers?\s+include|accounted\s+for\s+\d+%)[^.]{0,2000}/gi) || [];
  const found = new Set();

  passages.forEach((passage) => {
    extractNamedEntities(passage).forEach((name) => found.add(name));
    const knownNameLists = [
      /\b(Microsoft|Amazon|Google|Alphabet|Meta|Apple|Tesla|Oracle)\b/gi,
      /\b(Dell\s+Technologies|Hewlett[\s-]?Packard\s+Enterprise|Super\s*Micro\s+Computer|Lenovo|Cisco)\b/gi,
    ];
    for (const list of knownNameLists) {
      for (const hit of passage.matchAll(list)) {
        found.add(hit[1].trim());
      }
    }
  });

  return Array.from(found).slice(0, 18);
}
/**
 * Download the most recent 10-K listed for `symbol` on SEC EDGAR and extract
 * rough supply-chain hints from its text.
 *
 * Only the first entry whose form is exactly '10-K' in the recent filings is
 * considered, and only the first 180,000 characters of the stripped document are scanned.
 *
 * @param {string} symbol - Ticker symbol, resolved through tickerToCik.
 * @returns {Promise<Object>} `{ excerpt, customers, suppliers, competitors }`, plus
 *   `source`, `filingUrl` and `companyName` when a filing was read. When the ticker
 *   is unknown or no usable 10-K is listed, excerpt is null and the lists are empty.
 * @throws Errors from the SEC requests propagate to the caller.
 */
export async function fetch10kChainHints(symbol) {
  const emptyResult = () => ({ excerpt: null, customers: [], suppliers: [], competitors: [] });

  const company = await tickerToCik(symbol);
  if (!company) return emptyResult();

  const submissions = await json(`https://data.sec.gov/submissions/CIK${company.cik}.json`, { 'User-Agent': SEC_UA });
  const recent = submissions.filings?.recent || {};
  const annualIdx = (recent.form || []).indexOf('10-K');
  const accession = annualIdx === -1 ? null : recent.accessionNumber[annualIdx];
  const primaryDoc = annualIdx === -1 ? null : recent.primaryDocument?.[annualIdx];
  if (!accession || !primaryDoc) return emptyResult();

  // Archive paths use the CIK without leading zeros and the accession number without dashes.
  const folder = accession.replace(/-/g, '');
  const filingUrl = `https://www.sec.gov/Archives/edgar/data/${Number(company.cik)}/${folder}/${primaryDoc}`;
  const rawHtml = await text(filingUrl, { 'User-Agent': SEC_UA }, 28000);
  const plain = strip(rawHtml).slice(0, 180000);

  // First passage starting at the given wording, or '' when the wording never occurs.
  const firstPassage = (pattern) => plain.match(pattern)?.[0] || '';
  const customerPassage = firstPassage(/(?:major customers?|principal customers?|customers? include)[^.]{0,1200}/i);
  const supplierPassage = firstPassage(/(?:suppliers?|supply chain|manufacturing)[^.]{0,1200}/i);
  const competitorPassage = firstPassage(/(?:competition|competitors?)[^.]{0,1200}/i);
  const overview = firstPassage(/(?:business overview|description of business)[^.]{0,2500}/i) || plain.slice(0, 2500);

  const customers = Array.from(new Set([...extractNamedEntities(customerPassage), ...extract10kCustomers(plain)]));
  const suppliers = Array.from(new Set([...extractNamedEntities(supplierPassage), ...extract10kSuppliers(plain)]));

  return {
    excerpt: overview.slice(0, 2000),
    customers,
    suppliers,
    competitors: extractNamedEntities(competitorPassage),
    source: 'SEC 10-K',
    filingUrl,
    companyName: company.name,
  };
}
// Keywords (English and Chinese) that mark a headline as management / governance related.
// Short or ambiguous English tokens are anchored with \b so that "Cook", "cool",
// "cooperation", "dashboard" and "disappoint" no longer count as COO / board / appoint hits.
const MGMT_KW = /chief executive|\b(?:ceo|cfo|coo)s?\b|president|\bboard|director|executive|resign|\bappoint|compensation|guidance|layoff|restructur|merger|acquisition|investigation|subpoena|執行長|財務長|董事|人事|裁員|併購|收購|指引|調查/i;

/**
 * Keep the news items whose title or description mentions a management keyword.
 *
 * @param {Array<Object>} news - Items with optional `title` and `description`; null/undefined is treated as empty.
 * @returns {Array<Object>} The first 10 matching items, in input order.
 */
export function filterManagementNews(news) {
  return (news || [])
    .filter((n) => n && MGMT_KW.test(`${n.title ?? ''} ${n.description ?? ''}`))
    .slice(0, 10);
}
/**
 * List the most recent 8-K style filings for `symbol` from the SEC submissions feed.
 *
 * @param {string} symbol - Ticker symbol, resolved through tickerToCik.
 * @returns {Promise<Array<Object>>} Up to 8 entries `{ form, filedDate, description, accession, url }`
 *   in feed order; an empty array when the ticker is unknown. `url` is the company's
 *   8-K listing page on EDGAR (the same for every entry), not the individual filing.
 * @throws Errors from the SEC request propagate to the caller.
 */
export async function fetchRecent8kHeadlines(symbol) {
  const company = await tickerToCik(symbol);
  if (!company) return [];

  const submissions = await json(`https://data.sec.gov/submissions/CIK${company.cik}.json`, { 'User-Agent': SEC_UA });
  const recent = submissions.filings?.recent || {};
  const forms = recent.form || [];
  const listingUrl = `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=${company.cik}&type=8-K&dateb=&owner=include&count=40`;

  const headlines = [];
  for (const [idx, form] of forms.entries()) {
    if (headlines.length >= 8) break;
    // Accept any form that starts with 8-K (the test is a case-insensitive prefix match).
    if (!/^8-K/i.test(form)) continue;
    headlines.push({
      form,
      filedDate: recent.filingDate[idx],
      description: recent.primaryDocDescription?.[idx] || '',
      accession: recent.accessionNumber[idx],
      url: listingUrl,
    });
  }
  return headlines;
}
/**
 * Gather every research source for one company into a single bundle.
 *
 * The international news fetch needs only the ticker, so it is started
 * immediately and runs alongside the profile / SEC lookups. The Taiwan news
 * fetch waits for them because its query may use the company name taken from
 * the 10-K hints. Every source is best effort: a failing source yields an empty
 * object or array instead of rejecting the whole call.
 *
 * @param {string} symbol - Ticker symbol; trimmed and upper-cased before use.
 * @param {Object} [profile] - Known profile fields (`name`, `companyName`, plus
 *   anything fetchCompanyProfileExtended accepts as seed). null is treated as empty.
 * @returns {Promise<Object>} `{ symbol, gatheredAt, profileExt, hints, headlines8k,
 *   newsTw, newsGlobal, managementNewsRaw, companyName }`.
 */
export async function gatherIntelSources(symbol, profile = {}) {
  const ticker = String(symbol || '').trim().toUpperCase();
  const seed = profile ?? {};

  // Independent of the company name: start now, await later.
  const globalNewsPending = fetchGlobalNews(ticker).catch(() => []);

  const [profileExt, hints, headlines] = await Promise.all([
    fetchCompanyProfileExtended(ticker, seed).catch(() => ({})),
    fetch10kChainHints(ticker).catch(() => ({})),
    fetchRecent8kHeadlines(ticker).catch(() => []),
  ]);

  const companyName = seed.name || seed.companyName || hints?.companyName || null;

  const [newsTw, newsGlobal] = await Promise.all([
    fetchTaiwanNews(ticker, companyName).catch(() => []),
    globalNewsPending,
  ]);

  return {
    symbol: ticker,
    gatheredAt: new Date().toISOString(),
    profileExt,
    hints,
    headlines8k: headlines,
    newsTw,
    newsGlobal,
    managementNewsRaw: filterManagementNews([...newsTw, ...newsGlobal]),
    // Fall back to the ticker echoed by the profile when no name is known.
    companyName: companyName || profileExt?.symbol,
  };
}