finance-dashboard/lib/companyintel-sources.js

338 lines
13 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Multi-source company research fetchers (Taiwan / international news, company profile, 10-K supply-chain hints, management updates)
import { yahooQuoteSummary, yahooFinanceSearchNews } from './yahoo-session.js';
import {
cleanNewsPlain, cleanGoogleNewsTitle, parseGoogleRssDescription, normalizeNewsItem,
} from './news-text.js';
// Browser-like User-Agent that text() sends by default; callers can override it through the headers argument.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36';
// User-Agent passed explicitly on the sec.gov / data.sec.gov requests made in this file.
// NOTE(review): the contact address is an example.com placeholder — presumably it should be a real contact; confirm before production use.
const SEC_UA = 'EmmyInvestDashboard/1.0 (personal learning tool; contact@example.com)';
/**
 * GET a URL and resolve with the response body as a string.
 *
 * @param {string} url - Address to request.
 * @param {object} [headers] - Extra request headers; they override the default User-Agent.
 * @param {number} [ms] - Milliseconds before the request is aborted.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `HTTP <status>` when the response is not ok; fetch/abort errors propagate.
 */
async function text(url, headers = {}, ms = 14000) {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), ms);
  try {
    const response = await fetch(url, {
      headers: { 'User-Agent': UA, ...headers },
      signal: controller.signal,
    });
    if (response.ok) {
      // Read the body inside the try so the abort timer still covers the download.
      return await response.text();
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    clearTimeout(abortTimer);
  }
}
/**
 * GET a URL through text() and parse the body as JSON.
 *
 * @param {string} url - Address to request.
 * @param {object} [headers] - Extra request headers; they override the default Accept header.
 * @param {number} [ms] - Timeout in milliseconds, forwarded to text().
 * @returns {Promise<any>} The parsed JSON value.
 * @throws Whatever text() throws, or a SyntaxError when the body is not valid JSON.
 */
async function json(url, headers = {}, ms = 14000) {
  const requestHeaders = { Accept: 'application/json,text/plain,*/*', ...headers };
  const raw = await text(url, requestHeaders, ms);
  return JSON.parse(raw);
}
// Local alias for cleanNewsPlain (imported from ./news-text.js); used in this file on news summaries and on the fetched 10-K HTML.
const strip = (s) => cleanNewsPlain(s);
/**
 * Return the trimmed inner text of the first `<name ...>...</name>` element in `block`.
 * Matching is case-insensitive and regex-based; yields '' when the element is absent or empty.
 */
const tag = (block, name) => {
  const elementRe = new RegExp(`<${name}[^>]*>([\\s\\S]*?)<\\/${name}>`, 'i');
  const found = block.match(elementRe);
  if (!found) return '';
  const inner = found[1]?.trim();
  return inner || '';
};
/**
 * Parse a Google News RSS document into normalized news items.
 *
 * @param {string} xml - Raw RSS text; anything falsy is treated as an empty feed.
 * @param {string} region - 'tw' or another region label; copied onto each item and used to pick the source label.
 * @param {number} [limit] - Maximum number of <item> elements to read.
 * @returns {Array<object>} Items produced by normalizeNewsItem that have both a title and a URL.
 */
function parseGoogleRss(xml, region, limit = 12) {
  // An unparseable pubDate must not abort the whole feed: toISOString() throws a
  // RangeError on an invalid Date, so the date is validated first.
  const toIsoDay = (raw) => {
    if (!raw) return null;
    const parsed = new Date(raw);
    return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString().slice(0, 10);
  };

  const blocks = [...String(xml || '').matchAll(/<item>([\s\S]*?)<\/item>/gi)]
    .map((m) => m[1])
    .slice(0, limit);

  return blocks
    .map((block) => {
      const title = cleanGoogleNewsTitle(tag(block, 'title'));
      const link = tag(block, 'link') || (block.match(/<link[^>]*>([^<]+)<\/link>/i)?.[1] || '').trim();
      const { anchorText, fontPub } = parseGoogleRssDescription(tag(block, 'description'));
      const sourceName = cleanNewsPlain(tag(block, 'source'));
      const publisher = sourceName || fontPub || 'Google 新聞';

      // Use the description anchor text only when it adds something beyond the
      // title and is not just a Google News link.
      let description = '';
      if (anchorText && anchorText !== title && anchorText.length > 6 && !/news\.google\.com/i.test(anchorText)) {
        description = anchorText;
      }

      return normalizeNewsItem({
        title,
        titleZh: title,
        description: description.slice(0, 400),
        descriptionZh: description.slice(0, 400),
        url: link,
        publisher,
        created: toIsoDay(tag(block, 'pubDate')),
        region,
        source: region === 'tw' ? 'Google 新聞(台灣)' : 'Google 新聞(國際)',
      });
    })
    .filter((n) => n.titleZh && n.url);
}
/**
 * Collect Taiwan-region Google News items for a symbol.
 *
 * Queries run one after another; a failed query is skipped, and no further
 * query is issued once 12 unique URLs have been collected.
 *
 * @param {string} symbol - Ticker used to build the search phrases.
 * @param {string} [companyName] - Added as an extra query only when it contains CJK ideographs.
 * @returns {Promise<Array<object>>} At most 12 items, unique by URL, in discovery order.
 */
export async function fetchTaiwanNews(symbol, companyName) {
  const MAX_ITEMS = 12;

  const searchTerms = [];
  if (/NVDA/i.test(symbol)) searchTerms.push('輝達');
  searchTerms.push(`${symbol} 台股`, `${symbol} 美股`);
  if (companyName && /[\u4e00-\u9fff]/.test(companyName)) searchTerms.push(companyName);

  // A Map keyed by URL keeps first-seen order and de-duplicates in one structure.
  const byUrl = new Map();
  for (const term of searchTerms) {
    try {
      const feedUrl = `https://news.google.com/rss/search?q=${encodeURIComponent(term)}&hl=zh-TW&gl=TW&ceid=TW:zh-Hant`;
      const xml = await text(feedUrl, { Accept: 'application/rss+xml, application/xml, text/xml, */*' }, 10000);
      for (const item of parseGoogleRss(xml, 'tw', 15)) {
        if (!byUrl.has(item.url)) byUrl.set(item.url, item);
      }
    } catch {
      /* best-effort: move on to the next query */
    }
    if (byUrl.size >= MAX_ITEMS) break;
  }
  return [...byUrl.values()].slice(0, MAX_ITEMS);
}
/**
 * Collect international news for a symbol from several sources, in priority order:
 * the Yahoo session search, the public Yahoo search endpoint, Google News RSS
 * (English) and the Nasdaq article API.
 *
 * Every source is best-effort: a failing source is skipped. Because the result is
 * capped at 14 items and later sources only append, remaining sources are not
 * requested once the cap is reached.
 *
 * @param {string} symbol - Ticker to search for.
 * @returns {Promise<Array<object>>} At most 14 normalized items, unique by URL.
 */
export async function fetchGlobalNews(symbol) {
  const MAX_ITEMS = 14;
  const out = [];
  const seen = new Set();
  const isFull = () => out.length >= MAX_ITEMS;

  const addUnique = (item) => {
    if (!item.url || seen.has(item.url)) return;
    seen.add(item.url);
    out.push(item);
  };

  // Epoch seconds -> 'YYYY-MM-DD', or null when missing/invalid. Validating first
  // keeps one malformed timestamp from throwing and dropping the rest of a source.
  const epochToIsoDay = (seconds) => {
    if (!seconds) return null;
    const when = new Date(seconds * 1000);
    return Number.isNaN(when.getTime()) ? null : when.toISOString().slice(0, 10);
  };

  // Shared mapping for both Yahoo endpoints.
  const fromYahoo = (n) => normalizeNewsItem({
    title: n.title,
    titleZh: n.title,
    description: strip(n.summary || ''),
    descriptionZh: strip(n.summary || ''),
    url: n.link,
    publisher: n.publisher || 'Yahoo Finance',
    created: epochToIsoDay(n.providerPublishTime),
    region: 'global',
    source: 'Yahoo Finance',
  });

  try {
    const yNews = await yahooFinanceSearchNews(symbol, 14);
    for (const n of yNews) addUnique(fromYahoo(n));
  } catch { /* best-effort source */ }

  if (!isFull()) {
    try {
      const y = await json(`https://query1.finance.yahoo.com/v1/finance/search?q=${encodeURIComponent(symbol)}&newsCount=12&quotesCount=0`);
      for (const n of y.news || []) addUnique(fromYahoo(n));
    } catch { /* best-effort source */ }
  }

  for (const q of [`${symbol} stock`, `${symbol} earnings CEO`]) {
    if (isFull()) break;
    try {
      const url = `https://news.google.com/rss/search?q=${encodeURIComponent(q)}&hl=en-US&gl=US&ceid=US:en`;
      const xml = await text(url, {}, 10000);
      for (const item of parseGoogleRss(xml, 'global', 10)) {
        if (seen.has(item.url)) continue;
        seen.add(item.url);
        out.push(item);
      }
    } catch { /* best-effort source */ }
  }

  if (!isFull()) {
    try {
      const d = await json(`https://api.nasdaq.com/api/news/topic/articlebysymbol?q=${encodeURIComponent(symbol)}|stocks&offset=0&limit=8&fallback=true`, {
        Accept: 'application/json', Origin: 'https://www.nasdaq.com', Referer: 'https://www.nasdaq.com/',
      });
      for (const r of d?.data?.rows || []) {
        // Relative article paths are resolved against the Nasdaq site.
        const url = r.url ? (r.url.startsWith('http') ? r.url : `https://www.nasdaq.com${r.url}`) : null;
        if (!url || seen.has(url)) continue;
        seen.add(url);
        out.push(normalizeNewsItem({
          title: r.title,
          titleZh: r.title,
          description: strip(r.description || ''),
          descriptionZh: strip(r.description || ''),
          url,
          publisher: r.publisher || 'Nasdaq',
          created: r.created || r.ago,
          region: 'global',
          source: 'Nasdaq',
        }));
      }
    } catch { /* best-effort source */ }
  }

  return out.slice(0, MAX_ITEMS);
}
// Cache of ticker -> { cik, name }, filled on first use from the SEC ticker file.
let _tickerMap = null;
// In-flight download shared by concurrent callers, so the ticker file is fetched
// once even when several lookups start before the cache is filled.
let _tickerMapPending = null;

// Download the SEC ticker list and index it by upper-cased ticker.
async function loadSecTickerMap() {
  const d = await json('https://www.sec.gov/files/company_tickers.json', { 'User-Agent': SEC_UA });
  // Null-prototype object: inherited keys such as "constructor" can never look like tickers.
  const map = Object.create(null);
  for (const k of Object.keys(d)) {
    map[String(d[k].ticker).toUpperCase()] = { cik: String(d[k].cik_str).padStart(10, '0'), name: d[k].title };
  }
  return map;
}

/**
 * Resolve a ticker symbol to its SEC CIK (zero-padded to 10 digits) and company title.
 *
 * @param {string} symbol - Ticker; surrounding whitespace and letter case are ignored.
 * @returns {Promise<{cik: string, name: string}|null>} The entry, or null when the ticker is unknown or empty.
 * @throws When the ticker file cannot be downloaded or parsed; the next call retries.
 */
async function tickerToCik(symbol) {
  const key = String(symbol ?? '').trim().toUpperCase();
  if (!key) return null;
  if (!_tickerMap) {
    if (!_tickerMapPending) {
      _tickerMapPending = loadSecTickerMap().finally(() => { _tickerMapPending = null; });
    }
    _tickerMap = await _tickerMapPending;
  }
  return _tickerMap[key] || null;
}
/**
 * Build an extended company profile for a symbol.
 *
 * When `seed` already carries both a business summary and a sector, it is reshaped
 * and returned without any request. Otherwise Yahoo quoteSummary is queried; if
 * that fails for any reason, a profile with empty fields (and no `source` key) is returned.
 *
 * @param {string} symbol - Ticker; also excluded from the peer list.
 * @param {object} [seed] - Previously fetched profile fields.
 * @returns {Promise<object>} Profile with symbol, longBusinessSummary, website, sector,
 *   industry, country, employees, peers and (except on failure) source.
 */
export async function fetchCompanyProfileExtended(symbol, seed = {}) {
  const seedIsSufficient = Boolean(seed.longBusinessSummary && seed.sector);
  if (seedIsSufficient) {
    return {
      symbol,
      longBusinessSummary: seed.longBusinessSummary,
      website: seed.website || null,
      sector: seed.sector,
      industry: seed.industry || null,
      country: seed.country || null,
      employees: seed.fullTimeEmployees ?? null,
      peers: seed.peers || [],
      source: seed.source || 'Yahoo assetProfile',
    };
  }

  try {
    const summary = await yahooQuoteSummary(symbol, 'assetProfile,summaryProfile,peer');
    const asset = summary?.assetProfile || {};
    const brief = summary?.summaryProfile || {};
    // Prefer assetProfile, fall back to summaryProfile, then null.
    const pick = (field) => asset[field] || brief[field] || null;

    const rawPeers = summary?.peer?.symbols || [];
    // Keep the part after the last '.', upper-cased, and drop the symbol itself.
    const tickers = rawPeers.map((entry) => String(entry).split('.').pop()?.toUpperCase());
    const others = tickers.filter((ticker) => ticker && ticker !== symbol);

    return {
      symbol,
      longBusinessSummary: pick('longBusinessSummary'),
      website: pick('website'),
      sector: pick('sector'),
      industry: pick('industry'),
      country: pick('country'),
      employees: asset.fullTimeEmployees ?? brief.fullTimeEmployees ?? null,
      peers: [...new Set(others)].slice(0, 12),
      source: 'Yahoo quoteSummary',
    };
  } catch {
    /* best-effort: fall back to an empty profile */
    return {
      symbol,
      longBusinessSummary: null,
      website: null,
      sector: null,
      industry: null,
      country: null,
      employees: null,
      peers: [],
    };
  }
}
/**
 * Heuristically pull company names (text ending in Inc., Corp., Corporation, Ltd.,
 * LLC or Co.) out of a passage.
 *
 * The passage is scanned twice: first inside each keyword-led fragment (customers,
 * clients, suppliers, competitors, partners), so scanning starts right after the
 * keyword, then across the whole passage. Names found in keyword fragments come first.
 *
 * The name body is matched lazily so that the first legal suffix ends the name.
 * A greedy match fuses neighbours such as "Intel Corporation and Microsoft
 * Corporation" into one entry. An optional comma before the suffix lets
 * "Advanced Micro Devices, Inc." match.
 *
 * @param {string} section - Plain text to scan; falsy input yields [].
 * @returns {string[]} Up to 15 unique names in discovery order.
 */
function extractNamedEntities(section) {
  const passage = String(section || '');
  const contextRe = /(?:customers?|clients?|suppliers?|competitors?|partners?)[^.]{0,400}/gi;
  const nameRe = /\b[A-Z][A-Za-z0-9&.\- ]{2,35}?(?:, ?)?(?:Inc\.|Corp\.|Corporation|Ltd\.|LLC|Co\.)/g;

  const chunks = [...passage.matchAll(contextRe)].map((m) => m[0]);
  chunks.push(passage);

  const names = new Set();
  for (const chunk of chunks) {
    for (const hit of chunk.match(nameRe) || []) {
      const name = hit.trim();
      if (name.length > 3 && name.length < 50) names.add(name);
    }
  }
  return [...names].slice(0, 15);
}
// Canonical spelling for each well-known supplier, keyed by the matched text with
// whitespace removed and lower-cased. The matcher is case-insensitive and tolerant of
// spacing, so without this "SK hynix"/"SK Hynix" or "Hon Hai"/"HonHai" would be
// returned as separate entries.
const KNOWN_SUPPLIER_LABELS = new Map([
  ['tsmc', 'TSMC'],
  ['taiwansemiconductor', 'Taiwan Semiconductor'],
  ['samsung', 'Samsung'],
  ['skhynix', 'SK Hynix'],
  ['micron', 'Micron'],
  ['asml', 'ASML'],
  ['synopsys', 'Synopsys'],
  ['cadence', 'Cadence'],
  ['foxconn', 'Foxconn'],
  ['honhai', 'Hon Hai'],
]);

/**
 * Heuristically extract supplier names from 10-K plain text.
 *
 * Looks at sentence fragments that start with supplier / supply-chain / reliance /
 * contract-manufacturing wording, then collects (a) suffix-style company names via
 * extractNamedEntities and (b) a fixed list of well-known suppliers, normalized to
 * one canonical spelling each.
 *
 * @param {string} plain - Plain text of the filing.
 * @returns {string[]} Up to 18 unique names in discovery order.
 */
function extract10kSuppliers(plain) {
  const names = new Set();
  const chunks = [
    plain.match(/(?:suppliers?|supply\s+chain|sole\s+supplier|third[- ]party\s+manufactur)[^.]{0,2000}/gi) || [],
    plain.match(/(?:we\s+(?:rely|depend)\s+(?:on|upon)\s+)[^.]{0,800}/gi) || [],
    plain.match(/(?:contract\s+manufactur|foundry)[^.]{0,1200}/gi) || [],
  ].flat();
  for (const block of chunks) {
    for (const n of extractNamedEntities(block)) names.add(n);
    for (const m of block.matchAll(/\b(TSMC|Taiwan Semiconductor|Samsung|SK\s*Hynix|Micron|ASML|Synopsys|Cadence|Foxconn|Hon\s*Hai)\b/gi)) {
      const key = m[1].replace(/\s+/g, '').toLowerCase();
      names.add(KNOWN_SUPPLIER_LABELS.get(key) ?? m[1].trim());
    }
  }
  return [...names].slice(0, 18);
}
// Canonical spelling for each well-known customer, keyed by the matched text with
// whitespace and hyphens removed and lower-cased. The matchers are case-insensitive
// and tolerant of spacing/hyphenation, so without this the same company could be
// returned several times in different spellings.
const KNOWN_CUSTOMER_LABELS = new Map([
  ['microsoft', 'Microsoft'],
  ['amazon', 'Amazon'],
  ['google', 'Google'],
  ['alphabet', 'Alphabet'],
  ['meta', 'Meta'],
  ['apple', 'Apple'],
  ['tesla', 'Tesla'],
  ['oracle', 'Oracle'],
  ['delltechnologies', 'Dell Technologies'],
  ['hewlettpackardenterprise', 'Hewlett Packard Enterprise'],
  ['supermicrocomputer', 'Super Micro Computer'],
  ['lenovo', 'Lenovo'],
  ['cisco', 'Cisco'],
]);

/**
 * Heuristically extract customer names from 10-K plain text.
 *
 * Looks at sentence fragments that start with major/principal-customer wording or an
 * "accounted for N%" phrase, then collects (a) suffix-style company names via
 * extractNamedEntities and (b) a fixed list of well-known customers, normalized to
 * one canonical spelling each.
 *
 * @param {string} plain - Plain text of the filing.
 * @returns {string[]} Up to 18 unique names in discovery order.
 */
function extract10kCustomers(plain) {
  const names = new Set();
  const addKnown = (raw) => {
    const key = raw.replace(/[\s-]+/g, '').toLowerCase();
    names.add(KNOWN_CUSTOMER_LABELS.get(key) ?? raw.trim());
  };
  const chunks = plain.match(/(?:major\s+customers?|principal\s+customers?|customers?\s+include|accounted\s+for\s+\d+%)[^.]{0,2000}/gi) || [];
  for (const block of chunks) {
    for (const n of extractNamedEntities(block)) names.add(n);
    for (const m of block.matchAll(/\b(Microsoft|Amazon|Google|Alphabet|Meta|Apple|Tesla|Oracle)\b/gi)) {
      addKnown(m[1]);
    }
    for (const m of block.matchAll(/\b(Dell\s+Technologies|Hewlett[\s-]?Packard\s+Enterprise|Super\s*Micro\s+Computer|Lenovo|Cisco)\b/gi)) {
      addKnown(m[1]);
    }
  }
  return [...names].slice(0, 18);
}
// Return the first keyword match in `plain` plus the rest of its sentence, capped at
// `maxLen` characters after the keyword. A period ends the sentence only when it is
// followed by whitespace and an upper-case letter (or by the end of the window), so
// abbreviation periods as in "Acme Inc. and Beta Corp." stay inside the passage.
// `keywordRe` must be a non-global regex. Returns '' when the keyword is absent.
function passageAfterKeyword(plain, keywordRe, maxLen) {
  const hit = keywordRe.exec(plain);
  if (!hit) return '';
  const tailStart = hit.index + hit[0].length;
  const tail = plain.slice(tailStart, tailStart + maxLen);
  const sentence = /^(?:[^.]|\.(?!\s+[A-Z]|\s*$))*\.?/.exec(tail)[0];
  return hit[0] + sentence;
}

/**
 * Fetch the first 10-K listed in the company's recent SEC filings and derive
 * supply-chain hints (customers, suppliers, competitors) plus a business excerpt.
 *
 * @param {string} symbol - Ticker, resolved through tickerToCik.
 * @returns {Promise<object>} `{ excerpt, customers, suppliers, competitors }` with empty
 *   values when the ticker is unknown or no 10-K is listed (`companyName` is included
 *   whenever the ticker resolved); on success also `source`, `filingUrl`, `companyName`.
 * @throws Network/HTTP/JSON errors from the SEC requests propagate to the caller.
 */
export async function fetch10kChainHints(symbol) {
  const hit = await tickerToCik(symbol);
  if (!hit) return { excerpt: null, customers: [], suppliers: [], competitors: [] };

  const sub = await json(`https://data.sec.gov/submissions/CIK${hit.cik}.json`, { 'User-Agent': SEC_UA });
  const recent = sub.filings?.recent || {};
  const idx = (recent.form || []).indexOf('10-K');
  const accn = idx >= 0 ? recent.accessionNumber?.[idx] : null;
  const primary = idx >= 0 ? recent.primaryDocument?.[idx] : null;
  if (!accn || !primary) {
    // Keep the SEC company title: it is known even when no 10-K is listed.
    return { excerpt: null, customers: [], suppliers: [], competitors: [], companyName: hit.name };
  }

  const accNo = accn.replace(/-/g, '');
  const url = `https://www.sec.gov/Archives/edgar/data/${Number(hit.cik)}/${accNo}/${primary}`;
  const html = await text(url, { 'User-Agent': SEC_UA }, 28000);
  // Only the first 180k characters of cleaned text are scanned.
  const plain = strip(html).slice(0, 180000);

  // The section windows must tolerate abbreviation periods, otherwise the passage is
  // cut right before the "." of "Inc."/"Corp." and those names never reach the extractor.
  const custSec = passageAfterKeyword(plain, /(?:major customers?|principal customers?|customers? include)/i, 1200);
  const supSec = passageAfterKeyword(plain, /(?:suppliers?|supply chain|manufacturing)/i, 1200);
  const compSec = passageAfterKeyword(plain, /(?:competition|competitors?)/i, 1200);
  const bizSec = plain.match(/(?:business overview|description of business)[^.]{0,2500}/i)?.[0] || plain.slice(0, 2500);

  const customers = [...new Set([...extractNamedEntities(custSec), ...extract10kCustomers(plain)])];
  const suppliers = [...new Set([...extractNamedEntities(supSec), ...extract10kSuppliers(plain)])];
  return {
    excerpt: bizSec.slice(0, 2000),
    customers,
    suppliers,
    competitors: extractNamedEntities(compSec),
    source: 'SEC 10-K',
    filingUrl: url,
    companyName: hit.name,
  };
}
// Keywords that mark a headline as management / governance related.
// Short or embeddable English terms are anchored on word boundaries so that
// "cooling", "dashboard", "directory" or "disappointing" do not count as
// COO / board / director / appoint hits. The Chinese terms stay unanchored
// because \b does not apply between CJK characters.
const MGMT_KW = /chief executive|\b(?:ceo|cfo|coo)s?\b|president|\bboard(?:s|room)?\b|\bdirectors?\b|executive|resign|\bappoint|compensation|guidance|layoff|restructur|merger|acquisition|investigation|subpoena|執行長|財務長|董事|人事|裁員|併購|收購|指引|調查/i;

/**
 * Keep the news items whose title or description mentions a management-related keyword.
 *
 * @param {Array<object>} news - Items with optional `title` / `description`; null/undefined is treated as empty.
 * @returns {Array<object>} The first 10 matching items, in input order.
 */
export function filterManagementNews(news) {
  return (news || [])
    // Missing fields become '' rather than the literal text "undefined".
    .filter((n) => MGMT_KW.test(`${n?.title ?? ''} ${n?.description ?? ''}`))
    .slice(0, 10);
}
/**
 * List up to 8 of the first 8-K (or 8-K/A) entries in the company's recent SEC filings.
 *
 * Each entry links to its own primary document under /Archives/edgar/data/ when the
 * accession number and document name are available; otherwise it falls back to the
 * company's 8-K browse page, so headlines no longer all share one generic URL.
 *
 * @param {string} symbol - Ticker, resolved through tickerToCik.
 * @returns {Promise<Array<{form: string, filedDate: string, description: string, accession: string, url: string}>>}
 *   Empty when the ticker is unknown.
 * @throws Network/HTTP/JSON errors from the SEC request propagate to the caller.
 */
export async function fetchRecent8kHeadlines(symbol) {
  const hit = await tickerToCik(symbol);
  if (!hit) return [];

  const sub = await json(`https://data.sec.gov/submissions/CIK${hit.cik}.json`, { 'User-Agent': SEC_UA });
  const recent = sub.filings?.recent || {};
  const forms = recent.form || [];
  const browseUrl = `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=${hit.cik}&type=8-K&dateb=&owner=include&count=40`;

  const out = [];
  for (let i = 0; i < forms.length && out.length < 8; i++) {
    if (!/^8-K/i.test(forms[i])) continue;
    const accession = recent.accessionNumber?.[i];
    const doc = recent.primaryDocument?.[i];
    const url = accession && doc
      ? `https://www.sec.gov/Archives/edgar/data/${Number(hit.cik)}/${String(accession).replace(/-/g, '')}/${doc}`
      : browseUrl;
    out.push({
      form: forms[i],
      filedDate: recent.filingDate?.[i],
      description: recent.primaryDocDescription?.[i] || '',
      accession,
      url,
    });
  }
  return out;
}
/**
 * Gather every research source for one symbol into a single bundle.
 *
 * Phase 1 runs the profile, 10-K hints and 8-K headline lookups in parallel; phase 2
 * runs the Taiwan and global news lookups in parallel once a company name is known.
 * Each lookup is best-effort: a failure is replaced by an empty object or array.
 *
 * @param {string} symbol - Ticker; trimmed and upper-cased before use.
 * @param {object} [profile] - Known profile fields; `name` / `companyName` take priority as the company name.
 * @returns {Promise<object>} Bundle with symbol, gatheredAt, profileExt, hints, headlines8k,
 *   newsTw, newsGlobal, managementNewsRaw and companyName.
 */
export async function gatherIntelSources(symbol, profile = {}) {
  const ticker = String(symbol || '').trim().toUpperCase();
  const emptyObject = () => ({});
  const emptyList = () => [];

  const firstPhase = [
    fetchCompanyProfileExtended(ticker, profile).catch(emptyObject),
    fetch10kChainHints(ticker).catch(emptyObject),
    fetchRecent8kHeadlines(ticker).catch(emptyList),
  ];
  const [profileExt, hints, headlines8k] = await Promise.all(firstPhase);

  // Caller-supplied names win over the SEC title from the 10-K lookup.
  const companyName = profile.name || profile.companyName || hints?.companyName || null;

  const secondPhase = [
    fetchTaiwanNews(ticker, companyName).catch(emptyList),
    fetchGlobalNews(ticker).catch(emptyList),
  ];
  const [newsTw, newsGlobal] = await Promise.all(secondPhase);

  const managementNewsRaw = filterManagementNews([...newsTw, ...newsGlobal]);

  return {
    symbol: ticker,
    gatheredAt: new Date().toISOString(),
    profileExt,
    hints,
    headlines8k,
    newsTw,
    newsGlobal,
    managementNewsRaw,
    // Last resort: the symbol echoed back by the profile lookup.
    companyName: companyName || hints?.companyName || profileExt?.symbol,
  };
}