// Google RSS / 新聞欄位:HTML 實體解碼與摘要清理
/**
 * Decode HTML character references in a string.
 *
 * Handles hexadecimal numeric references (`&#x4e2d;`), decimal numeric
 * references (`&#25991;`) and a small set of named references
 * (`&lt;`, `&gt;`, `&amp;`, `&quot;`, `&apos;`, `&nbsp;`).
 *
 * All references are decoded in a single pass, so already-escaped text is
 * only unescaped one level (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {*} s - Value to decode; `null`/`undefined` are treated as ''.
 * @returns {string} The decoded string. Numeric references outside the
 *   range 1..0x10FFFF are dropped; unknown named references are kept as-is.
 */
export function decodeHtmlEntities(s) {
  const text = String(s ?? '');
  if (!text) return '';

  // Named references are looked up case-sensitively, as HTML defines them.
  const namedEntities = new Map([
    ['lt', '<'],
    ['gt', '>'],
    ['amp', '&'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
  ]);
  const maxCodePoint = 0x10ffff;

  // One alternation covers all three reference forms; requiring the leading
  // '&' keeps ordinary text such as "abc;" or "123;" untouched.
  const entityPattern = /&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi;

  const decoded = text.replace(entityPattern, (match, hex, dec, name) => {
    if (name !== undefined) {
      return namedEntities.get(name) ?? match;
    }
    const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number(dec);
    return codePoint > 0 && codePoint <= maxCodePoint
      ? String.fromCodePoint(codePoint)
      : '';
  });

  // Non-breaking spaces (literal, or produced by &#160; / &#xa0;) become
  // regular spaces so later whitespace handling sees plain spaces.
  return decoded.replace(/\u00a0/g, ' ');
}
/** 解碼後移除標籤、壓縮空白 */
export function cleanNewsPlain(s) {
const decoded = decodeHtmlEntities(s);
return decoded
.replace(/