claude-code/extract_docx.py

#!/usr/bin/env python3
import zipfile
from xml.etree import ElementTree
import os

# Input .docx and output text file both live next to this script.
docx_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '【Top.One】產品文檔.docx')
output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'topone_extracted.txt')

# Clark-notation prefix for the WordprocessingML main namespace.
W = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'

# Paragraph style id -> Markdown heading prefix. Both the 'HeadingN' ids and
# the bare numeric ids 'N' are recognised, for heading levels 1 through 4.
_HEADING_PREFIXES = {
    'Heading1': '# ', '1': '# ',
    'Heading2': '## ', '2': '## ',
    'Heading3': '### ', '3': '### ',
    'Heading4': '#### ', '4': '#### ',
}


def _collect_text(node, parts):
    """Append the visible text below *node* to *parts*, in document order.

    Nested ``w:p`` elements are skipped: ``extract_lines`` already visits
    every paragraph on its own, so descending into them would emit their
    text twice. ``w:tab`` / ``w:br`` / ``w:cr`` are only treated as
    characters when they are direct children of a run (``w:r``); a ``w:tab``
    under ``w:pPr/w:tabs`` is a tab-stop definition, not content.
    """
    inside_run = node.tag == f'{W}r'
    for child in node:
        tag = child.tag
        if tag == f'{W}p':
            continue
        if tag == f'{W}t':
            if child.text:
                parts.append(child.text)
        elif inside_run and tag == f'{W}tab':
            parts.append('\t')
        elif inside_run and tag == f'{W}cr':
            parts.append('\n')
        elif inside_run and tag == f'{W}br':
            # Only soft line breaks become text; page/column breaks carry a
            # w:type attribute other than 'textWrapping' and are dropped.
            if child.get(f'{W}type', 'textWrapping') == 'textWrapping':
                parts.append('\n')
        else:
            _collect_text(child, parts)


def _heading_prefix(para):
    """Return the Markdown heading prefix for *para*, or '' if it has none."""
    pPr = para.find(f'{W}pPr')
    if pPr is None:
        return ''
    pStyle = pPr.find(f'{W}pStyle')
    if pStyle is None:
        return ''
    return _HEADING_PREFIXES.get(pStyle.get(f'{W}val', ''), '')


def extract_lines(document_xml):
    """Convert the bytes of ``word/document.xml`` into a list of text lines.

    One entry is produced per ``w:p`` element found anywhere in the tree
    (empty paragraphs yield ''). Paragraphs whose style id is listed in
    ``_HEADING_PREFIXES`` are prefixed with the matching Markdown '#' marker.

    Args:
        document_xml: Raw XML (bytes or str) of the main document part.

    Returns:
        A list of strings, one per paragraph, in document order. An entry
        may itself contain '\\n' where the paragraph has a soft line break.

    Raises:
        xml.etree.ElementTree.ParseError: If *document_xml* is not
            well-formed XML.
    """
    tree = ElementTree.fromstring(document_xml)
    lines = []
    for para in tree.findall(f'.//{W}p'):
        parts = []
        _collect_text(para, parts)
        lines.append(f'{_heading_prefix(para)}{"".join(parts)}')
    return lines


def main():
    """Read ``docx_path``, write the extracted text to ``output_path``."""
    with zipfile.ZipFile(docx_path) as z:
        content = z.read('word/document.xml')

    lines = extract_lines(content)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

    print(f'Wrote {len(lines)} lines to {output_path}')


if __name__ == '__main__':
    main()
rename 2026-02-27 13:45:37 +00:00			`#!/usr/bin/env python3`
			`import zipfile`
			`from xml.etree import ElementTree`
			`import os`

			`docx_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '【Top.One】產品文檔.docx')`
			`output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'topone_extracted.txt')`

			`W = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'`

			`with zipfile.ZipFile(docx_path) as z:`
			`with z.open('word/document.xml') as f:`
			`content = f.read()`

			`tree = ElementTree.fromstring(content)`
			`paras = tree.findall(f'.//{W}p')`

			`lines = []`
			`for para in paras:`
			`texts = []`
			`for t in para.iter(f'{W}t'):`
			`if t.text:`
			`texts.append(t.text)`
			`line = ''.join(texts)`

			`pPr = para.find(f'{W}pPr')`
			`style = ''`
			`if pPr is not None:`
			`pStyle = pPr.find(f'{W}pStyle')`
			`if pStyle is not None:`
			`style = pStyle.get(f'{W}val', '')`

			`prefix = ''`
			`if style == 'Heading1' or style == '1':`
			`prefix = '# '`
			`elif style == 'Heading2' or style == '2':`
			`prefix = '## '`
			`elif style == 'Heading3' or style == '3':`
			`prefix = '### '`
			`elif style == 'Heading4' or style == '4':`
			`prefix = '#### '`

			`lines.append(f'{prefix}{line}')`

			`with open(output_path, 'w', encoding='utf-8') as f:`
			`f.write('\n'.join(lines))`

			`print(f'Wrote {len(lines)} lines to {output_path}')`