49 lines
1.3 KiB
Python
49 lines
1.3 KiB
Python
#!/usr/bin/env python3
|
|
import zipfile
|
|
from xml.etree import ElementTree
|
|
import os
|
|
|
|
# Default input document and output text file, both located next to this script.
docx_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '【Top.One】產品文檔.docx')
output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'topone_extracted.txt')

# WordprocessingML namespace in ElementTree's "{uri}" prefix form, so that
# f'{W}p' names the <w:p> element.
W = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'

# Paragraph style id -> Markdown heading prefix. Both the 'HeadingN' and the
# bare 'N' spellings are recognised.
# NOTE(review): the bare numeric ids presumably come from localized Word
# templates -- confirm against the source document's styles.xml.
_HEADING_PREFIXES = {
    'Heading1': '# ',
    '1': '# ',
    'Heading2': '## ',
    '2': '## ',
    'Heading3': '### ',
    '3': '### ',
    'Heading4': '#### ',
    '4': '#### ',
}


def _paragraph_style(para) -> str:
    """Return the paragraph's style id (w:pPr/w:pStyle/@w:val), or '' if unset."""
    properties = para.find(f'{W}pPr')
    if properties is None:
        return ''
    style_el = properties.find(f'{W}pStyle')
    if style_el is None:
        return ''
    return style_el.get(f'{W}val', '')


def _paragraph_to_line(para) -> str:
    """Render one <w:p> element as a single line with an optional heading prefix."""
    # Concatenate every non-empty <w:t> text run found anywhere below the paragraph.
    text = ''.join(run.text for run in para.iter(f'{W}t') if run.text)
    # Styles that are not in the table (including '') get no prefix.
    prefix = _HEADING_PREFIXES.get(_paragraph_style(para), '')
    return f'{prefix}{text}'


def extract_lines(path: str = docx_path) -> list:
    """Return one text line per paragraph of the .docx file at *path*.

    The file is opened as a zip archive and its 'word/document.xml' member is
    parsed. Paragraphs styled as heading levels 1-4 are prefixed with the
    matching number of '#' characters; paragraphs without text yield ''.

    Raises:
        FileNotFoundError: if *path* does not exist.
        zipfile.BadZipFile: if *path* is not a zip archive.
        KeyError: if the archive has no 'word/document.xml' member.
        xml.etree.ElementTree.ParseError: if that member is not well-formed XML.
    """
    with zipfile.ZipFile(path) as archive:
        content = archive.read('word/document.xml')

    root = ElementTree.fromstring(content)
    # './/' matches <w:p> elements at any depth below the root, in document order.
    return [_paragraph_to_line(para) for para in root.findall(f'.//{W}p')]


def main(source: str = docx_path, destination: str = output_path) -> None:
    """Extract *source* to *destination* as UTF-8 text and print a summary."""
    lines = extract_lines(source)

    with open(destination, 'w', encoding='utf-8') as f:
        # Lines are newline-separated; no trailing newline is written.
        f.write('\n'.join(lines))

    print(f'Wrote {len(lines)} lines to {destination}')


# Only run the extraction when executed as a script, so importing this module
# has no file-system side effects.
if __name__ == '__main__':
    main()
|