60 lines
1.6 KiB
Python
60 lines
1.6 KiB
Python
|
|
"""Convert the main body of a .docx file into Markdown-style plain text.

Usage: python <this script> <input.docx> <output file>

Every ``w:p`` paragraph found in ``word/document.xml`` becomes one output
line.  A paragraph whose style id contains ``Heading1``..``Heading4`` (or is
exactly ``1``..``4``) is prefixed with ``#``..``####``; otherwise a paragraph
that carries a numbering id is emitted as a ``- `` list item indented by one
space per list level.  All other paragraphs are written as plain text.
"""

import sys
import zipfile
from xml.etree import ElementTree

# Namespace prefix (ElementTree "{uri}" form) for WordprocessingML names.
W = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'

# (heading level, line prefix) pairs, checked in this order.
HEADING_PREFIXES = (
    ('1', '# '),
    ('2', '## '),
    ('3', '### '),
    ('4', '#### '),
)


def _child_val(parent, tag, default=''):
    """Return the ``w:val`` attribute of *parent*'s direct child ``w:<tag>``.

    *default* is returned when *parent* is None, the child element is
    missing, or the child has no ``w:val`` attribute.
    """
    if parent is None:
        return default
    child = parent.find(f'{W}{tag}')
    if child is None:
        return default
    return child.get(f'{W}val', default)


def line_prefix(style, num_id, ilvl):
    """Return the prefix for a paragraph with the given properties.

    Args:
        style: value of the paragraph's ``w:pStyle`` (``''`` if absent).
        num_id: value of ``w:numId`` (``''`` if the paragraph is not numbered).
        ilvl: value of ``w:ilvl`` as a string (``'0'`` if absent).

    Returns:
        A heading marker, a list-item marker, or ``''``.  Heading styles take
        precedence over list numbering.
    """
    for level, marker in HEADING_PREFIXES:
        if f'Heading{level}' in style or style == level:
            return marker
    if num_id:
        try:
            depth = int(ilvl)
        except ValueError:
            # A malformed level should not abort the whole conversion;
            # treat the item as a top-level list entry instead.
            depth = 0
        return ' ' * depth + '- '
    return ''


def paragraph_to_line(para):
    """Render one ``w:p`` element as a single prefixed line of text."""
    # Concatenate every non-empty text run nested anywhere in the paragraph.
    text = ''.join(t.text for t in para.iter(f'{W}t') if t.text)

    props = para.find(f'{W}pPr')
    numbering = props.find(f'{W}numPr') if props is not None else None

    style = _child_val(props, 'pStyle')
    num_id = _child_val(numbering, 'numId')
    ilvl = _child_val(numbering, 'ilvl', '0')

    return f'{line_prefix(style, num_id, ilvl)}{text}'


def document_xml_to_lines(content):
    """Parse the bytes/str of ``word/document.xml`` into output lines.

    Returns a list with one string per ``w:p`` element, in document order as
    reported by ``findall('.//w:p')``.
    """
    root = ElementTree.fromstring(content)
    return [paragraph_to_line(p) for p in root.findall(f'.//{W}p')]


def docx_to_lines(docx_path):
    """Read ``word/document.xml`` from the archive at *docx_path* and convert it."""
    with zipfile.ZipFile(docx_path) as archive:
        with archive.open('word/document.xml') as member:
            content = member.read()
    return document_xml_to_lines(content)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.  The first item is the input .docx path, the
            second the output path.  Extra items are ignored.

    Returns:
        0 on success, 2 when fewer than two arguments were supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.stderr.write(f'usage: {sys.argv[0]} <input.docx> <output>\n')
        return 2
    docx_path, output_path = args[0], args[1]

    lines = docx_to_lines(docx_path)

    # Lines are joined with '\n' and no trailing newline is appended.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

    sys.stderr.write(f'Wrote {len(lines)} lines\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())