# 在下面的代码中进行更改
def _aw_append_to_latest_heading(entries, text):
    """Append *text* to the value of the most recent heading entry, if any."""
    if not entries:
        # Content that appears before the first heading has nowhere to go.
        return
    latest = entries[-1]
    # Every entry is created as a single-key dict, so its first key is the
    # heading it belongs to.
    latest[next(iter(latest))] += text


def aw_extract_headings_and_contents_table_dict_id(file):
    """Map each heading of a Word document to the content that follows it.

    The document is opened with ``aw.Document(file)`` and every node of
    every section body is visited (deep traversal).  A paragraph whose
    outline level is one of 0..5 starts a new heading entry; its key is the
    list label (when the paragraph is a list item) followed by the stripped
    paragraph text.  Any other non-empty paragraph that is not inside a
    table is appended to the latest heading's value, followed by a newline,
    after known field-code residue has been removed.  For every table node
    the result of ``aw_read_table_id`` is appended to the latest heading's
    value.

    Side effect: comment nodes are removed from every visited paragraph of
    the in-memory document before its text is read.

    Args:
        file: Whatever ``aw.Document`` accepts as its first argument.

    Returns:
        dict: heading key -> accumulated content.  Headings with an empty
        key are dropped.  When two headings share the same key, the value of
        the later one replaces the earlier one.  Content found before the
        first heading is discarded.
    """
    # Field-code residue that leaks into paragraph text.  Raw strings keep
    # the backslashes literal without relying on invalid escape sequences
    # (``\*``, ``\h``, ``\c``, ``\l``), which newer Python versions warn about.
    field_code_residue = (
        r' SEQ 表 \* ARABIC ',
        r'TOC \h \c "表" HYPERLINK \l "_Toc14741"',
        '\u0013 SEQ 图 \\* ARABIC \u00141\u0015 ',
    )
    heading_outline_levels = (0, 1, 2, 3, 4, 5)

    doc = aw.Document(file)
    # Refresh list labels so that ``list_label.label_string`` is populated.
    doc.update_list_labels()

    current_level = 0
    data = []
    # Holds (level, entries) pairs saved whenever a deeper heading level opens.
    # NOTE(review): every push is immediately followed by an append and every
    # pop concatenates in order, so the entries appear to end up in plain
    # document order; a flat list would likely suffice -- confirm before
    # simplifying.
    stack = []

    for s in doc.sections:
        sect = s.as_section()
        for node in sect.body.get_child_nodes(aw.NodeType.ANY, True):
            if node.node_type == aw.NodeType.PARAGRAPH:
                paragraph = node.as_paragraph()
                # Comments are stripped first so they do not end up in the text.
                # NOTE(review): this mutates the document while its node
                # collection is being iterated -- confirm this is safe.
                if paragraph.paragraph_format.outline_level in heading_outline_levels:
                    level = int(paragraph.paragraph_format.outline_level) + 1
                    if level > current_level:
                        stack.append((current_level, data))
                        data = []
                        current_level = level
                    elif level < current_level:
                        while stack and stack[-1][0] >= level:
                            current_level, parent_entries = stack.pop()
                            data = parent_entries + data

                    label = ''
                    if paragraph.list_format.is_list_item:
                        label = paragraph.list_label.label_string
                    paragraph.get_child_nodes(aw.NodeType.COMMENT, True).clear()
                    heading_text = paragraph.get_text().strip()
                    data.append({label + heading_text: ''})
                else:
                    paragraph.get_child_nodes(aw.NodeType.COMMENT, True).clear()
                    body_text = paragraph.get_text().strip()
                    # Paragraphs inside tables are skipped here; table content
                    # is contributed by the TABLE branch below.
                    if (body_text
                            and not paragraph.get_ancestor(aw.NodeType.TABLE)
                            and not paragraph.get_ancestor(aw.NodeType.FIELD_START)):
                        for residue in field_code_residue:
                            body_text = body_text.replace(residue, '')
                        _aw_append_to_latest_heading(data, body_text + "\n")
            elif node.node_type == aw.NodeType.TABLE:
                # Presumably aw_read_table_id returns a str (it is concatenated
                # onto a str value) -- verify against its definition.
                table_content = aw_read_table_id(node.as_table())
                _aw_append_to_latest_heading(data, table_content)

    # Fold any still-open levels back into one list.
    while stack:
        _, parent_entries = stack.pop()
        data = parent_entries + data

    merged_dict = {}
    for entry in data:
        for key, value in entry.items():
            # Skip entries whose key is empty.
            if key:
                merged_dict[key] = value
    return merged_dict