rtf文件我上传不了,不知道为什么。
是的,我需要在分页之后进行续表,可是当前代码没有实现这个续表功能。
- 您想更新当前页面的页眉吗?更新页眉时,您需要插入分段符,以便在页面上设置不同的页眉。
- 表格需要放在下一页吗?或者您需要从第二个表格中删除这几行?
您能否更改截图上的文件,以获得所需的输出结果。
我需要用代码来解决截图中标记的问题,不想手动修改。
代码我已经提供了,我需要怎么修改代码来实现写入呢?
你好这个问题有结果了吗?????
你好有结果了吗???????
@Tiaohh 目前只完成了一部分,代码如下:
@staticmethod
def find_last_related_paragraph(para):
    """Return the last paragraph that belongs to the heading ``para``.

    Starting at ``para``, walk forward through its following siblings for as
    long as each sibling is a paragraph whose outline level is greater than
    the outline level of ``para``.

    Args:
        para: The heading paragraph to start from.

    Returns:
        The last sibling paragraph that satisfies the condition above, or
        ``para`` itself when no following sibling qualifies.
    """
    heading_level = para.paragraph_format.outline_level
    last_related_paragraph = para
    sibling = para.next_sibling
    # Only siblings of the same node type as the heading (i.e. paragraphs) are
    # cast; a table or any other node ends the walk instead of being cast
    # with as_paragraph().
    while sibling is not None and sibling.node_type == para.node_type:
        candidate = sibling.as_paragraph()
        if candidate.paragraph_format.outline_level <= heading_level:
            break
        last_related_paragraph = candidate
        sibling = candidate.next_sibling
    return last_related_paragraph
# Purpose: opens MY_DIR + "111 (1).docx", looks for paragraphs whose text is
# listed in header_list, imports the body tables of "t_ae_1(1).docx" after the
# matched heading's related paragraphs and saves ARTIFACTS_DIR + "Result.docx".
# NOTE(review): this paste lost its indentation; the nesting of the loops and
# conditionals below has to be restored before the method can run.
def test_generate_docx_with_result_laikai(self):
header_list = ["REFERENCES"]
doc_main = aw.Document(MY_DIR + "111 (1).docx")
builder_main = aw.DocumentBuilder(doc_main)
paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
for para in paragraphs:
para = para.as_paragraph()
# Plain text of the paragraph with "\r" removed and surrounding whitespace stripped.
para_content = para.to_string(aw.SaveFormat.TEXT)
para_content = para_content.replace("\r", "")
para_content = para_content.strip()
if para_content in header_list or para_content.capitalize() in header_list:
# NOTE(review): table_header is created here but only referenced from commented-out code below.
table_header = aw.Paragraph(doc_main)
table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
# Extra step: first clear the content of the current paragraph `para`
# para.parent_node.insert_after(table_header, para)
if para_content in ["APPENDICES", "REFERENCES"]:
# Create a section break
para = self.find_last_related_paragraph(para)
previous_para = para
# Walk backwards in document order until a paragraph whose style name contains "C-Heading" is found.
while previous_para is not None:
if previous_para.node_type is aw.NodeType.PARAGRAPH and "C-Heading" in previous_para.as_paragraph().paragraph_format.style_name:
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Step to the previous sibling, then keep walking back until a paragraph node is reached.
# NOTE(review): if the loop above ended with previous_para being None, the next line fails.
previous_para = previous_para.previous_sibling
while previous_para is not None:
if previous_para.node_type is aw.NodeType.PARAGRAPH:
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Insert a new paragraph at that position, reset formatting/list and add a continuous section break.
builder_main.move_to(previous_para)
builder_main.insert_paragraph()
builder_main.paragraph_format.clear_formatting()
builder_main.list_format.list = None
builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
# run = aw.Run(doc_main, "")
# # Make the text bold
# run.font.bold = True
# # Append the bold Run object to the table-header paragraph
# table_header.append_child(run)
# idx_num = header_list.index(para_content)
# # Get the tables that correspond to the header
#
# num_tables = len(table_list[idx_num][::-1])
# for _index, info in enumerate(table_list[idx_num]):
# table_id = info.get("id", "")
# is_header_and_footer = info.get("is_header_and_footer", "")
# aw.Document(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
# ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
# # Decide whether header/footer are needed; False means remove them
# if not is_header_and_footer:
# document = Document(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
# )
# for section in document.sections:
# section.footer.is_linked_to_previous = True
# section.header.is_linked_to_previous = True
#
# document.save(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
# )
# Load the table document, save a copy under ARTIFACTS_DIR and reopen that copy.
doc_rtf = aw.Document(MY_DIR + "t_ae_1(1).docx")
builder = aw.DocumentBuilder(doc_rtf)
# NOTE(review): this sets the builder's row format only; no rows are built with
# this builder here, so it presumably does not change existing tables — confirm.
builder.row_format.heading_format = False
doc_rtf.save(ARTIFACTS_DIR + "t_ae_1(1).docx")
_doc = aw.Document(ARTIFACTS_DIR + "t_ae_1(1).docx")
_tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
_tables = [t for t in _tables]
# For every table that has a Body ancestor, swap it with the element before it in the list.
# NOTE(review): when i == 0 the index i - 1 is -1, i.e. the last list element.
for table in _tables[::-1]:
if table.get_ancestor(aw.NodeType.BODY):
i = _tables.index(table)
temp = _tables[i]
_tables[i] = _tables[i - 1]
_tables[i - 1] = temp
# Number of tables whose direct parent is the Body.
all_tables_count = sum(p.parent_node.node_type == aw.NodeType.BODY for p in _tables)
curr_count = 0
for table in _tables[::-1]: # Insert the tables into the Word file in reverse order. TODO: beautify tables
if table.get_ancestor(aw.NodeType.BODY):
# Clone the table, import it into doc_main and insert it after `para`,
# followed by an empty paragraph that becomes the new `para`.
table_clone = table.as_table().clone(True)
imported_table = doc_main.import_node(table_clone, True)
imported_table.as_table().preferred_width = aw.tables.PreferredWidth.from_percent(100)
para = para.parent_node.insert_after(imported_table, para)
para = para.as_table().parent_node.insert_after(aw.Paragraph(doc_main), para)
# NOTE(review): curr_count is compared before it is incremented, so this
# condition is presumably true for every body table — confirm intent.
if curr_count < all_tables_count:
builder_main.move_to(para)
builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
para = builder_main.current_paragraph
curr_count += 1
# if imported_table.node_type == aw.NodeType.TABLE:
# imported_table = imported_table.as_table()
# imported_table.preferred_width = (
# aw.tables.PreferredWidth.from_percent(100)
# )
#
# for index, row in enumerate(imported_table.rows):
# row = row.as_row()
# # print(index, row.get_text().strip())
# for cell_index, cell in enumerate(row.cells):
# cell = cell.as_cell()
# cell.cell_format.vertical_alignment = (
# aw.tables.CellVerticalAlignment.BOTTOM
# )
# for paragraph in cell.paragraphs:
# paragraph = paragraph.as_paragraph()
# # Center alignment
# for run in paragraph.runs:
# run = run.as_run()
# run.font.name = (
# "Courier New" # set the Western-text font (original note says "New Roman")
# )
# run.font.name_far_east = "宋体"
# run.font.size = 8
# table_newline = aw.Paragraph(doc_main)
# run = aw.Run(doc_main, "")
# table_newline.append_child(run)
# imported_table.parent_node.insert_after(
# table_newline, imported_table
# )
doc_main.save(ARTIFACTS_DIR + "Result.docx")
Result.docx (26.5 KB)
现在,您需要在第 2 节和第 3 节中添加页眉/页脚,并使用我上面分享的代码禁用 is_linked_to_previous(即取消“链接到前一节”)。
这是您需要的正确结果吗?
@Tiaohh 更新代码以包含页眉/页脚示例。您需要为所有必需的部分添加这些代码,并处理格式问题。
@staticmethod
def find_last_related_paragraph(para):
    """Return the last paragraph that belongs to the heading ``para``.

    Starting at ``para``, walk forward through its following siblings for as
    long as each sibling is a paragraph whose outline level is greater than
    the outline level of ``para``.

    Args:
        para: The heading paragraph to start from.

    Returns:
        The last sibling paragraph that satisfies the condition above, or
        ``para`` itself when no following sibling qualifies.
    """
    heading_level = para.paragraph_format.outline_level
    last_related_paragraph = para
    sibling = para.next_sibling
    # Only siblings of the same node type as the heading (i.e. paragraphs) are
    # cast; a table or any other node ends the walk instead of being cast
    # with as_paragraph().
    while sibling is not None and sibling.node_type == para.node_type:
        candidate = sibling.as_paragraph()
        if candidate.paragraph_format.outline_level <= heading_level:
            break
        last_related_paragraph = candidate
        sibling = candidate.next_sibling
    return last_related_paragraph
# Purpose: same flow as the earlier paste of this method (match headings from
# header_list in MY_DIR + "111 (1).docx", import the body tables of
# "t_ae_1(1).docx", save ARTIFACTS_DIR + "Result.docx"), and additionally
# replaces the headers/footers of doc_main.sections[1] with nodes cloned from
# the table document.
# NOTE(review): this paste lost its indentation; the nesting of the loops and
# conditionals below has to be restored before the method can run.
def test_generate_docx_with_result_laikai(self):
header_list = ["REFERENCES"]
doc_main = aw.Document(MY_DIR + "111 (1).docx")
builder_main = aw.DocumentBuilder(doc_main)
paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
for para in paragraphs:
para = para.as_paragraph()
# Plain text of the paragraph with "\r" removed and surrounding whitespace stripped.
para_content = para.to_string(aw.SaveFormat.TEXT)
para_content = para_content.replace("\r", "")
para_content = para_content.strip()
if para_content in header_list or para_content.capitalize() in header_list:
# NOTE(review): table_header is created here but only referenced from commented-out code below.
table_header = aw.Paragraph(doc_main)
table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
# Extra step: first clear the content of the current paragraph `para`
# para.parent_node.insert_after(table_header, para)
if para_content in ["APPENDICES", "REFERENCES"]:
# Create a section break
para = self.find_last_related_paragraph(para)
previous_para = para
# Walk backwards in document order until a paragraph whose style name contains "C-Heading" is found.
while previous_para is not None:
if previous_para.node_type is aw.NodeType.PARAGRAPH and "C-Heading" in previous_para.as_paragraph().paragraph_format.style_name:
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Step to the previous sibling, then keep walking back until a paragraph node is reached.
# NOTE(review): if the loop above ended with previous_para being None, the next line fails.
previous_para = previous_para.previous_sibling
while previous_para is not None:
if previous_para.node_type is aw.NodeType.PARAGRAPH:
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Insert a new paragraph at that position, reset formatting/list and add a continuous section break.
builder_main.move_to(previous_para)
builder_main.insert_paragraph()
builder_main.paragraph_format.clear_formatting()
builder_main.list_format.list = None
builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
# run = aw.Run(doc_main, "")
# # Make the text bold
# run.font.bold = True
# # Append the bold Run object to the table-header paragraph
# table_header.append_child(run)
# idx_num = header_list.index(para_content)
# # Get the tables that correspond to the header
#
# num_tables = len(table_list[idx_num][::-1])
# for _index, info in enumerate(table_list[idx_num]):
# table_id = info.get("id", "")
# is_header_and_footer = info.get("is_header_and_footer", "")
# aw.Document(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
# ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
# # Decide whether header/footer are needed; False means remove them
# if not is_header_and_footer:
# document = Document(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
# )
# for section in document.sections:
# section.footer.is_linked_to_previous = True
# section.header.is_linked_to_previous = True
#
# document.save(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
# )
# Load the table document, save a copy under ARTIFACTS_DIR and reopen that copy.
doc_rtf = aw.Document(MY_DIR + "t_ae_1(1).docx")
builder = aw.DocumentBuilder(doc_rtf)
# NOTE(review): this sets the builder's row format only; no rows are built with
# this builder here, so it presumably does not change existing tables — confirm.
builder.row_format.heading_format = False
doc_rtf.save(ARTIFACTS_DIR + "t_ae_1(1).docx")
_doc = aw.Document(ARTIFACTS_DIR + "t_ae_1(1).docx")
_tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
_tables = [t for t in _tables]
# For every table that has a Body ancestor, swap it with the element before it in the list.
# NOTE(review): when i == 0 the index i - 1 is -1, i.e. the last list element.
for table in _tables[::-1]:
if table.get_ancestor(aw.NodeType.BODY):
i = _tables.index(table)
temp = _tables[i]
_tables[i] = _tables[i - 1]
_tables[i - 1] = temp
# Number of tables whose direct parent is the Body.
all_tables_count = sum(p.parent_node.node_type == aw.NodeType.BODY for p in _tables)
curr_count = 0
for table in _tables[::-1]: # Insert the tables into the Word file in reverse order. TODO: beautify tables
if table.get_ancestor(aw.NodeType.BODY):
# Clone the table, import it into doc_main and insert it after `para`,
# followed by an empty paragraph that becomes the new `para`.
table_clone = table.as_table().clone(True)
imported_table = doc_main.import_node(table_clone, True)
imported_table.as_table().preferred_width = aw.tables.PreferredWidth.from_percent(100)
para = para.parent_node.insert_after(imported_table, para)
para = para.as_table().parent_node.insert_after(aw.Paragraph(doc_main), para)
# Unlink and clear the headers/footers of the section at index 1, then add
# clones of the parent nodes of two source tables picked by fixed positions
# (2 and 0) in the reversed list.
# NOTE(review): presumably those parents are the source header and footer;
# the fixed indices assume a specific table order — confirm.
section = doc_main.sections[1]
section.headers_footers.link_to_previous(False)
section.headers_footers.clear()
header = _tables[::-1][2].parent_node
cloned_header = header.clone(True)
imported_header = doc_main.import_node(cloned_header, True)
section.headers_footers.add(imported_header)
footer = _tables[::-1][0].parent_node
cloned_footer = footer.clone(True)
imported_footer = doc_main.import_node(cloned_footer, True)
section.headers_footers.add(imported_footer)
# NOTE(review): curr_count is compared before it is incremented, so this
# condition is presumably true for every body table — confirm intent.
if curr_count < all_tables_count:
builder_main.move_to(para)
builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
para = builder_main.current_paragraph
curr_count += 1
# if imported_table.node_type == aw.NodeType.TABLE:
# imported_table = imported_table.as_table()
# imported_table.preferred_width = (
# aw.tables.PreferredWidth.from_percent(100)
# )
#
# for index, row in enumerate(imported_table.rows):
# row = row.as_row()
# # print(index, row.get_text().strip())
# for cell_index, cell in enumerate(row.cells):
# cell = cell.as_cell()
# cell.cell_format.vertical_alignment = (
# aw.tables.CellVerticalAlignment.BOTTOM
# )
# for paragraph in cell.paragraphs:
# paragraph = paragraph.as_paragraph()
# # Center alignment
# for run in paragraph.runs:
# run = run.as_run()
# run.font.name = (
# "Courier New" # set the Western-text font (original note says "New Roman")
# )
# run.font.name_far_east = "宋体"
# run.font.size = 8
# table_newline = aw.Paragraph(doc_main)
# run = aw.Run(doc_main, "")
# table_newline.append_child(run)
# imported_table.parent_node.insert_after(
# table_newline, imported_table
# )
doc_main.save(ARTIFACTS_DIR + "Result.docx")
不对,页眉页脚需要按下面文件中的格式、以表格的形式插入:
CSR_插件效果2.docx (32.5 KB)
可是我通过以下代码插入的样式不对
# Purpose: opens the template at template_path, looks for paragraphs whose text
# is in header_list, converts the RTF files named by table_list entries (under
# settings.UPLOAD_PATH) to DOCX, imports their body tables into the template
# and saves the document to save_path.
# Parameters:
#   header_list   - heading texts to look for in the template.
#   table_list    - indexed in parallel with header_list; each element is a list
#                   of dicts read via the keys "id" and "is_header_and_footer".
#   save_path     - path the resulting document is saved to.
#   template_path - path of the document that is opened and modified.
# NOTE(review): this paste lost its indentation; in particular the placement of
# `try:` / `else:` / `except:` cannot be determined from this text.
async def generate_docx_with_result_laikai(
header_list: list,
table_list: list,
save_path,
template_path,
):
logger.info(f"header list内容: {header_list}")
logger.info(f"table list内容: {table_list}")
base_file = template_path
clear_save_path = base_file
doc_main = aw.Document(clear_save_path)
builder_main = aw.DocumentBuilder(doc_main)
paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
for para in paragraphs:
para = para.as_paragraph()
para_content = para.to_string(aw.SaveFormat.TEXT)
para_content = para_content.replace("\r", "")
para_content = (
para_content.strip()
) # Special case: this symbol was found in the table of contents; its purpose is unknown for now
if (
para_content in header_list or para_content.capitalize() in header_list
): # If the current paragraph is one of the requested headings, locate it and the generated result
# NOTE(review): table_header is created here but not used later in this function.
table_header = aw.Paragraph(doc_main)
table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
para = find_last_related_paragraph(para)
previous_para = para
# Walk backwards in document order until a paragraph whose style name contains "C-Heading" is found.
while previous_para is not None:
if (
previous_para.node_type is aw.NodeType.PARAGRAPH
and "C-Heading"
in previous_para.as_paragraph().paragraph_format.style_name
):
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Step to the previous sibling, then keep walking back until a paragraph node is reached.
# NOTE(review): if the loop above ended with previous_para being None, the next line fails.
previous_para = previous_para.previous_sibling
while previous_para is not None:
if previous_para.node_type is aw.NodeType.PARAGRAPH:
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Insert a new paragraph at that position, reset formatting/list and add a continuous section break.
builder_main.move_to(previous_para)
builder_main.insert_paragraph()
builder_main.paragraph_format.clear_formatting()
builder_main.list_format.list = None
builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
try:
if para_content in ["APPENDICES", "REFERENCES"]:
para_content = para_content.capitalize()
idx_num = header_list.index(para_content)
# Get the tables that correspond to the header
num_tables = len(table_list[idx_num])
logger.info(f"当前表格长度:{num_tables}")
for _index, info in enumerate(table_list[idx_num]):
table_id = info.get("id", "")
is_header_and_footer = info.get("is_header_and_footer", "")
# logger.warning(f"table_id in table list:{table_id}")
# Convert "<table_id>.rtf" to "<table_id>.docx" in settings.UPLOAD_PATH.
aw.Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
# Decide whether header/footer are needed; False means remove them
# NOTE(review): `Document` here is not aw.Document; presumably python-docx — confirm.
if not is_header_and_footer:
document = Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
for section in document.sections:
section.footer.is_linked_to_previous = True
section.header.is_linked_to_previous = True
document.save(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
doc_rtf = aw.Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
# builder = aw.DocumentBuilder(doc_rtf)
# # Decide whether the table header is continued across page breaks
# # builder.row_format.heading_format = False
# builder.row_format.heading_format = True
doc_rtf.save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
_doc = aw.Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
builder = aw.DocumentBuilder(_doc)
_tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
_tables = [t for t in _tables]
# For every table that has a Body ancestor, swap it with the element before it in the list.
# NOTE(review): when i == 0 the index i - 1 is -1, i.e. the last list element.
for table in _tables[::-1]:
if table.get_ancestor(aw.NodeType.BODY):
i = _tables.index(table)
temp = _tables[i]
_tables[i] = _tables[i - 1]
_tables[i - 1] = temp
# Number of tables whose direct parent is the Body.
all_tables_count = sum(
p.parent_node.node_type == aw.NodeType.BODY for p in _tables
)
curr_count = 0
for table in _tables[
::-1
]: # Insert the tables into the Word file in reverse order. TODO: beautify tables
if table.get_ancestor(aw.NodeType.BODY):
# Clone the table, import it into doc_main and insert it after `para`,
# followed by an empty paragraph that becomes the new `para`.
table_clone = table.as_table().clone(True)
imported_table = doc_main.import_node(table_clone, True)
imported_table.as_table().preferred_width = (
aw.tables.PreferredWidth.from_percent(100)
)
para = para.parent_node.insert_after(imported_table, para)
para = para.as_table().parent_node.insert_after(
aw.Paragraph(doc_main), para
)
# Unlink and clear the headers/footers of the section at index 1, then add
# clones of the parent nodes of two source tables picked by fixed positions
# (2 and 0) in the reversed list.
# NOTE(review): the fixed section index and table positions assume a specific
# document layout — confirm.
section = doc_main.sections[1]
section.headers_footers.link_to_previous(False)
section.headers_footers.clear()
header = _tables[::-1][2].parent_node
cloned_header = header.clone(True)
imported_header = doc_main.import_node(cloned_header, True)
section.headers_footers.add(imported_header)
footer = _tables[::-1][0].parent_node
cloned_footer = footer.clone(True)
imported_footer = doc_main.import_node(cloned_footer, True)
section.headers_footers.add(imported_footer)
# NOTE(review): curr_count is compared before it is incremented, so this
# condition is presumably true for every body table — confirm intent.
if curr_count < all_tables_count:
builder_main.move_to(para)
builder_main.insert_break(
aw.BreakType.SECTION_BREAK_NEW_PAGE
)
para = builder_main.current_paragraph
curr_count += 1
else:
logger.info(f"当前段落不需要插入表格内容")
# Insert the model output result; it is inserted only once, after the tables have been inserted
# TODO: beautify paragraphs
# NOTE(review): bare `except:` catches every exception and reports all of them as "header not found".
except:
logger.warning(f"没有找到header{traceback.format_exc()}")
print("没有找到header", traceback.format_exc())
doc_main.save(save_path)
以下是代码运行出来的效果:
111.docx (27.8 KB)
页眉页脚数据要以表格的形式插入,并且续表(分页后的表格)也需要带有页眉、页脚和表头这些信息。
@Tiaohh 根据您的目标使用此代码:
# Purpose: flat script that opens "111.docx", looks for paragraphs whose text is
# in header_list, and for each body table of "t_ae_1(1).docx" inserts the
# neighbouring list entries (used as header and footer tables) around it into
# the main document body, then saves "Result.docx".
# NOTE(review): this paste lost its indentation; the nesting of the loops and
# conditionals below has to be restored before the script can run. It also
# calls self.find_last_related_paragraph, so it presumably belongs inside a
# method — confirm.
header_list = ["DISPOSITION OF SUBJECTS"]
doc_main = aw.Document("111.docx")
builder_main = aw.DocumentBuilder(doc_main)
paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
for para in paragraphs:
para = para.as_paragraph()
# Plain text of the paragraph with "\r" removed and surrounding whitespace stripped.
para_content = para.to_string(aw.SaveFormat.TEXT)
para_content = para_content.replace("\r", "")
para_content = para_content.strip()
if para_content in header_list or para_content.capitalize() in header_list:
# NOTE(review): table_header is created here but only referenced from commented-out code below.
table_header = aw.Paragraph(doc_main)
table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
# Extra step: first clear the content of the current paragraph `para`
# para.parent_node.insert_after(table_header, para)
if para_content in ["DISPOSITION OF SUBJECTS", "Data Sets Analyzed", "APPENDICES", "REFERENCES"]:
# Page index on which the matched heading starts, taken from the layout collector.
layout_collector = aw.layout.LayoutCollector(doc_main)
page_index = layout_collector.get_start_page_index(para)
# Create a section break
para = self.find_last_related_paragraph(para)
if page_index > 1:
previous_para = para
# Walk backwards until a "C-Heading" paragraph that starts on an earlier page is found.
while previous_para is not None:
if previous_para.node_type is aw.NodeType.PARAGRAPH \
and "C-Heading" in previous_para.as_paragraph().paragraph_format.style_name \
and layout_collector.get_start_page_index(previous_para) < page_index:
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Insert a paragraph there, reset formatting/list, add a continuous section
# break and remove the builder's current paragraph afterwards.
builder_main.move_to(previous_para)
builder_main.insert_paragraph()
builder_main.paragraph_format.clear_formatting()
builder_main.list_format.list = None
builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
builder_main.current_paragraph.remove()
# run = aw.Run(doc_main, "")
# # Make the text bold
# run.font.bold = True
# # Append the bold Run object to the table-header paragraph
# table_header.append_child(run)
# idx_num = header_list.index(para_content)
# # Get the tables that correspond to the header
#
# num_tables = len(table_list[idx_num][::-1])
# for _index, info in enumerate(table_list[idx_num]):
# table_id = info.get("id", "")
# is_header_and_footer = info.get("is_header_and_footer", "")
# aw.Document(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
# ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
# # Decide whether header/footer are needed; False means remove them
# if not is_header_and_footer:
# document = Document(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
# )
# for section in document.sections:
# section.footer.is_linked_to_previous = True
# section.header.is_linked_to_previous = True
#
# document.save(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
# )
# Load the table document, save a copy under ARTIFACTS_DIR and reopen that copy.
doc_rtf = aw.Document(MY_DIR + "t_ae_1(1).docx")
builder = aw.DocumentBuilder(doc_rtf)
# NOTE(review): this sets the builder's row format only; no rows are built with
# this builder here, so it presumably does not change existing tables — confirm.
builder.row_format.heading_format = False
doc_rtf.save(ARTIFACTS_DIR + "t_ae_1(1).docx")
_doc = aw.Document(ARTIFACTS_DIR + "t_ae_1(1).docx")
_tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
_tables = [t for t in _tables]
# For every table that has a Body ancestor, swap it with the element before it in the list.
# NOTE(review): when i == 0 the index i - 1 is -1, i.e. the last list element.
for table in _tables[::-1]:
if table.get_ancestor(aw.NodeType.BODY):
i = _tables.index(table)
temp = _tables[i]
_tables[i] = _tables[i - 1]
_tables[i - 1] = temp
# Switch the section that contains `para` to landscape orientation.
section = para.as_paragraph().parent_section
page_setup = section.page_setup
page_setup.orientation = aw.Orientation.LANDSCAPE
# Number of tables whose direct parent is the Body.
all_tables_count = sum(p.parent_node.node_type == aw.NodeType.BODY for p in _tables)
curr_count = 0
# NOTE(review): the original comment below says "reverse order", but this loop iterates the list forward.
for table in _tables: # Insert the tables into the Word file in reverse order. TODO: beautify tables
if table.get_ancestor(aw.NodeType.BODY):
# The list entries directly before and after the body table are used as its header and footer tables.
# NOTE(review): table_index + 1 raises IndexError if the body table is the last list element.
table_index = _tables.index(table)
header_table = _tables[table_index - 1].as_table()
footer_table = _tables[table_index + 1].as_table()
header_table = header_table.clone(True).as_table()
footer_table = footer_table.clone(True).as_table()
table = table.clone(True).as_table()
imported_header = doc_main.import_node(header_table, True)
imported_footer = doc_main.import_node(footer_table, True)
imported_table = doc_main.import_node(table, True)
# Insert header table, body table, footer table and an empty paragraph one after another.
para = para.parent_node.insert_after(imported_header, para)
para = para.parent_node.insert_after(imported_table, para)
para = para.parent_node.insert_after(imported_footer, para)
para = para.parent_node.insert_after(aw.Paragraph(doc_main), para)
# NOTE(review): curr_count is compared before it is incremented, so this
# condition is presumably true for every body table — confirm intent.
if curr_count < all_tables_count:
builder_main.move_to(para)
builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
para = builder_main.current_paragraph
curr_count += 1
# After the last body table, set the builder's current section to portrait
# and remove the builder's current paragraph.
if curr_count == all_tables_count:
section = builder_main.current_section
page_setup = section.page_setup
page_setup.orientation = aw.Orientation.PORTRAIT
builder_main.current_paragraph.remove()
doc_main.save("Result.docx")
这就是结果:
Result.docx (21.7 KB)
if not is_header_and_footer:
document = Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
for section in document.sections:
section.footer.is_linked_to_previous = True
section.header.is_linked_to_previous = True
document.save(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
上面这段代码不能删掉,我们需要用它来设置是否保留页眉页脚。
@Tiaohh 使用此代码即可获得:
# Purpose: flat script that opens MY_DIR + "111 (1).docx", looks for paragraphs
# whose text is in header_list, and for each body table of "t_ae_1(1).docx"
# inserts the neighbouring list entries (used as header and footer tables)
# around it into the main document body, marking header rows as heading rows,
# then saves ARTIFACTS_DIR + "Result.docx".
# NOTE(review): this paste lost its indentation; the nesting of the loops and
# conditionals below has to be restored before the script can run. It also
# calls self.find_last_related_paragraph, so it presumably belongs inside a
# method — confirm.
header_list = ["DISPOSITION OF SUBJECTS"]
doc_main = aw.Document(MY_DIR + "111 (1).docx")
builder_main = aw.DocumentBuilder(doc_main)
paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
for para in paragraphs:
para = para.as_paragraph()
# Plain text of the paragraph with "\r" removed and surrounding whitespace stripped.
para_content = para.to_string(aw.SaveFormat.TEXT)
para_content = para_content.replace("\r", "")
para_content = para_content.strip()
if para_content in header_list or para_content.capitalize() in header_list:
# NOTE(review): table_header is created here but only referenced from commented-out code below.
table_header = aw.Paragraph(doc_main)
table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
# Extra step: first clear the content of the current paragraph `para`
# para.parent_node.insert_after(table_header, para)
if para_content in ["DISPOSITION OF SUBJECTS", "Data Sets Analyzed", "APPENDICES", "REFERENCES"]:
# Page index on which the matched heading starts, taken from the layout collector.
layout_collector = aw.layout.LayoutCollector(doc_main)
page_index = layout_collector.get_start_page_index(para)
# Create a section break
para = self.find_last_related_paragraph(para)
if page_index > 1:
previous_para = para
# Walk backwards until a "C-Heading" paragraph that starts on an earlier page is found.
while previous_para is not None:
if previous_para.node_type is aw.NodeType.PARAGRAPH \
and "C-Heading" in previous_para.as_paragraph().paragraph_format.style_name \
and layout_collector.get_start_page_index(previous_para) < page_index:
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Insert a paragraph there, reset formatting/list, add a continuous section
# break and remove the builder's current paragraph afterwards.
builder_main.move_to(previous_para)
builder_main.insert_paragraph()
builder_main.paragraph_format.clear_formatting()
builder_main.list_format.list = None
builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
builder_main.current_paragraph.remove()
# run = aw.Run(doc_main, "")
# # Make the text bold
# run.font.bold = True
# # Append the bold Run object to the table-header paragraph
# table_header.append_child(run)
# idx_num = header_list.index(para_content)
# # Get the tables that correspond to the header
#
# num_tables = len(table_list[idx_num][::-1])
# for _index, info in enumerate(table_list[idx_num]):
# table_id = info.get("id", "")
# is_header_and_footer = info.get("is_header_and_footer", "")
# aw.Document(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
# ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
# # Decide whether header/footer are needed; False means remove them
# if not is_header_and_footer:
# document = Document(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
# )
# for section in document.sections:
# section.footer.is_linked_to_previous = True
# section.header.is_linked_to_previous = True
#
# document.save(
# os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
# )
# Load the table document, save a copy under ARTIFACTS_DIR and reopen that copy.
doc_rtf = aw.Document(MY_DIR + "t_ae_1(1).docx")
builder = aw.DocumentBuilder(doc_rtf)
# NOTE(review): this sets the builder's row format only; no rows are built with
# this builder here, so it presumably does not change existing tables — confirm.
builder.row_format.heading_format = False
doc_rtf.save(ARTIFACTS_DIR + "t_ae_1(1).docx")
_doc = aw.Document(ARTIFACTS_DIR + "t_ae_1(1).docx")
_tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
_tables = [t for t in _tables]
# For every table that has a Body ancestor, swap it with the element before it in the list.
# NOTE(review): when i == 0 the index i - 1 is -1, i.e. the last list element.
for table in _tables[::-1]:
if table.get_ancestor(aw.NodeType.BODY):
i = _tables.index(table)
temp = _tables[i]
_tables[i] = _tables[i - 1]
_tables[i - 1] = temp
# Switch the section that contains `para` to landscape orientation.
section = para.as_paragraph().parent_section
page_setup = section.page_setup
page_setup.orientation = aw.Orientation.LANDSCAPE
# Number of tables whose direct parent is the Body.
all_tables_count = sum(p.parent_node.node_type == aw.NodeType.BODY for p in _tables)
curr_count = 0
# NOTE(review): the original comment below says "reverse order", but this loop iterates the list forward.
for table in _tables: # Insert the tables into the Word file in reverse order. TODO: beautify tables
if table.get_ancestor(aw.NodeType.BODY):
# The list entries directly before and after the body table are used as its header and footer tables.
# NOTE(review): table_index + 1 raises IndexError if the body table is the last list element.
table_index = _tables.index(table)
header_table = _tables[table_index - 1].as_table()
footer_table = _tables[table_index + 1].as_table()
header_table = header_table.clone(True).as_table()
footer_table = footer_table.clone(True).as_table()
table = table.clone(True).as_table()
imported_header = doc_main.import_node(header_table, True)
imported_footer = doc_main.import_node(footer_table, True)
imported_table = doc_main.import_node(table, True)
para = para.parent_node.insert_after(imported_header, para)
# Flag every row of the inserted header table as a heading row.
t1 = para.as_table()
for row in t1.rows:
row.as_row().row_format.heading_format = True
para = para.parent_node.insert_after(imported_table, para)
# Flag the first two rows of the inserted body table as heading rows.
# NOTE(review): assumes the body table has at least two rows — confirm.
t1 = para.as_table()
t1.rows[0].row_format.heading_format = True
t1.rows[1].row_format.heading_format = True
para = para.parent_node.insert_after(imported_footer, para)
para = para.parent_node.insert_after(aw.Paragraph(doc_main), para)
# NOTE(review): curr_count is compared before it is incremented, so this
# condition is presumably true for every body table — confirm intent.
if curr_count < all_tables_count:
builder_main.move_to(para)
builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
para = builder_main.current_paragraph
curr_count += 1
# After the last body table, set the builder's current section to portrait
# and remove the builder's current paragraph.
if curr_count == all_tables_count:
section = builder_main.current_section
page_setup = section.page_setup
page_setup.orientation = aw.Orientation.PORTRAIT
builder_main.current_paragraph.remove()
doc_main.save(ARTIFACTS_DIR + "Result.docx")
我可以为您提供数据
def find_last_related_paragraph(paragraphs, title_text):
    """Find the last paragraph that belongs to the heading with ``title_text``.

    The first paragraph in ``paragraphs`` whose stripped text equals
    ``title_text`` is taken as the heading. From there the following siblings
    are walked for as long as each one is a paragraph whose outline level is
    greater than the heading's outline level.

    Args:
        paragraphs: Indexable collection of nodes that can be cast with
            ``as_paragraph()``.
        title_text: Exact (already stripped) text of the heading to look for.

    Returns:
        The last related paragraph; the heading itself when nothing related
        follows it; or ``paragraphs[-1]`` as a fallback when no paragraph
        matches ``title_text``.
    """
    title_paragraph = None
    # Locate the heading: first paragraph whose stripped text matches.
    # NOTE(review): if the same text occurs earlier in the document, that
    # earlier paragraph is the one that gets picked — confirm against callers.
    for node in paragraphs:
        candidate = node.as_paragraph()
        if candidate.get_text().strip() == title_text:
            title_paragraph = candidate
            break

    if title_paragraph is None:
        # Fallback: use the last paragraph of the collection.
        return paragraphs[-1]

    heading_level = title_paragraph.paragraph_format.outline_level
    last_related_paragraph = title_paragraph
    sibling = title_paragraph.next_sibling
    # Only siblings of the same node type as the heading (i.e. paragraphs) are
    # cast; a table or any other node ends the walk instead of being cast
    # with as_paragraph().
    while sibling is not None and sibling.node_type == title_paragraph.node_type:
        following = sibling.as_paragraph()
        if following.paragraph_format.outline_level <= heading_level:
            break
        last_related_paragraph = following
        sibling = following.next_sibling
    return last_related_paragraph
# Purpose: opens the template at template_path, looks for paragraphs whose text
# is in header_list, converts the RTF files named by table_list entries (under
# settings.UPLOAD_PATH) to DOCX, imports their body tables into the template,
# saves the document to save_path and returns a fixed token-count dict.
# Parameters:
#   header_list   - heading texts to look for in the template.
#   table_list    - indexed in parallel with header_list; each element is a list
#                   of dicts read via the keys "id" and "is_header_and_footer".
#   save_path     - path the resulting document is saved to.
#   template_path - path of the document that is opened and modified.
# Returns: {"prompt_tokens": 1, "complete_tokens": 1, "total_tokens": 1}.
# NOTE(review): this paste lost its indentation; in particular the placement of
# `try:` / `else:` / `except:` cannot be determined from this text.
async def generate_docx_with_result_laikai(
header_list: list,
table_list: list,
save_path,
template_path,
):
logger.info(f"header list内容: {header_list}")
logger.info(f"table list内容: {table_list}")
base_file = template_path
clear_save_path = base_file
doc_main = aw.Document(clear_save_path)
builder_main = aw.DocumentBuilder(doc_main)
paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
for para in paragraphs:
para = para.as_paragraph()
para_content = para.to_string(aw.SaveFormat.TEXT)
para_content = para_content.replace("\r", "")
para_content = (
para_content.strip()
) # Special case: this symbol was found in the table of contents; its purpose is unknown for now
if (
para_content in header_list or para_content.capitalize() in header_list
): # If the current paragraph is one of the requested headings, locate it and the generated result
# NOTE(review): table_header is created here but only referenced from commented-out code below.
table_header = aw.Paragraph(doc_main)
table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
# NOTE(review): the heading is looked up again by text instead of passing `para`;
# the helper returns the first paragraph with this text, which may not be `para` — confirm.
para = find_last_related_paragraph(paragraphs, para_content)
previous_para = para
# Walk backwards in document order until a paragraph whose style name contains "C-Heading" is found.
while previous_para is not None:
if (
previous_para.node_type is aw.NodeType.PARAGRAPH
and "C-Heading"
in previous_para.as_paragraph().paragraph_format.style_name
):
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Step to the previous sibling, then keep walking back until a paragraph node is reached.
# NOTE(review): if the loop above ended with previous_para being None, the next line fails.
previous_para = previous_para.previous_sibling
while previous_para is not None:
if previous_para.node_type is aw.NodeType.PARAGRAPH:
previous_para = previous_para
break
previous_para = previous_para.previous_pre_order(doc_main)
# Insert a new paragraph at that position, reset formatting/list and add a continuous section break.
builder_main.move_to(previous_para)
builder_main.insert_paragraph()
builder_main.paragraph_format.clear_formatting()
builder_main.list_format.list = None
builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
try:
if para_content in ["APPENDICES", "REFERENCES"]:
para_content = para_content.capitalize()
idx_num = header_list.index(para_content)
# Get the tables that correspond to the header
num_tables = len(table_list[idx_num])
logger.info(f"当前表格长度:{num_tables}")
for _index, info in enumerate(table_list[idx_num]):
table_id = info.get("id", "")
is_header_and_footer = info.get("is_header_and_footer", "")
# logger.warning(f"table_id in table list:{table_id}")
# Convert "<table_id>.rtf" to "<table_id>.docx" in settings.UPLOAD_PATH.
aw.Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
# Decide whether header/footer are needed; False means remove them
# NOTE(review): `Document` here is not aw.Document; presumably python-docx — confirm.
if not is_header_and_footer:
document = Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
for section in document.sections:
section.footer.is_linked_to_previous = True
section.header.is_linked_to_previous = True
document.save(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
doc_rtf = aw.Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
# builder = aw.DocumentBuilder(doc_rtf)
# # Decide whether the table header is continued across page breaks
# # builder.row_format.heading_format = False
# builder.row_format.heading_format = True
doc_rtf.save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
_doc = aw.Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
builder = aw.DocumentBuilder(_doc)
_tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
_tables = [t for t in _tables]
# For every table that has a Body ancestor, swap it with the element before it in the list.
# NOTE(review): when i == 0 the index i - 1 is -1, i.e. the last list element.
for table in _tables[::-1]:
if table.get_ancestor(aw.NodeType.BODY):
i = _tables.index(table)
temp = _tables[i]
_tables[i] = _tables[i - 1]
_tables[i - 1] = temp
# Number of tables whose direct parent is the Body.
all_tables_count = sum(
p.parent_node.node_type == aw.NodeType.BODY for p in _tables
)
curr_count = 0
for table in _tables[
::-1
]: # Insert the tables into the Word file in reverse order. TODO: beautify tables
if table.get_ancestor(aw.NodeType.BODY):
# Clone the table, import it into doc_main and insert it after `para`,
# followed by an empty paragraph that becomes the new `para`.
table_clone = table.as_table().clone(True)
imported_table = doc_main.import_node(table_clone, True)
imported_table.as_table().preferred_width = (
aw.tables.PreferredWidth.from_percent(100)
)
para = para.parent_node.insert_after(imported_table, para)
para = para.as_table().parent_node.insert_after(
aw.Paragraph(doc_main), para
)
# Unlink and clear the headers/footers of the section at index 1, then add
# clones of the parent nodes of two source tables picked by fixed positions
# (2 and 0) in the reversed list.
# NOTE(review): the fixed section index and table positions assume a specific
# document layout — confirm.
section = doc_main.sections[1]
section.headers_footers.link_to_previous(False)
section.headers_footers.clear()
header = _tables[::-1][2].parent_node
cloned_header = header.clone(True)
imported_header = doc_main.import_node(cloned_header, True)
section.headers_footers.add(imported_header)
footer = _tables[::-1][0].parent_node
cloned_footer = footer.clone(True)
imported_footer = doc_main.import_node(cloned_footer, True)
section.headers_footers.add(imported_footer)
# NOTE(review): curr_count is compared before it is incremented, so this
# condition is presumably true for every body table — confirm intent.
if curr_count < all_tables_count:
builder_main.move_to(para)
builder_main.insert_break(
aw.BreakType.SECTION_BREAK_NEW_PAGE
)
para = builder_main.current_paragraph
curr_count += 1
# _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
# _tables = [t for t in _tables]
# for table in _tables[::-1]:
# if table.get_ancestor(aw.NodeType.BODY):
# i = _tables.index(table)
# temp = _tables[i]
# _tables[i] = _tables[i - 1]
# _tables[i - 1] = temp
# # Delete the content of the first row of the table
# logger.info(f"插入表格: {table_id}")
# for table in _tables[
# ::-1
# ]: # Insert the tables into the Word file in reverse order. TODO: beautify tables
#
# table_clone = table.as_table().clone(True)
# imported_table = doc_main.import_node(table_clone, True)
# logger.info(
# f"imported table 节点类型: {imported_table.node_type}"
# )
#
# if imported_table.node_type == aw.NodeType.TABLE:
# logger.info(f"imported table 节点是表格")
# imported_table = imported_table.as_table()
# imported_table.preferred_width = (
# aw.tables.PreferredWidth.from_percent(100)
# )
#
# for index, row in enumerate(imported_table.rows):
# row = row.as_row()
# # print(index, row.get_text().strip())
# for cell_index, cell in enumerate(row.cells):
# cell = cell.as_cell()
# cell.cell_format.vertical_alignment = (
# aw.tables.CellVerticalAlignment.BOTTOM
# )
# for paragraph in cell.paragraphs:
# paragraph = paragraph.as_paragraph()
# # Center alignment
# for run in paragraph.runs:
# run = run.as_run()
# run.font.name = (
# "Courier New" # set the Western-text font (original note says "New Roman")
# )
# run.font.name_far_east = "宋体"
# run.font.size = 8
# print("一个表格s数据")
# run = aw.Run(doc_main, "")
# # Make the text bold
# run.font.bold = True
# # Append the bold Run object to the table-header paragraph
# table_header.append_child(run)
# # Extra step: first clear the content of the current paragraph `para`
# para.parent_node.insert_after(table_header, para)
# # Insert the paragraph content after inserting the paragraph heading
# para.parent_node.insert_after(imported_table, para)
# table_header.parent_node.insert_after(
# imported_table, table_header
# )
# table_newline = aw.Paragraph(doc_main)
# run = aw.Run(doc_main, "")
# table_newline.append_child(run)
# imported_table.parent_node.insert_after(
# table_newline, imported_table
# )
else:
logger.info(f"当前段落不需要插入表格内容")
# Insert the model output result; it is inserted only once, after the tables have been inserted
# TODO: beautify paragraphs
# NOTE(review): bare `except:` catches every exception and reports all of them as "header not found".
except:
logger.warning(f"没有找到header{traceback.format_exc()}")
print("没有找到header", traceback.format_exc())
doc_main.save(save_path)
return {"prompt_tokens": 1, "complete_tokens": 1, "total_tokens": 1}
async def a():
    """Run generate_docx_with_result_laikai once with sample arguments.

    Uses "csr_blank.docx" from settings.DOCX_TEMPLATE_PATH as the template,
    one heading with two table entries, and writes the result to "111.docx".
    """
    csr_table_file = os.path.join(settings.DOCX_TEMPLATE_PATH, "csr_blank.docx")
    await generate_docx_with_result_laikai(
        [
            "DISPOSITION OF SUBJECTS",
        ],
        [
            [
                {
                    "id": "f32b82df656a4bd9a90121d95ea0d86c",
                    "is_header_and_footer": True,
                },
                {
                    "id": "2",
                    "is_header_and_footer": True,
                },
            ],
        ],
        "111.docx",
        csr_table_file,
    )


if __name__ == "__main__":
    # Imported here because asyncio is only needed for this manual run; the
    # run itself stays under the guard so importing the module has no side
    # effects.
    import asyncio

    asyncio.run(a())
2.docx (14.9 KB)
f32b82df656a4bd9a90121d95ea0d86c.docx (18.6 KB)
你需要将这两个docx转换成rtf文件进行测试