怎么在指定标题下面添加新的标题?

rtf文件我上传不了,不知道为什么

我需要分页之后进行续表 是需要的 可是当前代码没有实现这个续表功能呀

@Tiaohh

  1. 您想更新当前页面的页眉吗?更新页眉时,您需要插入分段符,以便在页面上设置不同的页眉。
  2. 表格需要放在下一页吗?或者您需要从第二个表格中删除这几行?

您能否更改截图上的文件,以获得所需的输出结果。

我需要用代码写入截图标记的问题 我不要手动改

代码我已经提供我需要怎么修改代码进行写入呢

怎么获取标题或者段落内容节点的位置呢?修改之后,找到这个节点位置,先删除再插入,是不是就可以呢?还有这个问题麻烦也帮我看一下,官方文档里没有,CSDN 教程里也没有,真的不知道怎么弄了

你好这个问题有结果了吗?????

你好有结果了吗???????

@Tiaohh 目前只完成了一部分:

@staticmethod
def find_last_related_paragraph(para):
    last_related_paragraph = para
    while (
            last_related_paragraph.next_sibling
            and last_related_paragraph.next_sibling.as_paragraph().paragraph_format.outline_level
            > para.paragraph_format.outline_level
    ):
        last_related_paragraph = last_related_paragraph.next_sibling

    return last_related_paragraph
def test_generate_docx_with_result_laikai(self):
    """Insert the body tables of "t_ae_1(1).docx" after a matching heading in "111 (1).docx".

    For every paragraph of the main document whose stripped text is in
    ``header_list`` (and is "APPENDICES" or "REFERENCES"), the method:

    1. moves the insertion position to the node returned by
       ``find_last_related_paragraph``;
    2. walks backwards from there to a paragraph whose style name contains
       "C-Heading", steps to the paragraph before it and inserts a
       continuous section break at that point;
    3. imports every table of the source document that has a BODY ancestor
       and inserts it after the current position, followed by an empty
       paragraph and a new-page section break.

    The result is saved to ``ARTIFACTS_DIR + "Result.docx"``.
    """
    header_list = ["REFERENCES"]
    doc_main = aw.Document(MY_DIR + "111 (1).docx")
    builder_main = aw.DocumentBuilder(doc_main)
    # NOTE(review): nodes are inserted into doc_main while this collection is
    # being iterated below - confirm the collection tolerates that.
    paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
    for para in paragraphs:

        para = para.as_paragraph()
        # Paragraph text with carriage returns and surrounding whitespace removed.
        para_content = para.to_string(aw.SaveFormat.TEXT)
        para_content = para_content.replace("\r", "")
        para_content = para_content.strip()
        if para_content in header_list or para_content.capitalize() in header_list:
            # NOTE(review): table_header is configured here but never inserted
            # (the insert below is commented out).
            table_header = aw.Paragraph(doc_main)
            table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
            table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
            # Extra step: first clear the content of the current para paragraph
            # para.parent_node.insert_after(table_header, para)
            if para_content in ["APPENDICES", "REFERENCES"]:
                # Create a section break
                para = self.find_last_related_paragraph(para)
                # Walk backwards (starting at para itself) to a paragraph whose
                # style name contains "C-Heading".
                previous_para = para
                while previous_para is not None:
                    if previous_para.node_type is aw.NodeType.PARAGRAPH and "C-Heading" in previous_para.as_paragraph().paragraph_format.style_name:
                        # Self-assignment is a no-op; the break keeps the match.
                        previous_para = previous_para
                        break
                    previous_para = previous_para.previous_pre_order(doc_main)

                # NOTE(review): if the loop above found nothing, previous_para
                # is None here and this attribute access raises AttributeError.
                previous_para = previous_para.previous_sibling
                # Continue backwards until the node is a paragraph.
                while previous_para is not None:
                    if previous_para.node_type is aw.NodeType.PARAGRAPH:
                        previous_para = previous_para
                        break
                    previous_para = previous_para.previous_pre_order(doc_main)

                # Add a fresh paragraph at that position, reset its paragraph
                # and list formatting, and insert a continuous section break.
                builder_main.move_to(previous_para)
                builder_main.insert_paragraph()
                builder_main.paragraph_format.clear_formatting()
                builder_main.list_format.list = None
                builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
                # run = aw.Run(doc_main, "")
                # # make the text bold
                # run.font.bold = True
                # # append the bold Run to the table-header paragraph
                # table_header.append_child(run)
                # idx_num = header_list.index(para_content)
                # # look up the tables that belong to this header
                #
                # num_tables = len(table_list[idx_num][::-1])
                # for _index, info in enumerate(table_list[idx_num]):
                #     table_id = info.get("id", "")
                #     is_header_and_footer = info.get("is_header_and_footer", "")
                #     aw.Document(
                #         os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
                #     ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
                #     # decide whether headers/footers are wanted; False removes them
                #     if not is_header_and_footer:
                #         document = Document(
                #             os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                #         )
                #         for section in document.sections:
                #             section.footer.is_linked_to_previous = True
                #             section.header.is_linked_to_previous = True
                #
                #         document.save(
                #             os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                #         )

                # Load the table source, save a copy to ARTIFACTS_DIR and
                # reload that copy.
                doc_rtf = aw.Document(MY_DIR + "t_ae_1(1).docx")
                builder = aw.DocumentBuilder(doc_rtf)
                # NOTE(review): this only sets the builder's row format; no row
                # is created with this builder before the save - confirm it has
                # any effect on the existing tables.
                builder.row_format.heading_format = False
                doc_rtf.save(ARTIFACTS_DIR + "t_ae_1(1).docx")
                _doc = aw.Document(ARTIFACTS_DIR + "t_ae_1(1).docx")
                # Snapshot all tables of the source document into a plain list.
                _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
                _tables = [t for t in _tables]

                # For every table that has a BODY ancestor, swap it with the
                # entry just before it in the list (for i == 0 the index i - 1
                # wraps around to the last entry).
                for table in _tables[::-1]:
                    if table.get_ancestor(aw.NodeType.BODY):
                        i = _tables.index(table)
                        temp = _tables[i]
                        _tables[i] = _tables[i - 1]
                        _tables[i - 1] = temp

                # Number of tables whose direct parent is a BODY node.
                all_tables_count = sum(p.parent_node.node_type == aw.NodeType.BODY for p in _tables)
                curr_count = 0
                for table in _tables[::-1]:  # Insert the tables into the Word file in reverse order. TODO: table styling
                    if table.get_ancestor(aw.NodeType.BODY):
                        table_clone = table.as_table().clone(True)
                        imported_table = doc_main.import_node(table_clone, True)
                        imported_table.as_table().preferred_width = aw.tables.PreferredWidth.from_percent(100)
                        # Insert the table after the current position, then an
                        # empty paragraph after the table; para tracks the
                        # most recently inserted node.
                        para = para.parent_node.insert_after(imported_table, para)
                        para = para.as_table().parent_node.insert_after(aw.Paragraph(doc_main), para)
                        # curr_count starts at 0 and is only incremented in
                        # this branch, once per inserted table.
                        if curr_count < all_tables_count:
                            builder_main.move_to(para)
                            builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
                            para = builder_main.current_paragraph
                            curr_count += 1

                    # if imported_table.node_type == aw.NodeType.TABLE:
                    #     imported_table = imported_table.as_table()
                    #     imported_table.preferred_width = (
                    #         aw.tables.PreferredWidth.from_percent(100)
                    #     )
                    #
                    #     for index, row in enumerate(imported_table.rows):
                    #         row = row.as_row()
                    #         # print(index, row.get_text().strip())
                    #         for cell_index, cell in enumerate(row.cells):
                    #             cell = cell.as_cell()
                    #             cell.cell_format.vertical_alignment = (
                    #                 aw.tables.CellVerticalAlignment.BOTTOM
                    #             )
                    #             for paragraph in cell.paragraphs:
                    #                 paragraph = paragraph.as_paragraph()
                    #                 # center alignment
                    #                 for run in paragraph.runs:
                    #                     run = run.as_run()
                    #                     run.font.name = (
                    #                         "Courier New"  # set the Western (Latin) font (original note says Times New Roman)
                    #                     )
                    #                     run.font.name_far_east = "宋体"
                    #                     run.font.size = 8



                    # table_newline = aw.Paragraph(doc_main)
                    # run = aw.Run(doc_main, "")
                    # table_newline.append_child(run)
                    # imported_table.parent_node.insert_after(
                    #     table_newline, imported_table
                    # )

    doc_main.save(ARTIFACTS_DIR + "Result.docx")

Result.docx (26.5 KB)

现在,您需要在第 2 节和第 3 节中添加页眉/页脚,并使用我上面分享的代码禁用 is_linked_to_previous。

这是您需要的正确结果吗?

@Tiaohh 更新代码以包含页眉/页脚示例。您需要为所有必需的部分添加这些代码,并处理格式问题。

@staticmethod
def find_last_related_paragraph(para):
    last_related_paragraph = para
    while (
            last_related_paragraph.next_sibling
            and last_related_paragraph.next_sibling.as_paragraph().paragraph_format.outline_level
            > para.paragraph_format.outline_level
    ):
        last_related_paragraph = last_related_paragraph.next_sibling

    return last_related_paragraph
def test_generate_docx_with_result_laikai(self):
    """Insert tables from "t_ae_1(1).docx" under a heading of "111 (1).docx" and copy a header/footer.

    For every paragraph of the main document whose stripped text is in
    ``header_list`` (and is "APPENDICES" or "REFERENCES"), the method inserts
    a continuous section break before the surrounding "C-Heading" paragraph,
    then imports every source table that has a BODY ancestor after the
    heading's related paragraphs. For each inserted table it also replaces
    the headers/footers of ``doc_main.sections[1]`` with clones of the parent
    nodes of two source tables, and adds a new-page section break.

    The result is saved to ``ARTIFACTS_DIR + "Result.docx"``.
    """
    header_list = ["REFERENCES"]
    doc_main = aw.Document(MY_DIR + "111 (1).docx")
    builder_main = aw.DocumentBuilder(doc_main)
    # NOTE(review): nodes are inserted into doc_main while this collection is
    # being iterated below - confirm the collection tolerates that.
    paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
    for para in paragraphs:

        para = para.as_paragraph()
        # Paragraph text with carriage returns and surrounding whitespace removed.
        para_content = para.to_string(aw.SaveFormat.TEXT)
        para_content = para_content.replace("\r", "")
        para_content = para_content.strip()
        if para_content in header_list or para_content.capitalize() in header_list:
            # NOTE(review): table_header is configured here but never inserted
            # (the insert below is commented out).
            table_header = aw.Paragraph(doc_main)
            table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
            table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
            # Extra step: first clear the content of the current para paragraph
            # para.parent_node.insert_after(table_header, para)
            if para_content in ["APPENDICES", "REFERENCES"]:
                # Create a section break
                para = self.find_last_related_paragraph(para)
                # Walk backwards (starting at para itself) to a paragraph whose
                # style name contains "C-Heading".
                previous_para = para
                while previous_para is not None:
                    if previous_para.node_type is aw.NodeType.PARAGRAPH and "C-Heading" in previous_para.as_paragraph().paragraph_format.style_name:
                        # Self-assignment is a no-op; the break keeps the match.
                        previous_para = previous_para
                        break
                    previous_para = previous_para.previous_pre_order(doc_main)

                # NOTE(review): if the loop above found nothing, previous_para
                # is None here and this attribute access raises AttributeError.
                previous_para = previous_para.previous_sibling
                # Continue backwards until the node is a paragraph.
                while previous_para is not None:
                    if previous_para.node_type is aw.NodeType.PARAGRAPH:
                        previous_para = previous_para
                        break
                    previous_para = previous_para.previous_pre_order(doc_main)

                # Add a fresh paragraph at that position, reset its paragraph
                # and list formatting, and insert a continuous section break.
                builder_main.move_to(previous_para)
                builder_main.insert_paragraph()
                builder_main.paragraph_format.clear_formatting()
                builder_main.list_format.list = None
                builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
                # run = aw.Run(doc_main, "")
                # # make the text bold
                # run.font.bold = True
                # # append the bold Run to the table-header paragraph
                # table_header.append_child(run)
                # idx_num = header_list.index(para_content)
                # # look up the tables that belong to this header
                #
                # num_tables = len(table_list[idx_num][::-1])
                # for _index, info in enumerate(table_list[idx_num]):
                #     table_id = info.get("id", "")
                #     is_header_and_footer = info.get("is_header_and_footer", "")
                #     aw.Document(
                #         os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
                #     ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
                #     # decide whether headers/footers are wanted; False removes them
                #     if not is_header_and_footer:
                #         document = Document(
                #             os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                #         )
                #         for section in document.sections:
                #             section.footer.is_linked_to_previous = True
                #             section.header.is_linked_to_previous = True
                #
                #         document.save(
                #             os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                #         )

                # Load the table source, save a copy to ARTIFACTS_DIR and
                # reload that copy.
                doc_rtf = aw.Document(MY_DIR + "t_ae_1(1).docx")
                builder = aw.DocumentBuilder(doc_rtf)
                # NOTE(review): this only sets the builder's row format; no row
                # is created with this builder before the save - confirm it has
                # any effect on the existing tables.
                builder.row_format.heading_format = False
                doc_rtf.save(ARTIFACTS_DIR + "t_ae_1(1).docx")
                _doc = aw.Document(ARTIFACTS_DIR + "t_ae_1(1).docx")
                # Snapshot all tables of the source document into a plain list.
                _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
                _tables = [t for t in _tables]

                # For every table that has a BODY ancestor, swap it with the
                # entry just before it in the list (for i == 0 the index i - 1
                # wraps around to the last entry).
                for table in _tables[::-1]:
                    if table.get_ancestor(aw.NodeType.BODY):
                        i = _tables.index(table)
                        temp = _tables[i]
                        _tables[i] = _tables[i - 1]
                        _tables[i - 1] = temp

                # Number of tables whose direct parent is a BODY node.
                all_tables_count = sum(p.parent_node.node_type == aw.NodeType.BODY for p in _tables)
                curr_count = 0
                for table in _tables[::-1]:  # Insert the tables into the Word file in reverse order. TODO: table styling
                    if table.get_ancestor(aw.NodeType.BODY):
                        table_clone = table.as_table().clone(True)
                        imported_table = doc_main.import_node(table_clone, True)
                        imported_table.as_table().preferred_width = aw.tables.PreferredWidth.from_percent(100)
                        # Insert the table after the current position, then an
                        # empty paragraph after the table; para tracks the
                        # most recently inserted node.
                        para = para.parent_node.insert_after(imported_table, para)
                        para = para.as_table().parent_node.insert_after(aw.Paragraph(doc_main), para)

                        # Replace the headers/footers of the second section of
                        # the main document. This runs once per inserted table
                        # and always targets sections[1].
                        section = doc_main.sections[1]
                        section.headers_footers.link_to_previous(False)
                        section.headers_footers.clear()
                        # Parent of the third entry of the reversed list;
                        # presumably a header node of the source document -
                        # TODO confirm. Raises IndexError with fewer than
                        # three tables.
                        header = _tables[::-1][2].parent_node
                        cloned_header = header.clone(True)
                        imported_header = doc_main.import_node(cloned_header, True)
                        section.headers_footers.add(imported_header)

                        # Parent of the first entry of the reversed list;
                        # presumably a footer node of the source document -
                        # TODO confirm.
                        footer = _tables[::-1][0].parent_node
                        cloned_footer = footer.clone(True)
                        imported_footer = doc_main.import_node(cloned_footer, True)
                        section.headers_footers.add(imported_footer)

                        # curr_count starts at 0 and is only incremented in
                        # this branch, once per inserted table.
                        if curr_count < all_tables_count:
                            builder_main.move_to(para)
                            builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
                            para = builder_main.current_paragraph
                            curr_count += 1

                    # if imported_table.node_type == aw.NodeType.TABLE:
                    #     imported_table = imported_table.as_table()
                    #     imported_table.preferred_width = (
                    #         aw.tables.PreferredWidth.from_percent(100)
                    #     )
                    #
                    #     for index, row in enumerate(imported_table.rows):
                    #         row = row.as_row()
                    #         # print(index, row.get_text().strip())
                    #         for cell_index, cell in enumerate(row.cells):
                    #             cell = cell.as_cell()
                    #             cell.cell_format.vertical_alignment = (
                    #                 aw.tables.CellVerticalAlignment.BOTTOM
                    #             )
                    #             for paragraph in cell.paragraphs:
                    #                 paragraph = paragraph.as_paragraph()
                    #                 # center alignment
                    #                 for run in paragraph.runs:
                    #                     run = run.as_run()
                    #                     run.font.name = (
                    #                         "Courier New"  # set the Western (Latin) font (original note says Times New Roman)
                    #                     )
                    #                     run.font.name_far_east = "宋体"
                    #                     run.font.size = 8



                    # table_newline = aw.Paragraph(doc_main)
                    # run = aw.Run(doc_main, "")
                    # table_newline.append_child(run)
                    # imported_table.parent_node.insert_after(
                    #     table_newline, imported_table
                    # )

    doc_main.save(ARTIFACTS_DIR + "Result.docx")

@Tiaohh 我对页眉/页脚进行了修改,在最后一个页眉之前创建了 SectionBreak,以便为新页面添加不同的页眉/页脚。

不对,插入的页眉页脚要是这种格式,以表格形式插入:
CSR_插件效果2.docx (32.5 KB)

可是我通过以下代码插入的样式不对

async def generate_docx_with_result_laikai(
    header_list: list,
    table_list: list,
    save_path,
    template_path,
):
    """Insert converted RTF tables under their headings in a template document.

    Every paragraph of the template whose stripped text (or its
    ``str.capitalize()`` form) occurs in ``header_list`` is treated as a
    heading. For such a heading the function inserts a continuous section
    break before the surrounding "C-Heading" paragraph and then inserts the
    tables configured for that heading after the heading's related
    paragraphs, each followed by a new-page section break.

    Args:
        header_list: Heading texts to look for in the template.
        table_list: List parallel to ``header_list``; entry ``i`` is a list of
            dicts describing the tables for ``header_list[i]``. Each dict is
            read for the keys ``"id"`` (base name of
            ``<UPLOAD_PATH>/<id>.rtf``) and ``"is_header_and_footer"``.
        save_path: Where the resulting document is saved.
        template_path: Path of the template document that is opened.

    Failures while processing one heading are logged and do not abort the
    remaining headings.
    """
    logger.info(f"header list内容: {header_list}")
    logger.info(f"table list内容: {table_list}")
    doc_main = aw.Document(template_path)
    builder_main = aw.DocumentBuilder(doc_main)
    paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)

    for para in paragraphs:
        para = para.as_paragraph()
        # Paragraph text with carriage returns and surrounding whitespace
        # removed (the table of contents was seen to contain such characters).
        para_content = para.to_string(aw.SaveFormat.TEXT).replace("\r", "").strip()
        capitalized = para_content.capitalize()
        if para_content not in header_list and capitalized not in header_list:
            continue

        para = find_last_related_paragraph(para)

        # Walk backwards (starting at para itself) to a paragraph whose style
        # name contains "C-Heading" ...
        previous_para = para
        while previous_para is not None:
            if (
                previous_para.node_type is aw.NodeType.PARAGRAPH
                and "C-Heading"
                in previous_para.as_paragraph().paragraph_format.style_name
            ):
                break
            previous_para = previous_para.previous_pre_order(doc_main)

        # ... then step to the paragraph in front of it. Both searches may run
        # off the start of the document, so None is handled explicitly instead
        # of failing with AttributeError.
        if previous_para is not None:
            previous_para = previous_para.previous_sibling
            while (
                previous_para is not None
                and previous_para.node_type is not aw.NodeType.PARAGRAPH
            ):
                previous_para = previous_para.previous_pre_order(doc_main)

        if previous_para is None:
            # No anchor for the section break: skip the break, keep going.
            logger.warning(f"未找到可插入分节符的段落,跳过分节符: {para_content}")
        else:
            builder_main.move_to(previous_para)
            builder_main.insert_paragraph()
            builder_main.paragraph_format.clear_formatting()
            builder_main.list_format.list = None
            builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)

        try:
            # Resolve the spelling that is actually stored in header_list so
            # that index() cannot fail. "APPENDICES"/"REFERENCES" keep their
            # historical preference for the capitalized spelling.
            if (
                para_content in ("APPENDICES", "REFERENCES")
                and capitalized in header_list
            ):
                header_key = capitalized
            elif para_content in header_list:
                header_key = para_content
            else:
                header_key = capitalized
            idx_num = header_list.index(header_key)

            # Table descriptions configured for this heading.
            table_infos = table_list[idx_num]
            logger.info(f"当前表格长度:{len(table_infos)}")
            if not table_infos:
                logger.info("当前段落不需要插入表格内容")

            for info in table_infos:
                table_id = info.get("id", "")
                is_header_and_footer = info.get("is_header_and_footer", "")
                rtf_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
                docx_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")

                # Convert the RTF source to DOCX.
                aw.Document(rtf_path).save(docx_path)

                # When headers/footers are not wanted, mark them as linked to
                # the previous section in every section of the converted file.
                if not is_header_and_footer:
                    document = Document(docx_path)
                    for docx_section in document.sections:
                        docx_section.footer.is_linked_to_previous = True
                        docx_section.header.is_linked_to_previous = True
                    document.save(docx_path)

                # Round-trip the file through Aspose.Words and reload it.
                aw.Document(docx_path).save(docx_path)
                _doc = aw.Document(docx_path)
                _tables = list(_doc.get_child_nodes(aw.NodeType.TABLE, True))

                # For every table that has a BODY ancestor, swap it with the
                # entry just before it in the list (index -1 wraps around).
                for table in _tables[::-1]:
                    if table.get_ancestor(aw.NodeType.BODY):
                        i = _tables.index(table)
                        _tables[i], _tables[i - 1] = _tables[i - 1], _tables[i]

                # Number of tables whose direct parent is a BODY node.
                all_tables_count = sum(
                    p.parent_node.node_type == aw.NodeType.BODY for p in _tables
                )
                curr_count = 0
                # Insert the body tables in reverse list order. TODO: table styling
                for table in _tables[::-1]:
                    if not table.get_ancestor(aw.NodeType.BODY):
                        continue
                    table_clone = table.as_table().clone(True)
                    imported_table = doc_main.import_node(table_clone, True)
                    imported_table.as_table().preferred_width = (
                        aw.tables.PreferredWidth.from_percent(100)
                    )
                    # Insert the table after the current position, then an
                    # empty paragraph after the table; para tracks the most
                    # recently inserted node.
                    para = para.parent_node.insert_after(imported_table, para)
                    para = para.as_table().parent_node.insert_after(
                        aw.Paragraph(doc_main), para
                    )

                    # Replace the headers/footers of the second section of the
                    # main document with clones of the parents of the
                    # third-from-last and last tables in the list (presumably
                    # the source header and footer - TODO confirm).
                    section = doc_main.sections[1]
                    section.headers_footers.link_to_previous(False)
                    section.headers_footers.clear()
                    header = _tables[-3].parent_node
                    imported_header = doc_main.import_node(header.clone(True), True)
                    section.headers_footers.add(imported_header)

                    footer = _tables[-1].parent_node
                    imported_footer = doc_main.import_node(footer.clone(True), True)
                    section.headers_footers.add(imported_footer)

                    if curr_count < all_tables_count:
                        builder_main.move_to(para)
                        builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
                        para = builder_main.current_paragraph
                        curr_count += 1

            # Insert the model output ("result") here: only once, after the
            # tables have been inserted.
            # TODO: paragraph styling

        except Exception:
            # Best effort per heading: log the failure and continue with the
            # next paragraph.
            logger.warning(f"没有找到header{traceback.format_exc()}")
            print("没有找到header", traceback.format_exc())

    doc_main.save(save_path)

以下是代码出来的效果:
111.docx (27.8 KB)

页眉页脚数据要以表格数据插入 并且续表也需要有页眉页脚表头这些信息哦

@Tiaohh 根据您的目标使用此代码:

# Forum sample script: inserts the tables of "t_ae_1(1).docx" (each body table
# together with its neighbouring list entries, used as header and footer
# tables) under the matching heading of "111.docx", on landscape pages
# separated by new-page section breaks, and saves "Result.docx".
# NOTE(review): the snippet calls self.find_last_related_paragraph, so it is
# meant to run inside a method although it is pasted at top level.
header_list = ["DISPOSITION OF SUBJECTS"]
doc_main = aw.Document("111.docx")
builder_main = aw.DocumentBuilder(doc_main)
# NOTE(review): nodes are inserted into doc_main while this collection is
# being iterated below - confirm the collection tolerates that.
paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
for para in paragraphs:

    para = para.as_paragraph()
    # Paragraph text with carriage returns and surrounding whitespace removed.
    para_content = para.to_string(aw.SaveFormat.TEXT)
    para_content = para_content.replace("\r", "")
    para_content = para_content.strip()
    if para_content in header_list or para_content.capitalize() in header_list:
        # NOTE(review): table_header is configured here but never inserted
        # (the insert below is commented out).
        table_header = aw.Paragraph(doc_main)
        table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
        table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
        # Extra step: first clear the content of the current para paragraph
        # para.parent_node.insert_after(table_header, para)
        if para_content in ["DISPOSITION OF SUBJECTS", "Data Sets Analyzed", "APPENDICES", "REFERENCES"]:
            # Page index on which the heading paragraph starts.
            layout_collector = aw.layout.LayoutCollector(doc_main)
            page_index = layout_collector.get_start_page_index(para)
            # Create a section break
            para = self.find_last_related_paragraph(para)
            # The section break is only created when the heading does not
            # start on page 1.
            if page_index > 1:
                # Walk backwards to a "C-Heading" paragraph that starts on an
                # earlier page than the heading.
                previous_para = para
                while previous_para is not None:
                    if previous_para.node_type is aw.NodeType.PARAGRAPH \
                            and "C-Heading" in previous_para.as_paragraph().paragraph_format.style_name \
                            and layout_collector.get_start_page_index(previous_para) < page_index:
                        # Self-assignment is a no-op; the break keeps the match.
                        previous_para = previous_para
                        break
                    previous_para = previous_para.previous_pre_order(doc_main)

                # NOTE(review): previous_para is None here if nothing matched.
                # Add a paragraph, reset its formatting, insert a continuous
                # section break, then remove the builder's current paragraph.
                builder_main.move_to(previous_para)
                builder_main.insert_paragraph()
                builder_main.paragraph_format.clear_formatting()
                builder_main.list_format.list = None
                builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
                builder_main.current_paragraph.remove()

            # run = aw.Run(doc_main, "")
            # # make the text bold
            # run.font.bold = True
            # # append the bold Run to the table-header paragraph
            # table_header.append_child(run)
            # idx_num = header_list.index(para_content)
            # # look up the tables that belong to this header
            #
            # num_tables = len(table_list[idx_num][::-1])
            # for _index, info in enumerate(table_list[idx_num]):
            #     table_id = info.get("id", "")
            #     is_header_and_footer = info.get("is_header_and_footer", "")
            #     aw.Document(
            #         os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
            #     ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
            #     # decide whether headers/footers are wanted; False removes them
            #     if not is_header_and_footer:
            #         document = Document(
            #             os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
            #         )
            #         for section in document.sections:
            #             section.footer.is_linked_to_previous = True
            #             section.header.is_linked_to_previous = True
            #
            #         document.save(
            #             os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
            #         )

            # Load the table source, save a copy to ARTIFACTS_DIR and reload
            # that copy.
            doc_rtf = aw.Document(MY_DIR + "t_ae_1(1).docx")
            builder = aw.DocumentBuilder(doc_rtf)
            # NOTE(review): this only sets the builder's row format; no row is
            # created with this builder before the save - confirm it has any
            # effect on the existing tables.
            builder.row_format.heading_format = False
            doc_rtf.save(ARTIFACTS_DIR + "t_ae_1(1).docx")
            _doc = aw.Document(ARTIFACTS_DIR + "t_ae_1(1).docx")

            # Snapshot all tables of the source document into a plain list.
            _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
            _tables = [t for t in _tables]

            # For every table that has a BODY ancestor, swap it with the entry
            # just before it in the list (for i == 0 the index i - 1 wraps
            # around to the last entry).
            for table in _tables[::-1]:
                if table.get_ancestor(aw.NodeType.BODY):
                    i = _tables.index(table)
                    temp = _tables[i]
                    _tables[i] = _tables[i - 1]
                    _tables[i - 1] = temp

            # Switch the section that contains the insertion position to
            # landscape orientation.
            section = para.as_paragraph().parent_section
            page_setup = section.page_setup
            page_setup.orientation = aw.Orientation.LANDSCAPE

            # Number of tables whose direct parent is a BODY node.
            all_tables_count = sum(p.parent_node.node_type == aw.NodeType.BODY for p in _tables)
            curr_count = 0
            for table in _tables:  # Insert the tables into the Word file (original note says "in reverse", but this loop walks the list in order). TODO: table styling
                if table.get_ancestor(aw.NodeType.BODY):
                    # The list neighbours of a body table are used as its
                    # header and footer tables.
                    # NOTE(review): table_index + 1 raises IndexError when the
                    # body table is the last list entry.
                    table_index = _tables.index(table)
                    header_table = _tables[table_index - 1].as_table()
                    footer_table = _tables[table_index + 1].as_table()
                    header_table = header_table.clone(True).as_table()
                    footer_table = footer_table.clone(True).as_table()
                    table = table.clone(True).as_table()
                    imported_header = doc_main.import_node(header_table, True)
                    imported_footer = doc_main.import_node(footer_table, True)
                    imported_table = doc_main.import_node(table, True)

                    # Insert header table, body table, footer table and an
                    # empty paragraph one after another; para tracks the most
                    # recently inserted node.
                    para = para.parent_node.insert_after(imported_header, para)
                    para = para.parent_node.insert_after(imported_table, para)
                    para = para.parent_node.insert_after(imported_footer, para)
                    para = para.parent_node.insert_after(aw.Paragraph(doc_main), para)

                    # curr_count starts at 0 and is only incremented in this
                    # branch, once per inserted body table.
                    if curr_count < all_tables_count:
                        builder_main.move_to(para)
                        builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
                        para = builder_main.current_paragraph
                        curr_count += 1

                    # After the last counted table: switch the builder's
                    # current section back to portrait and remove the
                    # builder's current paragraph.
                    if curr_count == all_tables_count:
                        section = builder_main.current_section
                        page_setup = section.page_setup
                        page_setup.orientation = aw.Orientation.PORTRAIT
                        builder_main.current_paragraph.remove()

doc_main.save("Result.docx")

这就是结果:
Result.docx (21.7 KB)

L1VzZXJzL2RpcC9MaWJyYXJ5L0NvbnRhaW5lcnMvNVpTTDJDSlUyVC5jb20uZGluZ3RhbGsubWFjL0RhdGEvTGlicmFyeS9BcHBsaWNhdGlvbiBTdXBwb3J0L0RpbmdUYWxrTWFjLzQwODAyMDQzNzRfdjIvSW1hZ2VGaWxlcy8xNzE0MjY3ODM0NjM5XzdEMjAzMkQ2LUUxQTEtNDNBRC05MTgzLTNFRDJBNTI3ODAyNi5wbmc=.png (158.5 KB)

还是有问题 看这个图片

image.png (100.1 KB)

想要这个效果文件

if not is_header_and_footer:
document = Document(
os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
)
for section in document.sections:
section.footer.is_linked_to_previous = True
section.header.is_linked_to_previous = True

                    document.save(
                        os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                    )这里的代码不能删掉,我们需要设置。是否要页眉页脚

@Tiaohh 使用此代码即可获得:

# Example script: for every paragraph of "111 (1).docx" whose text matches an
# entry of header_list, import the tables of "t_ae_1(1).docx" after the
# heading's related content, add a new-page section break after each table
# group, and save the result as "Result.docx".
# NOTE(review): the snippet calls self.find_last_related_paragraph, so it looks
# like an excerpt from a test method — confirm the context it is meant to run in.
header_list = ["DISPOSITION OF SUBJECTS"]
doc_main = aw.Document(MY_DIR + "111 (1).docx")
builder_main = aw.DocumentBuilder(doc_main)
# All paragraphs of the document, including nested ones (second argument True).
paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
for para in paragraphs:

    para = para.as_paragraph()
    # Plain text of the paragraph with carriage returns and surrounding
    # whitespace removed, so it can be compared against header_list.
    para_content = para.to_string(aw.SaveFormat.TEXT)
    para_content = para_content.replace("\r", "")
    para_content = para_content.strip()
    if para_content in header_list or para_content.capitalize() in header_list:
        # Centered "Normal" paragraph; only referenced by the commented-out
        # code below, it is never inserted into the document here.
        table_header = aw.Paragraph(doc_main)
        table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
        table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
        # Extra step: first clear the content of the current paragraph
        # para.parent_node.insert_after(table_header, para)
        if para_content in ["DISPOSITION OF SUBJECTS", "Data Sets Analyzed", "APPENDICES", "REFERENCES"]:
            # Page on which the matched heading starts, taken before `para`
            # is replaced by the last related paragraph.
            layout_collector = aw.layout.LayoutCollector(doc_main)
            page_index = layout_collector.get_start_page_index(para)
            # Create a section break
            para = self.find_last_related_paragraph(para)
            if page_index > 1:
                # Walk backwards in pre-order until a paragraph is found whose
                # style name contains "C-Heading" and which starts on an
                # earlier page than the matched heading.
                previous_para = para
                while previous_para is not None:
                    if previous_para.node_type is aw.NodeType.PARAGRAPH \
                            and "C-Heading" in previous_para.as_paragraph().paragraph_format.style_name \
                            and layout_collector.get_start_page_index(previous_para) < page_index:
                        previous_para = previous_para
                        break
                    previous_para = previous_para.previous_pre_order(doc_main)

                # NOTE(review): previous_para is None here if the walk above
                # found nothing — confirm move_to tolerates that or add a guard.
                # Insert a continuous section break at that paragraph, using a
                # temporary unformatted, non-list paragraph that is removed
                # again afterwards.
                builder_main.move_to(previous_para)
                builder_main.insert_paragraph()
                builder_main.paragraph_format.clear_formatting()
                builder_main.list_format.list = None
                builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)
                builder_main.current_paragraph.remove()

            # run = aw.Run(doc_main, "")
            # # Make the text bold
            # run.font.bold = True
            # # Append the bold Run to the table-header paragraph
            # table_header.append_child(run)
            # idx_num = header_list.index(para_content)
            # # Look up the tables that belong to this header
            #
            # num_tables = len(table_list[idx_num][::-1])
            # for _index, info in enumerate(table_list[idx_num]):
            #     table_id = info.get("id", "")
            #     is_header_and_footer = info.get("is_header_and_footer", "")
            #     aw.Document(
            #         os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
            #     ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
            #     # Decide whether header/footer are wanted; False means remove them
            #     if not is_header_and_footer:
            #         document = Document(
            #             os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
            #         )
            #         for section in document.sections:
            #             section.footer.is_linked_to_previous = True
            #             section.header.is_linked_to_previous = True
            #
            #         document.save(
            #             os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
            #         )

            # Load the source document, re-save it under ARTIFACTS_DIR and
            # reload the saved copy; the tables are read from that copy.
            doc_rtf = aw.Document(MY_DIR + "t_ae_1(1).docx")
            builder = aw.DocumentBuilder(doc_rtf)
            builder.row_format.heading_format = False
            doc_rtf.save(ARTIFACTS_DIR + "t_ae_1(1).docx")
            _doc = aw.Document(ARTIFACTS_DIR + "t_ae_1(1).docx")

            # Snapshot of every table in the source document as a plain list.
            _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
            _tables = [t for t in _tables]

            # For each table that has a Body ancestor, swap it with the entry
            # just before it in the list.
            # NOTE(review): presumably this arranges the list as
            # header table, body table, footer table — verify with the input.
            for table in _tables[::-1]:
                if table.get_ancestor(aw.NodeType.BODY):
                    i = _tables.index(table)
                    temp = _tables[i]
                    _tables[i] = _tables[i - 1]
                    _tables[i - 1] = temp

            # Switch the section that contains the insertion point to landscape.
            section = para.as_paragraph().parent_section
            page_setup = section.page_setup
            page_setup.orientation = aw.Orientation.LANDSCAPE

            # Number of tables that are direct children of a Body node.
            all_tables_count = sum(p.parent_node.node_type == aw.NodeType.BODY for p in _tables)
            curr_count = 0
            # Original note: insert the tables into the Word file in reverse
            # order. TODO: beautify the tables. (This loop iterates _tables in
            # forward order.)
            for table in _tables:
                if table.get_ancestor(aw.NodeType.BODY):
                    # The neighbours of the body table in the reordered list
                    # are used as its header and footer tables.
                    table_index = _tables.index(table)
                    header_table = _tables[table_index - 1].as_table()
                    footer_table = _tables[table_index + 1].as_table()
                    header_table = header_table.clone(True).as_table()
                    footer_table = footer_table.clone(True).as_table()
                    table = table.clone(True).as_table()
                    # Nodes from another document must be imported before
                    # they can be inserted into doc_main.
                    imported_header = doc_main.import_node(header_table, True)
                    imported_footer = doc_main.import_node(footer_table, True)
                    imported_table = doc_main.import_node(table, True)

                    # `para` always tracks the most recently inserted node so
                    # the next node is placed directly after it.
                    para = para.parent_node.insert_after(imported_header, para)
                    # Flag every row of the header table as a heading row.
                    t1 = para.as_table()
                    for row in t1.rows:
                        row.as_row().row_format.heading_format = True
                    
                    para = para.parent_node.insert_after(imported_table, para)
                    # Flag the first two rows of the data table as heading rows.
                    # NOTE(review): assumes the table has at least two rows.
                    t1 = para.as_table()
                    t1.rows[0].row_format.heading_format = True
                    t1.rows[1].row_format.heading_format = True
                    
                    para = para.parent_node.insert_after(imported_footer, para)
                    # Empty paragraph after the footer table; it hosts the
                    # section break inserted below.
                    para = para.parent_node.insert_after(aw.Paragraph(doc_main), para)

                    if curr_count < all_tables_count:
                        builder_main.move_to(para)
                        builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
                        para = builder_main.current_paragraph
                        curr_count += 1

                    # Once the counter reaches the number of body tables, set
                    # the builder's current section back to portrait and drop
                    # the builder's current paragraph.
                    if curr_count == all_tables_count:
                        section = builder_main.current_section
                        page_setup = section.page_setup
                        page_setup.orientation = aw.Orientation.PORTRAIT
                        builder_main.current_paragraph.remove()

doc_main.save(ARTIFACTS_DIR + "Result.docx")

@Tiaohh 这里只是一个例子。我不能使用你所有的代码,因为我没有你所有的数据。您需要根据自己的目标修改这些代码。

我可以为您提供数据

def find_last_related_paragraph(paragraphs, title_text):
    """Return the last paragraph that belongs to the heading ``title_text``.

    The first paragraph in ``paragraphs`` whose stripped text equals
    ``title_text`` is taken as the heading. Starting from it, following
    sibling paragraphs are accepted while their ``outline_level`` is
    numerically greater than the heading's.

    Args:
        paragraphs: Indexable collection of paragraph nodes; each item must
            support ``as_paragraph()``.
        title_text: Exact (already stripped) text of the heading to look for.

    Returns:
        The last accepted paragraph, or the heading itself when nothing
        follows it. If no heading matches, the last item of ``paragraphs``
        is returned as a fallback.
    """
    title_paragraph = None

    # Locate the heading by its text.
    for para in paragraphs:
        para = para.as_paragraph()
        if para.get_text().strip() == title_text:
            title_paragraph = para
            break

    if title_paragraph is None:
        # Fallback: use the last paragraph of the collection.
        return paragraphs[-1]

    title_level = title_paragraph.paragraph_format.outline_level
    last_related_paragraph = title_paragraph

    while True:
        candidate = last_related_paragraph.next_sibling
        # Stop at the end of the parent or at the first non-paragraph sibling
        # (for example a table): casting such a node with as_paragraph()
        # fails, so it is treated as the end of the heading's paragraphs.
        if candidate is None or candidate.node_type != aw.NodeType.PARAGRAPH:
            break
        candidate = candidate.as_paragraph()
        # A paragraph on the heading's own level (or above) starts new content.
        if candidate.paragraph_format.outline_level <= title_level:
            break
        last_related_paragraph = candidate

    return last_related_paragraph


async def generate_docx_with_result_laikai(
    header_list: list,
    table_list: list,
    save_path,
    template_path,
):
    """Insert converted RTF tables below matching headings of a template.

    For every paragraph of the template whose text (or its ``capitalize()``
    form) occurs in ``header_list``:

    1. the last paragraph related to that heading is located,
    2. a continuous section break is inserted in front of the nearest
       preceding "C-Heading" paragraph,
    3. each table entry configured for the heading is converted from
       ``<id>.rtf`` to ``<id>.docx`` under ``settings.UPLOAD_PATH``, and its
       body tables are imported after the insertion point, each followed by
       a new-page section break.

    Problems while processing one heading are logged and the next paragraph
    is processed; they do not abort the whole run.

    Args:
        header_list: Heading texts to look for in the template.
        table_list: ``table_list[i]`` holds the table entries for
            ``header_list[i]``; each entry is a dict read with the keys
            ``"id"`` and ``"is_header_and_footer"``.
        save_path: Where the resulting document is saved.
        template_path: Path of the template document to load.

    Returns:
        A dict with the fixed placeholder values
        ``{"prompt_tokens": 1, "complete_tokens": 1, "total_tokens": 1}``.
    """
    logger.info(f"header list内容: {header_list}")
    logger.info(f"table list内容: {table_list}")
    doc_main = aw.Document(template_path)
    builder_main = aw.DocumentBuilder(doc_main)
    # NOTE(review): this collection is iterated while nodes are inserted into
    # the same document further down — confirm this is intended.
    paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)

    for para in paragraphs:
        para = para.as_paragraph()
        # Plain paragraph text without carriage returns / surrounding
        # whitespace (table-of-contents entries carry extra control chars).
        para_content = para.to_string(aw.SaveFormat.TEXT).replace("\r", "").strip()
        if not (
            para_content in header_list or para_content.capitalize() in header_list
        ):
            continue

        para = find_last_related_paragraph(paragraphs, para_content)

        # Walk backwards (pre-order) to the nearest paragraph whose style
        # name contains "C-Heading".
        heading = para
        while heading is not None:
            if (
                heading.node_type == aw.NodeType.PARAGRAPH
                and "C-Heading" in heading.as_paragraph().paragraph_format.style_name
            ):
                break
            heading = heading.previous_pre_order(doc_main)

        # The section break goes into the paragraph in front of that heading.
        anchor = heading.previous_sibling if heading is not None else None
        while anchor is not None and anchor.node_type != aw.NodeType.PARAGRAPH:
            anchor = anchor.previous_pre_order(doc_main)

        if anchor is None:
            # Previously this case crashed with an AttributeError on None.
            # Without an anchor there is nowhere to put the section break, so
            # skip this heading in the same log-and-continue style as below.
            logger.warning(f"未找到可插入分节符的位置,跳过标题: {para_content}")
            continue

        builder_main.move_to(anchor)
        builder_main.insert_paragraph()
        builder_main.paragraph_format.clear_formatting()
        builder_main.list_format.list = None
        builder_main.insert_break(aw.BreakType.SECTION_BREAK_CONTINUOUS)

        try:
            # The heading may have matched only through capitalize(); look it
            # up under the spelling that is actually present in header_list.
            header_key = para_content
            if (
                header_key in ["APPENDICES", "REFERENCES"]
                or header_key not in header_list
            ):
                header_key = header_key.capitalize()
            idx_num = header_list.index(header_key)

            # Table entries configured for this heading.
            tables_for_header = table_list[idx_num]
            num_tables = len(tables_for_header)
            logger.info(f"当前表格长度:{num_tables}")
            if not tables_for_header:
                # Formerly a for/else branch, which ran after every completed
                # loop; the message only applies when there is nothing to do.
                logger.info("当前段落不需要插入表格内容")

            for info in tables_for_header:
                table_id = info.get("id", "")
                is_header_and_footer = info.get("is_header_and_footer", "")
                rtf_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
                docx_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")

                # Convert the RTF source to DOCX.
                aw.Document(rtf_path).save(docx_path)

                # Header/footer switch: a falsy flag links every section's
                # header and footer to the previous one (per the original
                # note, this is how they are removed).
                # NOTE(review): `Document` is presumably python-docx — confirm.
                if not is_header_and_footer:
                    document = Document(docx_path)
                    for section in document.sections:
                        section.footer.is_linked_to_previous = True
                        section.header.is_linked_to_previous = True
                    document.save(docx_path)

                # Load/save/reload round trip kept from the original flow.
                doc_rtf = aw.Document(docx_path)
                doc_rtf.save(docx_path)
                _doc = aw.Document(docx_path)

                _tables = list(_doc.get_child_nodes(aw.NodeType.TABLE, True))

                # Swap every table that has a Body ancestor with the entry
                # just before it in the list.
                for table in _tables[::-1]:
                    if table.get_ancestor(aw.NodeType.BODY):
                        i = _tables.index(table)
                        _tables[i], _tables[i - 1] = _tables[i - 1], _tables[i]

                # Number of tables that are direct children of a Body node.
                all_tables_count = sum(
                    p.parent_node.node_type == aw.NodeType.BODY for p in _tables
                )
                curr_count = 0
                # Insert the body tables in reverse order. TODO: beautify tables.
                for table in _tables[::-1]:
                    if not table.get_ancestor(aw.NodeType.BODY):
                        continue

                    table_clone = table.as_table().clone(True)
                    imported_table = doc_main.import_node(table_clone, True)
                    imported_table.as_table().preferred_width = (
                        aw.tables.PreferredWidth.from_percent(100)
                    )
                    # `para` tracks the last inserted node; an empty paragraph
                    # follows each table and hosts the section break below.
                    para = para.parent_node.insert_after(imported_table, para)
                    para = para.as_table().parent_node.insert_after(
                        aw.Paragraph(doc_main), para
                    )

                    # Replace the headers/footers of the second section with
                    # the parents of the third-from-last and last tables.
                    # NOTE(review): the section index 1 and these table
                    # positions are hard-coded — confirm against the input.
                    section = doc_main.sections[1]
                    section.headers_footers.link_to_previous(False)
                    section.headers_footers.clear()

                    header = _tables[-3].parent_node
                    imported_header = doc_main.import_node(header.clone(True), True)
                    section.headers_footers.add(imported_header)

                    footer = _tables[-1].parent_node
                    imported_footer = doc_main.import_node(footer.clone(True), True)
                    section.headers_footers.add(imported_footer)

                    if curr_count < all_tables_count:
                        builder_main.move_to(para)
                        builder_main.insert_break(aw.BreakType.SECTION_BREAK_NEW_PAGE)
                        para = builder_main.current_paragraph
                        curr_count += 1
            # TODO: insert the model output once the tables are in place, and
            # beautify the paragraphs.

        except Exception:
            # Best effort per heading: log the traceback and move on.
            logger.warning(f"没有找到header{traceback.format_exc()}")
            print("没有找到header", traceback.format_exc())

    doc_main.save(save_path)
    return {"prompt_tokens": 1, "complete_tokens": 1, "total_tokens": 1}


if __name__ == "__main__":
    import asyncio

    async def a():
        # Manual smoke run: one heading with two table entries, written to
        # "111.docx" from the blank CSR template.
        template_file = os.path.join(settings.DOCX_TEMPLATE_PATH, "csr_blank.docx")
        headers = ["DISPOSITION OF SUBJECTS"]
        tables_per_header = [
            [
                {"id": "f32b82df656a4bd9a90121d95ea0d86c", "is_header_and_footer": True},
                {"id": "2", "is_header_and_footer": True},
            ],
        ]
        await generate_docx_with_result_laikai(
            headers,
            tables_per_header,
            "111.docx",
            template_file,
        )

    asyncio.run(a())

2.docx (14.9 KB)

f32b82df656a4bd9a90121d95ea0d86c.docx (18.6 KB)

你需要先将这两个 docx 文件转换成 rtf 文件,再进行测试。