怎么在指定标题下面添加新的标题？

Tiaohh · April 25, 2024, 1:27am

为什么有页眉页脚就不能续表了呢

Tiaohh · April 25, 2024, 1:37am

顺序都不对啊。表格里怎么最下面出现了 3 条页脚数据？

Tiaohh · April 25, 2024, 2:27am

async def generate_docx_with_result_laikai(
    header_list: list,
    table_list: list,
    save_path,
    template_path,
):
    """Fill a Word template with tables converted from RTF files.

    Every paragraph of the template whose text (or its ``capitalize()`` form)
    is found in ``header_list`` is treated as a target heading. For each
    target, an empty centred paragraph holding a bold run is inserted after
    the paragraph returned by ``find_next_heading_after_target``; every table
    listed in the matching ``table_list`` entry is converted from RTF to
    DOCX, imported into the template and inserted directly after that
    paragraph.

    Args:
        header_list: Heading texts to look for in the template.
        table_list: ``table_list[i]`` holds the table descriptors that belong
            to ``header_list[i]``. Each descriptor is read through the keys
            ``"id"`` (base name of ``<id>.rtf`` under
            ``settings.UPLOAD_PATH``) and ``"is_header_and_footer"``.
        save_path: Path the resulting document is saved to.
        template_path: Path of the template document to load.

    Errors raised while processing the tables of one heading are logged and
    printed; the remaining headings are still processed.
    """
    logger.info(f"header list内容: {header_list}")
    logger.info(f"table list内容: {table_list}")
    doc_main = aw.Document(template_path)
    paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
    # NOTE(review): paragraphs are inserted into doc_main inside this loop; if
    # the collection is live they may be visited as well - confirm.
    for para in paragraphs:
        para = para.as_paragraph()
        # Plain text without carriage returns or surrounding whitespace (the
        # table of contents was observed to contain such extra symbols).
        para_content = para.to_string(aw.SaveFormat.TEXT).replace("\r", "").strip()
        if (
            para_content not in header_list
            and para_content.capitalize() not in header_list
        ):
            continue

        # Empty, centred anchor paragraph with a bold run; tables go after it.
        table_header = aw.Paragraph(doc_main)
        table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
        table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
        # The helper returns (paragraph, is_last); only the paragraph is used.
        para, _is_last = find_next_heading_after_target(paragraphs, para_content)
        run = aw.Run(doc_main, "")
        run.font.bold = True
        table_header.append_child(run)
        para.parent_node.insert_after(table_header, para)
        logger.info(f"当前段落内容：{para_content}")
        try:
            if para_content in ["APPENDICES", "REFERENCES"]:
                para_content = para_content.capitalize()
            idx_num = header_list.index(para_content)
            # Table descriptors that belong to this heading.
            tables_info = table_list[idx_num]
            num_tables = len(tables_info)
            logger.info(f"当前表格长度:{num_tables}")
            for _index, info in enumerate(tables_info):
                table_id = info.get("id", "")
                is_header_and_footer = info.get("is_header_and_footer", "")
                logger.warning(f"table_id in table list:{table_id}")
                rtf_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
                docx_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                # Convert the RTF source to DOCX next to it.
                aw.Document(rtf_path).save(docx_path)
                # A falsy flag means the headers/footers must be dropped:
                # every section is linked to the previous one.
                # NOTE(review): `Document` is presumably python-docx - confirm.
                if not is_header_and_footer:
                    document = Document(docx_path)
                    for section in document.sections:
                        section.footer.is_linked_to_previous = True
                        section.header.is_linked_to_previous = True
                    document.save(docx_path)

                doc_rtf = aw.Document(docx_path)
                builder = aw.DocumentBuilder(doc_rtf)
                # Intended to make header rows repeat on page breaks.
                # NOTE(review): this sets the builder's current row format; it
                # probably does not touch tables already in the document -
                # confirm, and consider setting row_format.heading_format on
                # the header rows of each imported table instead.
                builder.row_format.heading_format = True
                doc_rtf.save(docx_path)
                _doc = aw.Document(docx_path)

                _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
                _tables = [t for t in _tables]
                # Swap every table that lives in a body with its predecessor.
                # NOTE(review): the list is mutated while being iterated and
                # index 0 wraps around to the last element - confirm that the
                # resulting order is the intended one.
                for table in _tables:
                    if table.get_ancestor(aw.NodeType.BODY):
                        i = _tables.index(table)
                        _tables[i], _tables[i - 1] = _tables[i - 1], _tables[i]

                logger.info(f"插入表格: {table_id}")
                # Each table is inserted right after the anchor paragraph, so
                # the tables end up in reverse iteration order. TODO: styling.
                for table in _tables:
                    table_clone = table.as_table().clone(True)
                    imported_table = doc_main.import_node(table_clone, True)
                    logger.info(
                        f"imported table 节点类型: {imported_table.node_type}"
                    )
                    if imported_table.node_type != aw.NodeType.TABLE:
                        continue

                    logger.info(f"imported table 节点是表格")
                    imported_table = imported_table.as_table()
                    imported_table.preferred_width = (
                        aw.tables.PreferredWidth.from_percent(100)
                    )
                    for row in imported_table.rows:
                        for cell in row.as_row().cells:
                            cell = cell.as_cell()
                            cell.cell_format.vertical_alignment = (
                                aw.tables.CellVerticalAlignment.BOTTOM
                            )
                            for paragraph in cell.paragraphs:
                                for cell_run in paragraph.as_paragraph().runs:
                                    cell_run = cell_run.as_run()
                                    # Courier New for western text, SimSun
                                    # for far-east text, 8 pt.
                                    cell_run.font.name = "Courier New"
                                    cell_run.font.name_far_east = "宋体"
                                    cell_run.font.size = 8
                    print("一个表格一件结束")
                    table_header.parent_node.insert_after(
                        imported_table, table_header
                    )
                    # Separate the tables of different files with an empty
                    # paragraph (not added for the last descriptor).
                    if _index < num_tables - 1:
                        table_newline = aw.Paragraph(doc_main)
                        table_newline.append_child(aw.Run(doc_main, ""))
                        imported_table.parent_node.insert_before(
                            table_newline, imported_table
                        )
            # Only report "nothing to insert" when the heading has no tables.
            if num_tables == 0:
                logger.info(f"当前段落不需要插入表格内容")
            # TODO: paragraph styling; model output is meant to be inserted
            # once, after the tables.
        except Exception:
            logger.warning(f"没有找到header{traceback.format_exc()}")
            print("没有找到header", traceback.format_exc())
    doc_main.save(save_path)

为什么插入的表格没有续表？该续表的没有续表，不该续表的却进行了表头插入。

Tiaohh · April 25, 2024, 2:41am

为什么如果读取文件的页眉页脚数据就没有进行续表呢

Tiaohh · April 25, 2024, 6:27am

可以帮我看一下续表功能吗，如果我插入2个表格信息，第二个表格续表的标题怎么变成了第一个表格内容

vyacheslav.deryushev · April 25, 2024, 6:41am

@Tiaohh 我不太明白你的意思。可能是我这边的翻译有问题。您需要合并两个表格，还是将其拆分？也许您可以提供一些图片来说明表格的问题。您在论坛上写了好几个主题，最后一个可以继续讨论的主题是什么？

Tiaohh · April 25, 2024, 6:59am

目前我的文档需要插入多个 rtf 表格信息，并且需要续表。如果续表，第二个表格续表的却是第一个表格的标题。

vyacheslav.deryushev · April 25, 2024, 7:25am

@Tiaohh 因此，您需要连接具有相同标题的表？是否正确？

如果是，可以尝试使用 "is_last_row"来检测第一个表的最后一行，然后继续插入表数据。

Tiaohh · April 25, 2024, 7:47am

怎么设置呢？？？？？？？？？？？？

Tiaohh · April 25, 2024, 9:17am

async def generate_docx_with_result_laikai(
    header_list: list,
    table_list: list,
    save_path,
    template_path,
):
    """Fill a Word template with tables converted from RTF files.

    Every paragraph of the template whose text (or its ``capitalize()`` form)
    is found in ``header_list`` is treated as a target heading. Every table
    listed in the matching ``table_list`` entry is converted from RTF to
    DOCX, imported into the template and inserted directly after the
    paragraph returned by ``find_next_heading_after_target``.

    Args:
        header_list: Heading texts to look for in the template.
        table_list: ``table_list[i]`` holds the table descriptors that belong
            to ``header_list[i]``. Each descriptor is read through the keys
            ``"id"`` (base name of ``<id>.rtf`` under
            ``settings.UPLOAD_PATH``) and ``"is_header_and_footer"``.
        save_path: Path the resulting document is saved to.
        template_path: Path of the template document to load.

    Returns:
        dict: ``prompt_tokens``, ``complete_tokens`` and ``total_tokens``,
        each fixed to ``1``.

    Errors raised while processing the tables of one heading are logged and
    printed; the remaining headings are still processed.
    """
    logger.info(f"header list内容: {header_list}")
    logger.info(f"table list内容: {table_list}")
    doc_main = aw.Document(template_path)
    paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
    for para in paragraphs:
        para = para.as_paragraph()
        # Plain text without carriage returns or surrounding whitespace (the
        # table of contents was observed to contain such extra symbols).
        para_content = para.to_string(aw.SaveFormat.TEXT).replace("\r", "").strip()
        if (
            para_content not in header_list
            and para_content.capitalize() not in header_list
        ):
            continue

        # Anchor paragraph the tables are inserted after.
        # NOTE(review): the original had an `if is_last / else` with two
        # identical `insert_after` branches. When the anchor is the *next
        # heading* (is_last False) the tables presumably belong before it -
        # confirm and switch that case to insert_before if so.
        para, _is_last = find_next_heading_after_target(paragraphs, para_content)
        try:
            if para_content in ["APPENDICES", "REFERENCES"]:
                para_content = para_content.capitalize()
            idx_num = header_list.index(para_content)
            # Table descriptors that belong to this heading.
            tables_info = table_list[idx_num]
            num_tables = len(tables_info)
            logger.info(f"当前表格长度:{num_tables}")
            for info in tables_info:
                table_id = info.get("id", "")
                is_header_and_footer = info.get("is_header_and_footer", "")
                logger.warning(f"table_id in table list:{table_id}")
                rtf_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
                docx_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                # Convert the RTF source to DOCX next to it.
                aw.Document(rtf_path).save(docx_path)
                # A falsy flag means the headers/footers must be dropped:
                # every section is linked to the previous one.
                # NOTE(review): `Document` is presumably python-docx - confirm.
                if not is_header_and_footer:
                    document = Document(docx_path)
                    for section in document.sections:
                        section.footer.is_linked_to_previous = True
                        section.header.is_linked_to_previous = True
                    document.save(docx_path)

                doc_rtf = aw.Document(docx_path)
                builder = aw.DocumentBuilder(doc_rtf)
                # Intended to control header-row repetition on page breaks.
                # NOTE(review): this sets the builder's current row format; it
                # probably does not touch tables already in the document -
                # confirm.
                builder.row_format.heading_format = False
                doc_rtf.save(docx_path)
                _doc = aw.Document(docx_path)

                _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
                _tables = [t for t in _tables]
                # Walking a reversed snapshot, swap every table that lives in
                # a body with its predecessor (index 0 wraps to the end).
                # NOTE(review): confirm the resulting order is intended.
                for table in _tables[::-1]:
                    if table.get_ancestor(aw.NodeType.BODY):
                        i = _tables.index(table)
                        _tables[i], _tables[i - 1] = _tables[i - 1], _tables[i]

                logger.info(f"插入表格: {table_id}")
                # Each table is inserted right after `para`, so the tables
                # end up in reverse iteration order. TODO: table styling.
                for table in _tables:
                    table_clone = table.as_table().clone(True)
                    imported_table = doc_main.import_node(table_clone, True)
                    logger.info(
                        f"imported table 节点类型: {imported_table.node_type}"
                    )
                    if imported_table.node_type == aw.NodeType.TABLE:
                        logger.info(f"imported table 节点是表格")
                        imported_table = imported_table.as_table()
                        imported_table.preferred_width = (
                            aw.tables.PreferredWidth.from_percent(100)
                        )
                        for row in imported_table.rows:
                            for cell in row.as_row().cells:
                                cell = cell.as_cell()
                                cell.cell_format.vertical_alignment = (
                                    aw.tables.CellVerticalAlignment.BOTTOM
                                )
                                for paragraph in cell.paragraphs:
                                    for cell_run in paragraph.as_paragraph().runs:
                                        cell_run = cell_run.as_run()
                                        # Courier New for western text,
                                        # SimSun for far-east text, 8 pt.
                                        cell_run.font.name = "Courier New"
                                        cell_run.font.name_far_east = "宋体"
                                        cell_run.font.size = 8
                        print("一个表格s数据")
                    para.parent_node.insert_after(imported_table, para)
            # Only report "nothing to insert" when the heading has no tables.
            if num_tables == 0:
                logger.info(f"当前段落不需要插入表格内容")
            # TODO: paragraph styling; model output is meant to be inserted
            # once, after the tables.
        except Exception:
            logger.warning(f"没有找到header{traceback.format_exc()}")
            print("没有找到header", traceback.format_exc())
    doc_main.save(save_path)
    return {"prompt_tokens": 1, "complete_tokens": 1, "total_tokens": 1}
def find_next_heading_after_target(paragraphs, target_text):
    """Find the heading paragraph that follows the one containing ``target_text``.

    The paragraphs are scanned in order. Once a paragraph whose stripped text
    contains ``target_text`` has been seen, the first later paragraph with an
    outline level of 0-5 that does not itself contain ``target_text`` is the
    result.

    Args:
        paragraphs: Indexable sequence of nodes offering ``as_paragraph()``.
        target_text: Text identifying the target paragraph (substring match).

    Returns:
        tuple: ``(paragraph, is_last)``. When no following heading exists,
        ``paragraph`` is ``paragraphs[-1]`` and ``is_last`` is ``True``.
        ``is_last`` is also ``True`` when the first heading of the document
        has exactly ``target_text`` as its text.
    """
    heading_levels = (0, 1, 2, 3, 4, 5)
    first_heading_text = None
    found_target = False
    next_heading = None

    for para_node in paragraphs:
        para = para_node.as_paragraph()
        text = para.get_text().strip()
        is_heading = para.paragraph_format.outline_level in heading_levels
        # Remember only the very first heading; it is all that is needed.
        if is_heading and first_heading_text is None:
            first_heading_text = text
        if target_text in text:
            found_target = True
        elif found_target and is_heading:
            # First heading after the target that is not the target itself.
            next_heading = para
            break

    is_last = False
    if next_heading is None:
        # No heading follows the target: fall back to the last paragraph.
        next_heading = paragraphs[-1]
        is_last = True
    # A document without any heading no longer raises IndexError here.
    if first_heading_text and first_heading_text == target_text:
        is_last = True
    print("First key:", first_heading_text)
    return next_heading, is_last

你好，我写的这个不对。怎么把表格插入段落内容下面呢？我写的不对，是需要插入标题段落后面。

vyacheslav.deryushev · April 25, 2024, 9:43am

@Tiaohh 也许更好的做法是在导入前先连接表格。您需要处理第二个表格，删除不需要的行，然后再连接表格。至于页眉和页脚，由于源文件中的页眉/页脚相同，可以把它们收集到单独的列表中，再从列表中取用。这里有一个实现方法，但您需要根据自己的用途（purpose）调整代码。

因此，方法是获取两个具有相同页眉的表格，将其连接起来，如果需要，则按照之前的代码更改样式。

# Sample: merge the body tables of a source document that share the same
# first row into one table and append it to a destination document.
document = aw.Document("t_ae_1(1).docx")
dst_doc = aw.Document("Document.docx")
paras = dst_doc.get_child_nodes(aw.NodeType.PARAGRAPH, True)
collection = document.get_child_nodes(aw.NodeType.TABLE, True)
is_next_table = False
combined_table = aw.tables.Table(dst_doc)

for table in collection:
    # Only tables placed directly in a section body take part in the merge.
    if table.parent_node.node_type != aw.NodeType.BODY:
        continue

    if not is_next_table:
        # The first body table seeds the merge and is kept complete. It must
        # not fall through to the trimming below, otherwise its own rows
        # would be appended to its clone a second time.
        combined_table = table.clone(True).as_table()
        is_next_table = True
        continue

    table = table.as_table()
    if table.first_row.get_text() == combined_table.first_row.get_text():
        # Drop the rows that must not be repeated in the merged table.
        # NOTE(review): after removing rows[0] the indices shift, so
        # rows[1] is the original third row - adjust to the real layout.
        table.rows[0].remove()
        table.rows[1].remove()
        table.last_row.remove()
        second_table = table.clone(True).as_table()

        # Adding a row to another table moves it, which empties the clone.
        while second_table.has_child_nodes:
            combined_table.rows.add(second_table.first_row)

cloned_table = combined_table.clone(True).as_table()
cloned_table.preferred_width = aw.tables.PreferredWidth.from_percent(100)
new_imported_table = dst_doc.import_node(cloned_table, True).as_table()
para = paras[3].as_paragraph()
# Appends the table as the last child of the paragraph's parent node.
para.parent_node.append_child(new_imported_table)

dst_doc.save('test.docx')

这样的代码可以是这样的，但你需要定义和删除你不需要的行。

test.docx (25.2 KB)

Tiaohh · April 25, 2024, 9:48am

好的谢谢上面的问题可以帮我看一下吗

vyacheslav.deryushev · April 25, 2024, 9:54am

@Tiaohh 你是说查看你最近写的帖子？

Tiaohh · April 25, 2024, 9:56am

你好，我写的这个不对。怎么把表格插入段落内容下面呢？我写的不对，是需要插入标题段落后面。

Tiaohh · April 25, 2024, 9:56am

def find_last_related_paragraph(paragraphs, title_text):
    """Return the last node that belongs to the section opened by a title.

    The title is the first paragraph whose stripped text equals
    ``title_text``. Starting from it, following siblings are consumed while
    they are paragraphs with an outline level greater than the title's;
    siblings of another node type (for example tables) cannot be cast to a
    paragraph and are treated as part of the section instead of aborting the
    walk.

    Args:
        paragraphs: Iterable of nodes offering ``as_paragraph()``.
        title_text: Exact (stripped) text of the title paragraph.

    Returns:
        The last related node; the title paragraph itself when nothing
        related follows it.

    Raises:
        ValueError: If no paragraph with text ``title_text`` exists.
    """
    title_paragraph = None

    # Locate the title paragraph.
    for para in paragraphs:
        para = para.as_paragraph()
        para_text = para.get_text().strip()
        print(f"当前段落: '{para_text}'，标题: '{title_text}'")
        if para_text == title_text:
            title_paragraph = para
            break

    if title_paragraph is None:
        raise ValueError(f"未找到标题: {title_text}")

    title_level = title_paragraph.paragraph_format.outline_level
    # The title is a paragraph, so its node type identifies paragraph siblings.
    paragraph_type = title_paragraph.node_type

    last_related = title_paragraph
    sibling = title_paragraph.next_sibling
    while sibling is not None:
        if sibling.node_type == paragraph_type:
            level = sibling.as_paragraph().paragraph_format.outline_level
            # A paragraph at the title's level or above starts a new section.
            if not (level > title_level):
                break
        last_related = sibling
        sibling = sibling.next_sibling

    return last_related


async def generate_docx_with_result_laikai(
    header_list: list,
    table_list: list,
    save_path,
    template_path,
):
    """Fill a Word template with tables converted from RTF files.

    Every paragraph of the template whose text (or its ``capitalize()`` form)
    is found in ``header_list`` is treated as a target heading. Every table
    listed in the matching ``table_list`` entry is converted from RTF to
    DOCX, imported into the template and inserted directly after the node
    returned by ``find_last_related_paragraph``.

    Args:
        header_list: Heading texts to look for in the template.
        table_list: ``table_list[i]`` holds the table descriptors that belong
            to ``header_list[i]``. Each descriptor is read through the keys
            ``"id"`` (base name of ``<id>.rtf`` under
            ``settings.UPLOAD_PATH``) and ``"is_header_and_footer"``.
        save_path: Path the resulting document is saved to.
        template_path: Path of the template document to load.

    Returns:
        dict: ``prompt_tokens``, ``complete_tokens`` and ``total_tokens``,
        each fixed to ``1``.

    Errors raised while processing the tables of one heading are logged and
    printed; the remaining headings are still processed. An error raised by
    ``find_last_related_paragraph`` itself is not caught here.
    """
    logger.info(f"header list内容: {header_list}")
    logger.info(f"table list内容: {table_list}")
    doc_main = aw.Document(template_path)
    paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
    for para in paragraphs:
        para = para.as_paragraph()
        # Plain text without carriage returns or surrounding whitespace (the
        # table of contents was observed to contain such extra symbols).
        para_content = para.to_string(aw.SaveFormat.TEXT).replace("\r", "").strip()
        print(para_content, header_list)
        if (
            para_content not in header_list
            and para_content.capitalize() not in header_list
        ):
            continue

        # Anchor node: the tables are inserted right after it.
        para = find_last_related_paragraph(paragraphs, para_content)
        try:
            if para_content in ["APPENDICES", "REFERENCES"]:
                para_content = para_content.capitalize()
            idx_num = header_list.index(para_content)
            # Table descriptors that belong to this heading.
            tables_info = table_list[idx_num]
            num_tables = len(tables_info)
            logger.info(f"当前表格长度:{num_tables}")
            for info in tables_info:
                table_id = info.get("id", "")
                is_header_and_footer = info.get("is_header_and_footer", "")
                logger.warning(f"table_id in table list:{table_id}")
                rtf_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
                docx_path = os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                # Convert the RTF source to DOCX next to it.
                aw.Document(rtf_path).save(docx_path)
                # A falsy flag means the headers/footers must be dropped:
                # every section is linked to the previous one.
                # NOTE(review): `Document` is presumably python-docx - confirm.
                if not is_header_and_footer:
                    document = Document(docx_path)
                    for section in document.sections:
                        section.footer.is_linked_to_previous = True
                        section.header.is_linked_to_previous = True
                    document.save(docx_path)

                doc_rtf = aw.Document(docx_path)
                builder = aw.DocumentBuilder(doc_rtf)
                # Intended to control header-row repetition on page breaks.
                # NOTE(review): this sets the builder's current row format; it
                # probably does not touch tables already in the document -
                # confirm.
                builder.row_format.heading_format = False
                doc_rtf.save(docx_path)
                _doc = aw.Document(docx_path)

                _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
                _tables = [t for t in _tables]
                # Walking a reversed snapshot, swap every table that lives in
                # a body with its predecessor (index 0 wraps to the end).
                # NOTE(review): confirm the resulting order is intended.
                for table in _tables[::-1]:
                    if table.get_ancestor(aw.NodeType.BODY):
                        i = _tables.index(table)
                        _tables[i], _tables[i - 1] = _tables[i - 1], _tables[i]

                logger.info(f"插入表格: {table_id}")
                # Tables are taken in reverse and each one is inserted right
                # after `para`, so they appear in list order in the output.
                # TODO: table styling.
                for table in _tables[::-1]:
                    table_clone = table.as_table().clone(True)
                    imported_table = doc_main.import_node(table_clone, True)
                    logger.info(
                        f"imported table 节点类型: {imported_table.node_type}"
                    )
                    if imported_table.node_type == aw.NodeType.TABLE:
                        logger.info(f"imported table 节点是表格")
                        imported_table = imported_table.as_table()
                        imported_table.preferred_width = (
                            aw.tables.PreferredWidth.from_percent(100)
                        )
                        for row in imported_table.rows:
                            for cell in row.as_row().cells:
                                cell = cell.as_cell()
                                cell.cell_format.vertical_alignment = (
                                    aw.tables.CellVerticalAlignment.BOTTOM
                                )
                                for paragraph in cell.paragraphs:
                                    for cell_run in paragraph.as_paragraph().runs:
                                        cell_run = cell_run.as_run()
                                        # Courier New for western text,
                                        # SimSun for far-east text, 8 pt.
                                        cell_run.font.name = "Courier New"
                                        cell_run.font.name_far_east = "宋体"
                                        cell_run.font.size = 8
                        print("一个表格s数据")
                    para.parent_node.insert_after(imported_table, para)
            # Only report "nothing to insert" when the heading has no tables.
            if num_tables == 0:
                logger.info(f"当前段落不需要插入表格内容")
            # TODO: paragraph styling; model output is meant to be inserted
            # once, after the tables.
        except Exception:
            logger.warning(f"没有找到header{traceback.format_exc()}")
            print("没有找到header", traceback.format_exc())
    doc_main.save(save_path)
    return {"prompt_tokens": 1, "complete_tokens": 1, "total_tokens": 1}

vyacheslav.deryushev · April 25, 2024, 10:54am

@Tiaohh 您在 table_list 中存储了哪些数据？

Tiaohh · April 25, 2024, 11:04am

存储的是文件 id 信息，然后根据 id 读取文件。

Tiaohh · April 25, 2024, 11:05am

  )
                builder = aw.DocumentBuilder(doc_rtf)
                # 判断分页是否续表
                builder.row_format.heading_format = False
                # builder.row_format.heading_format = True
                doc_rtf.save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
                _doc = aw.Document(
                    os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                )
                _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
                _tables = [t for t in _tables]
                for table in _tables[::-1]:
                    if table.get_ancestor(aw.NodeType.BODY):
                        i = _tables.index(table)
                        temp = _tables[i]
                        _tables[i] = _tables[i - 1]
                        _tables[i - 1] = temp
                # 删除表格第一行内容
                logger.info(f"插入表格: {table_id}")
                for table in _tables[
                    ::-1
                ]:  # 将table 信息反向的插入到word文件中。TODO 表格美化

                    table_clone = table.as_table().clone(True)
                    imported_table = doc_main.import_node(table_clone, True)
                    logger.info(
                        f"imported table 节点类型: {imported_table.node_type}"
                    )

for table in _tables[::-1]:
if table.get_ancestor(aw.NodeType.BODY):
i = _tables.index(table)
temp = _tables[i]
_tables[i] = _tables[i - 1]
_tables[i - 1] = temp

怎么把这些内容合成一个表格信息呢？页眉、正文表格、页脚，怎么合并成一个表格信息呢？

vyacheslav.deryushev · April 25, 2024, 12:03pm

@Tiaohh 您可以复制现有行、更新数据，并使用 "insert_before" 或 "append_child" 在顶部或底部插入数据。

# Clone the table's first row (clone(True) requests a deep copy) and cast it to a Row.
row_to_insert = new_imported_table.first_row.clone(True).as_row()
# Append a new text run to the first paragraph of the cloned row's first cell.
row_to_insert.cells[0].first_paragraph.append_child(aw.Run(dst_doc, "This is new run!"))
# Place the cloned row in front of the table's current first row.
new_imported_table.insert_before(row_to_insert, new_imported_table.first_row)

vyacheslav.deryushev · April 25, 2024, 3:09pm

@Tiaohh 我稍微修改了一下你的代码，就能得到你想要的结果:

@staticmethod
def find_last_related_paragraph(para):
    """Return the last sibling node that still belongs to the heading ``para``.

    Starting at ``para``, following siblings are walked until a paragraph
    is found whose outline level is not greater than that of ``para``
    (i.e. a heading of the same or a higher rank), or until there are no
    more siblings.

    Args:
        para: The heading paragraph whose section should be scanned. It
            must expose ``paragraph_format.outline_level``, ``node_type``
            and ``next_sibling``.

    Returns:
        The last node of the section headed by ``para``; ``para`` itself
        when nothing related follows it. The result may be a
        non-paragraph node (for example a table) when that is the last
        piece of content under the heading.
    """
    # The heading's own level never changes, so read it once.
    heading_level = para.paragraph_format.outline_level
    last_related = para
    sibling = para.next_sibling
    while sibling is not None:
        # Only paragraphs carry an outline level. Other block-level nodes
        # (e.g. tables already placed under the heading) cannot be cast
        # with as_paragraph(), so they are treated as section content
        # instead of aborting the scan.
        if sibling.node_type == para.node_type:
            sibling_level = sibling.as_paragraph().paragraph_format.outline_level
            if sibling_level <= heading_level:
                # Next heading of the same or a higher rank: section ends.
                break
        last_related = sibling
        sibling = sibling.next_sibling

    return last_related

# Rebind para to the node returned by find_last_related_paragraph for the current para.
para = self.find_last_related_paragraph(para)