怎么在指定标题下面添加新的标题？

Tiaohh · April 23, 2024, 6:15am

目前存在这一个问题，其他问题修改好了问题出现在如果我插入平级的标题，插入在研究の実施体制之后一个我是哈哈哈哈哈标题，如果研究の実施体制有子标题，这样顺序都变了。原来的段落格式不改变

Tiaohh · April 23, 2024, 6:20am

还有一个问题目录序号为什么没有重新排序呀 import aspose.words as aw

    lic = aw.License()
    lic_path = os.path.join(settings.BASE_PATH, "core/Aspose.Total.Product.Family.lic")
    lic.set_license(lic_path)
    # 加载现有文档
    doc = aw.Document("/Users/dip/Desktop/111.docx")
    deleting = False
    # 初始化文档构建器
    builder = aw.DocumentBuilder(doc)
    numbering_pattern = r"^\s*[\d\.]+\s*"  # 匹配以数字和点开头的部分
    # 处理删除操作
    deleted_ids = ["研究分担者"]  # 需要删除的标题列表
    for para in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True):
        text_without_numbering = re.sub(numbering_pattern, "", para.get_text()).strip()
        is_title = para.as_paragraph().paragraph_format.outline_level in [
            0,
            1,
            2,
            3,
            4,
            5,
        ]

        if deleting:
            # 如果在删除状态下，遇到下一个标题则停止删除
            if is_title:
                deleting = False  # 退出删除模式
            else:
                para.remove()  # 删除当前段落

        # 如果找到要删除的标题，开始删除状态
        if is_title and text_without_numbering in deleted_ids:
            para.remove()  # 删除该标题
            deleting = True  # 开始删除模式

    # 处理添加操作
    added_titles_with_parents = [
        {
            "parent_id": 0,
            "parent": "研究の実施体制",
            "added_title": "1.2 研究代wwwwww表者111",
            "is_child": True,
            "level": 2,
        },
        {
            "parent_id": 0,
            "parent": "研究の実施体制",
            "added_title": "2 我是哈哈哈哈哈",
            "is_child": False,
            "level": 1,
        },
    ]

    # 创建标题字典，方便查找父级标题
    title_dict = {
        re.sub(numbering_pattern, "", para.get_text()).strip(): para
        for para in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True)
    }

    # 添加新的标题
    for item in added_titles_with_parents:
        parent_title = item["parent"]
        added_title = item["added_title"]
        level = item["level"]
        if parent_title in title_dict:
            parent_para = title_dict[parent_title]
            parent_para = parent_para.as_paragraph()
            # 将光标移动到父级标题之后
            builder.move_to(parent_para)
            # 插入一个新的段落
            new_paragraph = builder.insert_paragraph().as_paragraph()
            # 从父级标题中获取样式
            parent_style = parent_para.paragraph_format  # 父级段落的样式
            parent_font = parent_para.runs[0].font  # 获取字体

            new_paragraph.paragraph_format.style = doc.styles.get_by_style_identifier(
                getattr(aw.StyleIdentifier, f"HEADING{level}")
            )
            # 添加新的文本
            new_run = aw.Run(doc)
            new_run.font.name = parent_font.name
            new_run.font.size = parent_font.size
            new_run.text = added_title
            new_paragraph.append_child(new_run)

    # # 更新文档中的所有域，这包括目录、交叉引用、页码等

    # 处理修改操作
    modifications = [{"original": "研究代表者", "modified": "1.1 研究代表者111"}]

    # 修改标题
    for mod in modifications:
        original = mod["original"]
        modified = mod["modified"]

        if original in title_dict:
            para = title_dict[original]
            para = para.as_paragraph()

            # 清除原有文本但保持段落结构
            para.runs.clear()  # 清除段落中的所有 `Run`

            # 使用 `insert_after()` 添加新文本
            new_run = aw.Run(doc, modified)  # 创建新的 `Run`
            para.insert_after(new_run, para.first_child)  # 插入到第一个子节点之后
    # 可选：更新自动编号的列表
    # 保存修改后的文档
    doc.update_fields()
    doc.update_table_layout()  # 可选：在目录有图形内容时可使用
    doc.update_list_labels()  # 可选：更新自动编号的列表
    doc.save("modified_document.docx")

Tiaohh · April 23, 2024, 6:53am

以上问题已经解决最后剩下的问题是怎么对文档目录序号进行排序。如果我插入2个序号为2的需要进行重新排序。

 import aspose.words as aw

    lic = aw.License()
    lic_path = os.path.join(settings.BASE_PATH, "core/Aspose.Total.Product.Family.lic")
    lic.set_license(lic_path)
    # 加载现有文档
    doc = aw.Document("/Users/dip/Desktop/111.docx")
    deleting = False
    # 初始化文档构建器
    builder = aw.DocumentBuilder(doc)
    numbering_pattern = r"^\s*[\d\.]+\s*"  # 匹配以数字和点开头的部分
    # 处理删除操作
    deleted_ids = ["研究分担者"]  # 需要删除的标题列表
    for para in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True):
        text_without_numbering = re.sub(numbering_pattern, "", para.get_text()).strip()
        is_title = para.as_paragraph().paragraph_format.outline_level in [
            0,
            1,
            2,
            3,
            4,
            5,
        ]

        if deleting:
            # 如果在删除状态下，遇到下一个标题则停止删除
            if is_title:
                deleting = False  # 退出删除模式
            else:
                para.remove()  # 删除当前段落

        # 如果找到要删除的标题，开始删除状态
        if is_title and text_without_numbering in deleted_ids:
            para.remove()  # 删除该标题
            deleting = True  # 开始删除模式

    # 处理添加操作
    added_titles_with_parents = [
        {
            "parent_id": 0,
            "parent": "研究の実施体制",
            "added_title": "1.2 研究代wwwwww表者111",
            "is_child": True,
            "level": 2,
        },
        {
            "parent_id": 0,
            "parent": "研究の実施体制",
            "added_title": "2 我是哈哈哈哈哈",
            "is_child": False,
            "level": 1,
        },
    ]

    # 创建标题字典，方便查找父级标题
    title_dict = {
        re.sub(numbering_pattern, "", para.get_text()).strip(): para
        for para in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True)
    }

    # 添加新的标题
    for item in added_titles_with_parents:
        parent_title = item["parent"]
        added_title = item["added_title"]
        level = item["level"]

        if parent_title in title_dict:
            parent_para = title_dict[parent_title]
            parent_para = parent_para.as_paragraph()
            builder.move_to(parent_para)  # 将光标移动到父级标题

            if item["is_child"]:
                # 在父级标题后直接插入
                new_paragraph = builder.insert_paragraph()
            else:
                # 找到父级标题的最后一个非标题段落
                current_para = parent_para
                # 遍历直到最后一个与父级标题相关的段落
                while (
                    current_para.next_sibling
                    and current_para.next_sibling.as_paragraph().paragraph_format.outline_level
                    > parent_para.as_paragraph().paragraph_format.outline_level
                ):
                    current_para = current_para.next_sibling

                # 移动到最后一个相关段落
                builder.move_to(current_para)
                new_paragraph = builder.insert_paragraph()

                # 设置新标题的样式和文本
            new_paragraph.paragraph_format.style = doc.styles.get_by_style_identifier(
                getattr(aw.StyleIdentifier, f"HEADING{level}")
            )

            # 设置字体和文本
            new_run = aw.Run(doc)
            parent_font = parent_para.runs[0].font  # 从父级标题获取字体
            new_run.font.name = parent_font.name
            new_run.font.size = parent_font.size
            new_run.text = added_title
            new_paragraph.append_child(new_run)

    # # 更新文档中的所有域，这包括目录、交叉引用、页码等

    # 处理修改操作
    modifications = [{"original": "研究代表者", "modified": "1.1 研究代表者111"}]

    # 修改标题
    for mod in modifications:
        original = mod["original"]
        modified = mod["modified"]

        if original in title_dict:
            para = title_dict[original]
            para = para.as_paragraph()

            # 清除原有文本但保持段落结构
            para.runs.clear()  # 清除段落中的所有 `Run`

            # 使用 `insert_after()` 添加新文本
            new_run = aw.Run(doc, modified)  # 创建新的 `Run`
            para.insert_after(new_run, para.first_child)  # 插入到第一个子节点之后

    doc.save("modified_document.docx")

vyacheslav.deryushev · April 23, 2024, 9:35am

@Tiaohh 在这种情况下，最好的选择是使用列表。我修改了您的文档，并添加了一些列表作为示例。

111_modified.docx (51.4 KB)

另外，请这样更新您的代码：

if item["is_child"]:
    # 在父级标题后直接插入
    new_paragraph = builder.insert_paragraph()
    builder.list_format.list_indent()
else:
    # 找到父级标题的最后一个非标题段落
    current_para = parent_para
    # 遍历直到最后一个与父级标题相关的段落
    while (
            current_para.next_sibling
            and current_para.next_sibling.as_paragraph().paragraph_format.outline_level
            > parent_para.as_paragraph().paragraph_format.outline_level
    ):
        current_para = current_para.next_sibling

    # 移动到最后一个相关段落
    builder.move_to(current_para)
    new_paragraph = builder.insert_paragraph()
    builder.list_format.list = parent_para.as_paragraph().list_format.list

列表会自动更新编号。

如果需要使用 "DocumentBuilder "插入其他文本，请通过以下方式重置列表格式：

builder.list_format.list = None

Tiaohh · April 24, 2024, 7:02am

你好帮我看一下我的代码报错没有 table_header.parent_node.insert_after(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: ‘NoneType’ object has no attribute ‘insert_after’
需求是我只需要在当前标题下面的段落内容之后插入表格信息
async def generate_docx_with_result_laikai(
header_list: list,
table_list: list,
save_path,
template_path,
):

logger.info(f"header list内容: {header_list}")
logger.info(f"table list内容: {table_list}")
base_file = template_path
clear_save_path = base_file
doc_main = aw.Document(clear_save_path)
paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
for para in paragraphs:
    para = para.as_paragraph()
    para_content = para.to_string(aw.SaveFormat.TEXT)
    para_content = para_content.replace("\r", "")
    para_content = (
        para_content.strip()
    )  # 特殊地方，发现目录中有这个符号，暂时不知道符号是干啥的
    if (
        para_content in header_list or para_content.capitalize() in header_list
    ):  # 如果当前段落中有写作内容，那么找到内容，找到生成的结果
        print(para_content)
        table_header = aw.Paragraph(doc_main)
        table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
        table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
        try:
            if para_content in ["APPENDICES", "REFERENCES"]:
                para_content = para_content.capitalize()
            idx_num = header_list.index(para_content)
            # 获取header对应的table
            num_tables = len(table_list[idx_num])
            logger.info(f"当前表格长度:{num_tables}")
            for _index, info in enumerate(table_list[idx_num]):
                table_id = info.get("id", "")
                is_header_and_footer = info.get("is_header_and_footer", "")
                logger.warning(f"table_id in table list:{table_id}")
                aw.Document(
                    os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
                ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
                # 判断是否需要页眉页脚 False为删除页眉页脚
                if not is_header_and_footer:
                    document = Document(
                        os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                    )
                    for section in document.sections:
                        section.footer.is_linked_to_previous = True
                        section.header.is_linked_to_previous = True
                    document.save(
                        os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                    )
                # doc_rtf = aw.Document(
                #     os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                # )
                # builder = aw.DocumentBuilder(doc_rtf)
                # 判断分页是否续表
                # builder.row_format.heading_format = False
                # doc_rtf.save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
                _doc = aw.Document(
                    os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                )
                _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
                _tables = [t for t in _tables]
                # 删除表格第一行内容
                logger.info(f"插入表格: {table_id}")
                for (
                    table
                ) in _tables:  # 将table 信息反向的插入到word文件中。TODO 表格美化
                    table_clone = table.as_table().clone(True)
                    imported_table = doc_main.import_node(table_clone, True)
                    logger.info(
                        f"imported table 节点类型: {imported_table.node_type}"
                    )
                    if imported_table.node_type == aw.NodeType.TABLE:
                        logger.info(f"imported table 节点是表格")
                        imported_table = imported_table.as_table()
                        imported_table.preferred_width = (
                            aw.tables.PreferredWidth.from_percent(100)
                        )
                        imported_table.first_row.remove()
                        for index, row in enumerate(imported_table.rows):
                            row = row.as_row()
                            for cell_index, cell in enumerate(row.cells):
                                cell = cell.as_cell()
                                cell.cell_format.vertical_alignment = (
                                    aw.tables.CellVerticalAlignment.BOTTOM
                                )
                                for paragraph in cell.paragraphs:
                                    paragraph = paragraph.as_paragraph()
                                    # 居中对齐
                                    for run in paragraph.runs:
                                        run = run.as_run()
                                        run.font.name = "Times New Roman"  # 设置西文是新罗马字体
                                        run.font.name_far_east = "宋体"
                                        run.font.size = 8
                        # 在插入段落标题之后插入段落内容
                        table_header.parent_node.insert_after(
                            imported_table, table_header
                        )
                        if _index < num_tables - 1:
                            table_newline = aw.Paragraph(doc_main)
                            run = aw.Run(doc_main, "")
                            table_newline.append_child(run)
                            imported_table.parent_node.insert_before(
                                table_newline, imported_table
                            )
            else:
                logger.info(f"当前段落不需要插入表格内容")
            # 插入result 模型输出结果 结果只插入一次，table插入完成后插入
            # TODO 段落美化

        except:
            logger.warning(f"没有找到header{traceback.format_exc()}")
            print("没有找到header", traceback.format_exc())
doc_main.save(save_path)

Tiaohh · April 24, 2024, 7:18am

表格文件存在表格的标题怎么把文件信息插入到目标文件里吗

vyacheslav.deryushev · April 24, 2024, 8:23am

@Tiaohh 我需要简化您的代码并检查问题所在。您能提供任何文件在这里进行测试吗？

Tiaohh · April 24, 2024, 8:27am

问题已解决目前还有一个小问题

os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                    )
                    builder = aw.DocumentBuilder(doc_rtf)
                    # 判断分页是否续表
                    builder.row_format.heading_format = True
                    doc_rtf.save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
                    _doc = aw.Document(
                        os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                    )

                    _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
                    _tables = [t for t in _tables]
                    # 删除表格第一行内容
                    logger.info(f"插入表格: {table_id}")
                    for table in _tables[
                        ::-1
                    ]:  # 将table 信息反向的插入到word文件中。TODO 表格美化
                        table_clone = table.as_table().clone(True)
                        imported_table = doc_main.import_node(table_clone, True)
                        logger.info(
                            f"imported table 节点类型: {imported_table.node_type}"
                        )
                        if imported_table.node_type == aw.NodeType.TABLE:

                            logger.info(f"imported table 节点是表格")
                            imported_table = imported_table.as_table()
                            imported_table.preferred_width = (
                                aw.tables.PreferredWidth.from_percent(100)
                            )
                            for index, row in enumerate(imported_table.rows):
                                row = row.as_row()
                                if "Source:" in row.get_text():
                                  

                                for cell_index, cell in enumerate(row.cells):
                                    cell = cell.as_cell()
                                    cell.cell_format.vertical_alignment = (
                                        aw.tables.CellVerticalAlignment.BOTTOM
                                    )
                                    for paragraph in cell.paragraphs:
                                        paragraph = paragraph.as_paragraph()
                                        # 居中对齐
                                        for run in paragraph.runs:
                                            run = run.as_run()
                                            run.font.name = (
                                                "Courier New"  # 设置西文是新罗马字体
                                            )

for index, row in enumerate(imported_table.rows):
row = row.as_row()
if “Source:” in row.get_text(): 怎么删除这一行数据呢

vyacheslav.deryushev · April 24, 2024, 8:31am

@Tiaohh 使用 row.remove()

Tiaohh · April 24, 2024, 8:32am

好的目前还有一个问题续表问题下面是我上传的rtf文件
t_ae_1(1).docx (20.9 KB)

Tiaohh · April 24, 2024, 8:32am

问题是如果我写到另一个文件里吗。没有分页，但是还是续表了

Tiaohh · April 24, 2024, 8:34am

111.docx (19.4 KB)

出现问题的文件我已经设置为True了 builder.row_format.heading_format = True

vyacheslav.deryushev · April 24, 2024, 8:42am

@Tiaohh 好的，我需要检查一下。

Tiaohh · April 24, 2024, 8:43am

并且写入进去怎么多了一个页脚呢在页眉上面

Tiaohh · April 24, 2024, 8:49am

async def generate_docx_with_result_laikai(
    header_list: list,
    table_list: list,
    save_path,
    template_path,
):

    logger.info(f"header list内容: {header_list}")
    logger.info(f"table list内容: {table_list}")
    base_file = template_path
    clear_save_path = base_file
    doc_main = aw.Document(clear_save_path)
    paragraphs = doc_main.get_child_nodes(aw.NodeType.PARAGRAPH, True)
    for para in paragraphs:
        para = para.as_paragraph()
        para_content = para.to_string(aw.SaveFormat.TEXT)
        para_content = para_content.replace("\r", "")
        para_content = (
            para_content.strip()
        )  # 特殊地方，发现目录中有这个符号，暂时不知道符号是干啥的
        if (
            para_content in header_list or para_content.capitalize() in header_list
        ):  # 如果当前段落中有写作内容，那么找到内容，找到生成的结果
            table_header = aw.Paragraph(doc_main)
            table_header.paragraph_format.style_identifier = aw.StyleIdentifier.NORMAL
            table_header.paragraph_format.alignment = aw.ParagraphAlignment.CENTER
            try:
                if para_content in ["APPENDICES", "REFERENCES"]:
                    para_content = para_content.capitalize()
                idx_num = header_list.index(para_content)
                # 获取header对应的table
                num_tables = len(table_list[idx_num])
                logger.info(f"当前表格长度:{num_tables}")
                for _index, info in enumerate(table_list[idx_num]):
                    table_id = info.get("id", "")
                    is_header_and_footer = info.get("is_header_and_footer", "")
                    logger.warning(f"table_id in table list:{table_id}")
                    aw.Document(
                        os.path.join(settings.UPLOAD_PATH, f"{table_id}.rtf")
                    ).save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
                    # 判断是否需要页眉页脚 False为删除页眉页脚
                    if not is_header_and_footer:
                        document = Document(
                            os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                        )
                        for section in document.sections:
                            section.footer.is_linked_to_previous = True
                            section.header.is_linked_to_previous = True
                        document.save(
                            os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                        )
                    doc_rtf = aw.Document(
                        os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                    )
                    builder = aw.DocumentBuilder(doc_rtf)
                    # 判断分页是否续表
                    builder.row_format.heading_format = True
                    doc_rtf.save(os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx"))
                    _doc = aw.Document(
                        os.path.join(settings.UPLOAD_PATH, f"{table_id}.docx")
                    )

                    _tables = _doc.get_child_nodes(aw.NodeType.TABLE, True)
                    _tables = [t for t in _tables]
                    # 删除表格第一行内容
                    logger.info(f"插入表格: {table_id}")
                    for table in _tables[
                        ::-1
                    ]:  # 将table 信息反向的插入到word文件中。TODO 表格美化
                        table_clone = table.as_table().clone(True)
                        imported_table = doc_main.import_node(table_clone, True)
                        logger.info(
                            f"imported table 节点类型: {imported_table.node_type}"
                        )
                        if imported_table.node_type == aw.NodeType.TABLE:

                            logger.info(f"imported table 节点是表格")
                            imported_table = imported_table.as_table()
                            imported_table.preferred_width = (
                                aw.tables.PreferredWidth.from_percent(100)
                            )
                            for index, row in enumerate(imported_table.rows):
                                row = row.as_row()
                                # if (
                                #     "Source:" in row.get_text().strip()
                                #     or not row.get_text().strip()
                                # ):
                                #     row.remove()

                                for cell_index, cell in enumerate(row.cells):
                                    cell = cell.as_cell()
                                    cell.cell_format.vertical_alignment = (
                                        aw.tables.CellVerticalAlignment.BOTTOM
                                    )
                                    for paragraph in cell.paragraphs:
                                        paragraph = paragraph.as_paragraph()
                                        # 居中对齐
                                        for run in paragraph.runs:
                                            run = run.as_run()
                                            run.font.name = (
                                                "Courier New"  # 设置西文是新罗马字体
                                            )
                                            run.font.name_far_east = "宋体"
                                            run.font.size = 8
                            # 在插入段落标题之后插入段落内容
                            para.parent_node.insert_after(imported_table, para)
                            # table_header.parent_node.insert_after(
                            #     imported_table, table_header
                            # )
                            # if _index < num_tables - 1:
                            #     table_newline = aw.Paragraph(doc_main)
                            #     run = aw.Run(doc_main, "")
                            #     table_newline.append_child(run)
                            #     imported_table.parent_node.insert_before(
                            #         table_newline, imported_table
                            #     )
                else:
                    logger.info(f"当前段落不需要插入表格内容")
                # 插入result 模型输出结果 结果只插入一次，table插入完成后插入
                # TODO 段落美化

            except:
                logger.warning(f"没有找到header{traceback.format_exc()}")
                print("没有找到header", traceback.format_exc())
    doc_main.save(save_path)

完整代码。还需要把表格插入在标题里的段落后面，目前是插入在段落前面了

Tiaohh · April 24, 2024, 9:36am

你好有进展了吗？？？？？？？

vyacheslav.deryushev · April 24, 2024, 10:21am

@Tiaohh 我简化了代码，并在表前用页脚重现了问题。问题在于 Aspose.Words 的页眉页脚内容放置在表格之前，就像 DOM 模型中一样。因此，您需要以某种方式对"_tables "进行排序，以便在 HeaderFooter 对象之间移动表格。

Tiaohh · April 24, 2024, 10:26am

怎么排序呢？？？？，还有我怎么把表格插入到标题段落下面呢

Tiaohh · April 24, 2024, 10:27am

有修改后的代码吗？？？？？？？

Tiaohh · April 24, 2024, 10:36am

还在看吗？？？？？
？？？？？