Docx 怎么给具体文本添加批注信息呢

vyacheslav.deryushev · June 13, 2024, 8:43am

@hhh1111 我检查了您的文档，发现有很多相同的单词，因此您需要修改代码：

def add_comments_sae_drug_name(file_path, para_text_list):
    """Attach comments to target texts that follow matching heading paragraphs.

    For every ``{heading_text: {target_text: comment_text}}`` entry, the first
    heading paragraph containing ``heading_text`` that has a following sibling
    paragraph with a run whose text equals ``target_text`` gets a comment
    anchored on that run.

    Args:
        file_path: Path of the document to load; the result is saved back to
            the same path.
        para_text_list: List of dicts shaped
            ``{heading_text: {target_text: comment_text}}``.

    Returns:
        ``file_path``, unchanged.
    """
    doc = aw.Document(file_path)

    # The options are identical for every replacement, so build them once.
    opt = aw.replacing.FindReplaceOptions()
    opt.use_substitutions = True

    for item in para_text_list:
        for key, value in item.items():
            for k, v in value.items():
                # "$0" replaces every match with itself; presumably this is
                # done so the matched text ends up in a run of its own and the
                # exact `run.text == k` comparison below can succeed.
                doc.range.replace(k, "$0", opt)
                comment = aw.Comment(doc, '', "", datetime.now())
                comment.set_text(v)

                for node in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True):
                    heading = node.as_paragraph()
                    if not (key in heading.get_text()
                            and heading.paragraph_format.is_heading):
                        continue

                    # Walk the siblings after the heading and pick the first
                    # run whose text is exactly the target text.
                    # NOTE(review): the walk does not stop at the next heading,
                    # so a match in a later section is accepted as well.
                    target_run = None
                    sibling = heading.next_sibling
                    while sibling is not None and target_run is None:
                        # Siblings may be tables or other block nodes; only
                        # paragraphs can be cast and searched for runs.
                        if sibling.node_type == aw.NodeType.PARAGRAPH:
                            for run in sibling.as_paragraph().runs:
                                run = run.as_run()
                                if run.text == k:
                                    target_run = run
                                    break
                        sibling = sibling.next_sibling

                    if target_run is None:
                        # Nothing after this heading matched; try the next
                        # heading that contains the key.
                        continue

                    # Resulting order: range start, run, range end, comment.
                    paragraph = target_run.parent_paragraph
                    comment_start = aw.CommentRangeStart(doc, comment.id)
                    comment_end = aw.CommentRangeEnd(doc, comment.id)
                    paragraph.insert_before(comment_start, target_run)
                    paragraph.insert_after(comment_end, target_run)
                    paragraph.insert_after(comment, comment_end)
                    # The comment node exists only once; stop here so a later
                    # matching heading cannot move it somewhere else.
                    break

    # Save the document back to the path it was loaded from.
    doc.save(file_path)
    return file_path

另外，由于有很多相同的词，你需要改变标题的样式。现在只是普通样式和粗体。没有足够的锚点来找到这些段落。例如，你可以将顶部段落名称的样式设置为 “标题 3”。

hhh1111 · June 13, 2024, 8:47am

可以通过加粗的样式来查找下面的第一个段落最后的句号吗

vyacheslav.deryushev · June 13, 2024, 8:51am

@hhh1111 这种做法不太可靠，因为文档中可能存在同样加粗的相同文字。

hhh1111 · June 13, 2024, 8:53am

vyacheslav.deryushev:

def add_comments_sae_drug_name(file_path, para_text_list):
    """Attach comments to target texts that follow matching heading paragraphs.

    Args:
        file_path: Path of the document to load; the result is saved back to
            the same path.
        para_text_list: List of dicts; each maps a heading text (``key``) to a
            dict of ``{target_text (k): comment_text (v)}``.

    Returns:
        ``file_path``, unchanged.
    """
    doc = aw.Document(file_path)
    # Find the specific text in the document and add a comment
    for item in para_text_list:
        for key, value in item.items():
            for k, v in value.items():
                is_comment_added = False
                opt = aw.replacing.FindReplaceOptions()
                opt.use_substitutions = True
                # "$0" replaces each match with itself; presumably done so the
                # matched text is isolated into its own run — TODO confirm.
                doc.range.replace(k, "$0", opt)
                comment = aw.Comment(doc, '', "", datetime.now())
                comment.set_text(v)
                # Look for a heading paragraph containing `key`, then search the
                # paragraphs after it for a run whose text equals `k`
                for find_para in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True):
                    find_para = find_para.as_paragraph()
                    if key in find_para.get_text() and find_para.paragraph_format.is_heading:
                        find_para = find_para.next_sibling
                        while find_para is not None:
                            # NOTE(review): every following sibling is cast to a
                            # paragraph; siblings of another node type are not
                            # filtered out here.
                            find_para = find_para.as_paragraph()
                            for run in find_para.runs:
                                run = run.as_run()
                                if run.text == k:
                                    # Create the comment range markers
                                    comment_start = aw.CommentRangeStart(doc, comment.id)
                                    comment_end = aw.CommentRangeEnd(doc, comment.id)
                                    # Insert the comment range and the comment
                                    paragraph = run.parent_paragraph
                                    paragraph.insert_before(comment_start, run)
                                    paragraph.insert_after(comment_end, run)
                                    # Inserted after the same run, so the comment
                                    # lands between the run and the range end.
                                    paragraph.insert_after(comment, run)
                                    is_comment_added = True
                                    break

                            if is_comment_added:
                                break
                            find_para = find_para.next_sibling

    # Save the document
    doc.save(file_path)
    return file_path

def add_comments_sae_drug_name(file_path, para_text_list):
    """Attach comments to target texts that follow matching heading paragraphs.

    Args:
        file_path: Path of the document to load; the result is saved back to
            the same path.
        para_text_list: List of dicts shaped
            ``{heading_text: {target_text: comment_text}}``.

    Returns:
        ``file_path``, unchanged.
    """
    doc = aw.Document(file_path)

    # The options are identical for every replacement, so build them once.
    opt = aw.replacing.FindReplaceOptions()
    opt.use_substitutions = True

    for item in para_text_list:
        for key, value in item.items():
            for k, v in value.items():
                # "$0" replaces every match with itself; presumably this is
                # done so the matched text ends up in a run of its own and the
                # exact `run.text == k` comparison below can succeed.
                doc.range.replace(k, "$0", opt)
                comment = aw.Comment(doc, '', "", datetime.now())
                comment.set_text(v)

                for node in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True):
                    heading = node.as_paragraph()
                    if not (key in heading.get_text()
                            and heading.paragraph_format.is_heading):
                        continue

                    # Walk the siblings after the heading and pick the first
                    # run whose text is exactly the target text.
                    target_run = None
                    sibling = heading.next_sibling
                    while sibling is not None and target_run is None:
                        # Only paragraph siblings can be cast and searched.
                        if sibling.node_type == aw.NodeType.PARAGRAPH:
                            for run in sibling.as_paragraph().runs:
                                run = run.as_run()
                                if run.text == k:
                                    target_run = run
                                    break
                        sibling = sibling.next_sibling

                    if target_run is None:
                        # Nothing after this heading matched; try the next one.
                        continue

                    # Resulting order: range start, run, range end, comment.
                    paragraph = target_run.parent_paragraph
                    comment_start = aw.CommentRangeStart(doc, comment.id)
                    comment_end = aw.CommentRangeEnd(doc, comment.id)
                    paragraph.insert_before(comment_start, target_run)
                    paragraph.insert_after(comment_end, target_run)
                    paragraph.insert_after(comment, comment_end)
                    # The comment node exists only once; stop so a later
                    # matching heading cannot move it somewhere else.
                    break

    # Save the document back to the path it was loaded from.
    doc.save(file_path)
    return file_path

add_comments_sae_drug_name("S101001-感染性肺炎, 窦性心动过缓.rtf", [{"感染性肺炎": {"该事件结局为": "插入批注的内容"}},
{"窦性心动过缓": {
"针对该事件的治疗药物包括": "插入批注的内容"}}])

这样调用之后，批注没有插入进去呀。

hhh1111 · June 13, 2024, 8:54am

那我设置标题一呢代码要怎么修改呢

vyacheslav.deryushev · June 13, 2024, 8:57am

@hhh1111 请提供您用于测试的文件。

hhh1111 · June 13, 2024, 9:01am

S101001-感染性肺炎, 窦性心动过缓.docx (368.8 KB)

hhh1111 · June 13, 2024, 9:02am

您好，文件已经提供。这份内容中没有“针对该事件的治疗药物包括”，这种情况下请把批注插入到这个句号处。
image.png (73.2 KB)

hhh1111 · June 13, 2024, 9:02am

还有一种情况是：文中有“针对该事件的治疗药物包括”，这时也需要根据加粗的一级标题去查询它下面的内容。

hhh1111 · June 13, 2024, 9:31am

您好怎么样有结果啦吗？？？？？

vyacheslav.deryushev · June 13, 2024, 9:43am

@hhh1111 我已经回复了另一个帖子，现在我将查看您的文件。

hhh1111 · June 13, 2024, 9:54am

好的谢谢，希望您可以测试全一点，以上2种情况已经描述了

vyacheslav.deryushev · June 13, 2024, 10:07am

@hhh1111 当“针对该事件的治疗药物包括”不在文件中时，期望的结果不太清楚。是否需要在文件中插入该文本？

hhh1111 · June 13, 2024, 10:17am

文件已存在这个内容
image.png (84.5 KB)

hhh1111 · June 13, 2024, 10:18am

如果不存在打个比方我搜索第一个 感染性肺炎下面的针对该事件的治疗药物包括不存在则插入到第一段内容的句号
image.png (89.9 KB)

hhh1111 · June 13, 2024, 10:21am

他出现的段落已经是固定的就在我截图的那个段落里面

vyacheslav.deryushev · June 13, 2024, 10:51am

@hhh1111 请尝试下面这段按粗体样式查找标题的代码。如果在标题后面没有找到目标文本，它还会在标题下第一段的末尾插入句号。

def add_comments_sae_drug_name(file_path, para_text_list):
    """Attach comments to target texts that follow bold title runs.

    A title is a bold run, outside headers/footers, whose text equals the
    title text. The paragraphs after the title's paragraph are searched for a
    run whose text equals the target text; the comment is anchored on the
    first such run. When a title has no such run after it, a "。" is written
    at the end of the paragraph that directly follows the title.

    Args:
        file_path: Path of the document to load; the result is saved back to
            the same path.
        para_text_list: List of dicts shaped
            ``{title_text: {target_text: comment_text}}``.

    Returns:
        ``file_path``, unchanged.
    """
    doc = aw.Document(file_path)
    builder = aw.DocumentBuilder(doc)

    # The options are identical for every replacement, so build them once.
    opt = aw.replacing.FindReplaceOptions()
    opt.use_substitutions = True

    for item in para_text_list:
        for key, value in item.items():
            for k, v in value.items():
                # "$0" replaces every match with itself; presumably this is
                # done so the matched text ends up in a run of its own and the
                # exact `run.text == k` comparison below can succeed.
                doc.range.replace(k, "$0", opt)
                comment = aw.Comment(doc, '', "", datetime.now())
                comment.set_text(v)

                # Iterate over a snapshot: the loop body adds runs to the
                # document, which would otherwise change the collection while
                # it is being walked.
                for node in list(doc.get_child_nodes(aw.NodeType.RUN, True)):
                    title_run = node.as_run()
                    if not (title_run.text == key and title_run.font.bold is True):
                        continue
                    title_para = title_run.parent_paragraph
                    # Titles repeated in headers/footers are not anchors.
                    if title_para.get_ancestor(aw.NodeType.HEADER_FOOTER) is not None:
                        continue

                    # Find the first run after the title whose text is exactly
                    # the target text.
                    # NOTE(review): the walk does not stop at the next title,
                    # so a match in a later section is accepted as well.
                    target_run = None
                    sibling = title_para.next_sibling
                    while sibling is not None and target_run is None:
                        # Only paragraph siblings can be cast and searched.
                        if sibling.node_type == aw.NodeType.PARAGRAPH:
                            for run in sibling.as_paragraph().runs:
                                run = run.as_run()
                                if run.text == k:
                                    target_run = run
                                    break
                        sibling = sibling.next_sibling

                    if target_run is not None:
                        # Resulting order: range start, run, range end, comment.
                        paragraph = target_run.parent_paragraph
                        comment_start = aw.CommentRangeStart(doc, comment.id)
                        comment_end = aw.CommentRangeEnd(doc, comment.id)
                        paragraph.insert_before(comment_start, target_run)
                        paragraph.insert_after(comment_end, target_run)
                        paragraph.insert_after(comment, comment_end)
                        # The comment node exists only once; stop so a later
                        # matching title cannot move it somewhere else.
                        break

                    # No match after this title: append a full stop to the
                    # paragraph directly below it, if there is one.
                    first_below = title_para.next_sibling
                    if (first_below is not None
                            and first_below.node_type == aw.NodeType.PARAGRAPH):
                        builder.move_to(first_below)
                        builder.write("。")

    # Save the document back to the path it was loaded from.
    doc.save(file_path)
    return file_path

hhh1111 · June 14, 2024, 1:29am

vyacheslav.deryushev:

def add_comments_sae_drug_name(file_path, para_text_list):
    """Attach comments to target texts that follow bold title runs.

    Args:
        file_path: Path of the document to load; the result is saved back to
            the same path.
        para_text_list: List of dicts; each maps a title text (``key``) to a
            dict of ``{target_text (k): comment_text (v)}``.

    Returns:
        ``file_path``, unchanged.
    """
    doc = aw.Document(file_path)
    buider = aw.DocumentBuilder(doc)
    # Find the specific text in the document and add a comment
    for item in para_text_list:
        for key, value in item.items():
            for k, v in value.items():
                is_comment_added = False
                opt = aw.replacing.FindReplaceOptions()
                opt.use_substitutions = True
                # "$0" replaces each match with itself; presumably done so the
                # matched text is isolated into its own run — TODO confirm.
                doc.range.replace(k, "$0", opt)
                comment = aw.Comment(doc, '', "", datetime.now())
                comment.set_text(v)
                # Look for a bold run whose text equals `key`, then search the
                # paragraphs after it for a run whose text equals `k`
                for top_run in doc.get_child_nodes(aw.NodeType.RUN, True):
                    top_run = top_run.as_run()
                    if top_run.text == key and top_run.font.bold is True:
                        top_para = top_run.parent_paragraph
                        # Skip matches located inside a header or footer
                        if top_para.get_ancestor(aw.NodeType.HEADER_FOOTER):
                            continue
                        next_para = top_para.next_sibling
                        while next_para is not None:
                            # NOTE(review): every following sibling is cast to a
                            # paragraph; siblings of another node type are not
                            # filtered out here.
                            next_para = next_para.as_paragraph()
                            for run in next_para.runs:
                                run = run.as_run()
                                if run.text == k:
                                    # Create the comment range markers
                                    comment_start = aw.CommentRangeStart(doc, comment.id)
                                    comment_end = aw.CommentRangeEnd(doc, comment.id)
                                    # Insert the comment range and the comment
                                    next_para.insert_before(comment_start, run)
                                    next_para.insert_after(comment_end, run)
                                    # Inserted after the same run, so the comment
                                    # lands between the run and the range end.
                                    next_para.insert_after(comment, run)
                                    is_comment_added = True
                                    break

                            if is_comment_added:
                                break
                            next_para = next_para.next_sibling

                        # Nothing matched: write a full stop at the node that
                        # directly follows the title paragraph
                        if not is_comment_added:
                            buider.move_to(top_para.next_sibling)
                            buider.write("。")

    # Save the document
    doc.save(file_path)
    return file_path

image.jpg (233.2 KB)

还是不太对
# Example call: one {title: {target text: comment text}} mapping per title.
add_comments_sae_drug_name(
    "S101001-感染性肺炎, 窦性心动过缓.rtf",
    [
        {"感染性肺炎": {"该事件结局为": "插入批注的内容"}},
        {"窦性心动过缓": {"针对该事件的治疗药物或操作包括": "插入批注的内容"}},
    ],
)
感染性肺炎加粗字体下面没有该事件结局为的内容。批注应该插入到第一段内容句号的位置
image.png (117.5 KB)

hhh1111 · June 14, 2024, 4:01am

image.png (275.6 KB)

为什么wps打开插入的位置是正常的用word打开批注就乱了
image.jpg (126.9 KB)

S101001-窦性心动过缓, 感染性肺炎.docx (369.1 KB)

vyacheslav.deryushev · June 14, 2024, 7:41am

@hhh1111 请使用下面的代码：当标题下找不到目标文本时，它会把批注插入到标题下第一段末尾的句号处。

def add_comments_sae_drug_name(file_path, para_text_list):
    """Attach comments to target texts that follow bold title runs.

    A title is a bold run, outside headers/footers, whose text equals the
    title text. The paragraphs after the title's paragraph are searched for a
    run whose text equals the target text, and the comment is anchored on the
    first such run. When no such run exists, the comment is anchored on the
    last character of the paragraph that directly follows the title.

    Args:
        file_path: Path of the document to load; the result is saved back to
            the same path.
        para_text_list: List of dicts shaped
            ``{title_text: {target_text: comment_text}}``.

    Returns:
        ``file_path``, unchanged.
    """
    doc = aw.Document(file_path)

    # The options are identical for every replacement, so build them once.
    opt = aw.replacing.FindReplaceOptions()
    opt.use_substitutions = True

    for item in para_text_list:
        for key, value in item.items():
            for k, v in value.items():
                # "$0" replaces every match with itself; presumably this is
                # done so the matched text ends up in a run of its own and the
                # exact `run.text == k` comparison below can succeed.
                doc.range.replace(k, "$0", opt)
                comment = aw.Comment(doc, '', "", datetime.now())
                comment.set_text(v)
                # Range markers that tie the commented text to the comment.
                comment_start = aw.CommentRangeStart(doc, comment.id)
                comment_end = aw.CommentRangeEnd(doc, comment.id)

                for node in doc.get_child_nodes(aw.NodeType.RUN, True):
                    title_run = node.as_run()
                    if not (title_run.text == key and title_run.font.bold is True):
                        continue
                    title_para = title_run.parent_paragraph
                    # Titles repeated in headers/footers are not anchors.
                    if title_para.get_ancestor(aw.NodeType.HEADER_FOOTER) is not None:
                        continue

                    # Find the first run after the title whose text is exactly
                    # the target text.
                    # NOTE(review): the walk does not stop at the next title,
                    # so a match in a later section is accepted as well.
                    target_run = None
                    sibling = title_para.next_sibling
                    while sibling is not None and target_run is None:
                        # Only paragraph siblings can be cast and searched.
                        if sibling.node_type == aw.NodeType.PARAGRAPH:
                            for run in sibling.as_paragraph().runs:
                                run = run.as_run()
                                if run.text == k:
                                    target_run = run
                                    break
                        sibling = sibling.next_sibling

                    if target_run is None:
                        # Fallback: anchor on the last character of the
                        # paragraph directly below the title.
                        first_below = title_para.next_sibling
                        if (first_below is None
                                or first_below.node_type != aw.NodeType.PARAGRAPH):
                            continue
                        first_para = first_below.as_paragraph()

                        # Last run that actually carries text.
                        last_run = None
                        for run in first_para.runs:
                            run = run.as_run()
                            if run.text:
                                last_run = run
                        if last_run is None:
                            continue

                        if len(last_run.text) == 1:
                            # The run is already just the final character.
                            target_run = last_run
                        else:
                            # Split the final character into its own run so the
                            # comment range covers only that character.
                            tail_run = last_run.clone(True).as_run()
                            first_para.insert_after(tail_run, last_run)
                            tail_run.text = last_run.text[-1]
                            last_run.text = last_run.text[:-1]
                            target_run = tail_run

                    # Resulting order: range start, run, range end, comment.
                    paragraph = target_run.parent_paragraph
                    paragraph.insert_before(comment_start, target_run)
                    paragraph.insert_after(comment_end, target_run)
                    paragraph.insert_after(comment, comment_end)
                    # The comment and its markers exist only once; stop so a
                    # later matching title cannot move them somewhere else.
                    break

    # Save the document back to the path it was loaded from.
    doc.save(file_path)
    return file_path