Tiaohh
2
new_paragraph.paragraph_format.character_unit_first_line_indent = 2
new_run = Run(doc_main)
# new_run.text = i + aw.ControlChar.PARAGRAPH_BREAK_CHAR
new_run.text = i.strip()
if "" in i.strip():
new_run.text = " "
new_run.font.highlight_color = # 设置背景颜色为黄色
怎么设置呢
@Tiaohh 如果您的目标是用一些文本替换占位符并突出显示它,您可以使用查找/替换功能轻松实现此目的:
import aspose.words as aw
import aspose.pydrawing as pydraw
doc = aw.Document("C:\\Temp\\in.docx")
opt = aw.replacing.FindReplaceOptions()
opt.apply_font.highlight_color = pydraw.Color.yellow
doc.range.replace("placeholder", "replacement", opt)
doc.save("C:\\Temp\\out.docx")
Tiaohh
4
if node.get_ancestor(aw.NodeType.TABLE) != None:
parent_node = node.parent_node.as_table()
able_content = read_table(parent_node)
data[-1]["Table"].append(1)
这里我怎么传table对象呢
def read_table(tables):
_table = []
for table in tables:
for row in table.as_table().rows:
_row = ""
for cell in row.as_row().cells:
if _row:
_row = _row + "\t" + cell.as_cell().get_text().strip()
else:
_row = cell.as_cell().get_text().strip()
_table.append(_row)
table_info = "\n".join(_table)
return table_info
Tiaohh
5
for paragraph in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True):
try:
node = paragraph.as_paragraph()
level = 1
if node.get_ancestor(aw.NodeType.TABLE) == None:
if node.paragraph_format.outline_level in [0, 1, 2, 3, 4, 5]:
print(node.paragraph_format.style.name, "11111")
level = int(node.paragraph_format.outline_level) + 1
if level > current_level:
# 如果级别更深,将当前标题添加到堆栈
stack.append((current_level, data))
data = []
current_level = level
elif level < current_level:
# 如果级别更浅,将堆栈中的项添加回数据
while stack and stack[-1][0] >= level:
old_level, old_data = stack.pop()
data = old_data + data
current_level = old_level
data.append(
{
"Title": node.get_text(),
"Content": [],
"Level": level,
"Table": [],
"Tbale_name": [],
}
)
还有怎么获取段落的图片信息呢
@Tiaohh
您应该使用以下代码:
if node.get_ancestor(aw.NodeType.TABLE) != None:
parent_table = node.node.get_ancestor(aw.NodeType.TABLE).as_table()
able_content = read_table(parent_table )
不幸的是,目前还不太清楚你的意思。 您能否详细说明您的要求。
Tiaohh
10
def read_table(tables):
_table = []
for table in tables:
for row in table.as_table().rows:
_row = ""
for cell in row.as_row().cells:
if _row:
_row = _row + "\t" + cell.as_cell().get_text().strip()
else:
_row = cell.as_cell().get_text().strip()
_table.append(_row)
table_info = "\n".join(_table)
return table_info
if node.get_ancestor(aw.NodeType.TABLE) != None:
parent_node = node.get_ancestor(aw.NodeType.TABLE).as_table()
able_content = read_table(parent_node)
data[-1]["Table"].append(able_content)
except Exception as e:
这样写报错啊 Proxy error(InvalidCastException): Unable to cast object of type ‘Aspose.Words.Tables.Row’ to type ‘Aspose.Words.Tables.Table’.
@Tiaohh
您的要求还不够明确,能否详细说明一下? 如果可能,请附上输入文档并解释您希望获得的预期输出是什么。
异常发生在哪一行? 请附上您的输入文档并提供可运行的代码,以便我们重现该问题。
Tiaohh
12
for paragraph in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True):
try:
node = paragraph.as_paragraph()
level = 1
if node.get_ancestor(aw.NodeType.TABLE) == None:
if node.paragraph_format.outline_level in [0, 1, 2, 3, 4, 5]:
level = int(node.paragraph_format.outline_level) + 1
if level > current_level:
# 如果级别更深,将当前标题添加到堆栈
stack.append((current_level, data))
data = []
current_level = level
elif level < current_level:
# 如果级别更浅,将堆栈中的项添加回数据
while stack and stack[-1][0] >= level:
old_level, old_data = stack.pop()
data = old_data + data
current_level = old_level
data.append(
{
"Title": node.get_text(),
"Content": [],
"Level": level,
"Table": [],
"Tbale_name": [],
}
)
else:
if data:
if node.get_text().startswith("表"):
if (
"Bullet" in node.paragraph_format.style.name
or "Caption" in node.paragraph_format.style.name
):
data[-1]["Tbale_name"].append(
node.get_text().strip("SEQ 表 \* ARABIC").strip("SEQ")
)
if (
node.get_text().startswith("表")
or node.get_text().startswith("来源:")
or node.get_text().startswith("图")
):
pass
else:
data[-1]["Content"].append(node.get_text())
if node.get_ancestor(aw.NodeType.TABLE) != None:
print(11111)
parent_node = node.get_ancestor(aw.NodeType.TABLE).as_table()
with concurrent.futures.ThreadPoolExecutor() as executor:
able_content = executor.submit(read_table, parent_node).result()
print(able_content)
data[-1]["Table"].append(able_content)
except Exception as e:
print(e)
while stack:
old_level, old_data = stack.pop()
data = old_data + data。
就是文档里面存在图片 我想根据标题获取下面的图片信息
Tiaohh
16
for paragraph in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True):
try:
node = paragraph.as_paragraph()
level = 1
if node.get_ancestor(aw.NodeType.TABLE) == None:
if node.paragraph_format.outline_level in [0, 1, 2, 3, 4, 5]:
level = int(node.paragraph_format.outline_level) + 1
if level > current_level:
# 如果级别更深,将当前标题添加到堆栈
stack.append((current_level, data))
data = []
current_level = level
elif level < current_level:
# 如果级别更浅,将堆栈中的项添加回数据
while stack and stack[-1][0] >= level:
old_level, old_data = stack.pop()
data = old_data + data
current_level = old_level
data.append(
{
"Title": node.get_text(),
"Content": [],
"Level": level,
"Table": [],
"Tbale_name": [],
}
)
else:
if data:
if node.get_text().startswith("表"):
if (
"Bullet" in node.paragraph_format.style.name
or "Caption" in node.paragraph_format.style.name
):
data[-1]["Tbale_name"].append(
node.get_text().strip("SEQ 表 \* ARABIC").strip("SEQ")
)
if (
node.get_text().startswith("表")
or node.get_text().startswith("来源:")
or node.get_text().startswith("图")
):
pass
else:
data[-1]["Content"].append(node.get_text())
if node.get_ancestor(aw.NodeType.TABLE) != None:
print(11111)
parent_node = node.get_ancestor(aw.NodeType.TABLE).as_table()
with concurrent.futures.ThreadPoolExecutor() as executor:
able_content = executor.submit(read_table, parent_node).result()
print(able_content)
data[-1]["Table"].append(able_content)
except Exception as e:
print(e)
while stack:
old_level, old_data = stack.pop()
data = old_data + data。表格数据也不对啊 为什么一直循环一个段落的表格呢?
@Tiaohh 请附上您的输入文档(如果可能的话进行简化)并提供您希望获得的预期输出。
请参阅我们的文档以了解 Aspose.Words 文档对象模型:
https://docs.aspose.com/words/python-net/aspose-words-document-object-model/
这将帮助您理解文档在 DOM 中的表示方式。
MS Word 文档中的图像用 SHAPE 节点表示。
PS: 为了得到及时、准确的答复,请务必清楚地描述您的问题,提供您的输入文档、输出文档和预期输出文档,以便更容易理解您的需求。 如果您提供代码,请对其进行简化以使其可运行,以便我们可以在我们这边测试您的场景。
另外,根据我们的免费支持政策,我们有 12 小时的时间提供响应,因此无需淹没论坛。 感谢您的耐心和理解。
Tiaohh
20
def read_table(tables):
_table = []
for row in tables.rows:
_row = ""
for cell in row.as_row().cells:
if _row:
_row = _row + "\t" + cell.as_cell().get_text().strip()
else:
_row = cell.as_cell().get_text().strip()
_table.append(_row)
table_info = "\n".join(_table)
return table_info
for paragraph in doc.get_child_nodes(aw.NodeType.PARAGRAPH, True):
try:
node = paragraph.as_paragraph()
level = 1
if node.get_ancestor(aw.NodeType.TABLE) == None:
if node.paragraph_format.outline_level in [0, 1, 2, 3, 4, 5]:
level = int(node.paragraph_format.outline_level) + 1
if level > current_level:
# 如果级别更深,将当前标题添加到堆栈
stack.append((current_level, data))
data = []
current_level = level
elif level < current_level:
# 如果级别更浅,将堆栈中的项添加回数据
while stack and stack[-1][0] >= level:
old_level, old_data = stack.pop()
data = old_data + data
current_level = old_level
data.append(
{
"Title": node.get_text(),
"Content": [],
"Level": level,
"Table": [],
"Tbale_name": [],
}
)
else:
if data:
if node.get_text().startswith("表"):
if (
"Bullet" in node.paragraph_format.style.name
or "Caption" in node.paragraph_format.style.name
):
data[-1]["Tbale_name"].append(
node.get_text().strip("SEQ 表 \* ARABIC").strip("SEQ")
)
if (
node.get_text().startswith("表")
or node.get_text().startswith("来源:")
or node.get_text().startswith("图")
):
pass
else:
data[-1]["Content"].append(node.get_text())
if node.get_ancestor(aw.NodeType.TABLE) != None:
print(11111)
parent_node = node.get_ancestor(aw.NodeType.TABLE).as_table()
able_content = read_table(parent_node)
print(able_content)
data[-1]["Table"].append(able_content)
except Exception as e:
print(e)
while stack:
old_level, old_data = stack.pop()
data = old_data + data
print(data)
为什么一直循环一个段落的表格信息呢。这样不对呀
Tiaohh
21
就是我一个段落里面可能有多个表格这里需要执行完一个表格内容。执行下一个表格内容。
if node.get_ancestor(aw.NodeType.TABLE) != None:
print(11111)
parent_node = node.get_ancestor(aw.NodeType.TABLE).as_table()
able_content = read_table(parent_node)
print(able_content)
data[-1]["Table"].append(able_content)