Hi,
I need help pasting the corrected content back into shapes while also maintaining the styles of the document.
I also need to keep the references correct across the complete document.
# Unpack the three values returned by extract_all_content_with_styles
# (that function and file_path are defined outside this excerpt).
doc, content_blocks, clean_text_paragraphs = extract_all_content_with_styles(file_path)
# Matches an http:// or https:// URL, stopping at whitespace or a ")".
URL_PATTERN = re.compile(r'https?://[^\s)]+')
def build_word_offsets(text, words):
    """Locate each word of *words* in *text*, scanning left to right.

    Each word is searched for starting at the end of the previously
    found word, so repeated words map to successive occurrences.

    Args:
        text: The string to search in.
        words: Iterable of substrings, in the order they are expected
            to appear in *text*.

    Returns:
        A list with one ``(start, end)`` tuple per word (``end`` is
        exclusive). A word that cannot be found at or after the current
        scan position is reported as ``(-1, -1)``.
    """
    offsets = []
    cursor = 0
    for word in words:
        start = text.find(word, cursor)
        if start == -1:
            # Not found: record a sentinel and leave the cursor alone so
            # the following words are still searched from the right place
            # (otherwise they could match an earlier occurrence).
            offsets.append((-1, -1))
            continue
        end = start + len(word)
        offsets.append((start, end))
        cursor = end
    return offsets
def rgb_string_to_color(rgb_str):
    """Convert a string of the form ``"RGB(r,g,b)"`` into a ``Color``.

    Args:
        rgb_str: The colour string, or a falsy value such as ``None``.

    Returns:
        The result of ``Color.from_argb(r, g, b)``, or ``None`` when the
        input is empty, does not start with ``"RGB("``, or the conversion
        raises.
    """
    if not rgb_str or not rgb_str.startswith("RGB("):
        return None
    try:
        # Drop the "RGB(" prefix and the final character, then split the
        # remainder into exactly three integer components.
        components = rgb_str[4:-1].split(",")
        red, green, blue = (int(part) for part in components)
        return Color.from_argb(red, green, blue)
    except Exception:
        return None
def apply_builder_style(builder, style):
# (Body omitted in this post: this function replaces the builder's styles.)
# Need help with references and hyperlinks.
def style_last_hyperlink_field(paragraph, style):  # non clickable format
    """Apply the font settings in *style* to the last run of *paragraph*.

    This is best-effort: any failure (for example a paragraph without
    runs) is reported with ``print`` and the function returns normally
    instead of raising.

    Args:
        paragraph: Object exposing ``runs``; the last run's ``font`` is
            modified in place.
        style: Dict with optional keys ``bold``, ``italic``,
            ``underline`` (the string ``"1"`` means single underline),
            ``font_name``, ``font_size``, ``color`` and
            ``highlight_color`` (``"RGB(r,g,b)"`` strings).

    Returns:
        None.
    """
    try:
        font = paragraph.runs[-1].font
        font.bold = bool(style.get("bold"))
        font.italic = bool(style.get("italic"))
        # Underline arrives as the string "1"/"0", not as a boolean.
        if style.get("underline") == "1":
            font.underline = aw.Underline.SINGLE
        else:
            font.underline = aw.Underline.NONE
        # Use `or` rather than a .get() default: a key that is present with
        # the value None must still fall back to the default font settings.
        font.name = style.get("font_name") or "Calibri"
        font.size = style.get("font_size") or 11.0
        color = rgb_string_to_color(style.get("color"))
        highlight = rgb_string_to_color(style.get("highlight_color"))
        if color:
            font.color = color
        if highlight:
            font.highlight_color = highlight
    except Exception as e:
        # Keep the best-effort contract, but make the failure visible
        # instead of silently leaving the run partially styled.
        print(f"Could not style last run of paragraph: {e}")
def tag_and_flatten_paragraph_blocks(content_blocks):
    """Flatten nested content blocks into one list of paragraph dicts.

    Every paragraph dict is tagged in place with a ``"source_type"`` key:
    ``"body"`` for top-level paragraphs, ``"table"`` for paragraphs in
    table cells, ``"shape"`` for paragraphs of a top-level shape and
    ``"shape_in_table"`` for paragraphs of a shape inside a table cell.
    Shape paragraphs additionally receive the owning block's
    ``"shape_node"`` value.

    Args:
        content_blocks: Iterable of block dicts with a ``"type"`` of
            ``"paragraph"``, ``"table"`` or ``"shape"``; other types are
            ignored.

    Returns:
        The tagged paragraph dicts (the same objects as in the input),
        in document order.
    """
    flat = []

    def _collect_shape(shape_block, label):
        # Tag each paragraph of the shape and remember which shape owns it.
        for para in shape_block.get("paras", []):
            para["source_type"] = label
            para["shape_node"] = shape_block.get("shape_node")
            flat.append(para)

    for block in content_blocks:
        kind = block.get("type")
        if kind == "paragraph":
            block["source_type"] = "body"
            flat.append(block)
        elif kind == "table":
            cell_items = (
                item
                for row in block.get("rows", [])
                for cell in row.get("cells", [])
                for item in cell.get("content", [])
            )
            for item in cell_items:
                item_kind = item.get("type")
                if item_kind == "paragraph":
                    item["source_type"] = "table"
                    flat.append(item)
                elif item_kind == "shape":
                    _collect_shape(item, "shape_in_table")
        elif kind == "shape":
            _collect_shape(block, "shape")
    return flat
def _clear_paragraph_content(para_node):
    """Remove a paragraph's children but keep shapes anchored in it."""
    drawing_types = (aw.NodeType.SHAPE, aw.NodeType.GROUP_SHAPE)
    # Snapshot the children first: removing nodes while iterating the
    # collection returned by get_child_nodes can skip entries.
    for child in list(para_node.get_child_nodes(aw.NodeType.ANY, False)):
        if child.node_type not in drawing_types:
            child.remove()


def _write_styled_runs(builder, runs):
    """Write every run's text at the builder cursor using that run's style."""
    for run in runs:
        apply_builder_style(builder, run)
        builder.write(run["text"])


def pasteback(doc, content_blocks, corrected_paragraphs):
    """Rewrite the document's paragraphs from the styled runs in the blocks.

    For each non-blank entry of *corrected_paragraphs* (position ``i`` is
    matched to the block whose ``para_id`` is ``i + 1``), the paragraph is
    emptied and its runs are written back with their styles.

    Paragraphs that carry a ``paragraph_node`` -- body, table and shape
    paragraphs alike -- are rewritten in place, so the other paragraphs of
    a shape and the paragraph's own formatting are left alone. Shapes and
    group shapes anchored in a rewritten paragraph are kept. A shape
    paragraph without a ``paragraph_node`` falls back to emptying the
    shape once and appending one new paragraph per block.

    NOTE(review): the text that is written comes from ``run["text"]`` of
    each block; the entries of *corrected_paragraphs* are only used to skip
    blank paragraphs. This assumes the run texts were already updated with
    the corrections before this call -- confirm against the caller.

    Args:
        doc: The Aspose.Words document being modified.
        content_blocks: Extracted block dicts (see
            ``tag_and_flatten_paragraph_blocks``).
        corrected_paragraphs: Corrected paragraph texts in ``para_id``
            order.

    Returns:
        None. Errors for a single paragraph are printed and that
        paragraph is skipped.
    """
    builder = aw.DocumentBuilder(doc)
    para_map = {
        block.get("para_id"): block
        for block in tag_and_flatten_paragraph_blocks(content_blocks)
    }
    # Shapes already emptied by the fallback path, so a shape holding
    # several paragraphs is wiped only once instead of once per paragraph.
    cleared_shapes = set()

    for para_idx, clean_text in enumerate(corrected_paragraphs):
        if not clean_text.strip():
            continue
        block = para_map.get(para_idx + 1)
        if not block:
            continue
        source_type = block.get("source_type")
        para_node = block.get("paragraph_node")
        original_runs = block["runs"]
        try:
            if para_node is not None and source_type in (
                "body", "table", "shape", "shape_in_table"
            ):
                # Rewrite the existing paragraph in place.
                _clear_paragraph_content(para_node)
                builder.move_to(para_node)
                _write_styled_runs(builder, original_runs)
            elif source_type in ("shape", "shape_in_table"):
                # Fallback: no paragraph node is known for this block.
                shape_node = block.get("shape_node")
                if shape_node is None:
                    continue
                shape = shape_node.as_shape()
                if id(shape_node) not in cleared_shapes:
                    existing = shape.get_child_nodes(aw.NodeType.PARAGRAPH, True)
                    for old_para in list(existing):
                        old_para.remove()
                    cleared_shapes.add(id(shape_node))
                new_para = aw.Paragraph(doc)
                shape.append_child(new_para)
                builder.move_to(new_para)
                _write_styled_runs(builder, original_runs)
        except Exception as e:
            print(f"Skipping para {para_idx+1} due to error: {e}")
            continue
We are using this approach to paste the shape data back.
Sample content blocks:
[{'type': 'paragraph',
'para_id': 1,
'text': '',
'runs': [],
'empty': True,
'paragraph_node': <aspose.words.Paragraph object at 0x00000219CCF04750>},
{'type': 'paragraph',
'para_id': 2,
'text': '',
'runs': [],
'empty': True,
'paragraph_node': <aspose.words.Paragraph object at 0x00000219CCF046F0>},
{'type': 'paragraph',
'para_id': 3,
'text': '',
'runs': [],
'empty': True,
'paragraph_node': <aspose.words.Paragraph object at 0x00000219CCF04770>},
{'block_index': 1,
'type': 'shape',
'shape_type': 'TEXT_BOX',
'paras': [{'type': 'paragraph',
'para_id': 4,
'text': 'Key issues',
'runs': [{'text': 'Key issues',
'bold': True,
'italic': False,
'underline': '0',
'font_name': 'Arial',
'font_size': 9.5,
'color': None,
'highlight_color': None,
'shading_color': None,
'all_caps': False,
'strike_through': False,
'superscript': False,
'subscript': False,
'hyperlink': False,
'hyperlink_url': None,
'reference_type': None,
'reference_target': None,
'reference_field_code': None}],
'empty': False,
'paragraph_node': <aspose.words.Paragraph object at 0x00000219CCF04670>},
Shape_only.docx (796.0 KB)