Thank you for providing a potential solution. Would you be able to provide a working example for me to reference?
Thanks,
Woon Gi
Thank you for providing a potential solution. Would you be able to provide a working example for me to reference?
Thanks,
Woon Gi
Please use the following code example to convert hyperlinks to images in a Word document. You can skip the email addresses, as you are doing in your code. Hope this helps you.
// Replaces the text of every HYPERLINK field in the document with an EMF image of that text.
//
// Stage 1: surround each hyperlink field with a uniquely named bookmark ("bookmark1",
// "bookmark2", ...) and unlink the field, so only its result text remains between the
// bookmark markers.
Document doc = new Document(MyDir + "sample_input.docx");
int i = 1;
DocumentBuilder builder = new DocumentBuilder(doc);
foreach (Field field in doc.Range.Fields)
{
    if (field.Type != FieldType.FieldHyperlink)
    {
        continue;
    }

    // Start the bookmark before the field and end it after the field.
    builder.MoveToField(field, false);
    builder.StartBookmark("bookmark" + i);
    builder.MoveToField(field, true);
    builder.EndBookmark("bookmark" + i);
    field.Unlink();
    i++;
}

// Stage 2: for each bookmark created above, draw its text into a borderless text box
// placed in front of the bookmark, then blank the bookmark's own text.
LayoutCollector collector = new LayoutCollector(doc);
LayoutEnumerator enumerator = new LayoutEnumerator(doc);
ArrayList nodes = new ArrayList();
foreach (Bookmark bookmark in doc.Range.Bookmarks)
{
    // Skip bookmarks that were already in the document; only the ones created in
    // stage 1 wrap hyperlink text, and blanking any other bookmark would delete content.
    if (!bookmark.Name.StartsWith("bookmark", System.StringComparison.Ordinal))
    {
        continue;
    }

    Shape shape = DrawText(doc, bookmark, collector, enumerator);
    shape.Stroke.On = false;
    bookmark.Text = "";
    nodes.Add(shape);
}

// Rebuild the layout so the new text boxes can be rendered.
doc.UpdatePageLayout();

// Stage 3: render each text box to EMF, insert the picture at the text box position,
// and remove the text box.
foreach (Shape newshape in nodes)
{
    using (MemoryStream stream = new MemoryStream())
    {
        newshape.GetShapeRenderer().Save(stream, new ImageSaveOptions(SaveFormat.Emf));
        builder.MoveTo(newshape);
        Shape shape = builder.InsertImage(stream);
        shape.Stroked = false;
        shape.StrokeWeight = 0;
    }

    newshape.Remove();
}

doc.Save(MyDir + "output.docx");
/// <summary>
/// Creates a text box containing a copy of the nodes between a bookmark's start and end
/// markers, sizes it from the laid-out positions of the two markers, and inserts it
/// immediately before the bookmark start.
/// </summary>
/// <param name="doc">Document in which the text box is created.</param>
/// <param name="bookmark">Bookmark whose content is copied into the text box.</param>
/// <param name="collector">Layout collector used to look up the layout entity of each bookmark marker.</param>
/// <param name="enumerator">Layout enumerator used to read the markers' rectangles; its Current position is changed.</param>
/// <returns>The text box shape that was inserted before the bookmark start.</returns>
public static Shape DrawText(Document doc, Bookmark bookmark, LayoutCollector collector, LayoutEnumerator enumerator)
{
    enumerator.Current = collector.GetEntity(bookmark.BookmarkStart);
    Console.WriteLine(" --> Left : " + enumerator.Rectangle.Left + " Top : " + enumerator.Rectangle.Top);
    double start = enumerator.Rectangle.Left;

    Shape shape = new Shape(doc, ShapeType.TextBox);
    shape.AppendChild(new Paragraph(doc));

    // Copy only the nodes strictly between the two markers. Stopping before the
    // BookmarkEnd keeps a stray end marker out of the text box, and the null check
    // prevents a NullReferenceException when the end marker is not a later sibling
    // of the start marker (for example, when it sits in another paragraph).
    for (Node node = bookmark.BookmarkStart.NextSibling;
         node != null && node != bookmark.BookmarkEnd;
         node = node.NextSibling)
    {
        shape.FirstParagraph.AppendChild(node.Clone(false));
    }

    // No inner padding, so the text box is no larger than the text it holds.
    shape.TextBox.InternalMarginTop = 0;
    shape.TextBox.InternalMarginBottom = 0;
    shape.TextBox.InternalMarginLeft = 0;
    shape.TextBox.InternalMarginRight = 0;

    // Width is the horizontal distance between the two markers plus a small fixed
    // allowance; height is the height of the end marker's rectangle.
    enumerator.Current = collector.GetEntity(bookmark.BookmarkEnd);
    double end = enumerator.Rectangle.Left;
    shape.Width = (end - start) + 5;
    shape.Height = enumerator.Rectangle.Bottom - enumerator.Rectangle.Top;

    bookmark.BookmarkStart.ParentNode.InsertBefore(shape, bookmark.BookmarkStart);
    return shape;
}
I integrated the following logic to my source code and it looks like I am receiving an underline below the image file.
There is a shape.textpath.underline property I could use to validate whether an underline exists or not, but I am looking for a property that is similar to underline.none from the Run API. I believe the changes must be made in the DrawText() method.
The following provides an example of how I implemented your logic and the changes I am trying to make for DrawText().
Thanks,
Woon Gi
/// <summary>
/// Creates a text box containing a copy of the nodes between a bookmark's start and end
/// markers, sizes it from the laid-out positions of the two markers, and inserts it
/// immediately before the bookmark start.
/// </summary>
/// <param name="doc">Document in which the text box is created.</param>
/// <param name="bookmark">Bookmark whose content is copied into the text box.</param>
/// <param name="collector">Layout collector used to look up the layout entity of each bookmark marker.</param>
/// <param name="enumerator">Layout enumerator used to read the markers' rectangles; its Current position is changed.</param>
/// <returns>The text box shape that was inserted before the bookmark start.</returns>
public static Shape DrawText(Aspose.Words.Document doc, Bookmark bookmark, LayoutCollector collector, LayoutEnumerator enumerator)
{
    enumerator.Current = collector.GetEntity(bookmark.BookmarkStart);
    Console.WriteLine(" --> Left : " + enumerator.Rectangle.Left + " Top : " + enumerator.Rectangle.Top);
    double start = enumerator.Rectangle.Left;

    Shape shape = new Shape(doc, ShapeType.TextBox);
    shape.AppendChild(new Paragraph(doc));

    // Copy only the nodes strictly between the two markers. Stopping before the
    // BookmarkEnd keeps a stray end marker out of the text box, and the null check
    // prevents a NullReferenceException when the end marker is not a later sibling
    // of the start marker (for example, when it sits in another paragraph).
    for (Node node = bookmark.BookmarkStart.NextSibling;
         node != null && node != bookmark.BookmarkEnd;
         node = node.NextSibling)
    {
        shape.FirstParagraph.AppendChild(node.Clone(false));
    }

    // No inner padding, so the text box is no larger than the text it holds.
    shape.TextBox.InternalMarginTop = 0;
    shape.TextBox.InternalMarginBottom = 0;
    shape.TextBox.InternalMarginLeft = 0;
    shape.TextBox.InternalMarginRight = 0;

    // Width is the horizontal distance between the two markers plus a fixed allowance
    // of 20; height is the height of the end marker's rectangle plus 5.
    enumerator.Current = collector.GetEntity(bookmark.BookmarkEnd);
    double end = enumerator.Rectangle.Left;
    shape.Width = (end - start) + 20;
    shape.Height = enumerator.Rectangle.Bottom - enumerator.Rectangle.Top + 5;

    // NOTE(review): this sets the shape's TextPath underline flag. It presumably does not
    // change the underline of the copied runs themselves - confirm against the
    // Aspose.Words TextPath documentation.
    shape.TextPath.Underline = false;

    bookmark.BookmarkStart.ParentNode.InsertBefore(shape, bookmark.BookmarkStart);
    return shape;
}
// Walks the document's fields and, for every hyperlink whose address is not an email
// address, wraps the field in a "bookmarkN" bookmark, restyles the runs of the paragraph
// that contains it, and unlinks the field.
// NOTE(review): this excerpt stops before the end of the method; keepFormatting and
// isHyperlink are not read in the part shown here.
private void FormatUploadDocument(Aspose.Words.Document doc, bool keepFormatting)
{
DocumentBuilder builder = new DocumentBuilder(doc);
// Counter used to give each bookmark a unique name.
int i = 1;
// Set to true once at least one hyperlink field has been seen.
bool isHyperlink = false;
foreach (Field field in doc.Range.Fields)
{
if (field.Type == FieldType.FieldHyperlink)
{
isHyperlink = true;
FieldHyperlink hyperlink = (FieldHyperlink)field;
// Do nothing if the hyperlink address is an email address.
// Otherwise the link is removed below so that the hyperlink becomes plain text.
if (Literals.RegEx.IsEmailAddress.IsMatch(hyperlink.Address))
{
continue;
}
// Start a bookmark before the field and end it after the field.
builder.MoveToField(field, false);
builder.StartBookmark("bookmark" + i);
builder.MoveToField(field, true);
builder.EndBookmark("bookmark" + i);
// Sets black colour and no underline on every run of the paragraph that contains
// the field start (links.Runs covers the whole paragraph, not only the hyperlink).
Paragraph links = (Paragraph)field.Start.GetAncestor(NodeType.Paragraph);
if (links != null)
{
foreach (Run run in links.Runs)
{
run.Font.Color = Color.Black;
run.Font.Underline = Underline.None;
}
}
// Unlink the field, then advance the bookmark counter.
field.Unlink();
i++;
}
}
The table cell has underline font formatting, so the inserted shape picks up this formatting as well. You can clear the underline before inserting the image shape, as shown below.
// Render each text box to EMF, insert the picture at the text box position with the
// underline cleared, and remove the text box.
foreach (Shape newshape in nodes)
{
    // The stream is only needed until the image has been inserted, so dispose it
    // at the end of each iteration.
    using (MemoryStream stream = new MemoryStream())
    {
        newshape.GetShapeRenderer().Save(stream, new ImageSaveOptions(SaveFormat.Emf));
        builder.MoveTo(newshape);

        // Clear the underline on the builder before inserting, so the inserted
        // image does not pick up underline formatting.
        builder.Font.Underline = Underline.None;

        Shape shape = builder.InsertImage(stream);
        shape.Stroked = false;
        shape.StrokeWeight = 0;
    }

    newshape.Remove();
}

doc.Save(MyDir + "output.docx");
My guess is that my word generation file unlinks() all non-email hyperlink fields. In order to get a generated PDF, it has to first consider changes that take place in the word generation file.
In this case, only the emails are hyperlink fields and the non emails are just plain text.
Please do not call Field.Unlink method for hyperlinks. If you unlink the email address, you can unlink them.
Would there be a way for me to unlink non email hyperlinks without using Field.Unlink()? The flaw lies in how my Word Generation file is handling non email hyperlinks. By unlinking the fields, the hyperlink text is no longer field.
Since the Word file has to be generated first, the PDF file will not view the non email hyperlink text as a field.
Thanks,
Woon Gi
A field in a Word document is a complex structure consisting of multiple nodes that include field start, field code, field separator, field result and field end. Fields can be nested, contain rich content and span multiple paragraphs or sections in a document. The Field class is a “facade” object that provides properties and methods that allow you to work with a field as a single object.
Yes, it is true. The Field.Unlink method unlinks the field and replaces the field with its most recent result.
If you want email address as field in output PDF, please do not use field.Unlink(); in your code.
If you still face any issues, please describe in detail exactly what you want to achieve using Aspose.Words. We will then guide you accordingly.
The email address is remaining as a field in the word document. The field.Unlink() process is only being implemented for non email hyperlinks. In the previous code I sent you, I included an if condition to allow email hyperlinks to remain as a field.
I am currently having an issue implementing the bookmark, layout, and shape logic. The bookmark is looking for a field in its parameter, but there is no field to reference for the regular hyperlink text.
I would need to find a way to bookmark the hyperlink text (not a field). After the location of the hyperlink text has been identified, the shape to image conversion logic should work.
I have provided an excerpt of where the Field.Unlink() logic is being invoked in the Word Generation file.
// Turns every non-email hyperlink field into plain text: the runs of the paragraph that
// contains the field are set to black with no underline, then the field is unlinked.
// Hyperlinks whose address is an email address are left untouched.
bool isHyperlink = false;
foreach (Field field in doc.Range.Fields)
{
    if (field.Type != FieldType.FieldHyperlink)
    {
        continue;
    }

    isHyperlink = true;
    FieldHyperlink link = (FieldHyperlink)field;

    // Email hyperlinks stay as live fields.
    bool pointsToEmail = Literals.RegEx.IsEmailAddress.IsMatch(link.Address);
    if (pointsToEmail)
    {
        continue;
    }

    // Restyle all runs of the paragraph that holds the start of the field.
    Paragraph hostParagraph = (Paragraph)field.Start.GetAncestor(NodeType.Paragraph);
    if (hostParagraph != null)
    {
        foreach (Run textRun in hostParagraph.Runs)
        {
            textRun.Font.Color = Color.Black;
            textRun.Font.Underline = Underline.None;
        }
    }

    field.Unlink();
}
Just to provide clarification, I would like to figure out how to identify the location of hyperlink text after the embedded hyperlink has been removed (using Field.Unlink()).
Once it has been identified, I would like to know how to insert bookmarks by text location rather than fields.
Thanks,
Woon Gi Hong
Unfortunately, your new requirements are not clear. As per our understanding, you want to convert hyperlink fields to images and insert them at the same place. You can achieve this by using the code example shared in my post in this thread.
You have email addresses and hyperlinks in your document. You can identify whether a hyperlink field is an email address or not and perform the desired task.
Please note that you do not need to call Field.Unlink method for hyperlink that you want to convert to image.
Sorry for the confusion and inconvenience. The logic that you have provided works well when both emails and hyperlinks remain as fields.
However, the requirement for file output should be as follows.
There are two output documents that should be generated:
First would be the Word file, which consists of regular hyperlinks and email addresses. Field.Unlink() is only being used to unlink all regular hyperlinks. By unlinking regular hyperlinks, the document should just be reading them as plaintext.
The same output should be taking place for PDF generation as well. However, Adobe Acrobat is embedding all hyperlink text with hyperlinks by default. Converting the hyperlink text to an image file is the appropriate work around.
After playing around with the shapes to image conversion logic, I believe this would be the best approach to follow.
To answer your questions:
I wouldn’t need to find the location after the hyperlink text has been converted to an image.
After the initial Word file generation, the regular hyperlinks should be unlinked. In this case, I would like to find a way to search for the hyperlink text in the document. Once the hyperlink text has been identified, place bookmarks on the hyperlink text.
By location I am referring to where the hyperlink text is being searched in the document. As of right now, the Aspose.Words.Fields.Field is being invoked to identify all hyperlink fields, including email addresses. However, by unlinking regular hyperlinks during Word file generation, the hyperlink field has been removed. The “if” condition I have provided is only searching for fields not the hyperlink plaintext.
Please let me know if you need further clarification.
Thanks,
Woon Gi
Test Documents.zip (81.8 KB)
You can achieve your requirement using the same code example shared in my previous post here:
To get the Word output document, please save the document after first for loop as shown below.
// Wraps every non-email HYPERLINK field in a uniquely named bookmark ("bookmark1",
// "bookmark2", ...), unlinks it so only its result text remains, and saves the Word
// output. Hyperlinks whose result text is a valid email address are left as fields.
Document doc = new Document(MyDir + "sample_input.docx");
int i = 1;
DocumentBuilder builder = new DocumentBuilder(doc);
foreach (Field field in doc.Range.Fields)
{
    if (field.Type != FieldType.FieldHyperlink)
    {
        continue;
    }

    // MailAddress is used purely as a validator: its constructor throws when the text
    // is not a well-formed address. Only the exceptions that signal "not an email"
    // are caught, so unrelated failures are no longer silently swallowed.
    bool isEmail;
    try
    {
        new System.Net.Mail.MailAddress(field.Result);
        isEmail = true;
    }
    catch (FormatException)
    {
        isEmail = false;
    }
    catch (ArgumentException)
    {
        // Null or empty field result.
        isEmail = false;
    }

    if (isEmail)
    {
        continue;
    }

    // Start the bookmark before the field and end it after the field.
    builder.MoveToField(field, false);
    builder.StartBookmark("bookmark" + i);
    builder.MoveToField(field, true);
    builder.EndBookmark("bookmark" + i);
    field.Unlink();
    i++;
}

doc.Save(MyDir + "output.docx");
To get the PDF document, please use the remaining code and save the document to PDF.
Please note that the only changes from my previous code are saving the document after the first for loop and the email validation check.
Thank you for all your help, thus far. I am getting extremely close to the final solution, but there are some slight changes that might need to be made.
Based on the shape and image render logic that you have provided, it appears that the hyperlinks are showing up as images in the Word Document, prior to saving it in a PDF file. However, I would like to find a way to convert the hyperlink text to an image upon run time.
The DrawText() method that I have provided initially, converts hyperlink text to an image file on runtime. As of right now, if I save the generated Word Document to a PDF file the hyperlink texts are being shown as an image file. However, when I try to generate a PDF file upon runtime, the hyperlink text are still embedded with a hyperlink.
The bookmark logic is able to identify the bookmarks that have been created in the Word Generation C# file. The saved bookmarks are also being read in the PDF Generation C# file. However, I would like to find a way to manipulate the current shape to image logic to work upon run time.
If you need further clarification on this requirement, please let me know.
Thanks,
Woon Gi
Please ZIP and attach the problematic PDF along with screenshot that shows the hyperlink.
You can remove the bookmark before saving the document to PDF using Document.Range.Bookmarks.Clear() method.
We have tested the scenario using the latest version of Aspose.Words for .NET 20.7 with following code example and have not found the shared issue. So, please use Aspose.Words for .NET 20.7 and following code example. We have attached the output PDF with this post for your kind reference. output 20.7.pdf (53.5 KB)
If you still face problem, please share the PDF viewer that you are using. Please also share the screenshot that shows hyperlink in PDF.
// Wraps every non-email HYPERLINK field in a uniquely named bookmark ("bookmark1",
// "bookmark2", ...), unlinks it so only its result text remains, and saves the Word
// output. Hyperlinks whose result text is a valid email address are left as fields.
Document doc = new Document(MyDir + "sample_input.docx");
int i = 1;
DocumentBuilder builder = new DocumentBuilder(doc);
foreach (Field field in doc.Range.Fields)
{
    if (field.Type != FieldType.FieldHyperlink)
    {
        continue;
    }

    // MailAddress is used purely as a validator: its constructor throws when the text
    // is not a well-formed address. Only the exceptions that signal "not an email"
    // are caught, so unrelated failures are no longer silently swallowed.
    bool isEmail;
    try
    {
        new System.Net.Mail.MailAddress(field.Result);
        isEmail = true;
    }
    catch (FormatException)
    {
        isEmail = false;
    }
    catch (ArgumentException)
    {
        // Null or empty field result.
        isEmail = false;
    }

    if (isEmail)
    {
        continue;
    }

    // Start the bookmark before the field and end it after the field.
    builder.MoveToField(field, false);
    builder.StartBookmark("bookmark" + i);
    builder.MoveToField(field, true);
    builder.EndBookmark("bookmark" + i);
    field.Unlink();
    i++;
}

doc.Save(MyDir + "output.docx");
// Convert document to PDF.
// For each "bookmark*" bookmark, draw its text into a borderless text box placed in
// front of the bookmark, then blank the bookmark's own text.
LayoutCollector collector = new LayoutCollector(doc);
LayoutEnumerator enumerator = new LayoutEnumerator(doc);
ArrayList nodes = new ArrayList();
foreach (Bookmark bookmark in doc.Range.Bookmarks)
{
    // Ordinal comparison: the prefix is a fixed identifier, not linguistic text.
    if (!bookmark.Name.StartsWith("bookmark", System.StringComparison.Ordinal))
    {
        continue;
    }

    Shape shape = DrawText(doc, bookmark, collector, enumerator);
    shape.Stroked = false;
    shape.StrokeWeight = 0;
    shape.Stroke.On = false;
    bookmark.Text = "";
    nodes.Add(shape);
}

// Rebuild the layout so the new text boxes can be rendered.
doc.UpdatePageLayout();

// Render each text box to EMF, insert the picture at the text box position, and
// remove the text box.
foreach (Shape newshape in nodes)
{
    // The stream is only needed until the image has been inserted, so dispose it
    // at the end of each iteration.
    using (MemoryStream stream = new MemoryStream())
    {
        newshape.GetShapeRenderer().Save(stream, new ImageSaveOptions(SaveFormat.Emf));
        builder.MoveTo(newshape);
        builder.InsertImage(stream);
    }

    newshape.Remove();
}

// Rebuild the layout again after the replacements, then save as PDF.
doc.UpdatePageLayout();
doc.Save(MyDir + "output 20.7.pdf");
/// <summary>
/// Creates a text box containing a copy of the nodes between a bookmark's start and end
/// markers, sizes it from the laid-out positions of the two markers, and inserts it
/// immediately before the bookmark start.
/// </summary>
/// <param name="doc">Document in which the text box is created.</param>
/// <param name="bookmark">Bookmark whose content is copied into the text box.</param>
/// <param name="collector">Layout collector used to look up the layout entity of each bookmark marker.</param>
/// <param name="enumerator">Layout enumerator used to read the markers' rectangles; its Current position is changed.</param>
/// <returns>The text box shape that was inserted before the bookmark start.</returns>
public static Shape DrawText(Document doc, Bookmark bookmark, LayoutCollector collector, LayoutEnumerator enumerator)
{
    enumerator.Current = collector.GetEntity(bookmark.BookmarkStart);
    Console.WriteLine(" --> Left : " + enumerator.Rectangle.Left + " Top : " + enumerator.Rectangle.Top);
    double start = enumerator.Rectangle.Left;

    Shape shape = new Shape(doc, ShapeType.TextBox);
    shape.AppendChild(new Paragraph(doc));

    // Copy only the nodes strictly between the two markers. Stopping before the
    // BookmarkEnd keeps a stray end marker out of the text box, and the null check
    // prevents a NullReferenceException when the end marker is not a later sibling
    // of the start marker (for example, when it sits in another paragraph).
    //
    // Optional: to drop the underline from the copied text, set
    // Font.Underline = Underline.None on each node that is a Run before cloning it.
    for (Node node = bookmark.BookmarkStart.NextSibling;
         node != null && node != bookmark.BookmarkEnd;
         node = node.NextSibling)
    {
        shape.FirstParagraph.AppendChild(node.Clone(false));
    }

    // No inner padding, so the text box is no larger than the text it holds.
    shape.TextBox.InternalMarginTop = 0;
    shape.TextBox.InternalMarginBottom = 0;
    shape.TextBox.InternalMarginLeft = 0;
    shape.TextBox.InternalMarginRight = 0;

    // Width is the horizontal distance between the two markers plus a small fixed
    // allowance; height is the height of the end marker's rectangle.
    enumerator.Current = collector.GetEntity(bookmark.BookmarkEnd);
    double end = enumerator.Rectangle.Left;
    shape.Width = (end - start) + 5;
    shape.Height = enumerator.Rectangle.Bottom - enumerator.Rectangle.Top;

    bookmark.BookmarkStart.ParentNode.InsertBefore(shape, bookmark.BookmarkStart);
    return shape;
}