I have an existing app that has been converted to use Aspose.Words to convert an existing RTF stream to HTML. The current code takes the following RTF string:
{\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl{\f0\fnil\fcharset0 Microsoft Sans Serif;}}
\viewkind4\uc1\pard\b\f0\fs17 This is text before the optional text {{{This is optional text with text field options [ a ] [ b ] [ c ] this is the rest of the optional text}}} this is text after the optional text\b0
}
And converts it to the following HTML:
This is text before the optional text This is optional text with text field options [ a ] [ b ] [ c ] this is the rest of the optional text this is text after the optional text
The following code is used to accomplish the conversion:
// Load the RTF markup into an Aspose.Words document. The source stream is only
// needed while the Document constructor runs, so it is disposed right after.
Aspose.Words.Document doc;
using (Stream s = GenerateStreamFromString(rtf))
{
    doc = new Aspose.Words.Document(s);
}

// Configure the HTML export: UTF-8 output with ExportImagesAsBase64 enabled.
Aspose.Words.Saving.HtmlSaveOptions saveOptions = new Aspose.Words.Saving.HtmlSaveOptions();
saveOptions.SaveFormat = Aspose.Words.SaveFormat.Html;
saveOptions.Encoding = Encoding.UTF8;
saveOptions.ExportImagesAsBase64 = true;

// Save the document to an in-memory stream in HTML format.
using (MemoryStream htmlStream = new MemoryStream())
{
    doc.Save(htmlStream, saveOptions);

    // Encoding.UTF8 carries a byte order mark preamble. If the exporter writes it
    // (NOTE(review): confirm against the Aspose.Words version in use),
    // Encoding.GetString would keep it as a leading U+FEFF character in the
    // returned HTML. StreamReader consumes the BOM, so the result is clean
    // whether or not one was written.
    htmlStream.Position = 0;
    using (StreamReader reader = new StreamReader(htmlStream, Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}
What I really need is HTML that looks something like this:
This is text before the optional text This is optional text with text field options [ a ] [ b ] [ c ] this is the rest of the optional text this is text after the optional text
Is there some way to control how Aspose.Words converts the RTF into HTML? Is there a way to supply a custom formatter for a specific call to doc.Save?
The HTML did not display as a string in the post. I need the rendering to be more granular than it is…
@pballance,
Please see input/output documents in Docs.zip (728 Bytes). This 18.1.html was generated by using the latest version of Aspose.Words for .NET i.e. 18.1 and using the following code:
// Export settings: HTML format, UTF-8 encoding, ExportImagesAsBase64 enabled.
var saveOptions = new Aspose.Words.Saving.HtmlSaveOptions
{
    SaveFormat = Aspose.Words.SaveFormat.Html,
    Encoding = Encoding.UTF8,
    ExportImagesAsBase64 = true
};

// Load the sample RTF from MyDir and write the HTML result next to it.
var doc = new Document(MyDir + @"in.rtf");
doc.Save(MyDir + @"18.1.html", saveOptions);
Please create a comparison screenshot highlighting (encircling) the problematic areas in this Aspose.Words-generated HTML, ZIP it, and attach it here for our reference.
In our RTF there are tags like {{{}}} and [] that have meaning and define relationships. We need that reflected in the generated HTML, hence the need to be able to control how the HTML is emitted. We also need to add custom class attributes to the items to identify what type of item each one is (text field, optional text, required text, etc.).
generated_vs_desired.zip (867 Bytes)
Original zip file didn’t reflect the real structure needed. I think this one is better.
generated_vs_desired.zip (871 Bytes)
@pballance,
We are working on your query and will get back to you soon.
@pballance,
Please try using the following code:
var doc = new Document(MyDir + @"in.rtf");

// Register the character style that the Replacer callback assigns to matched runs.
doc.Styles.Add(StyleType.Character, "unselectedtextfield");

// Route every match through Replacer; adjust the pattern to match your own tokens.
var findReplaceOptions = new FindReplaceOptions { ReplacingCallback = new Replacer() };
var tokenPattern = new Regex(@"\[ b \]");
doc.Range.Replace(tokenPattern, "", findReplaceOptions);

// Save as pretty-printed HTML with the style sheet embedded in the output.
var htmlSaveOptions = new HtmlSaveOptions(SaveFormat.Html)
{
    PrettyFormat = true,
    CssStyleSheetType = CssStyleSheetType.Embedded
};
doc.Save(MyDir + @"18.1.html", htmlSaveOptions);
/// <summary>
/// Find-and-replace callback that, instead of replacing the matched text,
/// isolates the matched text into its own runs and assigns them the
/// "unselectedtextfield" character style.
/// </summary>
public class Replacer : IReplacingCallback
{
    // Style assigned to every run that belongs to a match. The calling snippet
    // adds a style with this name to the document before running the replace.
    private const string MatchStyleName = "unselectedtextfield";

    /// <summary>
    /// Splits <paramref name="run"/> at <paramref name="position"/>: the original
    /// run keeps the text before the position, and a clone holding the remaining
    /// text is inserted directly after it.
    /// </summary>
    /// <param name="run">The run to split; it is modified in place.</param>
    /// <param name="position">Character index at which the text is divided.</param>
    /// <returns>The newly inserted run containing the text from the position onward.</returns>
    private static Run SplitRun(Run run, int position)
    {
        Run afterRun = (Run)run.Clone(true);
        afterRun.Text = run.Text.Substring(position);
        run.Text = run.Text.Substring(0, position);
        run.ParentNode.InsertAfter(afterRun, run);
        return afterRun;
    }

    /// <summary>
    /// Called for each match. Collects the runs that make up the match, splitting
    /// the first and last run where the match starts or ends mid-run, and applies
    /// the marker style to them.
    /// </summary>
    /// <param name="e">Match details: the node and offset where the match starts, and the match itself.</param>
    /// <returns><see cref="ReplaceAction.Skip"/>, so the matched text is left as it is.</returns>
    ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
    {
        // This is a Run node that contains either the beginning or the complete match.
        Node currentNode = e.MatchNode;

        // The first (and maybe the only) run can contain text before the match;
        // in that case split it so the match starts on a run boundary.
        if (e.MatchOffset > 0)
        {
            currentNode = SplitRun((Run)currentNode, e.MatchOffset);
        }

        // Typed list of all runs that belong to the match.
        var runs = new System.Collections.Generic.List<Run>();

        // Consume whole runs for as long as they fit entirely inside the match.
        int remainingLength = e.Match.Value.Length;
        while ((remainingLength > 0) &&
               (currentNode != null) &&
               (currentNode.GetText().Length <= remainingLength))
        {
            runs.Add((Run)currentNode);
            remainingLength -= currentNode.GetText().Length;

            // Advance to the next Run node. A loop is needed because other node
            // types (BookmarkStart etc.) can sit between runs.
            do
            {
                currentNode = currentNode.NextSibling;
            }
            while ((currentNode != null) && (currentNode.NodeType != NodeType.Run));
        }

        // The match ends inside the current run: split it and keep only the
        // leading part, which stays in currentNode after the split.
        if ((currentNode != null) && (remainingLength > 0))
        {
            SplitRun((Run)currentNode, remainingLength);
            runs.Add((Run)currentNode);
        }

        foreach (Run run in runs)
        {
            run.Font.StyleName = MatchStyleName;
        }

        // Skip the replacement itself; only the formatting was changed.
        return ReplaceAction.Skip;
    }
}
Hope this helps.