Hi! Team,
We are looking to convert the RTF to Text as below and when the encoding is done with ASCII we find a special characters ‘?’ Getting appended and if the encoding is done with UTF7 then the special character ‘’ is getting appended. However, when we try encoding to UTF8 then the special character doesn’t appear, but our compression logic detects this special character and appends ‘?’ in the later part of our code. Can you let us know how we could resolve this issue? We need are looking to encode with only UTF8.
public static string ConvertRTFtoText(string documentContent)
{
Aspose.Words.Document doc;
String strRTFText = string.Empty;
String test1 = string.Empty;
String test2 = string.Empty;
documentContent = "History";
using (Stream s = GenerateStreamFromString(documentContent))
{
doc = new Aspose.Words.Document(s);
}
Aspose.Words.Saving.TxtSaveOptions saveOptions = new
Aspose.Words.Saving.TxtSaveOptions();
saveOptions.SaveFormat = Aspose.Words.SaveFormat.Text;
//Replace Image with text
foreach (Shape shape in doc.GetChildNodes(NodeType.Shape, true))
shape.ParentParagraph.InsertBefore(
new Run(doc, "[Image removed]"),
shape);
//Replace custom placeholder tags/texts
foreach (Run run in doc.GetChildNodes(NodeType.Run, true))
if (run.Font.Hidden)
run.Remove();
// Save the document to stream in HTML format.
using (MemoryStream rtfStream = new MemoryStream())
{
doc.Save(rtfStream, saveOptions);
// Read the HTML from the stream as plain text.
strRTFText = Encoding.ASCII.GetString(rtfStream.ToArray()); //--this adds '?' to the text
test1 = Encoding.UTF7.GetString(rtfStream.ToArray()); //--this adds '' to the text
test2 = Encoding.UTF8.GetString(rtfStream.ToArray()); //--this adds some
// special invisible characters which our compression logic identifies and add a '?'
}
return strRTFText;
}