We have code that uses Aspose.Words to convert several document formats (DOCX, RTF, etc.) to RTF for display in a text editor.
This works fine except when both of the following are true: (A) the source document is already RTF, and (B) it contains a character with an accent mark (like ñ).
In this case the output RTF doc contains garbage in place of the character with the accent mark.
Does anyone have experience with this, or know of a fix? Does Aspose have a way to report bugs other than posting in the forums?
Here is some code that demonstrates the problem:
// .NET 4.0
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
// Referencing Aspose.Words.dll version 10.2.0.0
namespace AsposeWordsBugDemo
{
    /// <summary>
    /// Minimal reproduction: loads a small RTF document containing "___ñ___" with
    /// Aspose.Words, saves it back out as RTF, and prints both the input and the
    /// output so the change to the accented character can be compared.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Runs the round trip (RTF bytes -> Aspose.Words document -> RTF bytes) and
        /// writes the byte counts and decoded text of both documents to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // The source RTF document, byte for byte. The accented character is stored
            // as the two bytes 195, 177 (0xC3 0xB1), which is the UTF-8 encoding of "ñ".
            //
            // NOTE(review): the header of this document declares \ansi\ansicpg1252.
            // Read as Windows-1252, 0xC3 0xB1 are the two characters "Ã±", and the
            // \'c3\'b1 seen in the output below is the RTF hex escape for exactly those
            // two bytes. The output may therefore be a faithful round trip of an input
            // that is not valid cp1252 RTF rather than a corruption; a cp1252 input
            // would presumably carry ñ as \'f1 (or \u241?). Confirm against the RTF
            // specification before reporting this as a library bug.
            byte[] rtfBytes = new byte[] {
                123, 92, 114, 116, 102, 49, 92, 97, 110, 115, 105, 92, 97, 110, 115, 105, 99, 112, 103, 49, 50, 53, 50,
                92, 100, 101, 102, 102, 48, 92, 100, 101, 102, 108, 97, 110, 103, 49, 48, 51, 51, 92, 100, 101, 102,
                108, 97, 110, 103, 102, 101, 49, 48, 51, 51, 13, 10, 123, 92, 102, 111, 110, 116, 116, 98, 108, 13, 10,
                123, 92, 102, 48, 32, 65, 114, 105, 97, 108, 59, 125, 13, 10, 125, 13, 10, 123, 92, 99, 111, 108, 111,
                114, 116, 98, 108, 13, 10, 92, 114, 101, 100, 48, 92, 103, 114, 101, 101, 110, 48, 92, 98, 108, 117,
                101, 48, 59, 13, 10, 125, 13, 10, 92, 102, 48, 92, 102, 115, 50, 52, 92, 115, 97, 48, 13, 10, 95, 95,
                95, 195, 177, 95, 95, 95, 92, 112, 97, 114, 13, 10, 125, 13, 10
            };

            Console.WriteLine(rtfBytes.Length);
            /* Writes: 152 */

            Console.WriteLine(Encoding.UTF8.GetString(rtfBytes));
            /* Writes:
            {\rtf1\ansi\ansicpg1252\deff0\deflang1033\deflangfe1033
            {\fonttbl
            {\f0 Arial;}
            }
            {\colortbl
            \red0\green0\blue0;
            }
            \f0\fs24\sa0
            ___ñ___\par
            }
            */
            // Note that the text in this RTF doc is the "___ñ___" on the second to last line.

            byte[] asposeDocsConvertedRtfBytes;

            // The using blocks dispose both streams even if loading or saving throws.
            using (var inputStream = new MemoryStream(rtfBytes))
            using (var outputStream = new MemoryStream())
            {
                // Use Aspose to write the source RTF document to an output stream in RTF too.
                // NOTE(review): judging by the output recorded below, the Encoding option
                // does not appear to change how the RTF body bytes are decoded here;
                // confirm which formats LoadOptions.Encoding applies to in the Aspose docs.
                var doc = new Aspose.Words.Document(inputStream, new Aspose.Words.LoadOptions { Encoding = Encoding.UTF8 });
                doc.Save(outputStream, Aspose.Words.SaveFormat.Rtf);

                // Get the converted bytes that Aspose wrote.
                asposeDocsConvertedRtfBytes = outputStream.ToArray();
            }

            // Here is the reported problem:
            Console.WriteLine(asposeDocsConvertedRtfBytes.Length);
            /* Writes: 1215 */

            Console.WriteLine(Encoding.UTF8.GetString(asposeDocsConvertedRtfBytes));
            /* Writes:
            {\rtf1\ansi\ansicpg1252\uc0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deff0\adeff0{\fonttbl{\f0\fnil\fcharset0 Arial;}}{\colortbl;\red255\green0\blue0;}{\stylesheet{\s0\snext0\styrsid8412110\sqformat\spriority0\ltrpar\li0\lin0\ri0\rin0\ql\faauto\rtlch\afs24\ltrch\fs24 Normal;}{\*\cs10\additive\ssemihidden\spriority0 Default Paragraph Font;}}{\*\generator Aspose.Words for .NET 10.2.0.0;}{\info\version0\edmins0\nofpages0\nofwords0\nofchars0\nofcharsws0}\deflang1033\deflangfe2052\adeflang1025\jexpand\showxmlerrors1\validatexml1\viewscale100\fet0\dghspace180\dgvspace180\dghorigin1800\dgvorigin1440\dghshow1\dgvshow1\dgmargin\sectd\ltrsect\sectdefaultcl\pard\plain\itap0\s0\ltrpar\li0\lin0\ri0\rin0\ql\faauto\rtlch\afs24\ltrch\fs24{\rtlch\afs24\ltrch\b\fs24\cf1 Evaluation Only. Created with Aspose.Words. Copyright 2003-2011 Aspose Pty Ltd.}{\rtlch\afs24\ltrch\fs24\par}\pard\plain\itap0\s0\sa0\ltrpar\li0\lin0\ri0\rin0\ql\faauto\rtlch\afs24\ltrch\fs24{\rtlch\af0\afs24\ltrch\fs24\loch\af0\dbch\af0\hich\f0 ___\'c3\'b1___}{\rtlch\af0\afs24\ltrch\fs24\loch\af0\dbch\af0\hich\f0\par}{\*\latentstyles\lsdstimax267\lsdlockeddef0\lsdsemihiddendef1\lsdunhideuseddef1\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept}}}
            */
            // The text from the original document is no longer "___ñ___"; it is now: "___\'c3\'b1___"
        }
    }
}