您好,PDF 转 Word 转换后出现乱码。
pdf文件:
pdf转word-test.PDF (175.2 KB)
您好感谢您的回复,
我使用的是 Aspose.PDF for .NET
// Save options used for every Aspose.PDF -> Word page conversion below.
// The output format follows the requested suffix: ".doc" selects Doc, anything else DocX.
// The string literal uses plain ASCII quotes; typographic quotes are not valid C# delimiters.
Aspose.Pdf.DocSaveOptions options = new Aspose.Pdf.DocSaveOptions
{
    Format = fileSuffix == ".doc"
        ? Aspose.Pdf.DocSaveOptions.DocFormat.Doc
        : Aspose.Pdf.DocSaveOptions.DocFormat.DocX,
    RecognizeBullets = true,
    AddReturnToLineEnd = true,
    TryMergeAdjacentSameBackgroundImages = false,
    Mode = Aspose.Pdf.DocSaveOptions.RecognitionMode.Textbox
};
// Base name (no directory, no extension) of the source PDF; used later to name the output file.
string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(filesPath);

// Word document that accumulates the converted pages.
Aspose.Words.Document document = new Aspose.Words.Document();

// Remove every child node of the freshly created document (presumably so that only the
// content appended later remains). RemoveAllChildren() avoids removing nodes from
// ChildNodes while that same collection is being enumerated.
document.RemoveAllChildren();
// Converts the requested page range of the source PDF to Word one page at a time, appends
// each converted page to `document`, and finally writes `document` to the output file.
// pageRange has the form "first-last"; null, empty or "0-0" means "all pages".
using (Aspose.Pdf.Document document2 = new Aspose.Pdf.Document(filesPath, filePwd))
{
    int pageCount = document2.Pages.Count;

    // Default: the whole document. Pages are addressed from 1 up to Pages.Count inclusive.
    int numMin = 1;
    int numMax = pageCount;
    if (!string.IsNullOrEmpty(pageRange) && pageRange != "0-0")
    {
        string[] strArray = pageRange.Split('-');
        if (strArray.Length < 2)
        {
            // Fail with a descriptive error instead of an IndexOutOfRangeException.
            throw new FormatException("pageRange must have the form \"first-last\": " + pageRange);
        }

        // Clamp both bounds to the page count; additionally keep the lower bound at 1 or
        // above so that a range such as "0-5" does not index the non-existent page 0.
        numMin = Math.Max(1, Math.Min(int.Parse(strArray[0]), pageCount));
        numMax = Math.Min(int.Parse(strArray[1]), pageCount);
    }

    for (int i = numMin; i <= numMax; i++)
    {
        // Each page is copied into its own temporary PDF so it can be converted in isolation.
        using (Aspose.Pdf.Document document3 = new Aspose.Pdf.Document())
        {
            document3.Pages.Delete();
            using (Aspose.Pdf.Page entity = document2.Pages[i])
            {
                document3.Pages.Add(entity);

                // 0x800000 = 8 MiB initial capacity for the intermediate Word data.
                using (MemoryStream stream2 = new MemoryStream(0x800000))
                {
                    bool saved = false;
                    try
                    {
                        document3.Save(stream2, options);
                        saved = true;
                        document3.FreeMemory();
                    }
                    catch (Exception saveError)
                    {
                        // Best effort per page: report the failure and carry on with the
                        // remaining pages instead of silently ignoring it.
                        System.Diagnostics.Trace.TraceError(
                            "Converting page " + i + " of '" + filesPath + "' failed: " + saveError);
                    }

                    if (saved)
                    {
                        // Rewind: after Save the position is at the end of the written data.
                        stream2.Position = 0;
                        Aspose.Words.Document srcDoc = new Aspose.Words.Document(stream2);
                        document.AppendDocument(srcDoc, Aspose.Words.ImportFormatMode.KeepSourceFormatting);
                    }
                }

                // Disposal of `entity` and the streams is handled by the enclosing using blocks.
                entity.FreeMemory();
            }
        }
    }

    strOutFile = Utils.FileRename(outPath, fileNameWithoutExtension, fileSuffix);

    // FileMode.Create truncates an existing file; OpenOrCreate would leave stale trailing
    // bytes behind whenever the previous file was longer than the new content.
    using (FileStream stream = new FileStream(strOutFile, FileMode.Create, FileAccess.ReadWrite))
    {
        Aspose.Words.SaveFormat saveFormat = fileSuffix == ".doc"
            ? Aspose.Words.SaveFormat.Doc
            : Aspose.Words.SaveFormat.Docx;
        document.Save(stream, saveFormat);
    }

    // Report the total number of pages in the source PDF.
    count = pageCount;
}
}
catch (Exception ex)
{
    // Record the failure instead of discarding it silently. The exception is deliberately
    // not rethrown, so callers observe the same control flow as before.
    System.Diagnostics.Trace.TraceError("PDF to Word conversion failed: " + ex);
}
这是我pdf 转word 时候用的代码
我们仅使用与 Aspose.PDF 相关的代码片段将 PDF 转换为 DOCX/DOC 时,就重现了该问题。我们注意到在输出的 Word 文件中,文本丢失并且出现乱码。
为了修复该问题,我们已在问题跟踪系统中将其记录为 PDFNET-47168。我们将进一步调查该问题的详细信息,并及时向您通报修复进展。请耐心等待,并给我们一些时间。
我们对造成的不便很抱歉。