My excel file has only 1 column and over 150 thousand records.
I wanto sort and remove duplicate records.
source.zip (1.5 MB)
button 1:
I call Aspose.cells.RemoveDuplicates.
It takes too much time.
I don’t get the result.
button 2:
I remove duplicate records by myself.
It takes about 50 seconds.
Can it be faster?
Aspose.Cells for .Net.
Version:20.6
Visual Studio Community 2019
/// <summary>
/// Loads "source.xlsx", sorts the data rows of the first worksheet ascending
/// by column 0 (row 0 is left in place as a header), removes duplicate rows
/// with <c>Cells.RemoveDuplicates</c>, saves the result to a time-stamped
/// "...dest.xlsx" file and shows the elapsed time in seconds.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    Stopwatch sw = Stopwatch.StartNew();

    // Dispose the workbook once it has been saved so its resources are
    // released promptly instead of waiting for garbage collection.
    using (Aspose.Cells.Workbook wb = new Aspose.Cells.Workbook("source.xlsx"))
    {
        Aspose.Cells.Cells cells = wb.Worksheets[0].Cells;

        // Sort and RemoveDuplicates take *inclusive* end indices, so pass the
        // last populated row/column directly rather than a count (index + 1),
        // which would pull one blank row and one blank column into the range.
        int lastRow = cells.MaxDataRow;
        int lastColumn = cells.MaxDataColumn;

        // Row 0 is skipped by the sort below, so there is only something to
        // do when at least one row exists beyond it.
        if (lastRow >= 1)
        {
            wb.DataSorter.AddKey(0, Aspose.Cells.SortOrder.Ascending);
            wb.DataSorter.Sort(cells, 1, 0, lastRow, lastColumn);
            cells.RemoveDuplicates(1, 0, lastRow, lastColumn);
        }

        wb.Save(DateTime.Now.ToString("yyyy-MM-dd HH.mm.ss") + "dest.xlsx");

        // Stop timing before the workbook is disposed so the measurement
        // covers the same load/sort/dedupe/save work as before.
        sw.Stop();
    }

    double elapse = sw.ElapsedMilliseconds / 1000.0;
    MessageBox.Show("Time elapsed:" + elapse.ToString());
}
/// <summary>
/// Loads "source.xlsx", sorts the data rows of the first worksheet ascending
/// by column 0 (row 0 is left in place as a header), then removes every row
/// whose column-0 text equals that of the row above it, keeping the first row
/// of each run. Saves the result to a time-stamped "...dest.xlsx" file and
/// shows the elapsed time in seconds.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    const int firstDataRow = 1; // row 0 is excluded from sorting and de-duplication
    const int keyColumn = 0;

    Stopwatch sw = Stopwatch.StartNew();

    // Dispose the workbook once it has been saved so its resources are
    // released promptly instead of waiting for garbage collection.
    using (Aspose.Cells.Workbook wb = new Aspose.Cells.Workbook("source.xlsx"))
    {
        Aspose.Cells.Cells cells = wb.Worksheets[0].Cells;

        // DataSorter.Sort takes *inclusive* end indices, so use the last
        // populated row/column rather than a count (index + 1).
        int lastRow = cells.MaxDataRow;
        int lastColumn = cells.MaxDataColumn;

        if (lastRow >= firstDataRow)
        {
            wb.DataSorter.AddKey(keyColumn, Aspose.Cells.SortOrder.Ascending);
            wb.DataSorter.Sort(cells, firstDataRow, 0, lastRow, lastColumn);

            // After sorting, equal keys are adjacent. Walk the runs of equal
            // values from the bottom of the sheet upwards and delete all but
            // the first row of each run. The rows removed are exactly the
            // ones a top-down pass would remove, but each DeleteRows call now
            // only has already-de-duplicated rows beneath it.
            // NOTE(review): this assumes the cost of DeleteRows grows with the
            // number of rows below the deletion point - measure to confirm.
            int runEnd = lastRow;
            while (runEnd >= firstDataRow)
            {
                string value = cells[runEnd, keyColumn].StringValue;

                // Extend the run upwards while the row above has the same text.
                int runStart = runEnd;
                while (runStart > firstDataRow
                       && cells[runStart - 1, keyColumn].StringValue == value)
                {
                    --runStart;
                }

                int duplicateCount = runEnd - runStart;
                if (duplicateCount > 0)
                {
                    // Keep runStart, drop the duplicates that follow it.
                    cells.DeleteRows(runStart + 1, duplicateCount);
                }

                // Rows above runStart are untouched by the deletion, so their
                // indices are still valid.
                runEnd = runStart - 1;
            }
        }

        wb.Save(DateTime.Now.ToString("yyyy-MM-dd HH.mm.ss") + "dest.xlsx");

        // Stop timing before the workbook is disposed so the measurement
        // covers the same load/sort/dedupe/save work as before.
        sw.Stop();
    }

    double elapse = sw.ElapsedMilliseconds / 1000.0;
    MessageBox.Show("Time elapsed:" + elapse.ToString());
}