/// <summary>
/// Diffs the cell values of one row of <paramref name="sheet1"/> against one row of
/// <paramref name="sheet2"/>.
/// </summary>
/// <param name="sheet1">Sheet on the "src" side.</param>
/// <param name="row1">Row index to read from <paramref name="sheet1"/>; a missing row contributes no values.</param>
/// <param name="sheet2">Sheet on the "dst" side.</param>
/// <param name="row2">Row index to read from <paramref name="sheet2"/>; a missing row contributes no values.</param>
/// <param name="status">Not used by this method.</param>
/// <returns>The cell-level diff after the deleted-first and inserted-first optimisation passes.</returns>
List<DiffResult<string>> DiffSheetRow(ISheet sheet1, int row1, ISheet sheet2, int row2, SheetDiffStatus status)
{
    var srcValues = new List<string>();
    var srcRow = sheet1.GetRow(row1);
    if (srcRow != null)
    {
        // Only the columns recorded as valid for this sheet are compared.
        var srcWidth = books["src"].SheetValideColumn[sheet1.SheetName];
        for (var col = 0; col < srcWidth; col++)
        {
            srcValues.Add(Util.GetCellValue(srcRow.GetCell(col)));
        }
    }

    var dstValues = new List<string>();
    var dstRow = sheet2.GetRow(row2);
    if (dstRow != null)
    {
        var dstWidth = books["dst"].SheetValideColumn[sheet2.SheetName];
        for (var col = 0; col < dstWidth; col++)
        {
            dstValues.Add(Util.GetCellValue(dstRow.GetCell(col)));
        }
    }

    var deletedFirst = DiffUtil.OptimizeCaseDeletedFirst(DiffUtil.Diff(srcValues, dstValues));
    return DiffUtil.OptimizeCaseInsertedFirst(deletedFirst).ToList();
}
/// <summary>
/// Diffs two sequences of strings, orders the result with
/// <see cref="DiffOrderType.LazyDeleteFirst"/> and applies the deleted-first optimisation.
/// </summary>
/// <param name="src">Strings from the source side.</param>
/// <param name="dst">Strings from the destination side.</param>
/// <returns>The ordered and optimised diff results.</returns>
private IEnumerable<DiffResult<string>> DiffCellValue(IEnumerable<string> src, IEnumerable<string> dst)
{
    var ordered = DiffUtil.Order(DiffUtil.Diff(src, dst), DiffOrderType.LazyDeleteFirst);
    return DiffUtil.OptimizeCaseDeletedFirst(ordered);
}
/// <summary>
/// Checks the status sequence produced for "aaa" vs "bbb" under each order type,
/// followed by the matching optimisation pass.
/// </summary>
public void OptimizeCaseDeleteFirst()
{
    const string before = "aaa";
    const string after = "bbb";

    /*
     * Greedy, delete first:
     * obj1 a a a
     * obj2     b b b
     *      - - M + +
     */
    var greedyDeleteFirst = DiffUtil.OptimizeCaseDeletedFirst(
        DiffUtil.Order(DiffUtil.Diff(before, after), DiffOrderType.GreedyDeleteFirst));
    var expected = new[]
    {
        DiffStatus.Deleted, DiffStatus.Deleted, DiffStatus.Modified, DiffStatus.Inserted, DiffStatus.Inserted
    };
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], greedyDeleteFirst.ElementAt(i).Status);
    }

    /*
     * Greedy, insert first:
     * obj1     a a a
     * obj2 b b b
     *      + + M - -
     */
    var greedyInsertFirst = DiffUtil.OptimizeCaseInsertedFirst(
        DiffUtil.Order(DiffUtil.Diff(before, after), DiffOrderType.GreedyInsertFirst));
    expected = new[]
    {
        DiffStatus.Inserted, DiffStatus.Inserted, DiffStatus.Modified, DiffStatus.Deleted, DiffStatus.Deleted
    };
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], greedyInsertFirst.ElementAt(i).Status);
    }

    /*
     * Lazy, delete first:
     * obj1 a a a
     * obj2 b b b
     *      M M M
     */
    var lazyDeleteFirst = DiffUtil.OptimizeCaseDeletedFirst(
        DiffUtil.Order(DiffUtil.Diff(before, after), DiffOrderType.LazyDeleteFirst));
    expected = new[] { DiffStatus.Modified, DiffStatus.Modified, DiffStatus.Modified };
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], lazyDeleteFirst.ElementAt(i).Status);
    }

    /*
     * Lazy, insert first:
     * obj1 a a a
     * obj2 b b b
     *      M M M
     */
    var lazyInsertFirst = DiffUtil.OptimizeCaseInsertedFirst(
        DiffUtil.Order(DiffUtil.Diff(before, after), DiffOrderType.LazyInsertFirst));
    expected = new[] { DiffStatus.Modified, DiffStatus.Modified, DiffStatus.Modified };
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], lazyInsertFirst.ElementAt(i).Status);
    }
}
/// <summary>
/// Rebuilds the contents of the source and destination value text boxes so that they show
/// a line-by-line (and, for modified lines, character-by-character) diff of the two values.
/// </summary>
/// <param name="srcValue">Text of the source cell; split on '\n' into lines.</param>
/// <param name="dstValue">Text of the destination cell; split on '\n' into lines.</param>
private void UpdateValueDiff(string srcValue, string dstValue)
{
    // Both boxes render into the paragraph of their first block; start from empty.
    var srcInlines = SrcValueTextBox.Document.Blocks.First().ContentStart.Paragraph.Inlines;
    srcInlines.Clear();
    var dstInlines = DstValueTextBox.Document.Blocks.First().ContentStart.Paragraph.Inlines;
    dstInlines.Clear();

    // Trailing whitespace (including a '\r' left over from "\r\n") is ignored per line.
    var srcLines = srcValue.Split('\n').Select(s => s.TrimEnd());
    var dstLines = dstValue.Split('\n').Select(s => s.TrimEnd());

    var srcRange = new List<Tuple<string, Color?>>();
    var dstRange = new List<Tuple<string, Color?>>();

    foreach (var line in DiffCellValue(srcLines, dstLines).ToList())
    {
        switch (line.Status)
        {
            case DiffStatus.Equal:
                DiffEqualLine(line, srcRange);
                DiffEqualLine(line, dstRange);
                break;

            case DiffStatus.Modified:
                // Drill down to characters; each side only shows what exists on that side.
                var charDiff = DiffUtil.Diff(line.Obj1, line.Obj2);
                charDiff = DiffUtil.Order(charDiff, DiffOrderType.LazyDeleteFirst);
                charDiff = DiffUtil.OptimizeCaseDeletedFirst(charDiff);
                DiffModifiedLine(charDiff.Where(r => r.Status != DiffStatus.Inserted), srcRange, true);
                DiffModifiedLine(charDiff.Where(r => r.Status != DiffStatus.Deleted), dstRange, false);
                break;

            case DiffStatus.Deleted:
                DiffDeletedLine(line, srcRange, true);
                DiffDeletedLine(line, dstRange, false);
                break;

            case DiffStatus.Inserted:
                DiffInsertedLine(line, srcRange, true);
                DiffInsertedLine(line, dstRange, false);
                break;
        }
    }

    foreach (var segment in srcRange)
    {
        SolidColorBrush background;
        if (segment.Item2.HasValue)
        {
            background = new SolidColorBrush(segment.Item2.Value);
        }
        else
        {
            background = new SolidColorBrush();
        }

        srcInlines.Add(new Run(segment.Item1) { Background = background });
    }

    foreach (var segment in dstRange)
    {
        SolidColorBrush background;
        if (segment.Item2.HasValue)
        {
            background = new SolidColorBrush(segment.Item2.Value);
        }
        else
        {
            background = new SolidColorBrush();
        }

        dstInlines.Add(new Run(segment.Item1) { Background = background });
    }
}
/// <summary>
/// Diffs the source columns against the destination columns and records, for every position
/// in the diff result, whether that column was deleted, inserted, or neither.
/// </summary>
/// <param name="srcColumns">Columns of the source sheet.</param>
/// <param name="dstColumns">Columns of the destination sheet.</param>
/// <param name="config">
/// Supplies the header row indices. A non-negative <c>SrcHeaderIndex</c> is written to every
/// source column and enables <c>HeaderComparer</c>; a non-negative <c>DstHeaderIndex</c> is
/// written to every destination column.
/// </param>
/// <returns>
/// A map from the zero-based position in the column diff to its status
/// (<c>Deleted</c>, <c>Inserted</c>, or <c>None</c> for everything else).
/// </returns>
private static Dictionary<int, ExcelColumnStatus> CreateColumnStatusMap(
    IEnumerable<ExcelColumn> srcColumns, IEnumerable<ExcelColumn> dstColumns, ExcelSheetDiffConfig config)
{
    // Snapshot both inputs once. They are enumerated to set HeaderIndex and again by the
    // diff; with a deferred sequence the second pass would see fresh, unmodified instances.
    var srcList = new List<ExcelColumn>(srcColumns);
    var dstList = new List<ExcelColumn>(dstColumns);

    var option = new DiffOption<ExcelColumn>();

    if (config.SrcHeaderIndex >= 0)
    {
        // NOTE(review): the header comparer is enabled by the source header index only;
        // this asymmetry is kept as in the original.
        option.EqualityComparer = new HeaderComparer();
        foreach (var column in srcList)
        {
            column.HeaderIndex = config.SrcHeaderIndex;
        }
    }

    if (config.DstHeaderIndex >= 0)
    {
        foreach (var column in dstList)
        {
            column.HeaderIndex = config.DstHeaderIndex;
        }
    }

    var results = DiffUtil.Diff(srcList, dstList, option);
    results = DiffUtil.Order(results, DiffOrderType.LazyDeleteFirst);
    results = DiffUtil.OptimizeCaseDeletedFirst(results);

    var statusByColumn = new Dictionary<int, ExcelColumnStatus>();
    var columnIndex = 0;
    foreach (var result in results)
    {
        ExcelColumnStatus status;
        switch (result.Status)
        {
            case DiffStatus.Deleted:
                status = ExcelColumnStatus.Deleted;
                break;
            case DiffStatus.Inserted:
                status = ExcelColumnStatus.Inserted;
                break;
            default:
                status = ExcelColumnStatus.None;
                break;
        }

        statusByColumn.Add(columnIndex, status);
        columnIndex++;
    }

    return statusByColumn;
}
/// <summary>
/// Diffs the cell values of one row of <paramref name="sheet1"/> against one row of
/// <paramref name="sheet2"/> and reports the largest number of text lines found in any cell.
/// </summary>
/// <param name="sheet1">Sheet on the "src" side.</param>
/// <param name="row1">Row index to read from <paramref name="sheet1"/>; a missing row contributes no values.</param>
/// <param name="sheet2">Sheet on the "dst" side.</param>
/// <param name="row2">Row index to read from <paramref name="sheet2"/>; a missing row contributes no values.</param>
/// <param name="status">Not used by this method.</param>
/// <param name="maxLineCount">
/// Receives the maximum of (number of '\n' characters + 1) over all cells read from both rows,
/// or 0 when neither row exists or no columns are read.
/// </param>
/// <returns>
/// The cell-level diff after the deleted-first, inserted-first and both shift optimisation passes.
/// </returns>
List<DiffResult<string>> DiffSheetRow(ISheet sheet1, int row1, ISheet sheet2, int row2, SheetDiffStatus status, out int maxLineCount)
{
    maxLineCount = 0;

    var srcValues = new List<string>();
    var srcRow = sheet1.GetRow(row1);
    if (srcRow != null)
    {
        var srcWidth = books["src"].SheetValideColumn[sheet1.SheetName];
        for (var col = 0; col < srcWidth; col++)
        {
            var text = Util.GetCellValue(srcRow.GetCell(col));
            var lineCount = text.Count(ch => ch == '\n') + 1;
            if (lineCount > maxLineCount)
            {
                maxLineCount = lineCount;
            }

            srcValues.Add(text);
        }
    }

    var dstValues = new List<string>();
    var dstRow = sheet2.GetRow(row2);
    if (dstRow != null)
    {
        var dstWidth = books["dst"].SheetValideColumn[sheet2.SheetName];
        for (var col = 0; col < dstWidth; col++)
        {
            var text = Util.GetCellValue(dstRow.GetCell(col));
            var lineCount = text.Count(ch => ch == '\n') + 1;
            if (lineCount > maxLineCount)
            {
                maxLineCount = lineCount;
            }

            dstValues.Add(text);
        }
    }

    var pass = DiffUtil.OptimizeCaseDeletedFirst(DiffUtil.Diff(srcValues, dstValues));
    pass = DiffUtil.OptimizeCaseInsertedFirst(pass);

    // The first shift pass is given a materialised list, as before.
    var materialised = pass.ToList();
    pass = DiffUtil.OptimizeShift(materialised, false);
    pass = DiffUtil.OptimizeShift(pass, true);
    return pass.ToList();
}
/// <summary>
/// Treats the leading column(s) as the row ID and diffs the IDs of both sheets to detect
/// added and removed rows. Each entry pairs the ID text with the row index it came from.
/// </summary>
/// <param name="sheet1">Sheet on the "src" side.</param>
/// <param name="sheet2">Sheet on the "dst" side.</param>
/// <param name="checkCellCount">Number of leading cells concatenated to form a row's ID.</param>
/// <param name="addRowID">
/// When true, the row index is appended (after a ".") to the text used for the uniqueness check.
/// </param>
/// <returns>The diff of the two sorted ID lists after the deleted-first optimisation.</returns>
List<DiffResult<string2int>> GetIDDiffList(ISheet sheet1, ISheet sheet2, int checkCellCount, bool addRowID = false)
{
    var firstRow = DiffStartIdx();
    var srcKeys = new List<string2int>();
    var dstKeys = new List<string2int>();

    // Try to find a prefix of columns whose combined value is unique per row.
    var unique = CollectRowKeys(sheet1, "src", firstRow, checkCellCount, addRowID, srcKeys);
    if (unique)
    {
        srcKeys.Sort((a, b) =>
        {
            var byKey = a.Key.CompareTo(b.Key);
            return byKey != 0 ? byKey : a.Value.CompareTo(b.Value);
        });

        unique = CollectRowKeys(sheet2, "dst", firstRow, checkCellCount, addRowID, dstKeys);
    }

    if (!unique)
    {
        if (checkCellCount < 6)
        {
            // Widen the key by one more column and start over.
            return GetIDDiffList(sheet1, sheet2, checkCellCount + 1, addRowID);
        }

        // No usable key could be found; fall back to joining the ID with the row number.
        return GetIDDiffList(sheet1, sheet2, 1, true);
    }

    dstKeys.Sort((a, b) =>
    {
        var byKey = a.Key.CompareTo(b.Key);
        return byKey != 0 ? byKey : a.Value.CompareTo(b.Value);
    });

    var option = new DiffOption<string2int>
    {
        Optimize = false,
        EqualityComparer = new SheetIDComparer()
    };

    var diff = DiffUtil.Diff(srcKeys, dstKeys, option);
    return DiffUtil.OptimizeCaseDeletedFirst(diff).ToList();
}

/// <summary>
/// Scans <paramref name="sheet"/> from <paramref name="firstRow"/> until the first missing or
/// invalid row, appending one key per row to <paramref name="keys"/>. On reaching the end, the
/// index of that terminating row is stored in <c>books[bookKey].SheetValideRow</c>.
/// </summary>
/// <returns>
/// True when the scan reached the end without meeting a duplicate key; false as soon as a
/// duplicate is met (in which case <c>SheetValideRow</c> is not written).
/// </returns>
bool CollectRowKeys(ISheet sheet, string bookKey, int firstRow, int checkCellCount, bool addRowID, List<string2int> keys)
{
    var seen = new HashSet<string>();
    for (var rowIdx = firstRow; ; rowIdx++)
    {
        var row = sheet.GetRow(rowIdx);
        if (row == null || !Util.CheckValideRow(row))
        {
            books[bookKey].SheetValideRow[sheet.SheetName] = rowIdx;
            return true;
        }

        var key = "";
        for (var cell = 0; cell < checkCellCount; ++cell)
        {
            key += Util.GetCellValue(row.GetCell(cell));
        }

        var probe = addRowID ? key + "." + rowIdx : key;
        if (!seen.Add(probe))
        {
            return false;
        }

        keys.Add(new string2int(key, rowIdx));
    }
}
/// <summary>
/// Loads both workbooks, diffs their sheet lists and the contents of every sheet pair whose
/// names match, then refreshes the path labels, the two sheet combo boxes and the data grids.
/// </summary>
/// <param name="file1">Path of the source workbook. Nothing happens if it is null or empty.</param>
/// <param name="file2">Path of the destination workbook. Nothing happens if it is null or empty.</param>
/// <param name="resetInitFile">When true, the paths are also stored in SrcFile / DstFile.</param>
public void Diff(string file1, string file2, bool resetInitFile = true) {
    if (string.IsNullOrEmpty(file1) || string.IsNullOrEmpty(file2)) {
        return;
    }
    if (resetInitFile) {
        SrcFile = file1;
        DstFile = file2;
    }
    // NOTE(review): this initial value is overwritten inside the loop below before it is
    // ever read, so the lookup currently has no effect on the result.
    string oldsheetName = null;
    if (books.ContainsKey("src")) {
        oldsheetName = books["src"].sheetname;
    }
    var src = InitWorkWrap(file1);
    var dst = InitWorkWrap(file2);
    // Diff the two workbooks' sheet lists using the sheet-name comparer.
    var option = new DiffOption <SheetNameCombo>();
    option.EqualityComparer = new SheetNameComboComparer();
    var result = DiffUtil.Diff(src.sheetNameCombos, dst.sheetNameCombos, option);
    // Earlier variant kept the raw result: diffSheetName = result.ToList();
    diffSheetName = DiffUtil.OptimizeCaseDeletedFirst(result).ToList();
    books["src"] = src;
    books["dst"] = dst;
    // -1 means "no sheet has been selected yet" for the respective side.
    var srcSheetID = -1;
    var dstSheetID = -1;
    for (int i = 0; i < diffSheetName.Count; ++i) {
        var sheetname = diffSheetName[i];
        var name = sheetname.Obj1 == null ? sheetname.Obj2.Name : sheetname.Obj1.Name;
        // Only sheets with the same name can be diffed; handled this way for now.
        if (sheetname.Status == DiffStatus.Equal) {
            var sheet1 = sheetname.Obj1.ID;
            var sheet2 = sheetname.Obj2.ID;
            sheetsDiff[name] = DiffSheet(src.book.GetSheetAt(sheet1), dst.book.GetSheetAt(sheet2));
            if (sheetsDiff[name] != null) {
                oldsheetName = sheetname.Obj1.Name;
                var sheetidx = 0;
                if (!string.IsNullOrEmpty(oldsheetName)) {
                    sheetidx = src.book.GetSheetIndex(oldsheetName);
                }
                // The selected sheet is replaced by every changed sheet, so the last changed
                // sheet wins; if none changed, the first sheet with a diff stays selected.
                if (sheetsDiff[name].changed || srcSheetID == -1) {
                    src.sheet = sheetidx;
                    srcSheetID = sheetidx;
                }
                if (!string.IsNullOrEmpty(oldsheetName)) {
                    sheetidx = dst.book.GetSheetIndex(oldsheetName);
                }
                if (sheetsDiff[name].changed || dstSheetID == -1) {
                    dst.sheet = sheetidx;
                    dstSheetID = sheetidx;
                }
            }
        }
    }
    // Refresh the UI.
    SrcFilePath.Content = file1;
    DstFilePath.Content = file2;
    SrcFileSheetsCombo.Items.Clear();
    foreach (var item in src.sheetCombo) {
        // NOTE(review): FindIndex returns -1 when nothing matches, which would make the
        // indexer below throw; this assumes every combo item appears in diffSheetName.
        int index = diffSheetName.FindIndex(a => a.Obj1 != null && a.Obj1.ID == (item.Content as SheetNameCombo).ID);
        SolidColorBrush color = null;
        DiffStatus status = diffSheetName[index].Status;
        if (status != DiffStatus.Equal) {
            // The sheet exists on one side only: colour by the sheet-list diff status.
            color = Util.GetColorByDiffStatus(status);
        } else {
            // Same-named sheet on both sides: colour as Modified when its content diff changed.
            var name = diffSheetName[index].Obj1.Name;
            color = Util.GetColorByDiffStatus(sheetsDiff.ContainsKey(name) && sheetsDiff[name] != null && sheetsDiff[name].changed ? DiffStatus.Modified : DiffStatus.Equal);
        }
        if (color != null) {
            item.Background = color;
        }
        SrcFileSheetsCombo.Items.Add(item);
    }
    var comboidx = src.ItemID2ComboIdx[src.sheet];
    SrcFileSheetsCombo.SelectedItem = src.sheetCombo[comboidx];
    DstFileSheetsCombo.Items.Clear();
    foreach (var item in dst.sheetCombo) {
        // Same as above, but matching on the destination side (Obj2) of each diff entry.
        int index = diffSheetName.FindIndex(a => a.Obj2 != null && a.Obj2.ID == (item.Content as SheetNameCombo).ID);
        SolidColorBrush color = null;
        DiffStatus status = diffSheetName[index].Status;
        if (status != DiffStatus.Equal) {
            color = Util.GetColorByDiffStatus(status);
        } else {
            var name = diffSheetName[index].Obj1.Name;
            color = Util.GetColorByDiffStatus(sheetsDiff.ContainsKey(name) && sheetsDiff[name] != null && sheetsDiff[name].changed ? DiffStatus.Modified : DiffStatus.Equal);
        }
        if (color != null) {
            item.Background = color;
        }
        DstFileSheetsCombo.Items.Add(item);
    }
    comboidx = dst.ItemID2ComboIdx[dst.sheet];
    DstFileSheetsCombo.SelectedItem = dst.sheetCombo[comboidx];
    DstDataGrid.RefreshData();
    SrcDataGrid.RefreshData();
}
/// <summary>
/// Diffs two sheets: first their header lists, then their row IDs, then the cells of every
/// paired row. All results and the row/diff index maps are stored on the returned status.
/// </summary>
/// <param name="src">Sheet on the "src" side.</param>
/// <param name="dst">Sheet on the "dst" side.</param>
/// <param name="status">
/// Existing status to refill. When null a new one is created. Existing
/// <c>RowEdited1</c> / <c>RowEdited2</c> maps are kept; the other members are replaced.
/// </param>
/// <returns>
/// The filled status, with <c>changed</c> set when the headers, the row IDs or any row differ;
/// null when the header list of either sheet cannot be obtained.
/// </returns>
SheetDiffStatus DiffSheet(ISheet src, ISheet dst, SheetDiffStatus status = null)
{
    if (status == null)
    {
        status = new SheetDiffStatus();
    }

    var srcHeader = GetHeaderStrList(src);
    var dstHeader = GetHeaderStrList(dst);
    if (srcHeader == null || dstHeader == null)
    {
        return null;
    }

    // Header diff.
    var headerCells = DiffUtil.OptimizeCaseDeletedFirst(DiffUtil.Diff(srcHeader, dstHeader)).ToList();
    var changed = headerCells.Any(cell => cell.Status != DiffStatus.Equal);

    status.diffHead = new SheetRowDiff() { diffcells = headerCells };
    status.column2diff1 = new Dictionary<int, int[]>();
    status.column2diff2 = new Dictionary<int, int[]>();
    status.column2diff1[0] = getColumn2Diff(status.diffHead.diffcells, true);
    status.column2diff2[0] = getColumn2Diff(status.diffHead.diffcells, false);

    // The header widths define how many columns are compared in each row.
    books["src"].SheetValideColumn[src.SheetName] = srcHeader.Count;
    books["dst"].SheetValideColumn[dst.SheetName] = dstHeader.Count;

    // Row ID diff.
    status.diffFistColumn = GetIDDiffList(src, dst, 1);
    if (!changed)
    {
        changed = status.diffFistColumn.Any(entry => entry.Status != DiffStatus.Equal);
    }

    status.diffSheet = new List<SheetRowDiff>();
    status.rowID2DiffMap1 = new Dictionary<int, int>();
    status.rowID2DiffMap2 = new Dictionary<int, int>();
    status.Diff2RowID1 = new Dictionary<int, int>();
    status.Diff2RowID2 = new Dictionary<int, int>();
    if (status.RowEdited1 == null)
    {
        status.RowEdited1 = new Dictionary<int, Dictionary<int, CellEditMode>>();
    }

    if (status.RowEdited2 == null)
    {
        status.RowEdited2 = new Dictionary<int, Dictionary<int, CellEditMode>>();
    }

    foreach (var entry in status.diffFistColumn)
    {
        var srcRowId = entry.Obj1.Value;
        var dstRowId = entry.Obj2.Value;
        var srcMissing = entry.Obj1.Key == null;
        var dstMissing = entry.Obj2.Key == null;

        // A side without a key has no row; diff against row -1 so the result takes the
        // new/delete form rather than modify.
        if (srcMissing)
        {
            srcRowId = -1;
        }

        if (dstMissing)
        {
            dstRowId = -1;
        }

        var rowCells = DiffSheetRow(src, srcRowId, dst, dstRowId, status);

        // Only after diffing is the missing side mapped to the recorded SheetValideRow index.
        if (srcMissing)
        {
            srcRowId = books["src"].SheetValideRow[src.SheetName];
        }

        if (dstMissing)
        {
            dstRowId = books["dst"].SheetValideRow[dst.SheetName];
        }

        status.column2diff1[srcRowId] = getColumn2Diff(rowCells, true);
        status.column2diff2[dstRowId] = getColumn2Diff(rowCells, false);

        // The index this row is about to get in diffSheet links rows and diff entries.
        var diffIndex = status.diffSheet.Count;
        status.rowID2DiffMap1[srcRowId] = diffIndex;
        status.rowID2DiffMap2[dstRowId] = diffIndex;
        status.Diff2RowID1[diffIndex] = srcRowId;
        status.Diff2RowID2[diffIndex] = dstRowId;

        if (!status.RowEdited1.ContainsKey(srcRowId))
        {
            status.RowEdited1[srcRowId] = new Dictionary<int, CellEditMode>();
        }

        if (!status.RowEdited2.ContainsKey(dstRowId))
        {
            status.RowEdited2[dstRowId] = new Dictionary<int, CellEditMode>();
        }

        var rowChanged = rowCells.Any(cell => cell.Status != DiffStatus.Equal);
        var rowDiff = new SheetRowDiff();
        rowDiff.diffcells = rowCells;
        rowDiff.changed = rowChanged;
        if (rowChanged)
        {
            // One entry per cell: a character-level diff for modified cells, null otherwise.
            rowDiff.diffcell_details = rowCells
                .Select(cell => cell.Status == DiffStatus.Modified
                    ? DiffUtil.OptimizeCaseDeletedFirst(DiffUtil.Diff(cell.Obj1, cell.Obj2)).ToList()
                    : null)
                .ToList();
        }

        status.diffSheet.Add(rowDiff);
        if (rowChanged)
        {
            changed = true;
        }
    }

    status.changed = changed;
    return status;
}
/// <summary>
/// Diffs two sheets row by row. Columns are diffed first, so that rows can be aligned by
/// padding them with empty cells where a column exists on the other side only.
/// </summary>
/// <param name="src">Source sheet; its rows' cells are rewritten with the padded layout.</param>
/// <param name="dst">Destination sheet; its rows' cells are rewritten with the padded layout.</param>
/// <param name="config">Diff configuration, passed on to the column diff.</param>
/// <returns>The sheet diff built from the row diff and the column status map.</returns>
/// <remarks>
/// When the row diff has more than 10000 entries, only the first 100 entries and a
/// 200-entry window around each non-equal entry are kept, in their original order.
/// </remarks>
public static ExcelSheetDiff Diff(ExcelSheet src, ExcelSheet dst, ExcelSheetDiffConfig config)
{
    const int LargeResultThreshold = 10000;
    const int LeadingRows = 100;
    const int ContextBefore = 100;
    const int WindowLength = 200;

    var srcColumns = src.CreateColumns();
    var dstColumns = dst.CreateColumns();
    var columnStatusMap = CreateColumnStatusMap(srcColumns, dstColumns, config);

    // The row comparer is given the positions of inserted/deleted columns.
    var option = new DiffOption<ExcelRow>();
    option.EqualityComparer = new RowComparer(
        new HashSet<int>(columnStatusMap.Where(i => i.Value != ExcelColumnStatus.None).Select(i => i.Key)));

    // What is actually handled here is whether new columns were inserted (or removed):
    // each side gets an empty cell where the column exists on the other side only.
    PadRowsForColumnChanges(src.Rows.Values, columnStatusMap, ExcelColumnStatus.Inserted);
    PadRowsForColumnChanges(dst.Rows.Values, columnStatusMap, ExcelColumnStatus.Deleted);

    var r = DiffUtil.Diff(src.Rows.Values, dst.Rows.Values, option);
    r = DiffUtil.Order(r, DiffOrderType.LazyDeleteFirst);
    var resultArray = DiffUtil.OptimizeCaseDeletedFirst(r).ToArray();

    if (resultArray.Length > LargeResultThreshold)
    {
        // Mark the entries to keep in one pass instead of collecting 200 indices per
        // changed row and de-duplicating them afterwards.
        var keep = new bool[resultArray.Length];
        var markedUpTo = Math.Min(LeadingRows, resultArray.Length);
        for (var i = 0; i < markedUpTo; i++)
        {
            keep[i] = true;
        }

        for (var position = 0; position < resultArray.Length; position++)
        {
            if (resultArray[position].Status == DiffStatus.Equal)
            {
                continue;
            }

            var windowStart = Math.Max(0, position - ContextBefore);
            var windowEnd = Math.Min(resultArray.Length, windowStart + WindowLength);

            // Window starts never decrease, so everything in [windowStart, markedUpTo)
            // has already been marked by the previous window or the leading rows.
            for (var i = Math.Max(windowStart, markedUpTo); i < windowEnd; i++)
            {
                keep[i] = true;
            }

            markedUpTo = Math.Max(markedUpTo, windowEnd);
        }

        resultArray = resultArray.Where((item, i) => keep[i]).ToArray();
    }

    var sheetDiff = new ExcelSheetDiff(src, dst);
    DiffCells(resultArray, sheetDiff, columnStatusMap);

    return sheetDiff;
}

/// <summary>
/// Rewrites the cells of every row so that an empty cell is placed at each column position
/// whose status equals <paramref name="placeholderStatus"/>; the row's own cells fill the
/// remaining positions in order.
/// </summary>
private static void PadRowsForColumnChanges(
    IEnumerable<ExcelRow> rows, Dictionary<int, ExcelColumnStatus> columnStatusMap, ExcelColumnStatus placeholderStatus)
{
    foreach (var row in rows)
    {
        var shifted = new List<ExcelCell>();
        var pending = new Queue<ExcelCell>(row.Cells);
        for (var index = 0; pending.Count > 0; index++)
        {
            if (columnStatusMap[index] == placeholderStatus)
            {
                shifted.Add(new ExcelCell(string.Empty, 0, 0));
            }
            else
            {
                shifted.Add(pending.Dequeue());
            }
        }

        row.UpdateCells(shifted);
    }
}