/// <summary>
/// Diffs "abc" against "dBf" with a <c>CaseInsensitiveComparer</c> set on the option and checks the
/// status of each of the first five results after ordering with <c>DiffOrderType.LazyDeleteFirst</c>.
/// </summary>
public void SpecifiedComparer()
{
    var source = "abc";
    var target = "dBf";
    var option = new DiffOption<char> { EqualityComparer = new CaseInsensitiveComparer() };

    var ordered = DiffUtil.Order(DiffUtil.Diff(source, target, option), DiffOrderType.LazyDeleteFirst);

    // Expected sequence: a delete/insert pair, one equal element, then another delete/insert pair.
    var expected = new[]
    {
        DiffStatus.Deleted,
        DiffStatus.Inserted,
        DiffStatus.Equal,
        DiffStatus.Deleted,
        DiffStatus.Inserted,
    };

    for (var position = 0; position < expected.Length; position++)
    {
        Assert.AreEqual(expected[position], ordered.ElementAt(position).Status);
    }
}
/// <summary>
/// Runs <c>DiffUtil.OptimizedDiff</c> on two long character sequences, once without an option and once
/// with <c>Limit = 1000</c>, and asserts that the limited run took less wall-clock time.
/// </summary>
public void None_None_LimitOptionWorks()
{
    // Both inputs are lazy character sequences: the phrase repeated 1000 times and flattened.
    var str1 = Enumerable.Repeat("Good dog", 1000).SelectMany(c => c);
    var str2 = Enumerable.Repeat("Bad dog", 1000).SelectMany(c => c);
    var option = new DiffOption<char> { Limit = 1000 };

    // NOTE(review): this is a raw timing comparison with no warm-up run, so it may be
    // sensitive to machine load and first-call overhead - confirm it is stable on CI.
    var unlimitedTimer = System.Diagnostics.Stopwatch.StartNew();
    DiffUtil.OptimizedDiff(str1, str2);
    unlimitedTimer.Stop();
    var unlimitedElapsed = unlimitedTimer.Elapsed;

    var limitedTimer = System.Diagnostics.Stopwatch.StartNew();
    DiffUtil.OptimizedDiff(str1, str2, option);
    limitedTimer.Stop();
    var limitedElapsed = limitedTimer.Elapsed;

    Assert.IsTrue(limitedElapsed < unlimitedElapsed);
}
/// <summary>
/// Treats the leading column(s) of each sheet as the row id and diffs the two id lists to detect
/// added and removed rows. Each list entry is a (key text, row index) pair.
/// </summary>
/// <param name="sheet1">Source sheet; its end-of-data row index is stored under books["src"].</param>
/// <param name="sheet2">Destination sheet; its end-of-data row index is stored under books["dst"].</param>
/// <param name="checkCellCount">Number of leading cells concatenated to form a row key.</param>
/// <param name="addRowID">When true, the row index is appended to the key used for the uniqueness check.</param>
/// <returns>The diff of the two sorted key lists after <c>DiffUtil.OptimizeCaseDeletedFirst</c>.</returns>
List<DiffResult<string2int>> GetIDDiffList(ISheet sheet1, ISheet sheet2, int checkCellCount, bool addRowID = false)
{
    // Widest key (in columns) that is tried before falling back to "key + row index".
    const int maxKeyColumns = 6;

    var startIdx = DiffStartIdx();

    // Try to find a key, built from the first few columns, that never repeats within a sheet.
    var list1 = CollectRowKeys(sheet1, "src", startIdx, checkCellCount, addRowID);
    var list2 = list1 == null ? null : CollectRowKeys(sheet2, "dst", startIdx, checkCellCount, addRowID);

    if (list2 == null)
    {
        // A duplicate key was found in one of the sheets: widen the key by one column, or, once
        // no usable key can be found, restart with one column and the row index glued to the key.
        return checkCellCount < maxKeyColumns
            ? GetIDDiffList(sheet1, sheet2, checkCellCount + 1, addRowID)
            : GetIDDiffList(sheet1, sheet2, 1, true);
    }

    list1.Sort(CompareByKeyThenRow);
    list2.Sort(CompareByKeyThenRow);

    var option = new DiffOption<string2int>();
    option.Optimize = false;
    option.EqualityComparer = new SheetIDComparer();

    var result = DiffUtil.Diff(list1, list2, option);
    var optimize = DiffUtil.OptimizeCaseDeletedFirst(result);
    return optimize.ToList();
}

// Scans rows from startIdx until the first missing or invalid row, records that row index in
// books[bookKey].SheetValideRow, and returns the (key, row index) pairs. Returns null as soon as
// two rows produce the same uniqueness key.
private List<string2int> CollectRowKeys(ISheet sheet, string bookKey, int startIdx, int checkCellCount, bool addRowID)
{
    var keys = new List<string2int>();
    var seen = new HashSet<string>();

    for (var rowIdx = startIdx; ; rowIdx++)
    {
        var row = sheet.GetRow(rowIdx);
        if (row == null || !Util.CheckValideRow(row))
        {
            books[bookKey].SheetValideRow[sheet.SheetName] = rowIdx;
            return keys;
        }

        var key = "";
        for (var col = 0; col < checkCellCount; ++col)
        {
            key += Util.GetCellValue(row.GetCell(col));
        }

        // The stored key never carries the row index; only the uniqueness check does.
        var uniqueKey = addRowID ? key + "." + rowIdx : key;
        if (!seen.Add(uniqueKey))
        {
            return null;
        }

        keys.Add(new string2int(key, rowIdx));
    }
}

// Orders entries by key text, breaking ties by row index.
private static int CompareByKeyThenRow(string2int a, string2int b)
{
    var cmp = a.Key.CompareTo(b.Key);
    return cmp != 0 ? cmp : a.Value.CompareTo(b.Value);
}
/// <summary>
/// Loads both workbooks, diffs their sheet-name lists, diffs every sheet pair whose entry is
/// <c>DiffStatus.Equal</c>, then refreshes the path labels, both sheet combo boxes and both data grids.
/// </summary>
/// <param name="file1">Source workbook path; nothing happens when it is null or empty.</param>
/// <param name="file2">Destination workbook path; nothing happens when it is null or empty.</param>
/// <param name="resetInitFile">When true, <c>SrcFile</c> and <c>DstFile</c> are overwritten with the given paths.</param>
public void Diff(string file1, string file2, bool resetInitFile = true)
{
    if (string.IsNullOrEmpty(file1) || string.IsNullOrEmpty(file2))
    {
        return;
    }

    if (resetInitFile)
    {
        SrcFile = file1;
        DstFile = file2;
    }

    string oldsheetName = books.ContainsKey("src") ? books["src"].sheetname : null;

    var src = InitWorkWrap(file1);
    var dst = InitWorkWrap(file2);

    var option = new DiffOption<SheetNameCombo> { EqualityComparer = new SheetNameComboComparer() };
    var result = DiffUtil.Diff(src.sheetNameCombos, dst.sheetNameCombos, option);
    diffSheetName = DiffUtil.OptimizeCaseDeletedFirst(result).ToList();

    books["src"] = src;
    books["dst"] = dst;

    // -1 means "no sheet selected yet" for the respective side.
    var srcSheetID = -1;
    var dstSheetID = -1;

    for (var i = 0; i < diffSheetName.Count; ++i)
    {
        var entry = diffSheetName[i];
        var name = entry.Obj1 != null ? entry.Obj1.Name : entry.Obj2.Name;

        // Only sheets with the same name are diffed against each other (for now).
        if (entry.Status != DiffStatus.Equal)
        {
            continue;
        }

        var sheet1 = entry.Obj1.ID;
        var sheet2 = entry.Obj2.ID;
        var sheetDiff = DiffSheet(src.book.GetSheetAt(sheet1), dst.book.GetSheetAt(sheet2));
        sheetsDiff[name] = sheetDiff;
        if (sheetDiff == null)
        {
            continue;
        }

        oldsheetName = entry.Obj1.Name;
        var hasSheetName = !string.IsNullOrEmpty(oldsheetName);

        // Select this sheet when it changed, or when nothing has been selected on that side yet.
        var sheetidx = hasSheetName ? src.book.GetSheetIndex(oldsheetName) : 0;
        if (sheetDiff.changed || srcSheetID == -1)
        {
            src.sheet = sheetidx;
            srcSheetID = sheetidx;
        }

        if (hasSheetName)
        {
            sheetidx = dst.book.GetSheetIndex(oldsheetName);
        }
        if (sheetDiff.changed || dstSheetID == -1)
        {
            dst.sheet = sheetidx;
            dstSheetID = sheetidx;
        }
    }

    // Refresh the UI.
    SrcFilePath.Content = file1;
    DstFilePath.Content = file2;

    SrcFileSheetsCombo.Items.Clear();
    foreach (var item in src.sheetCombo)
    {
        var index = diffSheetName.FindIndex(a => a.Obj1 != null && a.Obj1.ID == (item.Content as SheetNameCombo).ID);
        var brush = BrushForSheetEntry(diffSheetName[index]);
        if (brush != null)
        {
            item.Background = brush;
        }
        SrcFileSheetsCombo.Items.Add(item);
    }
    var comboidx = src.ItemID2ComboIdx[src.sheet];
    SrcFileSheetsCombo.SelectedItem = src.sheetCombo[comboidx];

    DstFileSheetsCombo.Items.Clear();
    foreach (var item in dst.sheetCombo)
    {
        var index = diffSheetName.FindIndex(a => a.Obj2 != null && a.Obj2.ID == (item.Content as SheetNameCombo).ID);
        var brush = BrushForSheetEntry(diffSheetName[index]);
        if (brush != null)
        {
            item.Background = brush;
        }
        DstFileSheetsCombo.Items.Add(item);
    }
    comboidx = dst.ItemID2ComboIdx[dst.sheet];
    DstFileSheetsCombo.SelectedItem = dst.sheetCombo[comboidx];

    DstDataGrid.RefreshData();
    SrcDataGrid.RefreshData();
}

// Picks the background brush for one sheet-list entry: the entry's own status when it is not
// Equal; otherwise Modified if the recorded sheet diff is marked as changed, else Equal.
private SolidColorBrush BrushForSheetEntry(DiffResult<SheetNameCombo> entry)
{
    var status = entry.Status;
    if (status == DiffStatus.Equal)
    {
        var name = entry.Obj1.Name;
        var contentChanged = sheetsDiff.ContainsKey(name) && sheetsDiff[name] != null && sheetsDiff[name].changed;
        status = contentChanged ? DiffStatus.Modified : DiffStatus.Equal;
    }

    return Util.GetColorByDiffStatus(status);
}
/// <summary>
/// Diffs two sheets row by row. Columns are classified first, the rows of both sheets are padded
/// with empty cells so their columns line up, and the aligned rows are then diffed and passed to
/// <c>DiffCells</c>.
/// </summary>
/// <param name="src">Source sheet; its rows are updated in place with the padded cell lists.</param>
/// <param name="dst">Destination sheet; its rows are updated in place with the padded cell lists.</param>
/// <param name="config">Configuration forwarded to <c>CreateColumnStatusMap</c>.</param>
/// <returns>The populated <c>ExcelSheetDiff</c> for the two sheets.</returns>
public static ExcelSheetDiff Diff(ExcelSheet src, ExcelSheet dst, ExcelSheetDiffConfig config)
{
    // Results longer than this are trimmed to the head plus a window around each change.
    const int trimThreshold = 10000;
    const int headRowCount = 100;
    const int contextBefore = 100;
    const int windowLength = 200;

    var srcColumns = src.CreateColumns();
    var dstColumns = dst.CreateColumns();
    var columnStatusMap = CreateColumnStatusMap(srcColumns, dstColumns, config);

    // The row comparer is given the indices of every column whose status is not None.
    var option = new DiffOption<ExcelRow>();
    option.EqualityComparer = new RowComparer(
        new HashSet<int>(columnStatusMap.Where(i => i.Value != ExcelColumnStatus.None).Select(i => i.Key)));

    // Rebuilds each row's cell list: wherever the column at the current position has the given
    // status, an empty placeholder cell is emitted instead of consuming one of the row's own cells.
    Action<IEnumerable<ExcelRow>, ExcelColumnStatus> padRows = (rows, gapStatus) =>
    {
        foreach (var row in rows)
        {
            var shifted = new List<ExcelCell>();
            var pending = new Queue<ExcelCell>(row.Cells);
            for (var index = 0; pending.Count > 0; index++)
            {
                if (columnStatusMap[index] == gapStatus)
                {
                    shifted.Add(new ExcelCell(string.Empty, 0, 0));
                }
                else
                {
                    shifted.Add(pending.Dequeue());
                }
            }
            row.UpdateCells(shifted);
        }
    };

    // Source rows get placeholders for inserted columns, destination rows for deleted columns.
    padRows(src.Rows.Values, ExcelColumnStatus.Inserted);
    padRows(dst.Rows.Values, ExcelColumnStatus.Deleted);

    var r = DiffUtil.Diff(src.Rows.Values, dst.Rows.Values, option);
    r = DiffUtil.Order(r, DiffOrderType.LazyDeleteFirst);
    var resultArray = DiffUtil.OptimizeCaseDeletedFirst(r).ToArray();

    if (resultArray.Length > trimThreshold)
    {
        // Keep the first rows plus a fixed-size window around every non-equal row. A keep-mask is
        // used so memory stays proportional to the row count, however many rows have changed.
        var total = resultArray.Length;
        var keep = new bool[total];

        var markedEnd = Math.Min(headRowCount, total);
        for (var i = 0; i < markedEnd; i++)
        {
            keep[i] = true;
        }

        for (var position = 0; position < total; position++)
        {
            if (resultArray[position].Status == DiffStatus.Equal)
            {
                continue;
            }

            var windowStart = Math.Max(0, position - contextBefore);
            var windowEnd = Math.Min(total, windowStart + windowLength);

            // Window starts never decrease, so everything in [windowStart, markedEnd) is already kept.
            for (var i = Math.Max(windowStart, markedEnd); i < windowEnd; i++)
            {
                keep[i] = true;
            }
            markedEnd = Math.Max(markedEnd, windowEnd);
        }

        resultArray = resultArray.Where((item, i) => keep[i]).ToArray();
    }

    var sheetDiff = new ExcelSheetDiff(src, dst);
    DiffCells(resultArray, sheetDiff, columnStatusMap);
    return sheetDiff;
}
/// <summary>
/// Treats a run of columns of each sheet as the row id and diffs the two id lists to detect added
/// and removed rows. Each list entry is a (key text, row index) pair.
/// </summary>
/// <param name="sheet1">Source sheet; its end-of-data row index is stored under books["src"].</param>
/// <param name="sheet2">Destination sheet; its end-of-data row index is stored under books["dst"].</param>
/// <param name="checkCellCount">Number of consecutive cells concatenated to form a row key.</param>
/// <param name="addRowID">When true, the row index is appended to the key used for the uniqueness check.</param>
/// <param name="startCheckCell">Index of the first cell that takes part in the key.</param>
/// <returns>The raw diff of the two sorted key lists.</returns>
List<DiffResult<string2int>> GetIDDiffList(ISheet sheet1, ISheet sheet2, int checkCellCount, bool addRowID = false, int startCheckCell = 0)
{
    // Widest key (in columns) that is tried before falling back to "key + row index".
    const int maxKeyColumns = 6;

    var startIdx = DiffStartIdx();

    // Numeric ordering is only considered for single-column keys, and only while every key cell
    // seen in either sheet is a numeric cell.
    bool allNum = checkCellCount == 1;

    // Try to find a key, built from a few columns, that never repeats within a sheet.
    var list1 = CollectRowKeysSkippingBlanks(sheet1, "src", startIdx, startCheckCell, checkCellCount, addRowID, ref allNum);
    var list2 = list1 == null
        ? null
        : CollectRowKeysSkippingBlanks(sheet2, "dst", startIdx, startCheckCell, checkCellCount, addRowID, ref allNum);

    if (list2 == null)
    {
        // A duplicate key was found in one of the sheets: widen the key by one column, or, once
        // no usable key can be found, restart with one column and the row index glued to the key.
        return checkCellCount < maxKeyColumns
            ? GetIDDiffList(sheet1, sheet2, checkCellCount + 1, addRowID, startCheckCell)
            : GetIDDiffList(sheet1, sheet2, 1, true, startCheckCell);
    }

    // NOTE(review): the numeric branch assumes Util.GetCellValue renders numeric cells as text
    // that Double.Parse accepts under the current culture - confirm.
    var numericKeys = allNum;
    Comparison<string2int> byKeyThenRow = delegate(string2int a, string2int b)
    {
        int cmp = numericKeys
            ? Double.Parse(a.Key).CompareTo(Double.Parse(b.Key))
            : a.Key.CompareTo(b.Key);
        return cmp != 0 ? cmp : a.Value.CompareTo(b.Value);
    };

    list1.Sort(byKeyThenRow);
    list2.Sort(byKeyThenRow);

    var option = new DiffOption<string2int>();
    option.EqualityComparer = new SheetIDComparer();

    // The id column must not have delete/add pairs merged into "modified", so the raw diff is returned.
    var result = DiffUtil.Diff(list1, list2, option);
    return result.ToList();
}

// Scans rows from startIdx. Missing or invalid rows are skipped while the config.EmptyLine budget
// lasts; the first one after that ends the scan and its index is recorded in
// books[bookKey].SheetValideRow. Returns the (key, row index) pairs, or null as soon as two rows
// produce the same uniqueness key. Clears allNum when any key cell is missing or not numeric.
private List<string2int> CollectRowKeysSkippingBlanks(ISheet sheet, string bookKey, int startIdx, int startCheckCell, int checkCellCount, bool addRowID, ref bool allNum)
{
    var keys = new List<string2int>();
    var seen = new HashSet<string>();
    int blankBudget = config.EmptyLine;

    for (var rowIdx = startIdx; ; rowIdx++)
    {
        var row = sheet.GetRow(rowIdx);
        if (row == null || !Util.CheckValideRow(row))
        {
            if (blankBudget-- > 0)
            {
                continue;
            }

            books[bookKey].SheetValideRow[sheet.SheetName] = rowIdx;
            return keys;
        }

        var key = "";
        for (var col = startCheckCell; col < startCheckCell + checkCellCount; ++col)
        {
            var cell = row.GetCell(col);
            if (cell == null || cell.CellType != CellType.Numeric)
            {
                allNum = false;
            }
            key += Util.GetCellValue(cell);
        }

        // The stored key never carries the row index; only the uniqueness check does.
        var uniqueKey = addRowID ? key + "." + rowIdx : key;
        if (!seen.Add(uniqueKey))
        {
            return null;
        }

        keys.Add(new string2int(key, rowIdx));
    }
}