/// <summary>
/// Builds a <see cref="ClosedSets"/> from the match file at the relative path
/// <c>..\..\..\..\submission1.csv</c>.
/// </summary>
/// <remarks>
/// Each line of the file is split on commas. Every field except the last one is parsed as an
/// integer id, and the ids collected for the line are passed to <c>ClosedSets.AddMatch</c> as one match.
/// Fields that are not valid integers are skipped instead of being recorded as id 0.
/// </remarks>
/// <param name="allData">Rows passed to the <see cref="ClosedSets"/> constructor.</param>
/// <returns>The <see cref="ClosedSets"/> instance after every line has been added.</returns>
public static ClosedSets LoadOriginalMatches(Row[] allData)
{
    IEnumerable<string> lines = File.ReadLines(@"..\..\..\..\submission1.csv");
    ClosedSets closedSets = new ClosedSets(allData);

    foreach (string line in lines)
    {
        string[] fields = line.Split(',');
        List<int> eids = new List<int>();

        // NOTE(review): the last field is intentionally left out, exactly as before —
        // presumably every line ends with a trailing comma; confirm against the file format.
        for (int i = 0; i < fields.Length - 1; i++)
        {
            int eid;

            // Only keep ids that actually parsed; a failed TryParse leaves eid at 0,
            // which would otherwise be added to the match as a bogus id.
            if (int.TryParse(fields[i], out eid))
            {
                eids.Add(eid);
            }
        }

        // Called for every line, even when no id was collected (this already happened for blank lines).
        closedSets.AddMatch(eids);
    }

    return closedSets;
}
/// <summary>
/// Groups <paramref name="data"/> by <paramref name="groupingValue"/> and, inside each eligible group,
/// adds every ordered pair of distinct rows whose <paramref name="softEquals"/> score is at least
/// <paramref name="softEqualsCount"/> to <paramref name="matches"/>.
/// </summary>
/// <remarks>
/// Groups are skipped when they contain more than 100 rows, fewer than 2 rows, or have the key
/// <c>"BADFORMAT"</c>. Each pair is visited in both orders (row A with row B, then row B with row A).
/// A header line is appended to <c>_newMatchingRows</c>, and summary counters are written to the console.
/// </remarks>
/// <param name="label">Name of this matching pass; used in the log header and console output.</param>
/// <param name="data">Rows to group and compare.</param>
/// <param name="groupingValue">Produces the grouping key for a row.</param>
/// <param name="softEqualsCount">Minimum <paramref name="softEquals"/> score for a pair to count as a match.</param>
/// <param name="softEquals">Scores how similar two rows are.</param>
/// <param name="matches">Match collection that accepted pairs are added to.</param>
/// <param name="originalNumberOfMatches">Baseline subtracted from <c>matches.NumberOfMatches</c> when reporting the cumulative total.</param>
/// <returns>A new list that this method never adds to, so it is always empty.</returns>
public List<int> AddMatches(string label, Row[] data, Func<Row, string> groupingValue, int softEqualsCount, Func<Row, Row, int> softEquals, ref ClosedSets matches, int originalNumberOfMatches)
{
    _newMatchingRows.Add($"STARTING {label} MATCHES");

    Console.WriteLine();
    Console.WriteLine(label);

    int rejectedPairs = 0;
    int acceptedPairs = 0;
    int modifiedPairs = 0; // never incremented in this method; still part of the report

    foreach (var bucket in data.GroupBy(groupingValue))
    {
        int bucketSize = bucket.Count();

        // Oversized groups are not compared pairwise; optionally report their key.
        if (bucketSize > 100)
        {
            if (_printLargeGroupValues)
            {
                Console.WriteLine(bucket.Key);
            }

            continue;
        }

        // Nothing to pair up, or the key marks unentered / cleaned data.
        if (bucketSize < 2 || bucket.Key == "BADFORMAT")
        {
            continue;
        }

        // Compare every row against every other row in the group (both orders).
        foreach (Row left in bucket)
        {
            foreach (Row right in bucket)
            {
                if (right != left)
                {
                    bool scoreReached = softEquals(left, right) >= softEqualsCount;

                    if (!scoreReached)
                    {
                        rejectedPairs++;

                        if (_printErrors)
                        {
                            PrintingLibrary.PrintPair(left, right);
                        }

                        continue;
                    }

                    // AddMatch reports false when it did not add the pair; only count real additions.
                    if (!matches.AddMatch(left, right))
                    {
                        continue;
                    }

                    acceptedPairs++;

                    if (_printActuals)
                    {
                        _newMatchingRows.Add(left.ToString());
                        _newMatchingRows.Add(right.ToString());
                        _newMatchingRows.Add("");
                    }
                }
            }
        }
    }

    Console.WriteLine($"Groups thrown out: {rejectedPairs}");
    Console.WriteLine($"Match added: {acceptedPairs}");
    Console.WriteLine($"Match modified: {modifiedPairs}");
    Console.WriteLine($"Cumulative Matches Found: {matches.NumberOfMatches - originalNumberOfMatches}");
    PrintingLibrary.PrintRemainingRowCount(data, matches);

    return new List<int>();
}