/// <summary>
/// Builds the gender-agreement distribution conditioned on whether a pair of rows is a
/// known match or not.
/// </summary>
/// <remarks>
/// Every unordered pair of rows falls into one of three buckets:
/// index 0 - at least one of the two GENDER values is the empty string;
/// index 1 - both values are non-blank and equal;
/// index 2 - both values are non-blank and different.
/// Pairs inside each match set are counted directly. The non-match counts are the total
/// number of pairs in <paramref name="allData"/> per bucket minus the matched counts.
/// </remarks>
/// <param name="allData">All rows; indexed here by <c>EnterpriseID</c>.</param>
/// <param name="matches">Sets of enterprise IDs; every pair within a set is treated as a match.</param>
/// <returns>
/// A <see cref="ConditionedDistribution"/> whose <c>GivenMatch</c> and <c>GivenNoMatch</c> are the
/// results of passing the two count arrays through <c>CountsToDistribution</c>.
/// </returns>
/// <exception cref="KeyNotFoundException">
/// A match set with two or more entries references an ID that is not present in <paramref name="allData"/>.
/// </exception>
public static ConditionedDistribution SeparateGenderCalculation(Row[] allData, List<List<int>> matches)
{
    // Index rows by enterprise ID; a later row with the same ID replaces an earlier one.
    var eidToRow = new Dictionary<int, Row>();
    foreach (var row in allData)
    {
        eidToRow[row.EnterpriseID] = row;
    }

    // Count the matched pairs per bucket.
    long[] givenMatch = new long[3];
    foreach (var set in matches)
    {
        // Stop one short of the end: the last element has no partner to its right, and a
        // single-element set must not trigger a lookup at all.
        for (int i = 0; i + 1 < set.Count; i++)
        {
            var ri = eidToRow[set[i]];
            for (int j = i + 1; j < set.Count; j++)
            {
                var rj = eidToRow[set[j]];
                if (ri.GENDER == "" || rj.GENDER == "")
                {
                    givenMatch[0]++;
                }
                else if (ri.GENDER == rj.GENDER)
                {
                    givenMatch[1]++;
                }
                else
                {
                    givenMatch[2]++;
                }
            }
        }
    }

    // Tally rows per distinct GENDER value in a single pass. The totals are derived from
    // these per-value counts (rather than from hard-coded "M"/"F" counts) so that they
    // classify pairs exactly the way the matched-pair loop above does, whatever non-blank
    // values the data contains.
    long totalRows = 0;
    long blank = 0;
    long sameValuePairs = 0;
    foreach (var group in allData.GroupBy(r => r.GENDER))
    {
        long n = group.LongCount();
        totalRows += n;
        if (group.Key == "")
        {
            blank = n;
        }
        else
        {
            // n choose 2 pairs share this non-blank value.
            sameValuePairs += n * (n - 1) / 2;
        }
    }

    long nonBlank = totalRows - blank;
    long nonBlankPairs = nonBlank * (nonBlank - 1) / 2;

    long[] givenNoMatch = new long[3];
    // Blank-blank pairs plus blank-with-non-blank pairs.
    givenNoMatch[0] = blank * (blank - 1) / 2 + blank * nonBlank - givenMatch[0];
    givenNoMatch[1] = sameValuePairs - givenMatch[1];
    // Non-blank pairs that do not share a value.
    givenNoMatch[2] = nonBlankPairs - sameValuePairs - givenMatch[2];

    ConditionedDistribution toReturn = new ConditionedDistribution();
    toReturn.GivenMatch = CountsToDistribution(givenMatch);
    toReturn.GivenNoMatch = CountsToDistribution(givenNoMatch);
    return toReturn;
}
/// <summary>
/// Builds the distance distribution for a field, conditioned on whether a pair of rows is a
/// known match or not.
/// </summary>
/// <remarks>
/// The overall per-distance counts come from <c>DistanceCounts</c> and the counts restricted to
/// known matches from <c>DistanceCountsGivenMatched</c>; the unmatched counts are their
/// element-wise difference over <c>largestDistance + 2</c> buckets.
/// NOTE(review): the meaning of the buckets beyond the largest distance is defined by those
/// helpers, which are not visible here - confirm against them.
/// </remarks>
/// <param name="knownMatches">Sets of IDs known to match; forwarded to <c>DistanceCountsGivenMatched</c>.</param>
/// <param name="fieldMatches">Per-row match lists carrying a <c>Distance</c> for each entry.</param>
/// <param name="totalRowCount">Total number of rows; forwarded to <c>DistanceCounts</c>.</param>
/// <returns>
/// A <see cref="ConditionedDistribution"/> whose <c>GivenMatch</c> and <c>GivenNoMatch</c> are the
/// results of passing the matched and unmatched count arrays through <c>CountsToDistribution</c>.
/// </returns>
public static ConditionedDistribution ComputeConditionedDistribution(List<List<int>> knownMatches, RowMatchObject fieldMatches, int totalRowCount)
{
    // Largest distance appearing in any row's match list.
    int maxDistance = fieldMatches.Matches.MatchArray.Max(rowMatches => rowMatches.Max(entry => entry.Distance));

    var overallCounts = DistanceCounts(fieldMatches, totalRowCount, maxDistance);
    var matchedCounts = DistanceCountsGivenMatched(knownMatches, fieldMatches, maxDistance);

    // Unmatched = overall - matched, bucket by bucket.
    int bucketCount = maxDistance + 2;
    long[] unmatchedCounts = new long[bucketCount];
    for (int bucket = 0; bucket < bucketCount; bucket++)
    {
        unmatchedCounts[bucket] = overallCounts[bucket] - matchedCounts[bucket];
    }

    return new ConditionedDistribution
    {
        GivenMatch = CountsToDistribution(matchedCounts),
        GivenNoMatch = CountsToDistribution(unmatchedCounts)
    };
}