예제 #1
0
        /// <summary>
        /// Builds the distribution of gender agreement between pairs of rows, conditioned on
        /// whether the pair is a known match or not.
        /// </summary>
        /// <remarks>
        /// Every unordered pair of rows is placed in one of three buckets:
        /// index 0 - at least one of the two genders is the empty string;
        /// index 1 - neither gender is empty and the two are equal;
        /// index 2 - neither gender is empty and the two differ.
        /// Matched pairs are counted directly from <paramref name="matches"/>. Non-matched pairs are
        /// obtained by subtracting those counts from the number of pairs of each kind in
        /// <paramref name="allData"/>.
        /// </remarks>
        /// <param name="allData">All rows. When several rows share an EnterpriseID the last one is used for lookups.</param>
        /// <param name="matches">
        /// Groups of EnterpriseIDs; every pair inside a group is counted as a matched pair.
        /// Looking up an ID that is absent from <paramref name="allData"/> throws <see cref="KeyNotFoundException"/>.
        /// </param>
        /// <returns>The bucket counts for matched and non-matched pairs, each passed through CountsToDistribution.</returns>
        public static ConditionedDistribution SeparateGenderCalculation(Row[] allData, List <List <int> > matches)
        {
            ConditionedDistribution toReturn = new ConditionedDistribution();

            var eidToRow = new Dictionary <int, Row>();

            foreach (var row in allData)
            {
                eidToRow[row.EnterpriseID] = row;
            }

            // Classify every pair inside each known-match group.
            long[] givenMatch = new long[3];
            foreach (var set in matches)
            {
                for (int i = 0; i < set.Count; i++)
                {
                    for (int j = i + 1; j < set.Count; j++)
                    {
                        var ri = eidToRow[set[i]];
                        var rj = eidToRow[set[j]];

                        if (ri.GENDER == "" || rj.GENDER == "")
                        {
                            givenMatch[0]++;
                        }
                        else if (ri.GENDER == rj.GENDER)
                        {
                            givenMatch[1]++;
                        }
                        else
                        {
                            givenMatch[2]++;
                        }
                    }
                }
            }

            // Tally the population per distinct gender value in a single pass. Using the same
            // rules as the loop above (blank vs. equal vs. different) keeps the totals consistent
            // with the matched counts even when a value other than "M"/"F" appears; with only
            // "", "M" and "F" present the totals equal the closed-form M/F expressions.
            long blank           = 0;
            long nonBlank        = 0;
            long sameGenderPairs = 0;
            foreach (var genderGroup in allData.GroupBy(r => r.GENDER))
            {
                long size = genderGroup.LongCount();
                if (genderGroup.Key == "")
                {
                    blank += size;
                }
                else
                {
                    nonBlank        += size;
                    sameGenderPairs += size * (size - 1) / 2;
                }
            }

            long[] givenNoMatch = new long[3];
            // Pairs with at least one blank: blank-blank pairs plus blank/non-blank pairs.
            givenNoMatch[0] = blank * (blank - 1) / 2 + blank * nonBlank - givenMatch[0];
            // Pairs sharing the same non-blank gender.
            givenNoMatch[1] = sameGenderPairs - givenMatch[1];
            // Remaining non-blank pairs necessarily have different genders.
            givenNoMatch[2] = nonBlank * (nonBlank - 1) / 2 - sameGenderPairs - givenMatch[2];

            toReturn.GivenMatch   = CountsToDistribution(givenMatch);
            toReturn.GivenNoMatch = CountsToDistribution(givenNoMatch);

            return toReturn;
        }
예제 #2
0
        /// <summary>
        /// Builds the distribution of field distances conditioned on whether a pair of rows is a
        /// known match or not.
        /// </summary>
        /// <remarks>
        /// The per-distance counts for non-matched pairs are derived by subtracting the counts for
        /// known matches from the overall counts, bucket by bucket.
        /// </remarks>
        /// <param name="knownMatches">Known match groups, forwarded to DistanceCountsGivenMatched.</param>
        /// <param name="fieldMatches">Field match data; its Matches.MatchArray entries supply the distances.</param>
        /// <param name="totalRowCount">Total number of rows, forwarded to DistanceCounts.</param>
        /// <returns>The matched and non-matched bucket counts, each passed through CountsToDistribution.</returns>
        public static ConditionedDistribution ComputeConditionedDistribution(List <List <int> > knownMatches, RowMatchObject fieldMatches, int totalRowCount)
        {
            // Largest distance recorded anywhere in the match array.
            int maxDistance = fieldMatches.Matches.MatchArray.Max(entries => entries.Max(entry => entry.Distance));

            var overallCounts = DistanceCounts(fieldMatches, totalRowCount, maxDistance);
            var matchedCounts = DistanceCountsGivenMatched(knownMatches, fieldMatches, maxDistance);

            // NOTE(review): the helpers are indexed up to maxDistance + 1, so there is presumably
            // one extra bucket beyond the distances 0..maxDistance - confirm against DistanceCounts.
            int    bucketCount     = maxDistance + 2;
            long[] unmatchedCounts = new long[bucketCount];
            for (int bucket = 0; bucket < bucketCount; bucket++)
            {
                unmatchedCounts[bucket] = overallCounts[bucket] - matchedCounts[bucket];
            }

            return new ConditionedDistribution
            {
                GivenMatch   = CountsToDistribution(matchedCounts),
                GivenNoMatch = CountsToDistribution(unmatchedCounts)
            };
        }