public void HarmonizePolarities(GenotypeDictionary dict1, GenotypeDictionary dict2, out GenotypeDictionary harmonizedDict1, out GenotypeDictionary harmonizedDict2)
        {
            //Produces two new dictionaries holding only the genotypes whose experiment survives
            //every filter below, with the dict2 genotypes reverse-complemented wherever its allele
            //set does not line up with the one seen in dict1.
            //Side effects: rejected experiments are removed from dict1 and dict2 themselves, and the
            //MyRemovedExp... collections are replaced with the names rejected by each filter.
            //Genotype strings are read at positions 0 and 2, i.e. the two alleles are expected to be
            //separated by a single character.
            CIStringCollection          seenExperiments = new CIStringCollection();
            Dictionary <string, string> observed1       = new Dictionary <string, string>(StringComparer.InvariantCultureIgnoreCase);
            Dictionary <string, string> observed2       = new Dictionary <string, string>(StringComparer.InvariantCultureIgnoreCase);
            int processed;

            //Pass over every genotype key of dict1 (dict2 is expected to hold the same keys) and
            //collect, per experiment, the distinct alleles seen in each source.
            OnStatusChange("Extracting experiments and SNP types");
            processed = 0;
            foreach (string genotypeKey in dict1.Keys)
            {
                string experiment    = dict1.GetExperiment(genotypeKey);
                string genotype1     = dict1[genotypeKey].ToUpper();
                string genotype2     = dict2[genotypeKey].ToUpper();
                bool   firstSighting = !seenExperiments.Contains(experiment);

                if (firstSighting)
                {
                    seenExperiments.Add(experiment);
                }
                NoteObservedAlleles(observed1, experiment, genotype1, firstSighting);
                NoteObservedAlleles(observed2, experiment, genotype2, firstSighting);
                ReportProgressEveryHundred("Extracting experiments and SNP types (processed genotype ", ++processed);
            }

            //Filter 1: experiments showing more than two alleles in source 1.
            MyRemovedExpTooManyAlleles1 = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 1/6");
            processed = 0;
            foreach (string experiment in observed1.Keys)
            {
                if (observed1[experiment].Length > 2)
                {
                    PurgeExperimentFromBoth(dict1, dict2, experiment);
                    MyRemovedExpTooManyAlleles1.Add(experiment);
                }
                ReportProgressEveryHundred("Extracting harmonizable data step 1/6 (processed experiment ", ++processed);
            }

            //Filter 2: experiments showing more than two alleles in source 2.
            MyRemovedExpTooManyAlleles2 = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 2/6");
            processed = 0;
            foreach (string experiment in observed2.Keys)
            {
                if (observed2[experiment].Length > 2)
                {
                    PurgeExperimentFromBoth(dict1, dict2, experiment);
                    MyRemovedExpTooManyAlleles2.Add(experiment);
                }
                ReportProgressEveryHundred("Extracting harmonizable data step 2/6 (processed experiment ", ++processed);
            }

            //Filter 3: experiments where each source shows exactly one allele.
            MyRemovedExpMonomorphicInBoth = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 3/6");
            processed = 0;
            foreach (string experiment in observed1.Keys)
            {
                bool singleAlleleInBoth = observed1[experiment].Length == 1 && observed2[experiment].Length == 1;
                if (singleAlleleInBoth)
                {
                    PurgeExperimentFromBoth(dict1, dict2, experiment);
                    MyRemovedExpMonomorphicInBoth.Add(experiment);
                }
                ReportProgressEveryHundred("Extracting harmonizable data step 3/6 (processed experiment ", ++processed);
            }

            //Filter 4: among the experiments that passed filters 1-3, those whose two allele sets
            //are reported as incompatible by IsCompatibleSNPTypes.
            MyRemovedExpIncompatibleSNPTypes = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 4/6");
            processed = 0;
            foreach (string experiment in observed1.Keys)
            {
                if (!FailedAlleleCountFilters(experiment) &&
                    !this.IsCompatibleSNPTypes(observed1[experiment], observed2[experiment]))
                {
                    PurgeExperimentFromBoth(dict1, dict2, experiment);
                    MyRemovedExpIncompatibleSNPTypes.Add(experiment);
                }
                ReportProgressEveryHundred("Extracting harmonizable data step 4/6 (processed experiment ", ++processed);
            }

            //Filter 5: among the experiments that passed filters 1-4, those whose allele set does
            //not contain exactly one A-or-T allele (i.e. A/T and G/C SNPs).
            MyRemovedExpATCG = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 5/6");
            processed = 0;
            foreach (string experiment in observed1.Keys)
            {
                if (!FailedAlleleCountFilters(experiment) && !MyRemovedExpIncompatibleSNPTypes.Contains(experiment))
                {
                    //Judge by source 1 when it shows more than one allele, otherwise by source 2.
                    string decidingAlleles = observed1[experiment].Length > 1
                                                 ? observed1[experiment]
                                                 : observed2[experiment];
                    if (this.GetNumberOfATalleles(decidingAlleles) != 1)
                    {
                        PurgeExperimentFromBoth(dict1, dict2, experiment);
                        MyRemovedExpATCG.Add(experiment);
                    }
                }
                ReportProgressEveryHundred("Extracting harmonizable data step 5/6 (processed experiment ", ++processed);
            }

            //Step 6: copy the genotypes of every experiment that was not rejected into the outputs.
            harmonizedDict1 = new GenotypeDictionary();
            harmonizedDict2 = new GenotypeDictionary();
            OnStatusChange("Extracting harmonizable data step 6/6");
            processed = 0;
            foreach (string genotypeKey in dict1.Keys)
            {
                string experiment = dict1.GetExperiment(genotypeKey);
                bool   rejected   = FailedAlleleCountFilters(experiment) ||
                                    MyRemovedExpIncompatibleSNPTypes.Contains(experiment) ||
                                    MyRemovedExpATCG.Contains(experiment);
                if (!rejected)
                {
                    harmonizedDict1.Add(genotypeKey, dict1[genotypeKey]);
                    harmonizedDict2.Add(genotypeKey, dict2[genotypeKey]);
                }
                ReportProgressEveryHundred("Extracting harmonizable data step 6/6 (processed genotype ", ++processed);
            }

            //Finally rewrite the dict2 copy of each genotype whose experiment's allele sets
            //disagree between the two sources.
            OnStatusChange("Harmonizing");
            processed = 0;
            foreach (string genotypeKey in harmonizedDict1.Keys)
            {
                string experiment = harmonizedDict1.GetExperiment(genotypeKey);
                if (RequiresReverseComplement(observed1[experiment], observed2[experiment]))
                {
                    harmonizedDict2[genotypeKey] = this.ReverseComplement(harmonizedDict2[genotypeKey]);
                }
                ReportProgressEveryHundred("Harmonizing (processed genotype ", ++processed);
            }
        }

        //Records the alleles at positions 0 and 2 of the genotype string in the per-experiment
        //allele string, appending each allele only if it is not already present.
        private static void NoteObservedAlleles(Dictionary <string, string> observed, string experiment, string genotype, bool firstSighting)
        {
            string alleleA = genotype.Substring(0, 1);
            string alleleB = genotype.Substring(2, 1);

            if (firstSighting)
            {
                //New experiment: store one allele if both are equal, otherwise both.
                observed.Add(experiment, alleleA == alleleB ? alleleA : alleleA + alleleB);
                return;
            }

            if (!observed[experiment].Contains(alleleA))
            {
                observed[experiment] = observed[experiment] + alleleA;
            }
            if (!observed[experiment].Contains(alleleB))
            {
                observed[experiment] = observed[experiment] + alleleB;
            }
        }

        //Sends a status message built from the prefix, the count and a closing parenthesis,
        //but only on every hundredth item.
        private void ReportProgressEveryHundred(string messagePrefix, int processed)
        {
            if (processed % 100 == 0)
            {
                OnStatusChange(messagePrefix + processed + ")");
            }
        }

        //Removes the experiment from the first dictionary, then from the second.
        private static void PurgeExperimentFromBoth(GenotypeDictionary first, GenotypeDictionary second, string experiment)
        {
            first.RemoveExperiment(experiment);
            second.RemoveExperiment(experiment);
        }

        //True when the experiment was rejected by one of the first three filters
        //(too many alleles in source 1, too many in source 2, or monomorphic in both).
        private bool FailedAlleleCountFilters(string experiment)
        {
            return MyRemovedExpTooManyAlleles1.Contains(experiment) ||
                   MyRemovedExpTooManyAlleles2.Contains(experiment) ||
                   MyRemovedExpMonomorphicInBoth.Contains(experiment);
        }

        //Decides whether the source 2 genotypes of an experiment must be reverse-complemented,
        //given the distinct alleles seen in source 1 and source 2. Throws when the allele counts
        //are anything other than 2/2, 1/2 or 2/1.
        private static bool RequiresReverseComplement(string alleles1, string alleles2)
        {
            if (alleles1.Length == 2 && alleles2.Length == 2)
            {
                //Same two alleles in either order means nothing has to change.
                bool sameOrder = alleles1[0] == alleles2[0] && alleles1[1] == alleles2[1];
                bool swapped   = alleles1[0] == alleles2[1] && alleles1[1] == alleles2[0];
                return !(sameOrder || swapped);
            }
            if (alleles1.Length == 1 && alleles2.Length == 2)
            {
                //The single allele of source 1 must be one of the two in source 2.
                return !alleles2.Contains(alleles1);
            }
            if (alleles1.Length == 2 && alleles2.Length == 1)
            {
                //The single allele of source 2 must be one of the two in source 1.
                return !alleles1.Contains(alleles2);
            }
            throw new Exception("Inappropriate number of alleles detected when converting SNP types");
        }
Ejemplo n.º 2
0
        public void Compare()
        {
            //Compares the valid genotypes of the two sources. Keys present in only one source are
            //recorded in MyMissingIn1 / MyMissingIn2; keys present in both are compared (after
            //optional polarity harmonization) and recorded in MyCompared plus either MyIdentical
            //or MyDifferent. The result collections always receive the original, unharmonized
            //values taken from the two sources.
            GenotypeDictionary shared1, shared2;
            GenotypeDictionary harmonizedDict1, harmonizedDict2;
            int processed;

            MyMissingIn1 = new GenotypeDictionary();
            MyMissingIn2 = new GenotypeDictionary();

            MyCompared  = new GenotypeResultPairCollection();
            MyIdentical = new GenotypeResultPairCollection();
            MyDifferent = new GenotypeResultPairCollection();

            shared1 = new GenotypeDictionary();
            shared2 = new GenotypeDictionary();

            MyHarmonizer = new ComparisonHarmonizer();

            //Split the keys of source 1 into those also found in source 2 and those missing there.
            processed = 0;
            OnStatusChange("Find common genotypes, step 1");
            foreach (string key in MySourceInfo1.ValidGenotypes.Keys)
            {
                if (!MySourceInfo2.ValidGenotypes.Contains(key))
                {
                    MyMissingIn2.Add(key);
                }
                else
                {
                    shared1.Add(key, MySourceInfo1.ValidGenotypes[key]);
                }
                if (++processed % 100 == 0)
                {
                    OnStatusChange("Find common genotypes, step 1 (processed genotype " + processed + ")");
                }
            }

            //Same split in the other direction, for the keys of source 2.
            processed = 0;
            OnStatusChange("Find common genotypes, step 2");
            foreach (string key in MySourceInfo2.ValidGenotypes.Keys)
            {
                if (!MySourceInfo1.ValidGenotypes.Contains(key))
                {
                    MyMissingIn1.Add(key);
                }
                else
                {
                    shared2.Add(key, MySourceInfo2.ValidGenotypes[key]);
                }
                if (++processed % 100 == 0)
                {
                    OnStatusChange("Find common genotypes, step 2 (processed genotype " + processed + ")");
                }
            }

            //Both shared dictionaries must hold the same number of entries.
            if (shared1.Count != shared2.Count)
            {
                throw new Exception("Comparison error. Lists are of unequal length.");
            }

            //Only in genotype mode, and only when the settings ask for it, replace the shared
            //dictionaries with their harmonized counterparts.
            if (MySettings.Mode == InvestigationMode.Genotype && MySettings.HarmonizePolarities)
            {
                OnStatusChange("Harmonizing polarities");
                MyHarmonizer.MyStatusChangeHandler += MyHarmonizer_StatusChanged;
                MyHarmonizer.HarmonizePolarities(shared1, shared2, out harmonizedDict1, out harmonizedDict2);
                shared1 = harmonizedDict1;
                shared2 = harmonizedDict2;
            }

            //Compare key by key and file every pair under "compared" plus one of the two verdicts.
            processed = 0;
            OnStatusChange("Comparing genotypes");
            foreach (string key in shared1.Keys)
            {
                if (!this.IsEqual(shared1[key], shared2[key]))
                {
                    MyDifferent.Add(key, MySourceInfo1.ValidGenotypes[key], MySourceInfo2.ValidGenotypes[key]);
                }
                else
                {
                    MyIdentical.Add(key, MySourceInfo1.ValidGenotypes[key], MySourceInfo2.ValidGenotypes[key]);
                }
                MyCompared.Add(key, MySourceInfo1.ValidGenotypes[key], MySourceInfo2.ValidGenotypes[key]);
                if (++processed % 100 == 0)
                {
                    OnStatusChange("Comparing genotypes (processed genotype " + processed + ")");
                }
            }
        }
Ejemplo n.º 3
0
        public void Load(DataTable results, InvestigationMode cmpType)
        {
            //Reads the "Item", "Experiment" and "Alleles" columns of every row and sorts the rows
            //into four result sets, which are published to MyValidGenotypes, MyDuplFailures,
            //MyNoResults and MyInvalidAlleles once both passes have finished:
            //  - no result:         the alleles equal MyMissingValueCode (ignoring case),
            //  - invalid alleles:   the alleles are rejected by IsValidAlleleCombination,
            //  - duplicate failure: the same item/experiment occurs with differing alleles,
            //  - valid genotype:    everything else, stored once per item/experiment.
            //Throws ArgumentNullException when results is null.
            //
            //All case-insensitive comparisons use StringComparison.OrdinalIgnoreCase rather than
            //comparing ToUpper() results: upper-casing is culture-sensitive (e.g. 'i' does not map
            //to 'I' under a Turkish culture), so the outcome would depend on the thread culture.
            //string.Equals is also null-safe.
            string                   tempItem, tempExp, tempAlleles;
            GenotypeDictionary       tempDuplFailures, tempGenotypeDict, tempDuplTestDict;
            GenotypeCollection       tempNoResults;
            GenotypeResultCollection tempInvalidAlleles;

            if (results == null)
            {
                throw new System.ArgumentNullException("results");
            }

            MyTotalGenotypesCount = results.Rows.Count;

            //Initiate dictionaries and collections.
            tempGenotypeDict = new GenotypeDictionary();
            tempDuplTestDict = new GenotypeDictionary();
            tempDuplFailures = new GenotypeDictionary();

            tempInvalidAlleles = new GenotypeResultCollection();

            tempNoResults = new GenotypeCollection();

            //First pass: duplicate test. Only rows with a result and valid alleles take part.
            foreach (DataRow row in results.Rows)
            {
                //Read values.
                tempItem    = row["Item"].ToString();
                tempExp     = row["Experiment"].ToString();
                tempAlleles = row["Alleles"].ToString();

                if (string.Equals(tempAlleles, MyMissingValueCode, System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }
                if (!this.IsValidAlleleCombination(tempAlleles, cmpType))
                {
                    continue;
                }

                if (!tempDuplTestDict.Contains(tempItem, tempExp))
                {
                    //First occurrence of this item/experiment, remember its alleles.
                    tempDuplTestDict.Add(tempItem, tempExp, tempAlleles);
                }
                else if (!string.Equals(tempDuplTestDict[tempItem, tempExp], tempAlleles, System.StringComparison.OrdinalIgnoreCase) &&
                         !tempDuplFailures.Contains(tempItem, tempExp))
                {
                    //Seen before with different alleles: remember this key as a duplicate failure (once).
                    tempDuplFailures.Add(tempItem, tempExp, "");
                }
            }

            //Second pass: classify every row, now that all duplicate failures are known.
            foreach (DataRow row in results.Rows)
            {
                //Read values.
                tempItem    = row["Item"].ToString();
                tempExp     = row["Experiment"].ToString();
                tempAlleles = row["Alleles"].ToString();

                if (string.Equals(tempAlleles, MyMissingValueCode, System.StringComparison.OrdinalIgnoreCase))
                {
                    //Skipped because of no result.
                    tempNoResults.Add(tempItem, tempExp);
                }
                else if (!this.IsValidAlleleCombination(tempAlleles, cmpType))
                {
                    //Skipped because invalid alleles.
                    tempInvalidAlleles.Add(tempItem, tempExp, tempAlleles);
                }
                else if (!tempDuplFailures.Contains(tempItem, tempExp) && !tempGenotypeDict.Contains(tempItem, tempExp))
                {
                    //Neither a duplicate failure nor already existing, go ahead and add.
                    tempGenotypeDict.Add(tempItem, tempExp, tempAlleles);
                }
            }

            MyValidGenotypes = tempGenotypeDict;
            MyDuplFailures   = tempDuplFailures;
            MyNoResults      = tempNoResults;
            MyInvalidAlleles = tempInvalidAlleles;
        }