/// <summary>
/// Harmonizes the genotypes of <paramref name="dict2"/> after <paramref name="dict1"/>:
/// experiments that cannot be harmonized are filtered out, and the remaining
/// <paramref name="dict2"/> genotypes are reverse-complemented wherever their alleles
/// do not match the alleles seen in <paramref name="dict1"/>.
/// </summary>
/// <remarks>
/// An experiment is dropped, and recorded in the matching MyRemovedExp* collection, when:
/// it shows more than two distinct alleles in source 1 or in source 2; it is monomorphic
/// in both sources; <c>IsCompatibleSNPTypes</c> rejects its two allele sets; or
/// <c>GetNumberOfATalleles</c> does not return exactly 1 for its alleles (A/T or G/C SNPs).
/// <para>
/// Side effect: <c>RemoveExperiment</c> is called on both input dictionaries for every
/// dropped experiment, so <paramref name="dict1"/> and <paramref name="dict2"/> are modified.
/// </para>
/// <para>
/// NOTE(review): the code indexes <paramref name="dict2"/> with every key of
/// <paramref name="dict1"/> and reads the alleles at character positions 0 and 2 of each
/// genotype string; presumably genotypes look like "X/Y" and both dictionaries share the
/// same keys - confirm against the callers.
/// </para>
/// </remarks>
/// <param name="dict1">Reference genotypes; its keys drive every loop.</param>
/// <param name="dict2">Genotypes to be harmonized after <paramref name="dict1"/>.</param>
/// <param name="harmonizedDict1">Receives the <paramref name="dict1"/> entries that passed all filters.</param>
/// <param name="harmonizedDict2">Receives the matching <paramref name="dict2"/> entries, reverse-complemented where needed.</param>
/// <exception cref="ArgumentNullException">A dictionary argument is null.</exception>
/// <exception cref="InvalidOperationException">
/// A surviving experiment does not have one or two alleles in each source (with at least
/// one source showing two).
/// </exception>
public void HarmonizePolarities(GenotypeDictionary dict1, GenotypeDictionary dict2, out GenotypeDictionary harmonizedDict1, out GenotypeDictionary harmonizedDict2) {
    string tempExp, tempAlleles1, tempAlleles2;
    Dictionary<string, string> allelesDict1, allelesDict2;
    int counter;

    if (dict1 == null) {
        throw new ArgumentNullException("dict1");
    }
    if (dict2 == null) {
        throw new ArgumentNullException("dict2");
    }

    //Find all experiments and their SNP types (the experiment names should be the same
    //in both dictionaries, use dict1 here). Each dictionary maps an experiment name to the
    //string of distinct alleles seen for it, in order of first appearance.
    allelesDict1 = new Dictionary<string, string>(StringComparer.InvariantCultureIgnoreCase);
    allelesDict2 = new Dictionary<string, string>(StringComparer.InvariantCultureIgnoreCase);
    OnStatusChange("Extracting experiments and SNP types");
    counter = 0;
    foreach (string tempKey in dict1.Keys) {
        tempExp = dict1.GetExperiment(tempKey);
        //Allele codes are machine data: upper-case them independently of the current culture.
        tempAlleles1 = dict1[tempKey].ToUpperInvariant();
        tempAlleles2 = dict2[tempKey].ToUpperInvariant();
        AccumulateAlleles(allelesDict1, tempExp, tempAlleles1);
        AccumulateAlleles(allelesDict2, tempExp, tempAlleles2);
        counter++;
        ReportHarmonizeProgress("Extracting experiments and SNP types", "genotype", counter);
    }

    //Remove data from SNPs with more than two alleles in source 1 from both dictionaries and save.
    MyRemovedExpTooManyAlleles1 = new CIStringCollection();
    OnStatusChange("Extracting harmonizable data step 1/6");
    counter = 0;
    foreach (KeyValuePair<string, string> entry in allelesDict1) {
        if (entry.Value.Length > 2) {
            dict1.RemoveExperiment(entry.Key);
            dict2.RemoveExperiment(entry.Key);
            MyRemovedExpTooManyAlleles1.Add(entry.Key);
        }
        counter++;
        ReportHarmonizeProgress("Extracting harmonizable data step 1/6", "experiment", counter);
    }

    //Remove data from SNPs with more than two alleles in source 2 from both dictionaries and save.
    MyRemovedExpTooManyAlleles2 = new CIStringCollection();
    OnStatusChange("Extracting harmonizable data step 2/6");
    counter = 0;
    foreach (KeyValuePair<string, string> entry in allelesDict2) {
        if (entry.Value.Length > 2) {
            dict1.RemoveExperiment(entry.Key);
            dict2.RemoveExperiment(entry.Key);
            MyRemovedExpTooManyAlleles2.Add(entry.Key);
        }
        counter++;
        ReportHarmonizeProgress("Extracting harmonizable data step 2/6", "experiment", counter);
    }

    //Remove data from SNPs which are monomorphic in both sources and save.
    MyRemovedExpMonomorphicInBoth = new CIStringCollection();
    OnStatusChange("Extracting harmonizable data step 3/6");
    counter = 0;
    foreach (KeyValuePair<string, string> entry in allelesDict1) {
        if (entry.Value.Length == 1 && allelesDict2[entry.Key].Length == 1) {
            dict1.RemoveExperiment(entry.Key);
            dict2.RemoveExperiment(entry.Key);
            MyRemovedExpMonomorphicInBoth.Add(entry.Key);
        }
        counter++;
        ReportHarmonizeProgress("Extracting harmonizable data step 3/6", "experiment", counter);
    }

    //Remove data from SNPs with incompatible SNP types from both dictionaries and save.
    //Experiments already removed by an earlier step are skipped.
    MyRemovedExpIncompatibleSNPTypes = new CIStringCollection();
    OnStatusChange("Extracting harmonizable data step 4/6");
    counter = 0;
    foreach (string exp in allelesDict1.Keys) {
        if (!MyRemovedExpTooManyAlleles1.Contains(exp)
            && !MyRemovedExpTooManyAlleles2.Contains(exp)
            && !MyRemovedExpMonomorphicInBoth.Contains(exp)) {
            if (!this.IsCompatibleSNPTypes(allelesDict1[exp], allelesDict2[exp])) {
                dict1.RemoveExperiment(exp);
                dict2.RemoveExperiment(exp);
                MyRemovedExpIncompatibleSNPTypes.Add(exp);
            }
        }
        counter++;
        ReportHarmonizeProgress("Extracting harmonizable data step 4/6", "experiment", counter);
    }

    //Remove data from SNPs with A/T or G/C SNPs from both dictionaries and save.
    MyRemovedExpATCG = new CIStringCollection();
    OnStatusChange("Extracting harmonizable data step 5/6");
    counter = 0;
    foreach (string exp in allelesDict1.Keys) {
        if (!MyRemovedExpTooManyAlleles1.Contains(exp)
            && !MyRemovedExpTooManyAlleles2.Contains(exp)
            && !MyRemovedExpMonomorphicInBoth.Contains(exp)
            && !MyRemovedExpIncompatibleSNPTypes.Contains(exp)) {
            //Judge the SNP type from source 1 when it shows more than one allele,
            //otherwise from source 2.
            string informativeAlleles = allelesDict1[exp].Length > 1 ? allelesDict1[exp] : allelesDict2[exp];
            if (this.GetNumberOfATalleles(informativeAlleles) != 1) {
                //Either 0 or 2 A or T alleles.
                dict1.RemoveExperiment(exp);
                dict2.RemoveExperiment(exp);
                MyRemovedExpATCG.Add(exp);
            }
        }
        counter++;
        ReportHarmonizeProgress("Extracting harmonizable data step 5/6", "experiment", counter);
    }

    //Store only data from experiments passing all tests.
    harmonizedDict1 = new GenotypeDictionary();
    harmonizedDict2 = new GenotypeDictionary();
    OnStatusChange("Extracting harmonizable data step 6/6");
    counter = 0;
    foreach (string tempKey in dict1.Keys) {
        tempExp = dict1.GetExperiment(tempKey);
        if (!MyRemovedExpTooManyAlleles1.Contains(tempExp)
            && !MyRemovedExpTooManyAlleles2.Contains(tempExp)
            && !MyRemovedExpMonomorphicInBoth.Contains(tempExp)
            && !MyRemovedExpIncompatibleSNPTypes.Contains(tempExp)
            && !MyRemovedExpATCG.Contains(tempExp)) {
            harmonizedDict1.Add(tempKey, dict1[tempKey]);
            harmonizedDict2.Add(tempKey, dict2[tempKey]);
        }
        counter++;
        ReportHarmonizeProgress("Extracting harmonizable data step 6/6", "genotype", counter);
    }

    //Go through dict1. If the SNP type for the current row is different than
    //the SNP type in dict2, convert the row in dict2.
    OnStatusChange("Harmonizing");
    counter = 0;
    foreach (string tempKey in harmonizedDict1.Keys) {
        tempExp = harmonizedDict1.GetExperiment(tempKey);
        tempAlleles1 = allelesDict1[tempExp];
        tempAlleles2 = allelesDict2[tempExp];
        if (RequiresReverseComplement(tempAlleles1, tempAlleles2)) {
            harmonizedDict2[tempKey] = this.ReverseComplement(harmonizedDict2[tempKey]);
        }
        counter++;
        ReportHarmonizeProgress("Harmonizing", "genotype", counter);
    }
}

/// <summary>
/// Appends the two alleles of <paramref name="genotype"/> (characters 0 and 2) to the
/// distinct-allele string stored for <paramref name="experiment"/>, skipping alleles that
/// are already present. Creates the entry if the experiment has not been seen before.
/// </summary>
private static void AccumulateAlleles(Dictionary<string, string> allelesByExperiment, string experiment, string genotype) {
    string seenAlleles;
    string firstAllele = genotype.Substring(0, 1);
    string secondAllele = genotype.Substring(2, 1);

    if (!allelesByExperiment.TryGetValue(experiment, out seenAlleles)) {
        //This experiment has not been processed before.
        seenAlleles = "";
    }
    if (!seenAlleles.Contains(firstAllele)) {
        seenAlleles += firstAllele;
    }
    if (!seenAlleles.Contains(secondAllele)) {
        seenAlleles += secondAllele;
    }
    allelesByExperiment[experiment] = seenAlleles;
}

/// <summary>
/// Decides whether genotypes from source 2 must be reverse-complemented, given the distinct
/// alleles seen for one experiment in source 1 and in source 2.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The allele counts are not 2/2, 1/2 or 2/1.
/// </exception>
private static bool RequiresReverseComplement(string alleles1, string alleles2) {
    if (alleles1.Length == 2 && alleles2.Length == 2) {
        //Same pair of alleles, in either order, means the sources already agree.
        bool sameOrder = alleles1[0] == alleles2[0] && alleles1[1] == alleles2[1];
        bool swappedOrder = alleles1[0] == alleles2[1] && alleles1[1] == alleles2[0];
        return !(sameOrder || swappedOrder);
    }
    if (alleles1.Length == 1 && alleles2.Length == 2) {
        //The single allele of source 1 must be one of the two alleles of source 2.
        return !alleles2.Contains(alleles1);
    }
    if (alleles1.Length == 2 && alleles2.Length == 1) {
        //The single allele of source 2 must be one of the two alleles of source 1.
        return !alleles1.Contains(alleles2);
    }
    throw new InvalidOperationException("Inappropriate number of alleles detected when converting SNP types");
}

/// <summary>
/// Raises a status change for every 100th processed item, using the message format
/// "&lt;stage&gt; (processed &lt;itemKind&gt; &lt;processed&gt;)".
/// </summary>
private void ReportHarmonizeProgress(string stage, string itemKind, int processed) {
    if (processed % 100 == 0) {
        OnStatusChange(stage + " (processed " + itemKind + " " + processed + ")");
    }
}
/// <summary>
/// Compares the valid genotypes of the two sources and fills the result collections:
/// MyMissingIn1 / MyMissingIn2 (keys present in only one source), MyCompared (every
/// compared key), and MyIdentical / MyDifferent (split according to <c>IsEqual</c>).
/// </summary>
/// <remarks>
/// When the settings select genotype mode with polarity harmonization, the common genotypes
/// are first passed through a new <c>ComparisonHarmonizer</c>; only the entries it returns
/// are compared, and the comparison is made on the harmonized values. The result collections
/// always store the original genotypes from both sources.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// The two sets of common genotypes ended up with different sizes.
/// </exception>
public void Compare() {
    GenotypeDictionary commonDict1, commonDict2;
    GenotypeDictionary harmonizedDict1, harmonizedDict2;
    string tempValue1, tempValue2;
    string original1, original2;
    int counter;

    MyMissingIn1 = new GenotypeDictionary();
    MyMissingIn2 = new GenotypeDictionary();
    MyCompared = new GenotypeResultPairCollection();
    MyIdentical = new GenotypeResultPairCollection();
    MyDifferent = new GenotypeResultPairCollection();
    commonDict1 = new GenotypeDictionary();
    commonDict2 = new GenotypeDictionary();
    MyHarmonizer = new ComparisonHarmonizer();

    //Sort out keys from results1 which are also in results2.
    counter = 0;
    OnStatusChange("Find common genotypes, step 1");
    foreach (string key1 in MySourceInfo1.ValidGenotypes.Keys) {
        if (MySourceInfo2.ValidGenotypes.Contains(key1)) {
            commonDict1.Add(key1, MySourceInfo1.ValidGenotypes[key1]);
        } else {
            MyMissingIn2.Add(key1);
        }
        counter++;
        if (counter % 100 == 0) {
            OnStatusChange("Find common genotypes, step 1 (processed genotype " + counter + ")");
        }
    }

    //Sort out keys from results2 which are also in results1.
    counter = 0;
    OnStatusChange("Find common genotypes, step 2");
    foreach (string key2 in MySourceInfo2.ValidGenotypes.Keys) {
        if (MySourceInfo1.ValidGenotypes.Contains(key2)) {
            commonDict2.Add(key2, MySourceInfo2.ValidGenotypes[key2]);
        } else {
            MyMissingIn1.Add(key2);
        }
        counter++;
        if (counter % 100 == 0) {
            OnStatusChange("Find common genotypes, step 2 (processed genotype " + counter + ")");
        }
    }

    //Make sure the two dictionaries of common genotypes are of the same length.
    if (commonDict1.Count != commonDict2.Count) {
        throw new InvalidOperationException("Comparison error. Lists are of unequal length.");
    }

    //Harmonize polarities if the settings say so.
    if (MySettings.Mode == InvestigationMode.Genotype && MySettings.HarmonizePolarities) {
        OnStatusChange("Harmonizing polarities");
        MyHarmonizer.MyStatusChangeHandler += new InvestigationStatusChangeHandler(MyHarmonizer_StatusChanged);
        MyHarmonizer.HarmonizePolarities(commonDict1, commonDict2, out harmonizedDict1, out harmonizedDict2);
        commonDict1 = harmonizedDict1;
        commonDict2 = harmonizedDict2;
    }

    //Perform comparison.
    counter = 0;
    OnStatusChange("Comparing genotypes");
    foreach (string key in commonDict1.Keys) {
        //Compare the (possibly harmonized) values...
        tempValue1 = commonDict1[key];
        tempValue2 = commonDict2[key];
        bool identical = this.IsEqual(tempValue1, tempValue2);

        //...but report the original genotypes; look them up once per key.
        original1 = MySourceInfo1.ValidGenotypes[key];
        original2 = MySourceInfo2.ValidGenotypes[key];
        if (identical) {
            MyIdentical.Add(key, original1, original2);
        } else {
            MyDifferent.Add(key, original1, original2);
        }
        MyCompared.Add(key, original1, original2);

        counter++;
        if (counter % 100 == 0) {
            OnStatusChange("Comparing genotypes (processed genotype " + counter + ")");
        }
    }
}
/// <summary>
/// Loads genotype results from a table and sorts every row into one of the outcome
/// collections: valid genotypes, duplicate failures, rows without result, and rows with
/// invalid alleles.
/// </summary>
/// <remarks>
/// The table is read twice. The first pass finds item/experiment pairs that occur more than
/// once with different alleles (compared case-insensitively); these are duplicate failures.
/// The second pass classifies every row: a row whose alleles equal the missing value code
/// (case-insensitively) is a "no result"; a row rejected by <c>IsValidAlleleCombination</c>
/// is an "invalid alleles" row; any other row is stored as a valid genotype unless its
/// item/experiment pair is a duplicate failure or has already been stored.
/// Comparisons are ordinal and case-insensitive, so the outcome does not depend on the
/// current culture.
/// </remarks>
/// <param name="results">
/// Table providing the columns "Item", "Experiment" and "Alleles" for each genotype row.
/// </param>
/// <param name="cmpType">Investigation mode passed on to <c>IsValidAlleleCombination</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="results"/> is null.</exception>
public void Load(DataTable results, InvestigationMode cmpType) {
    string tempItem, tempExp, tempAlleles;
    GenotypeDictionary tempDuplFailures, tempGenotypeDict, tempDuplTestDict;
    GenotypeCollection tempNoResults;
    GenotypeResultCollection tempInvalidAlleles;

    if (results == null) {
        throw new ArgumentNullException("results");
    }

    MyTotalGenotypesCount = results.Rows.Count;

    //Initiate dictionaries and collections.
    tempGenotypeDict = new GenotypeDictionary();
    tempDuplTestDict = new GenotypeDictionary();
    tempDuplFailures = new GenotypeDictionary();
    tempInvalidAlleles = new GenotypeResultCollection();
    tempNoResults = new GenotypeCollection();

    //First perform duplicate test.
    foreach (DataRow row in results.Rows) {
        //Read values.
        tempItem = row["Item"].ToString();
        tempExp = row["Experiment"].ToString();
        tempAlleles = row["Alleles"].ToString();

        //Rows without result or with invalid alleles take no part in the duplicate test.
        if (string.Equals(tempAlleles, MyMissingValueCode, StringComparison.OrdinalIgnoreCase)) {
            continue;
        }
        if (!this.IsValidAlleleCombination(tempAlleles, cmpType)) {
            continue;
        }

        if (!tempDuplTestDict.Contains(tempItem, tempExp)) {
            //Did not exist already.
            tempDuplTestDict.Add(tempItem, tempExp, tempAlleles);
        } else if (!string.Equals(tempDuplTestDict[tempItem, tempExp], tempAlleles, StringComparison.OrdinalIgnoreCase)) {
            //This key already exists with different alleles,
            //remember it as having a duplicate failure.
            if (!tempDuplFailures.Contains(tempItem, tempExp)) {
                tempDuplFailures.Add(tempItem, tempExp, "");
            }
        }
    }

    //Now go through the values again to avoid those with duplicate failures.
    foreach (DataRow row in results.Rows) {
        //Read values.
        tempItem = row["Item"].ToString();
        tempExp = row["Experiment"].ToString();
        tempAlleles = row["Alleles"].ToString();

        if (string.Equals(tempAlleles, MyMissingValueCode, StringComparison.OrdinalIgnoreCase)) {
            //Skipped because of no result.
            tempNoResults.Add(tempItem, tempExp);
        } else if (!this.IsValidAlleleCombination(tempAlleles, cmpType)) {
            //Skipped because invalid alleles.
            tempInvalidAlleles.Add(tempItem, tempExp, tempAlleles);
        } else if (!tempDuplFailures.Contains(tempItem, tempExp) && !tempGenotypeDict.Contains(tempItem, tempExp)) {
            //Neither a duplicate failure nor already existing, go ahead and add.
            tempGenotypeDict.Add(tempItem, tempExp, tempAlleles);
        }
    }

    MyValidGenotypes = tempGenotypeDict;
    MyDuplFailures = tempDuplFailures;
    MyNoResults = tempNoResults;
    MyInvalidAlleles = tempInvalidAlleles;
}