コード例 #1
0
        protected DataTable ParseFromFile(string filePath, CIStringCollection items, CIStringCollection experiments)
        {
            //Reads three columns per line from the file at filePath and returns a "Genotype" table
            //(string columns Item, Experiment, Alleles) holding only the lines whose trimmed first
            //column is in items and whose trimmed second column is in experiments.
            //The values stored in the table are the untrimmed file values.
            FileServer reader = new FileServer();

            OnStatusChange("Retrieving genotypes from file");
            string[][] lines = reader.ReadMultipleColumns(filePath, 3);

            DataTable genotypes = new DataTable("Genotype");
            genotypes.Columns.Add("Item", typeof(string));
            genotypes.Columns.Add("Experiment", typeof(string));
            genotypes.Columns.Add("Alleles", typeof(string));

            for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
            {
                string[] columns = lines[lineIndex];

                //The experiment column is only inspected when the item column already matched.
                bool wanted = items.Contains(columns[0].Trim()) && experiments.Contains(columns[1].Trim());
                if (wanted)
                {
                    genotypes.Rows.Add(columns[0], columns[1], columns[2]);
                }

                //Report progress on every 1000th line (including line 0).
                if (lineIndex % 1000 == 0)
                {
                    OnStatusChange("Finished reading line " + lineIndex.ToString());
                }
            }

            return genotypes;
        }
コード例 #2
0
        private CIStringCollection Union(CIStringCollection values1, CIStringCollection values2, out CIStringCollection values1NotIn2, out CIStringCollection values2NotIn1)
        {
            //Partitions the two inputs into three duplicate-free collections:
            //  return value  - values of values1 that values2 also contains
            //                  (note: despite the method name this is the common subset),
            //  values1NotIn2 - values of values1 that values2 does not contain,
            //  values2NotIn1 - values of values2 that values1 does not contain.
            CIStringCollection shared = new CIStringCollection();
            values1NotIn2 = new CIStringCollection();
            values2NotIn1 = new CIStringCollection();

            //Route every value of source 1 to either the shared bucket or the "only in 1" bucket.
            foreach (string candidate in values1)
            {
                CIStringCollection bucket = values2.Contains(candidate) ? shared : values1NotIn2;
                if (!bucket.Contains(candidate))
                {
                    bucket.Add(candidate);
                }
            }

            //Collect the values of source 2 that source 1 lacks, skipping ones already recorded.
            foreach (string candidate in values2)
            {
                if (values1.Contains(candidate) || values2NotIn1.Contains(candidate))
                {
                    continue;
                }
                values2NotIn1.Add(candidate);
            }

            return shared;
        }
コード例 #3
0
        public void HarmonizePolarities(GenotypeDictionary dict1, GenotypeDictionary dict2, out GenotypeDictionary harmonizedDict1, out GenotypeDictionary harmonizedDict2)
        {
            //Harmonizes dict2 after dict1 and removes all entries from A/T or G/C experiments, from
            //experiments with incompatible SNP types and from SNPs which are monomorphic in both sources.
            //
            //Parameters:
            //  dict1           - genotypes of source 1; its keys drive every genotype loop below.
            //  dict2           - genotypes of source 2; read with the same keys as dict1.
            //  harmonizedDict1 - receives the dict1 entries of the experiments passing all filters.
            //  harmonizedDict2 - receives the matching dict2 entries, passed through ReverseComplement
            //                    where the allele sets of the two sources disagree.
            //
            //Side effects: RemoveExperiment is called on dict1 and dict2 for every rejected experiment,
            //and the MyRemovedExp* members are replaced with new collections naming the experiments
            //rejected by each filter step. Progress is reported through OnStatusChange.
            //
            //Throws Exception from the final loop when the allele strings collected for an experiment
            //are not of lengths 2/2, 1/2 or 2/1.
            //NOTE(review): dict2 is indexed with every key of dict1; what happens for a key missing
            //from dict2 depends on GenotypeDictionary, which is not visible here - confirm.
            string                      tempExp, tempAlleles1, tempAlleles2;
            CIStringCollection          uniqueExps;
            Dictionary <string, string> allelesDict1, allelesDict2;
            int counter;

            //Find all experiments and their SNP types (the experiment names should be the same in both dictionaries, use dict1 here).
            //allelesDict1/allelesDict2 map an experiment name (case-insensitive) to a string holding
            //each distinct allele character seen for that experiment in source 1 / source 2.
            uniqueExps   = new CIStringCollection();
            allelesDict1 = new Dictionary <string, string>(StringComparer.InvariantCultureIgnoreCase);
            allelesDict2 = new Dictionary <string, string>(StringComparer.InvariantCultureIgnoreCase);
            OnStatusChange("Extracting experiments and SNP types");
            counter = 0;
            foreach (string tempKey in dict1.Keys)
            {
                tempExp      = dict1.GetExperiment(tempKey);
                //Genotypes are upper-cased so the allele comparisons below ignore case.
                tempAlleles1 = dict1[tempKey].ToUpper();
                tempAlleles2 = dict2[tempKey].ToUpper();
                //The alleles are read from positions 0 and 2 of each genotype string
                //(presumably a format such as "A/G" - TODO confirm against the data source).
                if (!uniqueExps.Contains(tempExp))
                {
                    //This experiment has not been processed before.
                    uniqueExps.Add(tempExp);
                    //Get the unique alleles from dictionary 1.
                    if (tempAlleles1.Substring(0, 1) != tempAlleles1.Substring(2, 1))
                    {
                        //The alleles are different, add both.
                        allelesDict1.Add(tempExp, tempAlleles1.Substring(0, 1) + tempAlleles1.Substring(2, 1));
                    }
                    else
                    {
                        //The alleles are the same, add only the first.
                        allelesDict1.Add(tempExp, tempAlleles1.Substring(0, 1));
                    }
                    //Get the unique alleles from dictionary 2.
                    if (tempAlleles2.Substring(0, 1) != tempAlleles2.Substring(2, 1))
                    {
                        //The alleles are different, add both.
                        allelesDict2.Add(tempExp, tempAlleles2.Substring(0, 1) + tempAlleles2.Substring(2, 1));
                    }
                    else
                    {
                        //The alleles are the same, add only the first.
                        allelesDict2.Add(tempExp, tempAlleles2.Substring(0, 1));
                    }
                }
                else
                {
                    //This experiment has been processed before.
                    //Add the first allele from dictionary one if it does not already exist in the string.
                    if (!allelesDict1[tempExp].Contains(tempAlleles1.Substring(0, 1)))
                    {
                        allelesDict1[tempExp] = allelesDict1[tempExp] + tempAlleles1.Substring(0, 1);
                    }
                    //Add the second allele from dictionary one if it does not already exist in the string.
                    if (!allelesDict1[tempExp].Contains(tempAlleles1.Substring(2, 1)))
                    {
                        allelesDict1[tempExp] = allelesDict1[tempExp] + tempAlleles1.Substring(2, 1);
                    }

                    //Add the first allele from dictionary two if it does not already exist in the string.
                    if (!allelesDict2[tempExp].Contains(tempAlleles2.Substring(0, 1)))
                    {
                        allelesDict2[tempExp] = allelesDict2[tempExp] + tempAlleles2.Substring(0, 1);
                    }
                    //Add the second allele from dictionary two if it does not already exist in the string.
                    if (!allelesDict2[tempExp].Contains(tempAlleles2.Substring(2, 1)))
                    {
                        allelesDict2[tempExp] = allelesDict2[tempExp] + tempAlleles2.Substring(2, 1);
                    }
                }
                //Report progress every 100 genotypes.
                counter++;
                if (counter % 100 == 0)
                {
                    OnStatusChange("Extracting experiments and SNP types (processed genotype " + counter + ")");
                }
            }

            //Remove data from SNPs with more than two alleles in source 1 from both dictionaries and save.
            MyRemovedExpTooManyAlleles1 = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 1/6");
            counter = 0;
            foreach (string exp in allelesDict1.Keys)
            {
                if (allelesDict1[exp].Length > 2)
                {
                    dict1.RemoveExperiment(exp);
                    dict2.RemoveExperiment(exp);
                    MyRemovedExpTooManyAlleles1.Add(exp);
                }
                counter++;
                if (counter % 100 == 0)
                {
                    OnStatusChange("Extracting harmonizable data step 1/6 (processed experiment " + counter + ")");
                }
            }

            //Remove data from SNPs with more than two alleles in source 2 from both dictionaries and save.
            //An experiment already rejected in step 1 is recorded here as well if source 2 also has too many alleles.
            MyRemovedExpTooManyAlleles2 = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 2/6");
            counter = 0;
            foreach (string exp in allelesDict2.Keys)
            {
                if (allelesDict2[exp].Length > 2)
                {
                    dict1.RemoveExperiment(exp);
                    dict2.RemoveExperiment(exp);
                    MyRemovedExpTooManyAlleles2.Add(exp);
                }
                counter++;
                if (counter % 100 == 0)
                {
                    OnStatusChange("Extracting harmonizable data step 2/6 (processed experiment " + counter + ")");
                }
            }

            //Remove data from SNPs which are monomorphic in both sources and save.
            //allelesDict2 can be indexed with the keys of allelesDict1 because both dictionaries
            //receive their entries for an experiment together in the first loop.
            MyRemovedExpMonomorphicInBoth = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 3/6");
            counter = 0;
            foreach (string exp in allelesDict1.Keys)
            {
                if (allelesDict1[exp].Length == 1 && allelesDict2[exp].Length == 1)
                {
                    dict1.RemoveExperiment(exp);
                    dict2.RemoveExperiment(exp);
                    MyRemovedExpMonomorphicInBoth.Add(exp);
                }
                counter++;
                if (counter % 100 == 0)
                {
                    OnStatusChange("Extracting harmonizable data step 3/6 (processed experiment " + counter + ")");
                }
            }

            //Remove data from SNPs with incompatible SNP types from both dictionaries and save.
            //Experiments rejected in steps 1-3 are skipped. The compatibility rule itself lives in
            //IsCompatibleSNPTypes, which is not visible here.
            MyRemovedExpIncompatibleSNPTypes = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 4/6");
            counter = 0;
            foreach (string exp in allelesDict1.Keys)
            {
                if (!MyRemovedExpTooManyAlleles1.Contains(exp) && !MyRemovedExpTooManyAlleles2.Contains(exp) &&
                    !MyRemovedExpMonomorphicInBoth.Contains(exp))
                {
                    if (!this.IsCompatibleSNPTypes(allelesDict1[exp], allelesDict2[exp]))
                    {
                        dict1.RemoveExperiment(exp);
                        dict2.RemoveExperiment(exp);
                        MyRemovedExpIncompatibleSNPTypes.Add(exp);
                    }
                }
                counter++;
                if (counter % 100 == 0)
                {
                    OnStatusChange("Extracting harmonizable data step 4/6 (processed experiment " + counter + ")");
                }
            }

            //Remove data from SNPs with A/T or G/C SNPs from both dictionaries and save.
            //Experiments rejected in steps 1-4 are skipped.
            MyRemovedExpATCG = new CIStringCollection();
            OnStatusChange("Extracting harmonizable data step 5/6");
            counter = 0;
            foreach (string exp in allelesDict1.Keys)
            {
                if (!MyRemovedExpTooManyAlleles1.Contains(exp) && !MyRemovedExpTooManyAlleles2.Contains(exp) &&
                    !MyRemovedExpMonomorphicInBoth.Contains(exp) && !MyRemovedExpIncompatibleSNPTypes.Contains(exp))
                {
                    //Judge the SNP type from source 1 when it shows more than one allele,
                    //otherwise fall back to the alleles seen in source 2.
                    if (allelesDict1[exp].Length > 1)
                    {
                        if (this.GetNumberOfATalleles(allelesDict1[exp]) != 1)
                        {
                            //Either 0 or 2 A or T alleles.
                            dict1.RemoveExperiment(exp);
                            dict2.RemoveExperiment(exp);
                            MyRemovedExpATCG.Add(exp);
                        }
                    }
                    else
                    {
                        if (this.GetNumberOfATalleles(allelesDict2[exp]) != 1)
                        {
                            dict1.RemoveExperiment(exp);
                            dict2.RemoveExperiment(exp);
                            MyRemovedExpATCG.Add(exp);
                        }
                    }
                }
                counter++;
                if (counter % 100 == 0)
                {
                    OnStatusChange("Extracting harmonizable data step 5/6 (processed experiment " + counter + ")");
                }
            }

            //Store only data from experiments passing all tests.
            //NOTE(review): if RemoveExperiment already drops the rejected keys from dict1, the checks
            //against the removal lists below are a second safeguard only - confirm.
            harmonizedDict1 = new GenotypeDictionary();
            harmonizedDict2 = new GenotypeDictionary();
            OnStatusChange("Extracting harmonizable data step 6/6");
            counter = 0;
            foreach (string tempKey in dict1.Keys)
            {
                tempExp = dict1.GetExperiment(tempKey);
                if (!MyRemovedExpTooManyAlleles1.Contains(tempExp) &&
                    !MyRemovedExpTooManyAlleles2.Contains(tempExp) &&
                    !MyRemovedExpMonomorphicInBoth.Contains(tempExp) &&
                    !MyRemovedExpIncompatibleSNPTypes.Contains(tempExp) &&
                    !MyRemovedExpATCG.Contains(tempExp))
                {
                    harmonizedDict1.Add(tempKey, dict1[tempKey]);
                    harmonizedDict2.Add(tempKey, dict2[tempKey]);
                }
                counter++;
                if (counter % 100 == 0)
                {
                    OnStatusChange("Extracting harmonizable data step 6/6 (processed genotype " + counter + ")");
                }
            }


            //Go through dict1. If the SNP type for the current row is different than
            //the SNP type in dict2, convert the row in dict2.
            //The comparison uses the per-experiment allele strings collected in the first loop,
            //not the individual genotype of the row.
            OnStatusChange("Harmonizing");
            counter = 0;
            foreach (string tempKey in harmonizedDict1.Keys)
            {
                tempExp      = harmonizedDict1.GetExperiment(tempKey);
                tempAlleles1 = allelesDict1[tempExp];
                tempAlleles2 = allelesDict2[tempExp];
                if (tempAlleles1.Length == 2 && tempAlleles2.Length == 2)
                {
                    //Both sources show two alleles: they agree when the two characters match
                    //in the same order or in swapped order.
                    if ((
                            (tempAlleles1.Substring(0, 1) == tempAlleles2.Substring(0, 1)) &&
                            (tempAlleles1.Substring(1, 1) == tempAlleles2.Substring(1, 1))
                            ) ||
                        (
                            (tempAlleles1.Substring(0, 1) == tempAlleles2.Substring(1, 1)) &&
                            (tempAlleles1.Substring(1, 1) == tempAlleles2.Substring(0, 1))
                        )
                        )
                    {
                        //They are the same.
                    }
                    else
                    {
                        harmonizedDict2[tempKey] = this.ReverseComplement(harmonizedDict2[tempKey]);
                    }
                }
                else if (tempAlleles1.Length == 1 && tempAlleles2.Length == 2)
                {
                    //Source 1 shows a single allele: convert when it is neither of the source 2 alleles.
                    if (tempAlleles1 != tempAlleles2.Substring(0, 1) && tempAlleles1 != tempAlleles2.Substring(1, 1))
                    {
                        harmonizedDict2[tempKey] = this.ReverseComplement(harmonizedDict2[tempKey]);
                    }
                }
                else if (tempAlleles1.Length == 2 && tempAlleles2.Length == 1)
                {
                    //Source 2 shows a single allele: convert when it is neither of the source 1 alleles.
                    if (tempAlleles2 != tempAlleles1.Substring(0, 1) && tempAlleles2 != tempAlleles1.Substring(1, 1))
                    {
                        harmonizedDict2[tempKey] = this.ReverseComplement(harmonizedDict2[tempKey]);
                    }
                }
                else
                {
                    //Any other combination of allele counts is not expected after the filtering above.
                    throw new Exception("Inappropriate number of alleles detected when converting SNP types");
                }
                counter++;
                if (counter % 100 == 0)
                {
                    OnStatusChange("Harmonizing (processed genotype " + counter + ")");
                }
            }
        }