Ejemplo n.º 1
0
        /// <summary>
        /// Rebuilds <c>deNovoRegistryList</c> and filters it.
        /// When <c>PeptideList</c> is null or empty, the registries come either from the unmapped
        /// registries of the result package at <c>Arguments.MPexResultPath</c> (when a path is set)
        /// or from <c>PreprareDeNovoRegistries()</c>, and are then filtered by
        /// <c>Arguments.MinDeNovoScore</c>. Otherwise one registry is created per line of
        /// <c>PeptideList</c>. In both cases the top-hit and minimum-length filters are applied last.
        /// </summary>
        private void PrepareDeNovoRegistries()
        {
            deNovoRegistryList = new List <DeNovoRegistry>();

            if (string.IsNullOrEmpty(PeptideList))
            {
                // A null path is treated like an empty one instead of throwing.
                if (!string.IsNullOrEmpty(Arguments.MPexResultPath))
                {
                    Console.WriteLine("Loading " + Arguments.MPexResultPath);
                    ResultPckg2 pkg = ResultPckg2.DeserializeResultPackage(Arguments.MPexResultPath);

                    // Flatten every group of unmapped registries into the (already empty) list.
                    foreach (KeyValuePair <string, List <DeNovoRegistry> > kvp in pkg.MyUnmappedRegistries)
                    {
                        deNovoRegistryList.AddRange(kvp.Value);
                    }
                }
                else
                {
                    deNovoRegistryList = PreprareDeNovoRegistries();
                }

                //Eliminate denovo registries not having the minimum score
                int removedForNotHavingMinDenovoScore = deNovoRegistryList.RemoveAll(a => a.DeNovoScore < Arguments.MinDeNovoScore);
                Console.WriteLine("Removed elements for not attending the minimum denovo score: " + removedForNotHavingMinDenovoScore);
            }
            else
            {
                // One peptide per line; the line index is passed on to the registry.
                string[] peptides = Regex.Split(PeptideList, "\n");
                for (int i = 0; i < peptides.Length; i++)
                {
                    string cleanSequence = PatternTools.pTools.CleanPeptide(peptides[i], true);

                    // Strip every whitespace character, not only ' ' and '\t': splitting on "\n"
                    // leaves a trailing '\r' when the text uses Windows line endings.
                    // NOTE(review): CleanPeptide may already remove it - confirm.
                    cleanSequence = Regex.Replace(cleanSequence, @"\s", "");

                    deNovoRegistryList.Add(new DeNovoRegistry(0, i, 1, cleanSequence, 1, "Provided"));
                }
            }

            //Lets make sure we are satisfying the top hit constraint.
            //NOTE(review): the PepNovo options compare Rank + 1, presumably because their ranks are zero-based - confirm.
            if (Arguments.DeNovoOption == DeNovoOption.PepNovoFull || Arguments.DeNovoOption == DeNovoOption.PepNovo)
            {
                deNovoRegistryList.RemoveAll(a => a.Rank + 1 > Arguments.TopHits);
            }
            else
            {
                deNovoRegistryList.RemoveAll(a => a.Rank > Arguments.TopHits);
            }

            //Finally, lets remove registries that do not satisfy the minimum length criterion
            int removedForNotHavingMinimumLength = deNovoRegistryList.RemoveAll(a => a.CleanSequence.Length < Arguments.PeptideMinNumAA);

            Console.WriteLine("Removed for not attending the minimum number of aa : " + removedForNotHavingMinimumLength);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Deserializes each result file, records it in <c>myResultPackages</c> together with the
        /// class label of <paramref name="myDir"/>, and registers every FASTA entry of the package
        /// in <c>locusFastaDict</c> unless its identifier is already present.
        /// </summary>
        /// <param name="myDir">Directory description whose <c>ClassLabel</c> is attached to every loaded package.</param>
        /// <param name="resultFilteredFiles">Result files to deserialize.</param>
        private void FeedLocusFastaDictPex(DirectoryClassDescription myDir, FileInfo[] resultFilteredFiles)
        {
            foreach (FileInfo resultFile in resultFilteredFiles)
            {
                string path = resultFile.FullName;
                Console.WriteLine("Parsing " + path);

                ResultPckg2 package = ResultPckg2.DeserializeResultPackage(path);
                myResultPackages.Add(new FileInfoResultPackage(resultFile, package, myDir.ClassLabel));

                foreach (FastaItem entry in package.MyFasta)
                {
                    string identifier = entry.SequenceIdentifier;

                    // The first sequence seen for an identifier wins; later duplicates are ignored.
                    if (locusFastaDict.ContainsKey(identifier))
                    {
                        continue;
                    }

                    locusFastaDict.Add(identifier, entry.Sequence);
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Stores <paramref name="dirs"/> in <c>MyDirectoryDescriptionDictionary</c>, resets
        /// <c>MyResultPackages</c>, and loads every <c>*.mpex</c> file found in each directory
        /// (including subdirectories) as a <c>ThePackage</c> tagged with that directory's class label.
        /// </summary>
        /// <param name="dirs">Directories to scan, each carrying its full path and class label.</param>
        public void ParseDirs(List <DirectoryClassDescription> dirs)
        {
            MyDirectoryDescriptionDictionary = dirs;
            MyResultPackages = new List <ThePackage>();

            foreach (DirectoryClassDescription directory in dirs)
            {
                // Search this directory and everything below it for .mpex result files.
                DirectoryInfo root = new DirectoryInfo(directory.MyDirectoryFullName);
                FileInfo[] mpexFiles = root.GetFiles("*.mpex", SearchOption.AllDirectories);

                for (int i = 0; i < mpexFiles.Length; i++)
                {
                    FileInfo mpexFile = mpexFiles[i];

                    // Deserialize the stored result package before wrapping it.
                    Console.WriteLine("Loading .. " + mpexFile.FullName);
                    ResultPckg2 package = ResultPckg2.DeserializeResultPackage(mpexFile.FullName);
                    MyResultPackages.Add(new ThePackage(package, mpexFile, directory.ClassLabel));
                }

                // Emitted once per directory, not once for the whole run.
                Console.WriteLine("Done loading.");
            }
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Creates a package wrapper from a deserialized result package, the file it was read from,
 /// and the class label assigned to it.
 /// </summary>
 /// <param name="rp">The deserialized result package; stored in <c>MyPackage</c>.</param>
 /// <param name="file">The file associated with the package; stored in <c>MyFileInfo</c>.</param>
 /// <param name="classLabel">The class label; stored in <c>MyClassLabel</c>.</param>
 public ThePackage(ResultPckg2 rp, FileInfo file, int classLabel)
 {
     this.MyPackage = rp;
     this.MyFileInfo = file;
     this.MyClassLabel = classLabel;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Multicore alignment method. Groups the de novo registries by clean sequence, aligns each
        /// distinct sequence against the database in parallel, and keeps the best-scoring protein
        /// identifiers for every sequence.
        /// </summary>
        /// <param name="database">A list with a parsed database.</param>
        /// <param name="deNovoRegistryList">
        /// A list with parsed peptide registries. Registries whose <c>Sequence</c> is shorter than
        /// <c>Arguments.PeptideMinNumAA</c> are removed from this list in place.
        /// </param>
        /// <returns>
        /// A result package built from the database entries referenced by at least one alignment,
        /// the current arguments, the alignments, and the sequence groups for which no database
        /// entry passed the identity filter.
        /// </returns>
        private ResultPckg2 AlignMAlign(List <FastaItem> database, List <DeNovoRegistry> deNovoRegistryList)
        {
            // Mutates the caller's list: too-short sequences are dropped before grouping.
            deNovoRegistryList.RemoveAll(a => a.Sequence.Length < Arguments.PeptideMinNumAA);

            Console.WriteLine("Total denovo registries: " + deNovoRegistryList.Count);

            // Identical clean sequences are aligned only once; all their registries share the result.
            Dictionary <string, List <DeNovoRegistry> > dnovoRegistryDict = (from registry in deNovoRegistryList
                                                                             group registry by registry.CleanSequence into registryGroup
                                                                             select new { Seq = registryGroup.Key, Registries = registryGroup.ToList() }).ToDictionary(a => a.Seq, a => a.Registries);

            int totalSequences = dnovoRegistryDict.Count;
            Console.WriteLine("Total denovo registries after grouping exact sequences: " + totalSequences);

            //We need to feed the alignment List
            KeyValuePair <int[, ], Dictionary <char, int> > m = MAligner.Utils.LoadSubstitutionMatrixFromString(PepExplorer2.Properties.Resources.PAM30MS);

            Aligner mAligner = new Aligner(m);
            int     count    = 0;

            ConcurrentBag <AlignmentResult> alignments = new ConcurrentBag <AlignmentResult>();
            ConcurrentBag <KeyValuePair <string, List <DeNovoRegistry> > > unmappedRegistries = new ConcurrentBag <KeyValuePair <string, List <DeNovoRegistry> > >();

            // Identifiers are machine-readable keys, so the decoy prefix is matched ordinally.
            Predicate <FastaItem> isDecoy = a => a.SequenceIdentifier.StartsWith(Arguments.DecoyLabel, StringComparison.Ordinal);

            Parallel.ForEach(dnovoRegistryDict, kvp =>
            {
                Console.WriteLine("Aligning: " + kvp.Key);
                List <DeNovoR> dRegistries = (from r in kvp.Value
                                              select new DeNovoR(r.ScanNumber, r.DeNovoScore, r.PtmSequence, r.Charge, r.FileName)).ToList();

                byte[] peptideBytes = Encoding.ASCII.GetBytes(kvp.Key);

                // Keep only database entries whose identity score, normalized by peptide length,
                // exceeds the configured minimum.
                List <FastaItem> validItems = database.FindAll(a => ((double)mAligner.IDScore(peptideBytes, a.SequenceInBytes) / (double)peptideBytes.Length) > Arguments.MinIdentity);

                //Lets take care of palindromic hits that might spoil our result:
                //as soon as one non-decoy entry qualifies, every decoy entry is discarded,
                //so decoys survive only when they are the sole candidates.
                if (validItems.Exists(a => !isDecoy(a)))
                {
                    validItems.RemoveAll(isDecoy);
                }

                if (validItems.Exists(isDecoy))
                {
                    Console.WriteLine("Decoy hit included");
                }

                if (validItems.Count > 0)
                {
                    //Now we need to find the best proteins
                    KeyValuePair <List <int>, List <string> > alg = mAligner.GetClosestPeptideInDB(kvp.Key.ToCharArray(), validItems);

                    // One score per candidate is expected; verify that before calling Max(),
                    // which would throw on an empty score list instead of reaching the diagnostic below.
                    if (alg.Key.Count == validItems.Count)
                    {
                        //Find the max value and only store the best IDs for this peptide
                        int bestScore = alg.Key.Max();

                        List <string> protIDs = new List <string>();

                        for (int i = 0; i < validItems.Count; i++)
                        {
                            if (alg.Key[i] == bestScore)
                            {
                                protIDs.Add(validItems[i].SequenceIdentifier);
                            }
                        }

                        alignments.Add(new AlignmentResult(dRegistries, bestScore, protIDs));
                    }
                    else
                    {
                        Console.WriteLine("shouldn't be here, failed in peptide " + kvp.Key);
                    }
                }
                else
                {
                    unmappedRegistries.Add(kvp);
                }

                // Use the value returned by the atomic increment; re-reading the shared counter
                // could report a number already advanced by another thread.
                int done = Interlocked.Increment(ref count);
                Console.WriteLine("Done aligning sequence no {0} ({1})  ", done, kvp.Key);

                //Report progress
                Progress = Math.Round((done / (double)totalSequences), 2) * 100;
            }
                             );

            //Lets get only the stuff we used from the DB.
            //A hash set keeps the membership test below constant-time per database entry.
            HashSet <string> usedFastaIDs = new HashSet <string>(from b in alignments
                                                                 from pID in b.ProtIDs
                                                                 select pID);

            List <FastaItem> usedFasta = database.FindAll(a => usedFastaIDs.Contains(a.SequenceIdentifier));

            ResultPckg2 resultPckg = new ResultPckg2(usedFasta, Arguments, alignments.ToList(), unmappedRegistries.ToList());

            return(resultPckg);
        }