/// <summary>
/// Fills <c>deNovoRegistryList</c> and then filters it.
/// When <c>PeptideList</c> is null or empty, the registries come either from the unmapped
/// registries of the result package at <c>Arguments.MPexResultPath</c> (when that path is not
/// empty) or from <c>PreprareDeNovoRegistries()</c>; registries scoring below
/// <c>Arguments.MinDeNovoScore</c> are then removed.
/// Otherwise one registry is created per line of <c>PeptideList</c>.
/// In both cases the top-hit and minimum-length constraints are applied at the end.
/// </summary>
private void PrepareDeNovoRegistries()
{
    deNovoRegistryList = new List<DeNovoRegistry>();

    if (string.IsNullOrEmpty(PeptideList))
    {
        if (Arguments.MPexResultPath.Length > 0)
        {
            Console.WriteLine("Loading " + Arguments.MPexResultPath);
            ResultPckg2 pkg = ResultPckg2.DeserializeResultPackage(Arguments.MPexResultPath);

            foreach (KeyValuePair<string, List<DeNovoRegistry>> kvp in pkg.MyUnmappedRegistries)
            {
                deNovoRegistryList.AddRange(kvp.Value);
            }
        }
        else
        {
            deNovoRegistryList = PreprareDeNovoRegistries();
        }

        // Eliminate de novo registries not having the minimum score.
        int removedForNotHavingMinDenovoScore = deNovoRegistryList.RemoveAll(a => a.DeNovoScore < Arguments.MinDeNovoScore);
        Console.WriteLine("Removed elements for not attending the minimum denovo score: " + removedForNotHavingMinDenovoScore);
    }
    else
    {
        // Accept both LF and CRLF line endings so that a list pasted on Windows does not
        // leave a trailing carriage return on every peptide.
        string[] peptides = Regex.Split(PeptideList, "\r?\n");

        for (int i = 0; i < peptides.Length; i++)
        {
            string cleanSequence = PatternTools.pTools.CleanPeptide(peptides[i], true);
            cleanSequence = cleanSequence.Replace(" ", "").Replace("\t", "");

            // NOTE(review): the numeric constructor arguments are passed exactly as before;
            // the line index i is presumably used as a scan number - confirm against DeNovoRegistry.
            deNovoRegistryList.Add(new DeNovoRegistry(0, i, 1, cleanSequence, 1, "Provided"));
        }
    }

    // Make sure the top-hit constraint is satisfied. For the PepNovo options the rank is
    // compared after adding one (presumably because those ranks are zero-based - confirm).
    if (Arguments.DeNovoOption == DeNovoOption.PepNovoFull || Arguments.DeNovoOption == DeNovoOption.PepNovo)
    {
        deNovoRegistryList.RemoveAll(a => a.Rank + 1 > Arguments.TopHits);
    }
    else
    {
        deNovoRegistryList.RemoveAll(a => a.Rank > Arguments.TopHits);
    }

    // Finally, remove registries that do not satisfy the minimum length criterion.
    int removedForNotHavingMinimumLength = deNovoRegistryList.RemoveAll(a => a.CleanSequence.Length < Arguments.PeptideMinNumAA);
    Console.WriteLine("Removed for not attending the minimum number of aa : " + removedForNotHavingMinimumLength);
}
/// <summary>
/// Deserializes each of the given result files, records the package in
/// <c>myResultPackages</c> together with its file and the class label of
/// <paramref name="myDir"/>, and registers every fasta entry of the package in
/// <c>locusFastaDict</c> (first occurrence of a sequence identifier wins).
/// </summary>
/// <param name="myDir">Directory description supplying the class label.</param>
/// <param name="resultFilteredFiles">Result files to deserialize.</param>
private void FeedLocusFastaDictPex(DirectoryClassDescription myDir, FileInfo[] resultFilteredFiles)
{
    for (int fileIndex = 0; fileIndex < resultFilteredFiles.Length; fileIndex++)
    {
        FileInfo resultFile = resultFilteredFiles[fileIndex];

        Console.WriteLine("Parsing " + resultFile.FullName);
        ResultPckg2 package = ResultPckg2.DeserializeResultPackage(resultFile.FullName);

        FileInfoResultPackage wrapped = new FileInfoResultPackage(resultFile, package, myDir.ClassLabel);
        myResultPackages.Add(wrapped);

        foreach (FastaItem entry in package.MyFasta)
        {
            // Identifiers already seen in an earlier entry or file keep their first sequence.
            if (locusFastaDict.ContainsKey(entry.SequenceIdentifier))
            {
                continue;
            }

            locusFastaDict.Add(entry.SequenceIdentifier, entry.Sequence);
        }
    }
}
/// <summary>
/// Stores the given directory descriptions and loads every <c>*.mpex</c> file found in each
/// described directory (including subdirectories) into <c>MyResultPackages</c>, tagging
/// each package with the class label of the directory it was found under.
/// </summary>
/// <param name="dirs">Directory descriptions to scan.</param>
public void ParseDirs(List<DirectoryClassDescription> dirs)
{
    MyDirectoryDescriptionDictionary = dirs;
    MyResultPackages = new List<ThePackage>();

    foreach (DirectoryClassDescription description in dirs)
    {
        // Collect all .mpex files in this directory and in deeper directories.
        DirectoryInfo root = new DirectoryInfo(description.MyDirectoryFullName);
        FileInfo[] mpexFiles = root.GetFiles("*.mpex", SearchOption.AllDirectories);

        for (int f = 0; f < mpexFiles.Length; f++)
        {
            FileInfo mpexFile = mpexFiles[f];

            // Deserialize the package before wrapping it.
            Console.WriteLine("Loading .. " + mpexFile.FullName);
            ResultPckg2 package = ResultPckg2.DeserializeResultPackage(mpexFile.FullName);

            MyResultPackages.Add(new ThePackage(package, mpexFile, description.ClassLabel));
        }

        // Printed once per directory.
        Console.WriteLine("Done loading.");
    }
}
/// <summary>
/// Bundles a deserialized result package with the file it was read from and a class label.
/// </summary>
/// <param name="rp">The deserialized result package.</param>
/// <param name="file">The file associated with the package.</param>
/// <param name="classLabel">The class label assigned to the package.</param>
public ThePackage(ResultPckg2 rp, FileInfo file, int classLabel)
{
    this.MyPackage = rp;
    this.MyFileInfo = file;
    this.MyClassLabel = classLabel;
}
/// <summary>
/// Multicore alignment method. Groups the de novo registries by clean sequence, aligns each
/// distinct sequence against the database in parallel and collects, per sequence, the
/// database entries that reach the best alignment score.
/// </summary>
/// <param name="database">A list with a parsed database.</param>
/// <param name="deNovoRegistryList">
/// A list with parsed peptide registries. Registries whose <c>Sequence</c> is shorter than
/// <c>Arguments.PeptideMinNumAA</c> are removed from this list in place.
/// </param>
/// <returns>
/// A result package built from the database entries referenced by at least one alignment,
/// the current <c>Arguments</c>, the alignment results, and the sequence groups for which no
/// database entry passed the identity filter.
/// </returns>
private ResultPckg2 AlignMAlign(List<FastaItem> database, List<DeNovoRegistry> deNovoRegistryList)
{
    deNovoRegistryList.RemoveAll(a => a.Sequence.Length < Arguments.PeptideMinNumAA);
    Console.WriteLine("Total denovo registries: " + deNovoRegistryList.Count);

    // Identical clean sequences are aligned only once; all their registries share the result.
    Dictionary<string, List<DeNovoRegistry>> dnovoRegistryDict = deNovoRegistryList
        .GroupBy(registry => registry.CleanSequence)
        .ToDictionary(group => group.Key, group => group.ToList());

    int totalSequences = dnovoRegistryDict.Count;
    Console.WriteLine("Total denovo registries after grouping exact sequences: " + totalSequences);

    // The aligner is built from the PAM30MS substitution matrix embedded in the resources.
    KeyValuePair<int[,], Dictionary<char, int>> m = MAligner.Utils.LoadSubstitutionMatrixFromString(PepExplorer2.Properties.Resources.PAM30MS);
    Aligner mAligner = new Aligner(m);

    int count = 0;

    ConcurrentBag<AlignmentResult> alignments = new ConcurrentBag<AlignmentResult>();
    ConcurrentBag<KeyValuePair<string, List<DeNovoRegistry>>> unmappedRegistries = new ConcurrentBag<KeyValuePair<string, List<DeNovoRegistry>>>();

    Parallel.ForEach(dnovoRegistryDict, kvp =>
    {
        Console.WriteLine("Aligning: " + kvp.Key);

        List<DeNovoR> dRegistries = (from r in kvp.Value
                                     select new DeNovoR(r.ScanNumber, r.DeNovoScore, r.PtmSequence, r.Charge, r.FileName)).ToList();

        byte[] peptideBytes = Encoding.ASCII.GetBytes(kvp.Key);

        // Keep only database entries whose identity score, normalized by the peptide length,
        // exceeds the configured minimum identity.
        List<FastaItem> validItems = database.FindAll(a => ((double)mAligner.IDScore(peptideBytes, a.SequenceInBytes) / (double)peptideBytes.Length) > Arguments.MinIdentity);

        // When at least one candidate does not carry the decoy label, drop the decoy-labelled
        // candidates; decoys are kept only when they are the sole candidates.
        int decoyCount = validItems.Count(a => a.SequenceIdentifier.StartsWith(Arguments.DecoyLabel));
        if (decoyCount < validItems.Count)
        {
            validItems.RemoveAll(a => a.SequenceIdentifier.StartsWith(Arguments.DecoyLabel));
        }
        else if (decoyCount > 0)
        {
            Console.WriteLine("Decoy hit included");
        }

        if (validItems.Count > 0)
        {
            // Now we need to find the best proteins.
            char[] peptide = kvp.Key.ToCharArray();
            KeyValuePair<List<int>, List<string>> alg = mAligner.GetClosestPeptideInDB(peptide, validItems);

            if (alg.Key.Count == validItems.Count)
            {
                // Only evaluated once the score list is known to be non-empty, so Max()
                // cannot throw on an empty sequence.
                int bestScore = alg.Key.Max();

                // Store only the IDs of the entries that reach the best score for this peptide.
                List<string> protIDs = new List<string>();
                for (int i = 0; i < validItems.Count; i++)
                {
                    if (alg.Key[i] == bestScore)
                    {
                        protIDs.Add(validItems[i].SequenceIdentifier);
                    }
                }

                alignments.Add(new AlignmentResult(dRegistries, bestScore, protIDs));
            }
            else
            {
                Console.WriteLine("shouldn't be here, failed in peptide " + kvp.Key);
            }
        }
        else
        {
            unmappedRegistries.Add(kvp);
        }

        // Use the value returned by the atomic increment; reading the shared counter again
        // would race with the other worker threads.
        int done = Interlocked.Increment(ref count);
        Console.WriteLine("Done aligning sequence no {0} ({1}) ", done, kvp.Key);

        // Report progress.
        Progress = Math.Round((done / (double)totalSequences), 2) * 100;
    });

    // Keep only the database entries that were actually referenced by an alignment.
    HashSet<string> usedFastaIDs = new HashSet<string>(alignments.SelectMany(a => a.ProtIDs));
    List<FastaItem> usedFasta = database.FindAll(a => usedFastaIDs.Contains(a.SequenceIdentifier));

    ResultPckg2 resultPckg = new ResultPckg2(usedFasta, Arguments, alignments.ToList(), unmappedRegistries.ToList());

    return resultPckg;
}