/// <summary>
/// Creates the builder from its collaborators and sets the default minimum spectrum count.
/// </summary>
/// <param name="calculator">Quantification calculator stored on this instance.</param>
/// <param name="expsMap">String-keyed map of string lists, stored as given (not copied).</param>
/// <param name="parser">Access number parser stored on this instance.</param>
public ProteinLabelFreeQuantificationBuilder(
    IProteinLabelfreeQuantificationCalculator calculator,
    Dictionary<string, List<string>> expsMap,
    IAccessNumberParser parser)
{
    this.calculator = calculator;
    this.expsMap = expsMap;
    this.parser = parser;

    // Default threshold; assigned last, after the collaborators.
    MinSpectrumCount = 5;
}
/// <summary>
/// Creates the builder, forwarding <paramref name="option"/> to the base class and
/// storing the remaining arguments on this instance.
/// </summary>
/// <param name="option">Statistic option passed to the base constructor.</param>
/// <param name="isSiteLevel">Flag stored in the <c>isSiteLevel</c> field.</param>
/// <param name="fastaFile">Fasta file path stored in the <c>fastaFile</c> field.</param>
/// <param name="parser">Access number parser stored in the <c>parser</c> field.</param>
public ITraqUniquePeptideStatisticBuilder(
    ITraqProteinStatisticOption option,
    bool isSiteLevel,
    string fastaFile,
    IAccessNumberParser parser)
    : base(option)
{
    this.isSiteLevel = isSiteLevel;
    this.fastaFile = fastaFile;
    this.parser = parser;
}
/// <summary>
/// Reads the TestBuilder.peptides fixture and checks the spectrum, protein and
/// protein-group counts and names produced by the builders, then runs the result builder.
/// </summary>
public void TestBuild()
{
    // Fixture path is resolved relative to the test directory.
    string peptideFile = TestContext.CurrentContext.TestDirectory + "/../../../data/TestBuilder.peptides";

    List<IIdentifiedSpectrum> spectra = new SequestPeptideTextFormat().ReadFromFile(peptideFile);
    Assert.AreEqual(4, spectra.Count);

    IAccessNumberParser parser = AccessNumberParserFactory.FindOrCreateParser(@"(IPI\d+)", "IPI");

    List<IIdentifiedProtein> proteins = new IdentifiedProteinBuilder().Build(spectra);
    Assert.AreEqual(4, proteins.Count);

    List<IIdentifiedProteinGroup> groups = new IdentifiedProteinGroupBuilder().Build(proteins);
    Assert.AreEqual(2, groups.Count);

    // First group: a single IPI protein.
    IIdentifiedProteinGroup firstGroup = groups[0];
    Assert.AreEqual(1, firstGroup.Count);
    Assert.AreEqual("IPI:IPI00784154.1|SW", firstGroup[0].Name);

    // Second group: the two reversed entries.
    IIdentifiedProteinGroup secondGroup = groups[1];
    Assert.AreEqual(2, secondGroup.Count);
    Assert.AreEqual("REVERSED_00000001", secondGroup[0].Name);
    Assert.AreEqual("REVERSED_00000002", secondGroup[1].Name);

    // No assertion on the result; this only checks that Build completes.
    IIdentifiedResult result = new IdentifiedResultBuilder(parser, "").Build(groups);
}
/// <summary>
/// Loads the parser formats from the option file section, builds the parser list,
/// and selects the auto parser, falling back to the default parser when the list
/// has no entry whose format name equals <c>AutoAccessNumberParser.FORMAT_NAME</c>.
/// </summary>
private static void InitParsers()
{
    ParserFormatList formats = ParserFormatList.ReadFromOptionFile(SECTION_NAME);
    parsers = GetParsers(formats);

    autoParser = parsers.Find(candidate => candidate.FormatName.Equals(AutoAccessNumberParser.FORMAT_NAME));
    if (autoParser != null)
    {
        return;
    }

    // Nothing configured under the auto format name: use the default parser.
    autoParser = DefaultAccessNumberParser.GetInstance();
}
/// <summary>
/// Creates the validator. The three ignore flags start as <c>true</c>,
/// <c>UniprotXmlFile</c> starts empty, and the remaining state is taken from the arguments.
/// </summary>
/// <param name="mutationPattern">Regular expression pattern used to build the mutation regex.</param>
/// <param name="fastaFile">Fasta file path stored on this instance.</param>
/// <param name="acParser">Access number parser stored on this instance.</param>
/// <param name="charges">Set of charges stored as given (not copied).</param>
/// <param name="pNovoPeptideFile">pNovo peptide file path stored on this instance.</param>
public MascotSAPValidator(
    string mutationPattern,
    string fastaFile,
    IAccessNumberParser acParser,
    HashSet<int> charges,
    string pNovoPeptideFile)
{
    // Defaults for the configurable options.
    IgnoreNtermMutation = true;
    IgnoreDeamidatedMutation = true;
    IgnoreMultipleNucleotideMutation = true;
    UniprotXmlFile = string.Empty;

    // State supplied by the caller.
    this.mutationReg = new Regex(mutationPattern);
    this.fastaFile = fastaFile;
    this.acParser = acParser;
    this.charges = charges;
    this.pNovoPeptideFile = pNovoPeptideFile;
}
/// <summary>
/// Reads every sequence from <paramref name="filename"/> and maps the access number
/// parsed from each sequence name to that sequence's reference.
/// </summary>
/// <param name="sf">Sequence format used to read sequences from the stream.</param>
/// <param name="filename">Path of the file to read.</param>
/// <param name="parser">Parser applied to each sequence name to obtain the map key.</param>
/// <returns>
/// Map from parsed access number to reference; when a key occurs more than once,
/// the last sequence read wins.
/// </returns>
public static Dictionary<string, string> ReadAccessNumberReferenceMap(ISequenceFormat sf, string filename, IAccessNumberParser parser)
{
    var referenceByAccessNumber = new Dictionary<string, string>();

    using (var reader = new StreamReader(filename))
    {
        // ReadSequence returns null once there is nothing left to read.
        for (Sequence entry = sf.ReadSequence(reader); entry != null; entry = sf.ReadSequence(reader))
        {
            string accessNumber = parser.GetValue(entry.Name);
            referenceByAccessNumber[accessNumber] = entry.Reference;
        }
    }

    return referenceByAccessNumber;
}
/// <summary>
/// Looks up a parser by regex pattern, then by format name, and creates a new
/// one wrapped in <c>NoExceptionAccessNumberParser</c> when neither lookup finds a match.
/// </summary>
/// <param name="regexString">Regex pattern used for the first lookup and for the new parser.</param>
/// <param name="formatName">Format name used for the second lookup and for the new parser.</param>
/// <returns>The first parser found, or the newly created wrapped parser.</returns>
public static IAccessNumberParser FindOrCreateParser(string regexString, string formatName)
{
    IAccessNumberParser byPattern = FindParserByRegexPattern(regexString);
    if (byPattern != null)
    {
        return byPattern;
    }

    IAccessNumberParser byName = FindParserByName(formatName);
    if (byName != null)
    {
        return byName;
    }

    return new NoExceptionAccessNumberParser(new AccessNumberParser(regexString, formatName));
}
/// <summary>
/// Replaces each protein name of every peptide in <paramref name="result"/> with the
/// value produced by <paramref name="acParser"/>; names the parser cannot parse are left unchanged.
/// </summary>
/// <typeparam name="T">Spectrum type.</typeparam>
/// <param name="result">Spectra whose peptides are updated in place.</param>
/// <param name="acParser">Parser tried against each protein name.</param>
public static void ResetProteinByAccessNumberParser<T>(List<T> result, IAccessNumberParser acParser) where T : IIdentifiedSpectrum
{
    foreach (T spectrum in result)
    {
        foreach (IIdentifiedPeptide peptide in spectrum.Peptides)
        {
            int index = 0;
            while (index < peptide.Proteins.Count)
            {
                string parsedName;
                if (acParser.TryParse(peptide.Proteins[index], out parsedName))
                {
                    peptide.SetProtein(index, parsedName);
                }

                index++;
            }
        }
    }
}
/// <summary>
/// Splits the protein groups read from <paramref name="filename"/> across the filters in
/// <paramref name="map"/>. For every group and filter, the spectra of the group's first
/// protein that the filter accepts are written to that filter's result writer, and the
/// fasta entry of each protein in the group is written to its fasta writer.
/// </summary>
/// <param name="filename">Input file; the fasta database is expected at <c>filename + ".fasta"</c>.</param>
/// <param name="result">List returned unchanged to the caller.</param>
/// <param name="map">Filter-to-entry map. Every entry is disposed before this method exits.</param>
/// <returns><paramref name="result"/>.</returns>
/// <exception cref="Exception">Thrown when the fasta database file does not exist.</exception>
protected override IEnumerable<string> DoProcess(string filename, List<string> result, Dictionary<IFilter<IIdentifiedSpectrum>, SpectrumEntry> map)
{
    // The database checks live inside the try so the entries are disposed even when
    // the fasta file is missing or cannot be parsed; previously those failures skipped
    // the finally block and left the entries undisposed.
    try
    {
        string database = filename + ".fasta";
        if (!File.Exists(database))
        {
            throw new Exception("Fasta file not exists : " + database);
        }

        IAccessNumberParser acParser = AccessNumberParserFactory.GuessParser(database);
        Dictionary<string, Sequence> seqMap = DatabaseUtils.GetAccessNumberMap(database, acParser);

        using (IdentifiedProteinGroupEnumerator iter = new IdentifiedProteinGroupEnumerator(filename))
        {
            // Each result file starts with the protein header followed by the peptide header.
            foreach (SpectrumEntry entry in map.Values)
            {
                entry.ResultWriter.WriteLine(iter.ProteinFormat.GetHeader());
                entry.ResultWriter.WriteLine(iter.PeptideFormat.GetHeader());
            }

            while (iter.MoveNext())
            {
                IIdentifiedProteinGroup group = iter.Current;
                List<IIdentifiedSpectrum> spectra = group[0].GetSpectra();

                foreach (KeyValuePair<IFilter<IIdentifiedSpectrum>, SpectrumEntry> pair in map)
                {
                    IFilter<IIdentifiedSpectrum> filter = pair.Key;
                    SpectrumEntry entry = pair.Value;

                    // entry.Spectra is reused as a scratch list for the current group.
                    entry.Spectra.Clear();
                    foreach (IIdentifiedSpectrum spectrum in spectra)
                    {
                        if (filter.Accept(spectrum))
                        {
                            entry.Spectra.Add(spectrum);
                        }
                    }

                    if (entry.Spectra.Count == 0)
                    {
                        continue;
                    }

                    for (int i = 0; i < group.Count; i++)
                    {
                        entry.ResultWriter.WriteLine("${0}-{1}{2}", group.Index, i + 1, iter.ProteinFormat.GetString(group[i]));

                        string ac = acParser.GetValue(group[i].Name);
                        Sequence seq = seqMap[ac];
                        entry.FastaWriter.WriteLine(">" + seq.Reference);
                        entry.FastaWriter.WriteLine(seq.SeqString);
                    }

                    foreach (IIdentifiedSpectrum spectrum in entry.Spectra)
                    {
                        entry.ResultWriter.WriteLine(iter.PeptideFormat.GetString(spectrum));
                    }
                }
            }

            return result;
        }
    }
    finally
    {
        foreach (SpectrumEntry entry in map.Values)
        {
            entry.Dispose();
        }
    }
}
/// <summary>
/// Wraps the given access number parser.
/// </summary>
/// <param name="parser">Parser stored in the <c>parser</c> field.</param>
public NoExceptionAccessNumberParser(IAccessNumberParser parser)
{
    this.parser = parser;
}
/// <summary>
/// Builds a map from parsed access number to sequence reference for all sequences
/// read from <paramref name="filename"/>.
/// </summary>
/// <param name="sf">Sequence format used to read sequences from the stream.</param>
/// <param name="filename">Path of the file to read.</param>
/// <param name="parser">Parser applied to each sequence name to obtain the map key.</param>
/// <returns>
/// Map from parsed access number to reference; a later sequence with the same key
/// overwrites the earlier one.
/// </returns>
public static Dictionary<string, string> ReadAccessNumberReferenceMap(ISequenceFormat sf, string filename, IAccessNumberParser parser)
{
    var map = new Dictionary<string, string>();

    using (var input = new StreamReader(filename))
    {
        while (true)
        {
            Sequence current = sf.ReadSequence(input);
            if (current == null)
            {
                // No more sequences to read.
                break;
            }

            string key = parser.GetValue(current.Name);
            map[key] = current.Reference;
        }
    }

    return map;
}
/// <summary>
/// Creates the builder with the access number parser it will use.
/// </summary>
/// <param name="acParser">Parser stored in the <c>parser</c> field.</param>
public NGlycanPeptideBuilder(IAccessNumberParser acParser)
{
    parser = acParser;
}
/// <summary>
/// Parses every path in <c>options.PathNames</c> into identified spectra. A distiller is
/// chosen per path (directory, .xml, .msf, or anything else treated as a zip file), the
/// option filter is applied, and each kept spectrum is tagged with <c>options.Name</c> and
/// the engine name. After each path a memory/time estimate is written to the console.
/// </summary>
/// <returns>The spectra kept from all paths, in path order.</returns>
/// <exception cref="UserTerminatedException">
/// Thrown when <c>Progress</c> reports a pending cancellation before a path is processed.
/// </exception>
protected override List<IIdentifiedSpectrum> DoParse()
{
    // NOTE(review): the parser is not used below; the lookup is kept so that any effect
    // or failure of resolving it still happens here - confirm whether it can be removed.
    IAccessNumberParser parser = options.Parent.Database.GetAccessNumberParser();

    var peptideFormat = new SequestPeptideTextFormat() { Progress = this.Progress };

    Progress.SetRange(0, options.PathNames.Count + 1);

    var result = new List<IIdentifiedSpectrum>();

    IFilter<IIdentifiedSpectrum> spectrumFilter = options.GetFilter();

    SequestOutDirectoryParser outDirParser;
    SequestOutsParser outsParser;
    SequestOutZipParser outZipParser;
    string modStr = "";
    if (options.SkipSamePeptideButDifferentModificationSite)
    {
        modStr = MyConvert.Format(".M{0:0.00}", options.MaxModificationDeltaCn);
        outsParser = new SequestOutsParser(true, options.MaxModificationDeltaCn);
        outDirParser = new SequestOutDirectoryParser(true, options.MaxModificationDeltaCn);
        outZipParser = new SequestOutZipParser(true, options.MaxModificationDeltaCn);
    }
    else
    {
        outsParser = new SequestOutsParser(true);
        outDirParser = new SequestOutDirectoryParser(true);
        outZipParser = new SequestOutZipParser(true);
    }

    outsParser.Progress = Progress;
    outDirParser.Progress = Progress;
    outZipParser.Progress = Progress;

    // Baseline taken after the first path; later paths are extrapolated from it.
    long afterFirstMemory = 0;
    DateTime afterFirstTime = DateTime.Now;

    int stepCount = 0;
    foreach (string pathName in options.PathNames)
    {
        stepCount++;

        if (Progress.IsCancellationPending())
        {
            throw new UserTerminatedException();
        }

        AbstractSequestSpectraDistiller distiller;
        string engine;
        if (Directory.Exists(pathName))
        {
            var dir = new DirectoryInfo(pathName);
            if (dir.GetFiles("*.outs").Length > 0 || dir.GetFiles("*.outs.zip").Length > 0)
            {
                distiller = new SequestOutsDistiller(outsParser, peptideFormat);
            }
            else
            {
                distiller = new SequestOutDirectoryDistiller(outDirParser, peptideFormat);
            }
            engine = "SEQUEST";
        }
        else if (pathName.ToLower().EndsWith(".xml"))
        {
            distiller = new CometSpectraDistiller(peptideFormat);
            engine = "COMET";
        }
        else if (pathName.ToLower().EndsWith(".msf"))
        {
            distiller = new MsfSpectraDistiller(peptideFormat);
            engine = "PD";
        }
        else // zip file
        {
            // Use the out-zip parser when the archive has any ".out" entry, else the outs parser.
            ISpectrumParser zipParser;
            if (ZipUtils.HasFile(pathName, m => m.ToLower().EndsWith(".out")))
            {
                zipParser = outZipParser;
            }
            else
            {
                zipParser = outsParser;
            }
            distiller = new SequestOutZipDistiller(zipParser, peptideFormat);
            engine = "SEQUEST";
        }

        distiller.Progress = this.Progress;

        List<IIdentifiedSpectrum> curPeptides = distiller.ParseSpectra(pathName, modStr, stepCount, options.PathNames.Count);

        if (null != spectrumFilter)
        {
            curPeptides.RemoveAll(m => !spectrumFilter.Accept(m));
        }

        curPeptides.ForEach(m =>
        {
            m.Tag = options.Name;
            m.Engine = engine;
        });

        result.AddRange(curPeptides);

        // Presumably forced so the working-set sample below is not inflated by garbage
        // from the path just parsed.
        curPeptides = null;
        GC.Collect();
        GC.WaitForPendingFinalizers();

        if (stepCount == 1)
        {
            afterFirstMemory = Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024);
            afterFirstTime = DateTime.Now;
        }
        else
        {
            long currMemory = Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024);
            double averageCost = (double)(currMemory - afterFirstMemory) / (stepCount - 1);
            double estimatedCost = afterFirstMemory + averageCost * options.PathNames.Count;

            DateTime currTime = DateTime.Now;
            var averageTime = currTime.Subtract(afterFirstTime).TotalMinutes / (stepCount - 1);
            var finishTime = afterFirstTime.AddMinutes(averageTime * (options.PathNames.Count - 1));

            // Pass the DateTime itself: a pre-formatted string would ignore the
            // "MM-dd HH:mm:ss" format component of placeholder {5}.
            Console.WriteLine("{0}/{1}, cost {2}M, avg {3:0.0}M, need {4:0.0}M, will finish at {5:MM-dd HH:mm:ss}", stepCount, options.PathNames.Count, currMemory, averageCost, estimatedCost, finishTime);
        }
    }

    return result;
}
/// <summary>
/// Convenience overload: guesses the access number parser from the fasta file and
/// delegates to the overload that takes an explicit parser.
/// </summary>
/// <param name="fastaFilename">Fasta file used both to guess the parser and as the sequence source.</param>
/// <param name="t">Identified result passed through to the delegated overload.</param>
/// <param name="progress">Progress callback passed through to the delegated overload.</param>
public static void FillSequenceFromFasta(string fastaFilename, IIdentifiedResult t, IProgressCallback progress)
{
    IAccessNumberParser guessedParser = AccessNumberParserFactory.GuessParser(fastaFilename);

    FillSequenceFromFasta(guessedParser, fastaFilename, t, progress);
}
/// <summary>
/// Creates the processor and stores its three settings.
/// </summary>
/// <param name="parser">Access number parser stored in the <c>parser</c> field.</param>
/// <param name="database">Database value stored in the <c>database</c> field.</param>
/// <param name="replaceName">Flag stored in the <c>replaceName</c> field.</param>
public ExtractFastaByAccessNumberProcessor(
    IAccessNumberParser parser,
    string database,
    bool replaceName)
{
    this.parser = parser;
    this.database = database;
    this.replaceName = replaceName;
}
/// <summary>
/// Runs the base initialization for <paramref name="optionFile"/>, then sets the
/// parser to the factory's auto parser.
/// </summary>
/// <param name="optionFile">Option file passed to the base implementation.</param>
protected override void InitializeFromOption(string optionFile)
{
    base.InitializeFromOption(optionFile);

    // Assigned after the base call, so it replaces any parser set there.
    this.parser = AccessNumberParserFactory.GetAutoParser();
}