/// <summary>
/// Handles the "Open" menu command: asks the user for the protein and iTRAQ files,
/// loads the statistic option from the parameter file that sits next to the protein
/// file, then starts loading the data on a background task.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event arguments (unused).</param>
private void mnuOpen_Click(object sender, EventArgs e)
{
    string paramFile;

    // A dialog shown with ShowDialog is not disposed when it closes,
    // so it has to be released explicitly once its values have been read.
    using (var dlg = new ITraqOpenFileDialog())
    {
        dlg.LoadOption();
        if (dlg.ShowDialog(this) != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        proteinFile = dlg.ProteinFile;
        itrapFile = dlg.ITraqFile;

        // The parameter file shares the protein file's name, with a "param" extension.
        paramFile = Path.ChangeExtension(dlg.ProteinFile, "param");
    }

    if (!File.Exists(paramFile))
    {
        MessageBox.Show(this, "Error find parameter file " + paramFile);
        return;
    }

    option = new ITraqProteinStatisticOption();
    option.LoadFromFile(paramFile);

    Progress.Begin();

    // Load in the background; the view is refreshed on the UI thread only when loading succeeded.
    var task = Task.Factory.StartNew(() => LoadData(), TaskCreationOptions.LongRunning);
    task.ContinueWith((m) => this.Invoke(new Action(UpdateData)), TaskContinuationOptions.OnlyOnRanToCompletion);
}
/// <summary>
/// Merges, in place, every peak list into an earlier peak list that has the same
/// precursor charge, a precursor m/z within <c>ppmPrecursorTolerance</c> and a
/// retention time (first scan time) within <c>retentionTimeTolerance</c>.
/// Merged entries are removed from <paramref name="pklList"/>.
/// </summary>
/// <param name="pklList">Peak lists to merge; the list itself is modified.</param>
/// <returns>The same list instance that was passed in, after merging.</returns>
/// <exception cref="UserTerminatedException">Cancellation was requested through the progress callback.</exception>
protected List<PeakList<Peak>> MergePeakList(List<PeakList<Peak>> pklList)
{
    Progress.SetRange(0, pklList.Count);
    Progress.Begin();
    try
    {
        // pklList.Count shrinks while merging, so it is re-read on every iteration.
        for (int anchorIndex = 0; anchorIndex < pklList.Count; anchorIndex++)
        {
            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            Progress.SetPosition(anchorIndex);

            PeakList<Peak> anchor = pklList[anchorIndex];
            double mzWindow = PrecursorUtils.ppm2mz(anchor.PrecursorMZ, this.ppmPrecursorTolerance);

            int candidateIndex = anchorIndex + 1;
            while (candidateIndex < pklList.Count)
            {
                PeakList<Peak> candidate = pklList[candidateIndex];

                // Stop scanning forward at the first candidate beyond the retention time window.
                double rtDelta = candidate.ScanTimes[0].RetentionTime - anchor.ScanTimes[0].RetentionTime;
                if (rtDelta > this.retentionTimeTolerance)
                {
                    break;
                }

                bool sameCharge = candidate.PrecursorCharge == anchor.PrecursorCharge;
                if (sameCharge && Math.Abs(candidate.PrecursorMZ - anchor.PrecursorMZ) < mzWindow)
                {
                    // After removal the next candidate slides into candidateIndex, so do not advance.
                    anchor.MergeByMZFirst(candidate, this.ppmPeakTolerance);
                    pklList.RemoveAt(candidateIndex);
                }
                else
                {
                    candidateIndex++;
                }
            }
        }

        Progress.SetPosition(pklList.Count);
    }
    finally
    {
        Progress.End();
    }

    return pklList;
}
/// <summary>
/// Predicts sequence variants by comparing experimental MS2 items against a spectral
/// library, then writes the predictions.
/// When <c>options.MatchedFile</c> exists, a validation report is written to
/// <c>options.OutputFile</c>. Otherwise the prediction table is written to
/// <c>options.OutputTableFile</c> and a FASTA file holding the original database plus
/// the new variant sequences is written to <c>options.OutputFile</c>.
/// </summary>
/// <returns>The paths <c>options.OutputFile</c> and <c>options.OutputTableFile</c>, in that order.</returns>
/// <exception cref="NotSupportedException">A prediction carries a variant type this method cannot turn into a sequence.</exception>
public override IEnumerable<string> Process()
{
    var expRawfileMap = options.RawFiles.ToDictionary(m => Path.GetFileNameWithoutExtension(m));

    Progress.SetMessage("Reading library file ...");
    var liblist = new MS2ItemXmlFormat().ReadFromFile(options.LibraryFile);
    PreprocessingMS2ItemList(liblist);

    // Library entries grouped by charge.
    var lib = liblist.GroupBy(m => m.Charge).ToDictionary(m => m.Key, m => m.ToList());

    Progress.SetMessage("Building library sequence amino acid composition ...");
    // Keep only the distinct, sorted residues for which a substitution is configured.
    lib.ForEach(m => m.Value.ForEach(l => l.AminoacidCompsition = (from a in l.Peptide where options.SubstitutionDeltaMassMap.ContainsKey(a) select a).Distinct().OrderBy(k => k).ToArray()));

    // Experimental name -> first-scan numbers of the library spectra.
    var expScanMap = (from p in liblist from sq in p.FileScans select sq).ToList().GroupBy(m => m.Experimental).ToDictionary(m => m.Key, m => new HashSet<int>(from l in m select l.FirstScan));

    if (File.Exists(options.PeptidesFile))
    {
        Progress.SetMessage("Reading peptides file used for excluding scan ...");
        var peptides = new MascotPeptideTextFormat().ReadFromFile(options.PeptidesFile);
        foreach (var pep in peptides)
        {
            HashSet<int> scans;
            if (!expScanMap.TryGetValue(pep.Query.FileScan.Experimental, out scans))
            {
                scans = new HashSet<int>();
                expScanMap[pep.Query.FileScan.Experimental] = scans;
            }

            scans.Add(pep.Query.FileScan.FirstScan);
        }
    }

    Progress.SetMessage("Reading MS2/MS3 data ...");
    var result = GetCandidateMs2ItemList(expRawfileMap, expScanMap);
    PreprocessingMS2ItemList(result);

    Progress.SetMessage("Finding SAP ...");
    List<SapPredicted> predicted = new List<SapPredicted>();
    var minDeltaMass = options.SubstitutionDeltaMassMap.Values.Min(l => l.Min(k => k.DeltaMass));
    var maxDeltaMass = options.SubstitutionDeltaMassMap.Values.Max(l => l.Max(k => k.DeltaMass));

    Progress.SetRange(0, result.Count);
    Progress.Begin();
    try
    {
        FindCandidates(lib, result, predicted, minDeltaMass, maxDeltaMass);

        // Rebuild the prediction list so each spectrum keeps only its best library match(es).
        var groups = predicted.ToGroupDictionary(m => m.Ms2.GetFileScans());
        predicted.Clear();
        foreach (var g in groups.Values)
        {
            var gg = g.ToGroupDictionary(m => m.LibMs2).Values.ToList();
            gg.Sort((m1, m2) => CompareSapPrecitedList(m1, m2));

            var expect = gg[0].FirstOrDefault(m => m.IsExpect);
            if (expect != null)
            {
                predicted.Add(expect);
                continue;
            }

            // No expected hit: keep the top group and every following group that ties with it.
            predicted.AddRange(gg[0]);
            for (int i = 1; i < gg.Count && CompareSapPrecitedList(gg[0], gg[i]) == 0; i++)
            {
                predicted.AddRange(gg[i]);
            }
        }

        if (File.Exists(options.MatchedFile))
        {
            new SapPredictedValidationWriter(options.MatchedFile).WriteToFile(options.OutputFile, predicted);
        }
        else
        {
            new SapPredictedWriter().WriteToFile(options.OutputTableFile, predicted);

            Progress.SetMessage("Generating SAP sequence ...");
            List<Sequence> predictedSeq = BuildPredictedVariantSequences(predicted);

            // Keep one entry per distinct sequence string.
            predictedSeq = predictedSeq.GroupBy(m => m.SeqString).Select(g => g.First()).ToList();

            Progress.SetMessage("Reading database {0} ...", options.DatabaseFastaFile);
            var databases = SequenceUtils.Read(options.DatabaseFastaFile);

            Progress.SetMessage("Removing variant sequences which are already existed in database ...");
            predictedSeq.RemoveAll(variant => databases.Any(db => db.SeqString.Contains(variant.SeqString)));

            databases.AddRange(predictedSeq);

            Progress.SetMessage("Writing SAP sequence and original database to {0} ...", options.OutputFile);
            SequenceUtils.Write(new FastaFormat(), options.OutputFile, databases);
        }
    }
    finally
    {
        // Always close the progress session, even when prediction or writing fails.
        Progress.End();
    }

    return new string[] { options.OutputFile, options.OutputTableFile };
}

/// <summary>
/// Builds one FASTA entry for every variant sequence implied by the given predictions.
/// </summary>
private static List<Sequence> BuildPredictedVariantSequences(List<SapPredicted> predicted)
{
    var sequences = new List<Sequence>();
    foreach (var predict in predicted)
    {
        var seq = PeptideUtils.GetPureSequence(predict.LibMs2.Peptide);

        if (predict.Target.TargetType == VariantType.SingleAminoacidPolymorphism)
        {
            // Substitute the source residue at every position where it occurs.
            for (int i = 0; i < seq.Length; i++)
            {
                if (seq[i] != predict.Target.Source[0])
                {
                    continue;
                }

                foreach (var t in predict.Target.Target)
                {
                    // Substring(0, 0) is empty, so position 0 needs no special case.
                    string targetSeq = seq.Substring(0, i) + t + seq.Substring(i + 1);
                    var reference = string.Format("sp|SAP_{0}_{1}|{2}_{3}_{4}_{5}", targetSeq, predict.Target.TargetType, seq, predict.Target.Source, i + 1, t);
                    sequences.Add(new Sequence(reference, targetSeq));
                }
            }
        }
        else
        {
            foreach (var tseq in predict.Target.Target)
            {
                sequences.Add(new Sequence(BuildTerminalVariantReference(predict, seq, tseq), tseq));
            }
        }
    }

    return sequences;
}

/// <summary>
/// Builds the FASTA reference for a terminal loss or extension variant; the last field
/// is the residue stretch that was lost from, or added to, the library sequence.
/// </summary>
private static string BuildTerminalVariantReference(SapPredicted predict, string seq, string tseq)
{
    if (predict.Target.TargetType == VariantType.NTerminalLoss)
    {
        return string.Format("sp|SAP_{0}_{1}|{2}_loss_{3}", tseq, predict.Target.TargetType, seq, seq.Substring(0, seq.Length - tseq.Length));
    }

    if (predict.Target.TargetType == VariantType.CTerminalLoss)
    {
        return string.Format("sp|SAP_{0}_{1}|{2}_loss_{3}", tseq, predict.Target.TargetType, seq, seq.Substring(tseq.Length));
    }

    if (predict.Target.TargetType == VariantType.NTerminalExtension)
    {
        return string.Format("sp|SAP_{0}_{1}|{2}_ext_{3}", tseq, predict.Target.TargetType, seq, tseq.Substring(0, tseq.Length - seq.Length));
    }

    if (predict.Target.TargetType == VariantType.CTerminalExtension)
    {
        return string.Format("sp|SAP_{0}_{1}|{2}_ext_{3}", tseq, predict.Target.TargetType, seq, tseq.Substring(seq.Length));
    }

    throw new NotSupportedException("I don't know how to deal with " + predict.Target.TargetType.ToString());
}
/// <summary>
/// Forwards to <c>Progress.Begin()</c>.
/// </summary>
public void Begin()
{
    Progress.Begin();
}
/// <summary>
/// Reads every spectrum with MS level above 1 from a raw file, runs each through
/// <c>pklProcessor</c>, and returns the non-empty processed peak lists.
/// When a scan cannot be read, its number is appended to <paramref name="ignoreScans"/>,
/// the ignore list is written to the ignore-scan file, and the whole file is read again.
/// </summary>
/// <param name="rawFilename">Raw file to read.</param>
/// <param name="ignoreScans">Scan numbers to skip; unreadable scans are added to this list.</param>
/// <returns>The processed peak lists, in scan order.</returns>
/// <exception cref="UserTerminatedException">Cancellation was requested through the progress callback.</exception>
protected List<PeakList<Peak>> ReadTandemMassFromRaw(FileInfo rawFilename, List<int> ignoreScans)
{
    string experimental = FileUtils.ChangeExtension(rawFilename.Name, "");
    var spectra = new List<PeakList<Peak>>();
    bool retry = false;

    rawReader.Open(rawFilename.FullName);
    try
    {
        int firstScan = rawReader.GetFirstSpectrumNumber();
        int lastScan = rawReader.GetLastSpectrumNumber();

        Progress.SetRange(firstScan, lastScan);
        Progress.Begin();
        try
        {
            for (int scan = firstScan; scan <= lastScan; scan++)
            {
                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                if (ignoreScans.Contains(scan))
                {
                    continue;
                }

                Progress.SetPosition(scan);

                int msLevel = rawReader.GetMsLevel(scan);
                if (msLevel <= 1)
                {
                    continue;
                }

                PeakList<Peak> pkl;
                try
                {
                    pkl = rawReader.GetPeakList(scan);
                }
                catch (RawReadException ex)
                {
                    // Record the unreadable scan, persist the ignore list, and restart the read.
                    ignoreScans.Add(ex.Scan);
                    var ignoreFile = GetIgnoreScanFile(rawFilename);
                    File.WriteAllLines(ignoreFile, ignoreScans.Select(i => i.ToString()).ToArray());
                    retry = true;
                    break;
                }

                pkl.Precursor = rawReader.GetPrecursorPeakWithMasterScan(scan);
                pkl.MsLevel = msLevel;
                pkl.Experimental = experimental;
                pkl.ScanTimes.Add(new ScanTime(scan, rawReader.ScanToRetentionTime(scan)));
                pkl.ScanMode = rawReader.GetScanMode(scan);

                // A charge of 0 is replaced with a guessed value.
                if (pkl.PrecursorCharge == 0)
                {
                    pkl.PrecursorCharge = PrecursorUtils.GuessPrecursorCharge(pkl, pkl.PrecursorMZ);
                }

                PeakList<Peak> processed = this.pklProcessor.Process(pkl);
                if (processed != null && processed.Count > 0)
                {
                    spectra.Add(processed);
                }
            }
        }
        finally
        {
            Progress.End();
        }
    }
    finally
    {
        rawReader.Close();
    }

    // The reader is closed by now, so the retry can reopen the file.
    return retry ? ReadTandemMassFromRaw(rawFilename, ignoreScans) : spectra;
}