/// <summary>
        /// Handles the Open menu item: asks the user for the protein/iTRAQ files, loads the
        /// matching ".param" option file, then runs <c>LoadData</c> on a background task and
        /// calls <c>UpdateData</c> on the UI thread once loading has completed successfully.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void mnuOpen_Click(object sender, EventArgs e)
        {
            // Assumes ITraqOpenFileDialog is a WinForms dialog (IDisposable); a form shown with
            // ShowDialog is not disposed automatically when it closes, so dispose it here.
            using (var dlg = new ITraqOpenFileDialog())
            {
                dlg.LoadOption();

                if (dlg.ShowDialog(this) != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                // The parameter file sits next to the protein file, with extension "param".
                var paramFile = Path.ChangeExtension(dlg.ProteinFile, "param");
                if (!File.Exists(paramFile))
                {
                    MessageBox.Show(this, "Error find parameter file " + paramFile);
                    return;
                }

                var loadedOption = new ITraqProteinStatisticOption();
                loadedOption.LoadFromFile(paramFile);

                // Commit the new selection only after validation and loading succeeded, so an
                // aborted open leaves the previously loaded files and option untouched.
                proteinFile = dlg.ProteinFile;
                itrapFile   = dlg.ITraqFile;
                option      = loadedOption;
            }

            Progress.Begin();

            var task = Task.Factory.StartNew(() => LoadData(), TaskCreationOptions.LongRunning);
            task.ContinueWith((m) => this.Invoke(new Action(UpdateData)), TaskContinuationOptions.OnlyOnRanToCompletion);

            // Observe a failed load and report it; otherwise the exception is never surfaced.
            // NOTE(review): Progress.End() is not called here because this snippet does not show
            // who ends the progress started above - confirm whether the failure path needs it.
            task.ContinueWith((m) =>
            {
                var error = m.Exception.GetBaseException();
                this.Invoke(new Action(() => MessageBox.Show(this, "Failed to load data: " + error.Message)));
            }, TaskContinuationOptions.OnlyOnFaulted);
        }
        /// <summary>
        /// Merges peak lists in place. Each list is compared with the lists that follow it; a
        /// following list with the same precursor charge and a precursor m/z difference below
        /// the ppm-derived window is merged into it and removed from <paramref name="pklList"/>.
        /// Scanning for followers stops at the first one whose retention-time gap exceeds
        /// <c>retentionTimeTolerance</c>.
        /// </summary>
        /// <param name="pklList">Peak lists to merge; modified in place.</param>
        /// <returns>The same <paramref name="pklList"/> instance, after merging.</returns>
        /// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
        protected List<PeakList<Peak>> MergePeakList(List<PeakList<Peak>> pklList)
        {
            Progress.SetRange(0, pklList.Count);
            Progress.Begin();
            try
            {
                // pklList.Count is re-read on every pass because merging shrinks the list.
                for (int anchorIndex = 0; anchorIndex < pklList.Count; anchorIndex++)
                {
                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }
                    Progress.SetPosition(anchorIndex);

                    PeakList<Peak> anchor = pklList[anchorIndex];
                    double mzWindow = PrecursorUtils.ppm2mz(anchor.PrecursorMZ, this.ppmPrecursorTolerance);

                    int probeIndex = anchorIndex + 1;
                    while (probeIndex < pklList.Count)
                    {
                        PeakList<Peak> probe = pklList[probeIndex];

                        double rtDelta = probe.ScanTimes[0].RetentionTime - anchor.ScanTimes[0].RetentionTime;
                        if (rtDelta > this.retentionTimeTolerance)
                        {
                            break;
                        }

                        bool shouldMerge = probe.PrecursorCharge == anchor.PrecursorCharge
                                           && Math.Abs(probe.PrecursorMZ - anchor.PrecursorMZ) < mzWindow;
                        if (shouldMerge)
                        {
                            // After removal the next candidate slides into probeIndex, so do not advance.
                            anchor.MergeByMZFirst(probe, this.ppmPeakTolerance);
                            pklList.RemoveAt(probeIndex);
                        }
                        else
                        {
                            probeIndex++;
                        }
                    }
                }
                Progress.SetPosition(pklList.Count);
            }
            finally
            {
                Progress.End();
            }

            return pklList;
        }
        /// <summary>
        /// Runs the SAP prediction pipeline: reads the MS2 library file, collects candidate
        /// MS2/MS3 items from the raw files, calls <c>FindCandidates</c> to fill the prediction
        /// list, keeps the top-ranked prediction group(s) per spectrum and writes the result.
        /// When <c>options.MatchedFile</c> exists a validation report is written to
        /// <c>options.OutputFile</c>; otherwise the prediction table is written to
        /// <c>options.OutputTableFile</c> and a FASTA file (original database plus the new
        /// variant sequences) is written to <c>options.OutputFile</c>.
        /// </summary>
        /// <returns>An array holding <c>options.OutputFile</c> and <c>options.OutputTableFile</c>, in that order.</returns>
        /// <exception cref="NotSupportedException">A prediction has a variant type this method has no naming rule for.</exception>
        public override IEnumerable<string> Process()
        {
            var expRawfileMap = options.RawFiles.ToDictionary(m => Path.GetFileNameWithoutExtension(m));

            Progress.SetMessage("Reading library file ...");
            var liblist = new MS2ItemXmlFormat().ReadFromFile(options.LibraryFile);

            PreprocessingMS2ItemList(liblist);

            // Library items grouped by charge.
            var lib = liblist.GroupBy(m => m.Charge).ToDictionary(m => m.Key, m => m.ToList());

            Progress.SetMessage("Building library sequence amino acid composition ...");
            // Per library item: the distinct, sorted peptide characters that have an entry in the substitution map.
            lib.ForEach(m => m.Value.ForEach(l => l.AminoacidCompsition = (from a in l.Peptide
                                                                           where options.SubstitutionDeltaMassMap.ContainsKey(a)
                                                                           select a).Distinct().OrderBy(k => k).ToArray()));

            // Experimental name -> first-scan numbers of the library's file scans.
            // Presumably these scans are skipped by GetCandidateMs2ItemList - verify there.
            var expScanMap = (from p in liblist
                              from sq in p.FileScans
                              select sq).ToList().GroupBy(m => m.Experimental).ToDictionary(m => m.Key, m => new HashSet<int>(from l in m select l.FirstScan));

            if (File.Exists(options.PeptidesFile))
            {
                Progress.SetMessage("Reading peptides file used for excluding scan ...");
                var peptides = new MascotPeptideTextFormat().ReadFromFile(options.PeptidesFile);
                foreach (var pep in peptides)
                {
                    HashSet<int> scans;
                    if (!expScanMap.TryGetValue(pep.Query.FileScan.Experimental, out scans))
                    {
                        scans = new HashSet<int>();
                        expScanMap[pep.Query.FileScan.Experimental] = scans;
                    }
                    scans.Add(pep.Query.FileScan.FirstScan);
                }
            }

            Progress.SetMessage("Reading MS2/MS3 data ...");
            var result = GetCandidateMs2ItemList(expRawfileMap, expScanMap);

            PreprocessingMS2ItemList(result);

            Progress.SetMessage("Finding SAP ...");
            var predicted = new List<SapPredicted>();

            var minDeltaMass = options.SubstitutionDeltaMassMap.Values.Min(l => l.Min(k => k.DeltaMass));
            var maxDeltaMass = options.SubstitutionDeltaMassMap.Values.Max(l => l.Max(k => k.DeltaMass));

            Progress.SetRange(0, result.Count);
            Progress.Begin();
            try
            {
                FindCandidates(lib, result, predicted, minDeltaMass, maxDeltaMass);

                // Regroup the predictions by the file scans of their MS2 item and rebuild the list.
                var groups = predicted.ToGroupDictionary(m => m.Ms2.GetFileScans());

                predicted.Clear();
                foreach (var g in groups.Values)
                {
                    var gg = g.ToGroupDictionary(m => m.LibMs2).Values.ToList();
                    gg.Sort((m1, m2) => CompareSapPrecitedList(m1, m2));

                    var expect = gg[0].FirstOrDefault(m => m.IsExpect);
                    if (expect != null)
                    {
                        predicted.Add(expect);
                    }
                    else
                    {
                        // Keep the first-ranked group plus every following group that ties with it.
                        predicted.AddRange(gg[0]);
                        for (int i = 1; i < gg.Count; i++)
                        {
                            if (CompareSapPrecitedList(gg[0], gg[i]) != 0)
                            {
                                break;
                            }
                            predicted.AddRange(gg[i]);
                        }
                    }
                }

                if (File.Exists(options.MatchedFile))
                {
                    new SapPredictedValidationWriter(options.MatchedFile).WriteToFile(options.OutputFile, predicted);
                }
                else
                {
                    new SapPredictedWriter().WriteToFile(options.OutputTableFile, predicted);

                    Progress.SetMessage("Generating SAP sequence ...");
                    var predictedSeq = new List<Sequence>();
                    foreach (var predict in predicted)
                    {
                        var seq = PeptideUtils.GetPureSequence(predict.LibMs2.Peptide);
                        if (predict.Target.TargetType == VariantType.SingleAminoacidPolymorphism)
                        {
                            // Substitute every occurrence of the source residue, one position at a time.
                            for (int i = 0; i < seq.Length; i++)
                            {
                                if (seq[i] != predict.Target.Source[0])
                                {
                                    continue;
                                }

                                foreach (var t in predict.Target.Target)
                                {
                                    // Substring(0, 0) is empty, so position 0 needs no special case.
                                    string targetSeq = seq.Substring(0, i) + t + seq.Substring(i + 1);

                                    var reference = string.Format("sp|SAP_{0}_{1}|{2}_{3}_{4}_{5}", targetSeq, predict.Target.TargetType, seq, predict.Target.Source, i + 1, t);
                                    predictedSeq.Add(new Sequence(reference, targetSeq));
                                }
                            }
                        }
                        else
                        {
                            foreach (var tseq in predict.Target.Target)
                            {
                                string reference;
                                if (predict.Target.TargetType == VariantType.NTerminalLoss)
                                {
                                    reference = string.Format("sp|SAP_{0}_{1}|{2}_loss_{3}", tseq, predict.Target.TargetType, seq, seq.Substring(0, seq.Length - tseq.Length));
                                }
                                else if (predict.Target.TargetType == VariantType.CTerminalLoss)
                                {
                                    reference = string.Format("sp|SAP_{0}_{1}|{2}_loss_{3}", tseq, predict.Target.TargetType, seq, seq.Substring(tseq.Length));
                                }
                                else if (predict.Target.TargetType == VariantType.NTerminalExtension)
                                {
                                    reference = string.Format("sp|SAP_{0}_{1}|{2}_ext_{3}", tseq, predict.Target.TargetType, seq, tseq.Substring(0, tseq.Length - seq.Length));
                                }
                                else if (predict.Target.TargetType == VariantType.CTerminalExtension)
                                {
                                    reference = string.Format("sp|SAP_{0}_{1}|{2}_ext_{3}", tseq, predict.Target.TargetType, seq, tseq.Substring(seq.Length));
                                }
                                else
                                {
                                    throw new NotSupportedException("I don't know how to deal with " + predict.Target.TargetType.ToString());
                                }

                                predictedSeq.Add(new Sequence(reference, tseq));
                            }
                        }
                    }

                    // One entry per distinct sequence string (the first one encountered wins).
                    predictedSeq = (from g in predictedSeq.GroupBy(m => m.SeqString)
                                    select g.First()).ToList();

                    Progress.SetMessage("Reading database {0} ...", options.DatabaseFastaFile);
                    var databases = SequenceUtils.Read(options.DatabaseFastaFile);

                    Progress.SetMessage("Removing variant sequences which are already existed in database ...");
                    // Walk backwards so RemoveAt does not shift entries that are still to be visited.
                    for (int i = predictedSeq.Count - 1; i >= 0; i--)
                    {
                        foreach (var db in databases)
                        {
                            if (db.SeqString.Contains(predictedSeq[i].SeqString))
                            {
                                predictedSeq.RemoveAt(i);
                                break;
                            }
                        }
                    }
                    databases.AddRange(predictedSeq);

                    Progress.SetMessage("Writing SAP sequence and original database to {0} ...", options.OutputFile);

                    SequenceUtils.Write(new FastaFormat(), options.OutputFile, databases);
                }
            }
            finally
            {
                // Always close the progress started above, even when a step throws.
                Progress.End();
            }

            return new string[] { options.OutputFile, options.OutputTableFile };
        }
Пример #4
0
 /// <summary>
 /// Forwards to <c>Progress.Begin()</c>; adds no logic of its own.
 /// </summary>
 public void Begin()
 {
     Progress.Begin();
 }
        /// <summary>
        /// Reads every scan with MS level above 1 from a raw file, fills in precursor, MS level,
        /// experimental name, scan time and scan mode, runs each peak list through
        /// <c>pklProcessor</c> and collects the non-empty processed lists.
        /// When a scan cannot be read, its number is appended to <paramref name="ignoreScans"/>,
        /// the list is saved to the ignore-scan file, and the whole file is read again from the
        /// start with that scan skipped.
        /// </summary>
        /// <param name="rawFilename">Raw file to read.</param>
        /// <param name="ignoreScans">Scan numbers to skip; unreadable scans are added to this list.</param>
        /// <returns>The processed peak lists of the last, fully successful pass.</returns>
        /// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
        /// <exception cref="RawReadException">The reader failed on a scan that is already in <paramref name="ignoreScans"/>, so retrying cannot help.</exception>
        protected List<PeakList<Peak>> ReadTandemMassFromRaw(FileInfo rawFilename, List<int> ignoreScans)
        {
            string experimental = FileUtils.ChangeExtension(rawFilename.Name, "");

            // Retry with a loop rather than recursion so stack depth does not grow with the
            // number of unreadable scans.
            while (true)
            {
                var result = new List<PeakList<Peak>>();

                bool bReadAgain = false;

                rawReader.Open(rawFilename.FullName);
                try
                {
                    int firstSpectrumNumber = rawReader.GetFirstSpectrumNumber();
                    int lastSpectrumNumber  = rawReader.GetLastSpectrumNumber();

                    Progress.SetRange(firstSpectrumNumber, lastSpectrumNumber);
                    Progress.Begin();
                    try
                    {
                        for (int scan = firstSpectrumNumber; scan <= lastSpectrumNumber; scan++)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }

                            if (ignoreScans.Contains(scan))
                            {
                                continue;
                            }

                            Progress.SetPosition(scan);

                            int msLevel = rawReader.GetMsLevel(scan);
                            if (msLevel <= 1)
                            {
                                continue;
                            }

                            PeakList<Peak> pkl;
                            try
                            {
                                pkl = rawReader.GetPeakList(scan);
                            }
                            catch (RawReadException ex)
                            {
                                if (ignoreScans.Contains(ex.Scan))
                                {
                                    // The reported scan is already excluded, so another pass would
                                    // fail in exactly the same way; surface the error instead of
                                    // retrying forever.
                                    throw;
                                }

                                ignoreScans.Add(ex.Scan);
                                File.WriteAllLines(GetIgnoreScanFile(rawFilename), (from i in ignoreScans
                                                                                    let s = i.ToString()
                                                                                    select s).ToArray());
                                // Abandon this pass; the file is closed and reopened before the retry.
                                bReadAgain = true;
                                break;
                            }

                            pkl.Precursor    = rawReader.GetPrecursorPeakWithMasterScan(scan);
                            pkl.MsLevel      = msLevel;
                            pkl.Experimental = experimental;
                            pkl.ScanTimes.Add(new ScanTime(scan, rawReader.ScanToRetentionTime(scan)));
                            pkl.ScanMode = rawReader.GetScanMode(scan);

                            if (pkl.PrecursorCharge == 0)
                            {
                                pkl.PrecursorCharge = PrecursorUtils.GuessPrecursorCharge(pkl, pkl.PrecursorMZ);
                            }

                            PeakList<Peak> pklProcessed = this.pklProcessor.Process(pkl);
                            if (null != pklProcessed && pklProcessed.Count > 0)
                            {
                                result.Add(pklProcessed);
                            }
                        }
                    }
                    finally
                    {
                        Progress.End();
                    }
                }
                finally
                {
                    rawReader.Close();
                }

                if (!bReadAgain)
                {
                    return result;
                }
            }
        }