/// <summary>
        /// Collects per-file statistics for a fixed list of raw files and writes them to
        /// "output.csv" in the hard-coded stats folder: the MS1 count, the MS/MS count and a
        /// histogram of the charge resolved for each track.
        /// </summary>
        /// <remarks>
        /// Histogram bucket <c>i</c> counts tracks whose resolved charge is <c>i</c>; bucket 0 holds
        /// tracks for which no isotope reported a positive charge. The header row is generated
        /// from the same bucket count as the data rows so the column labels always match the
        /// bucket index (the previous literal header was shifted by one: it labelled bucket 0
        /// as "1 Charge").
        /// </remarks>
        /// <returns>Always <c>true</c>.</returns>
        public static bool Run()
        {
            // Buckets cover charges 0 (no charge resolved) through 13.
            const int chargeBucketCount = 14;

            string      outputPath = @"C:\_IRIC\DATA\Test\testMhc\Stats\";
            vsCSVWriter writer     = new vsCSVWriter(outputPath + "output.csv");

            string header = "File,# MS1s,# MSMS";
            for (int i = 0; i < chargeBucketCount; i++)
            {
                header += "," + i + " Charge";
            }
            writer.AddLine(header);

            DBOptions options = MhcSample.CreateOptions(outputPath);

            string[] files = new string[] { @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS60_MSMS15.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS60_MSMS30.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS60_MSMS60.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS120_MSMS15.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS120_MSMS30.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS120_MSMS60.raw" };
            foreach (string file in files)
            {
                // Release the data file as soon as this file's statistics have been gathered;
                // the loaded spectra are not used beyond this iteration.
                // NOTE(review): assumes Spectra.Load does not take ownership of msFile - confirm.
                using (pwiz.CLI.msdata.MSDataFile msFile = new pwiz.CLI.msdata.MSDataFile(file))
                {
                    Spectra spectra = Spectra.Load(msFile, options, file);
                    spectra.Sort(ProductSpectrum.AscendingPrecursorMassComparison);

                    // Tracks already attributed to an isotope group (as seed or as member);
                    // only the keys are consulted here.
                    Dictionary <Track, Precursor> computedTracks = new Dictionary <Track, Precursor>();
                    int[] charges = new int[chargeBucketCount];
                    foreach (Track track in spectra.tracks)
                    {
                        if (computedTracks.ContainsKey(track))
                        {
                            continue;
                        }

                        computedTracks.Add(track, null);
                        int charge = 0;
                        foreach (Precursor precursor in Queries.GetIsotopes(track, options, spectra.tracks, null))
                        {
                            // The last isotope reporting a positive charge decides the group's charge.
                            if (precursor.Charge > 0)
                            {
                                charge = precursor.Charge;
                            }
                            if (!computedTracks.ContainsKey(precursor.Track))
                            {
                                computedTracks.Add(precursor.Track, precursor);
                            }
                        }

                        // charge is never negative here; charges beyond the last bucket have no
                        // column in the report and are left uncounted instead of throwing.
                        if (charge < charges.Length)
                        {
                            charges[charge]++;
                        }
                    }

                    string line = file + "," + spectra.MS1s.Count + "," + spectra.Count;
                    for (int i = 0; i < charges.Length; i++)
                    {
                        line += "," + charges[i];
                    }
                    writer.AddLine(line);
                }
            }
            writer.WriteToFile();
            return(true);
        }
Exemple #2
0
        /// <summary>
        /// Rebuilds <c>AllSpectras</c> with one <c>Spectra</c> entry per sample of <c>Project</c>.
        /// For each sample, previously exported "_Tracks.csv" / "_MSMSIons.csv" files next to the
        /// raw file are imported when <c>dbOptions.LoadSpectraIfFound</c> is set and both files
        /// exist; otherwise the raw file is loaded, sorted by ascending precursor mass and
        /// optionally exported according to <c>dbOptions.SaveMS1Peaks</c> / <c>dbOptions.SaveMSMSPeaks</c>.
        /// </summary>
        /// <param name="loadMS">Forwarded to <c>Spectra.Load</c>; when false, the cached track file is not handed to <c>Spectra.Import</c>.</param>
        /// <param name="filterMS2">Forwarded to <c>Spectra.Load</c>; not used when cached files are imported.</param>
        /// <returns>The newly populated <c>AllSpectras</c> dictionary.</returns>
        public Dictionary <Sample, Spectra> LoadSpectras(bool loadMS = true, bool filterMS2 = true)
        {
            //TODO test compatibility with QExactive, mzML ... other known formats
            AllSpectras = new Dictionary <Sample, Spectra>();
            for (int index = 0; index < Project.Count; index++)
            {
                Sample current     = Project[index];
                string cachePrefix = vsCSV.GetFolder(current.sSDF) + vsCSV.GetFileName_NoExtension(current.sSDF);
                string tracksCsv   = cachePrefix + "_Tracks.csv";
                string msmsCsv     = cachePrefix + "_MSMSIons.csv";

                bool useCachedFiles = dbOptions.LoadSpectraIfFound
                                      && System.IO.File.Exists(tracksCsv)
                                      && System.IO.File.Exists(msmsCsv);
                if (useCachedFiles)
                {
                    dbOptions.ConSole.WriteLine("Loading Sectra from " + tracksCsv + " AND " + msmsCsv);

                    // Without MS data requested, the import is given no track file.
                    Spectra imported = loadMS
                                           ? Spectra.Import(msmsCsv, tracksCsv, dbOptions)
                                           : Spectra.Import(msmsCsv, null, dbOptions);
                    AllSpectras.Add(current, imported);
                    continue;
                }

                dbOptions.ConSole.WriteLine("Loading Sectra " + current.sSDF);

                pwiz.CLI.msdata.MSDataFile rawFile = new pwiz.CLI.msdata.MSDataFile(current.sSDF);
                Spectra loaded = Spectra.Load(rawFile, dbOptions, current.sSDF, loadMS, filterMS2);
                loaded.Sort(ProductSpectrum.AscendingPrecursorMassComparison);

                dbOptions.ConSole.WriteLine(current.sSDF + " [" + loaded.Count + " msms scans]");

                // Write the cache files so a later run can take the import path above.
                if (dbOptions.SaveMS1Peaks)
                {
                    loaded.ExportTracks(tracksCsv);
                }
                if (dbOptions.SaveMSMSPeaks)
                {
                    loaded.ExportMSMS(msmsCsv);
                }

                AllSpectras.Add(current, loaded);
            }
            return AllSpectras;
        }
Exemple #3
0
        /// <summary>
        /// Writes one CSV row per spectrum of a raw file: 1-based scan number, scan start time
        /// in minutes and MS level.
        /// </summary>
        /// <param name="rawFileName">Path of the data file opened through <c>pwiz.CLI.msdata.MSDataFile</c>.</param>
        /// <param name="csvOutFileName">Path handed to the <c>vsCSVWriter</c> that receives the rows.</param>
        /// <remarks>
        /// The retention time is formatted with the invariant culture so the decimal separator is
        /// always '.', which keeps the comma-separated columns intact under comma-decimal locales.
        /// </remarks>
        public static void ToCSV(string rawFileName, string csvOutFileName)
        {
            vsCSVWriter csvWriter = new vsCSVWriter(csvOutFileName);

            csvWriter.AddLine("Scan Number,Retention Time (min),Ms Level");

            // Dispose the data file once every spectrum has been read instead of leaving it
            // to the finalizer.
            using (pwiz.CLI.msdata.MSDataFile msFile = new pwiz.CLI.msdata.MSDataFile(rawFileName))
            {
                int num_spectra = msFile.run.spectrumList.size();

                for (int i = 0; i < num_spectra; i++)
                {
                    //Spectrum
                    // NOTE(review): the 'false' argument presumably skips loading peak data - confirm.
                    pwiz.CLI.msdata.Spectrum mySpec = msFile.run.spectrumList.spectrum(i, false);

                    // Scan start time of the first scan, converted from seconds to minutes.
                    double retention_time = mySpec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_scan_start_time).timeInSeconds() / 60.0;
                    string retentionText  = retention_time.ToString(System.Globalization.CultureInfo.InvariantCulture);

                    csvWriter.AddLine((i + 1) + "," + retentionText + "," + mySpec.cvParam(pwiz.CLI.cv.CVID.MS_ms_level).value);
                }
            }
            csvWriter.WriteToFile();
        }
Exemple #4
0
        private void CreateNewLibrary(BackgroundWorker bg)
        {
            bg.ReportProgress(-1, "Querying spectra...");
            IList<object[]> queryRows;
            lock (_session)
                queryRows = _session.CreateSQLQuery(@"SELECT s.Id, source.Name, NativeID, PrecursorMZ
                                                        FROM UnfilteredSpectrum s
                                                        JOIN SpectrumSource source ON s.Source = source.Id
                                                        JOIN UnfilteredPeptideSpectrumMatch psm ON s.Id = psm.Spectrum
                                                        JOIN Peptide p ON p.Id = psm.Peptide
                                                        JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
                                                        JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
                                                        GROUP BY s.Id"
                    ).List<object[]>();
            var foundSpectraList =
                _session.CreateSQLQuery(@"SELECT distinct spectrum FROM PeptideSpectrumMatch").List<object>();
            var foundSpectra = new HashSet<long>();
            {
                long tempLong;
                foreach (var item in foundSpectraList)
                    if (long.TryParse(item.ToString(), out tempLong))
                        foundSpectra.Add(tempLong);
            }

            var spectrumRows =
                queryRows.Select(o => new RescuePSMsForm.SpectrumRow(o)).OrderBy(o => o.SourceName).ToList();
            ////converted IOrderedEnumerable to List, the former one may end up with multiple enumeration, each invokes constructor, resulting a fresh set of object

            /*
             * extract peaks for each spectrum, spectrumRows was sorted by SourceName
            */
            string currentSourceName = null;
            string currentSourcePath = null;
            pwiz.CLI.msdata.MSData msd = null;
            int spectrumRowsCount = spectrumRows.Count();
            //Set<long> processedSpectrumIDs = new Set<long>();

            bg.ReportProgress(-1, string.Format("Extracting peaks for {0} spectra ... ", spectrumRowsCount));

            //// create a temp table to store clustered spectrum IDs
            _session.CreateSQLQuery(@"DROP TABLE IF EXISTS SpectralPeaks").ExecuteUpdate();
            _session.CreateSQLQuery(
                @"CREATE TABLE IF NOT EXISTS SpectralPeaks (Id INTEGER PRIMARY KEY, spectra INTEGER, mz STRING, intensity STRING, mods STRING, charge INTEGER, mass REAL, preAA STRING, postAA STRING, sequence STRING, protein STRING, origPeptide STRING, annotations STRING, numSpectraUsed INTEGER)
                                    ").ExecuteUpdate();
            var peaklist = new List<object[]>();
            var currentSource = string.Empty;

            var sourcesSeen = new HashSet<string>();
            var totalSources = spectrumRows.Select(x => x.SourceName).Distinct().Count();
            lock (_owner)
                for (int i = 0; i < spectrumRowsCount; ++i)
                {
                    var row = spectrumRows.ElementAt(i);

                    if (row.SourceName != currentSource)
                    {
                        sourcesSeen.Add(row.SourceName);
                        var saving = _session.BeginTransaction();
                        saving.Begin();
                        var insertPeakscmd = _session.Connection.CreateCommand();
                        insertPeakscmd.CommandText = "INSERT INTO SpectralPeaks (spectra, mz, intensity) VALUES (?,?,?)";
                        var insertPeakParameters = new List<System.Data.IDbDataParameter>();
                        for (int x = 0; x < 3; ++x)
                        {
                            insertPeakParameters.Add(insertPeakscmd.CreateParameter());
                            insertPeakscmd.Parameters.Add(insertPeakParameters[x]);
                        }
                        insertPeakscmd.Prepare();
                        for (var y = 0; y < peaklist.Count; y++)
                        {
                            insertPeakParameters[0].Value = peaklist[y][0];
                            insertPeakParameters[1].Value = peaklist[y][1];
                            insertPeakParameters[2].Value = peaklist[y][2];
                            insertPeakscmd.ExecuteNonQuery();
                        }
                        saving.Commit();
                        currentSource = row != null ? row.SourceName : string.Empty;
                        peaklist = new List<object[]>();
                    }

                    bg.ReportProgress((int) (((i + 1)/(double) spectrumRowsCount)*100),
                                      string.Format("Extracting peaks for {0} spectra (File {1} of {2})", spectrumRowsCount, sourcesSeen.Count, totalSources));

                    //if (processedSpectrumIDs.Contains(row.SpectrumId))
                    //    break;
                    if (row.SourceName != currentSourceName)
                    {
                        currentSourceName = row.SourceName;
                        currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName,
                                                                              _session.Connection.GetDataSource());
                        if (msd != null)
                            msd.Dispose();

                        //var entryCount = session.CreateSQLQuery(string.Format(
                        //    "SELECT count() FROM SpectralPeaks p " +
                        //    "JOIN Spectrum s ON p.Spectra = s.Id " +
                        //    "JOIN SpectrumSource ss ON ss.Id = s.Source " +
                        //    "WHERE ss.Name= '{0}'", currentSourceName)).List<object>().FirstOrDefault() ?? string.Empty;


                        msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);
                        SpectrumListFactory.wrap(msd, "threshold count 100 most-intense");
                        //only keep the top 100 peaks
                        //SpectrumListFactory.wrap(msd, "threshold bpi-relative .5 most-intense"); //keep all peaks that are at least 50% of the intensity of the base peak
                        //SpectrumListFactory.wrap(msd, "threshold tic-cutoff .95 most-intense"); //keep all peaks that count for 95% TIC
                        //threshold <count|count-after-ties|absolute|bpi-relative|tic-relative|tic-cutoff> <threshold> <most-intense|least-intense> [int_set(MS levels)]
                    }
                    var spectrumList = msd.run.spectrumList;
                    var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.SpectrumNativeID), true);
                    //may create indexoutofrange error if no spectrum nativeID                   
                    var mzList = pwizSpectrum.getMZArray().data; //getMZArray().data returns IList<double>
                    var intensityList = pwizSpectrum.getIntensityArray().data;
                    for (var x = 0; x < mzList.Count; x++)
                    {
                        mzList[x] = Math.Round(mzList[x], 4);
                        intensityList[x] = Math.Round(intensityList[x], 1);
                    }
                    if (mzList.Count == intensityList.Count)
                        peaklist.Add(new object[]
                                         {row.SpectrumId, string.Join("|", mzList), string.Join("|", intensityList)});
                    else
                        MessageBox.Show(row.SpectrumId.ToString());
                    //processedSpectrumIDs.Add(row.SpectrumId);
                }

            bg.ReportProgress(-1, string.Format("Saving extracted peaks from {0}... ", currentSource));
            var newTransaction = _session.BeginTransaction();
            newTransaction.Begin();
            var newInsertPeakscmd = _session.Connection.CreateCommand();
            newInsertPeakscmd.CommandText = "INSERT INTO SpectralPeaks (spectra, mz, intensity) VALUES (?,?,?)";
            var newInsertPeakParameters = new List<System.Data.IDbDataParameter>();
            for (int x = 0; x < 3; ++x)
            {
                newInsertPeakParameters.Add(newInsertPeakscmd.CreateParameter());
                newInsertPeakscmd.Parameters.Add(newInsertPeakParameters[x]);
            }
            newInsertPeakscmd.Prepare();
            for (var y = 0; y < peaklist.Count; y++)
            {
                newInsertPeakParameters[0].Value = peaklist[y][0];
                newInsertPeakParameters[1].Value = peaklist[y][1];
                newInsertPeakParameters[2].Value = peaklist[y][2];
                newInsertPeakscmd.ExecuteNonQuery();
            }
            newTransaction.Commit();

            bg.ReportProgress(-1, string.Format("Indexing peaks for {0} spectra ... ", spectrumRowsCount));
            _session.CreateSQLQuery("Create index if not exists SpecraPeakIndex on SpectralPeaks (spectra, mass)")
                   .ExecuteUpdate();

            var peptideList = _session.QueryOver<Peptide>().List();
            var acceptedSpectra = new HashSet<long>();
            var spectraInfo = new Dictionary<long, object[]>();

            var insertDecoyCmd = _session.Connection.CreateCommand();
            insertDecoyCmd.CommandText =
                "INSERT INTO SpectralPeaks (spectra, mz, intensity,mods,charge,mass,preAA,postAA,sequence,protein,origPeptide, annotations, numSpectraUsed) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)";
            var insertDecoyParameters = new List<System.Data.IDbDataParameter>();
            for (int x = 0; x < 13; ++x)
            {
                insertDecoyParameters.Add(insertDecoyCmd.CreateParameter());
                insertDecoyCmd.Parameters.Add(insertDecoyParameters[x]);
            }
            insertDecoyCmd.Prepare();
            var annotations = new Dictionary<long, string>();
            _successfulAdds = 0;
            _retrievalFails = 0;
            _minSpectraFails = 0;
            _overlapFails = 0;
            _decoyFails = 0;

            for (var peptideNum = 0; peptideNum < peptideList.Count; peptideNum++)
            {
                if ((peptideNum + 1)%10 == 0)
                    bg.ReportProgress((int) (((peptideNum + 1)/(double) peptideList.Count)*100),
                                      string.Format("Trimming spectra for peptide {0}/{1}", peptideNum + 1,
                                                    peptideList.Count));

                int spectraInPeptide = 0;
                var peptide = peptideList[peptideNum];
                var matchSet = new Dictionary<string, List<PeptideSpectrumMatch>>();
                //TODO: Find mod variations
                foreach (var match in peptide.Matches)
                {
                    if (match.Spectrum.Id == null)
                        continue;

                    string peptideName = insertModsInPeptideString(peptide.Sequence, match.Modifications);
                    if (!matchSet.ContainsKey(peptideName))
                        matchSet.Add(peptideName, new List<PeptideSpectrumMatch>());
                    matchSet[peptideName].Add(match);
                    if (match.Spectrum.Id != null)
                        spectraInPeptide++;
                }
                if (spectraInPeptide < _minPerPeptide)
                    continue;

                    foreach (var distinctMatch in matchSet)
                {
                    //get list of spectra in peptide
                    var chargedSpectraList = new Dictionary<int, List<long>>();
                    var chargedScoreKeeper = new Dictionary<int,Dictionary<long, List<double>>>();
                    var charges = new HashSet<int>();
                    foreach (var psm in distinctMatch.Value.Where(x => x.Spectrum.Id != null))
                    {
                        if (psm.Spectrum.Id == null)
                            continue;
                        var charge = psm.Charge;
                        charges.Add(charge);
                        if (!chargedSpectraList.ContainsKey(charge))
                        {
                            chargedSpectraList.Add(charge, new List<long>());
                            chargedScoreKeeper.Add(charge, new Dictionary<long, List<double>>());
                        }
                        if (!chargedSpectraList[charge].Contains(psm.Spectrum.Id ?? -1))
                        {
                            chargedSpectraList[charge].Add(psm.Spectrum.Id ?? -1);
                            chargedScoreKeeper[charge].Add(psm.Spectrum.Id ?? -1, new List<double>());
                        }
                    }
                    var chargesToRemove = new HashSet<int>();
                    foreach (var charge in charges)
                    {
                        if (chargedSpectraList[charge].Count < _libraryExportSettings.minimumSpectra)
                        {
                            _minSpectraFails++;
                            chargesToRemove.Add(charge);
                        }
                        if (chargedSpectraList[charge].Count(x => !acceptedSpectra.Contains(x)) == 0)
                        {
                            _overlapFails++;
                            chargesToRemove.Add(charge);
                        }
                    }
                    foreach (var charge in chargesToRemove)
                        charges.Remove(charge);
                    if (charges.Count <1)
                        continue;

                    foreach (var charge in charges)
                    {
                        long bestSpectra = -100;
                        double bestScore = -100;
                        var spectraList = chargedSpectraList[charge];

                        //get list of spectra with similar precursor mass
                        var extraList = new List<long>();
                        if (_libraryExportSettings.crossPeptide)
                        {
                            var minValue = distinctMatch.Value[0].ObservedNeutralMass -
                                            _libraryExportSettings.precursorMzTolerance;
                            var maxValue = distinctMatch.Value[0].ObservedNeutralMass +
                                            _libraryExportSettings.precursorMzTolerance;

                            var extraListObj = _session.CreateSQLQuery(string.Format(
                                "SELECT spectra FROM SpectralPeaks WHERE spectra NOT IN ({0}) AND mass > {1} AND Mass < {2} AND spectra > 0",
                                string.Join(",", spectraList), minValue, maxValue)).List<object>();
                            foreach (var item in extraListObj)
                                extraList.Add((long) item);
                        }

                        //get spectra peaks
                        var peakList = _session.CreateSQLQuery(string.Format(
                            "SELECT spectra, mz, intensity FROM SpectralPeaks WHERE spectra IN ({0})",
                            string.Join(",", spectraList.Concat(extraList)))).List<object[]>();
                        if (peakList.Count < _libraryExportSettings.minimumSpectra)
                            continue;

                        //for some reason not all spectra have peaks available
                        for (var x = spectraList.Count - 1; x >= 0; x--)
                        {
                            var found = false;
                            foreach (var peak in peakList)
                                if ((long) peak[0] == spectraList[x])
                                    found = true;
                            if (!found)
                            {
                                _retrievalFails++;
                                spectraList.RemoveAt(x);
                            }
                        }

                        var peakInfo = new Dictionary<long, Peaks>();
                        foreach (var entry in peakList)
                        {
                            var mzValues = entry[1].ToString().Split('|').Select(double.Parse).ToList();
                            var intensityValues = entry[2].ToString().Split('|').Select(double.Parse).ToList();
                            peakInfo.Add((long) entry[0], new Peaks(mzValues, intensityValues));
                        }

                        if (_libraryExportSettings.method == LibraryExportOptions.DOT_PRODUCT_METHOD)
                        {
                            var scoreKeeper = chargedScoreKeeper[charge];
                            spectraList = spectraList.Where(x => !acceptedSpectra.Contains(x)).ToList();
                            if (spectraList.Count == 1)
                                bestSpectra = spectraList.First();
                            else if (spectraList.Count == 2)
                            {
                                if (peakInfo[spectraList[0]].OriginalIntensities.Average() >
                                    peakInfo[spectraList[1]].OriginalIntensities.Average())
                                    bestSpectra = spectraList[0];
                                else
                                    bestSpectra = spectraList[1];
                            }
                            else
                            {
                                //compare spectra in peptide
                                for (var x = 0; x < spectraList.Count; x++)
                                {
                                    for (var y = x + 1; y < spectraList.Count; y++)
                                    {
                                        var similarityScore =
                                            ClusteringAnalysis.DotProductCompareTo(peakInfo[spectraList[x]],
                                                                                    peakInfo[spectraList[y]],
                                                                                    _libraryExportSettings
                                                                                        .fragmentMzTolerance);
                                        scoreKeeper[spectraList[x]].Add(similarityScore);
                                        scoreKeeper[spectraList[y]].Add(similarityScore);
                                    }

                                    if (_libraryExportSettings.crossPeptide)
                                    {
                                        for (var y = 0; y < extraList.Count; y++)
                                        {
                                            var similarityScore =
                                                ClusteringAnalysis.DotProductCompareTo(peakInfo[spectraList[x]],
                                                                                        peakInfo[extraList[y]],
                                                                                        _libraryExportSettings
                                                                                            .fragmentMzTolerance);
                                            scoreKeeper[spectraList[x]].Add(similarityScore);
                                        }
                                    }
                                }

                                foreach (var spectra in spectraList)
                                {
                                    var avg = scoreKeeper[spectra].Average();
                                    if (avg > bestScore)
                                    {
                                        bestScore = avg;
                                        bestSpectra = spectra;
                                    }
                                }
                            }
                        }

                        if (bestSpectra < 0)
                            continue;
                        var modList = new List<string>();
                        foreach (var mod in distinctMatch.Value[0].Modifications.OrderBy(x=>x.Offset))
                        {
                            var closestNumber = (int)Math.Round(mod.Modification.MonoMassDelta);
                            if (sptxtMods.ContainsKey(closestNumber))
                                modList.Add(string.Format("{0},{1},{2}", mod.Offset < 0 ? -1 : mod.Offset, mod.Site, sptxtMods[closestNumber]));
                        }
                        var modstring = modList.Count + (modList.Any()
                                                                    ? "/" + string.Join("/", modList)
                                                                    : string.Empty);
                        var mass = Math.Round(distinctMatch.Value[0].ObservedNeutralMass, 4);
                        var proteinNames = new List<string>();
                        var preAA = "X";
                        var postAA = "X";
                        foreach (var instance in peptide.Instances)
                        {
                            try
                            {
                                proteinNames.Add(instance.Protein.Accession);
                                if (instance.Protein.Sequence == null)
                                {
                                    preAA = "X";
                                    postAA = "X";
                                }
                                else
                                {
                                    preAA = instance.Offset > 0
                                                ? instance.Protein.Sequence[instance.Offset - 1].ToString()
                                                : "X";
                                    postAA = instance.Offset + instance.Length < instance.Protein.Sequence.Length
                                                    ? instance.Protein.Sequence[instance.Offset + instance.Length]
                                                        .ToString()
                                                    : "X";
                                }
                            }
                            catch (Exception)
                            {
                                preAA = "X";
                                postAA = "X";
                            }
                        }

                        //set up annotations
                        string peptideName = distinctMatch.Key;
                        var proteinString = proteinNames.Count + "/" + string.Join(",1/", proteinNames) +
                                            (proteinNames.Count > 1 ? ",1" : string.Empty);
                        var modDict = new Dictionary<int, double>();
                        foreach (var mod in distinctMatch.Value[0].Modifications)
                            modDict.Add(mod.Offset, mod.Modification.MonoMassDelta);
                        var splitPeptide = peptide.Sequence.Select(letter => letter.ToString()).ToList();
                        var FragmentList = CreateFragmentMassReference(splitPeptide, modDict);
                        Dictionary<double, FragmentPeakInfo> annotatedList =
                            AnnotatePeaks(peakInfo[bestSpectra].OriginalMZs.ToList().OrderBy(x => x).ToList(),
                                            FragmentList, charge);
                        if (!annotations.ContainsKey(bestSpectra))
                        {
                            var orderedMZs = peakInfo[bestSpectra].OriginalMZs.OrderBy(x => x).ToList();
                            var tempList = new List<string>();
                            foreach (var mz in orderedMZs)
                            {
                                var tempstring = annotatedList[mz].fragmentID;
                                var closestModValue = Math.Round(annotatedList[mz].relativePosition);
                                if (closestModValue < 0)
                                    tempstring += closestModValue.ToString();
                                else if (closestModValue > 0)
                                    tempstring += "+" + closestModValue.ToString();
                                if (annotatedList[mz].fragmentCharge > 1)
                                    tempstring += "^" + annotatedList[mz].fragmentCharge;
                                tempList.Add(tempstring);
                            }
                            annotations.Add(bestSpectra, string.Join("|", tempList));
                        }

                        //add decoys
                        if (_libraryExportSettings.decoys)
                        {
                            string decoyPeptideName = createDecoyPeptideString(peptide.Sequence,
                                                                                distinctMatch.Value[0]
                                                                                    .Modifications);
                            string annotatedDecoyPeptideName = insertModsInPeptideString(decoyPeptideName,
                                                                                            distinctMatch.Value[0]
                                                                                                .Modifications);
                            if (decoyPeptideName == string.Empty ||
                                peptideName.Length != annotatedDecoyPeptideName.Length)
                            {
                                _decoyFails++;
                                continue;
                            }
                            object[] decoyPeaks = createDecoyPeaks(peptide.Sequence, decoyPeptideName,
                                                                    annotatedList,
                                                                    peakInfo[bestSpectra].OriginalMZs.ToList(),
                                                                    peakInfo[bestSpectra].OriginalIntensities
                                                                                        .ToList(),
                                                                    modDict, charge);


                            var decoyProteinString = proteinNames.Count + "/DECOY_" +
                                                        string.Join(",1/DECOY_", proteinNames) +
                                                        (proteinNames.Count > 1 ? ",1" : string.Empty);
                            var spectraAnnotations = string.Join("|", (List<string>) decoyPeaks[2]);

                            insertDecoyParameters[0].Value = -bestSpectra;
                            insertDecoyParameters[1].Value = string.Join("|", (List<double>) decoyPeaks[0]);
                            insertDecoyParameters[2].Value = string.Join("|", (List<double>) decoyPeaks[1]);
                            insertDecoyParameters[3].Value = modstring;
                            insertDecoyParameters[4].Value = charge;
                            insertDecoyParameters[5].Value = mass;
                            insertDecoyParameters[6].Value = preAA;
                            insertDecoyParameters[7].Value = postAA;
                            insertDecoyParameters[8].Value = annotatedDecoyPeptideName;
                            insertDecoyParameters[9].Value = decoyProteinString;
                            insertDecoyParameters[10].Value = preAA + "." + peptideName + "." + postAA;
                            insertDecoyParameters[11].Value = spectraAnnotations;
                            insertDecoyParameters[12].Value = spectraList.Count;
                            insertDecoyCmd.ExecuteNonQuery();
                        }

                        spectraInfo.Add(bestSpectra,
                                        new object[]
                                            {charge, mass, modstring, preAA, postAA, peptideName, proteinString,spectraList.Count});

                        acceptedSpectra.Add(bestSpectra);
                        _successfulAdds++;
                    }
                }
            }
            _session.CreateSQLQuery(string.Format("DELETE FROM SpectralPeaks WHERE spectra NOT IN ({0}) and spectra > 0",
                                                 string.Join(",", acceptedSpectra))).ExecuteUpdate();

            bg.ReportProgress(-1, "Adding detailed peak data... ");
            var insertPeakInfocmd = _session.Connection.CreateCommand();
            insertPeakInfocmd.CommandText =
                "UPDATE SpectralPeaks SET charge=?, mass=?, mods=?, preAA=?, postAA=?, sequence=?, protein=?, numSpectraUsed=?, annotations=? where spectra=?";
            var insertPeakInfoParameters = new List<System.Data.IDbDataParameter>();
            for (int x = 0; x < 10; ++x)
            {
                insertPeakInfoParameters.Add(insertPeakInfocmd.CreateParameter());
                insertPeakInfocmd.Parameters.Add(insertPeakInfoParameters[x]);
            }
            insertPeakInfocmd.Prepare();
            var addInfo = _session.BeginTransaction();
            addInfo.Begin();
            foreach (var item in acceptedSpectra)
            {
                insertPeakInfoParameters[0].Value = spectraInfo[item][0];
                insertPeakInfoParameters[1].Value = spectraInfo[item][1];
                insertPeakInfoParameters[2].Value = spectraInfo[item][2];
                insertPeakInfoParameters[3].Value = spectraInfo[item][3];
                insertPeakInfoParameters[4].Value = spectraInfo[item][4];
                insertPeakInfoParameters[5].Value = spectraInfo[item][5];
                insertPeakInfoParameters[6].Value = spectraInfo[item][6];
                insertPeakInfoParameters[7].Value = spectraInfo[item][7];
                insertPeakInfoParameters[8].Value = annotations[item];
                insertPeakInfoParameters[9].Value = item;
                insertPeakInfocmd.ExecuteNonQuery();
            }

            addInfo.Commit();
            try
            {
                bg.ReportProgress(-1, "Compressing database... ");
                _session.CreateSQLQuery("VACUUM").ExecuteUpdate();
            }
            catch (Exception e)
            {
                MessageBox.Show("Could not compress database");
            }

            bg.ReportProgress(-1, string.Format("Exporting to {0}... ", _exportLocation));

            ExportLibrary(_exportLocation);
        }
Exemple #5
0
        /// <summary>
        /// Reads every spectrum of <paramref name="msFile"/> and collects them in a new <see cref="Spectra"/>:
        /// spectra that have precursors or an MS level above 1 are stored as MS/MS product spectra,
        /// all others are treated as MS1 scans. Tracks are computed afterwards when at least one MS1 was kept.
        /// </summary>
        /// <param name="msFile">Opened data file whose <c>run.spectrumList</c> is enumerated.</param>
        /// <param name="options">Source of the console used for logging and of the peak filtering settings.</param>
        /// <param name="filePath">Path forwarded to <c>ComputeSpectraTracks</c>.</param>
        /// <param name="loadMS">When false, MS1 scans are not stored (their injection time is still remembered).</param>
        /// <param name="filterMS2">When true, MS/MS peaks are charge-assigned, deisotoped and filtered.</param>
        /// <returns>
        /// The populated container. Exceptions raised while reading are written to <c>options.ConSole</c>
        /// and whatever was loaded up to that point is returned, so the result may be partial.
        /// </returns>
        public static Spectra Load(pwiz.CLI.msdata.MSDataFile msFile, DBOptions options, string filePath, bool loadMS = true, bool filterMS2 = true)
        {
            string  mzMlFilepath = filePath;
            int     num_spectra  = msFile.run.spectrumList.size();
            Spectra spectra      = new Spectra(num_spectra);
            MS1Spectrum previousMS1 = null;

            try
            {
                // Injection time of the most recent MS1 scan; attached to each following MS/MS spectrum.
                double LastMs1InjectionTime = 0;
                for (int i = 0; i < num_spectra; i++)
                {
                    pwiz.CLI.msdata.Spectrum spec = msFile.run.spectrumList.spectrum(i, true);
                    try
                    {
                        if (spec.precursors.Count > 0 || spec.cvParam(pwiz.CLI.cv.CVID.MS_ms_level).value > 1)//is an MSMS
                        {
                            // Scan start time is converted from seconds to minutes.
                            double retention_time = spec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_scan_start_time).timeInSeconds() / 60.0;

                            // Defaults used whenever the file does not provide the corresponding value.
                            double precursor_mz         = 0;
                            int    charge               = 2;
                            double precursor_intensity  = 0;
                            string fragmentation_method = "unknown";
                            double isolationWindow      = 1.0;
                            double injectionTime        = spec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_ion_injection_time).value;

                            // When several precursors / selected ions are listed, the last one read wins.
                            foreach (pwiz.CLI.msdata.Precursor precursor in spec.precursors)
                            {
                                // Guarded: an activation without CV params keeps the current method name
                                // instead of throwing and aborting the whole load.
                                if (precursor.activation.cvParams.Count > 0)
                                {
                                    fragmentation_method = precursor.activation.cvParams[0].name;
                                }

                                if (precursor.isolationWindow.cvParams.Count > 2)
                                {
                                    // The window is only taken when the 2nd and 3rd params agree
                                    // (presumably the lower/upper offsets - TODO confirm).
                                    if ((double)precursor.isolationWindow.cvParams[1].value == (double)precursor.isolationWindow.cvParams[2].value)
                                    {
                                        isolationWindow = precursor.isolationWindow.cvParams[1].value;
                                    }
                                    else
                                    {
                                        options.ConSole.WriteLine("Weird Isolation Window");
                                    }
                                }

                                foreach (pwiz.CLI.msdata.SelectedIon ion in precursor.selectedIons)
                                {
                                    // Params are read positionally: [0] m/z, [1] charge, [2] intensity.
                                    if (ion.cvParams.Count > 0)
                                    {
                                        precursor_mz = ion.cvParams[0].value;
                                    }
                                    if (ion.cvParams.Count > 1)
                                    {
                                        charge = (int)ion.cvParams[1].value;
                                    }
                                    if (ion.cvParams.Count > 2)
                                    {
                                        precursor_intensity = ion.cvParams[2].value;
                                    }
                                }
                            }

                            // Scan numbers are 1-based, spectrum indices 0-based.
                            int scan_number = i + 1;

                            GraphML_List<MsMsPeak> peaks = ExtractMsMsPeaks(spec, options);
                            peaks.Sort(MsMsPeak.AscendingMzComparison);

                            if (filterMS2)
                            {
                                // Deisotoping runs with half of the configured product mass tolerance.
                                peaks = AssignChargeStatesAndDeisotope(peaks, options.MaximumPrecursorChargeState, new MassTolerance(options.productMassTolerance.Value * 0.5, options.productMassTolerance.Units));
                                peaks = FilterPeaks(peaks, options.MaximumNumberOfFragmentsPerSpectrum);

                                //TODO Add Contaminant removal

                                // Re-sort: the list is not guaranteed to still be m/z ordered after filtering.
                                peaks.Sort(MsMsPeak.AscendingMzComparison);
                            }

                            double precursor_mass = Numerics.MassFromMZ(precursor_mz, charge);

                            ProductSpectrum spectrum = new ProductSpectrum(scan_number, retention_time, fragmentation_method, precursor_mz, precursor_intensity, charge, precursor_mass, peaks, isolationWindow, injectionTime, LastMs1InjectionTime);
                            spectra.AddMSMS(spectrum);
                        }
                        else //Is an MS
                        {
                            LastMs1InjectionTime = spec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_ion_injection_time).value;
                            if (loadMS)
                            {
                                double retention_time = spec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_scan_start_time).timeInSeconds() / 60.0;

                                // These arrays are intentionally not disposed here: their data collections
                                // are handed to the MS1Spectrum constructor, which may keep referencing them.
                                pwiz.CLI.msdata.BinaryDataArray mz        = spec.getMZArray();
                                pwiz.CLI.msdata.BinaryDataArray intensity = spec.getIntensityArray();

                                // An MS1 is only stored once the next MS1 is seen, because its duration
                                // is the retention-time gap to that next scan.
                                if (previousMS1 != null)
                                {
                                    previousMS1.ScanDuration = retention_time - previousMS1.RetentionTimeInMin;
                                    spectra.MS1s.Add(previousMS1);
                                }
                                previousMS1 = new MS1Spectrum(i, retention_time, intensity.data, mz.data, 1);
                            }
                        }
                    }
                    finally
                    {
                        // Release the native spectrum even when parsing it failed.
                        spec.Dispose();
                    }
                    Console.Write("\r{0}%   ", ((100 * i) / num_spectra));
                }

                // The final MS1 has no successor, so it is stored without a ScanDuration being assigned.
                if (previousMS1 != null)
                {
                    spectra.MS1s.Add(previousMS1);
                }

                if (spectra.MS1s.Count > 0)
                {
                    // Numeric arguments appear to be (missingScans, centroid, minPeaks, valleyFactor),
                    // going by an earlier tuning loop that called this method - TODO confirm against its signature.
                    spectra.tracks = ComputeSpectraTracks(spectra, options, mzMlFilepath, 3, 1, 3, 1.7, MaxQuant.CentroidPosition.weightedMean);
                }
                else
                {
                    spectra.tracks = new Tracks();
                }
                spectra.tracks.Sort(Tracks.AscendingPrecursorMassComparison);
                Console.Write("\r{0}%   ", 100);
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and return what has been loaded so far.
                options.ConSole.WriteLine(ex.StackTrace);
                options.ConSole.WriteLine(ex.Message);
            }
            return(spectra);
        }

        /// <summary>
        /// Copies the peaks of <paramref name="spec"/> that have a strictly positive intensity into a new list
        /// and releases the binary arrays obtained from the spectrum, whether or not the copy succeeded.
        /// </summary>
        /// <param name="spec">Spectrum whose m/z and intensity arrays are read.</param>
        /// <param name="options">Provides the console used to report mismatching array sizes.</param>
        /// <returns>The peaks in file order (not sorted).</returns>
        private static GraphML_List<MsMsPeak> ExtractMsMsPeaks(pwiz.CLI.msdata.Spectrum spec, DBOptions options)
        {
            pwiz.CLI.msdata.BinaryDataArray mz        = null;
            pwiz.CLI.msdata.BinaryDataArray intensity = null;
            try
            {
                mz        = spec.getMZArray();
                intensity = spec.getIntensityArray();

                int num_peaks = mz.data.Count;
                if (num_peaks != intensity.data.Count)
                {
                    options.ConSole.WriteLine("PreoteWizard reports peaks arrays (mz/intensity) of different sizes : (" + num_peaks + "/" + intensity.data.Count + ")");
                    // Only read as many entries as both arrays actually have.
                    if (intensity.data.Count < num_peaks)
                    {
                        num_peaks = intensity.data.Count;
                    }
                }

                GraphML_List<MsMsPeak> peaks = new GraphML_List<MsMsPeak>(num_peaks);
                for (int k = 0; k < num_peaks; k++)
                {
                    if (intensity.data[k] > 0)
                    {
                        peaks.Add(new MsMsPeak(mz.data[k], intensity.data[k], 0));
                    }
                }
                return peaks;
            }
            finally
            {
                if (mz != null)
                {
                    mz.Dispose();
                }
                if (intensity != null)
                {
                    intensity.Dispose();
                }
            }
        }