Esempio n. 1
0
        public void TestPathUtils()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            Console.WriteLine(MassSpecDataReaderFactory.RemoveExtension(rawFilePath) + "_Target.tsv");
            Console.WriteLine(Path.GetDirectoryName(rawFilePath));
            Console.WriteLine(Path.Combine(Path.GetDirectoryName(rawFilePath), Path.GetFileNameWithoutExtension(rawFilePath) + "_IcTarget.tsv"));

            var outputDir = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\L1_1_Mode2\Synocho_L1_1_IcTarget.tsv";

            if (!Directory.Exists(outputDir))
            {
                if (!File.GetAttributes(outputDir).HasFlag(FileAttributes.Directory))
                {
                    throw new Exception(outputDir + " is not a directory!");
                }
                Directory.CreateDirectory(outputDir);
            }
            Console.WriteLine(outputDir);
        }
        /// <summary>
        /// Process the given file or folder
        /// </summary>
        /// <returns>0 if success, otherwise an error code</returns>
        /// <remarks>Folder could either be a supported mass spec data folder, or a normal directory with several supported data files</remarks>
        public int Run()
        {
            // Normalize the input path. Only affects paths to a file/folder in a folder-type dataset
            Parameters.InputPath = MassSpecDataReaderFactory.NormalizeDatasetPath(Parameters.InputPath);

            var attr = File.GetAttributes(Parameters.InputPath);
            int errorCode;

            if ((attr & FileAttributes.Directory) == FileAttributes.Directory &&
                !MassSpecDataReaderFactory.IsADirectoryDataset(Parameters.InputPath))
            {
                errorCode = ProcessDirectory(Parameters.InputPath);
            }
            else
            {
                if (!MsRawFile(Parameters.InputPath) && !MsPbfFile(Parameters.InputPath))
                {
                    ShowErrorMessage(@"File extension not supported, " + Parameters.InputPath);
                    return(-1);
                }

                errorCode = ProcessFile(Parameters.InputPath);
            }

            return(errorCode);
        }
Esempio n. 3
0
        public bool Validate()
        {
            // Check for folder-type datasets, and replace specFilePath with the directory name if it is.
            SourcePath = MassSpecDataReaderFactory.GetDatasetName(SourcePath);

            var isDirectoryDataset = MassSpecDataReaderFactory.IsADirectoryDataset(SourcePath);
            // True if specFilePath is a directory that is NOT a supported folder-type dataset.
            var specPathIsDirectory = Directory.Exists(SourcePath) && !isDirectoryDataset;

            if (!File.Exists(SourcePath) && !specPathIsDirectory && !isDirectoryDataset)
            {
                PrintError("File not found: " + SourcePath);
                return(false);
            }

            var types = MassSpecDataReaderFactory.MassSpecDataTypeFilterList;

            types.Remove(".pbf");

            if (!specPathIsDirectory && !(types.Select(ext => SourcePath.ToLower().EndsWith(ext)).Any()))
            {
                PrintError("Invalid file extension: (" + Path.GetExtension(SourcePath) + ") " + SourcePath);
                return(false);
            }

            if (string.IsNullOrWhiteSpace(OutputDir))
            {
                // Must use "Path.GetFullPath" to return the absolute path when the source file is in the working directory
                // But, it could cause problems with too-long paths.
                OutputDir = specPathIsDirectory ? SourcePath : Path.GetDirectoryName(Path.GetFullPath(SourcePath));
            }

            if (string.IsNullOrWhiteSpace(OutputDir))
            {
                PrintError("Invalid output file directory: " + OutputDir);
                return(false);
            }

            if (EndScan < StartScan && EndScan > -1)
            {
                PrintError($"End scan ({EndScan}) must not be less than start scan ({StartScan})!");
                return(false);
            }

            if (!Directory.Exists(OutputDir))
            {
                if (File.Exists(OutputDir) && !File.GetAttributes(OutputDir).HasFlag(FileAttributes.Directory))
                {
                    PrintError("OutputDir \"" + OutputDir + "\" is not a directory!");
                    return(false);
                }
                Directory.CreateDirectory(OutputDir);
            }

            return(true);
        }
Esempio n. 4
0
        public void TestTagBasedSearch()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

//            const string rawFilePath = @"H:\Research\Lewy\raw\Lewy_intact_01.raw";
//            const string rawFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
//            const string rawFilePath = @"H:\Research\Yufeng\TopDownYufeng\raw\yufeng_column_test2.raw";
//            const string rawFilePath = @"H:\Research\Weijun_TopDown\raw\UC4_Intact_plasmaTest_90_6May15_Bane_14-09-01RZ.raw";
//            const string rawFilePath = @"H:\Research\Charles\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";
            const string rawFilePath = @"D:\MassSpecFiles\60k\Yufeng_SampleTest1_150614113438.pbf";

            //const string rawFilePath = @"D:\MassSpecFiles\60k\NCR_50K_Test_24Jun15_Bane_15-02-02RZ.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            //const int minTagLength = 5;
            var tagFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, ".seqtag");
            //var tagParser = new SequenceTagParser(tagFilePath, minTagLength, 100);

            const string fastaFilePath = @"D:\MassSpecFiles\60k\ID_003836_DA9CC1E4.fasta";

            //const string fastaFilePath = @"D:\MassSpecFiles\60k\ID_004973_9BA6912F.fasta";
            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb = new FastaDatabase(fastaFilePath);

            var tolerance = new Tolerance(10);
//            var modsFilePath = @"H:\Research\QCShew_TopDown\Production\Mods_Methyl.txt";
            var modsFilePath = @"D:\MassSpecFiles\60k\Mods.txt";

//            var modsFilePath = "";

            if (!File.Exists(modsFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
            }

            var aaSet = new AminoAcidSet(modsFilePath);

            TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
        }
Esempio n. 5
0
        public void TestTagBasedSearchCompRef()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dataSetPath   = @"D:\MassSpecFiles\CompRef";
            const string fastaFilePath = @"D:\MassSpecFiles\CompRef\ID_003278_4B4B3CB1.fasta";
            const string modsFilePath  = @"D:\MassSpecFiles\CompRef\Mods.txt";

            if (!Directory.Exists(dataSetPath))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, dataSetPath);
            }
            if (!File.Exists(modsFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
            }
            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fileEntries = Directory.GetFiles(dataSetPath);

            var dataset = (from fileName in fileEntries where fileName.EndsWith("pbf") select Path.GetFileNameWithoutExtension(fileName)).ToList();

            dataset.Sort();


            var fastaDb   = new FastaDatabase(fastaFilePath);
            var tolerance = new Tolerance(10);
            var aaSet     = new AminoAcidSet(modsFilePath);

            for (var i = 0; i < dataset.Count; i++)
            {
                var rawFile     = string.Format(@"{0}\{1}.pbf", dataSetPath, dataset[i]);
                var ms1File     = string.Format(@"{0}\{1}.ms1ft", dataSetPath, dataset[i]);
                var tagFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFile, ".seqtag");

                var       run          = PbfLcMsRun.GetLcMsRun(rawFile);
                const int minTagLength = 5;
                //var tagParser = new SequenceTagParser(tagFilePath, minTagLength, 100);

                Console.WriteLine("-----------------{0}--------------------", rawFile);

                TestTagBasedSearch(run, fastaDb, tolerance, aaSet);

                Console.WriteLine("-----------------------------------------------------------------------");
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Initialize this data set by reading the raw file asynchronously and initializing child view models
        /// </summary>
        /// <param name="filePath">The raw File Path.</param>
        /// <returns>
        /// The <see cref="Task"/>.
        /// </returns>
        public async Task InitializeAsync(string filePath)
        {
            filePath         = MassSpecDataReaderFactory.NormalizeDatasetPath(filePath);
            this.IsLoading   = true; // Show animated loading screen
            this.Title       = Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(filePath));
            this.rawFilePath = filePath;

            this.LoadProgressPercent = 0.0;
            this.LoadProgressStatus  = "Loading...";
            var progress = new Progress <ProgressData>(progressData =>
            {
                progressData.UpdateFrequencySeconds = 2;
                if (progressData.ShouldUpdate())
                {
                    this.LoadProgressPercent = progressData.Percent;
                    this.LoadProgressStatus  = progressData.Status;
                }
            });

            // load raw file
            this.LcMs = await Task.Run(() => PbfLcMsRun.GetLcMsRun(filePath, 0, 0, progress));

            // Now that we have an LcMsRun, initialize viewmodels that require it
            this.XicViewModel        = new XicViewModel(this.dialogService, this.LcMs);
            this.SpectrumViewModel   = new SpectrumViewModel(this.dialogService, this.LcMs);
            this.FeatureMapViewModel = new FeatureViewerViewModel((LcMsRun)this.LcMs, this.dialogService);

            // When the selected scan changes in the xic plots, the selected scan for the prsm should update
            this.XicViewModel.SelectedScanUpdated().Subscribe(scan => this.SelectedPrSm.Scan = scan);

            // When an ID is selected on FeatureMap, update selectedPrSm
            this.FeatureMapViewModel.FeatureMapViewModel.WhenAnyValue(x => x.SelectedPrSm).Where(prsm => prsm != null).Subscribe(prsm => this.SelectedPrSm = prsm);

            // Create prsms for scan numbers (unidentified)
            await this.LoadScans();

            ////await this.ScanViewModel.ToggleShowInstrumentDataAsync(IcParameters.Instance.ShowInstrumentData, (PbfLcMsRun)this.LcMs);
            this.SelectedPrSm.LcMs = this.LcMs; // For the selected PrSm, we should always use the LcMsRun for this dataset.

            this.IsLoading = false;             // Hide animated loading screen
        }
Esempio n. 7
0
        public void TestTagMatchingSingleSpec()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            const int    scanNum = 4533;

            // Parse sequence tags
            var          tagFileName   = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
            const int    minTagLength  = 8;
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

            if (!File.Exists(tagFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var tagParser    = new SequenceTagParser(tagFileName, minTagLength);

            var tags = tagParser.GetSequenceTags(scanNum);

            foreach (var tag in tags)
            {
                var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence)
                                      .Select(index => fastaDb.GetProteinName(index)).ToArray();
                if (matchedProteins.Any())
                {
                    Console.WriteLine("{0}\t{1}\t{2}\t{3}", tag.Sequence, tag.IsPrefix, tag.FlankingMass, string.Join("\t", matchedProteins));
                }
            }
        }
Esempio n. 8
0
        public void TestTagBasedSearchForLewy()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFilePath = @"D:\MassSpecFiles\Lewy\Lewy_AT_AD1_21May15_Bane_14-09-01RZ.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            //const int minTagLength = 4;
            var tagFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, ".seqtag");
            //var tagParser = new SequenceTagParser(tagFilePath, minTagLength, 10000);

            const string fastaFilePath = @"D:\MassSpecFiles\Lewy\a4_human.fasta";

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb = new FastaDatabase(fastaFilePath);

            var tolerance    = new Tolerance(10);
            var modsFilePath = @"D:\MassSpecFiles\Lewy\Mods.txt";

            if (!File.Exists(modsFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
            }

            var aaSet = new AminoAcidSet(modsFilePath);

            TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
        }
        /// <summary>
        /// Create a PNG image of previously found MS1 features
        /// </summary>
        /// <param name="pbfFilePath">.pbf file path</param>
        /// <param name="ms1FeaturesFilePath">.ms1ft file path</param>
        /// <returns>0 if success, otherwise an error code</returns>
        /// <remarks>
        /// If ms1FeaturesFilePath is an empty string, it is auto-determined based on the .pbf file name
        /// Mass range is determined using Parameters.MinSearchMass and Parameters.MaxSearchMass
        /// </remarks>
        public int CreateFeatureMapImage(string pbfFilePath, string ms1FeaturesFilePath)
        {
            if (!File.Exists(pbfFilePath))
            {
                Console.WriteLine(@"Error: Data file not found: " + pbfFilePath);
                return(-1);
            }

            var outDirectory = GetOutputDirectory(pbfFilePath);

            if (string.IsNullOrEmpty(outDirectory))
            {
                return(-2);
            }

            var baseName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(pbfFilePath));

            if (string.IsNullOrEmpty(ms1FeaturesFilePath) || string.Equals(ms1FeaturesFilePath, "."))
            {
                ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
            }

            var pngFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

            if (!File.Exists(ms1FeaturesFilePath))
            {
                Console.WriteLine(@"Error: MS1 features file not found: " + ms1FeaturesFilePath);
                return(-3);
            }

            Console.WriteLine(@"Start loading MS1 data from {0}", pbfFilePath);
            var run = PbfLcMsRun.GetLcMsRun(pbfFilePath);

            CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);

            return(0);
        }
Esempio n. 10
0
        /// <summary>
        /// Find features in the data file
        /// </summary>
        /// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
        /// <returns>0 if success; negative number on error</returns>
        private int ProcessFile(string rawFile)
        {
            var outDirectory = GetOutputDirectory(rawFile);

            if (string.IsNullOrEmpty(outDirectory))
            {
                return(-1);
            }

            var baseName            = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
            var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
            var outCsvFilePath      = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
            var pngFilePath         = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

            if (File.Exists(ms1FeaturesFilePath))
            {
                Console.WriteLine(@"ProMex output already exists: {0}", ms1FeaturesFilePath);
                return(-2);
            }

            if (!File.Exists(rawFile))
            {
                ShowErrorMessage(@"Cannot find input file: " + rawFile);
                return(-3);
            }

            var stopwatch = Stopwatch.StartNew();

            Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
            var run = PbfLcMsRun.GetLcMsRun(rawFile);

            var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);

            Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            if (run.GetMs1ScanVector().Length == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
                return(-4);
            }

            var    comparer         = featureFinder.Comparer;
            var    container        = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var    minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
            var    maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
            double totalMassBin     = maxSearchMassBin - minSearchMassBin + 1;

            Console.WriteLine(@"Start MS1 feature extraction.");
            stopwatch.Restart();
            for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
            {
                var clusters = featureFinder.FindFeatures(binNum);
                container.Add(clusters);

                if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
                {
                    var elapsed             = (stopwatch.ElapsedMilliseconds) / 1000.0d;
                    var processedBins       = binNum - minSearchMassBin;
                    var processedPercentage = ((double)processedBins / totalMassBin) * 100;
                    Console.WriteLine(@"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                                      processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed,
                                      container.NumberOfFeatures);
                }
            }

            Console.WriteLine(@"Complete MS1 feature extraction.");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);
            Console.WriteLine(@"Start selecting mutually independent features from feature network graph");
            stopwatch.Restart();


            // write result files
            var tsvWriter = new StreamWriter(ms1FeaturesFilePath);

            tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));

            StreamWriter csvWriter = null;

            if (Parameters.CsvOutput)
            {
                csvWriter = new StreamWriter(outCsvFilePath);
                csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
            }

            var filteredFeatures = container.GetFilteredFeatures(featureFinder);
            var featureId        = 0;

            foreach (var feature in filteredFeatures)
            {
                featureId++;
                tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

                var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];

                if (csvWriter != null)
                {
                    foreach (var envelope in feature.EnumerateEnvelopes())
                    {
                        //var mostAbuIsotopeInternalIndex = cluster.IsotopeList.SortedIndexByIntensity[0];
                        var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                        if (mostAbuPeak == null || !mostAbuPeak.Active)
                        {
                            continue;
                        }

                        var fitscore = 1.0 - feature.BestCorrelationScore;
                        csvWriter.WriteLine(string.Format("{0},{1},{2},{3},{4},{5},{6}", envelope.ScanNum, envelope.Charge, envelope.Abundance, mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId));
                    }
                }
            }
            tsvWriter.Close();

            Console.WriteLine(@"Complete feature filtration");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of filtered features = {0}", featureId);
            Console.WriteLine(@" - ProMex output: {0}", ms1FeaturesFilePath);

            if (csvWriter != null)
            {
                csvWriter.Close();
                Console.WriteLine(@" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
            }

            if (Parameters.FeatureMapImage)
            {
                CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
            }

            return(0);
        }
Esempio n. 11
0
 private bool HasExistingPbfFile(string path)
 {
     return(File.Exists(MassSpecDataReaderFactory.ChangeExtension(path, ".pbf")));
 }
Esempio n. 12
0
        /// <summary>
        /// Look for the default .pbf testing file in the default test file folder,
        /// plus also in the UnitTest_Files directory in this project or solution
        /// </summary>
        /// <param name="createIfMissing">If true and the .mzML file is ofund, create the .pbf file</param>
        /// <returns>Path to the file if found, otherwise the default path on Proto-2</returns>
        public static string GetPbfTestFilePath(bool createIfMissing)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            var pbfFilePath = Path.Combine(DEFAULT_SPEC_FILES_FOLDER, PBF_TEST_FILE);

            var pbfFileInfo = GetTestFile("PbfTestFilePath", pbfFilePath, false);

            if (pbfFileInfo != null)
            {
                // Check for a lock file, which would indicate another process is creating the .pbf file
                var existingLockFile = new FileInfo(Path.Combine(pbfFileInfo.FullName + ".lock"));
                WaitForLockFile(existingLockFile, false);

                pbfFileInfo.Refresh();
                if (pbfFileInfo.Exists)
                {
                    DeleteLockFile(existingLockFile);
                    return(pbfFileInfo.FullName);
                }
            }

            if (!createIfMissing)
            {
                // Not creating the file if missing; return the default path, despite the fact that the file does not exist
                // The calling method will discover that the file is missing and act accordingly
                return(pbfFilePath);
            }

            // Create the missing file

            var mzmlFilePath = Path.Combine(DEFAULT_SPEC_FILES_FOLDER, MZML_TEST_FILE);

            var mzmlFileInfo = GetTestFile("MzmlTestFilePath", mzmlFilePath, false);

            if (mzmlFileInfo?.DirectoryName == null)
            {
                // Unable to create the pbf file; return the default path, despite the fact that the file does not exist
                return(pbfFilePath);
            }

            ShowMessage(methodName, string.Format("Creating {0} using {1}", PBF_TEST_FILE, mzmlFileInfo.FullName));

            var lockFile       = new FileInfo(Path.Combine(mzmlFileInfo.DirectoryName, Path.GetFileNameWithoutExtension(mzmlFileInfo.Name) + ".pbf.lock"));
            var newPbfFilePath = Path.Combine(mzmlFileInfo.DirectoryName, PBF_TEST_FILE);

            try
            {
                // Create a new lock file so other processes know this thread is creating the .pbf file
                WaitForLockFile(lockFile, true);

                mLastStatus = string.Empty;
                var startTime = DateTime.UtcNow;

                var reader   = MassSpecDataReaderFactory.GetMassSpecDataReader(mzmlFileInfo.FullName);
                var progress = new Progress <ProgressData>(p =>
                {
                    p.UpdateFrequencySeconds = 2;
                    if (p.Percent < 100 && (p.Percent % 25).Equals(0) || p.ShouldUpdate())
                    {
                        var statusMessage = string.Format("{0}, {1:00.0}% complete                        ", p.Status, p.Percent);

                        if (string.Equals(mLastStatus, statusMessage))
                        {
                            return;
                        }

                        mLastStatus = statusMessage;
                        Console.Write("\r{0}, {1:00.0}% complete                        ", p.Status, p.Percent);
                    }
                });

                var run = new PbfLcMsRun(mzmlFileInfo.FullName, reader, newPbfFilePath, 0, 0, progress);
                Console.WriteLine();
                ShowMessage(methodName, string.Format("Created {0} in {1:F0} seconds", run.PbfFilePath, DateTime.UtcNow.Subtract(startTime).TotalSeconds));

                DeleteLockFile(lockFile);

                return(run.PbfFilePath);
            }
            catch (Exception ex)
            {
                ShowMessage(methodName, string.Format("Exception creating {0} using {1}: {2}", PBF_TEST_FILE, mzmlFileInfo.FullName, ex.Message));

                try
                {
                    var incompletePbfFile = new FileInfo(newPbfFilePath);
                    if (incompletePbfFile.Exists)
                    {
                        incompletePbfFile.Delete();
                    }

                    DeleteLockFile(lockFile);
                }
                catch
                {
                    // Ignore errors here
                }

                // Return the default path, despite the fact that the file does not exist
                return(pbfFilePath);
            }
        }
 public void TestThermoRawAvailable()
 {
     Assert.AreEqual(true, MassSpecDataReaderFactory.IsThermoRawAvailable());
 }
 public void TestPwizAvailable()
 {
     Assert.AreEqual(true, MassSpecDataReaderFactory.IsPwizAvailable());
 }
Esempio n. 15
0
        public static int Main(string[] args)
        {
            PbfGenInputParameters options;

            try
            {
                var osVersionInfo = new clsOSVersionInfo();
                if (osVersionInfo.GetOSVersion().ToLower().Contains("windows"))
                {
                    var handle = Process.GetCurrentProcess().MainWindowHandle;
                    SetConsoleMode(handle, EnableExtendedFlags);
                }

                var exeName = System.Reflection.Assembly.GetEntryAssembly().GetName().Name;

                var parser = new CommandLineParser <PbfGenInputParameters>(exeName, Version);
                parser.UsageExamples.Add($"Using -start and -end to limit the scan range to include in the .pbf file\n\t{exeName}.exe -s Dataset.raw -start 2000 -end 3000");

                var results = parser.ParseArgs(args);

                if (!results.Success)
                {
                    // Wait for 1.5 seconds
                    System.Threading.Thread.Sleep(1500);

                    return(-1);
                }

                if (!results.ParsedResults.Validate())
                {
                    parser.PrintHelp();

                    // Wait for 1.5 seconds
                    System.Threading.Thread.Sleep(1500);

                    return(-1);
                }

                options = results.ParsedResults;
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception while parsing the command line parameters: " + ex.Message);
                return(-5);
            }

#if (!DEBUG)
            try
#endif
            {
                var specFilePaths = new[] { options.SourcePath };
                if (Directory.Exists(options.SourcePath) && !MassSpecDataReaderFactory.IsADirectoryDataset(options.SourcePath))
                {
                    specFilePaths = Directory.GetFiles(options.SourcePath, "*.raw"); // TODO: Support folders with other formats in them too...
                }

                foreach (var rawFilePath in specFilePaths)
                {
                    var pbfFileName = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, PbfLcMsRun.FileExtensionConst);
                    var pbfFilePath = Path.Combine(options.OutputDir, Path.GetFileName(pbfFileName));

                    if (File.Exists(pbfFilePath) && PbfLcMsRun.CheckFileFormatVersion(pbfFilePath, out var isCurrent) && isCurrent)
                    {
                        Console.WriteLine("{0} already exists.", pbfFilePath);
                        continue;
                    }

                    Console.WriteLine("Creating {0} from {1}", pbfFilePath, rawFilePath);

                    if (options.StartScan > 0 && options.EndScan > 0)
                    {
                        Console.WriteLine("Only including scans {0} to {1}", options.StartScan, options.EndScan);
                    }
                    else if (options.StartScan > 0)
                    {
                        Console.WriteLine("Only including scans {0} to the end", options.StartScan);
                    }
                    else if (options.EndScan > 0)
                    {
                        Console.WriteLine("Only including scans 1 to {0}", options.EndScan);
                    }

                    var reader   = MassSpecDataReaderFactory.GetMassSpecDataReader(rawFilePath);
                    var progress = new Progress <ProgressData>(p =>
                    {
                        p.UpdateFrequencySeconds = 2;
                        if ((p.Percent % 25).Equals(0) || p.ShouldUpdate())
                        {
                            Console.Write("\r{0}, {1:00.0}% complete                        ", p.Status, p.Percent);
                        }
                    });
                    var run = new PbfLcMsRun(rawFilePath, reader, pbfFilePath, 0, 0, progress, false, options.StartScan, options.EndScan);
                    Console.WriteLine();
                }

                Console.WriteLine("PbfFormatVersion: {0}", PbfLcMsRun.FileFormatId);
                return(0);
            }
#if (!DEBUG)
            catch (Exception ex)
            {
                // NOTE: The DMS Analysis Manager looks for this text; do not change it
                Console.WriteLine("Exception while processing: " + ex.Message);
                Console.WriteLine(ex.StackTrace);
                var errorCode = -Math.Abs(ex.Message.GetHashCode());
                if (errorCode == 0)
                {
                    return(-1);
                }
                else
                {
                    return(errorCode);
                }
            }
#endif
        }
Esempio n. 16
0
        public void TestFeatureIdMatching()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var          resultParser    = new MsPathFinderParser(resultFilePath);
            const double qValueThreshold = 0.01;
            const double tolerancePpm    = 13;

            const string dataSet     = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            var          rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");

            if (!File.Exists(rawFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            var idList =
                resultParser.GetIdList().TakeWhile(id => id.QValue <= qValueThreshold).OrderBy(id => id.Mass).ToList();
            var idMassList = idList.Select(id => id.Mass).ToList();
            var idFlag     = new bool[idList.Count];


            // Parse sequence tags
            var       tagFileName    = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
            const int minTagLength   = 6;
            const int numProtMatches = 4;
//            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";

            if (!File.Exists(tagFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            var tagParser = new SequenceTagParser(tagFileName, minTagLength);

            var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
            var featureParser   = new TsvFileParser(featureFileName);

            var minScan   = featureParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
            var maxScan   = featureParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
            var minCharge = featureParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var maxCharge = featureParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var monoMass  = featureParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();

            var numFeaturesWithId                    = 0;
            var numFeaturesWithMs2                   = 0;
            var numFeaturesWithTags                  = 0;
            var numFeaturesWithMatchingTags          = 0;
            var numFeaturesWithTwoOrMoreMatchingTags = 0;
            var numFeaturesWithNoIdAndMatchingTags   = 0;

            for (var i = 0; i < featureParser.NumData; i++)
            {
                var mass = monoMass[i];

                // Find Id
                var tolDa   = new Tolerance(tolerancePpm).GetToleranceAsDa(mass, 1);
                var minMass = mass - tolDa;
                var maxMass = mass + tolDa;
                var index   = idMassList.BinarySearch(mass);
                if (index < 0)
                {
                    index = ~index;
                }

                var matchedId = new List <MsPathFinderId>();
                // go down
                var curIndex = index - 1;
                while (curIndex >= 0)
                {
                    var curId = idList[curIndex];
                    if (curId.Mass < minMass)
                    {
                        break;
                    }
                    if (curId.Scan > minScan[i] && curId.Scan < maxScan[i] &&
                        curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
                    {
                        matchedId.Add(curId);
                        idFlag[curIndex] = true;
                    }
                    --curIndex;
                }

                // go up
                curIndex = index;
                while (curIndex < idList.Count)
                {
                    var curId = idList[curIndex];
                    if (curId.Mass > maxMass)
                    {
                        break;
                    }
                    if (curId.Scan >= minScan[i] && curId.Scan <= maxScan[i] &&
                        curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
                    {
                        matchedId.Add(curId);
                        idFlag[curIndex] = true;
                    }
                    ++curIndex;
                }

                var hasId = false;
                if (matchedId.Any())
                {
                    ++numFeaturesWithId;
                    hasId = true;
                }

                // Find MS2 scans
//                var numMs2Scans = 0;
                var tags   = new List <SequenceTag>();
                var hasMs2 = false;
                for (var scanNum = minScan[i]; scanNum <= maxScan[i]; scanNum++)
                {
                    var isolationWindow = run.GetIsolationWindow(scanNum);
                    if (isolationWindow == null)
                    {
                        continue;
                    }
                    var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
                    var charge = (int)Math.Round(mass / isolationWindowTargetMz);
                    if (charge < minCharge[i] || charge > maxCharge[i])
                    {
                        continue;
                    }
                    var mz = Ion.GetIsotopeMz(mass, charge,
                                              Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex);
                    if (isolationWindow.Contains(mz))
                    {
//                        ++numMs2Scans;
                        tags.AddRange(tagParser.GetSequenceTags(scanNum));
                        hasMs2 = true;
                    }
                }
                if (hasMs2)
                {
                    ++numFeaturesWithMs2;
                }
                if (tags.Any())
                {
                    ++numFeaturesWithTags;
                }
                var protHist      = new Dictionary <string, int>();
                var hasMatchedTag = false;
                foreach (var tag in tags)
                {
                    var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).Select(idx => fastaDb.GetProteinName(idx)).ToArray();
                    if (matchedProteins.Any())
                    {
                        hasMatchedTag = true;
                        foreach (var protein in matchedProteins)
                        {
                            int num;
                            if (protHist.TryGetValue(protein, out num))
                            {
                                protHist[protein] = num + 1;
                            }
                            else
                            {
                                protHist[protein] = 1;
                            }
                        }
                    }
                }
                if (hasMatchedTag)
                {
                    ++numFeaturesWithMatchingTags;
                    if (!hasId)
                    {
                        ++numFeaturesWithNoIdAndMatchingTags;
                    }
                }
                if (protHist.Any())
                {
                    var maxOcc = protHist.Values.Max();
                    if (maxOcc >= numProtMatches)
                    {
                        ++numFeaturesWithTwoOrMoreMatchingTags;
                    }
                }
            }

            Console.WriteLine("NumFeatures: {0}", featureParser.NumData);
            Console.WriteLine("NumId: {0}", idList.Count);
            Console.WriteLine("NumFeaturesWithId: {0} ({1})", numFeaturesWithId, numFeaturesWithId / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMs2: {0} ({1})", numFeaturesWithMs2, numFeaturesWithMs2 / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithTag: {0} ({1})", numFeaturesWithTags, numFeaturesWithTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMatchedTag: {0} ({1})", numFeaturesWithMatchingTags, numFeaturesWithMatchingTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMoreThanOneMatchedTag: {0} ({1})", numFeaturesWithTwoOrMoreMatchingTags, numFeaturesWithTwoOrMoreMatchingTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithNoIdAndMatchedTag: {0} ({1})", numFeaturesWithNoIdAndMatchingTags, numFeaturesWithNoIdAndMatchingTags / (float)featureParser.NumData);

            for (var i = 0; i < idFlag.Length; i++)
            {
                if (!idFlag[i])
                {
                    Console.WriteLine(idList[i].Scan);
                }
            }


//            Console.WriteLine(string.Join(",", filter.GetMatchingMs2ScanNums(8115.973001)));
//
//            Console.WriteLine(featureFileName);
        }
Esempio n. 17
0
        /// <summary>
        /// Process spectra and identifications.
        /// </summary>
        /// <param name="rawFilePath">Full file path to raw file.</param>
        /// <param name="idFilePath">Full file path to identification file.</param>
        /// <param name="cancellationToken">For notification of cancellation.</param>
        /// <param name="progress">Progress reporter.</param>
        /// <returns>List of processed IDs.</returns>
        public List <ProcessedResult> Process(string rawFilePath, string idFilePath, CancellationToken cancellationToken, IProgress <ProgressData> progress = null)
        {
            // Set up progress reporter
            progress = progress ?? new Progress <ProgressData>();

            var progressData = new ProgressData(progress);

            // Show initial loading message
            progressData.Report(0.1, "Loading...");

            // Read mzid file
            var mzidReader      = new SimpleMZIdentMLReader();
            var identifications = mzidReader.Read(idFilePath, cancellationToken);

            // Check to make sure raw and MZID file match.
            var rawFileName = Path.GetFileNameWithoutExtension(rawFilePath);

            var spectrumFileFromId = Path.GetFileNameWithoutExtension(identifications.SpectrumFile);
            var dtaIndex           = spectrumFileFromId.LastIndexOf("_dta");

            if (dtaIndex >= 0)
            {
                spectrumFileFromId = spectrumFileFromId.Substring(0, dtaIndex);
            }

            if (rawFileName != spectrumFileFromId)
            {
                throw new ArgumentException($"Mismatch between spectrum file ({rawFileName}) and id file ({spectrumFileFromId}).");
            }

            // Group IDs into a hash by scan number
            var idMap = identifications.Identifications.GroupBy(id => id.ScanNum).ToDictionary(scan => scan.Key, ids => ids);

            var processedResults = new ConcurrentBag <ProcessedResult>();

            // Load raw file
            using (var lcms = MassSpecDataReaderFactory.GetMassSpecDataReader(rawFilePath))
            {
                int count = 0;
                Parallel.ForEach(
                    lcms.ReadAllSpectra(),
                    spectrum =>
                {
                    if (cancellationToken.IsCancellationRequested)
                    {           // Cancel if necessary
                        return;
                    }

                    // Report completion percentage and current scan number
                    if (count % (int)Math.Max(0.01 * lcms.NumSpectra, 1) == 0)
                    {
                        progressData.Report(count, lcms.NumSpectra, $"{Math.Round(100.0*count / lcms.NumSpectra)}%");
                    }

                    Interlocked.Increment(ref count);

                    // Skip spectrum if it isn't MS2
                    var productSpectrum = spectrum as ProductSpectrum;
                    if (productSpectrum == null || !idMap.ContainsKey(spectrum.ScanNum))
                    {
                        return;
                    }

                    var specResults = idMap[spectrum.ScanNum];

                    var results = from specResult in specResults
                                  let sequence = specResult.Peptide.GetIpSequence()
                                                 let coverage = this.CalculateSequenceCoverage(productSpectrum, sequence, specResult.Charge)
                                                                select new ProcessedResult
                    {
                        ScanNum          = spectrum.ScanNum,
                        Sequence         = sequence,
                        Charge           = specResult.Charge,
                        PrecursorMz      = specResult.CalMz,
                        DeNovoScore      = specResult.DeNovoScore,
                        SpecEValue       = specResult.SpecEv,
                        EValue           = specResult.EValue,
                        QValue           = specResult.QValue,
                        PepQValue        = specResult.PepQValue,
                        FragMethod       = productSpectrum.ActivationMethod,
                        IsotopeError     = specResult.IsoError,
                        SequenceCoverage = Math.Round(coverage),
                    };

                    foreach (var result in results)
                    {
                        processedResults.Add(result);
                    }
                });
            }

            // Sort spectra by SpecEValue
            return(processedResults.OrderBy(pr => pr.SpecEValue).ToList());
        }
Esempio n. 18
0
        public bool Validate()
        {
            // Spec file path validation
            if (string.IsNullOrWhiteSpace(SpecFilePath))
            {
                PrintError("Missing parameter for spectrum file path");
                return(false);
            }

            // Check for folder-type datasets, and replace specFilePath with the directory name if it is.
            SpecFilePath = MassSpecDataReaderFactory.GetDatasetName(SpecFilePath);

            var isDirectoryDataset = MassSpecDataReaderFactory.IsADirectoryDataset(SpecFilePath);
            // True if specFilePath is a directory that is NOT a supported folder-type dataset.
            var specPathIsDirectory = Directory.Exists(SpecFilePath) && !isDirectoryDataset;

            if (!File.Exists(SpecFilePath) && !specPathIsDirectory && !isDirectoryDataset)
            {
                PrintError("File not found: " + SpecFilePath);
                return(false);
            }

            var types = MassSpecDataReaderFactory.MassSpecDataTypeFilterList;

            if (!specPathIsDirectory && !(types.Select(ext => SpecFilePath.ToLower().EndsWith(ext)).Any()))
            {
                PrintError("Invalid file extension for spectrum file: (" + Path.GetExtension(SpecFilePath) + ") " + SpecFilePath);
                return(false);
            }

            // TODO: Handle non-.raw files in the subfolder
            SpecFilePaths = Directory.Exists(SpecFilePath) && !MassSpecDataReaderFactory.IsADirectoryDataset(SpecFilePath) ? Directory.GetFiles(SpecFilePath, "*.raw") : new[] { SpecFilePath };

            // Database path validation
            if (string.IsNullOrWhiteSpace(DatabaseFilePath))
            {
                PrintError("Missing parameter for database file path");
                return(false);
            }
            if (!File.Exists(DatabaseFilePath))
            {
                PrintError("File not found: " + DatabaseFilePath);
                return(false);
            }
            var dbExtension = Path.GetExtension(DatabaseFilePath).ToLower();

            if (!dbExtension.Equals(".fa") &&
                !dbExtension.Equals(".fasta"))
            {
                PrintError("Invalid extension for the database file path (" + dbExtension + ")");
                return(false);
            }

            // Output directory validation
            if (string.IsNullOrWhiteSpace(OutputDir))
            {
                // Must use "Path.GetFullPath" to return the absolute path when the source file is in the working directory
                // But, it could cause problems with too-long paths.
                OutputDir = specPathIsDirectory ? SpecFilePath : Path.GetDirectoryName(Path.GetFullPath(SpecFilePath));
            }

            if (string.IsNullOrWhiteSpace(OutputDir))
            {
                PrintError("Invalid output file directory: " + OutputDir);
                return(false);
            }

            if (!Directory.Exists(OutputDir))
            {
                if (File.Exists(OutputDir) && !File.GetAttributes(OutputDir).HasFlag(FileAttributes.Directory))
                {
                    PrintError("OutputDir \"" + OutputDir + "\" is not a directory!");
                    return(false);
                }
                Directory.CreateDirectory(OutputDir);
            }

            // Mods file validation
            if (!string.IsNullOrWhiteSpace(ModsFilePath) && !File.Exists(ModsFilePath))
            {
                PrintError("Modifications file not found: " + ModsFilePath);
                return(false);
            }

            try
            {
                var errorMessage = LoadModsFile(ModsFilePath);
                if (!string.IsNullOrWhiteSpace(errorMessage))
                {
                    PrintError(errorMessage);
                    return(false);
                }
            }
            catch (Exception ex)
            {
                PrintError("Exception parsing the file for parameter -mod: " + ex.Message);
                return(false);
            }

            // Scans file validation
            if (!string.IsNullOrWhiteSpace(ScansFilePath) && !File.Exists(ScansFilePath))
            {
                PrintError("Scans File file not found: " + ScansFilePath);
                return(false);
            }
            try
            {
                var errorMessage = LoadScansFile(ScansFilePath);
                if (!string.IsNullOrWhiteSpace(errorMessage))
                {
                    PrintError(errorMessage);
                    return(false);
                }
            }
            catch (Exception ex)
            {
                PrintError("Exception parsing the file for parameter -scansFile: " + ex.Message);
                return(false);
            }

            // Feature file validation
            if (!string.IsNullOrWhiteSpace(FeatureFilePath) && !File.Exists(FeatureFilePath))
            {
                PrintError("Feature File not found: " + FeatureFilePath);
                return(false);
            }
            if (!string.IsNullOrWhiteSpace(FeatureFilePath) &&
                !Path.GetExtension(FeatureFilePath).ToLower().Equals(".csv") &&
                !Path.GetExtension(FeatureFilePath).ToLower().Equals(".ms1ft") &&
                !Path.GetExtension(FeatureFilePath).ToLower().Equals(".msalign"))
            {
                PrintError("Invalid extension for the Feature file path (" + Path.GetExtension(FeatureFilePath) + ")");
                return(false);
            }

            // MinX/MaxX validation
            if (MinSequenceLength > MaxSequenceLength)
            {
                PrintError("MinPrecursorCharge (" + MinPrecursorIonCharge + ") is larger than MaxPrecursorCharge (" + MaxPrecursorIonCharge + ")!");
                return(false);
            }

            if (MinProductIonCharge > MaxProductIonCharge)
            {
                PrintError("MinFragmentCharge (" + MinProductIonCharge + ") is larger than MaxFragmentCharge (" + MaxProductIonCharge + ")!");
                return(false);
            }

            if (MinSequenceMass > MaxSequenceMass)
            {
                PrintError("MinSequenceMassInDa (" + MinSequenceMass + ") is larger than MaxSequenceMassInDa (" + MaxSequenceMass + ")!");
                return(false);
            }

            MaxNumThreads = GetOptimalMaxThreads(MaxNumThreads);

            return(true);
        }
Esempio n. 19
0
        public bool RunSearch(double corrThreshold = 0.7, CancellationToken?cancellationToken = null, IProgress <ProgressData> progress = null)
        {
            // Get the Normalized spec file/folder path
            SpecFilePath = MassSpecDataReaderFactory.NormalizeDatasetPath(SpecFilePath);

            var prog     = new Progress <ProgressData>();
            var progData = new ProgressData(progress);

            if (progress != null)
            {
                prog = new Progress <ProgressData>(p =>
                {
                    progData.Status         = p.Status;
                    progData.StatusInternal = p.StatusInternal;
                    progData.Report(p.Percent);
                });
            }

            var sw    = new Stopwatch();
            var swAll = new Stopwatch();

            swAll.Start();
            ErrorMessage = string.Empty;

            Console.Write(@"Reading raw file...");
            progData.Status = "Reading spectra file";
            progData.StepRange(10.0);
            sw.Start();

            _run = PbfLcMsRun.GetLcMsRun(SpecFilePath, 0, 0, prog);

            _ms2ScanNums             = _run.GetScanNumbers(2).ToArray();
            _isolationWindowTargetMz = new double[_run.MaxLcScan + 1];
            foreach (var ms2Scan in _ms2ScanNums)
            {
                var ms2Spec = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
                if (ms2Spec == null)
                {
                    continue;
                }
                _isolationWindowTargetMz[ms2Scan] = ms2Spec.IsolationWindow.IsolationWindowTargetMz;
            }


            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(20.0);
            ISequenceFilter ms1Filter;

            if (this.ScanNumbers != null && this.ScanNumbers.Any())
            {
                ms1Filter = new SelectedMsMsFilter(this.ScanNumbers);
            }
            else if (string.IsNullOrWhiteSpace(FeatureFilePath))
            {
                // Checks whether SpecFileName.ms1ft exists
                var ms1FtFilePath = MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, LcMsFeatureFinderLauncher.FileExtension);
                if (!File.Exists(ms1FtFilePath))
                {
                    Console.WriteLine(@"Running ProMex...");
                    sw.Start();
                    var param = new LcMsFeatureFinderInputParameter
                    {
                        InputPath                = SpecFilePath,
                        MinSearchMass            = MinSequenceMass,
                        MaxSearchMass            = MaxSequenceMass,
                        MinSearchCharge          = MinPrecursorIonCharge,
                        MaxSearchCharge          = MaxPrecursorIonCharge,
                        CsvOutput                = false,
                        ScoreReport              = false,
                        LikelihoodScoreThreshold = -10
                    };
                    var featureFinder = new LcMsFeatureFinderLauncher(param);
                    featureFinder.Run();
                }
                sw.Reset();
                sw.Start();
                Console.Write(@"Reading ProMex results...");
                ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, ms1FtFilePath, -10);
            }
            else
            {
                sw.Reset();
                sw.Start();
                var extension = Path.GetExtension(FeatureFilePath);
                if (extension.ToLower().Equals(".csv"))
                {
                    Console.Write(@"Reading ICR2LS/Decon2LS results...");
                    ms1Filter = new IsosFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else if (extension.ToLower().Equals(".ms1ft"))
                {
                    Console.Write(@"Reading ProMex results...");
                    ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, FeatureFilePath, -10);
                }
                else if (extension.ToLower().Equals(".msalign"))
                {
                    Console.Write(@"Reading MS-Align+ results...");
                    ms1Filter = new MsDeconvFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else
                {
                    ms1Filter = null;  //new Ms1FeatureMatrix(_run);
                }
            }

            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);


            // pre-generate deconvoluted spectra for scoring
            _massBinComparer = new FilteredProteinMassBinning(AminoAcidSet, MaxSequenceMass + 1000);

            _ms2ScorerFactory2 = new CompositeScorerFactory(_run, _massBinComparer, AminoAcidSet,
                                                            MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
            sw.Reset();
            Console.WriteLine(@"Generating deconvoluted spectra for MS/MS spectra...");
            sw.Start();
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken      = cancellationToken ?? CancellationToken.None
            };

            Parallel.ForEach(_ms2ScanNums, pfeOptions, ms2ScanNum =>
            {
                _ms2ScorerFactory2.DeconvonluteProductSpectrum(ms2ScanNum);
            });
            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(10.0);
            progData.Status = "Reading Fasta File";

            // Target database
            var targetDb = new FastaDatabase(DatabaseFilePath);

            targetDb.Read();

            // Generate sequence tags for all MS/MS spectra
            if (TagBasedSearch)
            {
                progData.StepRange(25.0);
                progData.Status = "Generating Sequence Tags";
                sw.Reset();
                Console.WriteLine(@"Generating sequence tags for MS/MS spectra...");
                sw.Start();
                var seqTagGen = GetSequenceTagGenerator();
                _tagMs2ScanNum = seqTagGen.GetMs2ScanNumsContainingTags().ToArray();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                _tagSearchEngine = new ScanBasedTagSearchEngine(_run, seqTagGen, new LcMsPeakMatrix(_run, ms1Filter), targetDb, ProductIonTolerance, AminoAcidSet,
                                                                _ms2ScorerFactory2,
                                                                ScanBasedTagSearchEngine.DefaultMinMatchedTagLength,
                                                                MaxSequenceMass, MinProductIonCharge, MaxProductIonCharge);
            }

            var specFileName         = MassSpecDataReaderFactory.RemoveExtension(Path.GetFileName(SpecFilePath));
            var targetOutputFilePath = Path.Combine(OutputDir, specFileName + TargetFileNameEnding);
            var decoyOutputFilePath  = Path.Combine(OutputDir, specFileName + DecoyFileNameEnding);
            var tdaOutputFilePath    = Path.Combine(OutputDir, specFileName + TdaFileNameEnding);

            progData.StepRange(60.0);
            progData.Status = "Running Target search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
            {
                sw.Reset();
                Console.Write(@"Reading the target database...");
                sw.Start();
                targetDb.Read();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                var targetMatches = new SortedSet <DatabaseSequenceSpectrumMatch> [_run.MaxLcScan + 1];

                progData.MaxPercentage = 42.5;
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the target database");
                    sw.Start();
                    RunTagBasedSearch(targetMatches, targetDb, null, prog);
                    Console.WriteLine(@"Target database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                }
                progData.MaxPercentage = 60.0;

                sw.Reset();
                Console.WriteLine(@"Searching the target database");
                sw.Start();
                RunSearch(targetMatches, targetDb, ms1Filter, null, prog);
                Console.WriteLine(@"Target database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value usign generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for target-spectrum matches");
                sw.Start();
                var bestTargetMatches = RunGeneratingFunction(targetMatches);
                WriteResultsToFile(bestTargetMatches, targetOutputFilePath, targetDb);
                sw.Stop();
                Console.WriteLine(@"Target-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(95.0); // total to 95%
            progData.Status = "Running Decoy search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
            {
                // Decoy database
                sw.Reset();
                sw.Start();
                var decoyDb = targetDb.Decoy(null, true);

                Console.Write(@"Reading the decoy database...");
                decoyDb.Read();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                progData.MaxPercentage = 77.5;
                var decoyMatches = new SortedSet <DatabaseSequenceSpectrumMatch> [_run.MaxLcScan + 1];
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the decoy database");
                    sw.Start();
                    RunTagBasedSearch(decoyMatches, decoyDb, null, prog);
                    Console.WriteLine(@"Decoy database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                }
                progData.MaxPercentage = 95.0;

                sw.Reset();
                Console.WriteLine(@"Searching the decoy database");
                sw.Start();
                RunSearch(decoyMatches, decoyDb, ms1Filter, null, prog);
                Console.WriteLine(@"Decoy database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value usign generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for decoy-spectrum matches");
                sw.Start();
                var bestDecoyMatches = RunGeneratingFunction(decoyMatches);
                WriteResultsToFile(bestDecoyMatches, decoyOutputFilePath, decoyDb);
                sw.Stop();
                Console.WriteLine(@"Decoy-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(100.0);
            progData.Status = "Writing combined results file";
            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
            {
                // Add "Qvalue" and "PepQValue"
                var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
                if (fdrCalculator.HasError())
                {
                    ErrorMessage = fdrCalculator.ErrorMessage;
                    Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
                    return(false);
                }

                fdrCalculator.WriteTo(tdaOutputFilePath);
            }
            progData.Report(100.0);

            Console.WriteLine(@"Done.");
            swAll.Stop();
            Console.WriteLine(@"Total elapsed time for search: {0:f1} sec ({1:f2} min)", swAll.Elapsed.TotalSeconds, swAll.Elapsed.TotalMinutes);

            return(true);
        }
Esempio n. 20
0
        public void Write()
        {
            foreach (var specFilePath in SpecFilePaths)
            {
                var outputFilePath = Path.Combine(OutputDir, Path.GetFileNameWithoutExtension(specFilePath) + ParameterFileExtension);

                using (var writer = new StreamWriter(outputFilePath))
                {
                    writer.WriteLine("SpecFile\t" + Path.GetFileName(specFilePath));
                    writer.WriteLine("DatabaseFile\t" + Path.GetFileName(DatabaseFilePath));
                    writer.WriteLine("FeatureFile\t{0}", FeatureFilePath != null ? Path.GetFileName(FeatureFilePath) : Path.GetFileName(MassSpecDataReaderFactory.ChangeExtension(specFilePath, ".ms1ft")));
                    writer.WriteLine("SearchMode\t" + SearchModeInt);
                    writer.WriteLine("Tag-based search\t" + TagBasedSearch);
                    writer.WriteLine("Tda\t" + (TdaBool == null ? "Decoy" : (bool)TdaBool ? "Target+Decoy" : "Target"));
                    writer.WriteLine("PrecursorIonTolerancePpm\t" + PrecursorIonTolerancePpm);
                    writer.WriteLine("ProductIonTolerancePpm\t" + ProductIonTolerancePpm);
                    writer.WriteLine("MinSequenceLength\t" + MinSequenceLength);
                    writer.WriteLine("MaxSequenceLength\t" + MaxSequenceLength);
                    writer.WriteLine("MinPrecursorIonCharge\t" + MinPrecursorIonCharge);
                    writer.WriteLine("MaxPrecursorIonCharge\t" + MaxPrecursorIonCharge);
                    writer.WriteLine("MinProductIonCharge\t" + MinProductIonCharge);
                    writer.WriteLine("MaxProductIonCharge\t" + MaxProductIonCharge);
                    writer.WriteLine("MinSequenceMass\t" + MinSequenceMass);
                    writer.WriteLine("MaxSequenceMass\t" + MaxSequenceMass);
                    writer.WriteLine("MaxDynamicModificationsPerSequence\t" + _maxNumDynModsPerSequence);
                    foreach (var searchMod in _searchModifications)
                    {
                        writer.WriteLine("Modification\t" + searchMod);
                    }
                }
            }
        }
Esempio n. 21
0
        public void Write()
        {
            var outputFilePath = Path.Combine(OutputDir, Path.GetFileNameWithoutExtension(SpecFilePath) + ParameterFileExtension);

            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("SpecFile\t" + Path.GetFileName(SpecFilePath));
                writer.WriteLine("DatabaseFile\t" + Path.GetFileName(DatabaseFilePath));
                writer.WriteLine("FeatureFile\t{0}", !string.IsNullOrWhiteSpace(FeatureFilePath) ? Path.GetFileName(FeatureFilePath) : Path.GetFileName(MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, ".ms1ft")));
                writer.WriteLine("InternalCleavageMode\t" + InternalCleavageMode);
                writer.WriteLine("Tag-based search\t" + TagBasedSearch);
                writer.WriteLine("Tda\t" + (TargetDecoySearchMode == DatabaseSearchMode.Both ? "Target+Decoy" : TargetDecoySearchMode.ToString()));
                writer.WriteLine("PrecursorIonTolerancePpm\t" + PrecursorIonTolerancePpm);
                writer.WriteLine("ProductIonTolerancePpm\t" + ProductIonTolerancePpm);
                writer.WriteLine("MinSequenceLength\t" + MinSequenceLength);
                writer.WriteLine("MaxSequenceLength\t" + MaxSequenceLength);
                writer.WriteLine("MinPrecursorIonCharge\t" + MinPrecursorIonCharge);
                writer.WriteLine("MaxPrecursorIonCharge\t" + MaxPrecursorIonCharge);
                writer.WriteLine("MinProductIonCharge\t" + MinProductIonCharge);
                writer.WriteLine("MaxProductIonCharge\t" + MaxProductIonCharge);
                writer.WriteLine("MinSequenceMass\t" + MinSequenceMass);
                writer.WriteLine("MaxSequenceMass\t" + MaxSequenceMass);
                writer.WriteLine("MaxDynamicModificationsPerSequence\t" + MaxDynamicModificationsPerSequence);
                foreach (var searchMod in Modifications)
                {
                    writer.WriteLine("Modification\t" + searchMod);
                }
            }
        }
Esempio n. 22
0
        public void TestReadingPbfFile()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFilePath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"TopDown\ProductionQCShew\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.pbf");

            if (!File.Exists(pbfFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pbfFilePath);
            }

            var pbfRun = new PbfLcMsRun(pbfFilePath);

            var checksum = pbfRun.PbfFileChecksum;

            var specFilePath = Path.ChangeExtension(pbfFilePath, "raw");

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
            }

            Console.WriteLine(@"Loading .pbf into memory");

            //var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);
            var run = new InMemoryLcMsRun(MassSpecDataReaderFactory.GetMassSpecDataReader(specFilePath), 0, 0);

            Console.WriteLine(@"Comparing spectra between .pbf and in-memory spectra");

            // spectrum comparison
            //for (var scanNum = run.MinLcScan; scanNum <= run.MaxLcScan; scanNum++)
            foreach (var scanNum in run.AllScanNumbers)
            {
                var spec1 = run.GetSpectrum(scanNum);
                var spec2 = pbfRun.GetSpectrum(scanNum);

                Assert.IsTrue(spec1.Peaks.Length == spec2.Peaks.Length);
                for (var i = 0; i < spec1.Peaks.Length; i++)
                {
                    var p1 = spec1.Peaks[i];
                    var p2 = spec2.Peaks[i];

                    Assert.True(p1.Equals(p2));

                    Assert.True(Math.Abs(p1.Mz - p2.Mz) < 1e-8);
                    Assert.True(Math.Abs(p1.Intensity - p2.Intensity) < 0.001);
                }
            }

            Console.WriteLine(@"Comparing XICs");
            // chromatogram comparison
            const double targetMz  = 655.01;
            var          tolerance = new Tolerance(10);
            var          xic1      = run.GetFullPrecursorIonExtractedIonChromatogram(targetMz, tolerance);
            var          xic2      = pbfRun.GetFullPrecursorIonExtractedIonChromatogram(targetMz, tolerance);

            Assert.True(xic1.Count == xic2.Count);

            for (var i = 0; i < xic1.Count; i++)
            {
                if (!xic1[i].Equals(xic2[i]))
                {
                    Console.WriteLine(@"{0} {1} {2}", i, xic1[i], xic2[i]);
                }
                Assert.True(xic1[i].Equals(xic2[i]));
            }
            Console.WriteLine(@"Done");
        }
Esempio n. 23
0
        public void TestTagMatching()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            // Parse sequence tags
            const string dataSet      = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            const int    minTagLength = 8;
            var          tagFileName  = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");

            if (!File.Exists(tagFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
            }

            var tagParser = new SequenceTagParser(tagFileName, minTagLength);

            // Parse raw file
            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");

            if (!File.Exists(rawFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            // Parse ID file
            const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var          resultParser    = new MsPathFinderParser(resultFilePath);
            const double qValueThreshold = 0.01;
            var          idList          = resultParser.GetIdWithQValuesNoLargerThan(qValueThreshold);
            var          idFlag          = new bool[run.MaxLcScan + 1];

            foreach (var id in idList)
            {
                idFlag[id.Scan] = true;
            }

            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

//            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";
            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            var numMs2Spectra                = 0;
            var numSpectraWithTag            = 0;
            var numSpectraWithMatchingTag    = 0;
            var numSpectraWithMatchedTagNoId = 0;

            foreach (var ms2ScanNum in run.GetScanNumbers(2))
            {
                ++numMs2Spectra;
                var tags = tagParser.GetSequenceTags(ms2ScanNum);
                if (tags != null)
                {
                    ++numSpectraWithTag;
                    foreach (var tag in tags)
                    {
                        if (searchableDb.Search(tag.Sequence) >= 0)
                        {
                            //Console.WriteLine(tag.Sequence);
                            ++numSpectraWithMatchingTag;
                            if (!idFlag[ms2ScanNum])
                            {
                                ++numSpectraWithMatchedTagNoId;
                            }
                            break;
                        }
                    }
                }
            }
            Console.WriteLine("Tag length: {0}", minTagLength);
            Console.WriteLine("NumMs2Spectra: {0}", numMs2Spectra);
            Console.WriteLine("NumMs2SpectraWithTags: {0} ({1})", numSpectraWithTag, numSpectraWithTag / (float)numMs2Spectra);
            Console.WriteLine("NumMs2SpectraWithMatchingTags: {0} ({1})", numSpectraWithMatchingTag, numSpectraWithMatchingTag / (float)numMs2Spectra);
            Console.WriteLine("NumMs2SpectraWithMatchingTagsWithNoId: {0} ({1})", numSpectraWithMatchedTagNoId, numSpectraWithMatchedTagNoId / (float)numMs2Spectra);
        }
Esempio n. 24
0
        public static int Main(string[] args)
        {
            string specFilePath;
            string outputDir;

            try
            {
                var handle = Process.GetCurrentProcess().MainWindowHandle;
                SetConsoleMode(handle, EnableExtendedFlags);

                if (args.Length == 0)
                {
                    PrintUsageInfo();
                    return(-1);
                }

                var paramDic = new Dictionary <string, string>
                {
                    { "-s", null },
                    { "-o", null }
                };

                for (var i = 0; i < args.Length / 2; i++)
                {
                    var key   = args[2 * i];
                    var value = args[2 * i + 1];
                    if (!paramDic.ContainsKey(key))
                    {
                        PrintUsageInfo("Invalid parameter: " + key);
                        return(-1);
                    }
                    paramDic[key] = value;
                }

                // Parse command line parameters
                specFilePath = paramDic["-s"];

                if (specFilePath == null)
                {
                    PrintUsageInfo("Missing required parameter -s!");
                    return(-1);
                }

                // Check for folder-type datasets, and replace specFilePath with the directory name if it is.
                specFilePath = MassSpecDataReaderFactory.GetDatasetName(specFilePath);

                var isDirectoryDataset = MassSpecDataReaderFactory.IsADirectoryDataset(specFilePath);
                // True if specFilePath is a directory that is NOT a supported folder-type dataset.
                var specPathIsDirectory = Directory.Exists(specFilePath) && !isDirectoryDataset;

                if (!File.Exists(specFilePath) && !specPathIsDirectory && !isDirectoryDataset)
                {
                    PrintUsageInfo("File not found: " + specFilePath);
                    return(-1);
                }

                var types = MassSpecDataReaderFactory.MassSpecDataTypeFilterList;
                types.Remove(".pbf");

                if (!specPathIsDirectory && !(types.Select(ext => specFilePath.ToLower().EndsWith(ext)).Any()))
                {
                    PrintUsageInfo("Invalid file extension: (" + Path.GetExtension(specFilePath) + ") " + specFilePath);
                    return(-1);
                }

                // Must use "Path.GetFullPath" to return the absolute path when the source file is in the working directory
                // But, it could cause problems with too-long paths.
                outputDir = paramDic["-o"] ?? (specPathIsDirectory ? specFilePath : Path.GetDirectoryName(Path.GetFullPath(specFilePath)));
                if (outputDir == null)
                {
                    PrintUsageInfo("Invalid output file directory: " + specFilePath);
                    return(-1);
                }

                if (!Directory.Exists(outputDir))
                {
                    if (File.Exists(outputDir) && !File.GetAttributes(outputDir).HasFlag(FileAttributes.Directory))
                    {
                        PrintUsageInfo("OutputDir " + outputDir + " is not a directory!");
                        return(-1);
                    }
                    Directory.CreateDirectory(outputDir);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception while parsing the command line parameters: " + ex.Message);
                return(-5);
            }

#if (!DEBUG)
            try
            {
#endif
            string[] specFilePaths = new[] { specFilePath };
            if (Directory.Exists(specFilePath) && !MassSpecDataReaderFactory.IsADirectoryDataset(specFilePath))
            {
                specFilePaths = Directory.GetFiles(specFilePath, "*.raw");
            }

            foreach (var rawFilePath in specFilePaths)
            {
                var pbfFilePath = Path.Combine(outputDir, Path.GetFileNameWithoutExtension(rawFilePath) + PbfLcMsRun.FileExtension);

                bool isCurrent;
                if (File.Exists(pbfFilePath) && PbfLcMsRun.CheckFileFormatVersion(pbfFilePath, out isCurrent) && isCurrent)
                {
                    Console.WriteLine("{0} already exists.", pbfFilePath);
                    continue;
                }

                Console.WriteLine("Creating {0} from {1}", pbfFilePath, rawFilePath);
                IMassSpecDataReader reader = MassSpecDataReaderFactory.GetMassSpecDataReader(rawFilePath);
                var progress = new Progress <ProgressData>(p =>
                {
                    p.UpdateFrequencySeconds = 2;
                    if ((p.Percent % 25).Equals(0) || p.ShouldUpdate())
                    {
                        Console.Write("\r{0}, {1:00.0}% complete                        ", p.Status, p.Percent);
                    }
                });
                var run = new PbfLcMsRun(rawFilePath, reader, pbfFilePath, 0, 0, progress);
                Console.WriteLine();
            }


            Console.WriteLine("PbfFormatVersion: {0}", PbfLcMsRun.FileFormatId);
            return(0);

#if (!DEBUG)
        }

        catch (Exception ex)
        {
            // NOTE: The DMS Analysis Manager looks for this text; do not change it
            Console.WriteLine("Exception while processing: " + ex.Message);
            Console.WriteLine(ex.StackTrace);
            var errorCode = -Math.Abs(ex.Message.GetHashCode());
            if (errorCode == 0)
            {
                return(-1);
            }
            else
            {
                return(errorCode);
            }
        }
#endif
        }
Esempio n. 25
0
        public void TestFeatureId()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";

            if (!File.Exists(dataSet))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dataSet);
            }

            // Feature: 5236-5286	6-12	8480.3681	5
            const int    minScanNum  = 5236;
            const int    maxScanNum  = 5286;
            const double featureMass = 8480.3681;

            //const int minScanNum = 7251;
            //const int maxScanNum = 7326;
            //const double featureMass = 32347.18;

//            const int minScanNum = 4451;
//            const int maxScanNum = 4541;
//            const double featureMass = 31267.95;

            var tolerance        = new Tolerance(10);
            var relaxedTolerance = new Tolerance(20);

            const int minTagLength       = 5;
            const int minMergedTagLength = 7;
            const int minNumTagMatches   = 1;

            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
            var run         = PbfLcMsRun.GetLcMsRun(rawFileName);

            var aminoAcidSet    = AminoAcidSet.GetStandardAminoAcidSet();
            var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
            var filter          = new Ms1FtFilter(run, tolerance, featureFileName);
            var ms2ScanNums     =
                filter.GetMatchingMs2ScanNums(featureMass)
                .Where(scanNum => scanNum > minScanNum && scanNum < maxScanNum)
                .ToArray();

            const string tagFileName   = dataSet + ".seqtag"; //"_MinLength3.seqtag"; //Path.ChangeExtension(dataSet, ".seqtag");
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var tagParser    = new SequenceTagParser(tagFileName, minTagLength);

            var proteinsToTags = new Dictionary <string, IList <MatchedTag> >();

            foreach (var ms2ScanNum in ms2ScanNums)
            {
                var tags = tagParser.GetSequenceTags(ms2ScanNum);
                foreach (var tag in tags)
                {
                    var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                    foreach (var index in matchedIndices)
                    {
                        var protein    = fastaDb.GetProteinName(index);
                        var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                        var matchedTag = new MatchedTag(tag, startIndex, featureMass);
                        IList <MatchedTag> existingTags;
                        if (proteinsToTags.TryGetValue(protein, out existingTags))
                        {
                            existingTags.Add(matchedTag);
                        }
                        else
                        {
                            proteinsToTags.Add(protein, new List <MatchedTag> {
                                matchedTag
                            });
                        }
                    }
                }
            }

            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                if (entry.Value.Count < minNumTagMatches)
                {
                    break;
                }
                var proteinName     = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein         = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);

                var matchedTagSet = new MatchedTagSet(proteinSequence, aminoAcidSet,
                                                      tolerance, relaxedTolerance);

                Console.WriteLine("********** Before merging");
                foreach (var matchedTag in entry.Value)
                {
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
                    Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                      (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);

                    matchedTagSet.Add(matchedTag);
                }

                Console.WriteLine("********** After merging");
                foreach (var matchedTag in matchedTagSet.Tags)
                {
                    if (matchedTag.Length < minMergedTagLength)
                    {
                        continue;
                    }
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
                    Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                      (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }

                break;
            }
        }
Esempio n. 26
0
        /// <summary>
        /// Find features in the data file
        /// </summary>
        /// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
        /// <returns>0 if success; negative number on error</returns>
        private int ProcessFile(string rawFile)
        {
            var outDirectory = GetOutputDirectory(rawFile);

            if (string.IsNullOrEmpty(outDirectory))
            {
                return(-1);
            }

            var baseName            = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
            var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
            var outCsvFilePath      = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
            var pngFilePath         = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

            if (File.Exists(ms1FeaturesFilePath))
            {
                ShowErrorMessage("ProMex output already exists: " + ms1FeaturesFilePath);
                return(-2);
            }

            if (!File.Exists(rawFile))
            {
                ShowErrorMessage("Cannot find input file: " + rawFile);
                return(-3);
            }

            var stopwatch = Stopwatch.StartNew();

            Console.WriteLine("Start loading MS1 data from {0}", rawFile);
            var run = PbfLcMsRun.GetLcMsRun(rawFile);

            var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);

            Console.WriteLine("Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            if (run.GetMs1ScanVector().Length == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
                return(-4);
            }

            if (featureFinder.Ms1PeakCount == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 peaks: " + Path.GetFileName(rawFile));
                return(-5);
            }

            var    comparer         = featureFinder.Comparer;
            var    container        = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var    minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
            var    maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
            double totalMassBin     = maxSearchMassBin - minSearchMassBin + 1;

            Console.WriteLine("Start MS1 feature extraction.");
            stopwatch.Restart();
            for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
            {
                var clusters = featureFinder.FindFeatures(binNum);
                container.Add(clusters);

                if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
                {
                    var elapsed             = (stopwatch.ElapsedMilliseconds) / 1000.0d;
                    var processedBins       = binNum - minSearchMassBin;
                    var processedPercentage = processedBins / totalMassBin * 100;
                    Console.WriteLine("Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                                      processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed,
                                      container.NumberOfFeatures);
                }
            }

            Console.WriteLine("Complete MS1 feature extraction.");
            Console.WriteLine(" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(" - Number of extracted features = {0}", container.NumberOfFeatures);
            Console.WriteLine("Start selecting mutually independent features from feature network graph");
            stopwatch.Restart();

            var featureId = FilterAndOutputFeatures(container, featureFinder, outCsvFilePath, ms1FeaturesFilePath);

            Console.WriteLine("Complete feature filtration");
            Console.WriteLine(" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(" - Number of filtered features = {0}", featureId);
            Console.WriteLine(" - ProMex output: {0}", ms1FeaturesFilePath);

            if (Parameters.CsvOutput)
            {
                Console.WriteLine(" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
            }

            if (Parameters.FeatureMapImage)
            {
                CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
            }

            return(0);
        }