Exemple #1
0
        public void TestTagBasedSearchCompRef()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dataSetPath   = @"D:\MassSpecFiles\CompRef";
            const string fastaFilePath = @"D:\MassSpecFiles\CompRef\ID_003278_4B4B3CB1.fasta";
            const string modsFilePath  = @"D:\MassSpecFiles\CompRef\Mods.txt";

            if (!Directory.Exists(dataSetPath))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, dataSetPath);
            }
            if (!File.Exists(modsFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
            }
            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fileEntries = Directory.GetFiles(dataSetPath);

            var dataset = (from fileName in fileEntries where fileName.EndsWith("pbf") select Path.GetFileNameWithoutExtension(fileName)).ToList();

            dataset.Sort();


            var fastaDb   = new FastaDatabase(fastaFilePath);
            var tolerance = new Tolerance(10);
            var aaSet     = new AminoAcidSet(modsFilePath);

            for (var i = 0; i < dataset.Count; i++)
            {
                var rawFile     = string.Format(@"{0}\{1}.pbf", dataSetPath, dataset[i]);
                var ms1File     = string.Format(@"{0}\{1}.ms1ft", dataSetPath, dataset[i]);
                var tagFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFile, ".seqtag");

                var       run          = PbfLcMsRun.GetLcMsRun(rawFile);
                const int minTagLength = 5;
                //var tagParser = new SequenceTagParser(tagFilePath, minTagLength, 100);

                Console.WriteLine("-----------------{0}--------------------", rawFile);

                TestTagBasedSearch(run, fastaDb, tolerance, aaSet);

                Console.WriteLine("-----------------------------------------------------------------------");
            }
        }
Exemple #2
0
        public void TestTagBasedSearch()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

//            const string rawFilePath = @"H:\Research\Lewy\raw\Lewy_intact_01.raw";
//            const string rawFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
//            const string rawFilePath = @"H:\Research\Yufeng\TopDownYufeng\raw\yufeng_column_test2.raw";
//            const string rawFilePath = @"H:\Research\Weijun_TopDown\raw\UC4_Intact_plasmaTest_90_6May15_Bane_14-09-01RZ.raw";
//            const string rawFilePath = @"H:\Research\Charles\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";
            const string rawFilePath = @"D:\MassSpecFiles\60k\Yufeng_SampleTest1_150614113438.pbf";

            //const string rawFilePath = @"D:\MassSpecFiles\60k\NCR_50K_Test_24Jun15_Bane_15-02-02RZ.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            //const int minTagLength = 5;
            var tagFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, ".seqtag");
            //var tagParser = new SequenceTagParser(tagFilePath, minTagLength, 100);

            const string fastaFilePath = @"D:\MassSpecFiles\60k\ID_003836_DA9CC1E4.fasta";

            //const string fastaFilePath = @"D:\MassSpecFiles\60k\ID_004973_9BA6912F.fasta";
            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb = new FastaDatabase(fastaFilePath);

            var tolerance = new Tolerance(10);
//            var modsFilePath = @"H:\Research\QCShew_TopDown\Production\Mods_Methyl.txt";
            var modsFilePath = @"D:\MassSpecFiles\60k\Mods.txt";

//            var modsFilePath = "";

            if (!File.Exists(modsFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
            }

            var aaSet = new AminoAcidSet(modsFilePath);

            TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
        }
Exemple #3
0
        public void TestTagBasedSearchForLewy()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFilePath = @"D:\MassSpecFiles\Lewy\Lewy_AT_AD1_21May15_Bane_14-09-01RZ.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            //const int minTagLength = 4;
            var tagFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, ".seqtag");
            //var tagParser = new SequenceTagParser(tagFilePath, minTagLength, 10000);

            const string fastaFilePath = @"D:\MassSpecFiles\Lewy\a4_human.fasta";

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb = new FastaDatabase(fastaFilePath);

            var tolerance    = new Tolerance(10);
            var modsFilePath = @"D:\MassSpecFiles\Lewy\Mods.txt";

            if (!File.Exists(modsFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
            }

            var aaSet = new AminoAcidSet(modsFilePath);

            TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
        }
        public void TestTagMatchingSingleSpec()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            const int    scanNum = 4533;

            // Parse sequence tags
            var          tagFileName   = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
            const int    minTagLength  = 8;
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

            if (!File.Exists(tagFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var tagParser    = new SequenceTagParser(tagFileName, minTagLength);

            var tags = tagParser.GetSequenceTags(scanNum);

            foreach (var tag in tags)
            {
                var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence)
                                      .Select(index => fastaDb.GetProteinName(index)).ToArray();
                if (matchedProteins.Any())
                {
                    Console.WriteLine("{0}\t{1}\t{2}\t{3}", tag.Sequence, tag.IsPrefix, tag.FlankingMass, string.Join("\t", matchedProteins));
                }
            }
        }
 private bool HasExistingPbfFile(string path)
 {
     return(File.Exists(MassSpecDataReaderFactory.ChangeExtension(path, ".pbf")));
 }
        public void TestTagMatching()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            // Parse sequence tags
            const string dataSet      = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            const int    minTagLength = 8;
            var          tagFileName  = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");

            if (!File.Exists(tagFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
            }

            var tagParser = new SequenceTagParser(tagFileName, minTagLength);

            // Parse raw file
            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");

            if (!File.Exists(rawFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            // Parse ID file
            const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var          resultParser    = new MsPathFinderParser(resultFilePath);
            const double qValueThreshold = 0.01;
            var          idList          = resultParser.GetIdWithQValuesNoLargerThan(qValueThreshold);
            var          idFlag          = new bool[run.MaxLcScan + 1];

            foreach (var id in idList)
            {
                idFlag[id.Scan] = true;
            }

            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

//            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";
            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            var numMs2Spectra                = 0;
            var numSpectraWithTag            = 0;
            var numSpectraWithMatchingTag    = 0;
            var numSpectraWithMatchedTagNoId = 0;

            foreach (var ms2ScanNum in run.GetScanNumbers(2))
            {
                ++numMs2Spectra;
                var tags = tagParser.GetSequenceTags(ms2ScanNum);
                if (tags != null)
                {
                    ++numSpectraWithTag;
                    foreach (var tag in tags)
                    {
                        if (searchableDb.Search(tag.Sequence) >= 0)
                        {
                            //Console.WriteLine(tag.Sequence);
                            ++numSpectraWithMatchingTag;
                            if (!idFlag[ms2ScanNum])
                            {
                                ++numSpectraWithMatchedTagNoId;
                            }
                            break;
                        }
                    }
                }
            }
            Console.WriteLine("Tag length: {0}", minTagLength);
            Console.WriteLine("NumMs2Spectra: {0}", numMs2Spectra);
            Console.WriteLine("NumMs2SpectraWithTags: {0} ({1})", numSpectraWithTag, numSpectraWithTag / (float)numMs2Spectra);
            Console.WriteLine("NumMs2SpectraWithMatchingTags: {0} ({1})", numSpectraWithMatchingTag, numSpectraWithMatchingTag / (float)numMs2Spectra);
            Console.WriteLine("NumMs2SpectraWithMatchingTagsWithNoId: {0} ({1})", numSpectraWithMatchedTagNoId, numSpectraWithMatchedTagNoId / (float)numMs2Spectra);
        }
        public void TestFeatureIdMatching()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var          resultParser    = new MsPathFinderParser(resultFilePath);
            const double qValueThreshold = 0.01;
            const double tolerancePpm    = 13;

            const string dataSet     = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            var          rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");

            if (!File.Exists(rawFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            var idList =
                resultParser.GetIdList().TakeWhile(id => id.QValue <= qValueThreshold).OrderBy(id => id.Mass).ToList();
            var idMassList = idList.Select(id => id.Mass).ToList();
            var idFlag     = new bool[idList.Count];


            // Parse sequence tags
            var       tagFileName    = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
            const int minTagLength   = 6;
            const int numProtMatches = 4;
//            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";

            if (!File.Exists(tagFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            var tagParser = new SequenceTagParser(tagFileName, minTagLength);

            var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
            var featureParser   = new TsvFileParser(featureFileName);

            var minScan   = featureParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
            var maxScan   = featureParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
            var minCharge = featureParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var maxCharge = featureParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var monoMass  = featureParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();

            var numFeaturesWithId                    = 0;
            var numFeaturesWithMs2                   = 0;
            var numFeaturesWithTags                  = 0;
            var numFeaturesWithMatchingTags          = 0;
            var numFeaturesWithTwoOrMoreMatchingTags = 0;
            var numFeaturesWithNoIdAndMatchingTags   = 0;

            for (var i = 0; i < featureParser.NumData; i++)
            {
                var mass = monoMass[i];

                // Find Id
                var tolDa   = new Tolerance(tolerancePpm).GetToleranceAsDa(mass, 1);
                var minMass = mass - tolDa;
                var maxMass = mass + tolDa;
                var index   = idMassList.BinarySearch(mass);
                if (index < 0)
                {
                    index = ~index;
                }

                var matchedId = new List <MsPathFinderId>();
                // go down
                var curIndex = index - 1;
                while (curIndex >= 0)
                {
                    var curId = idList[curIndex];
                    if (curId.Mass < minMass)
                    {
                        break;
                    }
                    if (curId.Scan > minScan[i] && curId.Scan < maxScan[i] &&
                        curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
                    {
                        matchedId.Add(curId);
                        idFlag[curIndex] = true;
                    }
                    --curIndex;
                }

                // go up
                curIndex = index;
                while (curIndex < idList.Count)
                {
                    var curId = idList[curIndex];
                    if (curId.Mass > maxMass)
                    {
                        break;
                    }
                    if (curId.Scan >= minScan[i] && curId.Scan <= maxScan[i] &&
                        curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
                    {
                        matchedId.Add(curId);
                        idFlag[curIndex] = true;
                    }
                    ++curIndex;
                }

                var hasId = false;
                if (matchedId.Any())
                {
                    ++numFeaturesWithId;
                    hasId = true;
                }

                // Find MS2 scans
//                var numMs2Scans = 0;
                var tags   = new List <SequenceTag>();
                var hasMs2 = false;
                for (var scanNum = minScan[i]; scanNum <= maxScan[i]; scanNum++)
                {
                    var isolationWindow = run.GetIsolationWindow(scanNum);
                    if (isolationWindow == null)
                    {
                        continue;
                    }
                    var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
                    var charge = (int)Math.Round(mass / isolationWindowTargetMz);
                    if (charge < minCharge[i] || charge > maxCharge[i])
                    {
                        continue;
                    }
                    var mz = Ion.GetIsotopeMz(mass, charge,
                                              Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex);
                    if (isolationWindow.Contains(mz))
                    {
//                        ++numMs2Scans;
                        tags.AddRange(tagParser.GetSequenceTags(scanNum));
                        hasMs2 = true;
                    }
                }
                if (hasMs2)
                {
                    ++numFeaturesWithMs2;
                }
                if (tags.Any())
                {
                    ++numFeaturesWithTags;
                }
                var protHist      = new Dictionary <string, int>();
                var hasMatchedTag = false;
                foreach (var tag in tags)
                {
                    var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).Select(idx => fastaDb.GetProteinName(idx)).ToArray();
                    if (matchedProteins.Any())
                    {
                        hasMatchedTag = true;
                        foreach (var protein in matchedProteins)
                        {
                            int num;
                            if (protHist.TryGetValue(protein, out num))
                            {
                                protHist[protein] = num + 1;
                            }
                            else
                            {
                                protHist[protein] = 1;
                            }
                        }
                    }
                }
                if (hasMatchedTag)
                {
                    ++numFeaturesWithMatchingTags;
                    if (!hasId)
                    {
                        ++numFeaturesWithNoIdAndMatchingTags;
                    }
                }
                if (protHist.Any())
                {
                    var maxOcc = protHist.Values.Max();
                    if (maxOcc >= numProtMatches)
                    {
                        ++numFeaturesWithTwoOrMoreMatchingTags;
                    }
                }
            }

            Console.WriteLine("NumFeatures: {0}", featureParser.NumData);
            Console.WriteLine("NumId: {0}", idList.Count);
            Console.WriteLine("NumFeaturesWithId: {0} ({1})", numFeaturesWithId, numFeaturesWithId / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMs2: {0} ({1})", numFeaturesWithMs2, numFeaturesWithMs2 / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithTag: {0} ({1})", numFeaturesWithTags, numFeaturesWithTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMatchedTag: {0} ({1})", numFeaturesWithMatchingTags, numFeaturesWithMatchingTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMoreThanOneMatchedTag: {0} ({1})", numFeaturesWithTwoOrMoreMatchingTags, numFeaturesWithTwoOrMoreMatchingTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithNoIdAndMatchedTag: {0} ({1})", numFeaturesWithNoIdAndMatchingTags, numFeaturesWithNoIdAndMatchingTags / (float)featureParser.NumData);

            for (var i = 0; i < idFlag.Length; i++)
            {
                if (!idFlag[i])
                {
                    Console.WriteLine(idList[i].Scan);
                }
            }


//            Console.WriteLine(string.Join(",", filter.GetMatchingMs2ScanNums(8115.973001)));
//
//            Console.WriteLine(featureFileName);
        }
        public void TestFeatureId()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";

            if (!File.Exists(dataSet))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dataSet);
            }

            // Feature: 5236-5286	6-12	8480.3681	5
            const int    minScanNum  = 5236;
            const int    maxScanNum  = 5286;
            const double featureMass = 8480.3681;

            //const int minScanNum = 7251;
            //const int maxScanNum = 7326;
            //const double featureMass = 32347.18;

//            const int minScanNum = 4451;
//            const int maxScanNum = 4541;
//            const double featureMass = 31267.95;

            var tolerance        = new Tolerance(10);
            var relaxedTolerance = new Tolerance(20);

            const int minTagLength       = 5;
            const int minMergedTagLength = 7;
            const int minNumTagMatches   = 1;

            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
            var run         = PbfLcMsRun.GetLcMsRun(rawFileName);

            var aminoAcidSet    = AminoAcidSet.GetStandardAminoAcidSet();
            var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
            var filter          = new Ms1FtFilter(run, tolerance, featureFileName);
            var ms2ScanNums     =
                filter.GetMatchingMs2ScanNums(featureMass)
                .Where(scanNum => scanNum > minScanNum && scanNum < maxScanNum)
                .ToArray();

            const string tagFileName   = dataSet + ".seqtag"; //"_MinLength3.seqtag"; //Path.ChangeExtension(dataSet, ".seqtag");
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var tagParser    = new SequenceTagParser(tagFileName, minTagLength);

            var proteinsToTags = new Dictionary <string, IList <MatchedTag> >();

            foreach (var ms2ScanNum in ms2ScanNums)
            {
                var tags = tagParser.GetSequenceTags(ms2ScanNum);
                foreach (var tag in tags)
                {
                    var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                    foreach (var index in matchedIndices)
                    {
                        var protein    = fastaDb.GetProteinName(index);
                        var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                        var matchedTag = new MatchedTag(tag, startIndex, featureMass);
                        IList <MatchedTag> existingTags;
                        if (proteinsToTags.TryGetValue(protein, out existingTags))
                        {
                            existingTags.Add(matchedTag);
                        }
                        else
                        {
                            proteinsToTags.Add(protein, new List <MatchedTag> {
                                matchedTag
                            });
                        }
                    }
                }
            }

            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                if (entry.Value.Count < minNumTagMatches)
                {
                    break;
                }
                var proteinName     = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein         = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);

                var matchedTagSet = new MatchedTagSet(proteinSequence, aminoAcidSet,
                                                      tolerance, relaxedTolerance);

                Console.WriteLine("********** Before merging");
                foreach (var matchedTag in entry.Value)
                {
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
                    Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                      (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);

                    matchedTagSet.Add(matchedTag);
                }

                Console.WriteLine("********** After merging");
                foreach (var matchedTag in matchedTagSet.Tags)
                {
                    if (matchedTag.Length < minMergedTagLength)
                    {
                        continue;
                    }
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
                    Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                      (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }

                break;
            }
        }
Exemple #9
0
        public void Write()
        {
            var outputFilePath = Path.Combine(OutputDir, Path.GetFileNameWithoutExtension(SpecFilePath) + ParameterFileExtension);

            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("SpecFile\t" + Path.GetFileName(SpecFilePath));
                writer.WriteLine("DatabaseFile\t" + Path.GetFileName(DatabaseFilePath));
                writer.WriteLine("FeatureFile\t{0}", !string.IsNullOrWhiteSpace(FeatureFilePath) ? Path.GetFileName(FeatureFilePath) : Path.GetFileName(MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, ".ms1ft")));
                writer.WriteLine("InternalCleavageMode\t" + InternalCleavageMode);
                writer.WriteLine("Tag-based search\t" + TagBasedSearch);
                writer.WriteLine("Tda\t" + (TargetDecoySearchMode == DatabaseSearchMode.Both ? "Target+Decoy" : TargetDecoySearchMode.ToString()));
                writer.WriteLine("PrecursorIonTolerancePpm\t" + PrecursorIonTolerancePpm);
                writer.WriteLine("ProductIonTolerancePpm\t" + ProductIonTolerancePpm);
                writer.WriteLine("MinSequenceLength\t" + MinSequenceLength);
                writer.WriteLine("MaxSequenceLength\t" + MaxSequenceLength);
                writer.WriteLine("MinPrecursorIonCharge\t" + MinPrecursorIonCharge);
                writer.WriteLine("MaxPrecursorIonCharge\t" + MaxPrecursorIonCharge);
                writer.WriteLine("MinProductIonCharge\t" + MinProductIonCharge);
                writer.WriteLine("MaxProductIonCharge\t" + MaxProductIonCharge);
                writer.WriteLine("MinSequenceMass\t" + MinSequenceMass);
                writer.WriteLine("MaxSequenceMass\t" + MaxSequenceMass);
                writer.WriteLine("MaxDynamicModificationsPerSequence\t" + MaxDynamicModificationsPerSequence);
                foreach (var searchMod in Modifications)
                {
                    writer.WriteLine("Modification\t" + searchMod);
                }
            }
        }
Exemple #10
0
        public static int Main(string[] args)
        {
            PbfGenInputParameters options;

            try
            {
                var osVersionInfo = new clsOSVersionInfo();
                if (osVersionInfo.GetOSVersion().ToLower().Contains("windows"))
                {
                    var handle = Process.GetCurrentProcess().MainWindowHandle;
                    SetConsoleMode(handle, EnableExtendedFlags);
                }

                var exeName = System.Reflection.Assembly.GetEntryAssembly().GetName().Name;

                var parser = new CommandLineParser <PbfGenInputParameters>(exeName, Version);
                parser.UsageExamples.Add($"Using -start and -end to limit the scan range to include in the .pbf file\n\t{exeName}.exe -s Dataset.raw -start 2000 -end 3000");

                var results = parser.ParseArgs(args);

                if (!results.Success)
                {
                    // Wait for 1.5 seconds
                    System.Threading.Thread.Sleep(1500);

                    return(-1);
                }

                if (!results.ParsedResults.Validate())
                {
                    parser.PrintHelp();

                    // Wait for 1.5 seconds
                    System.Threading.Thread.Sleep(1500);

                    return(-1);
                }

                options = results.ParsedResults;
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception while parsing the command line parameters: " + ex.Message);
                return(-5);
            }

#if (!DEBUG)
            try
#endif
            {
                var specFilePaths = new[] { options.SourcePath };
                if (Directory.Exists(options.SourcePath) && !MassSpecDataReaderFactory.IsADirectoryDataset(options.SourcePath))
                {
                    specFilePaths = Directory.GetFiles(options.SourcePath, "*.raw"); // TODO: Support folders with other formats in them too...
                }

                foreach (var rawFilePath in specFilePaths)
                {
                    var pbfFileName = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, PbfLcMsRun.FileExtensionConst);
                    var pbfFilePath = Path.Combine(options.OutputDir, Path.GetFileName(pbfFileName));

                    if (File.Exists(pbfFilePath) && PbfLcMsRun.CheckFileFormatVersion(pbfFilePath, out var isCurrent) && isCurrent)
                    {
                        Console.WriteLine("{0} already exists.", pbfFilePath);
                        continue;
                    }

                    Console.WriteLine("Creating {0} from {1}", pbfFilePath, rawFilePath);

                    if (options.StartScan > 0 && options.EndScan > 0)
                    {
                        Console.WriteLine("Only including scans {0} to {1}", options.StartScan, options.EndScan);
                    }
                    else if (options.StartScan > 0)
                    {
                        Console.WriteLine("Only including scans {0} to the end", options.StartScan);
                    }
                    else if (options.EndScan > 0)
                    {
                        Console.WriteLine("Only including scans 1 to {0}", options.EndScan);
                    }

                    var reader   = MassSpecDataReaderFactory.GetMassSpecDataReader(rawFilePath);
                    var progress = new Progress <ProgressData>(p =>
                    {
                        p.UpdateFrequencySeconds = 2;
                        if ((p.Percent % 25).Equals(0) || p.ShouldUpdate())
                        {
                            Console.Write("\r{0}, {1:00.0}% complete                        ", p.Status, p.Percent);
                        }
                    });
                    var run = new PbfLcMsRun(rawFilePath, reader, pbfFilePath, 0, 0, progress, false, options.StartScan, options.EndScan);
                    Console.WriteLine();
                }

                Console.WriteLine("PbfFormatVersion: {0}", PbfLcMsRun.FileFormatId);
                return(0);
            }
#if (!DEBUG)
            catch (Exception ex)
            {
                // NOTE: The DMS Analysis Manager looks for this text; do not change it
                Console.WriteLine("Exception while processing: " + ex.Message);
                Console.WriteLine(ex.StackTrace);
                var errorCode = -Math.Abs(ex.Message.GetHashCode());
                if (errorCode == 0)
                {
                    return(-1);
                }
                else
                {
                    return(errorCode);
                }
            }
#endif
        }
        public void Write()
        {
            foreach (var specFilePath in SpecFilePaths)
            {
                var outputFilePath = Path.Combine(OutputDir, Path.GetFileNameWithoutExtension(specFilePath) + ParameterFileExtension);

                using (var writer = new StreamWriter(outputFilePath))
                {
                    writer.WriteLine("SpecFile\t" + Path.GetFileName(specFilePath));
                    writer.WriteLine("DatabaseFile\t" + Path.GetFileName(DatabaseFilePath));
                    writer.WriteLine("FeatureFile\t{0}", FeatureFilePath != null ? Path.GetFileName(FeatureFilePath) : Path.GetFileName(MassSpecDataReaderFactory.ChangeExtension(specFilePath, ".ms1ft")));
                    writer.WriteLine("SearchMode\t" + SearchModeInt);
                    writer.WriteLine("Tag-based search\t" + TagBasedSearch);
                    writer.WriteLine("Tda\t" + (TdaBool == null ? "Decoy" : (bool)TdaBool ? "Target+Decoy" : "Target"));
                    writer.WriteLine("PrecursorIonTolerancePpm\t" + PrecursorIonTolerancePpm);
                    writer.WriteLine("ProductIonTolerancePpm\t" + ProductIonTolerancePpm);
                    writer.WriteLine("MinSequenceLength\t" + MinSequenceLength);
                    writer.WriteLine("MaxSequenceLength\t" + MaxSequenceLength);
                    writer.WriteLine("MinPrecursorIonCharge\t" + MinPrecursorIonCharge);
                    writer.WriteLine("MaxPrecursorIonCharge\t" + MaxPrecursorIonCharge);
                    writer.WriteLine("MinProductIonCharge\t" + MinProductIonCharge);
                    writer.WriteLine("MaxProductIonCharge\t" + MaxProductIonCharge);
                    writer.WriteLine("MinSequenceMass\t" + MinSequenceMass);
                    writer.WriteLine("MaxSequenceMass\t" + MaxSequenceMass);
                    writer.WriteLine("MaxDynamicModificationsPerSequence\t" + _maxNumDynModsPerSequence);
                    foreach (var searchMod in _searchModifications)
                    {
                        writer.WriteLine("Modification\t" + searchMod);
                    }
                }
            }
        }
Exemple #12
0
        public bool RunSearch(double corrThreshold = 0.7, CancellationToken?cancellationToken = null, IProgress <ProgressData> progress = null)
        {
            // Get the Normalized spec file/folder path
            SpecFilePath = MassSpecDataReaderFactory.NormalizeDatasetPath(SpecFilePath);

            var prog     = new Progress <ProgressData>();
            var progData = new ProgressData(progress);

            if (progress != null)
            {
                prog = new Progress <ProgressData>(p =>
                {
                    progData.Status         = p.Status;
                    progData.StatusInternal = p.StatusInternal;
                    progData.Report(p.Percent);
                });
            }

            var sw    = new Stopwatch();
            var swAll = new Stopwatch();

            swAll.Start();
            ErrorMessage = string.Empty;

            Console.Write(@"Reading raw file...");
            progData.Status = "Reading spectra file";
            progData.StepRange(10.0);
            sw.Start();

            _run = PbfLcMsRun.GetLcMsRun(SpecFilePath, 0, 0, prog);

            _ms2ScanNums             = _run.GetScanNumbers(2).ToArray();
            _isolationWindowTargetMz = new double[_run.MaxLcScan + 1];
            foreach (var ms2Scan in _ms2ScanNums)
            {
                var ms2Spec = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
                if (ms2Spec == null)
                {
                    continue;
                }
                _isolationWindowTargetMz[ms2Scan] = ms2Spec.IsolationWindow.IsolationWindowTargetMz;
            }


            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(20.0);
            ISequenceFilter ms1Filter;

            if (this.ScanNumbers != null && this.ScanNumbers.Any())
            {
                ms1Filter = new SelectedMsMsFilter(this.ScanNumbers);
            }
            else if (string.IsNullOrWhiteSpace(FeatureFilePath))
            {
                // Checks whether SpecFileName.ms1ft exists
                var ms1FtFilePath = MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, LcMsFeatureFinderLauncher.FileExtension);
                if (!File.Exists(ms1FtFilePath))
                {
                    Console.WriteLine(@"Running ProMex...");
                    sw.Start();
                    var param = new LcMsFeatureFinderInputParameter
                    {
                        InputPath                = SpecFilePath,
                        MinSearchMass            = MinSequenceMass,
                        MaxSearchMass            = MaxSequenceMass,
                        MinSearchCharge          = MinPrecursorIonCharge,
                        MaxSearchCharge          = MaxPrecursorIonCharge,
                        CsvOutput                = false,
                        ScoreReport              = false,
                        LikelihoodScoreThreshold = -10
                    };
                    var featureFinder = new LcMsFeatureFinderLauncher(param);
                    featureFinder.Run();
                }
                sw.Reset();
                sw.Start();
                Console.Write(@"Reading ProMex results...");
                ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, ms1FtFilePath, -10);
            }
            else
            {
                sw.Reset();
                sw.Start();
                var extension = Path.GetExtension(FeatureFilePath);
                if (extension.ToLower().Equals(".csv"))
                {
                    Console.Write(@"Reading ICR2LS/Decon2LS results...");
                    ms1Filter = new IsosFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else if (extension.ToLower().Equals(".ms1ft"))
                {
                    Console.Write(@"Reading ProMex results...");
                    ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, FeatureFilePath, -10);
                }
                else if (extension.ToLower().Equals(".msalign"))
                {
                    Console.Write(@"Reading MS-Align+ results...");
                    ms1Filter = new MsDeconvFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else
                {
                    ms1Filter = null;  //new Ms1FeatureMatrix(_run);
                }
            }

            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);


            // pre-generate deconvoluted spectra for scoring
            _massBinComparer = new FilteredProteinMassBinning(AminoAcidSet, MaxSequenceMass + 1000);

            _ms2ScorerFactory2 = new CompositeScorerFactory(_run, _massBinComparer, AminoAcidSet,
                                                            MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
            sw.Reset();
            Console.WriteLine(@"Generating deconvoluted spectra for MS/MS spectra...");
            sw.Start();
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken      = cancellationToken ?? CancellationToken.None
            };

            Parallel.ForEach(_ms2ScanNums, pfeOptions, ms2ScanNum =>
            {
                _ms2ScorerFactory2.DeconvonluteProductSpectrum(ms2ScanNum);
            });
            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(10.0);
            progData.Status = "Reading Fasta File";

            // Target database
            var targetDb = new FastaDatabase(DatabaseFilePath);

            targetDb.Read();

            // Generate sequence tags for all MS/MS spectra
            if (TagBasedSearch)
            {
                progData.StepRange(25.0);
                progData.Status = "Generating Sequence Tags";
                sw.Reset();
                Console.WriteLine(@"Generating sequence tags for MS/MS spectra...");
                sw.Start();
                var seqTagGen = GetSequenceTagGenerator();
                _tagMs2ScanNum = seqTagGen.GetMs2ScanNumsContainingTags().ToArray();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                _tagSearchEngine = new ScanBasedTagSearchEngine(_run, seqTagGen, new LcMsPeakMatrix(_run, ms1Filter), targetDb, ProductIonTolerance, AminoAcidSet,
                                                                _ms2ScorerFactory2,
                                                                ScanBasedTagSearchEngine.DefaultMinMatchedTagLength,
                                                                MaxSequenceMass, MinProductIonCharge, MaxProductIonCharge);
            }

            var specFileName         = MassSpecDataReaderFactory.RemoveExtension(Path.GetFileName(SpecFilePath));
            var targetOutputFilePath = Path.Combine(OutputDir, specFileName + TargetFileNameEnding);
            var decoyOutputFilePath  = Path.Combine(OutputDir, specFileName + DecoyFileNameEnding);
            var tdaOutputFilePath    = Path.Combine(OutputDir, specFileName + TdaFileNameEnding);

            progData.StepRange(60.0);
            progData.Status = "Running Target search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
            {
                sw.Reset();
                Console.Write(@"Reading the target database...");
                sw.Start();
                targetDb.Read();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                var targetMatches = new SortedSet <DatabaseSequenceSpectrumMatch> [_run.MaxLcScan + 1];

                progData.MaxPercentage = 42.5;
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the target database");
                    sw.Start();
                    RunTagBasedSearch(targetMatches, targetDb, null, prog);
                    Console.WriteLine(@"Target database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                }
                progData.MaxPercentage = 60.0;

                sw.Reset();
                Console.WriteLine(@"Searching the target database");
                sw.Start();
                RunSearch(targetMatches, targetDb, ms1Filter, null, prog);
                Console.WriteLine(@"Target database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value usign generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for target-spectrum matches");
                sw.Start();
                var bestTargetMatches = RunGeneratingFunction(targetMatches);
                WriteResultsToFile(bestTargetMatches, targetOutputFilePath, targetDb);
                sw.Stop();
                Console.WriteLine(@"Target-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(95.0); // total to 95%
            progData.Status = "Running Decoy search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
            {
                // Decoy database
                sw.Reset();
                sw.Start();
                var decoyDb = targetDb.Decoy(null, true);

                Console.Write(@"Reading the decoy database...");
                decoyDb.Read();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                progData.MaxPercentage = 77.5;
                var decoyMatches = new SortedSet <DatabaseSequenceSpectrumMatch> [_run.MaxLcScan + 1];
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the decoy database");
                    sw.Start();
                    RunTagBasedSearch(decoyMatches, decoyDb, null, prog);
                    Console.WriteLine(@"Decoy database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                }
                progData.MaxPercentage = 95.0;

                sw.Reset();
                Console.WriteLine(@"Searching the decoy database");
                sw.Start();
                RunSearch(decoyMatches, decoyDb, ms1Filter, null, prog);
                Console.WriteLine(@"Decoy database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value usign generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for decoy-spectrum matches");
                sw.Start();
                var bestDecoyMatches = RunGeneratingFunction(decoyMatches);
                WriteResultsToFile(bestDecoyMatches, decoyOutputFilePath, decoyDb);
                sw.Stop();
                Console.WriteLine(@"Decoy-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(100.0);
            progData.Status = "Writing combined results file";
            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
            {
                // Add "Qvalue" and "PepQValue"
                var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
                if (fdrCalculator.HasError())
                {
                    ErrorMessage = fdrCalculator.ErrorMessage;
                    Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
                    return(false);
                }

                fdrCalculator.WriteTo(tdaOutputFilePath);
            }
            progData.Report(100.0);

            Console.WriteLine(@"Done.");
            swAll.Stop();
            Console.WriteLine(@"Total elapsed time for search: {0:f1} sec ({1:f2} min)", swAll.Elapsed.TotalSeconds, swAll.Elapsed.TotalMinutes);

            return(true);
        }