コード例 #1
0
        public void TestTagMatching()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Parse sequence tags
            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            const int minTagLength = 8;
            var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");

            if (!File.Exists(tagFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
            }

            var tagParser = new SequenceTagParser(tagFileName, minTagLength);

            // Parse raw file
            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");

            if (!File.Exists(rawFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            // Parse ID file
            const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var resultParser = new MsPathFinderParser(resultFilePath);
            const double qValueThreshold = 0.01;
            var idList = resultParser.GetIdWithQValuesNoLargerThan(qValueThreshold);
            var idFlag = new bool[run.MaxLcScan + 1];
            foreach (var id in idList) idFlag[id.Scan] = true;

            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

//            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";
            var fastaDb = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            var numMs2Spectra = 0;
            var numSpectraWithTag = 0;
            var numSpectraWithMatchingTag = 0;
            var numSpectraWithMatchedTagNoId = 0;
            foreach (var ms2ScanNum in run.GetScanNumbers(2))
            {
                ++numMs2Spectra;
                var tags = tagParser.GetSequenceTags(ms2ScanNum);
                if (tags != null)
                {
                    ++numSpectraWithTag;
                    foreach (var tag in tags)
                    {
                        if (searchableDb.Search(tag.Sequence) >= 0)
                        {
                            //Console.WriteLine(tag.Sequence);
                            ++numSpectraWithMatchingTag;
                            if (!idFlag[ms2ScanNum]) ++numSpectraWithMatchedTagNoId;
                            break;
                        }
                    }
                }
            }
            Console.WriteLine("Tag length: {0}", minTagLength);
            Console.WriteLine("NumMs2Spectra: {0}", numMs2Spectra);
            Console.WriteLine("NumMs2SpectraWithTags: {0} ({1})", numSpectraWithTag, numSpectraWithTag/(float)numMs2Spectra);
            Console.WriteLine("NumMs2SpectraWithMatchingTags: {0} ({1})", numSpectraWithMatchingTag, numSpectraWithMatchingTag/(float)numMs2Spectra);
            Console.WriteLine("NumMs2SpectraWithMatchingTagsWithNoId: {0} ({1})", numSpectraWithMatchedTagNoId, numSpectraWithMatchedTagNoId / (float)numMs2Spectra);
        }
コード例 #2
0
        public void TestFeatureIdMatching()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var resultParser = new MsPathFinderParser(resultFilePath);
            const double qValueThreshold = 0.01;
            const double tolerancePpm = 13;

            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");

            if (!File.Exists(rawFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            var idList =
                resultParser.GetIdList().TakeWhile(id => id.QValue <= qValueThreshold).OrderBy(id => id.Mass).ToList();
            var idMassList = idList.Select(id => id.Mass).ToList();
            var idFlag = new bool[idList.Count];


            // Parse sequence tags
            var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
            const int minTagLength = 6;
            const int numProtMatches = 4;
//            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";

            if (!File.Exists(tagFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            var tagParser = new SequenceTagParser(tagFileName, minTagLength);

            var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
            var featureParser = new TsvFileParser(featureFileName);

            var minScan = featureParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
            var maxScan = featureParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
            var minCharge = featureParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var maxCharge = featureParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var monoMass = featureParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();

            var numFeaturesWithId = 0;
            var numFeaturesWithMs2 = 0;
            var numFeaturesWithTags = 0;
            var numFeaturesWithMatchingTags = 0;
            var numFeaturesWithTwoOrMoreMatchingTags = 0;
            var numFeaturesWithNoIdAndMatchingTags = 0;

            for (var i = 0; i < featureParser.NumData; i++)
            {
                var mass = monoMass[i];

                // Find Id
                var tolDa = new Tolerance(tolerancePpm).GetToleranceAsDa(mass, 1);
                var minMass = mass - tolDa;
                var maxMass = mass + tolDa;
                var index = idMassList.BinarySearch(mass);
                if (index < 0) index = ~index;

                var matchedId = new List<MsPathFinderId>();
                // go down
                var curIndex = index - 1;
                while (curIndex >= 0)
                {
                    var curId = idList[curIndex];
                    if (curId.Mass < minMass) break;
                    if (curId.Scan > minScan[i] && curId.Scan < maxScan[i]
                        && curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
                    {
                        matchedId.Add(curId);
                        idFlag[curIndex] = true;
                    }
                    --curIndex;
                }

                // go up
                curIndex = index;
                while (curIndex < idList.Count)
                {
                    var curId = idList[curIndex];
                    if (curId.Mass > maxMass) break;
                    if (curId.Scan >= minScan[i] && curId.Scan <= maxScan[i]
                        && curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
                    {
                        matchedId.Add(curId);
                        idFlag[curIndex] = true;
                    }
                    ++curIndex;
                }

                var hasId = false;
                if (matchedId.Any())
                {
                    ++numFeaturesWithId;
                    hasId = true;
                }

                // Find MS2 scans
//                var numMs2Scans = 0;
                var tags = new List<SequenceTag>();
                var hasMs2 = false;
                for (var scanNum = minScan[i]; scanNum <= maxScan[i]; scanNum++)
                {
                    var isolationWindow = run.GetIsolationWindow(scanNum);
                    if (isolationWindow == null) continue;
                    var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
                    var charge = (int)Math.Round(mass / isolationWindowTargetMz);
                    if (charge < minCharge[i] || charge > maxCharge[i]) continue;
                    var mz = Ion.GetIsotopeMz(mass, charge,
                        Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex);
                    if (isolationWindow.Contains(mz))
                    {
//                        ++numMs2Scans;
                        tags.AddRange(tagParser.GetSequenceTags(scanNum));
                        hasMs2 = true;
                    }
                }
                if (hasMs2) ++numFeaturesWithMs2;
                if (tags.Any()) ++numFeaturesWithTags;
                var protHist = new Dictionary<string, int>();
                var hasMatchedTag = false;
                foreach (var tag in tags)
                {
                    var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).Select(idx => fastaDb.GetProteinName(idx)).ToArray();
                    if (matchedProteins.Any())
                    {
                        hasMatchedTag = true;
                        foreach (var protein in matchedProteins)
                        {
                            int num;
                            if (protHist.TryGetValue(protein, out num)) protHist[protein] = num + 1;
                            else protHist[protein] = 1;
                        }
                    }
                }
                if (hasMatchedTag)
                {
                    ++numFeaturesWithMatchingTags;
                    if (!hasId) ++numFeaturesWithNoIdAndMatchingTags;
                }
                if (protHist.Any())
                {
                    var maxOcc = protHist.Values.Max();
                    if (maxOcc >= numProtMatches) ++numFeaturesWithTwoOrMoreMatchingTags;
                }
            }

            Console.WriteLine("NumFeatures: {0}", featureParser.NumData);
            Console.WriteLine("NumId: {0}", idList.Count);
            Console.WriteLine("NumFeaturesWithId: {0} ({1})", numFeaturesWithId, numFeaturesWithId / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMs2: {0} ({1})", numFeaturesWithMs2, numFeaturesWithMs2 / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithTag: {0} ({1})", numFeaturesWithTags, numFeaturesWithTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMatchedTag: {0} ({1})", numFeaturesWithMatchingTags, numFeaturesWithMatchingTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMoreThanOneMatchedTag: {0} ({1})", numFeaturesWithTwoOrMoreMatchingTags, numFeaturesWithTwoOrMoreMatchingTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithNoIdAndMatchedTag: {0} ({1})", numFeaturesWithNoIdAndMatchingTags, numFeaturesWithNoIdAndMatchingTags / (float)featureParser.NumData);

            for (var i = 0; i < idFlag.Length; i++)
            {
                if (!idFlag[i])
                {
                    Console.WriteLine(idList[i].Scan);
                }
            }


//            Console.WriteLine(string.Join(",", filter.GetMatchingMs2ScanNums(8115.973001)));
//
//            Console.WriteLine(featureFileName);
        }