Example #1
0
        /// <summary>
        /// Creates a top-down launcher for the given spectrum file, database and output directory,
        /// and resets every search parameter to the default assigned below.
        /// </summary>
        /// <param name="specFilePath">Stored in <c>SpecFilePath</c>.</param>
        /// <param name="dbFilePath">Stored in <c>DatabaseFilePath</c>.</param>
        /// <param name="outputDir">Stored in <c>OutputDir</c> as given (no validation happens here).</param>
        /// <param name="aaSet">Stored in <c>AminoAcidSet</c>.</param>
        /// <param name="featureFilePath">Stored in <c>FeatureFilePath</c>; defaults to null.</param>
        public IcTopDownLauncher(
            string specFilePath,
            string dbFilePath,
            string outputDir,
            AminoAcidSet aaSet,
            string featureFilePath = null)
        {
            ErrorMessage = string.Empty;

            // Caller-supplied inputs and outputs.
            SpecFilePath = specFilePath;
            DatabaseFilePath = dbFilePath;
            OutputDir = outputDir;
            FeatureFilePath = featureFilePath;
            AminoAcidSet = aaSet;

            // Sequence length and mass window.
            MinSequenceLength = 21;
            MaxSequenceLength = 300;
            MinSequenceMass = 2000.0;
            MaxSequenceMass = 50000.0;

            // Allowed terminal cleavages.
            MaxNumNTermCleavages = 1;
            MaxNumCTermCleavages = 0;

            // Charge-state ranges.
            MinPrecursorIonCharge = 2;
            MaxPrecursorIonCharge = 60;
            MinProductIonCharge = 1;
            MaxProductIonCharge = 20;

            // Mass tolerances (both constructed from the value 10).
            PrecursorIonTolerance = new Tolerance(10);
            ProductIonTolerance = new Tolerance(10);

            // Search behaviour.
            RunTargetDecoyAnalysis = DatabaseSearchMode.Both;
            SearchMode = InternalCleavageType.SingleInternalCleavage;
            TagBasedSearch = true;
            NumMatchesPerSpectrum = 3;
            MaxNumThreads = 4;
            ScanNumbers = null;
        }
Example #2
0
 /// <summary>
 /// Loads the spectrum file, builds an <c>InformedTopDownScorer</c> over it and immediately
 /// calls <c>Rescore</c> on the given result file. All of the work happens inside the constructor.
 /// </summary>
 /// <param name="specFilePath">Spectrum file passed to <c>InMemoryLcMsRun.GetLcMsRun</c>.</param>
 /// <param name="icResultFilePath">First argument forwarded to <c>Rescore</c>.</param>
 /// <param name="outputFilePath">Second argument forwarded to <c>Rescore</c>.</param>
 /// <param name="aaSet">Amino acid set handed to the scorer.</param>
 /// <param name="tolerance">Tolerance handed to the scorer.</param>
 /// <param name="ms2CorrThreshold">Handed to the scorer; defaults to 0.7.</param>
 /// <param name="minProductIonCharge">Handed to the scorer; defaults to 1.</param>
 /// <param name="maxProductIonCharge">Handed to the scorer; defaults to 10.</param>
 public IcRescorer(
     string specFilePath,
     string icResultFilePath,
     string outputFilePath,
     AminoAcidSet aaSet,
     Tolerance tolerance,
     double ms2CorrThreshold = 0.7,
     int minProductIonCharge = 1,
     int maxProductIonCharge = 10)
 {
     // NOTE(review): the same value is passed for both trailing arguments of GetLcMsRun;
     // presumably these are per-MS-level filter thresholds — confirm against GetLcMsRun.
     const double runLoadArgument = 1.4826;
     var lcMsRun = InMemoryLcMsRun.GetLcMsRun(specFilePath, runLoadArgument, runLoadArgument);

     _topDownScorer = new InformedTopDownScorer(
         lcMsRun,
         aaSet,
         minProductIonCharge,
         maxProductIonCharge,
         tolerance,
         ms2CorrThreshold);

     Rescore(icResultFilePath, outputFilePath);
 }
 /// <summary>
 /// Convenience overload: forwards to the five-argument constructor, passing 1 and 3
 /// as the two trailing charge arguments.
 /// </summary>
 /// <param name="run">Forwarded unchanged.</param>
 /// <param name="peptideEnumerator">Forwarded unchanged.</param>
 /// <param name="aminoAcidSet">Forwarded unchanged.</param>
 public InformedProteomicsAnalysis(InMemoryLcMsRun run, IEnumerable<string> peptideEnumerator, AminoAcidSet aminoAcidSet)
     : this(run, peptideEnumerator, aminoAcidSet, 1, 3)
 {
 }
Example #4
0
        /// <summary>
        /// Times how long it takes to read an indexed FASTA database and build a sequence graph
        /// for every annotation it enumerates with <c>Enzyme.Trypsin</c>, then prints the elapsed seconds.
        /// </summary>
        private static void TestCountingPeptides()
        {
            var aaSet = new AminoAcidSet();

            var sw = new Stopwatch();
            sw.Start();

            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_003456_9B916A8B.fasta";
            var db = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);
            indexedDb.Read();

            // NOTE(review): the numeric arguments (7, 40, 2, 2) are passed through as-is;
            // presumably length bounds and termini/cleavage limits — confirm against AnnotationsAndOffsets.
            var peptides = indexedDb.AnnotationsAndOffsets(7, 40, 2, 2, Enzyme.Trypsin);

            // Only graph construction is being timed, so the resulting graph is discarded.
            Parallel.ForEach(peptides, annotationAndOffset =>
            {
                SequenceGraph.CreateGraph(aaSet, annotationAndOffset.Annotation);
            });

            sw.Stop();
            var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"{0:f4} sec", sec);
        }
Example #5
0
        /// <summary>
        /// Initializes an empty shifted sequence graph whose position 0 is a pseudo amino acid
        /// named "Shift" that carries <paramref name="shiftedMass"/>.
        /// </summary>
        /// <param name="aminoAcidSet">Amino acid set; also the source of the modification parameters.</param>
        /// <param name="shiftedMass">Mass of the leading shift residue; also stored in <c>ShiftMass</c>.</param>
        /// <param name="isForward">Stored in <c>_isForward</c>.</param>
        /// <param name="maxSequenceLength">All per-position arrays are sized to this value plus 2.</param>
        /// <param name="maxSequenceMass">Stored in <c>_maxSequenceMass</c>; defaults to 50000.0.</param>
        public ShiftedSequenceGraph(AminoAcidSet aminoAcidSet, double shiftedMass, bool isForward, int maxSequenceLength, double maxSequenceMass = 50000.0)
        {
            _aminoAcidSet = aminoAcidSet;
            _modificationParams = aminoAcidSet.GetModificationParams();
            _isForward = isForward;
            _maxSequenceMass = maxSequenceMass;

            _index = 0;
            _maxSeqIndex = maxSequenceLength + 2;   // shift + Term + length

            // Slot 0 of every per-position array belongs to the shift pseudo-residue.
            var shiftResidue = new AminoAcid('\0', "Shift", new CompositionWithDeltaMass(shiftedMass));

            _aminoAcidSequence = new AminoAcid[_maxSeqIndex];
            _aminoAcidSequence[0] = shiftResidue;

            ShiftMass = shiftedMass;

            _fragmentComposition = new Composition.Composition[_maxSeqIndex];
            _fragmentComposition[0] = shiftResidue.Composition;

            _graph = new Node[_maxSeqIndex][];
            _graph[0] = new[] { new Node(0) };

            // One composition slot per modification combination at every sequence position.
            var numModCombinations = _modificationParams.NumModificationCombinations;
            _nodeComposition = new Composition.Composition[_maxSeqIndex][];
            _compNodeComposition = new Composition.Composition[_maxSeqIndex][];
            for (var position = 0; position < _maxSeqIndex; position++)
            {
                _nodeComposition[position] = new Composition.Composition[numModCombinations];
                _compNodeComposition[position] = new Composition.Composition[numModCombinations];
            }

            IsValid = true;
        }
 /// <summary>
 /// Stores the supplied collaborators and search limits, and builds an <c>Ms1FtFilter</c>
 /// and a <c>SearchableDatabase</c> from them.
 /// </summary>
 /// <param name="run">LC-MS run; also passed to the MS1 feature filter.</param>
 /// <param name="featureParser">Feature parser; its <c>Ms1FtFileName</c> is passed to the filter.</param>
 /// <param name="ms2Scorer">Stored in <c>_ms2Scorer</c>.</param>
 /// <param name="tagParser">Stored in <c>_tagParser</c>.</param>
 /// <param name="fastaDb">Stored in <c>_fastaDb</c> and wrapped in a <c>SearchableDatabase</c>.</param>
 /// <param name="tolerance">Stored in <c>_tolerance</c>; also passed to the filter.</param>
 /// <param name="aaSet">Stored in <c>_aaSet</c>.</param>
 /// <param name="maxSequenceMass">Defaults to 50000.0.</param>
 /// <param name="minProductIonCharge">Defaults to 1.</param>
 /// <param name="maxProductIonCharge">Defaults to 20.</param>
 public FeatureBasedTagSearchEngine(
     LcMsRun run,
     Ms1FtParser featureParser,
     ProductScorerBasedOnDeconvolutedSpectra ms2Scorer,
     SequenceTagParser tagParser,
     FastaDatabase fastaDb,
     Tolerance tolerance,
     AminoAcidSet aaSet,
     double maxSequenceMass = 50000.0,
     int minProductIonCharge = 1,
     int maxProductIonCharge = 20)
 {
     // Collaborators handed in by the caller.
     _run = run;
     _featureParser = featureParser;
     _ms2Scorer = ms2Scorer;
     _tagParser = tagParser;
     _fastaDb = fastaDb;
     _aaSet = aaSet;
     _tolerance = tolerance;

     // Search limits.
     _maxSequenceMass = maxSequenceMass;
     _minProductIonCharge = minProductIonCharge;
     _maxProductIonCharge = maxProductIonCharge;

     // Helpers derived from the inputs above.
     _ms1FtFilter = new Ms1FtFilter(run, tolerance, featureParser.Ms1FtFileName);
     _searchableDb = new SearchableDatabase(fastaDb);
 }
Example #7
0
        /// <summary>
        /// Builds a sequence graph for a short protein annotation with methylation (K) and
        /// oxidation (M) as search modifications, and prints every sequence composition with its mass.
        /// </summary>
        public void TestBuildingReverseGraph()
        {
            TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            const string annotation = "_.MARTKQTARK._";
            const int numMaxModsPerProtein = 2;

            // Amino acid set with the two search modifications, capped at numMaxModsPerProtein.
            var aaSet = new AminoAcidSet(
                new List<SearchModification>
                {
                    new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false),
                    new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false)
                },
                numMaxModsPerProtein);

            var graph = SequenceGraph.CreateGraph(aaSet, annotation);
            foreach (var sequenceComposition in graph.GetSequenceCompositions())
            {
                Console.WriteLine("{0}\t{1}", sequenceComposition, sequenceComposition.Mass);
            }
        }
        /// <summary>
        /// Builds the per-bin adjacency lists. For every source bin and every amino acid in the
        /// extended array, the bin of (source mass + amino acid mass) receives an edge that points
        /// back to the source bin. For bin 0 only, each unmodified amino acid is additionally
        /// combined with every terminal modification.
        /// </summary>
        /// <param name="comparer">Mass binning that supplies bin count, bin masses and bin lookup.</param>
        /// <param name="aminoAcidSet">Source of the terminal modifications and the extended amino acid array.</param>
        public ProteinScoringGraphFactory(IMassBinning comparer, AminoAcidSet aminoAcidSet)
        {
            _comparer = comparer;

            var binCount = _comparer.NumberOfBins;
            _adjList = new LinkedList<ScoringGraphEdge>[binCount];
            for (var bin = 0; bin < binCount; bin++)
            {
                _adjList[bin] = new LinkedList<ScoringGraphEdge>();
            }

            var terminalModifications = FilteredProteinMassBinning.GetTerminalModifications(aminoAcidSet);
            var aminoAcidArray = FilteredProteinMassBinning.GetExtendedAminoAcidArray(aminoAcidSet);

            for (var sourceBin = 0; sourceBin < binCount; sourceBin++)
            {
                var sourceMass = _comparer.GetMass(sourceBin);
                var isFirstBin = sourceBin == 0;

                foreach (var aa in aminoAcidArray)
                {
                    var targetBin = _comparer.GetBinNumber(sourceMass + aa.Mass);

                    // An out-of-range target also skips the terminal-modification variants of this residue.
                    if (targetBin < 0 || targetBin >= binCount) continue;
                    _adjList[targetBin].AddLast(new ScoringGraphEdge(sourceBin));

                    // Terminal modifications are only applied at bin 0, and only to unmodified residues.
                    if (!isFirstBin || aa is ModifiedAminoAcid) continue;

                    foreach (var terminalMod in terminalModifications)
                    {
                        var modifiedAa = new ModifiedAminoAcid(aa, terminalMod);
                        var modifiedTargetBin = _comparer.GetBinNumber(sourceMass + modifiedAa.Mass);
                        if (modifiedTargetBin >= 0 && modifiedTargetBin < binCount)
                        {
                            _adjList[modifiedTargetBin].AddLast(new ScoringGraphEdge(sourceBin));
                        }
                    }
                }
            }
        }
Example #9
0
        /// <summary>
        /// Scores one peptide-spectrum match twice: first with the graph-based fragment score on
        /// deconvoluted spectra, then with <c>InformedBottomUpScorer</c>. Prints both results.
        /// Ignored when the raw file is not present.
        /// </summary>
        public void TestPsm()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
            }

            // The match under test.
            const int ms2ScanNum = 25534;
            const int charge = 2;
            const string annotation = "R.LENWPPASLADDL._";
            const char pre = 'R';
            const string sequence = "LENWPPASLADDL";
            const char post = 'A';

            var aaSet = new AminoAcidSet();
            var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 0, 0);

            // Fast score from deconvoluted product spectra.
            var deconvolutedScorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 2, 10, 0, 1.1);
            deconvolutedScorer.DeconvoluteAllProductSpectra();
            var scanScorer = deconvolutedScorer.GetMs2Scorer(ms2ScanNum);

            var sequenceGraph = SequenceGraph.CreateGraph(aaSet, annotation);
            sequenceGraph.SetSink(0);
            var fastScore = sequenceGraph.GetFragmentScore(scanScorer);
            Console.WriteLine("Fast search score: " + fastScore);

            // Refined score for the same match.
            var sinkComposition = sequenceGraph.GetSinkSequenceCompositionWithH2O();
            var bottomUpScorer = new InformedBottomUpScorer(run, aaSet, 1, 15, new Tolerance(10));
            var refinedScore = bottomUpScorer.GetScores(pre, sequence, post, sinkComposition, charge, ms2ScanNum);
            Console.WriteLine("RefinedScores: {0}", refinedScore);
        }
        /// <summary>
        /// Stores the supplied collaborators and limits, wraps the FASTA database in a
        /// <c>SearchableDatabase</c>, and opens the scan range to
        /// <c>int.MinValue</c>..<c>int.MaxValue</c>.
        /// </summary>
        /// <param name="run">Stored in <c>_run</c>.</param>
        /// <param name="seqTagFinder">Stored in <c>_seqTagFinder</c>.</param>
        /// <param name="featureFinder">Stored in <c>_featureFinder</c>.</param>
        /// <param name="fastaDb">Used only to build the searchable database; not stored itself.</param>
        /// <param name="tolerance">Stored in <c>_tolerance</c>.</param>
        /// <param name="aaSet">Stored in <c>_aaSet</c>.</param>
        /// <param name="ms2ScorerFactory">Stored in <c>_ms2ScorerFactory</c>; may be null (the default).</param>
        /// <param name="minMatchedTagLength">Defaults to <c>DefaultMinMatchedTagLength</c>.</param>
        /// <param name="maxSequenceMass">Defaults to 50000.0.</param>
        /// <param name="minProductIonCharge">Defaults to 1.</param>
        /// <param name="maxProductIonCharge">Defaults to 20.</param>
        public ScanBasedTagSearchEngine(
            LcMsRun run,
            ISequenceTagFinder seqTagFinder,
            LcMsPeakMatrix featureFinder,
            FastaDatabase fastaDb,
            Tolerance tolerance,
            AminoAcidSet aaSet,
            CompositeScorerFactory ms2ScorerFactory = null,
            int minMatchedTagLength = DefaultMinMatchedTagLength,
            double maxSequenceMass = 50000.0,
            int minProductIonCharge = 1,
            int maxProductIonCharge = 20)
        {
            // Collaborators.
            _run = run;
            _seqTagFinder = seqTagFinder;
            _featureFinder = featureFinder;
            _ms2ScorerFactory = ms2ScorerFactory;
            _searchableDb = new SearchableDatabase(fastaDb);

            // Matching parameters.
            _tolerance = tolerance;
            _aaSet = aaSet;
            _minMatchedTagLength = minMatchedTagLength;
            _maxSequenceMass = maxSequenceMass;
            _minProductIonCharge = minProductIonCharge;
            _maxProductIonCharge = maxProductIonCharge;

            // Scan range is unrestricted until changed.
            MinScan = int.MinValue;
            MaxScan = int.MaxValue;
        }
Example #11
0
        /// <summary>
        /// Loads scan 810 from the ETD top-down raw file, builds the "z6" ion of a 54-residue
        /// suffix, and asserts that its fit score against the spectrum is below 0.15.
        /// Ignored when the ETD raw file is not present.
        /// </summary>
        public void TestFitScoreCalculationEtd()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            if (!File.Exists(TestLcMsRun.TestTopDownRawFilePathEtd))
            {
                // Report the ETD path that was actually checked (the message previously named the CID path).
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestLcMsRun.TestTopDownRawFilePathEtd);
            }

            var run = InMemoryLcMsRun.GetLcMsRunScanRange(TestLcMsRun.TestTopDownRawFilePathEtd, 810, 810);
            var spec = run.GetSpectrum(810) as ProductSpectrum;
            Assert.True(spec != null);

            const string suf54 = "ENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK";
            var suf54Comp = new AminoAcidSet().GetComposition(suf54);
            Assert.True(suf54Comp != null);

            var ionType = new IonTypeFactory(10).GetIonType("z6");
            var ion = ionType.GetIon(suf54Comp);
            Console.WriteLine("MonoMz: {0}, MonoMass: {1}", ion.GetMonoIsotopicMz(), ion.Composition.Mass);

            var fitScore = spec.GetFitScore(ion, new Tolerance(15), 0.1);
            Console.WriteLine("FitScore: {0}", fitScore);
            Assert.True(fitScore < 0.15);
        }
Example #12
0
        /// <summary>
        /// Reads the TSV result file and returns one <c>SpectrumMatch</c> per distinct peptide whose
        /// Q-value does not exceed <c>QValueThreshold</c>. Only the first qualifying row of each
        /// peptide is kept.
        /// </summary>
        /// <returns>
        /// The accepted matches; an empty list when the peptide column is missing from the file.
        /// </returns>
        public IList<SpectrumMatch> Read()
        {
            var specMatches = new List<SpectrumMatch>();
            var tsvFile = new TsvFileParser(_fileName);
            var precursorCharges = tsvFile.GetData(PrecursorChargeHeader);
            var scans = tsvFile.GetData(ScanHeader);

            var peptides = tsvFile.GetData(TopDownPeptideHeader);
            if (peptides == null)
            {
                return specMatches;
            }

            // The TSV is machine-written, so numbers are parsed with the invariant culture;
            // otherwise "0.01" is misread on machines whose decimal separator is a comma.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var filterValues = tsvFile.GetData(QValueHeader);
            var seenPeptides = new HashSet<string>();
            var aset = new AminoAcidSet();

            for (var i = 0; i < peptides.Count; i++)
            {
                if (Convert.ToDouble(filterValues[i], invariant) > QValueThreshold) continue;

                // Add returns false for a peptide that was already accepted (single hash lookup).
                if (!seenPeptides.Add(peptides[i])) continue;

                var scanNum = Convert.ToInt32(scans[i], invariant);
                var precursorCharge = Convert.ToInt32(precursorCharges[i], invariant);
                specMatches.Add(new SpectrumMatch(new Sequence(peptides[i], aset), _lcms, scanNum, precursorCharge, _decoy));
            }

            return specMatches;
        }
Example #13
0
 /// <summary>
 /// Creates an empty tag set for the given sequence.
 /// </summary>
 /// <param name="sequence">Stored in <c>_sequence</c>.</param>
 /// <param name="aminoAcidSet">Stored in <c>_aminoAcidSet</c>.</param>
 /// <param name="tolerance">Stored in <c>_tolerance</c>.</param>
 /// <param name="relaxedTolerance">Stored in <c>_relaxedTolerance</c>.</param>
 public MatchedTagSet(
     string sequence,
     AminoAcidSet aminoAcidSet,
     Tolerance tolerance,
     Tolerance relaxedTolerance)
 {
     // Start with no tags.
     _tags = new List<MatchedTag>();

     _sequence = sequence;
     _aminoAcidSet = aminoAcidSet;
     _relaxedTolerance = relaxedTolerance;
     _tolerance = tolerance;
 }
Example #14
0
 /// <summary>
 /// Creates a top-down scorer; every argument is stored in the property of the matching name.
 /// </summary>
 /// <param name="run">Stored in <c>Run</c>.</param>
 /// <param name="aaSet">Stored in <c>AminoAcidSet</c>.</param>
 /// <param name="minProductCharge">Stored in <c>MinProductCharge</c>.</param>
 /// <param name="maxProductCharge">Stored in <c>MaxProductCharge</c>.</param>
 /// <param name="tolerance">Stored in <c>Tolerance</c>.</param>
 /// <param name="ms2CorrThreshold">Stored in <c>Ms2CorrThreshold</c>; defaults to 0.7.</param>
 public InformedTopDownScorer(
     LcMsRun run,
     AminoAcidSet aaSet,
     int minProductCharge,
     int maxProductCharge,
     Tolerance tolerance,
     double ms2CorrThreshold = 0.7)
 {
     // Data sources.
     Run = run;
     AminoAcidSet = aaSet;

     // Scoring parameters.
     MinProductCharge = minProductCharge;
     MaxProductCharge = maxProductCharge;
     Tolerance = tolerance;
     Ms2CorrThreshold = ms2CorrThreshold;
 }
Example #15
0
 /// <summary>
 /// Creates a bottom-up scorer. Besides storing its arguments it builds a <c>RankScore</c> fixed
 /// to HCD / Orbitrap / Trypsin / Standard and starts with an empty scored-spectrum cache.
 /// </summary>
 /// <param name="run">Stored in <c>Run</c>.</param>
 /// <param name="aaSet">Stored in <c>AminoAcidSet</c>.</param>
 /// <param name="minProductCharge">Stored in <c>MinProductCharge</c>.</param>
 /// <param name="maxProductCharge">Stored in <c>MaxProductCharge</c>.</param>
 /// <param name="tolerance">Stored in <c>Tolerance</c>.</param>
 public InformedBottomUpScorer(
     LcMsRun run,
     AminoAcidSet aaSet,
     int minProductCharge,
     int maxProductCharge,
     Tolerance tolerance)
 {
     // Caller-supplied settings.
     Run = run;
     AminoAcidSet = aaSet;
     MinProductCharge = minProductCharge;
     MaxProductCharge = maxProductCharge;
     Tolerance = tolerance;

     // Internal state; the rank-score configuration is hard-coded.
     _rankScorer = new RankScore(
         ActivationMethod.HCD,
         Ms2DetectorType.Orbitrap,
         Enzyme.Trypsin,
         Protocol.Standard);
     _scoredSpectra = new Dictionary<int, ScoredSpectrum>();
 }
 /// <summary>
 /// Creates an analysis over the given run and peptide source with an explicit charge range.
 /// </summary>
 /// <param name="run">Stored in <c>Run</c>.</param>
 /// <param name="peptideEnumerator">Stored in <c>PeptideEnumerator</c>; it is not enumerated here.</param>
 /// <param name="aminoAcidSet">Stored in <c>AminoAcidSet</c>.</param>
 /// <param name="minCharge">Stored in <c>MinCharge</c>.</param>
 /// <param name="maxCharge">Stored in <c>MaxCharge</c>.</param>
 public InformedProteomicsAnalysis(
     InMemoryLcMsRun run,
     IEnumerable<string> peptideEnumerator,
     AminoAcidSet aminoAcidSet,
     int minCharge,
     int maxCharge)
 {
     // Inputs.
     Run = run;
     PeptideEnumerator = peptideEnumerator;
     AminoAcidSet = aminoAcidSet;

     // Charge range.
     MinCharge = minCharge;
     MaxCharge = maxCharge;
 }
Example #17
0
 /// <summary>
 /// Convenience overload that takes the product tolerance as a plain number, wraps it in a
 /// <c>Tolerance</c>, and forwards everything to the overload that accepts a <c>Tolerance</c>.
 /// </summary>
 /// <param name="run">Forwarded unchanged.</param>
 /// <param name="comparer">Forwarded unchanged.</param>
 /// <param name="aaSet">Forwarded unchanged.</param>
 /// <param name="minProductCharge">Forwarded unchanged; defaults to 1.</param>
 /// <param name="maxProductCharge">Forwarded unchanged; defaults to 20.</param>
 /// <param name="productTolerancePpm">Value passed to <c>new Tolerance(...)</c>; defaults to 10.</param>
 /// <param name="isotopeOffsetTolerance">Forwarded unchanged; defaults to 2.</param>
 /// <param name="filteringWindowSize">Forwarded unchanged; defaults to 1.1.</param>
 public CompositeScorerFactory(
     ILcMsRun run,
     IMassBinning comparer,
     AminoAcidSet aaSet,
     int minProductCharge = 1,
     int maxProductCharge = 20,
     double productTolerancePpm = 10,
     int isotopeOffsetTolerance = 2,
     double filteringWindowSize = 1.1)
     : this(
         run,
         comparer,
         aaSet,
         minProductCharge,
         maxProductCharge,
         new Tolerance(productTolerancePpm),
         isotopeOffsetTolerance,
         filteringWindowSize)
 {
 }
Example #18
0
        /// <summary>
        /// Loads an amino acid set from a modification file and prints, for the protein N-terminus
        /// and then the protein C-terminus, the terminal residue (residue, mass, composition)
        /// followed by the mass of every modification registered for it.
        /// </summary>
        public void TestParsingManyMods()
        {
            const string modFilePath = @"\\protoapps\UserData\Jungkap\Lewy\db\Mods.txt";
            var aaSet = new AminoAcidSet(modFilePath);

            // Protein N-terminus.
            var nTermResidue = AminoAcid.ProteinNTerm.Residue;
            var nTermLocation = SequenceLocation.ProteinNTerm;
            var nTermAa = aaSet.GetAminoAcid(nTermResidue, nTermLocation);
            Console.Write("{0}\t{1}\t{2}", nTermResidue, nTermAa.Mass, nTermAa.Composition);
            foreach (var modIndex in aaSet.GetModificationIndices(nTermResidue, nTermLocation))
            {
                Console.WriteLine(aaSet.GetModificationParams().GetModification(modIndex).Mass);
            }

            // Blank line between the two sections.
            Console.WriteLine();

            // Protein C-terminus.
            var cTermResidue = AminoAcid.ProteinCTerm.Residue;
            var cTermLocation = SequenceLocation.ProteinCTerm;
            var cTermAa = aaSet.GetAminoAcid(cTermResidue, cTermLocation);
            Console.Write("{0}\t{1}\t{2}", cTermResidue, cTermAa.Mass, cTermAa.Composition);
            foreach (var modIndex in aaSet.GetModificationIndices(cTermResidue, cTermLocation))
            {
                Console.WriteLine(aaSet.GetModificationParams().GetModification(modIndex).Mass);
            }
        }
Example #19
0
        /// <summary>
        /// Loads scan 5743 from the CID top-down raw file, checks the elemental composition and
        /// mass computed for a known protein, then verifies the fit, cosine and correlation scores
        /// of its charge-20 ion against the spectrum. Ignored when the raw file is not present.
        /// </summary>
        public void TestFitScoreCalculationCid()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            if (!File.Exists(TestLcMsRun.TestTopDownRawFilePathCid))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestLcMsRun.TestTopDownRawFilePathCid);
            }

            const int scanNum = 5743;
            var run = InMemoryLcMsRun.GetLcMsRunScanRange(TestLcMsRun.TestTopDownRawFilePathCid, scanNum, scanNum);
            var spec = run.GetSpectrum(scanNum);
            Assert.True(spec != null);

            // Composition of the intact protein (residues plus one water).
            const string protein = "MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRAAVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGIVVDYVLEFDVPDELIVDRIVGRRVHAASGRVYHVKFNPPKVEGKDDVTGEDLTTRKDDQEETVRKRLVEYHQMTAPLIGYYQKEAEAGNTKYAKVDGTQAVADVRAALEKILG";
            var protComp = new AminoAcidSet().GetComposition(protein) + Composition.H2O;
            Assert.True(protComp != null);

            // Element counts.
            Assert.True(protComp.C == 1035);
            Assert.True(protComp.H == 1683);
            Assert.True(protComp.N == 289);
            Assert.True(protComp.O == 318);
            Assert.True(protComp.P == 0);
            Assert.True(protComp.S == 7);

            // Masses.
            Assert.True(Math.Abs(protComp.Mass - 23473.245267145) < 0.0000001);
            Assert.True(protComp.NominalMass == 23461);

            var ion = new Ion(protComp, 20);
            var isotopomerEnvelop = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
            Console.WriteLine(@"MonoMz: {0}, MonoMass: {1}", ion.GetMonoIsotopicMz(), ion.Composition.Mass);

            // One tolerance instance is shared by every spectrum query below.
            var tolerance = new Tolerance(15);

            var matchedPeaks = spec.GetAllIsotopePeaks(ion, tolerance, 0.1);
            for (var isotopeIndex = 0; isotopeIndex < matchedPeaks.Length; isotopeIndex++)
            {
                var peak = matchedPeaks[isotopeIndex];
                Console.WriteLine(@"{0}	{1}	{2}	{3}", isotopeIndex, ion.GetIsotopeMz(isotopeIndex), isotopomerEnvelop[isotopeIndex], peak == null ? 0 : peak.Intensity);
            }

            var fitScore = spec.GetFitScore(ion, tolerance, 0.1);
            var cosine = spec.GetConsineScore(ion, tolerance, 0.1);
            var corr = spec.GetCorrScore(ion, tolerance, 0.1);

            Console.WriteLine(@"FitScore: {0}", fitScore);
            Console.WriteLine(@"Cosine: {0}", cosine);
            Console.WriteLine(@"Corr: {0}", corr);

            Assert.True(Math.Abs(fitScore - 0.181194589537041) < 0.0001);
            Assert.True(Math.Abs(cosine - 0.917609346566222) < 0.0001);
            Assert.True(Math.Abs(corr - 0.808326778009839) < 0.0001);
        }
Example #20
0
        /// <summary>
        /// Creates a sequence graph from an annotation of the form "X.SEQUENCE.Y", where X and Y are
        /// each an upper-case letter or the FASTA delimiter and SEQUENCE is one or more upper-case letters.
        /// A delimiter at either end selects the protein terminus; a letter selects the peptide terminus.
        /// NOTE(review): the earlier comment said the annotation is reversed; nothing in this method
        /// reverses it, so that presumably happens in the overload this delegates to — confirm.
        /// </summary>
        /// <param name="aaSet">amino acid set</param>
        /// <param name="annotation">annotation (e.g. G.PEPTIDER.K or _.PEPTIDER._)</param>
        /// <returns>
        /// The graph built by the four-argument overload, or null when <paramref name="annotation"/>
        /// is null or does not match the expected form.
        /// </returns>
        public static SequenceGraph CreateGraph(AminoAcidSet aaSet, string annotation)
        {
            if (annotation == null) return null;

            // Character class accepted for the flanking residue on either side.
            const char delimiter = (char)FastaDatabase.Delimiter;
            var flankingClass = "[A-Z" + delimiter + "]";
            var annotationPattern = "^" + flankingClass + @"\.[A-Z]+\." + flankingClass + "$";
            if (!Regex.IsMatch(annotation, annotationPattern)) return null;

            var lastIndex = annotation.Length - 1;
            var startsAtProteinTerminus = annotation[0] == FastaDatabase.Delimiter;
            var endsAtProteinTerminus = annotation[lastIndex] == FastaDatabase.Delimiter;

            var nTerm = startsAtProteinTerminus ? AminoAcid.ProteinNTerm : AminoAcid.PeptideNTerm;
            var cTerm = endsAtProteinTerminus ? AminoAcid.ProteinCTerm : AminoAcid.PeptideCTerm;

            // Drop the leading "X." and the trailing ".Y".
            var sequence = annotation.Substring(2, annotation.Length - 4);
            return CreateGraph(aaSet, nTerm, sequence, cTerm);
        }
Example #21
0
        /// <summary>
        /// Builds a sequence graph for a long protein annotation with PyroGluQ on Q at the protein
        /// N-terminus as the only search modification, then prints every sequence composition
        /// together with the fragment compositions returned for that modification index.
        /// </summary>
        public void TestBuildingSequenceGraphLongProtein()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Configure amino acid set. Only PyroGluQ is active; the unused modification locals
            // were removed (one of them, named "dehydro", was actually built from PyroGluQ).
            const int numMaxModsPerProtein = 6;
            var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
            var searchModifications = new List<SearchModification> { pyroGluQ };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

            const string protAnnotation =
                "_.MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAALRKLMMNHQ._";

            var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
            var seqCompositions = seqGraph.GetSequenceCompositions();

            for (var modIndex = 0; modIndex < seqCompositions.Length; modIndex++)
            {
                var seqComposition = seqCompositions[modIndex];
                Console.WriteLine("SequenceComposition: {0}", seqComposition);

                foreach (var composition in seqGraph.GetFragmentCompositions(modIndex, 0))
                {
                    Console.WriteLine("***Seq: {0}, Frag: {1}", seqComposition, composition);
                }
            }
        }
Example #22
0
 /// <summary>
 /// Creates a tag match finder; every argument is stored in the corresponding private field.
 /// </summary>
 /// <param name="spec">Stored in <c>_spec</c>.</param>
 /// <param name="ms2Scorer">Stored in <c>_ms2Scorer</c>.</param>
 /// <param name="featureFinder">Stored in <c>_featureFinder</c>.</param>
 /// <param name="proteinSequence">Stored in <c>_proteinSequence</c>.</param>
 /// <param name="tolerance">Stored in <c>_tolerance</c>.</param>
 /// <param name="aaSet">Stored in <c>_aaSet</c>.</param>
 /// <param name="maxSequenceMass">Stored in <c>_maxSequenceMass</c>.</param>
 public TagMatchFinder(
     ProductSpectrum spec,
     IScorer ms2Scorer,
     LcMsPeakMatrix featureFinder,
     string proteinSequence,
     Tolerance tolerance,
     AminoAcidSet aaSet,
     double maxSequenceMass)
 {
     // Spectrum-side inputs.
     _spec = spec;
     _ms2Scorer = ms2Scorer;
     _featureFinder = featureFinder;

     // Sequence-side inputs.
     _proteinSequence = proteinSequence;
     _aaSet = aaSet;

     // Matching limits.
     _tolerance = tolerance;
     _maxSequenceMass = maxSequenceMass;
 }
Example #23
0
        /// <summary>
        /// Creates a bottom-up launcher, resolves the output directory, and initializes every
        /// search parameter to the default assigned below.
        /// </summary>
        /// <param name="specFilePath">Stored in <c>SpecFilePath</c>.</param>
        /// <param name="dbFilePath">Stored in <c>DatabaseFilePath</c>.</param>
        /// <param name="outputDir">
        /// Output directory. When null, the directory of <paramref name="specFilePath"/> is used.
        /// When it does not exist yet, it is created.
        /// </param>
        /// <param name="aaSet">Stored in <c>AminoAcidSet</c>.</param>
        /// <param name="enzyme">Stored in <c>Enzyme</c>.</param>
        /// <exception cref="Exception">
        /// Thrown when <paramref name="outputDir"/> names an existing file rather than a directory.
        /// </exception>
        public IcBottomUpLauncher(
            string specFilePath,
            string dbFilePath,
            string outputDir,
            AminoAcidSet aaSet,
            Enzyme enzyme)
        {
            ErrorMessage = string.Empty;

            SpecFilePath = specFilePath;
            DatabaseFilePath = dbFilePath;
            AminoAcidSet = aaSet;
            Enzyme = enzyme;

            if (outputDir == null)
            {
                // Default to the folder that holds the spectrum file.
                OutputDir = Path.GetDirectoryName(SpecFilePath);
            }
            else
            {
                if (!Directory.Exists(outputDir))
                {
                    if (File.Exists(outputDir) && !File.GetAttributes(outputDir).HasFlag(FileAttributes.Directory))
                    {
                        throw new Exception(outputDir + " is not a directory!");
                    }
                    Directory.CreateDirectory(outputDir);
                }
                OutputDir = outputDir;
            }

            // OutputDir is deliberately not reassigned here: an unconditional
            // "OutputDir = outputDir" used to follow and reset the default above to null.
            MinSequenceLength = 6;
            MaxSequenceLength = 30;
            MinPrecursorIonCharge = 1;
            MaxPrecursorIonCharge = 4;
            MinProductIonCharge = 1;
            MaxProductIonCharge = 3;
            PrecursorIonTolerance = new Tolerance(10);
            ProductIonTolerance = new Tolerance(10);
            RunTargetDecoyAnalysis = DatabaseSearchMode.Both;
            NumTolerableTermini = 1;
            NumMatchesPerSpectrum = 10;
        }
Example #24
0
        /// <summary>
        /// Runs a scan-based tag search on scan 4672 of a .pbf file and prints every matched
        /// sequence with the start index, end index and mass of its tag match.
        /// Ignored when any of the three required input files is missing.
        /// </summary>
        public void TestSearchWithTagGeneration()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            const string fastaFilePath = @"D:\MSPathFinder\Fasta\ID_002216_235ACCEA.fasta";
            const string modsFilePath = @"D:\MSPathFinder\Fasta\Mods.txt";

            // Check every input before loading anything, so that a skipped test does not first
            // pay for reading the run. The files are checked in the same order as before.
            foreach (var requiredFile in new[] { rawFilePath, fastaFilePath, modsFilePath })
            {
                if (!File.Exists(requiredFile))
                {
                    Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
                }
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var fastaDb = new FastaDatabase(fastaFilePath);
            var tolerance = new Tolerance(10);
            var aaSet = new AminoAcidSet(modsFilePath);

            var tagSearchEngine = new ScanBasedTagSearchEngine(
                run,
                new SequenceTagGenerator(run, new Tolerance(8)),
                new LcMsPeakMatrix(run),
                fastaDb,
                tolerance,
                aaSet);

            var matchedTags = tagSearchEngine.RunSearch(4672);
            foreach (var match in matchedTags)
            {
                Console.Write(match.Sequence);
                Console.WriteLine("\t{0}\t{1}\t{2}", match.TagMatch.StartIndex, match.TagMatch.EndIndex, match.TagMatch.Mass);
            }
        }
Example #25
0
 /// <summary>
 /// Creates a scorer factory: stores the run, charge range, tolerance and mass binning, starts
 /// with an empty per-key scorer dictionary, and builds a <c>ProteinScoringGraphFactory</c>
 /// from the binning and amino acid set.
 /// </summary>
 /// <param name="run">Stored in <c>_run</c>.</param>
 /// <param name="comparer">Stored in <c>_comparer</c>; also passed to the scoring graph factory.</param>
 /// <param name="aaSet">Used only to build the scoring graph factory; not stored here.</param>
 /// <param name="minProductCharge">Stored in <c>_minProductCharge</c>.</param>
 /// <param name="maxProductCharge">Stored in <c>_maxProductCharge</c>.</param>
 /// <param name="productTolerance">Stored in <c>_productTolerance</c>.</param>
 /// <param name="isotopeOffsetTolerance">Stored in <c>IsotopeOffsetTolerance</c>; defaults to 2.</param>
 /// <param name="filteringWindowSize">Stored in <c>FilteringWindowSize</c>; defaults to 1.1.</param>
 public CompositeScorerFactory(
     ILcMsRun run,
     IMassBinning comparer,
     AminoAcidSet aaSet,
     int minProductCharge,
     int maxProductCharge,
     Tolerance productTolerance,
     int isotopeOffsetTolerance = 2,
     double filteringWindowSize = 1.1)
 {
     // Inputs kept as-is.
     _run = run;
     _comparer = comparer;
     _minProductCharge = minProductCharge;
     _maxProductCharge = maxProductCharge;
     _productTolerance = productTolerance;

     // Tunable settings.
     FilteringWindowSize = filteringWindowSize;
     IsotopeOffsetTolerance = isotopeOffsetTolerance;

     // Internal state.
     _ms2Scorer = new Dictionary<int, IScorer>();
     _scoringGraphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
 }
Example #26
0
        /// <summary>
        /// Scores the first row of an identification result file against its spectrum and prints
        /// the sequence length and the score. Returns early, after printing a warning, when either
        /// input file is missing.
        /// </summary>
        public void TestScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            var rawFile = @"\\protoapps\UserData\Jungkap\Joshua\testData\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf" ;
            var resultFile = @"\\protoapps\UserData\Jungkap\Joshua\IdResult\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

            // Raw file is checked first, then the result file; the first missing one ends the test.
            foreach (var requiredFile in new[] { rawFile, resultFile })
            {
                if (File.Exists(requiredFile)) continue;

                Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, requiredFile);
                return;
            }

            var tsvData = new TsvFileParser(resultFile).GetAllData();
            var ms2ScanNumbers = tsvData["Scan"];

            var run = PbfLcMsRun.GetLcMsRun(rawFile, 0, 0);

            // Only the first result row is scored.
            const int numRowsToScore = 1;
            for (var row = 0; row < numRowsToScore; row++)
            {
                var scanNum = Int32.Parse(ms2ScanNumbers[row]);
                var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

                // First row in the file that carries this scan number.
                var tsvIndex = ms2ScanNumbers.FindIndex(scanText => Int32.Parse(scanText) == scanNum);

                var sequenceText = tsvData["Sequence"].ElementAt(tsvIndex).Trim();
                var modificationText = tsvData["Modifications"].ElementAt(tsvIndex).Trim();
                var sequence = Sequence.CreateSequence(sequenceText, modificationText, new AminoAcidSet());
                Console.WriteLine(sequence.Count);

                var score = GetScoreTest(sequence, spectrum);
                Console.WriteLine(scanNum + ":" + score);
            }
        }
Example #27
0
 /// <summary>
 /// Initializes the standard amino acid set (constructed with carbamidomethylation) and
 /// fills the lookup table that maps a mass string to a residue and its modifications.
 /// </summary>
 static MgfSequenceReader()
 {
     StandardAminoAcidSet = new AminoAcidSet(Modification.Carbamidomethylation);
     Modifications = new Dictionary<string, Tuple<AminoAcid, List<Modification>>>();

     // Acetylated residues (and oxidized M / carbamidomethylated C), in ascending key order.
     RegisterModifiedResidue("99.032", 'G', Modification.Acetylation);
     RegisterModifiedResidue("113.048", 'A', Modification.Acetylation);
     RegisterModifiedResidue("129.043", 'S', Modification.Acetylation);
     RegisterModifiedResidue("141.079", 'V', Modification.Acetylation);
     RegisterModifiedResidue("143.059", 'T', Modification.Acetylation);
     RegisterModifiedResidue("147.035", 'M', Modification.Oxidation);
     RegisterModifiedResidue("157.038", 'D', Modification.Acetylation);
     RegisterModifiedResidue("160.03", 'C', Modification.Carbamidomethylation);
     RegisterModifiedResidue("171.054", 'E', Modification.Acetylation);
     RegisterModifiedResidue("173.051", 'M', Modification.Acetylation);
     RegisterModifiedResidue("189.046", 'F', Modification.Acetylation);
     RegisterModifiedResidue("202.041", 'C', Modification.Carbamidomethylation, Modification.Acetylation);
 }

 /// <summary>
 /// Adds one entry to <c>Modifications</c>: the residue is looked up in
 /// <c>StandardAminoAcidSet</c> and paired with a new list holding the given modifications
 /// in the order supplied.
 /// </summary>
 /// <param name="massKey">dictionary key for the entry</param>
 /// <param name="residue">residue character to look up in the standard amino acid set</param>
 /// <param name="mods">modifications stored alongside the residue</param>
 private static void RegisterModifiedResidue(string massKey, char residue, params Modification[] mods)
 {
     var aminoAcid = StandardAminoAcidSet.GetAminoAcid(residue);
     var modList = new List<Modification>(mods);
     Modifications.Add(massKey, new Tuple<AminoAcid, List<Modification>>(aminoAcid, modList));
 }
 /// <summary>
 /// Convenience constructor that forwards every argument to the overload taking one
 /// additional parameter in third position, supplying <c>null</c> for that parameter.
 /// </summary>
 /// <param name="run">LC-MS run forwarded to the other overload</param>
 /// <param name="featureParser">MS1 feature parser forwarded to the other overload</param>
 /// <param name="tagParser">sequence tag parser forwarded to the other overload</param>
 /// <param name="fastaDb">FASTA database forwarded to the other overload</param>
 /// <param name="tolerance">tolerance forwarded to the other overload</param>
 /// <param name="aaSet">amino acid set forwarded to the other overload</param>
 /// <param name="maxSequenceMass">maximum sequence mass; defaults to 50000.0</param>
 /// <param name="minProductIonCharge">minimum product ion charge; defaults to 1</param>
 /// <param name="maxProductIonCharge">maximum product ion charge; defaults to 20</param>
 public FeatureBasedTagSearchEngine(
     LcMsRun run,
     Ms1FtParser featureParser,
     SequenceTagParser tagParser,
     FastaDatabase fastaDb,
     Tolerance tolerance,
     AminoAcidSet aaSet,
     double maxSequenceMass = 50000.0,
     int minProductIonCharge = 1,
     int maxProductIonCharge = 20)
     : this(
         run,
         featureParser,
         null, // third argument of the target overload is not supplied by this constructor
         tagParser,
         fastaDb,
         tolerance,
         aaSet,
         maxSequenceMass,
         minProductIonCharge,
         maxProductIonCharge)
 {
 }
Example #29
0
        /// <summary>
        /// For every row of a rescored result file, checks that the reported composition equals
        /// the composition computed from the sequence (plus water) and the listed modifications.
        /// The test is ignored when the hard-coded result file does not exist.
        /// </summary>
        public void ValidateIcResultsWithModifications()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var parser = new TsvFileParser(resultFilePath);
            var sequences = parser.GetData("Sequence");
            var modifications = parser.GetData("Modifications");
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
            var aaSet = new AminoAcidSet();
            for (var i = 0; i < parser.NumData; i++)
            {
                var sequenceComp = aaSet.GetComposition(sequences[i]) + Composition.H2O;

                var modComposition = Composition.Zero;

                // The first and last characters of the column value are stripped before splitting.
                var modsStr = modifications[i].Substring(1, modifications[i].Length - 2);
                var mods = modsStr.Split(',');
                foreach (var modStr in mods)
                {
                    // Trim first: a token such as " Oxidation 5" would otherwise make
                    // Split()[0] return an empty modification name.
                    var trimmedMod = modStr.Trim();
                    if (trimmedMod.Length == 0)
                    {
                        continue;
                    }

                    var modName = trimmedMod.Split()[0];
                    var mod = Modification.Get(modName);
                    Assert.NotNull(mod, "Unknown modification '{0}' for scan {1} (row {2})", modName, scanNums[i], i);
                    modComposition += mod.Composition;
                }

                var compFromSeqAndMods = sequenceComp + modComposition;
                Assert.True(
                    compFromSeqAndMods.Equals(compositions[i]),
                    "Composition mismatch for scan {0} (row {1}): reported {2}, computed {3}",
                    scanNums[i], i, compositions[i], compFromSeqAndMods);
            }
        }
        /// <summary>
        /// Scores one hard-coded protein sequence against a single scan of a hard-coded
        /// spectrum file and writes the total score and matched fragment count to the console.
        /// </summary>
        public void TestRescoring()
        {
            // Earlier inputs, kept for reference:
            //   spec file: H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw
            //   scan 4084, sequence
            //   SGWYELSKSSNDQFKFVLKAGNGEVILTSELYTGKSGAMNGIESVQTNSPIEARYAKEVAKNDKPYFNLKAANHQIIGTSQMYSSTA
            const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            const string sequence = "SKTKHPLPEQWQKNQEAAKATQVAFDLDEKFQYSIRKAALDAGVSPSDQIRTILGLSVSRRPTRPRLTVSLNADDYVQLAEKYDLNADAQLEIKRRVLEDLVRFVAED";
            const int scanNum = 5448;
            const int charge = 11;
            const int numMaxModsPerProtein = 4;

            // Search modifications that make up the amino acid set.
            var nTermAcetylation = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var methionineOxidation = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var cysteineDehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var cysteineGlutathione = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

            var searchModifications = new List<SearchModification>();
            searchModifications.Add(cysteineDehydro);
            searchModifications.Add(cysteineGlutathione);
            searchModifications.Add(methionineOxidation);
            searchModifications.Add(nTermAcetylation);

            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

            // Composition of the intact sequence: residues plus one water.
            var residueComposition = aaSet.GetComposition(sequence);
            var composition = residueComposition + Composition.H2O;

            var run = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);
            var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 15, new Tolerance(10));

            var result = informedScorer.GetScores(
                AminoAcid.ProteinNTerm,
                sequence,
                AminoAcid.ProteinCTerm,
                composition,
                charge,
                scanNum);

            Console.WriteLine("Total Score = " + result.Score);
            Console.WriteLine("#Fragments = " + result.NumMatchedFrags);
        }
Example #31
0
 /// <summary>
 /// Returns the cached amino acid set built with the parameterless constructor,
 /// creating and caching it on the first call.
 /// </summary>
 /// <returns>the cached <see cref="AminoAcidSet"/> instance</returns>
 public static AminoAcidSet GetStandardAminoAcidSet()
 {
     var cached = _standardAminoAcidSet;
     if (cached == null)
     {
         cached = new AminoAcidSet();
         _standardAminoAcidSet = cached;
     }

     return cached;
 }
Example #32
0
        /// <summary>
        /// Add an amino acid residue to this generator and build the graph layer for it.
        /// </summary>
        /// <param name="index">index to add the amino acid. 0 is C-term. 1 is the C-term amino acid.</param>
        /// <param name="residue">amino acid residue to add.</param>
        /// <param name="loc">location of the residue</param>
        /// <returns>
        /// true if the residue was added; false if the residue is not a valid amino acid at
        /// <paramref name="loc"/> or if adding it would push the fragment mass above the maximum
        /// sequence mass. Note that <c>_index</c> is advanced even when false is returned.
        /// </returns>
        private bool PutAminoAcid(int index, char residue, SequenceLocation loc)
        {
            _index = index + 1;

            var aminoAcid = AminoAcidSet.GetAminoAcid(residue, loc);

            if (aminoAcid == null) // residue is not valid
            {
                return false;
            }

            var fragmentComposition = _fragmentComposition[_index - 1] + aminoAcid.Composition;

            if (fragmentComposition.Mass > _maxSequenceMass)
            {
                return false;
            }

            _aminoAcidSequence[_index]   = aminoAcid;
            _fragmentComposition[_index] = fragmentComposition;

            var modIndices = AminoAcidSet.GetModificationIndices(residue, loc);
            var prevLayer  = _graph[_index - 1];

            if (!modIndices.Any())  // No modification
            {
                // Every previous node gets exactly one successor carrying the same combination.
                _graph[_index] = new Node[prevLayer.Length];
                for (var i = 0; i < prevLayer.Length; i++)
                {
                    _graph[_index][i] = new Node(prevLayer[i].ModificationCombinationIndex, i);
                }
            }
            else
            {
                // One node per distinct modification combination reachable from the previous layer.
                var modCombIndexToNodeMap = new Dictionary<int, Node>();
                for (var i = 0; i < prevLayer.Length; i++)
                {
                    var prevNodeIndex    = i;
                    var prevNode         = prevLayer[i];
                    var prevModCombIndex = prevNode.ModificationCombinationIndex;

                    // unmodified edge
                    if (modCombIndexToNodeMap.TryGetValue(prevModCombIndex, out var unmodifiedEdgeNode))
                    {
                        unmodifiedEdgeNode.AddPrevNodeIndex(prevNodeIndex);
                    }
                    else
                    {
                        modCombIndexToNodeMap.Add(prevModCombIndex, new Node(prevModCombIndex, prevNodeIndex));
                    }

                    // modified edge
                    foreach (var modIndex in modIndices)
                    {
                        var modCombIndex = ModificationParams.GetModificationCombinationIndex(
                            prevNode.ModificationCombinationIndex, modIndex);
                        if (modCombIndex < 0)   // too many modifications
                        {
                            continue;
                        }

                        if (modCombIndexToNodeMap.TryGetValue(modCombIndex, out var modifiedEdgeNode))
                        {
                            modifiedEdgeNode.AddPrevNodeIndex(prevNodeIndex);
                        }
                        else
                        {
                            modCombIndexToNodeMap.Add(modCombIndex, new Node(modCombIndex, prevNodeIndex));
                        }
                    }
                }

                // Materialize the layer once, after all previous nodes have been processed,
                // instead of re-allocating the array on every loop iteration. An empty previous
                // layer now yields an empty layer, matching the no-modification branch.
                _graph[_index] = modCombIndexToNodeMap.Values.ToArray();
            }

            return true;
        }
Example #33
0
        /// <summary>
        /// Builds a sequence graph for the given sequence and termini. Sequence is reversed.
        /// </summary>
        /// <param name="aaSet">amino acid set</param>
        /// <param name="nTerm">N-term amino acid</param>
        /// <param name="sequence">sequence</param>
        /// <param name="cTerm">C-term amino acid</param>
        /// <returns>the constructed graph, or null when the graph reports itself as not valid</returns>
        public static SequenceGraph CreateGraph(AminoAcidSet aaSet, AminoAcid nTerm, string sequence, AminoAcid cTerm)
        {
            var candidate = new SequenceGraph(aaSet, nTerm, sequence, cTerm);

            if (!candidate.IsValid)
            {
                return null;
            }

            return candidate;
        }
Example #34
0
        /// <summary>
        /// Add an amino acid residue to this generator. The residue's sequence location is
        /// derived from its position, then the graph layer for the residue is built.
        /// </summary>
        /// <param name="index">index to add the amino acid. 0 is C-term. 1 is the C-term amino acid.</param>
        /// <param name="residue">amino acid residue to add.</param>
        /// <returns>
        /// true if the residue was added; false if no location could be determined for it or if
        /// it is not a valid amino acid at that location. Note that <c>_index</c> is advanced
        /// even when false is returned.
        /// </returns>
        private bool PutAminoAcid(int index, char residue)
        {
            _index = index + 1;

            SequenceLocation? location = null;

            if (_index == 1) // C-term residue
            {
                if (residue == AminoAcid.PeptideCTerm.Residue)
                {
                    location = SequenceLocation.PeptideCTerm;
                }
                else if (residue == AminoAcid.ProteinCTerm.Residue)
                {
                    location = SequenceLocation.ProteinCTerm;
                }
            }
            else if (_index == _aminoAcidSequence.Length - 1 - NumNTermCleavages)   // N-term residue
            {
                if (residue == AminoAcid.PeptideNTerm.Residue)
                {
                    location = SequenceLocation.PeptideNTerm;
                }
                else if (residue == AminoAcid.ProteinNTerm.Residue)
                {
                    location = SequenceLocation.ProteinNTerm;
                }
            }
            else if (_index == 2) // Amino acid at the C-term
            {
                if (_aminoAcidSequence[1] == AminoAcid.PeptideCTerm)
                {
                    location = SequenceLocation.PeptideCTerm;
                }
                else if (_aminoAcidSequence[1] == AminoAcid.ProteinCTerm)
                {
                    location = SequenceLocation.ProteinCTerm;
                }
            }
            else if (_index == _aminoAcidSequence.Length - 2 - NumNTermCleavages) // Amino acid at the N-term
            {
                // NOTE(review): this reads the last slot (Length - 1), whereas the N-term residue
                // branch above uses Length - 1 - NumNTermCleavages. Confirm that the last slot is
                // populated before this point and that the difference is intended.
                if (_aminoAcidSequence[_aminoAcidSequence.Length - 1] == AminoAcid.PeptideNTerm)
                {
                    location = SequenceLocation.PeptideNTerm;
                }
                else if (_aminoAcidSequence[_aminoAcidSequence.Length - 1] == AminoAcid.ProteinNTerm)
                {
                    location = SequenceLocation.ProteinNTerm;
                }
            }
            else
            {
                location = SequenceLocation.Everywhere;
            }

            if (!location.HasValue) // terminal position without a matching terminal marker
            {
                return false;
            }

            var loc       = location.Value;
            var aminoAcid = AminoAcidSet.GetAminoAcid(residue, loc);

            if (aminoAcid == null) // residue is not valid
            {
                return false;
            }

            _aminoAcidSequence[_index] = aminoAcid;
            _suffixComposition[_index] = _suffixComposition[_index - 1] + aminoAcid.Composition;

            var modIndices = AminoAcidSet.GetModificationIndices(residue, loc);
            var prevLayer  = _graph[_index - 1];

            if (!modIndices.Any())  // No modification
            {
                // Every previous node gets exactly one successor carrying the same combination.
                _graph[_index] = new Node[prevLayer.Length];
                for (var i = 0; i < prevLayer.Length; i++)
                {
                    _graph[_index][i] = new Node(prevLayer[i].ModificationCombinationIndex, i);
                }
            }
            else
            {
                // One node per distinct modification combination reachable from the previous layer.
                var modCombIndexToNodeMap = new Dictionary<int, Node>();
                for (var i = 0; i < prevLayer.Length; i++)
                {
                    var  prevNodeIndex    = i;
                    var  prevNode         = prevLayer[i];
                    var  prevModCombIndex = prevNode.ModificationCombinationIndex;
                    Node newNode;

                    // unmodified edge
                    if (modCombIndexToNodeMap.TryGetValue(prevModCombIndex, out newNode))
                    {
                        newNode.AddPrevNodeIndex(prevNodeIndex);
                    }
                    else
                    {
                        modCombIndexToNodeMap.Add(prevModCombIndex, new Node(prevModCombIndex, prevNodeIndex));
                    }

                    // modified edge
                    foreach (var modIndex in modIndices)
                    {
                        var modCombIndex = ModificationParams.GetModificationCombinationIndex(
                            prevNode.ModificationCombinationIndex, modIndex);
                        if (modCombIndex < 0)   // too many modifications
                        {
                            continue;
                        }

                        if (modCombIndexToNodeMap.TryGetValue(modCombIndex, out newNode))
                        {
                            newNode.AddPrevNodeIndex(prevNodeIndex);
                        }
                        else
                        {
                            modCombIndexToNodeMap.Add(modCombIndex, new Node(modCombIndex, prevNodeIndex));
                        }
                    }
                }

                // Materialize the layer once, after all previous nodes have been processed,
                // instead of re-allocating the array on every loop iteration. An empty previous
                // layer now yields an empty layer, matching the no-modification branch.
                _graph[_index] = modCombIndexToNodeMap.Values.ToArray();
            }

            return true;
        }
Example #35
0
 /// <summary>
 /// Build a sequence from the supplied character sequence, using the provided amino acid set.
 /// Each character of <paramref name="sequence"/> is mapped through
 /// <c>aminoAcidSet.GetAminoAcid</c> and the resulting items are passed to another
 /// constructor of this type.
 /// </summary>
 /// <param name="sequence">string whose characters are looked up one at a time</param>
 /// <param name="aminoAcidSet">amino acid set whose <c>GetAminoAcid</c> resolves each character</param>
 public Sequence(string sequence, AminoAcidSet aminoAcidSet) : this(sequence.Select(aminoAcidSet.GetAminoAcid))
 {
 }
Example #36
0
 /// <summary>
 /// Returns the cached amino acid set constructed with
 /// <c>Modification.Carbamidomethylation</c>, creating and caching it on the first call.
 /// </summary>
 /// <returns>the cached <see cref="AminoAcidSet"/> instance</returns>
 public static AminoAcidSet GetStandardAminoAcidSetWithCarboamidomethylCys()
 {
     var cached = _standardAminoAcidSetWithCarboamidomethylCys;
     if (cached == null)
     {
         cached = new AminoAcidSet(Modification.Carbamidomethylation);
         _standardAminoAcidSetWithCarboamidomethylCys = cached;
     }

     return cached;
 }