Beispiel #1
0
 /// <summary>
 /// Builds a top-down scorer over the run loaded from <paramref name="specFilePath"/> and
 /// immediately calls <c>Rescore</c>, so all of the work happens during construction.
 /// </summary>
 /// <param name="specFilePath">Path passed to <c>InMemoryLcMsRun.GetLcMsRun</c>.</param>
 /// <param name="icResultFilePath">First argument forwarded to <c>Rescore</c>.</param>
 /// <param name="outputFilePath">Second argument forwarded to <c>Rescore</c>.</param>
 /// <param name="aaSet">Amino acid set handed to the scorer.</param>
 /// <param name="tolerance">Tolerance handed to the scorer.</param>
 /// <param name="ms2CorrThreshold">Correlation threshold handed to the scorer.</param>
 /// <param name="minProductIonCharge">Minimum product ion charge handed to the scorer.</param>
 /// <param name="maxProductIonCharge">Maximum product ion charge handed to the scorer.</param>
 public IcRescorer(
     string specFilePath,
     string icResultFilePath,
     string outputFilePath,
     AminoAcidSet aaSet,
     Tolerance tolerance,
     double ms2CorrThreshold = 0.7,
     int minProductIonCharge = 1,
     int maxProductIonCharge = 10)
 {
     // NOTE(review): the two 1.4826 arguments are passed through unchanged; their meaning
     // is defined by GetLcMsRun and is not visible here.
     _topDownScorer = new InformedTopDownScorer(
         InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826),
         aaSet,
         minProductIonCharge,
         maxProductIonCharge,
         tolerance,
         ms2CorrThreshold);

     Rescore(icResultFilePath, outputFilePath);
 }
 /// <summary>
 /// Stores the search inputs and builds the two helpers derived from them:
 /// an <c>Ms1FtFilter</c> and a <c>SearchableDatabase</c>.
 /// </summary>
 /// <param name="run">LC-MS run; stored and also passed to the <c>Ms1FtFilter</c>.</param>
 /// <param name="featureParser">Feature parser; its <c>Ms1FtFileName</c> is passed to the <c>Ms1FtFilter</c>.</param>
 /// <param name="ms2Scorer">MS2 scorer to store.</param>
 /// <param name="tagParser">Sequence tag parser to store.</param>
 /// <param name="fastaDb">FASTA database; stored and wrapped in a <c>SearchableDatabase</c>.</param>
 /// <param name="tolerance">Tolerance; stored and also passed to the <c>Ms1FtFilter</c>.</param>
 /// <param name="aaSet">Amino acid set to store.</param>
 /// <param name="maxSequenceMass">Maximum sequence mass to store.</param>
 /// <param name="minProductIonCharge">Minimum product ion charge to store.</param>
 /// <param name="maxProductIonCharge">Maximum product ion charge to store.</param>
 public FeatureBasedTagSearchEngine(
     LcMsRun run,
     Ms1FtParser featureParser,
     ProductScorerBasedOnDeconvolutedSpectra ms2Scorer,
     SequenceTagParser tagParser,
     FastaDatabase fastaDb,
     Tolerance tolerance,
     AminoAcidSet aaSet,
     double maxSequenceMass = 50000.0,
     int minProductIonCharge = 1,
     int maxProductIonCharge = 20)
 {
     // Inputs kept as-is.
     _run = run;
     _featureParser = featureParser;
     _ms2Scorer = ms2Scorer;
     _tagParser = tagParser;
     _fastaDb = fastaDb;
     _aaSet = aaSet;
     _tolerance = tolerance;

     // Search limits.
     _maxSequenceMass = maxSequenceMass;
     _minProductIonCharge = minProductIonCharge;
     _maxProductIonCharge = maxProductIonCharge;

     // Helpers derived from the inputs (filter first, then database, as before).
     _ms1FtFilter = new Ms1FtFilter(run, tolerance, featureParser.Ms1FtFileName);
     _searchableDb = new SearchableDatabase(fastaDb);
 }
        /// <summary>
        /// Stores the tolerance and the charge range for later use.
        /// </summary>
        /// <param name="tolerance">Tolerance to store.</param>
        /// <param name="minCharge">Lower end of the charge range.</param>
        /// <param name="maxCharge">Upper end of the charge range.</param>
        public SequenceTagIndexFinder(Tolerance tolerance, int minCharge, int maxCharge)
        {
            _minCharge = minCharge;
            _maxCharge = maxCharge;
            _tolerance = tolerance;
        }
Beispiel #4
0
 /// <summary>
 /// Converts the tolerance at <paramref name="mz"/> into a symmetric m/z window and
 /// delegates to the (minMz, maxMz, precursorIonMz) overload.
 /// </summary>
 /// <param name="mz">Center of the m/z window.</param>
 /// <param name="tolerance">Tolerance used to compute the half-width of the window.</param>
 /// <param name="precursorIonMz">Precursor ion m/z, forwarded unchanged.</param>
 /// <returns>Whatever the range-based overload returns for the computed window.</returns>
 public new Xic GetFullProductExtractedIonChromatogram(double mz, Tolerance tolerance, double precursorIonMz)
 {
     var halfWidth = tolerance.GetToleranceAsTh(mz);
     return GetFullProductExtractedIonChromatogram(mz - halfWidth, mz + halfWidth, precursorIonMz);
 }
        /// <summary>
        /// Deconvolutes <paramref name="spec"/> and returns a new spectrum that keeps only the first
        /// deconvoluted peak seen for each mass bin (bins are computed by <c>GetBinNumber</c>).
        /// </summary>
        /// <param name="spec">Spectrum to deconvolute.</param>
        /// <param name="minCharge">Minimum charge forwarded to the deconvoluter.</param>
        /// <param name="maxCharge">Maximum charge forwarded to the deconvoluter.</param>
        /// <param name="tolerance">Tolerance forwarded to the deconvoluter.</param>
        /// <param name="corrThreshold">Correlation threshold forwarded to the deconvoluter.</param>
        /// <param name="isotopeOffsetTolerance">Isotope offset tolerance forwarded to the deconvoluter.</param>
        /// <param name="filteringWindowSize">Filtering window size forwarded to the deconvoluter.</param>
        /// <returns>
        /// A <c>ProductSpectrum</c> (copying MS level, activation method and isolation window) when
        /// <paramref name="spec"/> is a <c>ProductSpectrum</c>; otherwise a plain <c>Spectrum</c>.
        /// Both carry the scan number of <paramref name="spec"/>.
        /// </returns>
        public static Spectrum GetDeconvolutedSpectrum(Spectrum spec, int minCharge, int maxCharge, Tolerance tolerance, double corrThreshold,
                                                       int isotopeOffsetTolerance, double filteringWindowSize = 1.1)
        {
            var uniquePeaks = new List<Peak>();
            var occupiedBins = new HashSet<int>();

            foreach (var candidate in Deconvoluter.GetDeconvolutedPeaks(spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, corrThreshold))
            {
                var mass = candidate.Mass;

                // HashSet.Add returns false when the bin is already taken, so only the first peak per bin is kept.
                if (occupiedBins.Add(GetBinNumber(mass)))
                {
                    uniquePeaks.Add(new Peak(mass, candidate.Intensity));
                }
            }

            var sourceAsProduct = spec as ProductSpectrum;
            if (sourceAsProduct == null)
            {
                return new Spectrum(uniquePeaks, spec.ScanNum);
            }

            var deconvoluted = new ProductSpectrum(uniquePeaks, spec.ScanNum);
            deconvoluted.MsLevel = spec.MsLevel;
            deconvoluted.ActivationMethod = sourceAsProduct.ActivationMethod;
            deconvoluted.IsolationWindow = sourceAsProduct.IsolationWindow;
            return deconvoluted;
        }
 /// <summary>
 /// Gets the extracted ion chromatogram around the given m/z (per the original note, using only MS1 spectra).
 /// The tolerance is converted to a symmetric m/z window and the range-based overload does the work.
 /// </summary>
 /// <param name="mz">Target m/z (center of the window).</param>
 /// <param name="tolerance">Tolerance used to compute the half-width of the window.</param>
 /// <returns>XIC as a list of XicPeaks, as produced by the (minMz, maxMz) overload.</returns>
 public IList<XicPeak> GetExtractedIonChromatogram(double mz, Tolerance tolerance)
 {
     var halfWidth = tolerance.GetToleranceAsTh(mz);
     return GetExtractedIonChromatogram(mz - halfWidth, mz + halfWidth);
 }
        /// <summary>
        /// Stores the search inputs, wraps <paramref name="fastaDb"/> in a <c>SearchableDatabase</c>
        /// and opens the scan range to the full <c>int</c> range.
        /// </summary>
        /// <param name="run">LC-MS run to store.</param>
        /// <param name="seqTagFinder">Sequence tag finder to store.</param>
        /// <param name="featureFinder">Feature finder to store.</param>
        /// <param name="fastaDb">FASTA database; only the <c>SearchableDatabase</c> built from it is kept.</param>
        /// <param name="tolerance">Tolerance to store.</param>
        /// <param name="aaSet">Amino acid set to store.</param>
        /// <param name="ms2ScorerFactory">Optional scorer factory; stored as given, including <c>null</c>.</param>
        /// <param name="minMatchedTagLength">Minimum matched tag length to store.</param>
        /// <param name="maxSequenceMass">Maximum sequence mass to store.</param>
        /// <param name="minProductIonCharge">Minimum product ion charge to store.</param>
        /// <param name="maxProductIonCharge">Maximum product ion charge to store.</param>
        public ScanBasedTagSearchEngine(
            LcMsRun run,
            ISequenceTagFinder seqTagFinder,
            LcMsPeakMatrix featureFinder,
            FastaDatabase fastaDb,
            Tolerance tolerance,
            AminoAcidSet aaSet,
            CompositeScorerFactory ms2ScorerFactory = null,
            int minMatchedTagLength = DefaultMinMatchedTagLength,
            double maxSequenceMass = 50000.0,
            int minProductIonCharge = 1,
            int maxProductIonCharge = 20)
        {
            // Collaborators.
            _run = run;
            _seqTagFinder = seqTagFinder;
            _featureFinder = featureFinder;
            _ms2ScorerFactory = ms2ScorerFactory;
            _searchableDb = new SearchableDatabase(fastaDb);

            // Search parameters.
            _aaSet = aaSet;
            _tolerance = tolerance;
            _minMatchedTagLength = minMatchedTagLength;
            _maxSequenceMass = maxSequenceMass;
            _minProductIonCharge = minProductIonCharge;
            _maxProductIonCharge = maxProductIonCharge;

            // No scan restriction by default.
            MinScan = int.MinValue;
            MaxScan = int.MaxValue;
        }
Beispiel #8
0
        /// <summary>
        /// Creates the filter: stores the likelihood-ratio threshold, builds an <c>LcMsChargeMap</c>
        /// for the run, and then calls <c>Read</c> on the given file name.
        /// </summary>
        /// <param name="run">LC-MS run passed to the <c>LcMsChargeMap</c>.</param>
        /// <param name="massTolerance">Mass tolerance passed to the <c>LcMsChargeMap</c>.</param>
        /// <param name="ms1FtFileName">File name handed to <c>Read</c>.</param>
        /// <param name="minLikelihoodRatio">Threshold stored in <c>_minLikelihoodRatio</c>.</param>
        public Ms1FtFilter(LcMsRun run, Tolerance massTolerance, string ms1FtFileName, double minLikelihoodRatio = 0)
        {
            _minLikelihoodRatio = minLikelihoodRatio;
            _lcMsChargeMap = new LcMsChargeMap(run, massTolerance);

            // Read runs last so both fields above are already set when it executes.
            Read(ms1FtFileName);
        }
Beispiel #9
0
 /// <summary>
 /// Looks up the MS2 scan numbers registered for the mass bin of <paramref name="sequenceMass"/>.
 /// </summary>
 /// <param name="sequenceMass">Sequence mass; converted to a bin number with <c>GetBinNumber</c>.</param>
 /// <param name="tolerance">Not used by this implementation.</param>
 /// <param name="run">Not used by this implementation.</param>
 /// <returns>The scan numbers stored for the bin, or an empty array when the bin has no entry.</returns>
 public IEnumerable<int> GetMatchingMs2ScanNums(double sequenceMass, Tolerance tolerance, LcMsRun run)
 {
     IEnumerable<int> scanNumsForBin;
     return _sequenceMassBinToScanNumsMap.TryGetValue(GetBinNumber(sequenceMass), out scanNumsForBin)
         ? scanNumsForBin
         : new int[0];
 }
Beispiel #10
0
 /// <summary>
 /// Converts the tolerance at <paramref name="mz"/> into a symmetric m/z window and
 /// delegates to the (peakList, minMz, maxMz) overload.
 /// </summary>
 /// <param name="peakList">Peaks to search; forwarded unchanged.</param>
 /// <param name="mz">Center of the m/z window.</param>
 /// <param name="tolerance">Tolerance used to compute the half-width of the window.</param>
 /// <returns>Whatever the range-based overload returns for the computed window.</returns>
 public static IList<Peak> FindAllPeaks(List<Peak> peakList, double mz, Tolerance tolerance)
 {
     var halfWidth = tolerance.GetToleranceAsTh(mz);
     return FindAllPeaks(peakList, mz - halfWidth, mz + halfWidth);
 }
Beispiel #11
0
        /// <summary>
        /// Stores the file locations and amino acid set, then fills every other search setting
        /// with its default value.
        /// </summary>
        /// <param name="specFilePath">Value assigned to <c>SpecFilePath</c>.</param>
        /// <param name="dbFilePath">Value assigned to <c>DatabaseFilePath</c>.</param>
        /// <param name="outputDir">Value assigned to <c>OutputDir</c>.</param>
        /// <param name="aaSet">Value assigned to <c>AminoAcidSet</c>.</param>
        /// <param name="featureFilePath">Optional value assigned to <c>FeatureFilePath</c>; may be <c>null</c>.</param>
        public IcTopDownLauncher(
            string specFilePath,
            string dbFilePath,
            string outputDir,
            AminoAcidSet aaSet,
            string featureFilePath = null)
        {
            ErrorMessage = string.Empty;

            // Caller-supplied inputs.
            SpecFilePath = specFilePath;
            DatabaseFilePath = dbFilePath;
            AminoAcidSet = aaSet;
            OutputDir = outputDir;
            FeatureFilePath = featureFilePath;

            // Default sequence constraints.
            MinSequenceLength = 21;
            MaxSequenceLength = 300;
            MinSequenceMass = 2000.0;
            MaxSequenceMass = 50000.0;
            MaxNumNTermCleavages = 1;
            MaxNumCTermCleavages = 0;

            // Default charge ranges.
            MinPrecursorIonCharge = 2;
            MaxPrecursorIonCharge = 60;
            MinProductIonCharge = 1;
            MaxProductIonCharge = 20;

            // Default tolerances (both built with the value 10).
            PrecursorIonTolerance = new Tolerance(10);
            ProductIonTolerance = new Tolerance(10);

            // Default search behavior.
            RunTargetDecoyAnalysis = DatabaseSearchMode.Both;
            SearchMode = InternalCleavageType.SingleInternalCleavage;
            TagBasedSearch = true;
            NumMatchesPerSpectrum = 3;
            ScanNumbers = null;
            MaxNumThreads = 4;
        }
Beispiel #12
0
        /// <summary>
        /// For every data set, loads the run, the PrSMs from two result folders and the ProMex features,
        /// attaches to each feature the PrSMs that fall inside its scan range and mass tolerance, and
        /// registers the features with the aligner. It then aligns the features, fills in the missing
        /// ones per data set, and writes the cross-tab to a fixed network path.
        /// </summary>
        public void TestFeatureAlignment()
        {
            const string outFilePath = @"\\protoapps\UserData\Jungkap\Lewy\aligned\promex_crosstab_temp.tsv";

            // Data set name left as a note by the original author: CPTAC_Intact_CR32A_24Aug15_Bane_15-02-06-RZ
            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance = new Tolerance(10);
            var alignment = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

            for (var dataSetIdx = 0; dataSetIdx < NdataSet; dataSetIdx++)
            {
                var rawPath = string.Format(@"{0}\{1}.pbf", PbfPath, GetDataSetNames(dataSetIdx));
                var firstIdPath = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, GetDataSetNames(dataSetIdx));
                var secondIdPath = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder2, GetDataSetNames(dataSetIdx));
                var featurePath = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, GetDataSetNames(dataSetIdx));
                Console.WriteLine(rawPath);
                var run = PbfLcMsRun.GetLcMsRun(rawPath);

                // Pool the identifications from both result folders before merging.
                var pooledPrsms = prsmReader.LoadIdentificationResult(firstIdPath, ProteinSpectrumMatch.SearchTool.MsPathFinder);
                pooledPrsms.AddRange(prsmReader.LoadIdentificationResult(secondIdPath, ProteinSpectrumMatch.SearchTool.MsPathFinder));

                var mergedPrsms = MergePrsm(pooledPrsms);
                var features = LcMsFeatureAlignment.LoadProMexResult(dataSetIdx, featurePath, run);

                // Use the protein name as the protein id for every merged PrSM.
                for (var prsmIdx = 0; prsmIdx < mergedPrsms.Count; prsmIdx++)
                {
                    var prsm = mergedPrsms[prsmIdx];
                    prsm.ProteinId = prsm.ProteinName;
                }

                // Tag features by PrSMs: scan strictly inside the feature's scan range and mass within tolerance.
                for (var featureIdx = 0; featureIdx < features.Count; featureIdx++)
                {
                    var feature = features[featureIdx];
                    var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                    foreach (var prsm in mergedPrsms)
                    {
                        var inScanRange = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                        if (!inScanRange)
                        {
                            continue;
                        }

                        if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                        {
                            feature.ProteinSpectrumMatches.Add(prsm);
                        }
                    }
                }

                alignment.AddDataSet(dataSetIdx, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var dataSetIdx = 0; dataSetIdx < NdataSet; dataSetIdx++)
            {
                alignment.FillMissingFeatures(dataSetIdx);
                Console.WriteLine("{0} has been processed", GetDataSetNames(dataSetIdx));
            }

            OutputCrossTabWithId(outFilePath, alignment);
        }
 /// <summary>
 /// Creates a record holding a reduced charge, an offset, its frequency and the tolerance.
 /// </summary>
 /// <param name="reducedCharge">Value assigned to <c>ReducedCharge</c>.</param>
 /// <param name="offset">Value assigned to <c>Offset</c>.</param>
 /// <param name="frequency">Value assigned to <c>Frequency</c>.</param>
 /// <param name="tolerance">Value assigned to <c>Tolerance</c>.</param>
 public PrecursorOffsetFrequency(int reducedCharge, float offset, float frequency, Tolerance tolerance)
 {
     Tolerance = tolerance;
     ReducedCharge = reducedCharge;
     Frequency = frequency;
     Offset = offset;
 }
Beispiel #14
0
        /// <summary>
        /// Groups the nodes into clusters: builds the edges with <c>GetEdges</c>, then walks the node list
        /// and asks <c>GetConnectedComponenet</c> for the component of every node not yet assigned to one.
        /// </summary>
        /// <param name="tol">Tolerance forwarded to <c>GetEdges</c>.</param>
        /// <param name="elutionInterval">Elution interval forwarded to <c>GetEdges</c>.</param>
        /// <param name="nodeList">Nodes to cluster.</param>
        /// <returns>One list of nodes per component, in the order the components were first encountered.</returns>
        public List<List<MSDeconvNode>> GetClustersList(Tolerance tol, double elutionInterval,List<MSDeconvNode> nodeList)
        {
            var edges = GetEdges(nodeList, tol, elutionInterval);

            // NOTE(review): presumably filled by GetConnectedComponenet with every node it visits — confirm.
            var assignedNodes = new Dictionary<MSDeconvNode, int>();
            var components = new List<List<MSDeconvNode>>();

            for (var idx = 0; idx < nodeList.Count; idx++)
            {
                var node = nodeList[idx];
                if (assignedNodes.ContainsKey(node))
                {
                    continue;
                }

                components.Add(GetConnectedComponenet(node, edges, assignedNodes));
            }

            return components;
        }
Beispiel #15
0
 /// <summary>
 /// Stores the tolerance and charge range and starts with an empty ranking-info table.
 /// </summary>
 /// <param name="tolerance">Tolerance to store.</param>
 /// <param name="minCharge">Lower end of the charge range.</param>
 /// <param name="maxCharge">Upper end of the charge range.</param>
 public MatchedPeakPostScorer(Tolerance tolerance, int minCharge, int maxCharge)
 {
     _rankingInfo = new Dictionary<int, int[]>();
     _minCharge = minCharge;
     _maxCharge = maxCharge;
     _tolerance = tolerance;
 }
Beispiel #16
0
 /// <summary>
 /// Wraps <paramref name="spec"/> in a <c>RankedSpectrum</c> and stores the scoring context.
 /// </summary>
 /// <param name="spec">Spectrum to wrap.</param>
 /// <param name="scorer">Rank scorer to store.</param>
 /// <param name="charge">Charge to store.</param>
 /// <param name="massWithH2O">Mass stored in <c>_sequenceMass</c>.</param>
 /// <param name="tolerance">Tolerance to store.</param>
 public ScoredSpectrum(Spectrum spec, RankScore scorer, int charge, double massWithH2O, Tolerance tolerance)
 {
     _scorer = scorer;
     _tolerance = tolerance;
     _charge = charge;
     _sequenceMass = massWithH2O;
     _rankedSpec = new RankedSpectrum(spec);
 }
Beispiel #17
0
        /// <summary>
        /// Stores the run and mass tolerance, creates an empty <c>LcMsMatchMap</c>, and then calls
        /// <c>Read</c> on the given file name.
        /// </summary>
        /// <param name="run">LC-MS run to store.</param>
        /// <param name="massTolerance">Mass tolerance to store.</param>
        /// <param name="msDeconvFileName">File name handed to <c>Read</c>.</param>
        public MsDeconvFilter(LcMsRun run, Tolerance massTolerance, string msDeconvFileName)
        {
            _lcMsMatchMap = new LcMsMatchMap();
            _massTolerance = massTolerance;
            _run = run;

            // Read runs last so every field above is already set when it executes.
            Read(msDeconvFileName);
        }
 /// <summary>
 /// Stores the spectrum, tolerance and charge range, and picks the base ion types from the
 /// spectrum's activation method: the ETD set for ETD, the CID set for everything else.
 /// </summary>
 /// <param name="ms2Spec">Product spectrum to store; its <c>ActivationMethod</c> selects the base ion types.</param>
 /// <param name="tolerance">Tolerance to store.</param>
 /// <param name="minCharge">Lower end of the charge range.</param>
 /// <param name="maxCharge">Upper end of the charge range.</param>
 public FitBasedLogLikelihoodRatioScorer(ProductSpectrum ms2Spec, Tolerance tolerance, int minCharge, int maxCharge)
 {
     _ms2Spec = ms2Spec;
     _tolerance = tolerance;
     _minCharge = minCharge;
     _maxCharge = maxCharge;

     if (ms2Spec.ActivationMethod != ActivationMethod.ETD)
     {
         _baseIonTypes = BaseIonTypesCID;
     }
     else
     {
         _baseIonTypes = BaseIonTypesETD;
     }
 }
Beispiel #19
0
        /// <summary>
        /// Finds the maximum intensity peak within the tolerance window around the given m/z.
        /// The window is symmetric around <paramref name="mz"/> and the (minMz, maxMz) overload does the search.
        /// </summary>
        /// <param name="mz">Center of the m/z window.</param>
        /// <param name="tolerance">Tolerance used to compute the half-width of the window.</param>
        /// <returns>The peak returned by the range-based overload for the computed window.</returns>
        public Peak FindPeak(double mz, Tolerance tolerance)
        {
            var halfWidth = tolerance.GetToleranceAsTh(mz);
            return FindPeak(mz - halfWidth, mz + halfWidth);
        }
Beispiel #20
0
 /// <summary>
 /// Convenience overload: deconvolutes the peaks of <paramref name="spec"/> by forwarding
 /// <c>spec.Peaks</c> and all other arguments to the peak-based overload.
 /// </summary>
 /// <param name="spec">Spectrum whose <c>Peaks</c> are deconvoluted.</param>
 /// <param name="minCharge">Minimum charge, forwarded unchanged.</param>
 /// <param name="maxCharge">Maximum charge, forwarded unchanged.</param>
 /// <param name="isotopeOffsetTolerance">Isotope offset tolerance, forwarded unchanged.</param>
 /// <param name="filteringWindowSize">Filtering window size, forwarded unchanged.</param>
 /// <param name="tolerance">Tolerance, forwarded unchanged.</param>
 /// <param name="corrScoreThreshold">Correlation score threshold, forwarded unchanged.</param>
 /// <returns>The deconvoluted peaks produced by the peak-based overload.</returns>
 public static List<DeconvolutedPeak> GetDeconvolutedPeaks(
     Spectrum spec, int minCharge, int maxCharge,
     int isotopeOffsetTolerance, double filteringWindowSize,
     Tolerance tolerance, double corrScoreThreshold)
 {
     var sourcePeaks = spec.Peaks;

     return GetDeconvolutedPeaks(
         sourcePeaks,
         minCharge,
         maxCharge,
         isotopeOffsetTolerance,
         filteringWindowSize,
         tolerance,
         corrScoreThreshold);
 }
Beispiel #21
0
        /// <summary>
        /// Requests the XIC for <paramref name="mz"/> from <c>_uimfUtil</c> and wraps the result in a <c>FeatureSet</c>.
        /// </summary>
        /// <param name="mz">Target m/z passed to <c>GetXic</c>.</param>
        /// <param name="tolerance">
        /// Supplies the tolerance value and unit: a ppm unit maps to <c>DataReader.ToleranceType.PPM</c>,
        /// any other unit maps to <c>DataReader.ToleranceType.Thomson</c>.
        /// </param>
        /// <param name="frameType">Frame type passed to <c>GetXic</c>.</param>
        /// <returns>A <c>FeatureSet</c> built from the data returned by <c>GetXic</c>.</returns>
        public FeatureSet GetFeatures(double mz, Tolerance tolerance, DataReader.FrameType frameType)
        {
            var toleranceValue = tolerance.GetValue();

            DataReader.ToleranceType toleranceType;
            if (tolerance.GetUnit() == ToleranceUnit.Ppm)
            {
                toleranceType = DataReader.ToleranceType.PPM;
            }
            else
            {
                toleranceType = DataReader.ToleranceType.Thomson;
            }

            return new FeatureSet(_uimfUtil.GetXic(mz, toleranceValue, frameType, toleranceType));
        }
 /// <summary>
 /// Stores the training inputs and starts with an empty <c>IsotopeIntensityCorrProbDictionary</c>.
 /// </summary>
 /// <param name="spectra">Spectra to store.</param>
 /// <param name="ionTypes">Ion types per group parameter to store.</param>
 /// <param name="tolerance">Tolerance to store.</param>
 /// <param name="maxCharge">Maximum charge to store.</param>
 public IsotopeIntensityCorrelationScoreTrainerUsingMgfFile(List<MSMSSpectrum> spectra, Dictionary<GroupParameter, List<IonType>> ionTypes, Tolerance tolerance, int maxCharge)
 {
     IsotopeIntensityCorrProbDictionary = new Dictionary<GroupParameter, Dictionary<IonType, Dictionary<int, double>>>();

     _maxCharge = maxCharge;
     _tolerance = tolerance;
     _ionTypes = ionTypes;
     _spectra = spectra;
 }
        /// <summary>
        /// Stores the training inputs and starts with an empty ion-frequency table and an empty <c>IonTypes</c> table.
        /// </summary>
        /// <param name="spectra">Spectra to store.</param>
        /// <param name="tolerance">Tolerance to store.</param>
        /// <param name="maxCharge">Maximum charge to store.</param>
        public IonTypeTrainerUsingMgfFile(List<MSMSSpectrum> spectra, Tolerance tolerance, int maxCharge)
        {
            _maxCharge = maxCharge;
            _tolerance = tolerance;
            _spectra = spectra;

            // Result tables, both empty at construction.
            _ionFrequencyFunction = new Dictionary<GroupParameter, Dictionary<IonType, double>>();
            IonTypes = new Dictionary<GroupParameter, List<IonType>>();
        }
Beispiel #24
0
 /// <summary>
 /// Stores the run and the scoring parameters on the matching properties.
 /// </summary>
 /// <param name="run">Value assigned to <c>Run</c>.</param>
 /// <param name="aaSet">Value assigned to <c>AminoAcidSet</c>.</param>
 /// <param name="minProductCharge">Value assigned to <c>MinProductCharge</c>.</param>
 /// <param name="maxProductCharge">Value assigned to <c>MaxProductCharge</c>.</param>
 /// <param name="tolerance">Value assigned to <c>Tolerance</c>.</param>
 /// <param name="ms2CorrThreshold">Value assigned to <c>Ms2CorrThreshold</c>.</param>
 public InformedTopDownScorer(LcMsRun run, AminoAcidSet aaSet, int minProductCharge, int maxProductCharge, Tolerance tolerance, double ms2CorrThreshold = 0.7)
 {
     Run = run;
     AminoAcidSet = aaSet;

     Tolerance = tolerance;
     Ms2CorrThreshold = ms2CorrThreshold;

     MinProductCharge = minProductCharge;
     MaxProductCharge = maxProductCharge;
 }
Beispiel #25
0
 /// <summary>
 /// Creates an empty tag set for the given sequence, keeping the amino acid set and both tolerances.
 /// </summary>
 /// <param name="sequence">Sequence to store.</param>
 /// <param name="aminoAcidSet">Amino acid set to store.</param>
 /// <param name="tolerance">Value stored in <c>_tolerance</c>.</param>
 /// <param name="relaxedTolerance">Value stored in <c>_relaxedTolerance</c>.</param>
 public MatchedTagSet(string sequence, 
     AminoAcidSet aminoAcidSet, Tolerance tolerance, Tolerance relaxedTolerance)
 {
     _tags = new List<MatchedTag>();

     _sequence = sequence;
     _aminoAcidSet = aminoAcidSet;

     _relaxedTolerance = relaxedTolerance;
     _tolerance = tolerance;
 }
Beispiel #26
0
        /// <summary>
        /// Stores the run, mass tolerance and fit-score threshold, creates an empty <c>LcMsMatchMap</c>,
        /// and then calls <c>Read</c> on the given file name.
        /// </summary>
        /// <param name="run">LC-MS run to store.</param>
        /// <param name="massTolerance">Mass tolerance to store.</param>
        /// <param name="isosFileName">File name handed to <c>Read</c>.</param>
        /// <param name="fitScoreThreshold">Threshold stored in <c>_fitScoreThreshold</c>.</param>
        public IsosFilter(LcMsRun run, Tolerance massTolerance, string isosFileName, double fitScoreThreshold = 1.0)
        {
            _lcMsMatchMap = new LcMsMatchMap();
            _fitScoreThreshold = fitScoreThreshold;
            _massTolerance = massTolerance;
            _run = run;

            // Read runs last so every field above is already set when it executes.
            Read(isosFileName);
        }
Beispiel #27
0
        /// <summary>
        /// Reads an identification result file, looks up for every row the highest-intensity isotope peak
        /// of the precursor ion in the precursor scan, and writes a copy of the result file with an extra
        /// "PrecursorIntensity" column. The test is ignored when the raw file is not reachable.
        /// </summary>
        public void AddMostAbundantIsotopePeakIntensity()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string rawFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            const string resultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTda.tsv";

            // Pull the columns needed to rebuild each precursor ion.
            var parser = new TsvFileParser(resultFilePath);
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var scanNums = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
            var precursorIntensities = new double[parser.NumData];
            var tolerance = new Tolerance(10);

            for (var row = 0; row < parser.NumData; row++)
            {
                var ms2ScanNum = scanNums[row];
                var precursorIon = new Ion(compositions[row], charges[row]);

                var precursorSpec = run.GetSpectrum(run.GetPrecursorScanNum(ms2ScanNum));
                var isotopePeaks = precursorSpec.GetAllIsotopePeaks(precursorIon, tolerance, 0.1);
                if (isotopePeaks == null)
                {
                    // No isotope peaks found: the intensity for this row stays at its default of 0.
                    continue;
                }

                var highestIntensity = 0.0;
                foreach (var isotopePeak in isotopePeaks)
                {
                    // Individual entries may be null, so skip those.
                    if (isotopePeak != null && isotopePeak.Intensity > highestIntensity)
                    {
                        highestIntensity = isotopePeak.Intensity;
                    }
                }

                precursorIntensities[row] = highestIntensity;
            }

            // Write the original rows with the intensity appended as a new last column.
            const string newResultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTdaWithIntensities.tsv";
            using (var writer = new StreamWriter(newResultFilePath))
            {
                writer.WriteLine(string.Join("\t", parser.GetHeaders()) + "\t" + "PrecursorIntensity");
                for (var row = 0; row < parser.NumData; row++)
                {
                    writer.WriteLine(parser.GetRows()[row] + "\t" + precursorIntensities[row]);
                }
            }

            Console.WriteLine("Done");
        }
 /// <summary>
 /// Stores the training inputs and starts with empty result dictionaries.
 /// </summary>
 /// <param name="spectra">Spectra to store.</param>
 /// <param name="ionTypes">Ion types per group parameter to store.</param>
 /// <param name="tolerance">Tolerance to store.</param>
 /// <param name="maxCharge">Maximum charge to store.</param>
 public RatioScoreTrainerUsingMgfFile(List<MSMSSpectrum> spectra, Dictionary<GroupParameter, List<IonType>> ionTypes, Tolerance tolerance, int maxCharge)
 {
     // Training inputs.
     _maxCharge = maxCharge;
     _tolerance = tolerance;
     _ionTypes = ionTypes;
     _spectra = spectra;

     // Result tables, all empty at construction.
     _allNumberDictionary = new Dictionary<GroupParameter, double>();
     NoIonProbDictionary = new Dictionary<GroupParameter, double>();
     RatioProbDictionary = new Dictionary<GroupParameter, Dictionary<Tuple<IonType, IonType>, Dictionary<int, double>>>();
 }
 /// <summary>
 /// Stores the run and scoring parameters, creates a <c>RankScore</c> fixed to
 /// HCD / Orbitrap / Trypsin / Standard, and starts with an empty scored-spectrum cache.
 /// </summary>
 /// <param name="run">Value assigned to <c>Run</c>.</param>
 /// <param name="aaSet">Value assigned to <c>AminoAcidSet</c>.</param>
 /// <param name="minProductCharge">Value assigned to <c>MinProductCharge</c>.</param>
 /// <param name="maxProductCharge">Value assigned to <c>MaxProductCharge</c>.</param>
 /// <param name="tolerance">Value assigned to <c>Tolerance</c>.</param>
 public InformedBottomUpScorer(LcMsRun run, AminoAcidSet aaSet, int minProductCharge, int maxProductCharge, Tolerance tolerance)
 {
     Run = run;
     AminoAcidSet = aaSet;
     Tolerance = tolerance;
     MinProductCharge = minProductCharge;
     MaxProductCharge = maxProductCharge;

     // The scorer is created before the dictionary, as in the original.
     _rankScorer = new RankScore(ActivationMethod.HCD, Ms2DetectorType.Orbitrap, Enzyme.Trypsin, Protocol.Standard);
     _scoredSpectra = new Dictionary<int, ScoredSpectrum>();
 }
Beispiel #30
0
        /// <summary>
        /// Deconvolutes the peaks of <paramref name="spec"/> and wraps them, together with the source
        /// spectrum, in a <c>DeconvolutedSpectrum</c>.
        /// </summary>
        /// <param name="spec">Spectrum whose <c>Peaks</c> are deconvoluted.</param>
        /// <param name="minCharge">Minimum charge, forwarded to <c>GetDeconvolutedPeaks</c>.</param>
        /// <param name="maxCharge">Maximum charge, forwarded to <c>GetDeconvolutedPeaks</c>.</param>
        /// <param name="isotopeOffsetTolerance">Isotope offset tolerance, forwarded to <c>GetDeconvolutedPeaks</c>.</param>
        /// <param name="filteringWindowSize">Filtering window size, forwarded to <c>GetDeconvolutedPeaks</c>.</param>
        /// <param name="tolerance">Tolerance, forwarded to <c>GetDeconvolutedPeaks</c>.</param>
        /// <param name="corrScoreThreshold">Correlation score threshold, forwarded to <c>GetDeconvolutedPeaks</c>.</param>
        /// <returns>A new <c>DeconvolutedSpectrum</c> built from <paramref name="spec"/> and the deconvoluted peaks as an array.</returns>
        public static DeconvolutedSpectrum GetDeconvolutedSpectrum(
                    Spectrum spec, int minCharge, int maxCharge,
                    int isotopeOffsetTolerance, double filteringWindowSize,
                    Tolerance tolerance, double corrScoreThreshold = 0.7)
        {
            var deconvolutedPeaks = GetDeconvolutedPeaks(
                spec.Peaks,
                minCharge,
                maxCharge,
                isotopeOffsetTolerance,
                filteringWindowSize,
                tolerance,
                corrScoreThreshold).ToArray();

            return new DeconvolutedSpectrum(spec, deconvolutedPeaks);
        }