/// <summary>
/// Builds the theoretical isotopic profile for an empirical formula, optionally with one
/// element carrying a heavy-isotope label.
/// </summary>
/// <param name="baseEmpiricalFormula">Empirical formula the profile is generated for.</param>
/// <param name="elementLabelled">Element that carries the label; an empty string means no labelling.</param>
/// <param name="lightIsotope">Light isotope identifier, forwarded to the abundance calculation and to <c>SetLabeling</c>.</param>
/// <param name="heavyIsotope">Heavy isotope identifier, forwarded to the abundance calculation and to <c>SetLabeling</c>.</param>
/// <param name="percentHeavyLabel">Amount of heavy label; a value of 0 means no labelling.</param>
/// <param name="chargeState">Charge state stored on the profile and used for the m/z calculation.</param>
/// <returns>The isotopic profile with monoisotopic mass, m/z values and charge state filled in.</returns>
public IsotopicProfile CreateIsotopicProfileFromEmpiricalFormula(string baseEmpiricalFormula, string elementLabelled, int lightIsotope, int heavyIsotope, double percentHeavyLabel, int chargeState = 1)
{
    var isUnlabelled = elementLabelled == "" || percentHeavyLabel == 0;

    IsotopicProfile iso;
    if (isUnlabelled)
    {
        iso = IsotopicDistributionCalculator.Instance.GetIsotopePattern(baseEmpiricalFormula);
    }
    else
    {
        var abundanceLightIsotopeLabeled1 = CalculateAbundanceLightIsotope(elementLabelled, lightIsotope, percentHeavyLabel);
        var abundanceHeavyIsotopeLabeled1 = CalculateAbundanceHeavyIsotope(elementLabelled, heavyIsotope, percentHeavyLabel);

        IsotopicDistributionCalculator.Instance.SetLabeling(elementLabelled, lightIsotope, abundanceLightIsotopeLabeled1, heavyIsotope, abundanceHeavyIsotopeLabeled1);
        try
        {
            iso = IsotopicDistributionCalculator.Instance.GetIsotopePattern(baseEmpiricalFormula);
        }
        finally
        {
            // The calculator is reached through a shared Instance; always undo the labelling,
            // even when pattern generation throws, so later callers do not see labelled abundances.
            IsotopicDistributionCalculator.Instance.ResetToUnlabeled();
        }
    }

    var monoisotopicMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(baseEmpiricalFormula);
    iso.MonoIsotopicMass = monoisotopicMass;

    CalculateMZValuesForLabeledProfile(iso, baseEmpiricalFormula, elementLabelled, chargeState, lightIsotope, heavyIsotope);
    iso.ChargeState = chargeState;

    return iso;
}
/// <summary>
/// Creates one neutral-mass parent target per empirical formula and attaches the
/// charge-state child targets produced by <c>CreateChargeStateTargets</c>.
/// </summary>
/// <param name="empiricalFormulaList">Empirical formulas to turn into targets.</param>
/// <param name="minMZObs">Lower m/z bound forwarded to <c>CreateChargeStateTargets</c>.</param>
/// <param name="maxMZObserved">Upper m/z bound forwarded to <c>CreateChargeStateTargets</c>.</param>
/// <returns>The parent targets, in input order, with IDs numbered from 0.</returns>
public List<IqTarget> CreateTargets(IEnumerable<string> empiricalFormulaList, double minMZObs = 400, double maxMZObserved = 1500)
{
    var parents = new List<IqTarget>();

    foreach (var empiricalFormula in empiricalFormulaList)
    {
        IqTarget parent = new IqTargetBasic();
        parent.EmpiricalFormula = empiricalFormula;

        // Exactly one parent is appended per iteration, so the current count is the next sequential ID.
        parent.ID = parents.Count;
        parent.MonoMassTheor = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(parent.EmpiricalFormula);
        parent.ElutionTimeTheor = 0.5;

        // Charge state 0 marks the parent as the neutral mass.
        parent.ChargeState = 0;

        parent.AddTargetRange(CreateChargeStateTargets(parent, minMZObs, maxMZObserved));
        parents.Add(parent);
    }

    return parents;
}
/// <summary>
/// Checks that a formula containing iron (Fe) yields a positive monoisotopic mass.
/// </summary>
public void GetMonoisotopicMassForEmpiricalFormulaWithIronTest1()
{
    const string ironContainingFormula = "C145 H208 N39 O40 S2 Fe1";

    var mass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(ironContainingFormula);

    Assert.IsTrue(mass > 0);
    Console.WriteLine("monoisotopic mass= \t" + mass);
}
/// <summary>
/// Rejects a sequence whose PTM mass is negative and larger in magnitude than half of the
/// monoisotopic mass computed from the sequence's empirical formula.
/// </summary>
/// <param name="sequence">Sequence code, passed to both the PTM-mass and the formula conversion.</param>
/// <returns><c>false</c> when the negative PTM exceeds half the sequence mass; otherwise <c>true</c>.</returns>
public bool CheckSequenceIntegrity(string sequence)
{
    var modificationMass = PtmMassFromCode(sequence);
    var formula = SequenceToEmpiricalFormula(sequence);
    var unmodifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(formula);

    var removesMoreThanHalf = modificationMass < 0 && Math.Abs(modificationMass) > unmodifiedMass / 2;
    return !removesMoreThanHalf;
}
/// <summary>
/// Checks the monoisotopic mass computed for the peptide "SAMPLER" against a fixed
/// reference value, rounded to five decimals.
/// </summary>
public void GetMonoisotopicMassFromEmpiricalFormulaTest1()
{
    const string peptideSequence = "SAMPLER";

    var utils = new PeptideUtils();
    var peptideFormula = utils.GetEmpiricalFormulaForPeptideSequence(peptideSequence);
    var mass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(peptideFormula);

    Assert.IsTrue(mass > 0);
    Console.WriteLine("SAMPLER monoisotopic mass= \t" + mass);

    // Note: Peptide Util reports 802.40071, as does MacCoss's lab:
    // http://proteome.gs.washington.edu/cgi-bin/aa_calc.pl
    var roundedMass = (decimal)Math.Round(mass, 5);
    Assert.AreEqual(802.40072m, roundedMass);
}
/// <summary>
/// Generates the theoretical isotopic profile for an empirical formula and, for a
/// non-zero charge state, fills in its masses via <c>CalculateMassesForIsotopicProfile</c>.
/// </summary>
/// <param name="empiricalFormula">Empirical formula to generate the isotope pattern for.</param>
/// <param name="chargeState">Charge state stored on the profile; 0 skips the mass calculation.</param>
/// <returns>The generated isotopic profile.</returns>
public override IsotopicProfile GenerateTheorProfile(string empiricalFormula, int chargeState)
{
    var profile = _isotopicDistCalculator.GetIsotopePattern(empiricalFormula);
    profile.ChargeState = chargeState;

    // Computed before the charge check, exactly as before, so an invalid formula fails either way.
    var neutralMonoMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(empiricalFormula);

    if (chargeState == 0)
    {
        return profile;
    }

    CalculateMassesForIsotopicProfile(profile, neutralMonoMass, chargeState);
    return profile;
}
/// <summary>
/// Loads mass-tag targets from the database and, when <c>IsEmpiricalFormulaExtracted</c> is set,
/// derives each target's empirical formula from its peptide code plus any modification formulas.
/// </summary>
/// <returns>The loaded targets.</returns>
public List<IqTarget> Import()
{
    var targets = GetMassTagDataFromDb();

    if (!IsEmpiricalFormulaExtracted)
    {
        return targets;
    }

    GetModDataFromDb(_targetsContainingMods);
    var peptideUtils = new PeptideUtils();

    foreach (IqTargetDms iqTarget in targets)
    {
        var formula = peptideUtils.GetEmpiricalFormulaForPeptideSequence(iqTarget.Code);

        if (!string.IsNullOrEmpty(iqTarget.ModDescription))
        {
            var current = iqTarget;
            var modsForTarget = _massTagModData.Where(n => n.Item1 == current.ID);

            foreach (var mod in modsForTarget)
            {
                var modFormula = mod.Item4;
                try
                {
                    formula = EmpiricalFormulaUtilities.AddFormula(formula, modFormula);
                }
                catch (Exception ex)
                {
                    // NOTE(review): the message says the target was not imported, but execution
                    // continues and the formula accumulated so far is still assigned below.
                    IqLogger.Log.Debug("Failed to calculate empirical formula for the Target " + current.ID + " (" + ex.Message + ")" + "; Having trouble with the mod: " + modFormula + "; This Target was NOT imported!!");
                }
            }
        }

        iqTarget.EmpiricalFormula = formula;

        if (IsMonoMassCalculatedFromEmpiricalFormula)
        {
            iqTarget.MonoMassTheor = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(iqTarget.EmpiricalFormula);
        }
    }

    return targets;
}
/// <summary>
/// Round-trips a large negative PTM mass through the code parser: the mass is converted to an
/// empirical formula and the formula's monoisotopic mass must match the absolute PTM mass.
/// </summary>
public void AveragineRoundTripTest()
{
    var parser = new IqCodeParser();

    var ptmDouble = -11849.17;

    // Format with the invariant culture so the bracketed mass always uses '.' as the decimal
    // separator, regardless of the machine's regional settings.
    var ptmMass = "[" + ptmDouble.ToString(System.Globalization.CultureInfo.InvariantCulture) + "]";

    // The empirical formula holds a positive number of atoms (not negative),
    // so the comparison below has to use the positive mass value.
    var absPtmDouble = Math.Abs(ptmDouble);

    var empiricalFormula = parser.GetEmpiricalFormulaFromSequence(ptmMass);
    Console.WriteLine("This is my emperical formula:" + empiricalFormula + ":");

    var returnedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(empiricalFormula);
    Console.WriteLine(empiricalFormula);
    Console.WriteLine(returnedMass);

    Assert.AreEqual(absPtmDouble, returnedMass, .0001);
}
/// <summary>
/// Checks that a proteoform carrying a very large negative PTM gives the same composition and
/// mass as subtracting the PTM composition from the unmodified proteoform.
/// </summary>
public void ProteinSequenceToMassHugePTMTest()
{
    var codeParser = new IqCodeParser();

    const string modifiedCode = "M.V(HLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVC)[-11849.17]VLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH.";
    const string unmodifiedCode = "M.VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH.";
    const string ptmCode = "[-11849.17]";

    // Only one significant decimal really; could be 4008.07 or .08. Can't tell yet.
    const double expectedMass = 4008.08;

    var modifiedComposition = codeParser.GetEmpiricalFormulaFromSequence(modifiedCode);
    var unmodifiedComposition = codeParser.GetEmpiricalFormulaFromSequence(unmodifiedCode);
    var ptmComposition = codeParser.GetEmpiricalFormulaFromSequence(ptmCode);

    var subtractedComposition = EmpiricalFormulaUtilities.SubtractFormula(unmodifiedComposition, ptmComposition);

    Console.WriteLine(modifiedComposition);
    Console.WriteLine(subtractedComposition);
    Assert.AreEqual(modifiedComposition, subtractedComposition);

    var subtractedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(subtractedComposition);
    var modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedComposition);

    var unmodifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(unmodifiedComposition);
    Console.WriteLine(unmodifiedMass);

    var ptmOnlyMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(ptmComposition);
    Console.WriteLine(ptmOnlyMass);

    // Same subtraction, done on masses instead of compositions.
    var massDifference = unmodifiedMass - ptmOnlyMass;
    Console.WriteLine(massDifference);

    Assert.AreEqual(expectedMass, massDifference, 0.1);
    Assert.AreEqual(expectedMass, subtractedMass, .1);
    Assert.AreEqual(expectedMass, modifiedMass, .1);
}
/// <summary>
/// Adds the phosphorylation formula (HPO3) to the "SAMPLER" peptide formula and checks that
/// the monoisotopic mass rises by 80.0 after rounding to one decimal.
/// </summary>
public void AddPhosphorylationTest1()
{
    var utils = new PeptideUtils();
    var baseFormula = utils.GetEmpiricalFormulaForPeptideSequence("SAMPLER");
    var phosphorylatedFormula = EmpiricalFormulaUtilities.AddFormula(baseFormula, "HPO3");

    var baseMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(baseFormula);
    var phosphorylatedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(phosphorylatedFormula);
    var massShift = Math.Round(phosphorylatedMass - baseMass, 1, MidpointRounding.AwayFromZero);

    Console.WriteLine(baseFormula + "\t" + baseMass);
    Console.WriteLine(phosphorylatedFormula + "\t" + phosphorylatedMass);
    Console.WriteLine("diff= " + massShift);

    Assert.AreEqual(80.0, massShift);
}
/// <summary>
/// Subtracts the pyroglutamate formula (H3N1) from the "SAMPLER" peptide formula and checks
/// that the monoisotopic mass drops by 17.0 after rounding to one decimal.
/// </summary>
public void PyroglutamateTest1()
{
    var utils = new PeptideUtils();
    var baseFormula = utils.GetEmpiricalFormulaForPeptideSequence("SAMPLER");
    var pyroglutamateFormula = EmpiricalFormulaUtilities.SubtractFormula(baseFormula, "H3N1");

    var baseMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(baseFormula);
    var pyroglutamateMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(pyroglutamateFormula);
    var massShift = Math.Round(pyroglutamateMass - baseMass, 1, MidpointRounding.AwayFromZero);

    Console.WriteLine(baseFormula + "\t" + baseMass);
    Console.WriteLine(pyroglutamateFormula + "\t" + pyroglutamateMass);
    Console.WriteLine("diff= " + massShift);

    Assert.AreEqual(-17.0, massShift);
}
/// <summary>
/// Adds the acetylation formula (C2H2O) to the "SAMPLER" peptide formula and checks that the
/// monoisotopic mass rises by 42.0 after rounding to one decimal.
/// </summary>
public void AddAcetylationTest1()
{
    const string peptideSequence = "SAMPLER";
    const string acetylGroup = "C2H2O";

    var utils = new PeptideUtils();
    var baseFormula = utils.GetEmpiricalFormulaForPeptideSequence(peptideSequence);
    var acetylatedFormula = EmpiricalFormulaUtilities.AddFormula(baseFormula, acetylGroup);

    var baseMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(baseFormula);
    var acetylatedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(acetylatedFormula);
    var massShift = Math.Round(acetylatedMass - baseMass, 1, MidpointRounding.AwayFromZero);

    Console.WriteLine(baseFormula + "\t" + baseMass);
    Console.WriteLine(acetylatedFormula + "\t" + acetylatedMass);
    Console.WriteLine("diff= " + massShift);

    Assert.AreEqual(42.0, massShift);
}
/// <summary>
/// Scratch test: prints the monoisotopic mass and doubly-charged m/z of a peptide, then
/// round-trips the formula "H(3) C(2) N O" through the element table and prints its mass.
/// Makes no assertions.
/// </summary>
public void TempTest1()
{
    const string peptideCode = "FEQDGENYTGTIDGNMGAYAR";
    var utils = new PeptideUtils();

    // The formula itself is not used afterwards; the call is kept so the test still exercises it.
    utils.GetEmpiricalFormulaForPeptideSequence(peptideCode);

    var peptideMass = utils.GetMonoIsotopicMassForPeptideSequence(peptideCode);
    var doublyChargedMz = peptideMass / 2 + DeconTools.Backend.Globals.PROTON_MASS;
    Console.WriteLine(peptideMass + "\t" + doublyChargedMz);

    var elementTable = EmpiricalFormulaUtilities.ParseEmpiricalFormulaString("H(3) C(2) N O");
    var normalizedFormula = EmpiricalFormulaUtilities.GetEmpiricalFormulaFromElementTable(elementTable);
    var normalizedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(normalizedFormula);

    Console.WriteLine("iodomass= " + normalizedMass);
}
/// <summary>
/// Fills in a target's missing empirical formula, monoisotopic mass and theoretical m/z.
/// </summary>
/// <remarks>
/// A missing formula is derived from the target's code when one exists. Otherwise the target
/// is labelled "AVERAGINE" and its formula is generated from the monoisotopic mass.
/// </remarks>
/// <param name="target">Target to update in place.</param>
/// <param name="calcAveragineForMissingEmpiricalFormula">Not read by this implementation.</param>
/// <param name="cysteinesAreModified">Forwarded to the code parser when deriving the formula from the code.</param>
/// <exception cref="ApplicationException">The target has no formula, no code and no monoisotopic mass.</exception>
public virtual void UpdateTargetMissingInfo(IqTarget target, bool calcAveragineForMissingEmpiricalFormula = true, bool cysteinesAreModified = false)
{
    var monoMassUnknown = target.MonoMassTheor <= 0;

    if (String.IsNullOrEmpty(target.EmpiricalFormula))
    {
        var codeKnown = !String.IsNullOrEmpty(target.Code);

        if (codeKnown)
        {
            // Derive the formula from the sequence code.
            target.EmpiricalFormula = IqCodeParser.GetEmpiricalFormulaFromSequence(target.Code, cysteinesAreModified);
        }
        else if (monoMassUnknown)
        {
            throw new ApplicationException(
                      "Trying to fill in missing data on target, but Target is missing both the 'Code' and the Monoisotopic Mass. One or the other is needed.");
        }
        else
        {
            target.Code = "AVERAGINE";
            target.EmpiricalFormula = IsotopicDistributionCalculator.GetAveragineFormulaAsString(target.MonoMassTheor);
        }
    }

    if (monoMassUnknown)
    {
        target.MonoMassTheor = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(target.EmpiricalFormula);
    }

    // A charge state of 0 leaves MZTheor untouched.
    if (target.ChargeState != 0)
    {
        target.MZTheor = target.MonoMassTheor / target.ChargeState + DeconTools.Backend.Globals.PROTON_MASS;
    }
}
/// <summary>
/// Reads peptide targets from the text file at <c>m_filename</c>, one target per data line.
/// </summary>
/// <remarks>
/// The first line is treated as the header. A missing monoisotopic mass is computed from the
/// empirical formula when one is available. A target without a charge state is expanded into
/// copies for charge states 1-99 whose m/z lies strictly between 400 and 1500; at most the
/// first three copies are kept.
/// </remarks>
/// <param name="TargetIDsToFilterOn">IDs to keep; <c>null</c> or an empty list disables filtering.</param>
/// <returns>The imported targets.</returns>
/// <exception cref="Exception">
/// A data line could not be converted; the message names the line and the original exception
/// is attached as <see cref="Exception.InnerException"/>.
/// </exception>
public TargetCollection Import(List<int> TargetIDsToFilterOn)
{
    var filterOnTargetIDs = TargetIDsToFilterOn != null && TargetIDsToFilterOn.Count > 0;

    // Hash the filter once so the per-line membership test is not a linear list scan.
    var idFilter = filterOnTargetIDs ? new HashSet<int>(TargetIDsToFilterOn) : null;

    var data = new TargetCollection();

    using (var reader = new StreamReader(m_filename))
    {
        var headerLine = reader.ReadLine();    // first line is the header line.
        _headers = processLine(headerLine);

        var lineCounter = 1;
        while (reader.Peek() != -1)
        {
            var line = reader.ReadLine();
            lineCounter++;
            var lineData = processLine(line);

            PeptideTarget massTag;
            try
            {
                massTag = convertTextToMassTag(lineData);

                if (filterOnTargetIDs && !idFilter.Contains(massTag.ID))
                {
                    continue;
                }
            }
            catch (Exception ex)
            {
                var msg = "Importer failed. Error reading line: " + lineCounter.ToString() + "\nDetails: " + ex.Message;

                // Keep the original exception (and its stack trace) reachable for callers and logs.
                throw new Exception(msg, ex);
            }

            if (!massTag.ContainsMods && String.IsNullOrEmpty(massTag.EmpiricalFormula))
            {
                massTag.EmpiricalFormula = massTag.GetEmpiricalFormulaFromTargetCode();
            }

            var massTagMassInfoMissing = (Math.Abs(massTag.MonoIsotopicMass - 0) < double.Epsilon);
            var chargeStateInfoIsAvailable = massTag.ChargeState != 0;

            if (massTagMassInfoMissing && !String.IsNullOrEmpty(massTag.EmpiricalFormula))
            {
                massTag.MonoIsotopicMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(massTag.EmpiricalFormula);

                if (chargeStateInfoIsAvailable)
                {
                    massTag.MZ = massTag.MonoIsotopicMass / massTag.ChargeState + Globals.PROTON_MASS;
                }
            }

            if (chargeStateInfoIsAvailable)
            {
                data.TargetList.Add(massTag);
                continue;
            }

            // No charge state given: create one copy per charge state whose m/z is in range.
            double minMZToConsider = 400;
            double maxMZToConsider = 1500;

            var targetList = new List<PeptideTarget>();
            for (var chargeState = 1; chargeState < 100; chargeState++)
            {
                var calcMZ = massTag.MonoIsotopicMass / chargeState + Globals.PROTON_MASS;
                if (calcMZ > minMZToConsider && calcMZ < maxMZToConsider)
                {
                    var copiedMassTag = new PeptideTarget(massTag);    // we need to create multiple mass tags
                    copiedMassTag.ChargeState = (short)chargeState;
                    copiedMassTag.MZ = calcMZ;
                    targetList.Add(copiedMassTag);
                }
            }

            data.TargetList.AddRange(targetList.Take(3));
        }
    }

    // The former trailing loop over data.TargetList had an empty body (its contents were
    // commented out), so it has been removed.
    return data;
}
/// <summary>
/// For each unmodified/modified sequence pair, checks that the mass of the formula produced by
/// the MSAlign importer differs from the unmodified mass by the expected whole-number shift:
/// -17 for pyroglutamate and +42 for acetylation.
/// </summary>
public void TestGetEmpiricalFormulaForModifiedPeptideSequences()
{
    // Key: unmodified sequence. Value: same sequence with a [-17.03] modification.
    var pyroglutamateCodes = new Dictionary<string, string>
    {
        {
            "M.QVYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P",
            "M.(Q)[-17.03]VYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P"
        },
        {
            ".QFHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N",
            ".(Q)[-17.03]FHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N"
        },
        {
            ".QRFKLWVFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L",
            ".(QRFKLW)[-17.03]VFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L"
        }
    };

    // Key: unmodified sequence. Value: same sequence with a [42.01] modification.
    var acetylationCodes = new Dictionary<string, string>
    {
        {
            "M.SVYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P",
            "M.(S)[42.01]VYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P"
        },
        {
            ".MFHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N",
            ".(M)[42.01]FHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N"
        },
        {
            ".MRFKLWVFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L",
            ".(MRFKLW)[42.01]VFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L"
        }
    };

    var importer = new MassTagFromMSAlignFileImporter(String.Empty);
    var utils = new PeptideUtils();

    Console.WriteLine("pyroglutamate");
    foreach (var entry in pyroglutamateCodes)
    {
        string plainFormula = utils.GetEmpiricalFormulaForPeptideSequence(entry.Key);
        string modifiedFormula = importer.GetEmpiricalFormulaForSequenceWithMods(entry.Value);

        double plainMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(plainFormula);
        double modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);
        double shift = modifiedMass - plainMass;

        Console.WriteLine(plainFormula + "(" + plainMass + ")\t" + modifiedFormula + "(" + modifiedMass + "); diff=" + shift);
        Assert.AreEqual(-17, Math.Round(shift, 0, MidpointRounding.AwayFromZero));
    }

    Console.WriteLine("acetylation");
    foreach (var entry in acetylationCodes)
    {
        string plainFormula = utils.GetEmpiricalFormulaForPeptideSequence(entry.Key);
        string modifiedFormula = importer.GetEmpiricalFormulaForSequenceWithMods(entry.Value);

        double plainMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(plainFormula);
        double modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);
        double shift = modifiedMass - plainMass;

        Console.WriteLine(plainFormula + "(" + plainMass + ")\t" + modifiedFormula + "(" + modifiedMass + "); diff=" + shift);
        Assert.AreEqual(42, Math.Round(shift, 0, MidpointRounding.AwayFromZero));
    }
}
/// <summary>
/// Loads alignment targets from a first-hits file, keeps those whose recalculated monoisotopic
/// mass agrees with the file's value, reduces them to the best-scoring target per
/// code/charge-state pair, and assigns the parent and child workflows.
/// </summary>
/// <param name="targetsFilePath">Path of the first-hits file; nothing is loaded when it is null, empty or missing.</param>
public override void LoadAndInitializeTargets(string targetsFilePath)
{
    if (string.IsNullOrEmpty(targetsFilePath))
    {
        IqLogger.Log.Info("IqMassAndNetAligner - no alignment targets were loaded. The inputted targets file path is NULL.");
        return;
    }

    if (!File.Exists(targetsFilePath))
    {
        IqLogger.Log.Info("IqMassAndNetAligner - no alignment targets were loaded. The inputted targets file path is does not exist.");
        return;
    }

    var firstHitsImporter = new IqTargetsFromFirstHitsFileImporter(targetsFilePath);
    Targets = firstHitsImporter.Import()
                               .Where(p => p.QualityScore < 0.01)
                               .OrderBy(p => p.ID)
                               .ToList();

    // Recompute each target's empirical formula and monoisotopic mass from its cleaned-up code.
    var acceptedTargets = new List<IqTarget>();
    foreach (var candidate in Targets)
    {
        candidate.Code = _peptideUtils.CleanUpPeptideSequence(candidate.Code);
        if (!_peptideUtils.ValidateSequence(candidate.Code))
        {
            continue;
        }

        candidate.EmpiricalFormula = _peptideUtils.GetEmpiricalFormulaForPeptideSequence(candidate.Code, true, true);
        var recalculatedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(candidate.EmpiricalFormula);
        var massFromFile = candidate.MonoMassTheor;

        // Keep only targets whose two masses agree to within 0.02.
        if (!(Math.Abs(massFromFile - recalculatedMass) < 0.02))
        {
            continue;
        }

        candidate.MonoMassTheor = recalculatedMass;
        candidate.ElutionTimeTheor = candidate.ScanLC / (double)Run.MaxLCScan;
        acceptedTargets.Add(candidate);

        _targetUtilities.UpdateTargetMissingInfo(candidate, true);

        var chargeStateChild = new IqTargetMsgfFirstHit();
        _targetUtilities.CopyTargetProperties(candidate, chargeStateChild);
        candidate.AddTarget(chargeStateChild);
    }

    // One target per (code, charge state): the one with the lowest quality score.
    Targets = acceptedTargets
              .GroupBy(n => new { n.Code, n.ChargeState })
              .Select(grp => grp.OrderBy(p => p.QualityScore).First())
              .ToList();

    TargetedWorkflowParameters workflowParameters = new BasicTargetedWorkflowParameters();
    workflowParameters.ChromNETTolerance = 0.005;
    workflowParameters.ChromGenTolerance = 50;

    // Define workflows for the parent targets and their children.
    var parentWorkflow = new ChromPeakDeciderIqWorkflow(Run, workflowParameters);
    var childWorkflow = new ChargeStateChildIqWorkflow(Run, workflowParameters);

    var assigner = new IqWorkflowAssigner();
    assigner.AssignWorkflowToParent(parentWorkflow, Targets);
    assigner.AssignWorkflowToChildren(childWorkflow, Targets);

    IqLogger.Log.Info(Targets.Count > 0
                          ? "IqMassAndNetAligner - Loaded " + Targets.Count + " targets for use in mass and net alignment"
                          : "IqMassAndNetAligner - NOTE - no targets have been loaded.");
}
/// <summary>
/// Reads a tab-delimited results file, builds one <c>LcmsFeatureTarget</c> per data row,
/// groups the targets by sequence code and extends each group with additional charge states.
/// </summary>
/// <remarks>
/// Rows whose modified sequence yields no empirical formula are skipped and counted in
/// <c>_dataRowsSkippedUnknownMods</c>. Within a group only the first target for each
/// charge state is kept, and charge state 0 is never emitted.
/// </remarks>
/// <param name="prsmData">Receives protein mass, protein name and e-value per Prsm ID.</param>
/// <returns>The targets, with IDs assigned sequentially starting at 1.</returns>
/// <exception cref="FileNotFoundException">The input file does not exist.</exception>
/// <exception cref="IOException">The input file could not be opened.</exception>
/// <exception cref="InvalidDataException">The file is empty or a row cannot be parsed.</exception>
public TargetCollection Import(out Dictionary<int, PrsmData> prsmData)
{
    prsmData = new Dictionary<int, PrsmData>();

    if (!File.Exists(_filename))
    {
        throw new FileNotFoundException("Input file not found: " + _filename);
    }

    StreamReader reader;
    try
    {
        reader = new StreamReader(_filename);
    }
    catch (Exception ex)
    {
        throw new IOException("There was a problem importing from the file.", ex);
    }

    var targets = new TargetCollection();

    // Group LcmsFeatureTargets by their code
    var proteinSpeciesGroups = new Dictionary<string, List<LcmsFeatureTarget>>();

    // The using block disposes the reader on every exit path, so no explicit Close() is needed.
    using (var sr = reader)
    {
        if (sr.Peek() == -1)
        {
            throw new InvalidDataException("There is no data in the file we are trying to read.");
        }

        var columnHeaders = sr.ReadLine().Split('\t').ToList();
        var columnMapping = GetColumnMapping(columnHeaders);

        var lineCounter = 0;    // used for tracking which line is being processed.

        // read and process each line of the file
        while (sr.Peek() > -1)
        {
            ++lineCounter;
            _dataRowsProcessed = lineCounter;

            var processedData = sr.ReadLine().Split('\t').ToList();

            // ensure that processed line is the same size as the header line
            if (processedData.Count != columnHeaders.Count)
            {
                throw new InvalidDataException("Data in row #" + lineCounter.ToString(CultureInfo.InvariantCulture) + " is invalid - \nThe number of columns does not match that of the header line");
            }

            // Get Prsm_ID
            int prsmId;
            if (!int.TryParse(processedData[columnMapping[PRSM_ID_HEADER]], out prsmId))
            {
                throw new InvalidDataException("Could not parse Prsm ID.");
            }

            // Get scan
            int scanLcTarget;
            if (!int.TryParse(processedData[columnMapping[SCAN_HEADER]], out scanLcTarget))
            {
                throw new InvalidDataException("Could not parse scan number.");
            }

            // Get charge state
            short chargeState;
            if (!short.TryParse(processedData[columnMapping[CHARGE_HEADER]], out chargeState))
            {
                throw new InvalidDataException("Could not parse charge.");
            }

            // Get code
            var code = processedData[columnMapping[PEPTIDE_HEADER]];

            string empiricalFormula;
            if (code.Contains("("))
            {
                // Modified species, try to get empirical formula
                empiricalFormula = GetEmpiricalFormulaForSequenceWithMods(code);

                // Unknown modification in sequence, skip
                if (String.IsNullOrEmpty(empiricalFormula))
                {
                    ++_dataRowsSkippedUnknownMods;
                    continue;
                }
            }
            else
            {
                empiricalFormula = _peptideUtils.GetEmpiricalFormulaForPeptideSequence(code);
            }

            // Get monoisotopic mass
            var monoisotopicMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(empiricalFormula);

            // Get Protein_mass
            double proteinMass;
            if (!double.TryParse(processedData[columnMapping[PROTEIN_MASS_HEADER]], out proteinMass))
            {
                throw new InvalidDataException("Could not parse protein mass.");
            }

            // Get protein name
            var proteinName = processedData[columnMapping[PROTEIN_NAME_HEADER]];

            // Get score
            var eValueText = processedData[columnMapping[E_VALUE_HEADER]];
            double eValueDbl;
            if (!double.TryParse(eValueText, out eValueDbl))
            {
                // Case-insensitive match: the previous check lower-cased the text and then compared
                // it with the mixed-case literal "Infinity", which could never succeed.
                if (string.Equals(eValueText, "Infinity", StringComparison.OrdinalIgnoreCase))
                {
                    eValueDbl = float.MaxValue;
                }
                else
                {
                    throw new InvalidDataException("Could not parse e-value.");
                }
            }

            // Clamp to the float range before narrowing.
            var eValue = eValueDbl > float.MaxValue ? float.MaxValue : (float)eValueDbl;

            // Make Prsm
            prsmData.Add(prsmId, new PrsmData { ProteinMass = proteinMass, ProteinName = proteinName, EValue = eValue });

            // Create target
            var target = new LcmsFeatureTarget
            {
                FeatureToMassTagID = prsmId,
                ID = -1,
                ElutionTimeUnit = DeconTools.Backend.Globals.ElutionTimeUnit.ScanNum,
                ScanLCTarget = scanLcTarget,
                Code = code,
                EmpiricalFormula = empiricalFormula,
                MonoIsotopicMass = monoisotopicMass,
                ChargeState = chargeState,
                MZ = monoisotopicMass / chargeState + DeconTools.Backend.Globals.PROTON_MASS,
            };

            // Single lookup instead of ContainsKey followed by the indexer.
            List<LcmsFeatureTarget> speciesGroup;
            if (!proteinSpeciesGroups.TryGetValue(code, out speciesGroup))
            {
                speciesGroup = new List<LcmsFeatureTarget>();
                proteinSpeciesGroups.Add(code, speciesGroup);
            }

            speciesGroup.Add(target);
        }
    }

    // Loop through each protein species group and add in the missing charge states
    var idCounter = 0;
    foreach (var keyValuePair in proteinSpeciesGroups)
    {
        var targetGroup = AddChargeStates(keyValuePair.Value, EXTEND_CHARGE_RANGE);

        // Seeded with 0 so targets with charge state 0 are never emitted.
        var seenChargeStates = new HashSet<short> { 0 };

        // Add the first target of each new charge state, and its ID, to the result
        foreach (var target in targetGroup)
        {
            if (seenChargeStates.Add(target.ChargeState))
            {
                target.ID = ++idCounter;
                targets.TargetList.Add(target);
                targets.TargetIDList.Add(idCounter);
            }
        }
    }

    return targets;
}
/// <summary>
/// Fills in the missing code, empirical formula, monoisotopic mass and m/z of every target in
/// <c>Targets</c>.
/// </summary>
/// <remarks>
/// A missing formula is taken from the matching reference mass tag when one exists, otherwise
/// derived from the target's own code. A target with neither is labelled "AVERAGINE" and gets a
/// formula generated from its monoisotopic mass.
/// </remarks>
/// <exception cref="ApplicationException">A target has no formula, no code and no monoisotopic mass.</exception>
protected virtual void UpdateTargetMissingInfo()
{
    var canUseReferenceMassTags = MassTagsForReference != null && MassTagsForReference.TargetList.Count > 0;

    // Created on first use and shared across targets instead of being rebuilt per target.
    PeptideUtils peptideUtils = null;

    foreach (LcmsFeatureTarget target in Targets.TargetList)
    {
        var isMissingMonoMass = target.MonoIsotopicMass <= 0;

        if (String.IsNullOrEmpty(target.EmpiricalFormula))
        {
            // One pass over the reference list finds the first matching mass tag (or null),
            // replacing the former ID-list Contains check followed by a second First() scan.
            var referenceId = target.FeatureToMassTagID;
            var mt = canUseReferenceMassTags
                         ? MassTagsForReference.TargetList.FirstOrDefault(p => p.ID == referenceId)
                         : null;

            if (mt != null)
            {
                //in DMS, Sequest will put an 'X' when it can't differentiate 'I' and 'L'
                //   see:   \\gigasax\DMS_Parameter_Files\Sequest\sequest_ETD_N14_NE.params
                //To create the theoretical isotopic profile, we will change the 'X' to 'L'
                if (mt.Code.Contains("X"))
                {
                    mt.Code = mt.Code.Replace('X', 'L');
                    mt.EmpiricalFormula = mt.GetEmpiricalFormulaFromTargetCode();
                }

                target.Code = mt.Code;
                target.EmpiricalFormula = mt.EmpiricalFormula;
            }
            else if (!String.IsNullOrEmpty(target.Code))
            {
                //Create empirical formula based on code. Assume it is an unmodified peptide
                if (target.Code.Contains("X"))
                {
                    target.Code = target.Code.Replace('X', 'L');
                }

                if (peptideUtils == null)
                {
                    peptideUtils = new PeptideUtils();
                }

                target.EmpiricalFormula = peptideUtils.GetEmpiricalFormulaForPeptideSequence(target.Code);
            }
            else
            {
                if (isMissingMonoMass)
                {
                    throw new ApplicationException(
                              "Trying to prepare target list, but Target is missing both the 'Code' and the Monoisotopic Mass. One or the other is needed.");
                }

                target.Code = "AVERAGINE";
                target.EmpiricalFormula = IsotopicDistributionCalculator.GetAveragineFormulaAsString(target.MonoIsotopicMass, false);
            }
        }

        if (isMissingMonoMass)
        {
            target.MonoIsotopicMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(target.EmpiricalFormula);

            // Skip the m/z calculation when the charge state is 0 so it never divides by zero.
            if (target.ChargeState != 0)
            {
                target.MZ = target.MonoIsotopicMass / target.ChargeState + DeconTools.Backend.Globals.PROTON_MASS;
            }
        }
    }
}