/// <summary>
/// Returns an empirical formula string for a sequence code, with or without PTMs.
/// The PTM mass obtained from the code is turned into an Averagine formula, which is
/// then added to the formula of the unmodified sequence (PTM mass not negative) or
/// subtracted from it (PTM mass negative).
/// </summary>
/// <param name="code">Sequence code, optionally carrying PTM mass annotations.</param>
/// <param name="cysteinesAreModified">Passed through unchanged to <c>SequenceToEmpiricalFormula</c>.</param>
/// <returns>The combined formula, or "C0H0N0O0S0" when the combination is null or empty.</returns>
public string GetEmpiricalFormulaFromSequence(string code, bool cysteinesAreModified = false)
{
    var netPtmMass = PtmMassFromCode(code);

    // Averagine is evaluated on the magnitude only; the sign picks add vs. subtract below.
    var averaginePtmFormula = IsotopicDistributionCalculator.Instance.GetAveragineFormulaAsString(Math.Abs(netPtmMass), false);
    var unmodifiedFormula = SequenceToEmpiricalFormula(code, cysteinesAreModified);

    var combinedFormula = netPtmMass < 0
        ? EmpiricalFormulaUtilities.SubtractFormula(unmodifiedFormula, averaginePtmFormula)
        : EmpiricalFormulaUtilities.AddFormula(unmodifiedFormula, averaginePtmFormula);

    //TEMPORARY HANDLING OF BAD TARGETS WITH PTM > SEQUENCE
    //CHECK THE UPDATEMISSINGTARGETINFO IN IQTARGETUTILITES WHEN CHANGING
    return String.IsNullOrEmpty(combinedFormula) ? "C0H0N0O0S0" : combinedFormula;
}
/// <summary>
/// Builds one neutral (charge 0) parent target per empirical formula and attaches to it
/// the child targets returned by <c>CreateChargeStateTargets</c> for the given m/z window.
/// </summary>
/// <param name="empiricalFormulaList">Formulas to convert; IDs are assigned 0, 1, 2, ... in enumeration order.</param>
/// <param name="minMZObs">Lower m/z bound forwarded to <c>CreateChargeStateTargets</c>.</param>
/// <param name="maxMZObserved">Upper m/z bound forwarded to <c>CreateChargeStateTargets</c>.</param>
/// <returns>The parent targets, in input order.</returns>
public List<IqTarget> CreateTargets(IEnumerable<string> empiricalFormulaList, double minMZObs = 400, double maxMZObserved = 1500)
{
    var createdTargets = new List<IqTarget>();
    var nextId = 0;

    foreach (var empiricalFormula in empiricalFormulaList)
    {
        IqTarget neutralTarget = new IqTargetBasic();
        neutralTarget.EmpiricalFormula = empiricalFormula;
        neutralTarget.ID = nextId;
        nextId++;

        // The mass is derived from the value stored on the target, not from the loop variable.
        neutralTarget.MonoMassTheor = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(neutralTarget.EmpiricalFormula);
        neutralTarget.ElutionTimeTheor = 0.5;
        neutralTarget.ChargeState = 0; //this is the neutral mass

        neutralTarget.AddTargetRange(CreateChargeStateTargets(neutralTarget, minMZObs, maxMZObserved));
        createdTargets.Add(neutralTarget);
    }

    return createdTargets;
}
/// <summary>
/// Creates an isotopic profile for an empirical formula, optionally with one element
/// partially labeled with a heavy isotope.
/// </summary>
/// <param name="baseEmpiricalFormula">Empirical formula of the unlabeled compound.</param>
/// <param name="elementLabelled">Symbol of the labeled element; an empty string means no labeling.</param>
/// <param name="lightIsotope">Isotope number of the light isotope of the labeled element.</param>
/// <param name="heavyIsotope">Isotope number of the heavy isotope of the labeled element.</param>
/// <param name="percentHeavyLabel">Heavy-label percentage; 0 means no labeling.</param>
/// <param name="chargeState">Charge state stored on the profile and used for the m/z values.</param>
/// <returns>The profile with its monoisotopic mass, peak m/z values and charge state filled in.</returns>
public IsotopicProfile CreateIsotopicProfileFromEmpiricalFormula(string baseEmpiricalFormula, string elementLabelled, int lightIsotope, int heavyIsotope, double percentHeavyLabel, int chargeState = 1)
{
    var isUnlabelled = elementLabelled == "" || percentHeavyLabel == 0;

    IsotopicProfile iso;
    if (isUnlabelled)
    {
        iso = IsotopicDistributionCalculator.Instance.GetIsotopePattern(baseEmpiricalFormula);
    }
    else
    {
        var abundanceLightIsotopeLabeled1 = CalculateAbundanceLightIsotope(elementLabelled, lightIsotope, percentHeavyLabel);
        var abundanceHeavyIsotopeLabeled1 = CalculateAbundanceHeavyIsotope(elementLabelled, heavyIsotope, percentHeavyLabel);

        IsotopicDistributionCalculator.Instance.SetLabeling(elementLabelled, lightIsotope, abundanceLightIsotopeLabeled1, heavyIsotope, abundanceHeavyIsotopeLabeled1);
        try
        {
            iso = IsotopicDistributionCalculator.Instance.GetIsotopePattern(baseEmpiricalFormula);
        }
        finally
        {
            // The calculator is a shared instance: always undo the labeling, even when the
            // pattern calculation throws, so later callers do not inherit labeled abundances.
            IsotopicDistributionCalculator.Instance.ResetToUnlabeled();
        }
    }

    iso.MonoIsotopicMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(baseEmpiricalFormula);

    // NOTE(review): this runs for the unlabeled case too, where elementLabelled may be "" -
    // confirm that CalculateMZValuesForLabeledProfile copes with that input.
    CalculateMZValuesForLabeledProfile(iso, baseEmpiricalFormula, elementLabelled, chargeState, lightIsotope, heavyIsotope);

    iso.ChargeState = chargeState;
    return iso;
}
// Checks that a formula containing iron (Fe) yields a positive monoisotopic mass.
public void GetMonoisotopicMassForEmpiricalFormulaWithIronTest1()
{
    const string ironContainingFormula = "C145 H208 N39 O40 S2 Fe1";

    var mass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(ironContainingFormula);

    Assert.IsTrue(mass > 0);
    Console.WriteLine("monoisotopic mass= \t" + mass);
}
/// <summary>
/// Parses the formula into <c>_elementTable</c>, runs the frequency calculation, apodization
/// and <c>Realft</c> steps, and unpacks the resulting array into m/z and intensity lists.
/// </summary>
/// <param name="empiricalFormula">Formula to parse; replaces the current <c>_elementTable</c>.</param>
/// <param name="chargeState">Charge state used in the frequency calculation and the m/z values.</param>
/// <returns>
/// Always null. NOTE(review): the m/z and intensity lists built here are discarded and no
/// profile is constructed - this looks unfinished; confirm before relying on the result.
/// </returns>
public IsotopicProfile GetIsotopePattern(string empiricalFormula, int chargeState)
{
    _elementTable = EmpiricalFormulaUtilities.ParseEmpiricalFormulaString(empiricalFormula);
    AddElementsToReferenceTable();

    var spectrum = new double[2 * _mercuryArraySize + 1];

    double monoMass = 0;
    double meanMass = 0;
    GetMonoAndAverageMass(_elementTable, out monoMass, out meanMass);

    CalculateFrequencies(_mercuryArraySize, chargeState, ref spectrum);
    Apodize(ApodizationType, _mercuryArraySize, meanMass, chargeState, ref spectrum);
    Realft(ref spectrum);

    var intensities = new List<double>();
    var mzValues = new List<double>();

    // Upper half of the array: points that sit below the monoisotopic m/z.
    for (var point = _mercuryArraySize / 2 + 1; point <= _mercuryArraySize; point++)
    {
        mzValues.Add((point - _mercuryArraySize - 1) / _pointsPerAtomicMassUnit + monoMass / chargeState + Globals.PROTON_MASS);
        intensities.Add(spectrum[2 * point - 1]);
    }

    // Lower half of the array: points at and above the monoisotopic m/z.
    for (var point = 1; point <= _mercuryArraySize / 2; point++)
    {
        mzValues.Add((point - 1) / _pointsPerAtomicMassUnit + monoMass / chargeState + Globals.PROTON_MASS);
        intensities.Add(spectrum[2 * point - 1]);
    }

    // A commented-out debug dump of the (m/z, intensity) pairs, the monoisotopic mass,
    // the monoisotopic m/z and the average mass used to live here.

    return null;
}
// Parses a Unimod-style formula whose element counts are negative.
public void parseUnimodStyleFormulaTest1()
{
    const string unimodFormula = @"H(-3) N(-1)";

    var elementCounts = EmpiricalFormulaUtilities.ParseEmpiricalFormulaString(unimodFormula);

    Assert.AreEqual(-3, elementCounts["H"]);
    Assert.AreEqual(-1, elementCounts["N"]);

    // The returned string is not inspected; the call only has to complete without throwing.
    EmpiricalFormulaUtilities.GetEmpiricalFormulaFromElementTable(elementCounts);
}
// Parses a compact formula where the last element (Fe) has no explicit count.
public void parseUnimodStyleFormulaTest4()
{
    const string compactFormula = @"H32C34N4O4Fe";

    var elementCounts = EmpiricalFormulaUtilities.ParseEmpiricalFormulaString(compactFormula);

    Assert.AreEqual(34, elementCounts["C"]);
    Assert.AreEqual(32, elementCounts["H"]);
    Assert.AreEqual(1, elementCounts["Fe"]);
}
// Parses a Unimod-style formula mixing plain elements with isotope-prefixed ones (2H, 13C, 15N).
public void ParseUnimodStyleTest5()
{
    const string isotopeFormula = "H(-3) 2H(3) C(-1) 13C O 15N(10)";

    var elementCounts = EmpiricalFormulaUtilities.ParseEmpiricalFormulaString(isotopeFormula);

    Assert.AreEqual(-1, elementCounts["C"]);
    Assert.AreEqual(-3, elementCounts["H"]);
    Assert.AreEqual(1, elementCounts["13C"]);
    Assert.AreEqual(10, elementCounts["15N"]);
}
/// <summary>
/// Rejects a sequence whose PTM mass is negative and larger in magnitude than half of
/// the monoisotopic mass of the sequence formula.
/// </summary>
/// <param name="sequence">Sequence code passed to both the PTM-mass and the formula helpers.</param>
/// <returns>False when the negative PTM exceeds half the sequence mass; true otherwise.</returns>
public bool CheckSequenceIntegrity(string sequence)
{
    var netPtmMass = PtmMassFromCode(sequence);
    var backboneFormula = SequenceToEmpiricalFormula(sequence);
    var backboneMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(backboneFormula);

    var removesMoreThanHalf = netPtmMass < 0 && Math.Abs(netPtmMass) > backboneMass / 2;
    return !removesMoreThanHalf;
}
/// <summary>
/// Returns the Averagine formula for a mass as a string, built either from the
/// integer-rounded element table or from the unrounded one.
/// </summary>
/// <param name="inputMass">Mass handed to the Averagine table builders.</param>
/// <param name="roundToIntegers">True selects <c>GetAveragineFormulaAsTableRoundedToInteger</c>; false selects <c>GetAveragineFormulaAsTable</c>.</param>
/// <returns>The formula string produced from the selected element table.</returns>
public string GetAveragineFormulaAsString(double inputMass, bool roundToIntegers = true)
{
    if (!roundToIntegers)
    {
        var unroundedTable = GetAveragineFormulaAsTable(inputMass);
        return EmpiricalFormulaUtilities.GetEmpiricalFormulaFromElementTable(unroundedTable);
    }

    var roundedTable = GetAveragineFormulaAsTableRoundedToInteger(inputMass);
    return EmpiricalFormulaUtilities.GetEmpiricalFormulaFromElementTable(roundedTable);
}
// Checks the monoisotopic mass computed for the peptide "SAMPLER" to five decimals.
public void GetMonoisotopicMassFromEmpiricalFormulaTest1()
{
    const string peptideSequence = "SAMPLER";
    var sequenceHelper = new PeptideUtils();

    var peptideFormula = sequenceHelper.GetEmpiricalFormulaForPeptideSequence(peptideSequence);
    var mass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(peptideFormula);

    Assert.IsTrue(mass > 0);
    Console.WriteLine("SAMPLER monoisotopic mass= \t" + mass);

    //note that Peptide Util reports 802.40071, as does MacCoss's lab: http://proteome.gs.washington.edu/cgi-bin/aa_calc.pl
    var roundedMass = (decimal)Math.Round(mass, 5);
    Assert.AreEqual(802.40072m, roundedMass);
}
// Same kind of check as the other Unimod parsing tests, but through ParseDoubleEmpiricalFormulaString.
public void parseUnimodStyleFormulaTest2B()
{
    const string deuteratedFormula = @"H26 2H(8) C20 N4 O5 S";

    var elementCounts = EmpiricalFormulaUtilities.ParseDoubleEmpiricalFormulaString(deuteratedFormula);

    Assert.AreEqual(8, elementCounts["2H"]);
    Assert.AreEqual(26, elementCounts["H"]);
    Assert.AreEqual(1, elementCounts["S"]);

    // The returned string is not inspected; the call only has to complete without throwing.
    EmpiricalFormulaUtilities.GetEmpiricalFormulaFromElementTable(elementCounts);
}
// Adds the modification "H3 C2 N O" to the SAMPLER formula and checks the combined formula string.
public void addUnimodFormulaTest1()
{
    const string peptideSequence = "SAMPLER";
    const string modification = "H3 C2 N O";

    var sequenceHelper = new PeptideUtils();
    var unmodifiedFormula = sequenceHelper.GetEmpiricalFormulaForPeptideSequence(peptideSequence);

    var modifiedFormula = EmpiricalFormulaUtilities.AddFormula(unmodifiedFormula, modification);

    Console.WriteLine("Unmodified peptide= " + unmodifiedFormula);
    Console.WriteLine("Modified peptide= " + modifiedFormula);

    Assert.AreEqual("C35H61N11O12S", modifiedFormula);
}
/// <summary>
/// Generates the theoretical isotopic profile for a formula and stamps it with the charge state.
/// For a non-zero charge the peak masses are recalculated via <c>CalculateMassesForIsotopicProfile</c>.
/// </summary>
/// <param name="empiricalFormula">Formula used for both the isotope pattern and the monoisotopic mass.</param>
/// <param name="chargeState">Charge state to store; 0 skips the mass recalculation.</param>
/// <returns>The profile returned by the isotopic distribution calculator, updated in place.</returns>
public override IsotopicProfile GenerateTheorProfile(string empiricalFormula, int chargeState)
{
    var profile = _isotopicDistCalculator.GetIsotopePattern(empiricalFormula);
    profile.ChargeState = chargeState;

    // Computed for every charge state, including 0, as before.
    var neutralMonoMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(empiricalFormula);

    if (chargeState == 0)
    {
        return profile;
    }

    CalculateMassesForIsotopicProfile(profile, neutralMonoMass, chargeState);
    return profile;
}
/// <summary>
/// Loads the mass tag targets from the database. When <c>IsEmpiricalFormulaExtracted</c> is set,
/// each target also gets an empirical formula built from its sequence code plus the formulas of
/// its modifications, and optionally a monoisotopic mass recalculated from that formula.
/// </summary>
/// <returns>The list returned by <c>GetMassTagDataFromDb</c>, updated in place.</returns>
public List<IqTarget> Import()
{
    var importedTargets = GetMassTagDataFromDb();

    if (!IsEmpiricalFormulaExtracted)
    {
        return importedTargets;
    }

    GetModDataFromDb(_targetsContainingMods);

    var sequenceHelper = new PeptideUtils();

    foreach (IqTargetDms dmsTarget in importedTargets)
    {
        var formula = sequenceHelper.GetEmpiricalFormulaForPeptideSequence(dmsTarget.Code);

        if (!string.IsNullOrEmpty(dmsTarget.ModDescription))
        {
            var current = dmsTarget;
            var modsForTarget = _massTagModData.Where(m => m.Item1 == current.ID);

            foreach (var modRecord in modsForTarget)
            {
                var modFormula = modRecord.Item4;

                try
                {
                    formula = EmpiricalFormulaUtilities.AddFormula(formula, modFormula);
                }
                catch (Exception ex)
                {
                    // NOTE(review): the message says the target was not imported, but the target
                    // stays in the list and keeps the formula accumulated so far - confirm intent.
                    IqLogger.Log.Debug("Failed to calculate empirical formula for the Target " + current.ID +
                                       " (" + ex.Message + ")" + "; Having trouble with the mod: " + modFormula +
                                       "; This Target was NOT imported!!");
                }
            }
        }

        dmsTarget.EmpiricalFormula = formula;

        if (IsMonoMassCalculatedFromEmpiricalFormula)
        {
            dmsTarget.MonoMassTheor = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(dmsTarget.EmpiricalFormula);
        }
    }

    return importedTargets;
}
/// <summary>
/// Overwrites the XValue of every peak in the profile with a weighted mix of two m/z
/// estimates: one counted up from the unlabeled monoisotopic peak and one counted down
/// from the fully labeled monoisotopic peak. The weight of the labeled estimate is
/// min(peak index, number of labeled atoms) / number of labeled atoms.
/// </summary>
/// <param name="iso">Profile whose peak list is updated in place; its MonoIsotopicMass is read.</param>
/// <param name="empiricalFormula">Formula used to count the atoms of the labeled element.</param>
/// <param name="elementLabelled">Symbol of the labeled element; must be a key of the parsed formula.</param>
/// <param name="chargeState">Charge state used as the divisor for every mass offset.</param>
/// <param name="lightIsotope">Isotope number appended to the element symbol to form the light isotope key.</param>
/// <param name="heavyIsotope">Isotope number appended to the element symbol to form the heavy isotope key.</param>
private void CalculateMZValuesForLabeledProfile(IsotopicProfile iso, string empiricalFormula, string elementLabelled, int chargeState, int lightIsotope, int heavyIsotope)
{
    var atomCounts = EmpiricalFormulaUtilities.ParseEmpiricalFormulaString(empiricalFormula);
    var labelledAtomCount = atomCounts[elementLabelled];

    // With no peaks the isotope lookups below are never needed (and were never performed).
    if (iso.Peaklist.Count == 0)
    {
        return;
    }

    // The isotope masses do not depend on the peak index, so look them up once.
    var isotopes = Constants.Elements[elementLabelled].IsotopeDictionary;
    var lightMass = isotopes[elementLabelled + lightIsotope].Mass;
    var heavyMass = Constants.Elements[elementLabelled].IsotopeDictionary[elementLabelled + heavyIsotope].Mass;
    var heavyLightDelta = heavyMass - lightMass;

    for (var peakIndex = 0; peakIndex < iso.Peaklist.Count; peakIndex++)
    {
        var monoMZ = iso.MonoIsotopicMass / chargeState + Globals.PROTON_MASS;

        // Estimate 1: step up from the unlabeled monoisotopic peak.
        var mzFromUnlabelled = monoMZ + (peakIndex * Globals.MASS_DIFF_BETWEEN_ISOTOPICPEAKS) / chargeState;

        // Estimate 2: step down from the fully labeled monoisotopic peak.
        var fullyLabelledMonoMZ = monoMZ + heavyLightDelta * labelledAtomCount / chargeState;
        var mzFromLabelled = fullyLabelledMonoMZ - (labelledAtomCount - peakIndex) * Globals.MASS_DIFF_BETWEEN_ISOTOPICPEAKS / chargeState;

        // The labeled weight saturates once the index passes the number of labeled atoms.
        int labelledWeight = Math.Min(peakIndex, labelledAtomCount);

        iso.Peaklist[peakIndex].XValue =
            mzFromLabelled * labelledWeight / labelledAtomCount +
            mzFromUnlabelled * (labelledAtomCount - labelledWeight) / labelledAtomCount;
    }
}
// Converts a bare PTM mass code to an Averagine formula and checks that the formula's
// monoisotopic mass matches the magnitude of the PTM mass.
public void AveragineRoundTripTest()
{
    const double signedPtmMass = -11849.17;
    var codeParser = new IqCodeParser();

    // Wrapped in brackets only to form the code string the parser call below expects.
    // NOTE(review): ToString() formats with the current culture - confirm the parser expects that.
    var ptmCode = "[" + signedPtmMass.ToString() + "]";

    // The formula carries positive atom counts, so the comparison uses the absolute mass.
    var expectedMass = Math.Abs(signedPtmMass);

    var averagineFormula = codeParser.GetEmpiricalFormulaFromSequence(ptmCode);
    Console.WriteLine("This is my emperical formula:" + averagineFormula + ":");

    var roundTripMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(averagineFormula);
    Console.WriteLine(averagineFormula);
    Console.WriteLine(roundTripMass);

    Assert.AreEqual(expectedMass, roundTripMass, .0001);
}
// Checks that a proteoform carrying a very large negative PTM yields the same composition
// and mass as subtracting the PTM's composition from the unmodified proteoform.
public void ProteinSequenceToMassHugePTMTest()
{
    const string modifiedCode = "M.V(HLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVC)[-11849.17]VLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH.";
    const string unmodifiedCode = "M.VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH.";
    const string ptmCode = "[-11849.17]";

    // only one significant decimal really. could be 4008.07 or .08 Can't tell yet
    const double expectedMass = 4008.08;

    var codeParser = new IqCodeParser();

    var modifiedComposition = codeParser.GetEmpiricalFormulaFromSequence(modifiedCode);
    var unmodifiedComposition = codeParser.GetEmpiricalFormulaFromSequence(unmodifiedCode);
    var ptmComposition = codeParser.GetEmpiricalFormulaFromSequence(ptmCode);

    var subtractedComposition = EmpiricalFormulaUtilities.SubtractFormula(unmodifiedComposition, ptmComposition);

    Console.WriteLine(modifiedComposition);
    Console.WriteLine(subtractedComposition);
    Assert.AreEqual(modifiedComposition, subtractedComposition);

    var subtractedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(subtractedComposition);
    var modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedComposition);

    var unmodifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(unmodifiedComposition);
    Console.WriteLine(unmodifiedMass);

    var ptmMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(ptmComposition);
    Console.WriteLine(ptmMass);

    // Masses converted first, then subtracted.
    var massDifference = unmodifiedMass - ptmMass;
    Console.WriteLine(massDifference);

    Assert.AreEqual(expectedMass, massDifference, 0.1);
    Assert.AreEqual(expectedMass, subtractedMass, .1);
    Assert.AreEqual(expectedMass, modifiedMass, .1);
}
// Adding "HPO3" to the SAMPLER formula must shift the monoisotopic mass by 80.0 (one decimal).
public void AddPhosphorylationTest1()
{
    const string peptideSequence = "SAMPLER";
    const string phosphoFormula = "HPO3";

    var sequenceHelper = new PeptideUtils();
    var unmodifiedFormula = sequenceHelper.GetEmpiricalFormulaForPeptideSequence(peptideSequence);
    var modifiedFormula = EmpiricalFormulaUtilities.AddFormula(unmodifiedFormula, phosphoFormula);

    var unmodifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(unmodifiedFormula);
    var modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);

    var massShift = Math.Round(modifiedMass - unmodifiedMass, 1, MidpointRounding.AwayFromZero);

    Console.WriteLine(unmodifiedFormula + "\t" + unmodifiedMass);
    Console.WriteLine(modifiedFormula + "\t" + modifiedMass);
    Console.WriteLine("diff= " + massShift);

    Assert.AreEqual(80.0, massShift);
}
// Subtracting "H3N1" from the SAMPLER formula must shift the monoisotopic mass by -17.0 (one decimal).
public void PyroglutamateTest1()
{
    const string peptideSequence = "SAMPLER";
    const string ammoniaLossFormula = "H3N1";

    var sequenceHelper = new PeptideUtils();
    var unmodifiedFormula = sequenceHelper.GetEmpiricalFormulaForPeptideSequence(peptideSequence);
    var modifiedFormula = EmpiricalFormulaUtilities.SubtractFormula(unmodifiedFormula, ammoniaLossFormula);

    var unmodifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(unmodifiedFormula);
    var modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);

    var massShift = Math.Round(modifiedMass - unmodifiedMass, 1, MidpointRounding.AwayFromZero);

    Console.WriteLine(unmodifiedFormula + "\t" + unmodifiedMass);
    Console.WriteLine(modifiedFormula + "\t" + modifiedMass);
    Console.WriteLine("diff= " + massShift);

    Assert.AreEqual(-17.0, massShift);
}
// MSAlign-style codes: the formula of the modified code minus the formula of the summed
// PTM mass must equal the formula of the unmodified code.
public void MsAlignCodeParserTest()
{
    const string modifiedCode = "A.AENVVHHKLDGMPISEAVEINAGNNLVF(LSGKVPTKKSADAPEGELASYGNTE)[713.72]EQTINVLEQIKTNLNNLGLDMKDVVKMQVFLVGGEENNGTMDFKGFMNGYSKFYDASKTNQLPARSAFQVA(K)[1.02]LANPAWRVEIEVIAVRPAK.";
    const string summedPtmCode = "[714.74]";
    const string unmodifiedCode = "A.AENVVHHKLDGMPISEAVEINAGNNLVLSGKVPTKKSADAPEGELASYGNTEFEQTINVLEQIKTNLNNLGLDMKDVVKMQVFLVGGEENNGTMDFKGFMNGYSKFYDASKTNQLPARSAFQVAKLANPAWRVEIEVIAVRPAK.";

    var codeParser = new IqCodeParser();

    var modifiedFormula = codeParser.GetEmpiricalFormulaFromSequence(modifiedCode);
    var summedPtmFormula = codeParser.GetEmpiricalFormulaFromSequence(summedPtmCode);
    var unmodifiedFormula = codeParser.GetEmpiricalFormulaFromSequence(unmodifiedCode);

    Console.WriteLine(modifiedFormula);
    Console.WriteLine(summedPtmFormula);
    Console.WriteLine(unmodifiedFormula);

    var remainder = EmpiricalFormulaUtilities.SubtractFormula(modifiedFormula, summedPtmFormula);

    Assert.AreEqual(unmodifiedFormula, remainder);
}
// MSGF-style codes (inline +mass / -mass annotations): the formula of the modified code minus
// the formula of the summed PTM mass must equal the formula of the unmodified code.
public void MsgfCodeParserTest()
{
    const string modifiedCode = "+144.102PRYRK+144.102RTPVSLY+79.966QK+144.102T+79.966PNGEK+144.102PYEC+57.021GEC+57.021GK+144.102-202";
    const string summedPtmCode = "+792.484";
    const string unmodifiedCode = "PRYRKRTPVSLYQKTPNGEKPYECGECGK";

    var codeParser = new IqCodeParser();

    var modifiedFormula = codeParser.GetEmpiricalFormulaFromSequence(modifiedCode);
    var summedPtmFormula = codeParser.GetEmpiricalFormulaFromSequence(summedPtmCode);
    var unmodifiedFormula = codeParser.GetEmpiricalFormulaFromSequence(unmodifiedCode);

    Console.WriteLine(modifiedFormula);
    Console.WriteLine(summedPtmFormula);
    Console.WriteLine(unmodifiedFormula);

    var remainder = EmpiricalFormulaUtilities.SubtractFormula(modifiedFormula, summedPtmFormula);

    Assert.AreEqual(unmodifiedFormula, remainder);
}
// Adding "C2H2O" to the SAMPLER formula must shift the monoisotopic mass by 42.0 (one decimal).
public void AddAcetylationTest1()
{
    const string peptideSequence = "SAMPLER";
    const string acetylFormula = "C2H2O";

    var sequenceHelper = new PeptideUtils();
    var unmodifiedFormula = sequenceHelper.GetEmpiricalFormulaForPeptideSequence(peptideSequence);
    var modifiedFormula = EmpiricalFormulaUtilities.AddFormula(unmodifiedFormula, acetylFormula);

    var unmodifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(unmodifiedFormula);
    var modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);

    var massShift = Math.Round(modifiedMass - unmodifiedMass, 1, MidpointRounding.AwayFromZero);

    Console.WriteLine(unmodifiedFormula + "\t" + unmodifiedMass);
    Console.WriteLine(modifiedFormula + "\t" + modifiedMass);
    Console.WriteLine("diff= " + massShift);

    Assert.AreEqual(42.0, massShift);
}
/// <summary>
/// Returns an empirical formula string for a sequence code, with or without PTMs.
/// The PTM mass obtained from the code is turned into an Averagine formula, which is
/// then added to the formula of the unmodified sequence (PTM mass not negative) or
/// subtracted from it (PTM mass negative).
/// </summary>
/// <param name="code">Sequence code, optionally carrying PTM mass annotations.</param>
/// <returns>Whatever the add/subtract helper returns; no fallback is applied here.</returns>
public string GetEmpiricalFormulaFromSequence(string code)
{
    double netPtmMass = PTMMassFromCode(code);

    // Averagine is evaluated on the magnitude only; the sign picks add vs. subtract below.
    string averaginePtmFormula = IsotopicDistributionCalculator.Instance.GetAveragineFormulaAsString(Math.Abs(netPtmMass), false);
    string unmodifiedFormula = SequenceToEmpiricalFormula(code);

    if (netPtmMass < 0)
    {
        return EmpiricalFormulaUtilities.SubtractFormula(unmodifiedFormula, averaginePtmFormula);
    }

    return EmpiricalFormulaUtilities.AddFormula(unmodifiedFormula, averaginePtmFormula);
}
/// <summary>
/// Assigns an empirical formula to every target in the collection: the formula of the peptide
/// sequence, plus the formula (Item4) of each <c>_massTagModData</c> entry whose Item1 matches
/// the target ID when the target is flagged as containing modifications.
/// </summary>
/// <param name="data">Collection whose targets are updated in place; every entry is cast to PeptideTarget.</param>
private void CalculateEmpiricalFormulas(TargetCollection data)
{
    var sequenceHelper = new PeptideUtils();

    foreach (var entry in data.TargetList)
    {
        var peptideTarget = (PeptideTarget)entry;
        var formula = sequenceHelper.GetEmpiricalFormulaForPeptideSequence(peptideTarget.Code);

        if (peptideTarget.ContainsMods)
        {
            TargetBase lookupTarget = peptideTarget;

            foreach (var modRecord in _massTagModData.Where(m => m.Item1 == lookupTarget.ID))
            {
                formula = EmpiricalFormulaUtilities.AddFormula(formula, modRecord.Item4);
            }
        }

        peptideTarget.EmpiricalFormula = formula;
    }
}
// Scratch test: prints the mass and 2+ m/z of one peptide, then the monoisotopic mass of the
// formula "H(3) C(2) N O" after a parse / re-serialize round trip. Nothing is asserted.
public void TempTest1()
{
    const string peptideSequence = "FEQDGENYTGTIDGNMGAYAR";
    var sequenceHelper = new PeptideUtils();

    // The formula itself is not used further; the call is kept as in the original test.
    sequenceHelper.GetEmpiricalFormulaForPeptideSequence(peptideSequence);

    var peptideMass = sequenceHelper.GetMonoIsotopicMassForPeptideSequence(peptideSequence);
    var doublyChargedMz = peptideMass / 2 + DeconTools.Backend.Globals.PROTON_MASS;
    Console.WriteLine(peptideMass + "\t" + doublyChargedMz);

    var parsedElements = EmpiricalFormulaUtilities.ParseEmpiricalFormulaString("H(3) C(2) N O");
    var reserializedFormula = EmpiricalFormulaUtilities.GetEmpiricalFormulaFromElementTable(parsedElements);
    var reserializedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(reserializedFormula);

    Console.WriteLine("iodomass= " + reserializedMass);
}
/// <summary>
/// Fills in whichever of empirical formula, monoisotopic mass and theoretical m/z are missing
/// on the target. A missing formula comes from the target's code when one exists; otherwise
/// the target is relabeled "AVERAGINE" and gets an Averagine formula for its monoisotopic mass.
/// </summary>
/// <param name="target">Target updated in place.</param>
/// <param name="calcAveragineForMissingEmpiricalFormula">Not read by this implementation.</param>
/// <param name="cysteinesAreModified">Forwarded to the code parser when the formula is built from the code.</param>
/// <exception cref="ApplicationException">The target has no formula, no code and no positive monoisotopic mass.</exception>
public virtual void UpdateTargetMissingInfo(IqTarget target, bool calcAveragineForMissingEmpiricalFormula = true, bool cysteinesAreModified = false)
{
    // Decided once, from the state the target arrives in.
    var monoMassUnknown = target.MonoMassTheor <= 0;

    if (String.IsNullOrEmpty(target.EmpiricalFormula))
    {
        if (String.IsNullOrEmpty(target.Code))
        {
            if (monoMassUnknown)
            {
                throw new ApplicationException(
                    "Trying to fill in missing data on target, but Target is missing both the 'Code' and the Monoisotopic Mass. One or the other is needed.");
            }

            target.Code = "AVERAGINE";
            target.EmpiricalFormula = IsotopicDistributionCalculator.GetAveragineFormulaAsString(target.MonoMassTheor);
        }
        else
        {
            // Create the empirical formula based on the code (the code parser also handles PTM annotations).
            target.EmpiricalFormula = IqCodeParser.GetEmpiricalFormulaFromSequence(target.Code, cysteinesAreModified);
        }
    }

    if (monoMassUnknown)
    {
        target.MonoMassTheor = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(target.EmpiricalFormula);
    }

    if (target.ChargeState == 0)
    {
        return;
    }

    target.MZTheor = target.MonoMassTheor / target.ChargeState + DeconTools.Backend.Globals.PROTON_MASS;
}
/// <summary>
/// Loads alignment targets from a first-hits file, keeps those with QualityScore below 0.01
/// whose sequence validates and whose file mass agrees with the calculated mass within 0.02,
/// reduces them to the best-scoring target per (Code, ChargeState), and assigns the parent
/// and child workflows.
/// </summary>
/// <param name="targetsFilePath">Path of the first-hits file; a null, empty or non-existent path only logs a message.</param>
public override void LoadAndInitializeTargets(string targetsFilePath)
{
    if (string.IsNullOrEmpty(targetsFilePath))
    {
        IqLogger.Log.Info("IqMassAndNetAligner - no alignment targets were loaded. The inputted targets file path is NULL.");
        return;
    }

    if (!File.Exists(targetsFilePath))
    {
        IqLogger.Log.Info("IqMassAndNetAligner - no alignment targets were loaded. The inputted targets file path is does not exist.");
        return;
    }

    var firstHitsImporter = new IqTargetsFromFirstHitsFileImporter(targetsFilePath);
    Targets = firstHitsImporter.Import()
        .Where(p => p.QualityScore < 0.01)
        .OrderBy(p => p.ID)
        .ToList();

    var acceptedTargets = new List<IqTarget>();

    //calculate empirical formula for targets using Code and then monoisotopic mass
    foreach (var candidate in Targets)
    {
        candidate.Code = _peptideUtils.CleanUpPeptideSequence(candidate.Code);

        if (!_peptideUtils.ValidateSequence(candidate.Code))
        {
            continue;
        }

        candidate.EmpiricalFormula = _peptideUtils.GetEmpiricalFormulaForPeptideSequence(candidate.Code, true, true);

        var calculatedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(candidate.EmpiricalFormula);
        var massFromFile = candidate.MonoMassTheor;

        var massesAgree = Math.Abs(massFromFile - calculatedMass) < 0.02;
        if (!massesAgree)
        {
            continue;
        }

        candidate.MonoMassTheor = calculatedMass;
        candidate.ElutionTimeTheor = candidate.ScanLC / (double)Run.MaxLCScan;

        acceptedTargets.Add(candidate);
        _targetUtilities.UpdateTargetMissingInfo(candidate, true);

        // Each accepted target gets one child that is a copy of its own properties.
        var chargeStateChild = new IqTargetMsgfFirstHit();
        _targetUtilities.CopyTargetProperties(candidate, chargeStateChild);
        candidate.AddTarget(chargeStateChild);
    }

    // One target per (Code, ChargeState): the one with the lowest QualityScore.
    acceptedTargets = acceptedTargets
        .GroupBy(n => new { n.Code, n.ChargeState })
        .Select(grp => grp.OrderBy(p => p.QualityScore).First())
        .ToList();

    Targets = acceptedTargets;

    TargetedWorkflowParameters workflowParameters = new BasicTargetedWorkflowParameters();
    workflowParameters.ChromNETTolerance = 0.005;
    workflowParameters.ChromGenTolerance = 50;

    //define workflows for parentTarget and childTargets
    var parentWorkflow = new ChromPeakDeciderIqWorkflow(Run, workflowParameters);
    var childWorkflow = new ChargeStateChildIqWorkflow(Run, workflowParameters);

    var workflowAssigner = new IqWorkflowAssigner();
    workflowAssigner.AssignWorkflowToParent(parentWorkflow, Targets);
    workflowAssigner.AssignWorkflowToChildren(childWorkflow, Targets);

    if (Targets.Count > 0)
    {
        IqLogger.Log.Info("IqMassAndNetAligner - Loaded " + Targets.Count + " targets for use in mass and net alignment");
    }
    else
    {
        IqLogger.Log.Info("IqMassAndNetAligner - NOTE - no targets have been loaded.");
    }
}
// For each (unmodified code, modified code) pair, compares the monoisotopic mass of the plain
// peptide formula with the mass of the formula the MSAlign importer derives for the modified
// code. The rounded difference must be -17 for the pyroglutamate pairs and 42 for the
// acetylation pairs.
public void TestGetEmpiricalFormulaForModifiedPeptideSequences()
{
    // Key: unmodified code. Value: the same sequence annotated with a [-17.03] modification.
    var pyroglutamateCodes = new Dictionary<string, string>
    {
        {
            "M.QVYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P",
            "M.(Q)[-17.03]VYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P"
        },
        {
            ".QFHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N",
            ".(Q)[-17.03]FHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N"
        },
        {
            ".QRFKLWVFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L",
            ".(QRFKLW)[-17.03]VFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L"
        }
    };

    // Key: unmodified code. Value: the same sequence annotated with a [42.01] modification.
    var acetylationCodes = new Dictionary<string, string>
    {
        {
            "M.SVYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P",
            "M.(S)[42.01]VYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P"
        },
        {
            ".MFHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N",
            ".(M)[42.01]FHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N"
        },
        {
            ".MRFKLWVFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L",
            ".(MRFKLW)[42.01]VFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L"
        }
    };

    var msAlignImporter = new MassTagFromMSAlignFileImporter(String.Empty);
    var sequenceHelper = new PeptideUtils();

    Console.WriteLine("pyroglutamate");
    foreach (var codePair in pyroglutamateCodes)
    {
        var plainFormula = sequenceHelper.GetEmpiricalFormulaForPeptideSequence(codePair.Key);
        var modifiedFormula = msAlignImporter.GetEmpiricalFormulaForSequenceWithMods(codePair.Value);

        var plainMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(plainFormula);
        var modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);
        var massShift = modifiedMass - plainMass;

        Console.WriteLine(plainFormula + "(" + plainMass + ")\t" + modifiedFormula + "(" + modifiedMass + "); diff=" + massShift);

        Assert.AreEqual(-17, Math.Round(massShift, 0, MidpointRounding.AwayFromZero));
    }

    Console.WriteLine("acetylation");
    foreach (var codePair in acetylationCodes)
    {
        var plainFormula = sequenceHelper.GetEmpiricalFormulaForPeptideSequence(codePair.Key);
        var modifiedFormula = msAlignImporter.GetEmpiricalFormulaForSequenceWithMods(codePair.Value);

        var plainMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(plainFormula);
        var modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);
        var massShift = modifiedMass - plainMass;

        Console.WriteLine(plainFormula + "(" + plainMass + ")\t" + modifiedFormula + "(" + modifiedMass + "); diff=" + massShift);

        Assert.AreEqual(42, Math.Round(massShift, 0, MidpointRounding.AwayFromZero));
    }
}
/// <summary>
/// Reads mass tags from the text file <c>m_filename</c> (first line = header) and returns them
/// as a target collection. Missing empirical formulas, monoisotopic masses and m/z values are
/// filled in where possible. A mass tag without a charge state is expanded into copies at the
/// charge states (1-99) whose m/z falls strictly between 400 and 1500; at most the first three
/// such copies are kept.
/// </summary>
/// <param name="TargetIDsToFilterOn">
/// IDs to keep. Null or an empty list disables filtering, so every mass tag is imported.
/// </param>
/// <returns>The imported targets.</returns>
/// <exception cref="Exception">
/// A line could not be converted to a mass tag. The message carries the line number and the
/// original exception is available as <c>InnerException</c>.
/// </exception>
public TargetCollection Import(List<int> TargetIDsToFilterOn)
{
    // m/z window used when candidate charge states have to be generated.
    const double minMZToConsider = 400;
    const double maxMZToConsider = 1500;

    // Copy the filter into a set once: membership is then O(1) per line instead of a list scan.
    HashSet<int> idFilter = null;
    if (TargetIDsToFilterOn != null && TargetIDsToFilterOn.Count > 0)
    {
        idFilter = new HashSet<int>(TargetIDsToFilterOn);
    }

    var data = new TargetCollection();

    using (var reader = new StreamReader(m_filename))
    {
        var headerLine = reader.ReadLine();    //first line is the header line.
        _headers = processLine(headerLine);

        var lineCounter = 1;   //used for tracking which line is being processed.
        while (reader.Peek() != -1)
        {
            var line = reader.ReadLine();
            lineCounter++;

            var lineData = processLine(line);

            PeptideTarget massTag;
            try
            {
                massTag = convertTextToMassTag(lineData);

                if (idFilter != null && !idFilter.Contains(massTag.ID))
                {
                    continue;
                }
            }
            catch (Exception ex)
            {
                var msg = "Importer failed. Error reading line: " + lineCounter.ToString() + "\nDetails: " + ex.Message;

                // Keep the original exception (type, stack trace) reachable for diagnostics.
                throw new Exception(msg, ex);
            }

            if (!massTag.ContainsMods && String.IsNullOrEmpty(massTag.EmpiricalFormula))
            {
                massTag.EmpiricalFormula = massTag.GetEmpiricalFormulaFromTargetCode();
            }

            var massTagMassInfoMissing = (Math.Abs(massTag.MonoIsotopicMass - 0) < double.Epsilon);
            var chargeStateInfoIsAvailable = massTag.ChargeState != 0;

            if (massTagMassInfoMissing && !String.IsNullOrEmpty(massTag.EmpiricalFormula))
            {
                massTag.MonoIsotopicMass =
                    EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(massTag.EmpiricalFormula);

                if (chargeStateInfoIsAvailable)
                {
                    massTag.MZ = massTag.MonoIsotopicMass / massTag.ChargeState + Globals.PROTON_MASS;
                }
            }

            if (chargeStateInfoIsAvailable)
            {
                data.TargetList.Add(massTag);
                continue;
            }

            // No charge state given: create one copy per charge state whose m/z is in range.
            var targetList = new List<PeptideTarget>();
            for (var chargeState = 1; chargeState < 100; chargeState++)
            {
                var calcMZ = massTag.MonoIsotopicMass / chargeState + Globals.PROTON_MASS;
                if (calcMZ > minMZToConsider && calcMZ < maxMZToConsider)
                {
                    var copiedMassTag = new PeptideTarget(massTag);    //we need to create multiple mass tags
                    copiedMassTag.ChargeState = (short)chargeState;
                    copiedMassTag.MZ = calcMZ;

                    targetList.Add(copiedMassTag);
                }
            }

            // Only the first three in-range charge states are kept.
            data.TargetList.AddRange(targetList.Take(3));
        }
    }

    // A trailing loop over data.TargetList whose body was entirely commented out (it used to
    // default NormalizedElutionTime to 0.5) has been dropped; it had no effect.

    return data;
}