Exemple #1
0
        /// <summary>
        /// Builds the theoretical isotopic profile for an empirical formula, optionally applying
        /// an isotope label to one element before the pattern is calculated.
        /// </summary>
        /// <param name="baseEmpiricalFormula">Empirical formula the isotope pattern is generated for.</param>
        /// <param name="elementLabelled">Element that carries the label; an empty string means no labeling.</param>
        /// <param name="lightIsotope">Light isotope number handed to the labeling setup.</param>
        /// <param name="heavyIsotope">Heavy isotope number handed to the labeling setup.</param>
        /// <param name="percentHeavyLabel">Percent heavy label; a value of 0 means no labeling.</param>
        /// <param name="chargeState">Charge state stored on the returned profile.</param>
        /// <returns>The isotopic profile with its monoisotopic mass and charge state set.</returns>
        public IsotopicProfile CreateIsotopicProfileFromEmpiricalFormula(string baseEmpiricalFormula, string elementLabelled, int lightIsotope, int heavyIsotope, double percentHeavyLabel, int chargeState = 1)
        {
            var isUnlabelled = elementLabelled == "" || percentHeavyLabel == 0;

            var calculator = IsotopicDistributionCalculator.Instance;

            IsotopicProfile iso;

            if (isUnlabelled)
            {
                iso = calculator.GetIsotopePattern(baseEmpiricalFormula);
            }
            else
            {
                var abundanceLightIsotopeLabeled1 = CalculateAbundanceLightIsotope(elementLabelled, lightIsotope, percentHeavyLabel);
                var abundanceHeavyIsotopeLabeled1 = CalculateAbundanceHeavyIsotope(elementLabelled, heavyIsotope, percentHeavyLabel);

                calculator.SetLabeling(elementLabelled, lightIsotope, abundanceLightIsotopeLabeled1, heavyIsotope, abundanceHeavyIsotopeLabeled1);

                try
                {
                    iso = calculator.GetIsotopePattern(baseEmpiricalFormula);
                }
                finally
                {
                    // The calculator is a shared instance: always undo the labeling, even when the
                    // pattern calculation throws, so later callers do not inherit the labeled state.
                    calculator.ResetToUnlabeled();
                }
            }

            var monoisotopicMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(baseEmpiricalFormula);

            iso.MonoIsotopicMass = monoisotopicMass;
            CalculateMZValuesForLabeledProfile(iso, baseEmpiricalFormula, elementLabelled, chargeState,
                                               lightIsotope, heavyIsotope);

            iso.ChargeState = chargeState;

            return iso;
        }
        /// <summary>
        /// Creates one parent target per empirical formula (charge state 0, i.e. the neutral mass)
        /// and attaches the child targets produced by CreateChargeStateTargets.
        /// </summary>
        /// <param name="empiricalFormulaList">Empirical formulas to build targets for.</param>
        /// <param name="minMZObs">Lower m/z limit forwarded to CreateChargeStateTargets.</param>
        /// <param name="maxMZObserved">Upper m/z limit forwarded to CreateChargeStateTargets.</param>
        /// <returns>The parent targets, in input order, with IDs numbered from 0.</returns>
        public List <IqTarget> CreateTargets(IEnumerable <string> empiricalFormulaList, double minMZObs = 400, double maxMZObserved = 1500)
        {
            var parents = new List<IqTarget>();
            var nextId  = 0;

            foreach (var empiricalFormula in empiricalFormulaList)
            {
                IqTarget parent = new IqTargetBasic();

                parent.EmpiricalFormula = empiricalFormula;
                parent.ID = nextId;
                nextId++;

                parent.MonoMassTheor    = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(parent.EmpiricalFormula);
                parent.ElutionTimeTheor = 0.5;

                // Charge state 0 marks the parent as the neutral mass.
                parent.ChargeState = 0;

                parent.AddTargetRange(CreateChargeStateTargets(parent, minMZObs, maxMZObserved));

                parents.Add(parent);
            }

            return parents;
        }
        /// <summary>
        /// Checks that a formula containing iron yields a positive monoisotopic mass and prints it.
        /// </summary>
        public void GetMonoisotopicMassForEmpiricalFormulaWithIronTest1()
        {
            const string ironContainingFormula = "C145 H208 N39 O40 S2 Fe1";

            var mass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(ironContainingFormula);

            Assert.IsTrue(mass > 0);

            Console.WriteLine("monoisotopic mass= \t" + mass);
        }
Exemple #4
0
        /// <summary>
        /// Rejects a sequence whose PTM mass is negative and larger in magnitude than half of the
        /// monoisotopic mass of the sequence's empirical formula.
        /// </summary>
        /// <param name="sequence">Sequence code passed to PtmMassFromCode and SequenceToEmpiricalFormula.</param>
        /// <returns>False when the negative PTM outweighs half of the sequence mass; otherwise true.</returns>
        public bool CheckSequenceIntegrity(string sequence)
        {
            var ptmMass         = PtmMassFromCode(sequence);
            var sequenceFormula = SequenceToEmpiricalFormula(sequence);
            var sequenceMass    = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(sequenceFormula);

            var removesMoreThanHalf = ptmMass < 0 && Math.Abs(ptmMass) > sequenceMass / 2;

            return !removesMoreThanHalf;
        }
        /// <summary>
        /// Checks the monoisotopic mass calculated for the peptide "SAMPLER" against a fixed value.
        /// </summary>
        public void GetMonoisotopicMassFromEmpiricalFormulaTest1()
        {
            const string peptide = "SAMPLER";

            var peptideFormula = new PeptideUtils().GetEmpiricalFormulaForPeptideSequence(peptide);
            var monomass       = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(peptideFormula);

            Assert.IsTrue(monomass > 0);
            Console.WriteLine("SAMPLER monoisotopic mass= \t" + monomass);

            // Note that Peptide Util reports 802.40071, as does MacCoss's lab:
            // http://proteome.gs.washington.edu/cgi-bin/aa_calc.pl
            var roundedMass = (decimal)Math.Round(monomass, 5);
            Assert.AreEqual(802.40072m, roundedMass);
        }
        /// <summary>
        /// Generates the isotope pattern for an empirical formula and, for a non-zero charge state,
        /// hands it to CalculateMassesForIsotopicProfile together with the monoisotopic mass.
        /// </summary>
        /// <param name="empiricalFormula">Empirical formula to generate the pattern for.</param>
        /// <param name="chargeState">Charge state stored on the profile; 0 skips the mass calculation.</param>
        /// <returns>The generated isotopic profile.</returns>
        public override IsotopicProfile GenerateTheorProfile(string empiricalFormula, int chargeState)
        {
            var profile = _isotopicDistCalculator.GetIsotopePattern(empiricalFormula);
            profile.ChargeState = chargeState;

            // The mass is computed before the charge check, so it is evaluated for every call.
            var monoisotopicMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(empiricalFormula);

            if (chargeState == 0)
            {
                return profile;
            }

            CalculateMassesForIsotopicProfile(profile, monoisotopicMass, chargeState);
            return profile;
        }
        /// <summary>
        /// Loads targets via GetMassTagDataFromDb. When IsEmpiricalFormulaExtracted is set, each
        /// target gets an empirical formula built from its peptide sequence plus its modification
        /// formulas, and (when IsMonoMassCalculatedFromEmpiricalFormula is set) a monoisotopic mass
        /// calculated from that formula.
        /// </summary>
        /// <returns>The list obtained from GetMassTagDataFromDb, with targets updated in place.</returns>
        public List <IqTarget> Import()
        {
            var importedTargets = GetMassTagDataFromDb();

            if (!IsEmpiricalFormulaExtracted)
            {
                return importedTargets;
            }

            GetModDataFromDb(_targetsContainingMods);

            var sequenceUtils = new PeptideUtils();

            foreach (IqTargetDms dmsTarget in importedTargets)
            {
                var formula = sequenceUtils.GetEmpiricalFormulaForPeptideSequence(dmsTarget.Code);

                if (!string.IsNullOrEmpty(dmsTarget.ModDescription))
                {
                    var current       = dmsTarget;
                    var modsForTarget = _massTagModData.Where(n => n.Item1 == current.ID);

                    foreach (var modEntry in modsForTarget)
                    {
                        var modString = modEntry.Item4;

                        try
                        {
                            formula = EmpiricalFormulaUtilities.AddFormula(formula, modString);
                        }
                        catch (Exception ex)
                        {
                            // Note: despite the wording of the message, the target stays in the returned
                            // list; only the failing modification is left out of its formula.
                            IqLogger.Log.Debug("Failed to calculate empirical formula for the Target " + current.ID + " (" + ex.Message + ")" +
                                               "; Having trouble with the mod: " + modString + "; This Target was NOT imported!!");
                        }
                    }
                }

                dmsTarget.EmpiricalFormula = formula;

                if (IsMonoMassCalculatedFromEmpiricalFormula)
                {
                    dmsTarget.MonoMassTheor = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(dmsTarget.EmpiricalFormula);
                }
            }

            return importedTargets;
        }
        /// <summary>
        /// Round-trip check: a bracketed mass code is converted to an empirical formula, and the
        /// monoisotopic mass of that formula is compared with the absolute value of the input mass.
        /// </summary>
        public void AveragineRoundTripTest()
        {
            var parser = new IqCodeParser();

            var ptmDouble = -11849.17;

            // Build the bracketed code for the call below. Format with the invariant culture so the
            // text is always "[-11849.17]"; a plain ToString() emits a decimal comma on some locales.
            var ptmMass = "[" + ptmDouble.ToString(System.Globalization.CultureInfo.InvariantCulture) + "]";

            // The empirical formula holds a positive number of atoms (not negative),
            // so the assert below has to compare against the positive mass value.
            var absPtmDouble = Math.Abs(ptmDouble);

            var empiricalFormula = parser.GetEmpiricalFormulaFromSequence(ptmMass);

            Console.WriteLine("This is my emperical formula:" + empiricalFormula + ":");

            var returnedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(empiricalFormula);

            Console.WriteLine(empiricalFormula);
            Console.WriteLine(returnedMass);
            Assert.AreEqual(absPtmDouble, returnedMass, .0001);
        }
        /// <summary>
        /// Checks that a proteoform carrying a large negative PTM ([-11849.17]) gives the same
        /// composition and mass as subtracting the PTM composition from the unmodified proteoform.
        /// </summary>
        public void ProteinSequenceToMassHugePTMTest()
        {
            const string modifiedCode =
                "M.V(HLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVC)[-11849.17]VLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH.";
            const string unmodifiedCode =
                "M.VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH.";
            const string ptmCode = "[-11849.17]";

            // Only one significant decimal really. Could be 4008.07 or .08; can't tell yet.
            const double expectedMass = 4008.08;

            var codeParser = new IqCodeParser();

            var modifiedComposition   = codeParser.GetEmpiricalFormulaFromSequence(modifiedCode);
            var unmodifiedComposition = codeParser.GetEmpiricalFormulaFromSequence(unmodifiedCode);
            var ptmComposition        = codeParser.GetEmpiricalFormulaFromSequence(ptmCode);

            var subtractedComposition = EmpiricalFormulaUtilities.SubtractFormula(unmodifiedComposition, ptmComposition);

            Console.WriteLine(modifiedComposition);
            Console.WriteLine(subtractedComposition);

            Assert.AreEqual(modifiedComposition, subtractedComposition);

            var subtractedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(subtractedComposition);
            var modifiedMass   = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedComposition);
            var unmodifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(unmodifiedComposition);
            Console.WriteLine(unmodifiedMass);

            var ptmMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(ptmComposition);
            Console.WriteLine(ptmMass);

            // Same subtraction, but done on masses instead of on compositions.
            var massAfterSubtraction = unmodifiedMass - ptmMass;
            Console.WriteLine(massAfterSubtraction);

            Assert.AreEqual(expectedMass, massAfterSubtraction, 0.1);
            Assert.AreEqual(expectedMass, subtractedMass, 0.1);
            Assert.AreEqual(expectedMass, modifiedMass, 0.1);
        }
        /// <summary>
        /// Adds the formula "HPO3" to the formula of peptide "SAMPLER" and checks that the
        /// monoisotopic mass shift, rounded to one decimal, is 80.0.
        /// </summary>
        public void AddPhosphorylationTest1()
        {
            var peptideFormula  = new PeptideUtils().GetEmpiricalFormulaForPeptideSequence("SAMPLER");
            var modifiedFormula = EmpiricalFormulaUtilities.AddFormula(peptideFormula, "HPO3");

            var massBefore = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(peptideFormula);
            var massAfter  = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);

            var roundedShift = Math.Round(massAfter - massBefore, 1, MidpointRounding.AwayFromZero);

            Console.WriteLine(peptideFormula + "\t" + massBefore);
            Console.WriteLine(modifiedFormula + "\t" + massAfter);
            Console.WriteLine("diff= " + roundedShift);

            Assert.AreEqual(80.0, roundedShift);
        }
        /// <summary>
        /// Subtracts the formula "H3N1" from the formula of peptide "SAMPLER" and checks that the
        /// monoisotopic mass shift, rounded to one decimal, is -17.0.
        /// </summary>
        public void PyroglutamateTest1()
        {
            var peptideFormula  = new PeptideUtils().GetEmpiricalFormulaForPeptideSequence("SAMPLER");
            var modifiedFormula = EmpiricalFormulaUtilities.SubtractFormula(peptideFormula, "H3N1");

            var massBefore = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(peptideFormula);
            var massAfter  = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);

            var roundedShift = Math.Round(massAfter - massBefore, 1, MidpointRounding.AwayFromZero);

            Console.WriteLine(peptideFormula + "\t" + massBefore);
            Console.WriteLine(modifiedFormula + "\t" + massAfter);
            Console.WriteLine("diff= " + roundedShift);

            Assert.AreEqual(-17.0, roundedShift);
        }
        /// <summary>
        /// Adds the formula "C2H2O" to the formula of peptide "SAMPLER" and checks that the
        /// monoisotopic mass shift, rounded to one decimal, is 42.0.
        /// </summary>
        public void AddAcetylationTest1()
        {
            const string peptide        = "SAMPLER";
            const string acetylationMod = "C2H2O";

            var peptideFormula  = new PeptideUtils().GetEmpiricalFormulaForPeptideSequence(peptide);
            var modifiedFormula = EmpiricalFormulaUtilities.AddFormula(peptideFormula, acetylationMod);

            var massBefore = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(peptideFormula);
            var massAfter  = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);

            var roundedShift = Math.Round(massAfter - massBefore, 1, MidpointRounding.AwayFromZero);

            Console.WriteLine(peptideFormula + "\t" + massBefore);
            Console.WriteLine(modifiedFormula + "\t" + massAfter);
            Console.WriteLine("diff= " + roundedShift);

            Assert.AreEqual(42.0, roundedShift);
        }
Exemple #13
0
        /// <summary>
        /// Scratch test: prints the monoisotopic mass and the charge-2 m/z of a peptide, then prints
        /// the monoisotopic mass of the formula parsed from "H(3) C(2) N O". Contains no assertions.
        /// </summary>
        public void TempTest1()
        {
            const string peptideCode = "FEQDGENYTGTIDGNMGAYAR";

            var utils = new PeptideUtils();

            // The result is not used further; the call is kept so the formula lookup is still exercised.
            var peptideFormula = utils.GetEmpiricalFormulaForPeptideSequence(peptideCode);

            var peptideMass   = utils.GetMonoIsotopicMassForPeptideSequence(peptideCode);
            var doublyCharged = peptideMass / 2 + DeconTools.Backend.Globals.PROTON_MASS;

            Console.WriteLine(peptideMass + "\t" + doublyCharged);

            // Parse the bracketed notation into an element table and turn it back into a formula string.
            var elementTable      = EmpiricalFormulaUtilities.ParseEmpiricalFormulaString("H(3) C(2) N O");
            var normalizedFormula = EmpiricalFormulaUtilities.GetEmpiricalFormulaFromElementTable(elementTable);

            var modMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(normalizedFormula);

            Console.WriteLine("iodomass= " + modMass);
        }
        /// <summary>
        /// Fills in a target's missing empirical formula, monoisotopic mass and theoretical m/z.
        /// </summary>
        /// <param name="target">Target to update in place.</param>
        /// <param name="calcAveragineForMissingEmpiricalFormula">Not read anywhere in this method body.</param>
        /// <param name="cysteinesAreModified">Forwarded to the code parser when the formula is derived from the target's code.</param>
        /// <exception cref="ApplicationException">
        /// Thrown when the target has no empirical formula, no code and no positive monoisotopic mass.
        /// </exception>
        public virtual void UpdateTargetMissingInfo(IqTarget target, bool calcAveragineForMissingEmpiricalFormula = true, bool cysteinesAreModified = false)
        {
            // Evaluated once up front: the mass may be overwritten further down.
            var massIsMissing = target.MonoMassTheor <= 0;

            if (string.IsNullOrEmpty(target.EmpiricalFormula))
            {
                if (!string.IsNullOrEmpty(target.Code))
                {
                    // Derive the formula from the code.
                    target.EmpiricalFormula = IqCodeParser.GetEmpiricalFormulaFromSequence(target.Code, cysteinesAreModified);
                }
                else if (massIsMissing)
                {
                    throw new ApplicationException(
                              "Trying to fill in missing data on target, but Target is missing both the 'Code' and the Monoisotopic Mass. One or the other is needed.");
                }
                else
                {
                    // No code available: fall back to an averagine formula based on the known mass.
                    target.Code             = "AVERAGINE";
                    target.EmpiricalFormula = IsotopicDistributionCalculator.GetAveragineFormulaAsString(target.MonoMassTheor);
                }
            }

            if (massIsMissing)
            {
                target.MonoMassTheor = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(target.EmpiricalFormula);
            }

            // Charge state 0 would divide by zero, so m/z is only set for charged targets.
            if (target.ChargeState != 0)
            {
                target.MZTheor = target.MonoMassTheor / target.ChargeState + DeconTools.Backend.Globals.PROTON_MASS;
            }
        }
        /// <summary>
        /// Reads peptide targets from the text file at m_filename. The first line is treated as the
        /// header; every following line is converted into a mass tag.
        /// </summary>
        /// <param name="TargetIDsToFilterOn">
        /// IDs of the targets to keep. Null or an empty list disables filtering.
        /// </param>
        /// <returns>
        /// A collection holding the imported targets. A target without a charge state is expanded
        /// into at most three copies, one per charge state whose calculated m/z lies in the 400-1500 window.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when a line cannot be converted. The message holds the line number and the
        /// original exception is available as InnerException.
        /// </exception>
        public TargetCollection Import(List <int> TargetIDsToFilterOn)
        {
            const double minMZToConsider = 400;
            const double maxMZToConsider = 1500;

            var filterOnTargetIDs = TargetIDsToFilterOn != null && TargetIDsToFilterOn.Count > 0;

            // Hash the IDs once so the per-line membership test does not rescan the whole list.
            var idsToKeep = filterOnTargetIDs ? new HashSet<int>(TargetIDsToFilterOn) : null;

            var data = new TargetCollection();

            using (var reader = new StreamReader(m_filename))
            {
                var headerLine = reader.ReadLine();    //first line is the header line.

                _headers = processLine(headerLine);

                var lineCounter = 1;
                while (reader.Peek() != -1)
                {
                    var line = reader.ReadLine();
                    lineCounter++;

                    var lineData = processLine(line);

                    PeptideTarget massTag;
                    try
                    {
                        massTag = convertTextToMassTag(lineData);

                        if (filterOnTargetIDs && !idsToKeep.Contains(massTag.ID))
                        {
                            continue;
                        }
                    }
                    catch (Exception ex)
                    {
                        var msg = "Importer failed. Error reading line: " + lineCounter.ToString() + "\nDetails: " + ex.Message;

                        // Keep the original exception as InnerException so its type and stack trace survive.
                        throw new Exception(msg, ex);
                    }

                    if (!massTag.ContainsMods && String.IsNullOrEmpty(massTag.EmpiricalFormula))
                    {
                        massTag.EmpiricalFormula = massTag.GetEmpiricalFormulaFromTargetCode();
                    }

                    var massTagMassInfoMissing     = (Math.Abs(massTag.MonoIsotopicMass - 0) < double.Epsilon);
                    var chargeStateInfoIsAvailable = massTag.ChargeState != 0;

                    // Derive the mass (and m/z, when the charge is known) from the empirical formula.
                    if (massTagMassInfoMissing && !String.IsNullOrEmpty(massTag.EmpiricalFormula))
                    {
                        massTag.MonoIsotopicMass =
                            EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(massTag.EmpiricalFormula);

                        if (chargeStateInfoIsAvailable)
                        {
                            massTag.MZ = massTag.MonoIsotopicMass / massTag.ChargeState + Globals.PROTON_MASS;
                        }
                    }

                    if (chargeStateInfoIsAvailable)
                    {
                        data.TargetList.Add(massTag);
                        continue;
                    }

                    // No charge state given: try charges 1..99 and keep those whose m/z falls in the window.
                    var targetList = new List<PeptideTarget>();

                    for (var chargeState = 1; chargeState < 100; chargeState++)
                    {
                        var calcMZ = massTag.MonoIsotopicMass / chargeState + Globals.PROTON_MASS;
                        if (calcMZ > minMZToConsider && calcMZ < maxMZToConsider)
                        {
                            var copiedMassTag = new PeptideTarget(massTag);    //we need to create multiple mass tags
                            copiedMassTag.ChargeState = (short)chargeState;
                            copiedMassTag.MZ          = calcMZ;

                            targetList.Add(copiedMassTag);
                        }
                    }

                    // Charges are tried in ascending order, so this keeps the three lowest matching charges.
                    data.TargetList.AddRange(targetList.Take(3));
                }
            }

            return data;
        }
Exemple #16
0
        /// <summary>
        /// For each pair of (unmodified code, code with bracketed mod), compares the monoisotopic mass
        /// of the two empirical formulas. The rounded difference must be -17 for the pyroglutamate
        /// codes and 42 for the acetylation codes.
        /// </summary>
        public void TestGetEmpiricalFormulaForModifiedPeptideSequences()
        {
            // Key: unmodified sequence. Value: the same sequence carrying a [-17.03] modification.
            var pyroglutamateCodes = new Dictionary<string, string>();
            pyroglutamateCodes.Add(
                "M.QVYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P",
                "M.(Q)[-17.03]VYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P");
            pyroglutamateCodes.Add(
                ".QFHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N",
                ".(Q)[-17.03]FHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N");
            pyroglutamateCodes.Add(
                ".QRFKLWVFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L",
                ".(QRFKLW)[-17.03]VFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L");

            // Key: unmodified sequence. Value: the same sequence carrying a [42.01] modification.
            var acetylationCodes = new Dictionary<string, string>();
            acetylationCodes.Add(
                "M.SVYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P",
                "M.(S)[42.01]VYAMRRLKQWLVGSYQTDNSAFVPYDRTLLWFTFGLAVVGFVMVTSASMPVGQRLAEDPFLFAKRDGIYMIVALCLALVTMRVPMAVWQRYSSLMLFGSILLLLVVLAVGSSVNGASRWIAFGPLRIQPAELSKLALFCYLSSYLVRKVEEVRNNFWGFCKPMGVMLILAVLLLLQPDLGTVVVLFVTTLALLFLAGAKIWQFLAIIGTGIAAVVMLIIVEPYRVRRITSFLEPWEDPFGSGYQLTQSLMAFGRGDLLGQGLGNSVQKLEYLPEAHTDFIFSILAEELGYIGVVLVLLMVFFIAFRAMQIGRRALLLDQRFSGFLACSIGIWFTFQTLVNVGAAAGML.P");
            acetylationCodes.Add(
                ".MFHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N",
                ".(M)[42.01]FHIYHSNQLSLLKSLMVHFMQNRPLSSPFEQEVILVQSPGMSQWLQIQLAESLGIAANIRYPLPATFIWEMFTRVLSGIPKESAFSKDAMTWKLMALLPNYLDDPAFKPLRHYLKDDEDKRKLHQLAGRVADLFDQYLVYRPDWLSAWENDQLIDGLSDNQYWQKTLWLALQRYTEDLAQPKWHRANLYQQFISTLNDAPVGALAHCFPSRIFICGISALPQVYLQALQAIGRHTEIYLLFTNPCRYYWGDIQDPKFLARLNSRKPRHYQQLHELPWFKDEQNASTLFNEEGEQNVGNPLLASWGKLGKDNLYFLSELEYSDVLDAFVDIPRD.N");
            acetylationCodes.Add(
                ".MRFKLWVFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L",
                ".(MRFKLW)[42.01]VFISLCLHASLVAAAILYVVEDKPIAPEPISIQMLAFAADEPVGEPEPVVEEVTPPEPEPVVEPEPEPEPEPIPDVKPVIEKPIEKKPEPKPKPKPKPVEKPKPPVERPQQQPLA.L");

            var msAlignImporter = new MassTagFromMSAlignFileImporter(String.Empty);
            var peptideUtils    = new PeptideUtils();

            // Shared check: prints the group label, then verifies the rounded mass shift of every pair.
            Action<string, Dictionary<string, string>, int> verifyMassShift = (label, codes, expectedShift) =>
            {
                Console.WriteLine(label);

                foreach (var pair in codes)
                {
                    string plainFormula    = peptideUtils.GetEmpiricalFormulaForPeptideSequence(pair.Key);
                    string modifiedFormula = msAlignImporter.GetEmpiricalFormulaForSequenceWithMods(pair.Value);

                    double plainMass    = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(plainFormula);
                    double modifiedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(modifiedFormula);
                    double diff         = modifiedMass - plainMass;

                    Console.WriteLine(plainFormula + "(" + plainMass + ")\t" + modifiedFormula + "(" + modifiedMass + "); diff=" + diff);
                    Assert.AreEqual(expectedShift, Math.Round(diff, 0, MidpointRounding.AwayFromZero));
                }
            };

            verifyMassShift("pyroglutamate", pyroglutamateCodes, -17);
            verifyMassShift("acetylation", acetylationCodes, 42);
        }
        /// <summary>
        /// Loads alignment targets from a first-hits file, keeps those whose recalculated
        /// monoisotopic mass agrees with the file's value, reduces them to the best-scoring entry per
        /// (Code, ChargeState) pair and assigns parent and child workflows.
        /// </summary>
        /// <param name="targetsFilePath">
        /// Path of the targets file. When it is null, empty or does not exist, a message is logged
        /// and nothing is loaded.
        /// </param>
        public override void LoadAndInitializeTargets(string targetsFilePath)
        {
            if (string.IsNullOrEmpty(targetsFilePath))
            {
                IqLogger.Log.Info("IqMassAndNetAligner - no alignment targets were loaded. The inputted targets file path is NULL.");
                return;
            }

            if (!File.Exists(targetsFilePath))
            {
                IqLogger.Log.Info("IqMassAndNetAligner - no alignment targets were loaded. The inputted targets file path is does not exist.");
                return;
            }

            var firstHitsImporter = new IqTargetsFromFirstHitsFileImporter(targetsFilePath);

            // Only targets with a quality score below 0.01 are considered, ordered by ID.
            Targets = firstHitsImporter.Import()
                      .Where(p => p.QualityScore < 0.01)
                      .OrderBy(p => p.ID)
                      .ToList();

            var acceptedTargets = new List<IqTarget>();

            // Calculate the empirical formula from the Code, then the monoisotopic mass from the formula.
            foreach (var candidate in Targets)
            {
                candidate.Code = _peptideUtils.CleanUpPeptideSequence(candidate.Code);

                if (!_peptideUtils.ValidateSequence(candidate.Code))
                {
                    continue;
                }

                candidate.EmpiricalFormula = _peptideUtils.GetEmpiricalFormulaForPeptideSequence(candidate.Code, true, true);

                var calculatedMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(candidate.EmpiricalFormula);
                var massFromFile   = candidate.MonoMassTheor;

                // Skip targets whose file mass and calculated mass differ by 0.02 or more.
                var massesAgree = Math.Abs(massFromFile - calculatedMass) < 0.02;
                if (!massesAgree)
                {
                    continue;
                }

                candidate.MonoMassTheor    = calculatedMass;
                candidate.ElutionTimeTheor = candidate.ScanLC / (double)Run.MaxLCScan;

                acceptedTargets.Add(candidate);
                _targetUtilities.UpdateTargetMissingInfo(candidate, true);

                // Each accepted target receives one child that copies its properties.
                var childTarget = new IqTargetMsgfFirstHit();
                _targetUtilities.CopyTargetProperties(candidate, childTarget);
                candidate.AddTarget(childTarget);
            }

            // Keep a single target per (Code, ChargeState): the one with the lowest QualityScore.
            acceptedTargets = acceptedTargets
                              .GroupBy(n => new { n.Code, n.ChargeState })
                              .Select(grp => grp.OrderBy(p => p.QualityScore).First())
                              .ToList();

            Targets = acceptedTargets;

            TargetedWorkflowParameters workflowParameters = new BasicTargetedWorkflowParameters();
            workflowParameters.ChromNETTolerance = 0.005;
            workflowParameters.ChromGenTolerance = 50;

            // Define workflows for the parent targets and for their child targets.
            var parentWorkflow = new ChromPeakDeciderIqWorkflow(Run, workflowParameters);
            var childWorkflow  = new ChargeStateChildIqWorkflow(Run, workflowParameters);

            var assigner = new IqWorkflowAssigner();
            assigner.AssignWorkflowToParent(parentWorkflow, Targets);
            assigner.AssignWorkflowToChildren(childWorkflow, Targets);

            if (Targets.Count > 0)
            {
                IqLogger.Log.Info("IqMassAndNetAligner - Loaded " + Targets.Count + " targets for use in mass and net alignment");
            }
            else
            {
                IqLogger.Log.Info("IqMassAndNetAligner - NOTE - no targets have been loaded.");
            }
        }
        public TargetCollection Import(out Dictionary <int, PrsmData> prsmData)
        {
            StreamReader reader;

            prsmData = new Dictionary <int, PrsmData>();

            if (!File.Exists(_filename))
            {
                throw new FileNotFoundException("Input file not found: " + _filename);
            }

            try
            {
                reader = new StreamReader(_filename);
            }
            catch (Exception ex)
            {
                throw new IOException("There was a problem importing from the file.", ex);
            }

            var targets = new TargetCollection();

            // Group LcmsFeatureTargets by their code
            var proteinSpeciesGroups = new Dictionary <string, List <LcmsFeatureTarget> >();

            using (var sr = reader)
            {
                if (sr.Peek() == -1)
                {
                    sr.Close();
                    throw new InvalidDataException("There is no data in the file we are trying to read.");
                }

                var columnHeaders = sr.ReadLine().Split('\t').ToList();
                var columnMapping = GetColumnMapping(columnHeaders);

                var lineCounter = 0; //used for tracking which line is being processed.

                //read and process each line of the file
                while (sr.Peek() > -1)
                {
                    ++lineCounter;
                    _dataRowsProcessed = lineCounter;

                    var processedData = sr.ReadLine().Split('\t').ToList();

                    //ensure that processed line is the same size as the header line
                    if (processedData.Count != columnHeaders.Count)
                    {
                        throw new InvalidDataException("Data in row #" + lineCounter.ToString(CultureInfo.InvariantCulture) +
                                                       "is invalid - \nThe number of columns does not match that of the header line");
                    }

                    // Get Prsm_ID
                    int prsmId;
                    if (!int.TryParse(processedData[columnMapping[PRSM_ID_HEADER]], out prsmId))
                    {
                        throw new InvalidDataException("Could not parse Prsm ID.");
                    }

                    // Get scan
                    int scanLcTarget;
                    if (!int.TryParse(processedData[columnMapping[SCAN_HEADER]], out scanLcTarget))
                    {
                        throw new InvalidDataException("Could not parse scan number.");
                    }

                    // Get charge state
                    short chargeState;
                    if (!short.TryParse(processedData[columnMapping[CHARGE_HEADER]], out chargeState))
                    {
                        throw new InvalidDataException("Could not parse charge.");
                    }

                    // Get code
                    var code = processedData[columnMapping[PEPTIDE_HEADER]];

                    string empiricalFormula;
                    // Modified species, try to get empirical formula
                    if (code.Contains("("))
                    {
                        empiricalFormula = GetEmpiricalFormulaForSequenceWithMods(code);
                        // Unknown modification in sequence, skip
                        if (String.IsNullOrEmpty(empiricalFormula))
                        {
                            ++_dataRowsSkippedUnknownMods;
                            continue;
                        }
                    }
                    else
                    {
                        empiricalFormula = _peptideUtils.GetEmpiricalFormulaForPeptideSequence(code);
                    }

                    // Get monoisotopic mass
                    var monoisotopicMass = EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(empiricalFormula);

                    // Get Protein_mass
                    double proteinMass;
                    if (!double.TryParse(processedData[columnMapping[PROTEIN_MASS_HEADER]], out proteinMass))
                    {
                        throw new InvalidDataException("Could not parse protein mass.");
                    }

                    // Get protein name
                    var proteinName = processedData[columnMapping[PROTEIN_NAME_HEADER]];

                    // Get score
                    double eValueDbl;
                    if (!double.TryParse(processedData[columnMapping[E_VALUE_HEADER]], out eValueDbl))
                    {
                        if (processedData[columnMapping[E_VALUE_HEADER]].ToLower() == "Infinity")
                        {
                            eValueDbl = float.MaxValue;
                        }
                        else
                        {
                            throw new InvalidDataException("Could not parse e-value.");
                        }
                    }

                    float eValue;
                    if (eValueDbl > float.MaxValue)
                    {
                        eValue = float.MaxValue;
                    }
                    else
                    {
                        eValue = (float)eValueDbl;
                    }

                    // Make Prsm
                    prsmData.Add(prsmId, new PrsmData {
                        ProteinMass = proteinMass, ProteinName = proteinName, EValue = eValue
                    });

                    // Create target
                    var target = new LcmsFeatureTarget
                    {
                        FeatureToMassTagID = prsmId,
                        ID = -1,
                        ElutionTimeUnit  = DeconTools.Backend.Globals.ElutionTimeUnit.ScanNum,
                        ScanLCTarget     = scanLcTarget,
                        Code             = code,
                        EmpiricalFormula = empiricalFormula,
                        MonoIsotopicMass = monoisotopicMass,
                        ChargeState      = chargeState,
                        MZ = monoisotopicMass / chargeState + DeconTools.Backend.Globals.PROTON_MASS,
                    };

                    if (!proteinSpeciesGroups.ContainsKey(code))
                    {
                        proteinSpeciesGroups.Add(code, new List <LcmsFeatureTarget>());
                    }
                    proteinSpeciesGroups[code].Add(target);

                    /*
                     * // Create range
                     * const int maxOffset = 0;
                     *
                     * for (short offset = -maxOffset; offset <= maxOffset; offset++)
                     * {
                     *  // Create new target
                     *  var newCharge = (short) (chargeState + offset);
                     *  var targetCopy = new LcmsFeatureTarget(target)
                     *  {
                     *      ID = ++idCounter,
                     *      ChargeState = newCharge,
                     *      MZ = target.MonoIsotopicMass / newCharge + DeconTools.Backend.Globals.PROTON_MASS
                     *  };
                     *  // Add target
                     *  targets.TargetIDList.Add(idCounter);
                     *  targets.TargetList.Add(targetCopy);
                     * }
                     */
                }
                sr.Close();
            }

            // Loop through each protein species group and add in the missing charge states
            var idCounter = 0;

            foreach (var keyValuePair in proteinSpeciesGroups)
            {
                var targetGroup = AddChargeStates(keyValuePair.Value, EXTEND_CHARGE_RANGE);

                var chargeStates = new List <short> {
                    0
                };
                // Add all targets in group and IDs to list
                foreach (var target in targetGroup)
                {
                    if (!chargeStates.Contains(target.ChargeState))
                    {
                        chargeStates.Add(target.ChargeState);
                        target.ID = ++idCounter;
                        targets.TargetList.Add(target);
                        targets.TargetIDList.Add(idCounter);
                    }
                }
            }

            return(targets);
        }
        /// <summary>
        /// Fills in the Code, EmpiricalFormula, MonoIsotopicMass and MZ of every target in
        /// <c>Targets.TargetList</c> that is missing them.
        /// </summary>
        /// <remarks>
        /// For a target without an empirical formula, the sources are tried in this order:
        /// (1) the reference mass tag in <c>MassTagsForReference</c> whose ID equals the target's
        /// FeatureToMassTagID (its Code and EmpiricalFormula are copied);
        /// (2) the target's own Code, treated as an unmodified peptide sequence;
        /// (3) an averagine formula derived from the target's monoisotopic mass, with Code set to "AVERAGINE".
        /// Any 'X' in a code is replaced by 'L' first. Note that in case (1) this rewrites the Code and
        /// EmpiricalFormula of the reference mass tag itself, not just of the target.
        /// For a target whose MonoIsotopicMass is not greater than zero, the mass is then computed from
        /// the empirical formula and MZ is recomputed from that mass and the target's ChargeState.
        /// </remarks>
        /// <exception cref="ApplicationException">
        /// Thrown when a target has no empirical formula, no matching reference mass tag, no Code,
        /// and a MonoIsotopicMass that is not greater than zero.
        /// </exception>
        protected virtual void UpdateTargetMissingInfo()
        {
            var canUseReferenceMassTags = MassTagsForReference != null && MassTagsForReference.TargetList.Count > 0;

            // Index the reference mass tags by ID once, instead of scanning the whole reference list
            // (twice) for every target. ToLookup keeps the elements of each key in source order, so
            // taking the first element of a group selects the same mass tag as a linear First() scan.
            var referenceMassTagsById = canUseReferenceMassTags
                ? MassTagsForReference.TargetList.ToLookup(p => p.ID)
                : null;

            foreach (LcmsFeatureTarget target in Targets.TargetList)
            {
                // Captured before any formula work; decides both the averagine fallback and the mass/MZ update below.
                var isMissingMonoMass = target.MonoIsotopicMass <= 0;

                if (String.IsNullOrEmpty(target.EmpiricalFormula))
                {
                    if (referenceMassTagsById != null && referenceMassTagsById.Contains(target.FeatureToMassTagID))
                    {
                        var mt = referenceMassTagsById[target.FeatureToMassTagID].First();

                        //in DMS, Sequest will put an 'X' when it can't differentiate 'I' and 'L'
                        //  see:   \\gigasax\DMS_Parameter_Files\Sequest\sequest_ETD_N14_NE.params
                        //To create the theoretical isotopic profile, we will change the 'X' to 'L'
                        if (mt.Code.Contains("X"))
                        {
                            // This modifies the shared reference mass tag, so later targets see the rewritten code.
                            mt.Code             = mt.Code.Replace('X', 'L');
                            mt.EmpiricalFormula = mt.GetEmpiricalFormulaFromTargetCode();
                        }

                        target.Code             = mt.Code;
                        target.EmpiricalFormula = mt.EmpiricalFormula;
                    }
                    else if (!String.IsNullOrEmpty(target.Code))
                    {
                        //Create empirical formula based on code. Assume it is an unmodified peptide

                        if (target.Code.Contains("X"))
                        {
                            target.Code = target.Code.Replace('X', 'L');
                        }

                        target.EmpiricalFormula = new PeptideUtils().GetEmpiricalFormulaForPeptideSequence(target.Code);
                    }
                    else
                    {
                        // No formula and no code: the only remaining source is the mass itself.
                        if (isMissingMonoMass)
                        {
                            throw new ApplicationException(
                                      "Trying to prepare target list, but Target is missing both the 'Code' and the Monoisotopic Mass. One or the other is needed.");
                        }
                        target.Code             = "AVERAGINE";
                        target.EmpiricalFormula =
                            IsotopicDistributionCalculator.GetAveragineFormulaAsString(target.MonoIsotopicMass, false);
                    }
                }

                if (isMissingMonoMass)
                {
                    target.MonoIsotopicMass =
                        EmpiricalFormulaUtilities.GetMonoisotopicMassFromEmpiricalFormula(target.EmpiricalFormula);

                    // NOTE(review): ChargeState is not checked for zero before the division - confirm
                    // that every target reaching this point has its charge state set.
                    target.MZ = target.MonoIsotopicMass / target.ChargeState + DeconTools.Backend.Globals.PROTON_MASS;
                }
            }
        }