/// <summary>
/// Get a heavy sequence given a sequence and list of heavy peptide modifications.
/// </summary>
/// <remarks>
/// All changes are applied to a sequence constructed from <paramref name="sequence" />,
/// and the result is rebuilt once more before being returned so that its mass reflects
/// the modified residues (replacing residues in place does not update the mass).
/// </remarks>
/// <param name="sequence">The sequence to convert to heavy sequence.</param>
/// <param name="mods">
/// The heavy peptide modifications. A null or empty array leaves the sequence unmodified.
/// </param>
/// <returns>Sequence with heavy peptide modifications.</returns>
public static Sequence GetHeavySequence(Sequence sequence, SearchModification[] mods)
{
    var heavySequence = new Sequence(sequence);
    if (heavySequence.Count == 0 || mods == null || mods.Length == 0)
    {
        return heavySequence;
    }

    foreach (var mod in mods)
    {
        if (mod.Location == SequenceLocation.PeptideNTerm || mod.Location == SequenceLocation.ProteinNTerm)
        {
            // N-terminal modifications attach to the first residue.
            heavySequence[0] = new ModifiedAminoAcid(heavySequence[0], mod.Modification);
        }
        else if (mod.Location == SequenceLocation.PeptideCTerm || mod.Location == SequenceLocation.ProteinCTerm)
        {
            // C-terminal modifications attach to the last residue.
            var lastIndex = heavySequence.Count - 1;
            heavySequence[lastIndex] = new ModifiedAminoAcid(heavySequence[lastIndex], mod.Modification);
        }
        else
        {
            // Any other location: modify every residue that matches the target residue.
            for (var i = 0; i < heavySequence.Count; i++)
            {
                if (heavySequence[i].Residue == mod.TargetResidue)
                {
                    heavySequence[i] = new ModifiedAminoAcid(heavySequence[i], mod.Modification);
                }
            }
        }
    }

    // Rebuild the sequence to force the mass to be recalculated now that residues were replaced.
    return new Sequence(heavySequence);
}
/// <summary>
/// Parse a CLEAN sequence (containing no pre/post residues or modifications)
/// and attach the supplied modifications to it.
/// </summary>
/// <param name="sequenceText">The clean sequence.</param>
/// <param name="modInfo">The modification info for the sequence.</param>
/// <returns>The parsed sequence, rebuilt after all modifications were applied.</returns>
private Sequence ParseSequence(string sequenceText, List<clsAminoAcidModInfo> modInfo)
{
    var parsed = new SequenceReader().Read(sequenceText);

    foreach (var entry in modInfo)
    {
        // Ambiguous modifications are not applied.
        if (!entry.AmbiguousMod)
        {
            // ResidueLocInPeptide is converted to a zero-based index into the sequence.
            var index = entry.ResidueLocInPeptide - 1;
            var residue = parsed[index];

            var resolved = TryGetExistingModification(
                entry.ModDefinition.MassCorrectionTag,
                entry.ModDefinition.ModificationMass);

            if (resolved == null)
            {
                // Not a known modification: register a new one so it can be found next time.
                resolved = new Modification(
                    0,
                    entry.ModDefinition.ModificationMass,
                    entry.ModDefinition.MassCorrectionTag);
                Modifications.Add(resolved);
            }

            parsed[index] = new ModifiedAminoAcid(residue, resolved);
        }
    }

    // Rebuilding forces the mass to be recalculated with the modifications in place.
    return new Sequence(parsed);
}
/// <summary>
/// Update the <see cref="ModificationSymbol" /> from the modification carried by
/// the given amino acid: the symbol is the first two characters of the modification
/// name (or the whole name when it is shorter than that).
/// </summary>
/// <param name="modifiedAminoAcid">
/// The modified amino acid to extract the modification from. When null, the symbol is left untouched.
/// </param>
private void SetModSymbol(ModifiedAminoAcid modifiedAminoAcid)
{
    if (modifiedAminoAcid == null)
    {
        return;
    }

    var modName = modifiedAminoAcid.Modification.Name;
    var symbolLength = modName.Length < 2 ? modName.Length : 2;
    this.ModificationSymbol = modName.Substring(0, symbolLength);
}
/// <summary>
/// Parse a tab-separated identification file into a list of PSMs.
/// </summary>
/// <remarks>
/// Lines with fewer than 18 tab-separated fields are ignored. The first remaining line is
/// treated as the header; the "Scan", "Charge", "Sequence" and "Modifications" columns are
/// located by name. The modifications field is a comma-separated list of "name index"
/// entries; entries without both parts are ignored. Indices outside the sequence are
/// clamped to the nearest residue.
/// </remarks>
/// <param name="filePath">Path of the file to read.</param>
/// <param name="isTarget">Flag passed through to every <c>Psm</c> created from this file.</param>
/// <returns>One PSM per data row.</returns>
private List<Psm> ParseIdFile(string filePath, bool isTarget)
{
    // The file is machine-written, so numbers are parsed independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var aminoAcidSet = new AminoAcidSet();
    var psms = new List<Psm>();
    var headers = new Dictionary<string, int>();
    var headerParsed = false;

    foreach (var line in File.ReadLines(filePath))
    {
        var parts = line.Split('\t');
        if (parts.Length < 18)
        {
            continue;
        }

        if (!headerParsed)
        {
            for (var i = 0; i < parts.Length; i++)
            {
                headers.Add(parts[i], i);
            }

            headerParsed = true;
            continue;
        }

        var scan = Convert.ToInt32(parts[headers["Scan"]], invariant);
        var charge = Convert.ToInt32(parts[headers["Charge"]], invariant);
        var cleanSeq = new Sequence(parts[headers["Sequence"]], aminoAcidSet);

        foreach (var mod in parts[headers["Modifications"]].Split(','))
        {
            var modParts = mod.Split(' ');

            // Skip malformed entries, and entries that have no residue to attach to.
            if (modParts.Length < 2 || cleanSeq.Count == 0)
            {
                continue;
            }

            var name = modParts[0];

            // Clamp on both sides so an out-of-range index lands on the nearest residue.
            var requestedIndex = Convert.ToInt32(modParts[1], invariant);
            var index = Math.Max(0, Math.Min(requestedIndex, cleanSeq.Count - 1));

            cleanSeq[index] = new ModifiedAminoAcid(cleanSeq[index], Modification.Get(name));
        }

        // Rebuild the sequence after the residues were replaced.
        var sequence = new Sequence(cleanSeq);
        psms.Add(new Psm(scan, charge, sequence, isTarget));
    }

    return psms;
}
/// <summary>
/// Convert the sequence information from the external types to the internal types
/// </summary>
/// <remarks>
/// Each modification key is a one-based location that is mapped onto a zero-based residue
/// index. mzIdentML uses location 0 for N-terminal modifications and (peptide length + 1)
/// for C-terminal ones, so the index is clamped to the first and last residue respectively.
/// </remarks>
/// <param name="peptide">The peptide reference read from the identification file.</param>
/// <returns>A sequence with all of the peptide's modifications applied.</returns>
public static Sequence GetIpSequence(this SimpleMZIdentMLReader.PeptideRef peptide)
{
    var aminoAcidSet = new AminoAcidSet();
    var sequence = new Sequence(peptide.Sequence, aminoAcidSet);
    var lastIndex = sequence.Count - 1;

    // An empty sequence has no residue to attach a modification to.
    if (lastIndex >= 0)
    {
        foreach (var mod in peptide.Mods)
        {
            // Location 0 maps to the first residue, length + 1 to the last one.
            var seqIndex = Math.Min(lastIndex, Math.Max(0, mod.Key - 1));
            var mzidMod = mod.Value;
            var modification = Modification.Get(mzidMod.Tag, mzidMod.Mass);
            sequence[seqIndex] = new ModifiedAminoAcid(sequence[seqIndex], modification);
        }
    }

    // Rebuild the sequence after the residues were replaced.
    return new Sequence(sequence);
}
/// <summary>
/// Build an array holding every standard amino acid, each immediately followed by its
/// modified variants for the modifications that <paramref name="aaSet" /> reports for that
/// residue at <c>SequenceLocation.Everywhere</c>.
/// </summary>
/// <param name="aaSet">The amino acid set supplying the modification parameters and indices.</param>
/// <returns>The standard amino acids interleaved with their modified variants.</returns>
public static AminoAcid[] GetExtendedAminoAcidArray(AminoAcidSet aaSet)
{
    var modificationParams = aaSet.GetModificationParams();
    var extended = new List<AminoAcid>();

    foreach (var standardAa in AminoAcid.StandardAminoAcidArr)
    {
        extended.Add(standardAa);

        var modIndices = aaSet.GetModificationIndices(standardAa.Residue, SequenceLocation.Everywhere);
        foreach (var modIndex in modIndices)
        {
            var modification = modificationParams.GetModification(modIndex);
            extended.Add(new ModifiedAminoAcid(standardAa, modification));
        }
    }

    return extended.ToArray();
}
/// <summary>
/// Precompute edges for the scoring graph.
/// </summary>
/// <remarks>
/// For every mass bin and every amino acid (including modified variants), an edge is added
/// from the bin to the bin containing the bin's mass plus the amino acid's mass. From bin 0
/// only, unmodified amino acids additionally get one edge per terminal modification. Edges
/// are collected per destination bin, so the returned list is ordered by destination bin.
/// </remarks>
/// <param name="aminoAcidSet">Amino acid set to build the graph edges from.</param>
/// <param name="aminoAcidProbabilities">The amino acid probabilities.</param>
/// <returns>A list of all scoring graph edges.</returns>
private List<FlipScoringGraphEdge> InitEdges(AminoAcidSet aminoAcidSet, Dictionary<char, double> aminoAcidProbabilities)
{
    var numberOfBins = this.massBins.NumberOfBins;

    var adjList = new LinkedList<FlipScoringGraphEdge>[numberOfBins];
    for (var i = 0; i < numberOfBins; i++)
    {
        adjList[i] = new LinkedList<FlipScoringGraphEdge>();
    }

    var terminalModifications = FilteredProteinMassBinning.GetTerminalModifications(aminoAcidSet);
    var aminoAcidArray = FilteredProteinMassBinning.GetExtendedAminoAcidArray(aminoAcidSet);

    // The edge weight depends only on the residue, so compute it once per amino acid
    // instead of once per (bin, amino acid) pair. Residues without a probability weigh 0.
    var aminoAcidWeights = new double[aminoAcidArray.Length];
    for (var a = 0; a < aminoAcidArray.Length; a++)
    {
        double probability;
        aminoAcidWeights[a] = aminoAcidProbabilities.TryGetValue(aminoAcidArray[a].Residue, out probability)
            ? Math.Log10(probability)
            : 0;
    }

    for (var i = 0; i < numberOfBins; i++)
    {
        var fineNodeMass = this.massBins.GetMass(i);

        for (var a = 0; a < aminoAcidArray.Length; a++)
        {
            var aa = aminoAcidArray[a];
            var aaWeight = aminoAcidWeights[a];

            var j = this.massBins.GetBinNumber(fineNodeMass + aa.Mass);
            if (j < 0 || j >= numberOfBins)
            {
                continue;
            }

            adjList[j].AddLast(new FlipScoringGraphEdge(i, j, aaWeight, aa, null));

            // Terminal modifications only apply to edges leaving the first bin,
            // and only to amino acids that are not already modified.
            if (i == 0 && !(aa is ModifiedAminoAcid))
            {
                foreach (var terminalMod in terminalModifications)
                {
                    var modifiedAa = new ModifiedAminoAcid(aa, terminalMod);
                    j = this.massBins.GetBinNumber(fineNodeMass + modifiedAa.Mass);
                    if (j < 0 || j >= numberOfBins)
                    {
                        continue;
                    }

                    // The terminally modified edge reuses the weight of the unmodified residue.
                    adjList[j].AddLast(new FlipScoringGraphEdge(i, j, aaWeight, modifiedAa, null));
                }
            }
        }
    }

    return adjList.SelectMany(edge => edge).ToList();
}
public void TestITraqMod()
{
    // Prints the iTRAQ 4-plex modification mass, then the mass of the sequence P-A-Q
    // whose first residue (P) carries that modification.
    var aaSet = new AminoAcidSet();
    var proline = aaSet.GetAminoAcid('P');
    var alanine = aaSet.GetAminoAcid('A');
    var glutamine = aaSet.GetAminoAcid('Q');

    var itraq = Modification.Itraq4Plex;
    Console.WriteLine(itraq.Mass);

    var residues = new List<AminoAcid>
    {
        new ModifiedAminoAcid(proline, itraq),
        alanine,
        glutamine
    };

    Console.WriteLine(new Sequence(residues).Mass);
}
/// <summary>
/// Build the adjacency lists of the protein scoring graph.
/// </summary>
/// <remarks>
/// For every mass bin and every amino acid (including modified variants), an edge from the
/// bin is recorded in the list of the bin that contains the bin's mass plus the amino acid's
/// mass. From bin 0 only, unmodified amino acids additionally get one edge per terminal
/// modification.
/// </remarks>
/// <param name="comparer">The mass binning that defines the graph nodes.</param>
/// <param name="aminoAcidSet">Amino acid set to build the graph edges from.</param>
public ProteinScoringGraphFactory(IMassBinning comparer, AminoAcidSet aminoAcidSet)
{
    _comparer = comparer;

    var binCount = _comparer.NumberOfBins;
    _adjList = new LinkedList<ScoringGraphEdge>[binCount];
    for (var bin = 0; bin < binCount; bin++)
    {
        _adjList[bin] = new LinkedList<ScoringGraphEdge>();
    }

    var terminalModifications = FilteredProteinMassBinning.GetTerminalModifications(aminoAcidSet);
    var aminoAcidArray = FilteredProteinMassBinning.GetExtendedAminoAcidArray(aminoAcidSet);

    for (var source = 0; source < binCount; source++)
    {
        var sourceMass = _comparer.GetMass(source);

        foreach (var residue in aminoAcidArray)
        {
            var target = _comparer.GetBinNumber(sourceMass + residue.Mass);
            if (target < 0 || target >= binCount)
            {
                continue;
            }

            _adjList[target].AddLast(new ScoringGraphEdge(source));

            // Terminal modifications are only considered for unmodified residues leaving bin 0.
            if (source != 0 || residue is ModifiedAminoAcid)
            {
                continue;
            }

            foreach (var terminalMod in terminalModifications)
            {
                var modifiedResidue = new ModifiedAminoAcid(residue, terminalMod);
                var modifiedTarget = _comparer.GetBinNumber(sourceMass + modifiedResidue.Mass);
                if (modifiedTarget >= 0 && modifiedTarget < binCount)
                {
                    _adjList[modifiedTarget].AddLast(new ScoringGraphEdge(source));
                }
            }
        }
    }
}
/// <summary>
/// Build a mass binning restricted to the bins that can be reached by adding up amino acid
/// masses (including modified variants, plus terminal modifications on the first residue).
/// </summary>
/// <remarks>
/// The constructor fills two lookup tables: <c>_mzBinToFilteredBinMap</c> (m/z bin offset to
/// filtered bin index, -1 when the bin is not reachable) and <c>_filteredBinToMzBinMap</c>
/// (filtered bin index back to m/z bin number).
/// </remarks>
/// <param name="aaSet">Amino acid set supplying the amino acids and modifications.</param>
/// <param name="maxProteinMass">Largest mass covered by the binning; stored in <c>MaxMass</c>.</param>
/// <param name="numBits">Value passed to the <c>MzComparerWithBinning</c> constructor.</param>
public FilteredProteinMassBinning(AminoAcidSet aaSet, double maxProteinMass = 50000, int numBits = 27)
{
    // _aminoAcidSet = aaSet;
    var terminalModifications = GetTerminalModifications(aaSet);
    var extendedAminoAcidArray = GetExtendedAminoAcidArray(aaSet);

    // MinMass becomes the smallest mass among all amino acids, with or without a
    // terminal modification; it starts at MaxMass and is lowered from there.
    MaxMass = maxProteinMass;
    MinMass = MaxMass;
    foreach (var aa in extendedAminoAcidArray)
    {
        if (aa.Mass < MinMass)
        {
            MinMass = aa.Mass;
        }
        foreach (var mod in terminalModifications)
        {
            var modAa = new ModifiedAminoAcid(aa, mod);
            if (modAa.Mass < MinMass)
            {
                MinMass = modAa.Mass;
            }
        }
    }

    _mzComparer = new MzComparerWithBinning(numBits);
    _minMzBinIndex = _mzComparer.GetBinNumber(MinMass);
    _maxMzBinIndex = _mzComparer.GetBinNumber(MaxMass);

    // Slot 0 of the map is reserved for the zero-mass bin; the bins from _minMzBinIndex
    // to _maxMzBinIndex occupy the slots starting at 1 (hence "+ 2" here and "+ 1" below).
    var numberOfMzBins = _maxMzBinIndex - _minMzBinIndex + 2; // pad zero mass bin

    // -1 marks an m/z bin that has not been assigned a filtered bin.
    _mzBinToFilteredBinMap = new int[numberOfMzBins];
    for (var i = 0; i < numberOfMzBins; i++)
    {
        _mzBinToFilteredBinMap[i] = -1;
    }

    // Scratch buffer for the reverse mapping; trimmed to its used length at the end.
    var tempMap = new int[numberOfMzBins];

    // fineNodes[k] is true when high-precision bin k is reachable. Bin 0 (zero mass) is the seed.
    // ReSharper disable once UseObjectOrCollectionInitializer
    var fineNodes = new BitArray(Constants.GetBinNumHighPrecision(MaxMass));
    fineNodes[0] = true;

    var effectiveBinCounter = 0;

    // Sweep the bins in increasing order. Bins marked during the sweep lie at or after the
    // current one as long as the added masses are non-negative, so they are visited later.
    for (var fineBinIdx = 0; fineBinIdx < fineNodes.Length; fineBinIdx++)
    {
        if (!fineNodes[fineBinIdx])
        {
            continue;
        }

        var fineNodeMass = fineBinIdx / Constants.RescalingConstantHighPrecision;

        // Mark every bin reachable from this one by appending a single amino acid.
        foreach (var aa in extendedAminoAcidArray)
        {
            var validFineNodeIndex = Constants.GetBinNumHighPrecision(fineNodeMass + aa.Mass);
            // NOTE(review): break (rather than continue) stops at the first amino acid that
            // overshoots, which is only complete if the array is ordered by mass — confirm.
            if (validFineNodeIndex >= fineNodes.Length)
            {
                break;
            }
            fineNodes[validFineNodeIndex] = true;

            if (fineBinIdx == 0 && !(aa is ModifiedAminoAcid)) // include terminal modifications
            {
                foreach (var terminalMod in terminalModifications)
                {
                    var modifiedAa = new ModifiedAminoAcid(aa, terminalMod);
                    validFineNodeIndex = Constants.GetBinNumHighPrecision(fineNodeMass + modifiedAa.Mass);
                    // NOTE(review): same early exit as above; it only leaves the
                    // terminal-modification loop, not the amino acid loop.
                    if (validFineNodeIndex >= fineNodes.Length)
                    {
                        break;
                    }
                    fineNodes[validFineNodeIndex] = true;
                }
            }
        }

        /*foreach (var m in massList)
         * {
         *     var validFineNodeIndex = Constants.GetBinNumHighPrecision(fineNodeMass + m);
         *     if (validFineNodeIndex >= fineNodes.Length) break;
         *     fineNodes[validFineNodeIndex] = true;
         * }*/

        // Assign the next filtered bin index to this node's m/z bin, unless that m/z bin
        // already has one (several fine bins can share one m/z bin). Bin 0 is always registered.
        var binNum = _mzComparer.GetBinNumber(fineNodeMass);
        if (fineBinIdx == 0 || (binNum >= _minMzBinIndex && binNum <= _maxMzBinIndex && _mzBinToFilteredBinMap[binNum - _minMzBinIndex + 1] < 0))
        {
            _mzBinToFilteredBinMap[binNum == 0 ? 0 : binNum - _minMzBinIndex + 1] = effectiveBinCounter;
            tempMap[effectiveBinCounter] = binNum;
            effectiveBinCounter++;
        }
    }

    // Keep only the entries of the reverse mapping that were actually assigned.
    _filteredBinToMzBinMap = new int[effectiveBinCounter];
    Array.Copy(tempMap, _filteredBinToMzBinMap, effectiveBinCounter);
}