/// <summary>
/// Drops decoy (reverse) entries from <c>proteinIndex</c> when this peptide maps to a mixture
/// of reverse and non-reverse proteins.
/// </summary>
/// <remarks>
/// <c>proteinIndex</c> is left untouched when none of the referenced proteins is reverse, or when
/// all of them are (so the array is never emptied by this method).
/// The same predicate, <c>T07SearchEngineEnhancement.IsReverseProtein</c>, is used both to decide
/// whether anything has to be removed and to select what is kept, so the two decisions cannot disagree.
/// </remarks>
/// <param name="prefix">Decoy marker handed to <c>IsReverseProtein</c>.</param>
/// <param name="proteinSet">Resolves each entry of <c>proteinIndex</c> to a protein name.</param>
public void Correct(string prefix, IProteinSet proteinSet) {
    List<int> forward = new List<int>(proteinIndex.Length);
    foreach (int pi in proteinIndex) {
        if (!T07SearchEngineEnhancement.IsReverseProtein(proteinSet.GetName(pi), prefix)) {
            forward.Add(pi);
        }
    }
    // Either every protein is reverse (nothing would remain) or none is (nothing to strip).
    if (forward.Count == 0 || forward.Count == proteinIndex.Length) {
        return;
    }
    proteinIndex = forward.ToArray();
}
/// <summary>
/// Applies the fixed modifications to this peptide by delegating to the six-argument
/// <c>ApplyFixedModifications</c> overload.
/// </summary>
/// <remarks>
/// The value returned by the overload is added to <c>monoisotopicMass</c>, and the modification
/// counts it reports through its <c>out</c> argument are stored in <c>fixedModifications</c>.
/// </remarks>
/// <param name="modifications">Fixed modifications to apply.</param>
/// <param name="proteinSet">Protein set used to evaluate <c>IsCterm</c>.</param>
/// <param name="sequence">Sequence passed through to the overload.</param>
public void ApplyFixedModifications(Modification[] modifications, IProteinSet proteinSet, string sequence) {
    bool atNterm = IsNterm();
    bool atCterm = IsCterm(proteinSet);
    PeptideModificationCounts counts;
    monoisotopicMass += ApplyFixedModifications(modifications, sequence, atNterm, atCterm, length, out counts);
    fixedModifications = counts;
}
/// <summary>
/// Resolves every entry of <c>proteinIndex</c> to its name in <paramref name="proteinSet"/>.
/// </summary>
/// <param name="proteinSet">Protein set queried through <c>GetName</c>.</param>
/// <returns>A new array of names, in the same order as <c>proteinIndex</c>.</returns>
public string[] GetProteinIds(IProteinSet proteinSet) {
    string[] ids = new string[proteinIndex.Length];
    int slot = 0;
    while (slot < ids.Length) {
        ids[slot] = proteinSet.GetName(proteinIndex[slot]);
        slot++;
    }
    return ids;
}
/// <summary>
/// Returns the character immediately preceding this peptide in the protein occurrence
/// selected by <paramref name="index"/>.
/// </summary>
/// <param name="index">Position in <c>proteinOffsets</c> / <c>proteinIndices</c>.</param>
/// <param name="proteinSet">Protein set that supplies the protein sequence.</param>
/// <returns>The preceding character, or a single space when the offset is 0 (nothing precedes it).</returns>
public char GetResidueBefore(int index, IProteinSet proteinSet) {
    int preceding = proteinOffsets[index] - 1;
    if (preceding >= 0) {
        return proteinSet.Get(proteinIndices[index]).Sequence[preceding];
    }
    return ' ';
}
/// <summary>
/// Runs <c>CalcFdr</c> for one query type, derives per-file and global threshold values, and then
/// either collects sequences or applies the threshold to every raw file.
/// </summary>
/// <remarks>
/// When <paramref name="peptideFdr"/> is below 1, each file gets the value returned by
/// <c>FindReverseHitThresholdValue</c> and the global value is <c>ArrayUtil.Median</c> of those;
/// both are capped at <paramref name="peptidePep"/> when that is below 1. Otherwise
/// <paramref name="peptidePep"/> is used directly everywhere.
/// <c>ip.Dispose()</c> is called after every <c>ip.GetIdentifications</c> call, as in the rest of this code.
/// </remarks>
/// <param name="rawFiles">Raw files to process.</param>
/// <param name="decoyPrefix">Decoy marker forwarded to the FDR helpers.</param>
/// <param name="peptideFdr">FDR limit; values of 1 or more switch FDR-based thresholding off.</param>
/// <param name="peptidePep">Upper bound for the threshold; only applied as a cap when below 1.</param>
/// <param name="type">Query type whose identifications are processed.</param>
/// <param name="perFileThreshold">Use the per-file value instead of the global one.</param>
/// <param name="debug">Forwarded to <c>CalcFdr</c>.</param>
/// <param name="sequences">Set handed to <c>CollectSequences</c>.</param>
/// <param name="onlyCollectSequences">If true call <c>CollectSequences</c>, otherwise <c>ApplyReverseHitThreshold</c>.</param>
/// <param name="proteinSet">Protein set forwarded to the FDR helpers.</param>
/// <param name="labelModificationSet">Forwarded to <c>CalcFdr</c>.</param>
/// <param name="ip">Source of the identifications.</param>
public static void FdrThresholding(string[] rawFiles, string decoyPrefix, double peptideFdr, double peptidePep, MascotQueryType type, bool perFileThreshold, bool debug, HashSet<string> sequences, bool onlyCollectSequences, IProteinSet proteinSet, HashSet<string> labelModificationSet, IIdentificationProvider ip) {
    CalcFdr(rawFiles, decoyPrefix, type, proteinSet, debug, labelModificationSet, ip);

    int fileCount = rawFiles.Length;
    double[] perFileCutoffs = new double[fileCount];
    double globalCutoff;
    bool capByPep = peptidePep < 1;

    if (peptideFdr < 1) {
        for (int f = 0; f < fileCount; f++) {
            double cutoff = FindReverseHitThresholdValue(ip.GetIdentifications(rawFiles[f], type), peptideFdr, decoyPrefix, proteinSet);
            ip.Dispose();
            perFileCutoffs[f] = capByPep ? Math.Min(cutoff, peptidePep) : cutoff;
        }
        globalCutoff = ArrayUtil.Median(perFileCutoffs);
        if (capByPep) {
            globalCutoff = Math.Min(globalCutoff, peptidePep);
        }
    } else {
        for (int f = 0; f < fileCount; f++) {
            perFileCutoffs[f] = peptidePep;
        }
        globalCutoff = peptidePep;
    }

    for (int f = 0; f < fileCount; f++) {
        Identifications ids = ip.GetIdentifications(rawFiles[f], type);
        double cutoff = perFileThreshold ? perFileCutoffs[f] : globalCutoff;
        if (onlyCollectSequences) {
            CollectSequences(ids, cutoff, sequences);
        } else {
            ApplyReverseHitThreshold(ids, cutoff);
        }
        ip.Dispose();
    }
}
/// <summary>
/// Tells whether every protein referenced by <c>proteinIndex</c> is classified as reverse by
/// <c>T07SearchEngineEnhancement.IsReverseProtein</c>.
/// </summary>
/// <param name="reverseStr">Decoy marker handed to <c>IsReverseProtein</c>.</param>
/// <param name="proteinSet">Resolves protein indices to names.</param>
/// <returns>
/// <c>false</c> as soon as one non-reverse protein is found; <c>true</c> otherwise
/// (including when <c>proteinIndex</c> is empty).
/// </returns>
public bool HasOnlyReverseHits(string reverseStr, IProteinSet proteinSet) {
    foreach (int pi in proteinIndex) {
        string name = proteinSet.GetName(pi);
        bool isReverse = T07SearchEngineEnhancement.IsReverseProtein(name, reverseStr);
        if (!isReverse) {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Returns the character immediately following this peptide in the protein occurrence
/// selected by <paramref name="index"/>.
/// </summary>
/// <param name="index">Position in <c>proteinOffsets</c> / <c>proteinIndices</c>.</param>
/// <param name="proteinSet">Protein set that supplies the protein sequence.</param>
/// <returns>The following character, or a single space when the peptide reaches the end of the sequence.</returns>
public char GetResidueAfter(int index, IProteinSet proteinSet) {
    int following = proteinOffsets[index] + length;
    string proteinSequence = proteinSet.Get(proteinIndices[index]).Sequence;
    return following < proteinSequence.Length ? proteinSequence[following] : ' ';
}
/// <summary>
/// Tells whether, in at least one of its protein occurrences, this peptide ends at (or beyond)
/// the end of the protein, i.e. <c>offset + length &gt;= protein length</c>.
/// </summary>
/// <param name="proteinSet">Protein set queried for protein lengths.</param>
/// <returns><c>true</c> if any occurrence satisfies the condition; <c>false</c> otherwise.</returns>
private bool IsCterm(IProteinSet proteinSet) {
    int occurrence = 0;
    while (occurrence < ProteinCount) {
        int end = proteinOffsets[occurrence] + length;
        int proteinLength = proteinSet.GetLength(proteinIndices[occurrence]);
        if (end >= proteinLength) {
            return true;
        }
        occurrence++;
    }
    return false;
}
/// <summary>
/// Builds the modified variants of this peptide: starts from the unmodified version, applies the
/// label modifications, then each variable modification in turn, filters the result through
/// <c>FilterEqualMods</c> and converts it with <c>ConvertToDatabasePeptides</c>.
/// </summary>
/// <param name="modifications">Variable modifications, applied in array order.</param>
/// <param name="lMods">Label modifications handed to <c>ApplyLabelModifications</c>.</param>
/// <param name="index">Passed to <c>CreateNonmodifiedVersion</c>.</param>
/// <param name="proteinSet">Protein set used to obtain the peptide sequence and for terminus checks.</param>
/// <returns>The converted peptide variants.</returns>
public DatabaseModifiedPeptide[] ApplyVariableModifications(Modification[] modifications, Modification[][] lMods, int index, IProteinSet proteinSet) {
    string sequence = GetSequence(proteinSet);
    ModifiedPeptide unmodified = CreateNonmodifiedVersion(index, sequence);
    ModifiedPeptide[] variants = new ModifiedPeptide[] { unmodified };
    variants = ApplyLabelModifications(variants, lMods, sequence);
    foreach (Modification modification in modifications) {
        variants = ApplyVariableModification(variants, modification, sequence, proteinSet);
    }
    variants = FilterEqualMods(variants);
    return ConvertToDatabasePeptides(variants);
}
/// <summary>
/// Counts the positions of this peptide's sequence whose character occurs in <paramref name="residues"/>.
/// </summary>
/// <param name="residues">Characters to look for.</param>
/// <param name="proteinSet">Protein set used to obtain the peptide sequence.</param>
/// <returns>The number of matching positions.</returns>
public int GetOccurenceCount(string residues, IProteinSet proteinSet) {
    int hits = 0;
    foreach (char aa in GetSequence(proteinSet)) {
        if (residues.IndexOf(aa) >= 0) {
            hits++;
        }
    }
    return hits;
}
/// <summary>
/// Applies one variable modification to every peptide in <paramref name="peptides"/> and
/// concatenates the variants produced for each of them, in input order.
/// </summary>
/// <param name="peptides">Peptides to expand.</param>
/// <param name="mod">Modification to apply.</param>
/// <param name="sequence">Peptide sequence passed to the per-peptide overload.</param>
/// <param name="proteinSet">Protein set passed to the per-peptide overload.</param>
/// <returns>All variants returned by the per-peptide overload. No de-duplication happens here.</returns>
private ModifiedPeptide[] ApplyVariableModification(IEnumerable<ModifiedPeptide> peptides, Modification mod, string sequence, IProteinSet proteinSet) {
    List<ModifiedPeptide> expanded = new List<ModifiedPeptide>();
    foreach (ModifiedPeptide peptide in peptides) {
        expanded.AddRange(ApplyVariableModification(peptide, mod, sequence, proteinSet));
    }
    return expanded.ToArray();
}
/// <summary>
/// Drives FDR thresholding over all query types: Silac and Isotope for the recalibrated and
/// non-recalibrated files (when those lists are non-empty), Peak for all raw files, then an
/// optional <c>FilterBySequence</c> pass and a final <c>LimitPep</c> pass over all three types.
/// </summary>
/// <remarks>
/// <paramref name="keepLowScorers"/> is passed in the <c>onlyCollectSequences</c> position of the
/// per-type overload, and <paramref name="writeOut"/> is passed as its <c>debug</c> flag, but only for the
/// Silac pass over <paramref name="recalFiles"/> (and the Silac <c>FilterBySequence</c> call).
/// </remarks>
/// <param name="rawFiles">All raw files.</param>
/// <param name="recalFiles">Files processed first for Silac and Isotope.</param>
/// <param name="nonRecalFiles">Files processed second for Silac and Isotope.</param>
/// <param name="revstring">Decoy marker.</param>
/// <param name="peptideFdr">Peptide FDR limit.</param>
/// <param name="peptidePep">Peptide PEP limit.</param>
/// <param name="perFileThreshold">Use per-file thresholds.</param>
/// <param name="keepLowScorers">Enables the <c>FilterBySequence</c> pass.</param>
/// <param name="proteinSet">Protein set forwarded to the per-type overload.</param>
/// <param name="labelModificationSet">Forwarded to the per-type overload.</param>
/// <param name="ip">Source of the identifications.</param>
/// <param name="writeOut">Debug/write flag for the Silac passes described above.</param>
public static void FdrThresholding(string[] rawFiles, string[] recalFiles, string[] nonRecalFiles, string revstring, double peptideFdr, double peptidePep, bool perFileThreshold, bool keepLowScorers, IProteinSet proteinSet, HashSet<string> labelModificationSet, IIdentificationProvider ip, bool writeOut) {
    HashSet<string> sequences = new HashSet<string>();
    MascotQueryType[] clusterTypes = { MascotQueryType.Silac, MascotQueryType.Isotope };
    MascotQueryType[] allTypes = { MascotQueryType.Silac, MascotQueryType.Isotope, MascotQueryType.Peak };

    if (recalFiles.Length > 0) {
        foreach (MascotQueryType queryType in clusterTypes) {
            bool debug = writeOut && queryType == MascotQueryType.Silac;
            FdrThresholding(recalFiles, revstring, peptideFdr, peptidePep, queryType, perFileThreshold, debug, sequences, keepLowScorers, proteinSet, labelModificationSet, ip);
        }
    }
    if (nonRecalFiles.Length > 0) {
        foreach (MascotQueryType queryType in clusterTypes) {
            FdrThresholding(nonRecalFiles, revstring, peptideFdr, peptidePep, queryType, perFileThreshold, false, sequences, keepLowScorers, proteinSet, labelModificationSet, ip);
        }
    }
    FdrThresholding(rawFiles, revstring, peptideFdr, peptidePep, MascotQueryType.Peak, perFileThreshold, false, sequences, keepLowScorers, proteinSet, labelModificationSet, ip);

    if (keepLowScorers) {
        foreach (MascotQueryType queryType in allTypes) {
            bool debug = writeOut && queryType == MascotQueryType.Silac;
            FilterBySequence(rawFiles, revstring, peptideFdr, peptidePep, queryType, perFileThreshold, debug, sequences, keepLowScorers, ip);
        }
    }

    foreach (MascotQueryType queryType in allTypes) {
        LimitPep(rawFiles, queryType, ip);
    }
}
/// <summary>
/// Determines the largest PEP value at which the running ratio of reverse to forward hits
/// (taken in the order given by <c>ArrayUtil.Order</c> on the PEP values) does not exceed
/// <paramref name="totalPeptideFDR"/>.
/// </summary>
/// <remarks>
/// Only the first peptide of each identification entry is considered. An entry counts as forward
/// when that peptide does not have only reverse hits.
/// </remarks>
/// <param name="identifications">Identifications to scan.</param>
/// <param name="totalPeptideFDR">Maximum allowed reverse/forward ratio.</param>
/// <param name="reverseStr">Decoy marker.</param>
/// <param name="proteinSet">Protein set used for the reverse-hit check.</param>
/// <returns>
/// 1 when there are no identifications; 0 when no position satisfies the ratio limit;
/// otherwise <c>ArrayUtil.Max</c> of the PEP values at the positions that satisfy it.
/// </returns>
public static double FindReverseHitThresholdValue(Identifications identifications, double totalPeptideFDR, string reverseStr, IProteinSet proteinSet) {
    int total = identifications.Count;
    if (total == 0) {
        return 1;
    }

    double[] peps = new double[total];
    bool[] isForward = new bool[total];
    for (int entry = 0; entry < total; entry++) {
        MascotPeptide best = identifications.GetPeptidesAt(entry)[0];
        peps[entry] = best.Pep;
        isForward[entry] = !best.HasOnlyReverseHits(reverseStr, proteinSet);
    }

    int[] order = ArrayUtil.Order(peps);
    double forwardSoFar = 0;
    List<double> accepted = new List<double>();
    for (int rank = 1; rank <= total; rank++) {
        int entry = order[rank - 1];
        if (isForward[entry]) {
            forwardSoFar++;
        }
        // rank = number of entries seen so far, so the remainder are reverse hits.
        double reverseSoFar = rank - forwardSoFar;
        if (reverseSoFar / forwardSoFar <= totalPeptideFDR) {
            accepted.Add(peps[entry]);
        }
    }

    if (accepted.Count == 0) {
        return 0;
    }
    return ArrayUtil.Max(accepted.ToArray());
}
/// <summary>
/// Extracts this peptide's sequence from its first protein occurrence: <c>length</c> characters
/// starting at <c>proteinOffsets[0]</c> in the sequence of protein <c>proteinIndices[0]</c>.
/// </summary>
/// <param name="proteinSet">Protein set that supplies the protein sequence.</param>
/// <returns>The peptide sequence.</returns>
public string GetSequence(IProteinSet proteinSet) {
    string proteinSequence = proteinSet.Get(proteinIndices[0]).Sequence;
    int start = proteinOffsets[0];
    return proteinSequence.Substring(start, length);
}
/// <summary>
/// Tells whether the first peptide stored at <paramref name="index"/> maps to at least one
/// protein that is not a reverse hit.
/// </summary>
/// <param name="index">Entry passed to <c>GetPeptidesAt</c>.</param>
/// <param name="revstring">Decoy marker.</param>
/// <param name="proteinSet">Protein set used for the reverse-hit check.</param>
/// <returns>The negation of <c>HasOnlyReverseHits</c> for that peptide.</returns>
public bool IsHighestScoringCorrect(int index, string revstring, IProteinSet proteinSet) {
    MascotPeptide top = GetPeptidesAt(index)[0];
    bool decoyOnly = top.HasOnlyReverseHits(revstring, proteinSet);
    return !decoyOnly;
}
/// <summary>
/// Expands one peptide into the variants obtained by applying the variable modification
/// <paramref name="mod"/> at its residue sites, terminal sites and (depending on
/// <c>mod.GetPosition()</c>) at the peptide or protein termini.
/// </summary>
/// <remarks>
/// Works on the scratch collections <c>help</c> (all variants so far, starting with the input peptide)
/// and <c>toBeAdded</c> (variants produced by the current step). Both are overwritten by this call.
/// A site is only modified when it currently holds <c>ushort.MaxValue</c>, which this code treats as
/// "no modification set".
/// </remarks>
/// <param name="peptide">Starting peptide; always part of the result.</param>
/// <param name="mod">Modification to apply.</param>
/// <param name="s">Peptide sequence.</param>
/// <param name="proteinSet">Protein set used for the protein C-terminus check.</param>
/// <returns>A snapshot of <c>help</c>: the input peptide followed by all generated variants.</returns>
private ModifiedPeptide[] ApplyVariableModification(ModifiedPeptide peptide, Modification mod, string s, IProteinSet proteinSet) {
    help.Clear();
    help.Add(peptide);
    for (int i = 0; i < mod.AaCount; i++) {
        switch (mod.GetTermType(i)) {
            case ModificationSiteType.aa:
                AddResidueModifiedVariants(mod, i, s);
                break;
            case ModificationSiteType.nterm:
                // Terminal site: only applicable when the first residue is the site's amino acid.
                if (s[0] == mod.GetAaAt(i)) {
                    AddNTermModifiedVariants(mod);
                }
                break;
            case ModificationSiteType.cterm:
                // Terminal site: only applicable when the last residue is the site's amino acid.
                if (s[s.Length - 1] == mod.GetAaAt(i)) {
                    AddCTermModifiedVariants(mod);
                }
                break;
        }
    }
    if (mod.GetPosition() == ModificationPosition.anyNterm) {
        AddNTermModifiedVariants(mod);
    }
    if (mod.GetPosition() == ModificationPosition.anyCterm) {
        AddCTermModifiedVariants(mod);
    }
    if (mod.GetPosition() == ModificationPosition.proteinNterm && IsNterm()) {
        AddNTermModifiedVariants(mod);
    }
    if (mod.GetPosition() == ModificationPosition.proteinCterm && IsCterm(proteinSet)) {
        AddCTermModifiedVariants(mod);
    }
    return help.ToArray();
}

/// <summary>
/// For every variant in <c>help</c>, adds copies carrying 1..n instances of <paramref name="mod"/>
/// on the first 1..n still-unmodified positions of <paramref name="s"/> that match site <paramref name="siteIndex"/>.
/// </summary>
private void AddResidueModifiedVariants(Modification mod, int siteIndex, string s) {
    toBeAdded.Clear();
    foreach (ModifiedPeptide w in help) {
        List<int> indices = new List<int>();
        for (int j = 0; j < s.Length; j++) {
            if (s[j] == mod.GetAaAt(siteIndex) && w.modifications.GetModificationAt(j) == ushort.MaxValue) {
                indices.Add(j);
            }
        }
        // One variant per modification count j; the j modifications occupy the first j free sites.
        for (int j = 1; j <= indices.Count; j++) {
            ModifiedPeptide q = w.Clone();
            q.mass += j * mod.DeltaMass;
            for (int k = 0; k < j; k++) {
                q.modifications.SetModificationAt(indices[k], mod.Index);
            }
            toBeAdded.Add(q);
        }
    }
    foreach (ModifiedPeptide a in toBeAdded) {
        help.Add(a);
    }
}

/// <summary>
/// For every variant in <c>help</c> whose N-terminus is unmodified, adds a copy carrying
/// <paramref name="mod"/> on the N-terminus.
/// </summary>
private void AddNTermModifiedVariants(Modification mod) {
    toBeAdded.Clear();
    foreach (ModifiedPeptide w in help) {
        if (w.modifications.GetNTermModification() == ushort.MaxValue) {
            ModifiedPeptide q = w.Clone();
            q.mass += mod.DeltaMass;
            q.modifications.SetNTermModification(mod.Index);
            toBeAdded.Add(q);
        }
    }
    foreach (ModifiedPeptide a in toBeAdded) {
        help.Add(a);
    }
}

/// <summary>
/// For every variant in <c>help</c> whose C-terminus is unmodified, adds a copy carrying
/// <paramref name="mod"/> on the C-terminus.
/// </summary>
private void AddCTermModifiedVariants(Modification mod) {
    toBeAdded.Clear();
    foreach (ModifiedPeptide w in help) {
        if (w.modifications.GetCTermModification() == ushort.MaxValue) {
            ModifiedPeptide q = w.Clone();
            q.mass += mod.DeltaMass;
            q.modifications.SetCTermModification(mod.Index);
            toBeAdded.Add(q);
        }
    }
    foreach (ModifiedPeptide a in toBeAdded) {
        help.Add(a);
    }
}
/// <summary>
/// Registers every stored peptide hit whose top sequence is found in
/// <paramref name="peptideSequences"/> with the matching entry of <paramref name="identifiedPeptides"/>.
/// </summary>
/// <remarks>
/// Calls <c>Read()</c> first if <c>peptides</c> has not been loaded. For each hit the MS/MS scan data
/// (m/z, monoisotopic m/z, retention time, spectrum) and, depending on <c>type</c>, the SILAC or
/// isotope cluster are looked up and passed to <c>AddMascotPeptideHit</c>. Hits whose sequence is not
/// found by <c>Array.BinarySearch</c> are skipped. <paramref name="peptideSequences"/> must therefore be
/// sorted consistently with that search (presumably the default string ordering; verify against the caller).
/// </remarks>
/// <param name="fileIndex">Index of the raw file, forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="proteinIdToGroupIndex">Maps protein names to group indices; proteins missing from it are ignored.</param>
/// <param name="peakList">Source of MS/MS scan and cluster information.</param>
/// <param name="msmsData">Source of the MS/MS spectra.</param>
/// <param name="identifiedPeptides">Targets, parallel to <paramref name="peptideSequences"/>.</param>
/// <param name="peptideSequences">Sequences used to locate the target entry.</param>
/// <param name="reQuantitationResult">Forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="reQuantify">Forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="labelModificationSet">Forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="proteinSet">Resolves protein indices to names.</param>
/// <param name="silacType">Forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="labels1">Forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="labels2">Forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="ms2Tol">Forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="ms2TolUnit">Forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="topx">Forwarded to <c>AddMascotPeptideHit</c>.</param>
/// <param name="fixedMods">Forwarded to <c>AddMascotPeptideHit</c>.</param>
public void ProcessPeptides(int fileIndex, Dictionary<string, int> proteinIdToGroupIndex, IPeakList peakList, MsmsData msmsData, IIdentifiedPeptide[] identifiedPeptides, string[] peptideSequences, ReQuantitationResult reQuantitationResult, bool reQuantify, HashSet<string> labelModificationSet, IProteinSet proteinSet, SilacType silacType, SilacLabel[] labels1, SilacLabel[] labels2, double ms2Tol, string ms2TolUnit, int topx, string[] fixedMods) {
    if (peptides == null) {
        Read();
    }
    double[] monoIsoMz = peakList.MS2MonoisotopicMz;
    for (int i = 0; i < peptides.Length; i++) {
        MascotPeptide[] p = peptides[i];
        int scanNumber = scanNumbers[i];
        int ms2ind = peakList.GetMs2IndexFromScanNumber(scanNumber);
        double mz = peakList.GetMs2Mz(ms2ind);
        double monotopicMz = monoIsoMz[ms2ind];
        double time = peakList.GetMs2Rt(ms2ind);

        // -1 / null mean "not applicable for this query type".
        int silacId = -1;
        int isotopeId = -1;
        int silacIndex = -1;
        SilacCluster silacCluster = null;
        IsotopeCluster isotopeCluster = null;
        if (type == MascotQueryType.Silac) {
            int[] silacInfo = peakList.GetSilacInfoForMsmsScanNumber(scanNumber);
            silacId = silacInfo[0];
            silacIndex = silacInfo[1];
            silacCluster = peakList.GetSilacCluster(silacId);
        } else if (type == MascotQueryType.Isotope) {
            isotopeId = peakList.GetIsotopeIndexForMsmsScanNumber(scanNumber);
            isotopeCluster = peakList.GetIsotopeCluster(isotopeId);
        }

        int index = Array.BinarySearch(peptideSequences, p[0].Sequence);
        if (index < 0) {
            continue;
        }

        // Distinct protein groups the top-ranked peptide maps to.
        HashSet<int> groupIndices = new HashSet<int>();
        foreach (int pi in p[0].ProteinIndex) {
            int groupInd;
            if (proteinIdToGroupIndex.TryGetValue(proteinSet.GetName(pi), out groupInd)) {
                groupIndices.Add(groupInd);
            }
        }

        double[] specMasses;
        float[] specIntensities;
        bool uniqueProtein = (p[0].ProteinIndex.Length == 1);
        bool uniqueGroup = (groupIndices.Count == 1);
        msmsData.GetSpectrumFromScanNumber(scanNumber, out specMasses, out specIntensities);
        identifiedPeptides[index].AddMascotPeptideHit(p, scanNumber, fileIndex, type, silacId, silacIndex, isotopeId, silacCluster, isotopeCluster, time, peakList, mz, monotopicMz, fixedModifications[i], specMasses, specIntensities, reQuantitationResult, reQuantify, labelModificationSet, silacType, labels1, labels2, ms2Tol, ms2TolUnit, topx, fixedMods);
        identifiedPeptides[index].UniqueProtein = uniqueProtein;
        identifiedPeptides[index].UniqueGroup = uniqueGroup;
    }
}
/// <summary>
/// Public entry point for the FDR calculation; forwards to the six-argument <c>CalcFdr</c> overload.
/// </summary>
/// <param name="rawFiles">Raw files to process.</param>
/// <param name="decoyPrefix">Decoy marker.</param>
/// <param name="type">Query type to process.</param>
/// <param name="proteinSet">Protein set used for the reverse-hit check.</param>
/// <param name="debug">Debug flag, forwarded unchanged.</param>
/// <param name="labelModificationSet">Accepted but not used by this method.</param>
/// <param name="ip">Source of the identifications.</param>
public static void CalcFdr(string[] rawFiles, string decoyPrefix, MascotQueryType type, IProteinSet proteinSet, bool debug, HashSet<string> labelModificationSet, IIdentificationProvider ip) {
    // labelModificationSet is intentionally dropped here: the forwarded overload has no such parameter.
    CalcFdr(
        rawFiles,
        decoyPrefix,
        type,
        proteinSet,
        debug,
        ip);
}
/// <summary>
/// Collects (score, log sequence length, forward/decoy) triples from the identifications of the
/// given raw files, builds a <c>BayesianInversion2D</c> from them and hands it to <c>SetPep</c> for
/// every file.
/// </summary>
/// <remarks>
/// Entries whose score is NaN or infinite are left out. Collection stops after the file that
/// pushes the number of collected entries above 10,000,000. Nothing happens if no entry was
/// collected. <c>ip.Dispose()</c> is called after every <c>ip.GetIdentifications</c> call.
/// When <paramref name="debug"/> is set and <paramref name="type"/> is Silac, the model is
/// additionally passed to <c>Write</c>.
/// </remarks>
/// <param name="rawFiles">Raw files to process.</param>
/// <param name="decoyPrefix">Decoy marker.</param>
/// <param name="type">Query type to process.</param>
/// <param name="proteinSet">Protein set used for the reverse-hit check.</param>
/// <param name="debug">Enables the write-out for the Silac type.</param>
/// <param name="ip">Source of the identifications.</param>
private static void CalcFdr(string[] rawFiles, string decoyPrefix, MascotQueryType type, IProteinSet proteinSet, bool debug, IIdentificationProvider ip) {
    const int maxCollected = 10000000;
    List<bool> isForward = new List<bool>();
    List<double> altScores = new List<double>();
    List<double> logLengths = new List<double>();

    foreach (string rawFile in rawFiles) {
        Identifications ident = ip.GetIdentifications(rawFile, type);
        int entryCount = ident.Count;
        for (int entry = 0; entry < entryCount; entry++) {
            bool forward = ident.IsHighestScoringCorrect(entry, decoyPrefix, proteinSet);
            double score = ident.GetHighestAltScore(entry);
            double logLength = Math.Log(ident.GetBestSequence(entry).Length);
            if (double.IsNaN(score) || double.IsInfinity(score)) {
                continue;
            }
            isForward.Add(forward);
            altScores.Add(score);
            logLengths.Add(logLength);
        }
        ip.Dispose();
        if (isForward.Count > maxCollected) {
            break;
        }
    }

    if (isForward.Count == 0) {
        return;
    }

    bool write = debug && (type == MascotQueryType.Silac);
    BayesianInversion2D model = new BayesianInversion2D(altScores.ToArray(), logLengths.ToArray(), isForward.ToArray(), write);
    if (write) {
        Write(rawFiles, model);
    }
    foreach (string rawFile in rawFiles) {
        SetPep(ip.GetIdentifications(rawFile, type), model);
        ip.Dispose();
    }
}
/// <summary>
/// Adds, for the first peptide of every stored entry, its sequence to the set of peptide sequences
/// of each protein it maps to.
/// </summary>
/// <remarks>
/// Calls <c>Read()</c> first if <c>peptides</c> has not been loaded. Peptides rejected by
/// <c>T07SearchEngineEnhancement.ValidPeptide</c> are skipped. Missing protein entries are created on demand.
/// </remarks>
/// <param name="protIdToPepSeqs">Table from protein name to peptide sequences; updated in place.</param>
/// <param name="proteinSet">Resolves protein indices to names.</param>
public void FillProteinToPepTable(Dictionary<string, HashSet<string>> protIdToPepSeqs, IProteinSet proteinSet) {
    if (peptides == null) {
        Read();
    }
    for (int i = 0; i < peptides.Length; i++) {
        MascotPeptide p = peptides[i][0];
        int[] proteinIndex = p.ProteinIndex;
        if (!T07SearchEngineEnhancement.ValidPeptide(p.Sequence)) {
            continue;
        }
        string sequence = p.Sequence;
        foreach (int pi in proteinIndex) {
            string protId = proteinSet.GetName(pi);
            HashSet<string> pepSeqs;
            if (!protIdToPepSeqs.TryGetValue(protId, out pepSeqs)) {
                pepSeqs = new HashSet<string>();
                protIdToPepSeqs.Add(protId, pepSeqs);
            }
            // HashSet.Add ignores duplicates, so no Contains check is needed.
            pepSeqs.Add(sequence);
        }
    }
}