Ejemplo n.º 1
0
        /// <summary>
        /// Removes decoy ("reverse") protein references from <c>proteinIndex</c> when the referenced
        /// proteins are a mixture of decoy and non-decoy entries.
        /// </summary>
        /// <remarks>
        /// When none or all of the referenced proteins are classified as decoys, <c>proteinIndex</c> is
        /// left untouched. Classification is delegated to
        /// <c>T07SearchEngineEnhancement.IsReverseProtein</c> for both the "is it mixed?" decision and
        /// the filtering itself.
        /// </remarks>
        /// <param name="prefix">Decoy marker passed to <c>IsReverseProtein</c>.</param>
        /// <param name="proteinSet">Resolves each protein index to its name.</param>
        public void Correct(string prefix, IProteinSet proteinSet)
        {
            List<int> forward = new List<int>(proteinIndex.Length);

            foreach (int protein in proteinIndex)
            {
                // A single predicate drives both the early-exit guard below and the filtering, so
                // the two can never disagree (previously the filter used a separate StartsWith test).
                if (!T07SearchEngineEnhancement.IsReverseProtein(proteinSet.GetName(protein), prefix))
                {
                    forward.Add(protein);
                }
            }
            // Nothing to correct when every entry is a decoy (no forward hits) or none is.
            if (forward.Count == 0 || forward.Count == proteinIndex.Length)
            {
                return;
            }
            proteinIndex = forward.ToArray();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Applies the given fixed modifications: the value returned by the multi-argument
        /// <c>ApplyFixedModifications</c> overload is added to <c>monoisotopicMass</c>, and the
        /// modification counts it reports are stored in <c>fixedModifications</c>.
        /// </summary>
        /// <param name="modifications">Fixed modifications to apply.</param>
        /// <param name="proteinSet">Protein lookup used for the C-terminus check.</param>
        /// <param name="sequence">Sequence forwarded to the overload.</param>
        public void ApplyFixedModifications(Modification[] modifications, IProteinSet proteinSet, string sequence)
        {
            bool atNterm = IsNterm();
            bool atCterm = IsCterm(proteinSet);
            PeptideModificationCounts counts;

            monoisotopicMass += ApplyFixedModifications(modifications, sequence, atNterm, atCterm, length, out counts);
            fixedModifications = counts;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Resolves every entry of <c>proteinIndex</c> to its name via <paramref name="proteinSet"/>.
 /// </summary>
 /// <param name="proteinSet">Protein lookup providing <c>GetName</c>.</param>
 /// <returns>A new array of names, in the same order as <c>proteinIndex</c>.</returns>
 public string[] GetProteinIds(IProteinSet proteinSet)
 {
     string[] ids = new string[proteinIndex.Length];
     int slot = 0;

     foreach (int protein in proteinIndex)
     {
         ids[slot] = proteinSet.GetName(protein);
         slot++;
     }
     return ids;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Returns the character of the protein sequence located immediately before the offset stored
        /// in <c>proteinOffsets[index]</c>.
        /// </summary>
        /// <param name="index">Position into <c>proteinOffsets</c> / <c>proteinIndices</c>.</param>
        /// <param name="proteinSet">Protein lookup providing the sequence.</param>
        /// <returns>The preceding character, or a space when the offset is at the start of the sequence.</returns>
        public char GetResidueBefore(int index, IProteinSet proteinSet)
        {
            int preceding = proteinOffsets[index] - 1;

            return preceding >= 0
                ? proteinSet.Get(proteinIndices[index]).Sequence[preceding]
                : ' ';
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Runs <c>CalcFdr</c> for the given files, derives a threshold per file plus one pooled
        /// threshold, and then either collects sequences or applies the threshold to each file's
        /// identifications.
        /// </summary>
        /// <remarks>
        /// When <paramref name="peptideFdr"/> is below 1, each per-file threshold comes from
        /// <c>FindReverseHitThresholdValue</c> (capped at <paramref name="peptidePep"/> when that is
        /// below 1) and the pooled threshold is <c>ArrayUtil.Median</c> of those values with the same
        /// cap. Otherwise every threshold is simply <paramref name="peptidePep"/>.
        /// <c>ip.Dispose()</c> is invoked after every <c>GetIdentifications</c> use, as in the
        /// surrounding code.
        /// </remarks>
        /// <param name="perFileThreshold">Use the per-file threshold instead of the pooled one.</param>
        /// <param name="onlyCollectSequences">
        /// When true, <c>CollectSequences</c> is called instead of <c>ApplyReverseHitThreshold</c>.
        /// </param>
        public static void FdrThresholding(string[] rawFiles, string decoyPrefix, double peptideFdr, double peptidePep,
                                           MascotQueryType type, bool perFileThreshold, bool debug, HashSet<string> sequences,
                                           bool onlyCollectSequences, IProteinSet proteinSet,
                                           HashSet<string> labelModificationSet, IIdentificationProvider ip)
        {
            CalcFdr(rawFiles, decoyPrefix, type, proteinSet, debug, labelModificationSet, ip);

            int fileCount = rawFiles.Length;
            double[] perFileCutoffs = new double[fileCount];
            double pooledCutoff;
            bool capAtPep = peptidePep < 1;

            // Written as a negated "< 1" so that a NaN FDR takes the same branch as before.
            if (!(peptideFdr < 1))
            {
                for (int f = 0; f < fileCount; f++)
                {
                    perFileCutoffs[f] = peptidePep;
                }
                pooledCutoff = peptidePep;
            }
            else
            {
                for (int f = 0; f < fileCount; f++)
                {
                    double cutoff = FindReverseHitThresholdValue(ip.GetIdentifications(rawFiles[f], type), peptideFdr,
                                                                 decoyPrefix, proteinSet);
                    ip.Dispose();
                    perFileCutoffs[f] = capAtPep ? Math.Min(cutoff, peptidePep) : cutoff;
                }
                pooledCutoff = ArrayUtil.Median(perFileCutoffs);
                if (capAtPep)
                {
                    pooledCutoff = Math.Min(pooledCutoff, peptidePep);
                }
            }

            for (int f = 0; f < fileCount; f++)
            {
                double cutoff = perFileThreshold ? perFileCutoffs[f] : pooledCutoff;
                if (onlyCollectSequences)
                {
                    CollectSequences(ip.GetIdentifications(rawFiles[f], type), cutoff, sequences);
                }
                else
                {
                    ApplyReverseHitThreshold(ip.GetIdentifications(rawFiles[f], type), cutoff);
                }
                ip.Dispose();
            }
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Tells whether every protein referenced by <c>proteinIndex</c> is classified as a reverse
 /// protein by <c>T07SearchEngineEnhancement.IsReverseProtein</c>.
 /// </summary>
 /// <param name="reverseStr">Reverse marker passed to <c>IsReverseProtein</c>.</param>
 /// <param name="proteinSet">Resolves protein indices to names.</param>
 /// <returns>
 /// False as soon as one non-reverse protein is found; true otherwise (including when
 /// <c>proteinIndex</c> is empty).
 /// </returns>
 public bool HasOnlyReverseHits(string reverseStr, IProteinSet proteinSet)
 {
     foreach (int protein in proteinIndex)
     {
         string name = proteinSet.GetName(protein);
         bool isReverse = T07SearchEngineEnhancement.IsReverseProtein(name, reverseStr);
         if (!isReverse)
         {
             return false;
         }
     }
     return true;
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Returns the character of the protein sequence located at
        /// <c>proteinOffsets[index] + length</c>, i.e. directly after this span.
        /// </summary>
        /// <param name="index">Position into <c>proteinOffsets</c> / <c>proteinIndices</c>.</param>
        /// <param name="proteinSet">Protein lookup providing the sequence.</param>
        /// <returns>The following character, or a space when the span reaches the end of the sequence.</returns>
        public char GetResidueAfter(int index, IProteinSet proteinSet)
        {
            int following = proteinOffsets[index] + length;
            string proteinSequence = proteinSet.Get(proteinIndices[index]).Sequence;

            return following < proteinSequence.Length ? proteinSequence[following] : ' ';
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Checks whether, for at least one of the <c>ProteinCount</c> occurrences, the span ending at
 /// <c>proteinOffsets[i] + length</c> reaches or passes the end of its protein.
 /// </summary>
 /// <param name="proteinSet">Protein lookup providing <c>GetLength</c>.</param>
 /// <returns>True on the first occurrence that reaches the protein end; false if none does.</returns>
 private bool IsCterm(IProteinSet proteinSet)
 {
     int occurrence = 0;

     while (occurrence < ProteinCount)
     {
         int end = proteinOffsets[occurrence] + length;
         if (end >= proteinSet.GetLength(proteinIndices[occurrence]))
         {
             return true;
         }
         occurrence++;
     }
     return false;
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds the modified variants of this peptide: starts from the unmodified version, applies
        /// the label modifications, then each variable modification in array order, filters the
        /// result with <c>FilterEqualMods</c> and converts it with <c>ConvertToDatabasePeptides</c>.
        /// </summary>
        /// <param name="modifications">Variable modifications, applied one after another.</param>
        /// <param name="lMods">Label modifications forwarded to <c>ApplyLabelModifications</c>.</param>
        /// <param name="index">Forwarded to <c>CreateNonmodifiedVersion</c>.</param>
        /// <param name="proteinSet">Protein lookup used to obtain the sequence.</param>
        public DatabaseModifiedPeptide[] ApplyVariableModifications(Modification[] modifications, Modification[][] lMods,
                                                                    int index, IProteinSet proteinSet)
        {
            string peptideSequence = GetSequence(proteinSet);
            ModifiedPeptide[] variants = new ModifiedPeptide[] { CreateNonmodifiedVersion(index, peptideSequence) };

            variants = ApplyLabelModifications(variants, lMods, peptideSequence);
            foreach (Modification variableMod in modifications)
            {
                variants = ApplyVariableModification(variants, variableMod, peptideSequence, proteinSet);
            }
            variants = FilterEqualMods(variants);
            return ConvertToDatabasePeptides(variants);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Counts how many characters of this peptide's sequence occur in <paramref name="residues"/>.
        /// </summary>
        /// <param name="residues">Set of characters to look for, given as a string.</param>
        /// <param name="proteinSet">Protein lookup used to obtain the sequence.</param>
        /// <returns>Number of sequence positions whose character is contained in <paramref name="residues"/>.</returns>
        public int GetOccurenceCount(string residues, IProteinSet proteinSet)
        {
            int hits = 0;

            foreach (char residue in GetSequence(proteinSet))
            {
                if (residues.IndexOf(residue) >= 0)
                {
                    hits++;
                }
            }
            return hits;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Applies one variable modification to every peptide in <paramref name="peptides"/> and
        /// concatenates the per-peptide results in input order.
        /// </summary>
        /// <remarks>No de-duplication of the combined result is performed here.</remarks>
        /// <param name="peptides">Peptides to expand.</param>
        /// <param name="mod">Modification to apply.</param>
        /// <param name="sequence">Sequence forwarded to the single-peptide overload.</param>
        /// <param name="proteinSet">Protein lookup forwarded to the single-peptide overload.</param>
        private ModifiedPeptide[] ApplyVariableModification(IEnumerable<ModifiedPeptide> peptides, Modification mod,
                                                            string sequence, IProteinSet proteinSet)
        {
            List<ModifiedPeptide> expanded = new List<ModifiedPeptide>();

            foreach (ModifiedPeptide candidate in peptides)
            {
                expanded.AddRange(ApplyVariableModification(candidate, mod, sequence, proteinSet));
            }
            return expanded.ToArray();
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Drives FDR thresholding across the file groups: Silac and Isotope queries for the
        /// recalibrated and non-recalibrated files (when those arrays are non-empty), Peak queries for
        /// <paramref name="rawFiles"/>, an optional <c>FilterBySequence</c> pass, and finally
        /// <c>LimitPep</c> for every query type.
        /// </summary>
        /// <remarks>
        /// <paramref name="keepLowScorers"/> is passed to the per-type overload in the argument
        /// position that overload names <c>onlyCollectSequences</c>. <paramref name="writeOut"/> is
        /// only forwarded as the debug flag for the Silac pass over <paramref name="recalFiles"/> and
        /// the Silac <c>FilterBySequence</c> pass; every other call receives <c>false</c>.
        /// </remarks>
        public static void FdrThresholding(string[] rawFiles, string[] recalFiles, string[] nonRecalFiles, string revstring,
                                           double peptideFdr, double peptidePep, bool perFileThreshold, bool keepLowScorers,
                                           IProteinSet proteinSet, HashSet<string> labelModificationSet, IIdentificationProvider ip, bool writeOut)
        {
            HashSet<string> sequences = new HashSet<string>();
            MascotQueryType[] clusterTypes = { MascotQueryType.Silac, MascotQueryType.Isotope };
            MascotQueryType[] allTypes = { MascotQueryType.Silac, MascotQueryType.Isotope, MascotQueryType.Peak };

            if (recalFiles.Length > 0)
            {
                foreach (MascotQueryType queryType in clusterTypes)
                {
                    bool debugFlag = writeOut && queryType == MascotQueryType.Silac;
                    FdrThresholding(recalFiles, revstring, peptideFdr, peptidePep, queryType,
                                    perFileThreshold, debugFlag, sequences, keepLowScorers, proteinSet,
                                    labelModificationSet, ip);
                }
            }
            if (nonRecalFiles.Length > 0)
            {
                foreach (MascotQueryType queryType in clusterTypes)
                {
                    FdrThresholding(nonRecalFiles, revstring, peptideFdr, peptidePep, queryType,
                                    perFileThreshold, false, sequences, keepLowScorers, proteinSet,
                                    labelModificationSet, ip);
                }
            }
            FdrThresholding(rawFiles, revstring, peptideFdr, peptidePep, MascotQueryType.Peak,
                            perFileThreshold, false, sequences, keepLowScorers, proteinSet,
                            labelModificationSet, ip);

            if (keepLowScorers)
            {
                foreach (MascotQueryType queryType in allTypes)
                {
                    bool debugFlag = writeOut && queryType == MascotQueryType.Silac;
                    FilterBySequence(rawFiles, revstring, peptideFdr, peptidePep, queryType,
                                     perFileThreshold, debugFlag, sequences, keepLowScorers, ip);
                }
            }
            foreach (MascotQueryType queryType in allTypes)
            {
                LimitPep(rawFiles, queryType, ip);
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Determines the largest <c>Pep</c> value at which the running ratio of reverse-only hits to
        /// other hits still does not exceed <paramref name="totalPeptideFDR"/>.
        /// </summary>
        /// <remarks>
        /// Only the first peptide of each identification is considered. Entries are walked in the
        /// order produced by <c>ArrayUtil.Order</c> on the Pep values (presumably ascending - confirm
        /// against <c>ArrayUtil</c>). While no non-reverse hit has been seen the ratio divides by
        /// zero, which as a double never satisfies the comparison for a finite FDR.
        /// </remarks>
        /// <returns>
        /// 1 when there are no identifications; the maximum accepted Pep value when at least one
        /// position satisfies the FDR limit; 0 otherwise.
        /// </returns>
        public static double FindReverseHitThresholdValue(Identifications identifications, double totalPeptideFDR,
                                                          string reverseStr, IProteinSet proteinSet)
        {
            int total = identifications.Count;
            if (total == 0)
            {
                return 1;
            }

            double[] pepValues = new double[total];
            bool[] isForward = new bool[total];
            for (int q = 0; q < total; q++)
            {
                MascotPeptide best = identifications.GetPeptidesAt(q)[0];
                pepValues[q] = best.Pep;
                isForward[q] = !best.HasOnlyReverseHits(reverseStr, proteinSet);
            }

            int[] ranking = ArrayUtil.Order(pepValues);
            List<double> accepted = new List<double>();
            double forwardSeen = 0;
            for (int rank = 0; rank < total; rank++)
            {
                int entry = ranking[rank];
                if (isForward[entry])
                {
                    forwardSeen++;
                }
                double reverseSeen = (rank + 1) - forwardSeen;
                if (reverseSeen / forwardSeen <= totalPeptideFDR)
                {
                    accepted.Add(pepValues[entry]);
                }
            }

            if (accepted.Count == 0)
            {
                return 0;
            }
            return ArrayUtil.Max(accepted.ToArray());
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Extracts this peptide's sequence from its first protein occurrence: <c>length</c> characters
        /// starting at <c>proteinOffsets[0]</c> of the protein at <c>proteinIndices[0]</c>.
        /// </summary>
        /// <param name="proteinSet">Protein lookup providing the protein sequence.</param>
        public string GetSequence(IProteinSet proteinSet)
        {
            return proteinSet.Get(proteinIndices[0]).Sequence.Substring(proteinOffsets[0], length);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Tells whether the first peptide stored at <paramref name="index"/> has at least one hit
        /// that is not a reverse hit.
        /// </summary>
        /// <param name="index">Position passed to <c>GetPeptidesAt</c>.</param>
        /// <param name="revstring">Reverse marker passed to <c>HasOnlyReverseHits</c>.</param>
        /// <param name="proteinSet">Protein lookup passed to <c>HasOnlyReverseHits</c>.</param>
        public bool IsHighestScoringCorrect(int index, string revstring, IProteinSet proteinSet)
        {
            MascotPeptide first = GetPeptidesAt(index)[0];
            bool reverseOnly = first.HasOnlyReverseHits(revstring, proteinSet);

            return !reverseOnly;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Expands one peptide into all variants obtainable by applying the variable modification
        /// <paramref name="mod"/>, including the unmodified input itself.
        /// </summary>
        /// <remarks>
        /// The variants are accumulated in the <c>help</c> and <c>toBeAdded</c> scratch collections
        /// that live outside this method. NOTE(review): concurrent calls sharing those collections
        /// would interfere with each other - confirm how they are declared.
        /// Each site of the modification is handled by its site type (residue, N-terminal residue,
        /// C-terminal residue); afterwards the position-based cases (any/protein N- or C-terminus)
        /// are applied regardless of residue.
        /// </remarks>
        /// <param name="peptide">Starting peptide; always part of the result.</param>
        /// <param name="mod">Variable modification to apply.</param>
        /// <param name="s">Peptide sequence; indexed at 0 and at its last position for terminal sites.</param>
        /// <param name="proteinSet">Protein lookup used for the protein C-terminus check.</param>
        /// <returns>Snapshot of all variants collected in <c>help</c>.</returns>
        private ModifiedPeptide[] ApplyVariableModification(ModifiedPeptide peptide, Modification mod, string s, IProteinSet proteinSet)
        {
            help.Clear();
            help.Add(peptide);
            for (int i = 0; i < mod.AaCount; i++)
            {
                switch (mod.GetTermType(i))
                {
                    case ModificationSiteType.aa:
                        ExtendWithResidueVariants(mod, i, s);
                        break;

                    case ModificationSiteType.nterm:
                        ExtendWithTerminalVariants(mod, true, s[0] == mod.GetAaAt(i));
                        break;

                    case ModificationSiteType.cterm:
                        ExtendWithTerminalVariants(mod, false, s[s.Length - 1] == mod.GetAaAt(i));
                        break;
                }
            }
            if (mod.GetPosition() == ModificationPosition.anyNterm)
            {
                ExtendWithTerminalVariants(mod, true, true);
            }
            if (mod.GetPosition() == ModificationPosition.anyCterm)
            {
                ExtendWithTerminalVariants(mod, false, true);
            }
            if (mod.GetPosition() == ModificationPosition.proteinNterm && IsNterm())
            {
                ExtendWithTerminalVariants(mod, true, true);
            }
            if (mod.GetPosition() == ModificationPosition.proteinCterm && IsCterm(proteinSet))
            {
                ExtendWithTerminalVariants(mod, false, true);
            }
            return help.ToArray();
        }

        /// <summary>
        /// For every variant in <c>help</c>, adds copies carrying <paramref name="mod"/> on the still
        /// unmodified positions of <paramref name="s"/> that match site <paramref name="siteIndex"/>.
        /// </summary>
        /// <remarks>
        /// With n eligible positions, n copies are produced: the j-th copy modifies the first j
        /// eligible positions in sequence order. Other subsets of positions are not enumerated.
        /// </remarks>
        private void ExtendWithResidueVariants(Modification mod, int siteIndex, string s)
        {
            toBeAdded.Clear();
            foreach (ModifiedPeptide w in help)
            {
                List<int> indices = new List<int>();
                for (int j = 0; j < s.Length; j++)
                {
                    if (s[j] == mod.GetAaAt(siteIndex) && w.modifications.GetModificationAt(j) == ushort.MaxValue)
                    {
                        indices.Add(j);
                    }
                }
                for (int j = 1; j <= indices.Count; j++)
                {
                    ModifiedPeptide q = w.Clone();
                    q.mass += j * mod.DeltaMass;
                    for (int k = 0; k < j; k++)
                    {
                        q.modifications.SetModificationAt(indices[k], mod.Index);
                    }
                    toBeAdded.Add(q);
                }
            }
            FlushPendingVariants();
        }

        /// <summary>
        /// For every variant in <c>help</c> whose N- or C-terminal slot is still unset
        /// (<c>ushort.MaxValue</c>), adds a copy carrying <paramref name="mod"/> on that terminus.
        /// </summary>
        /// <param name="mod">Modification to place on the terminus.</param>
        /// <param name="nTerminal">True for the N-terminus, false for the C-terminus.</param>
        /// <param name="siteMatches">When false no copies are produced; <c>toBeAdded</c> is still cleared.</param>
        private void ExtendWithTerminalVariants(Modification mod, bool nTerminal, bool siteMatches)
        {
            toBeAdded.Clear();
            if (siteMatches)
            {
                foreach (ModifiedPeptide w in help)
                {
                    bool terminusFree = nTerminal
                        ? w.modifications.GetNTermModification() == ushort.MaxValue
                        : w.modifications.GetCTermModification() == ushort.MaxValue;
                    if (!terminusFree)
                    {
                        continue;
                    }
                    ModifiedPeptide q = w.Clone();
                    q.mass += mod.DeltaMass;
                    if (nTerminal)
                    {
                        q.modifications.SetNTermModification(mod.Index);
                    }
                    else
                    {
                        q.modifications.SetCTermModification(mod.Index);
                    }
                    toBeAdded.Add(q);
                }
            }
            FlushPendingVariants();
        }

        /// <summary>
        /// Appends everything collected in <c>toBeAdded</c> to <c>help</c>. Kept separate from the
        /// enumeration of <c>help</c> so that the collection is never modified while it is iterated.
        /// </summary>
        private void FlushPendingVariants()
        {
            foreach (ModifiedPeptide pending in toBeAdded)
            {
                help.Add(pending);
            }
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Registers every stored peptide hit whose sequence is present in
 /// <paramref name="peptideSequences"/> with the matching entry of
 /// <paramref name="identifiedPeptides"/>, and records whether the hit maps to a single protein
 /// and to a single protein group.
 /// </summary>
 /// <remarks>
 /// Calls <c>Read()</c> first when <c>peptides</c> has not been loaded. Hits whose sequence is not
 /// found by <c>Array.BinarySearch</c> are skipped; <paramref name="peptideSequences"/> therefore
 /// has to be sorted consistently with that search. SILAC / isotope cluster information is only
 /// looked up when this object's <c>type</c> is <c>Silac</c> or <c>Isotope</c>; otherwise the ids
 /// stay -1 and the clusters stay null.
 /// </remarks>
 /// <param name="proteinIdToGroupIndex">Maps protein names to group indices; unknown names are ignored.</param>
 /// <param name="peptideSequences">Sequences parallel to <paramref name="identifiedPeptides"/>.</param>
 public void ProcessPeptides(int fileIndex, Dictionary<string, int> proteinIdToGroupIndex, IPeakList peakList,
                             MsmsData msmsData, IIdentifiedPeptide[] identifiedPeptides, string[] peptideSequences,
                             ReQuantitationResult reQuantitationResult, bool reQuantify,
                             HashSet<string> labelModificationSet, IProteinSet proteinSet,
                             SilacType silacType, SilacLabel[] labels1, SilacLabel[] labels2, double ms2Tol,
                             string ms2TolUnit, int topx, string[] fixedMods)
 {
     if (peptides == null)
     {
         Read();
     }
     double[] monoIsoMz = peakList.MS2MonoisotopicMz;
     for (int i = 0; i < peptides.Length; i++)
     {
         MascotPeptide[] p              = peptides[i];
         int             scanNumber     = scanNumbers[i];
         int             ms2ind         = peakList.GetMs2IndexFromScanNumber(scanNumber);
         double          mz             = peakList.GetMs2Mz(ms2ind);
         double          monotopicMz    = monoIsoMz[ms2ind];
         double          time           = peakList.GetMs2Rt(ms2ind);
         int             silacId        = -1;
         int             isotopeId      = -1;
         int             silacIndex     = -1;
         SilacCluster    silacCluster   = null;
         IsotopeCluster  isotopeCluster = null;
         if (type == MascotQueryType.Silac)
         {
             int[] silacInfo = peakList.GetSilacInfoForMsmsScanNumber(scanNumber);
             silacId      = silacInfo[0];
             silacIndex   = silacInfo[1];
             silacCluster = peakList.GetSilacCluster(silacId);
         }
         else if (type == MascotQueryType.Isotope)
         {
             isotopeId      = peakList.GetIsotopeIndexForMsmsScanNumber(scanNumber);
             isotopeCluster = peakList.GetIsotopeCluster(isotopeId);
         }
         int index = Array.BinarySearch(peptideSequences, p[0].Sequence);
         if (index < 0)
         {
             continue;
         }
         // Distinct protein groups hit by the top-ranked peptide.
         HashSet<int> tmpGroupInds = new HashSet<int>();
         foreach (int pi in p[0].ProteinIndex)
         {
             int groupInd;
             // Single dictionary lookup; HashSet.Add already ignores duplicates.
             if (proteinIdToGroupIndex.TryGetValue(proteinSet.GetName(pi), out groupInd))
             {
                 tmpGroupInds.Add(groupInd);
             }
         }
         double[] specMasses;
         float[]  specIntensities;
         bool     uniqueProtein = (p[0].ProteinIndex.Length == 1);
         bool     uniqueGroup   = (tmpGroupInds.Count == 1);
         msmsData.GetSpectrumFromScanNumber(scanNumber, out specMasses, out specIntensities);
         identifiedPeptides[index].AddMascotPeptideHit(p, scanNumber, fileIndex, type, silacId, silacIndex, isotopeId,
                                                       silacCluster, isotopeCluster, time, peakList, mz, monotopicMz,
                                                       fixedModifications[i], specMasses, specIntensities,
                                                       reQuantitationResult, reQuantify, labelModificationSet, silacType,
                                                       labels1, labels2, ms2Tol, ms2TolUnit, topx, fixedMods);
         identifiedPeptides[index].UniqueProtein = uniqueProtein;
         identifiedPeptides[index].UniqueGroup   = uniqueGroup;
     }
 }
Ejemplo n.º 18
0
 /// <summary>
 /// Public entry point that forwards to the <c>CalcFdr</c> overload without a label-modification
 /// argument.
 /// </summary>
 /// <remarks>
 /// <paramref name="labelModificationSet"/> is accepted but not used by this method; every other
 /// argument is passed through unchanged.
 /// </remarks>
 public static void CalcFdr(string[] rawFiles, string decoyPrefix, MascotQueryType type, IProteinSet proteinSet, bool debug,
                            HashSet <string> labelModificationSet, IIdentificationProvider ip)
 {
     CalcFdr(rawFiles, decoyPrefix, type, proteinSet, debug, ip);
 }
Ejemplo n.º 19
0
        /// <summary>
        /// Collects (score, log sequence length, correct?) triples for the best hit of every
        /// identification in the given files, builds a <c>BayesianInversion2D</c> from them and
        /// passes it to <c>SetPep</c> for each file's identifications.
        /// </summary>
        /// <remarks>
        /// Hits whose alternative score is NaN or infinite are left out. Collection stops after the
        /// file that pushes the number of collected points above 10,000,000. Nothing is done when no
        /// points were collected. The model is additionally written out via <c>Write</c> when
        /// <paramref name="debug"/> is set and <paramref name="type"/> is <c>Silac</c>.
        /// <c>ip.Dispose()</c> is invoked after each file, as in the surrounding code.
        /// </remarks>
        private static void CalcFdr(string[] rawFiles, string decoyPrefix, MascotQueryType type, IProteinSet proteinSet, bool debug,
                                    IIdentificationProvider ip)
        {
            const int maxCollectedPoints = 10000000;
            List<bool> isCorrect = new List<bool>();
            List<double> altScores = new List<double>();
            List<double> logLengths = new List<double>();

            foreach (string rawFile in rawFiles)
            {
                Identifications ident = ip.GetIdentifications(rawFile, type);
                int hitCount = ident.Count;
                for (int j = 0; j < hitCount; j++)
                {
                    bool hitCorrect = ident.IsHighestScoringCorrect(j, decoyPrefix, proteinSet);
                    double score = ident.GetHighestAltScore(j);
                    double logLength = Math.Log(ident.GetBestSequence(j).Length);
                    if (double.IsNaN(score) || double.IsInfinity(score))
                    {
                        continue;
                    }
                    isCorrect.Add(hitCorrect);
                    altScores.Add(score);
                    logLengths.Add(logLength);
                }
                ip.Dispose();
                if (isCorrect.Count > maxCollectedPoints)
                {
                    break;
                }
            }
            if (isCorrect.Count == 0)
            {
                return;
            }

            bool write = debug && type == MascotQueryType.Silac;
            BayesianInversion2D bi = new BayesianInversion2D(altScores.ToArray(), logLengths.ToArray(), isCorrect.ToArray(), write);
            if (write)
            {
                Write(rawFiles, bi);
            }
            foreach (string rawFile in rawFiles)
            {
                SetPep(ip.GetIdentifications(rawFile, type), bi);
                ip.Dispose();
            }
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Adds, for the first peptide of every stored entry, its sequence to the set of peptide
 /// sequences of each protein it references.
 /// </summary>
 /// <remarks>
 /// Calls <c>Read()</c> first when <c>peptides</c> has not been loaded. Peptides rejected by
 /// <c>T07SearchEngineEnhancement.ValidPeptide</c> are skipped. Missing protein entries are
 /// created on demand.
 /// </remarks>
 /// <param name="protIdToPepSeqs">Table from protein name to peptide sequences; updated in place.</param>
 /// <param name="proteinSet">Resolves protein indices to names.</param>
 public void FillProteinToPepTable(Dictionary<string, HashSet<string>> protIdToPepSeqs, IProteinSet proteinSet)
 {
     if (peptides == null)
     {
         Read();
     }
     for (int i = 0; i < peptides.Length; i++)
     {
         MascotPeptide p            = peptides[i][0];
         int[]         proteinIndex = p.ProteinIndex;
         if (!T07SearchEngineEnhancement.ValidPeptide(p.Sequence))
         {
             continue;
         }
         foreach (int pi in proteinIndex)
         {
             string protId = proteinSet.GetName(pi);
             HashSet<string> pepSeqs;
             // One lookup instead of ContainsKey + Add + indexer.
             if (!protIdToPepSeqs.TryGetValue(protId, out pepSeqs))
             {
                 pepSeqs = new HashSet<string>();
                 protIdToPepSeqs.Add(protId, pepSeqs);
             }
             // HashSet.Add is a no-op for an element that is already present.
             pepSeqs.Add(p.Sequence);
         }
     }
 }