Пример #1
0
        /// <summary>
        /// Links protein groups and peptide groups to each other and then
        /// partitions them into clusters. Every result of a protein group
        /// contributes its peptide group to that protein group, and the
        /// protein group is recorded on that peptide group in turn. Each
        /// protein group that has no cluster yet seeds a new cluster, which
        /// is filled by <c>recursivelyAssignProteinGroupsToCluster</c>.
        /// </summary>
        /// <remarks>
        /// Any previously assembled clusters are discarded. Once built, the
        /// clusters are sorted with
        /// <c>ClusterList.SortDescendingBySequencesThenSpectra</c> and
        /// renumbered from 1 in sorted order; the new number is copied to
        /// every protein group and peptide group of the cluster. Progress
        /// messages for the linking step are produced only when
        /// <c>statusOutput</c> is not null.
        /// </remarks>
        public void assembleClusters()
        {
            clusters.Clear();

            // Decide once whether progress is reported, so that a single
            // linking loop serves both the silent and the reporting case.
            bool reportProgress = statusOutput != null;

            // Link every protein group with the peptide groups of its results.
            // foreach is used instead of hand-driven enumerators so that the
            // enumerators are always disposed.
            int proteinGroupNumber = 0;
            foreach (ProteinGroupInfo proGroup in proteinGroups)
            {
                ++proteinGroupNumber;
                int resultNumber = 0;
                foreach (ResultInfo r in proGroup.results)
                {
                    ++resultNumber;
                    if (reportProgress)
                    {
                        // The second argument is true only for the last result of the protein group.
                        reportStatus("linking peptide group " + resultNumber + " of " + proGroup.results.Count + " to protein group " + proteinGroupNumber + " of " + proteinGroups.Count, resultNumber == proGroup.results.Count);
                    }

                    // Assign each peptide group to the corresponding protein group
                    proGroup.peptideGroups.Add(r.peptideGroup);
                    // Set the peptide group to protein group mapping
                    r.peptideGroup.proteinGroups.Add(proGroup);
                }
            }

            // Reset the cluster identifiers of all protein and peptide groups;
            // 0 marks a group as not yet assigned to any cluster.
            foreach (ProteinGroupInfo proGroup in proteinGroups)
            {
                proGroup.cluster = 0;
            }
            foreach (PeptideGroupInfo pepGroup in peptideGroups)
            {
                pepGroup.cluster = 0;
            }

            // Seed a new cluster from every protein group that is still unassigned.
            foreach (ProteinGroupInfo proGroup in proteinGroups)
            {
                if (proGroup.cluster == 0)
                {
                    // Generate a new cluster with a provisional incremental ID.
                    ClusterInfo c = new ClusterInfo();
                    c.id = clusters.Count + 1;
                    // Add the protein group to the cluster, along with the
                    // protein groups that share peptide groups with it.
                    recursivelyAssignProteinGroupsToCluster(proGroup, c);
                    clusters.Add(c);
                }
            }

            // Sort the clusters
            clusters.Sort(ClusterList.SortDescendingBySequencesThenSpectra);

            // Renumber the clusters in sorted order (starting at 1) and copy
            // the final cluster identification number to every member group.
            for (int i = 0; i < clusters.Count; ++i)
            {
                clusters[i].id = i + 1;
                foreach (ProteinGroupInfo proGroup in clusters[i].proteinGroups)
                {
                    proGroup.cluster = i + 1;
                }
                foreach (PeptideGroupInfo pepGroup in clusters[i].peptideGroups)
                {
                    pepGroup.cluster = i + 1;
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Repeatedly picks a protein group from the cluster and removes that
        /// group's results from the set of results still to be covered, until
        /// no results remain. Each picked group gets its
        /// <c>uniquePeptideCount</c> set at the moment it is picked.
        /// </summary>
        /// <param name="c">The cluster whose protein groups and results are examined.</param>
        public void assembleMinimumCoveringSet(ClusterInfo c)
        {
            // Degenerate case: a single protein group leaves nothing to choose between.
            if (c.proteinGroups.Count == 1)
            {
                foreach (ProteinGroupInfo onlyGroup in c.proteinGroups)
                {
                    onlyGroup.uniquePeptideCount = int.MaxValue; // value is n/a
                }
                return;
            }

            // Results of the cluster that have not been covered by a picked group yet.
            Set<ResultInfo> uncoveredResults = new Set<ResultInfo>(c.results);

            // Protein groups of the cluster that have not been picked yet.
            ProteinGroupList candidateGroups = new ProteinGroupList();
            foreach (ProteinGroupInfo member in c.proteinGroups)
            {
                candidateGroups.Add(member);
            }

            while (uncoveredResults.Count > 0)
            {
                // Candidates bucketed by the number of results that would
                // stay uncovered if that candidate were picked.
                Map<int, List<ProteinGroupInfo>> candidatesByLeftover = new Map<int, List<ProteinGroupInfo>>();

                foreach (ProteinGroupInfo candidate in candidateGroups)
                {
                    // Count how many still-uncovered results this candidate has.
                    int covered = 0;
                    foreach (ResultInfo r in candidate.results)
                    {
                        if (uncoveredResults.Contains(r))
                        {
                            ++covered;
                        }
                    }

                    // NOTE(review): relies on the Map indexer creating the list
                    // for a key it sees for the first time - confirm.
                    candidatesByLeftover[uncoveredResults.Count - covered].Add(candidate);
                }

                // NOTE(review): presumably Map keeps its keys in ascending order,
                // so entry 0 holds the smallest leftover, i.e. the candidates
                // covering the most results - confirm against Map.
                ProteinGroupInfo mostGreedyGroup = candidatesByLeftover.Values[0][0];
                int leftover = candidatesByLeftover.Keys[0];

                // Number of currently uncovered results minus the leftover of the picked bucket.
                mostGreedyGroup.uniquePeptideCount = uncoveredResults.Count - leftover;

                // The picked group's results no longer need covering, and the
                // group itself is no longer a candidate.
                uncoveredResults.Subtract(mostGreedyGroup.results);
                candidateGroups.Remove(mostGreedyGroup);
            }
        }
Пример #3
0
        /// <summary>
        /// Places a protein group in the supplied cluster together with its
        /// proteins, the results of those proteins and the peptide groups of
        /// those results. It then does the same, recursively, for every
        /// not-yet-clustered protein group that is attached to a peptide
        /// group of one of this protein group's results.
        /// </summary>
        /// <param name="proGroup">The <see cref="IDPicker.ProteinGroupInfo"/> object to
        /// place in the cluster</param>
        /// <param name="c">The <see cref="IDPicker.ClusterInfo"/> object that receives
        /// the protein group</param>
        /// <exception cref="InvalidDataException">A protein group reached from
        /// <paramref name="proGroup"/> (or <paramref name="proGroup"/> itself) already
        /// carries a cluster number different from the id of <paramref name="c"/>.</exception>
        private void recursivelyAssignProteinGroupsToCluster(ProteinGroupInfo proGroup, ClusterInfo c)
        {
            const string clusterMismatchMessage = "protein groups that are connected are assigned to different clusters";

            reportStatus("assigning protein group " + proGroup.id + " to cluster " + c.id, true);

            // A group that already carries a positive cluster number is not
            // processed again; it must, however, belong to this very cluster.
            if (proGroup.cluster > 0)
            {
                if (proGroup.cluster == c.id)
                {
                    return;
                }

                throw new InvalidDataException(clusterMismatchMessage);
            }

            // Register the protein group with the cluster.
            proGroup.cluster = c.id;
            c.proteinGroups.Add(proGroup);

            // Register each protein of the group (keyed by locus), the results
            // of that protein, and the peptide group of each such result.
            foreach (ProteinList.MapPair entry in proGroup.proteins)
            {
                c.proteins[entry.Value.locus] = entry.Value;

                foreach (ResultInfo proteinResult in entry.Value.results)
                {
                    c.results.Add(proteinResult);
                    c.peptideGroups.Add(proteinResult.peptideGroup);
                    proteinResult.peptideGroup.cluster = c.id;
                }
            }

            // Walk the "cousin" protein groups: those attached to the peptide
            // groups of this protein group's results.
            foreach (ResultInfo groupResult in proGroup.results)
            {
                foreach (ProteinGroupInfo cousin in groupResult.peptideGroup.proteinGroups)
                {
                    bool isUnassignedCousin = !ReferenceEquals(cousin, proGroup) && cousin.cluster == 0;
                    if (isUnassignedCousin)
                    {
                        recursivelyAssignProteinGroupsToCluster(cousin, c);
                        continue;
                    }

                    // Anything else must already be part of this cluster.
                    if (cousin.cluster != c.id)
                    {
                        throw new InvalidDataException(clusterMismatchMessage);
                    }
                }
            }
        }