Exemplo n.º 1
0
        /// <summary>
        /// Writes a cross-tab CSV (<paramref name="crossPath"/> + "-sum.csv") with one row per retained
        /// cluster and one summed-abundance column per selected dataset.
        /// </summary>
        /// <param name="datasets">Datasets whose ids become the abundance columns.</param>
        /// <param name="clusters">Clusters to export. The list is cleared once it has been indexed.</param>
        /// <param name="minimumClusterSize">Clusters with fewer members than this are skipped and only counted.</param>
        /// <param name="charge">Only feature rows whose Charge column equals this value are read.</param>
        /// <param name="crossPath">Path prefix of the output file.</param>
        /// <param name="databasePath">SQLite data source holding the T_LCMS_Features table.</param>
        /// <param name="minDatabase">Datasets with an id greater than this value get no column.</param>
        private void WriteClusters(List<DatasetInformation> datasets, List<UMCClusterLight> clusters,
            int minimumClusterSize, int charge, string crossPath, string databasePath, int minDatabase)
        {
            // CSV output must not depend on the machine locale (a ',' decimal separator would corrupt rows).
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            Console.WriteLine("Mapping cluster ids");
            // Index the retained clusters by id for quick lookup while streaming features.
            var clusterMap = new Dictionary<int, ClusterUltraLight>();

            // Histogram of discarded clusters keyed by member count; pre-seeded so every size below
            // the threshold is reported, even when nothing of that size was cut.
            var clustersCut = new Dictionary<int, int>();
            for (var i = 0; i < minimumClusterSize; i++)
            {
                clustersCut.Add(i, 0);
            }

            foreach (var cluster in clusters)
            {
                // Only keep clusters of a given size to cut down on files.
                if (cluster.MemberCount < minimumClusterSize)
                {
                    // TryGetValue tolerates member counts outside the pre-seeded range
                    // (e.g. negative values) instead of throwing KeyNotFoundException.
                    int cutSoFar;
                    clustersCut.TryGetValue(cluster.MemberCount, out cutSoFar);
                    clustersCut[cluster.MemberCount] = cutSoFar + 1;
                    continue;
                }

                var id = cluster.Id;
                if (!clusterMap.ContainsKey(id))
                {
                    var lightCluster = new ClusterUltraLight();
                    lightCluster.Ambiguity = cluster.AmbiguityScore;
                    lightCluster.DatasetCount = cluster.DatasetMemberCount;
                    lightCluster.MemberCount = cluster.MemberCount;
                    lightCluster.Drift = cluster.DriftTime;
                    lightCluster.Id = cluster.Id;
                    lightCluster.Mass = cluster.MassMonoisotopic;
                    lightCluster.Net = cluster.Net;
                    lightCluster.Tightness = cluster.Tightness;
                    lightCluster.abundances = new Dictionary<int, long>();
                    clusterMap.Add(id, lightCluster);
                }
            }

            // The caller's list is emptied on purpose: the full cluster objects are no longer needed.
            clusters.Clear();

            // Let the user know how many clusters we cut.
            Console.WriteLine("Clusters that were cut");
            foreach (var entry in clustersCut)
            {
                Console.WriteLine("\t{0}\t{1}", entry.Key, entry.Value);
            }
            clustersCut.Clear();

            // Forced collection kept from the original implementation so the cleared clusters are
            // released before the feature pass.
            // NOTE(review): presumably a workaround for memory pressure - confirm it is still needed.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            GC.Collect();
            GC.WaitForPendingFinalizers();

            Console.WriteLine("Find all features");
            using (var connection = new SQLiteConnection(string.Format("Data Source = {0}", databasePath)))
            {
                connection.Open();
                long features = 0;

                using (var command = connection.CreateCommand())
                {
                    // Only the columns that are actually consumed are selected, and the charge is
                    // bound as a parameter rather than formatted into the SQL text.
                    command.CommandText =
                        "SELECT Cluster_ID, Dataset_ID, Abundance_Sum FROM T_LCMS_Features where Charge = @charge";
                    command.CommandType = CommandType.Text;

                    var chargeParameter = command.CreateParameter();
                    chargeParameter.ParameterName = "@charge";
                    chargeParameter.Value = charge;
                    command.Parameters.Add(chargeParameter);

                    var data = new object[3];
                    using (var reader = command.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            features++;
                            reader.GetValues(data);

                            var id = Convert.ToInt32(data[0]);
                            var did = Convert.ToInt32(data[1]);
                            var sum = Convert.ToInt64(data[2]);

                            // Accumulate the abundance per (cluster, dataset). Features of clusters
                            // that were cut are ignored. No catch-all here: a failure should surface
                            // instead of silently dropping abundance.
                            ClusterUltraLight target;
                            if (clusterMap.TryGetValue(id, out target))
                            {
                                long runningSum;
                                target.abundances.TryGetValue(did, out runningSum);
                                target.abundances[did] = runningSum + sum;
                            }

                            if ((features % 1000000) == 0)
                            {
                                Console.WriteLine("\tPurging Finished Features {0}", features);
                                GC.Collect();
                                GC.WaitForPendingFinalizers();
                                GC.Collect();
                                GC.WaitForPendingFinalizers();
                            }
                        }
                    }
                }
                // Leaving the using block disposes (and thereby closes) the connection.
            }

            // One collection pass after the database objects are disposed.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            GC.Collect();
            GC.WaitForPendingFinalizers();

            Console.WriteLine("Creating cross tabs");
            var sumPath = crossPath + "-sum.csv";
            using (TextWriter writer = File.CreateText(sumPath))
            {
                // Collect and sort the dataset ids FIRST, then derive the header from the sorted
                // list so header columns line up with the per-row abundance columns.
                var ids = new List<int>();
                foreach (var information in datasets)
                {
                    if (information.DatasetId > minDatabase)
                    {
                        continue;
                    }
                    ids.Add(information.DatasetId);
                }
                ids.Sort();

                var builder = new StringBuilder(
                    "Cluster ID, Total Members, Dataset Members, Tightness, Ambiguity, Mass, NET, DriftTime,");
                foreach (var did in ids)
                {
                    builder.AppendFormat(invariant, "AbundanceSum-{0},", did);
                }
                writer.WriteLine(builder.ToString());
                builder.Clear();

                long total = clusterMap.Count;
                long totalWritten = 0;
                var sinceLastReport = 0;

                foreach (var cluster in clusterMap.Values)
                {
                    // Progress report roughly every 100000 clusters.
                    if (sinceLastReport > 100000)
                    {
                        Console.WriteLine("Written {0} of {1} clusters", totalWritten, total);
                        sinceLastReport = 0;
                    }
                    totalWritten++;
                    sinceLastReport++;

                    builder.AppendFormat(invariant,
                        "{0},{1},{2},{3:.000},{4:.000},{5:.0000},{6:.0000},{7:.0000},",
                        cluster.Id,
                        cluster.MemberCount,
                        cluster.DatasetCount,
                        cluster.Tightness,
                        cluster.Ambiguity,
                        cluster.Mass,
                        cluster.Net,
                        cluster.Drift);

                    foreach (var did in ids)
                    {
                        // A dataset without features in this cluster leaves its cell empty.
                        long abundance;
                        if (cluster.abundances.TryGetValue(did, out abundance))
                        {
                            builder.AppendFormat(invariant, "{0},", abundance);
                        }
                        else
                        {
                            builder.Append(',');
                        }
                    }
                    writer.WriteLine(builder.ToString());

                    builder.Clear();
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Exports the retained clusters as a cross-tab CSV written to <paramref name="crossPath"/> + "-sum.csv":
        /// one row per cluster, one summed-abundance column per selected dataset.
        /// </summary>
        /// <param name="datasets">Datasets that supply the abundance column ids.</param>
        /// <param name="clusters">Clusters to export; the list is cleared after it has been indexed.</param>
        /// <param name="minimumClusterSize">Member-count threshold below which a cluster is skipped and only tallied.</param>
        /// <param name="charge">Value matched against the Charge column when reading features.</param>
        /// <param name="crossPath">Output path prefix.</param>
        /// <param name="databasePath">SQLite data source containing T_LCMS_Features.</param>
        /// <param name="minDatabase">Dataset ids greater than this value are left out of the table.</param>
        private void WriteClusters(List <DatasetInformation> datasets, List <UMCClusterLight> clusters,
                                   int minimumClusterSize, int charge, string crossPath, string databasePath, int minDatabase)
        {
            // Locale-independent formatting keeps the CSV valid on machines that use ',' as decimal separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            Console.WriteLine("Mapping cluster ids");
            // here we map the clusters to a dictionary for quick lookup
            var clusterMap = new Dictionary <int, ClusterUltraLight>();

            // Tally of skipped clusters per member count, pre-seeded for every size below the threshold.
            var clustersCut = new Dictionary <int, int>();

            for (var i = 0; i < minimumClusterSize; i++)
            {
                clustersCut.Add(i, 0);
            }

            // Map the clusters so we can add features.
            foreach (var cluster in clusters)
            {
                // Only keep clusters of a given size to cut down on files.
                if (cluster.MemberCount < minimumClusterSize)
                {
                    // A member count outside the seeded range (e.g. negative) is tallied too,
                    // rather than raising KeyNotFoundException.
                    int tally;
                    clustersCut.TryGetValue(cluster.MemberCount, out tally);
                    clustersCut[cluster.MemberCount] = tally + 1;
                    continue;
                }

                var id = cluster.Id;
                if (!clusterMap.ContainsKey(id))
                {
                    var lightCluster = new ClusterUltraLight();
                    lightCluster.Ambiguity    = cluster.AmbiguityScore;
                    lightCluster.DatasetCount = cluster.DatasetMemberCount;
                    lightCluster.MemberCount  = cluster.MemberCount;
                    lightCluster.Drift        = cluster.DriftTime;
                    lightCluster.Id           = cluster.Id;
                    lightCluster.Mass         = cluster.MassMonoisotopic;
                    lightCluster.Net          = cluster.Net;
                    lightCluster.Tightness    = cluster.Tightness;
                    lightCluster.abundances   = new Dictionary <int, long>();
                    clusterMap.Add(id, lightCluster);
                }
            }

            // Deliberately empties the caller's list; only the light-weight copies are needed from here on.
            clusters.Clear();

            // Let the user know how many clusters we cut.
            Console.WriteLine("Clusters that were cut");
            foreach (var cut in clustersCut)
            {
                Console.WriteLine("\t{0}\t{1}", cut.Key, cut.Value);
            }
            clustersCut.Clear();

            // Forced collection carried over from the original code to free the cleared clusters
            // before features are loaded.
            // NOTE(review): looks like a memory-pressure workaround - confirm it is still required.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            GC.Collect();
            GC.WaitForPendingFinalizers();

            Console.WriteLine("Find all features");
            using (var connection = new SQLiteConnection(string.Format("Data Source = {0}", databasePath)))
            {
                connection.Open();
                long features = 0;

                using (var command = connection.CreateCommand())
                {
                    // The unused Abundance_Max column is no longer fetched, and the charge filter
                    // is passed as a bound parameter instead of being spliced into the SQL text.
                    command.CommandText =
                        "SELECT Cluster_ID, Dataset_ID, Abundance_Sum FROM T_LCMS_Features where Charge = @charge";
                    command.CommandType = CommandType.Text;

                    var chargeParameter = command.CreateParameter();
                    chargeParameter.ParameterName = "@charge";
                    chargeParameter.Value         = charge;
                    command.Parameters.Add(chargeParameter);

                    var data = new object[3];
                    using (var reader = command.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            features++;
                            reader.GetValues(data);

                            var id  = Convert.ToInt32(data[0]);
                            var did = Convert.ToInt32(data[1]);
                            var sum = Convert.ToInt64(data[2]);

                            // Add the feature's abundance to its (cluster, dataset) cell; features that
                            // belong to a cut cluster are skipped. Errors are allowed to propagate
                            // instead of being swallowed by an empty catch.
                            ClusterUltraLight owner;
                            if (clusterMap.TryGetValue(id, out owner))
                            {
                                long accumulated;
                                owner.abundances.TryGetValue(did, out accumulated);
                                owner.abundances[did] = accumulated + sum;
                            }

                            if ((features % 1000000) == 0)
                            {
                                Console.WriteLine("\tPurging Finished Features {0}", features);
                                GC.Collect();
                                GC.WaitForPendingFinalizers();
                                GC.Collect();
                                GC.WaitForPendingFinalizers();
                            }
                        }
                    }
                }
                // The connection is closed when the using block disposes it.
            }

            // Single collection pass once the database objects have been disposed.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            GC.Collect();
            GC.WaitForPendingFinalizers();

            Console.WriteLine("Creating cross tabs");
            var sumPath = crossPath + "-sum.csv";

            using (TextWriter writer = File.CreateText(sumPath))
            {
                // Gather and sort the selected dataset ids before emitting the header, so the header
                // columns and the row columns share the same (sorted) order.
                var ids = new List <int>();
                foreach (var information in datasets)
                {
                    if (information.DatasetId > minDatabase)
                    {
                        continue;
                    }
                    ids.Add(information.DatasetId);
                }
                ids.Sort();

                var builder = new StringBuilder(
                    "Cluster ID, Total Members, Dataset Members, Tightness, Ambiguity, Mass, NET, DriftTime,");
                foreach (var did in ids)
                {
                    builder.AppendFormat(invariant, "AbundanceSum-{0},", did);
                }
                writer.WriteLine(builder.ToString());

                builder.Clear();

                long total           = clusterMap.Count;
                long clustersWritten = 0;
                var  pendingReport   = 0;

                foreach (var cluster in clusterMap.Values)
                {
                    // Emit a progress line roughly every 100000 clusters.
                    if (pendingReport > 100000)
                    {
                        Console.WriteLine("Written {0} of {1} clusters", clustersWritten, total);
                        pendingReport = 0;
                    }
                    clustersWritten++;
                    pendingReport++;

                    builder.AppendFormat(invariant,
                                         "{0},{1},{2},{3:.000},{4:.000},{5:.0000},{6:.0000},{7:.0000},",
                                         cluster.Id,
                                         cluster.MemberCount,
                                         cluster.DatasetCount,
                                         cluster.Tightness,
                                         cluster.Ambiguity,
                                         cluster.Mass,
                                         cluster.Net,
                                         cluster.Drift);

                    foreach (var did in ids)
                    {
                        // If the cluster does not have an entry for this dataset, leave the cell empty.
                        long abundance;
                        if (cluster.abundances.TryGetValue(did, out abundance))
                        {
                            builder.AppendFormat(invariant, "{0},", abundance);
                        }
                        else
                        {
                            builder.Append(',');
                        }
                    }
                    writer.WriteLine(builder.ToString());

                    builder.Clear();
                }
            }
        }