/// <summary>
/// Writes a cross-tab CSV ("{crossPath}-sum.csv") of summed feature abundances per dataset
/// for every cluster with at least <paramref name="minimumClusterSize"/> members.
/// Feature rows are read from the T_LCMS_Features table, filtered by charge state.
/// </summary>
/// <param name="datasets">Datasets whose ids become cross-tab columns.</param>
/// <param name="clusters">Clusters to map; the list is cleared once mapped to release memory.</param>
/// <param name="minimumClusterSize">Clusters with fewer members are dropped (cut counts are reported to the console).</param>
/// <param name="charge">Charge state used to filter features in the database.</param>
/// <param name="crossPath">Base output path; "-sum.csv" is appended.</param>
/// <param name="databasePath">Path to the SQLite feature database.</param>
/// <param name="minDatabase">Only dataset ids &lt;= this value are included as columns.</param>
private void WriteClusters(List<DatasetInformation> datasets, List<UMCClusterLight> clusters, int minimumClusterSize, int charge, string crossPath, string databasePath, int minDatabase)
{
    Console.WriteLine("Mapping cluster ids");

    // Map clusters to a light-weight representation for quick id lookup,
    // tracking how many clusters were cut at each sub-threshold size.
    var clusterMap = new Dictionary<int, ClusterUltraLight>();
    var clustersCut = new Dictionary<int, int>();
    for (var i = 0; i < minimumClusterSize; i++)
    {
        clustersCut.Add(i, 0);
    }

    foreach (var cluster in clusters)
    {
        // Only keep clusters of a given size to cut down on file size.
        if (cluster.MemberCount < minimumClusterSize)
        {
            clustersCut[cluster.MemberCount]++;
            continue;
        }

        if (!clusterMap.ContainsKey(cluster.Id))
        {
            clusterMap.Add(cluster.Id, new ClusterUltraLight
            {
                Ambiguity = cluster.AmbiguityScore,
                DatasetCount = cluster.DatasetMemberCount,
                MemberCount = cluster.MemberCount,
                Drift = cluster.DriftTime,
                Id = cluster.Id,
                Mass = cluster.MassMonoisotopic,
                Net = cluster.Net,
                Tightness = cluster.Tightness,
                abundances = new Dictionary<int, long>()
            });
        }
    }

    // Release the (potentially huge) source list; the light map is all we need now.
    // (Re-assigning the parameter to null would have no effect on the caller, so it was removed.)
    clusters.Clear();

    // Let the user know how many clusters were cut at each size.
    Console.WriteLine("Clusters that were cut");
    foreach (var entry in clustersCut)
    {
        Console.WriteLine("\t{0}\t{1}", entry.Key, entry.Value);
    }
    clustersCut.Clear();

    Console.WriteLine("Find all features");
    using (var connection = new SQLiteConnection(string.Format("Data Source = {0}", databasePath)))
    {
        connection.Open();
        long features = 0;
        using (var command = connection.CreateCommand())
        {
            // Parameterized query instead of string-built SQL.
            command.CommandText =
                "SELECT Cluster_ID, Dataset_ID, Abundance_Sum, Abundance_Max FROM T_LCMS_Features WHERE Charge = @charge";
            command.CommandType = CommandType.Text;
            command.Parameters.AddWithValue("@charge", charge);

            var data = new object[4];
            using (var reader = command.ExecuteReader())
            {
                while (reader.Read())
                {
                    features++;
                    reader.GetValues(data);
                    var id = Convert.ToInt32(data[0]);
                    var did = Convert.ToInt32(data[1]);
                    var sum = Convert.ToInt64(data[2]);
                    // data[3] (Abundance_Max) is fetched but currently unused.

                    // Accumulate the abundance sum per dataset for this cluster.
                    // Clusters below the size threshold are absent from the map and skipped.
                    ClusterUltraLight lightCluster;
                    if (clusterMap.TryGetValue(id, out lightCluster))
                    {
                        var map = lightCluster.abundances;
                        if (map.ContainsKey(did))
                        {
                            map[did] += sum;
                        }
                        else
                        {
                            map.Add(did, sum);
                        }
                    }

                    // Periodic progress report for very large feature tables.
                    if ((features % 1000000) == 0)
                    {
                        Console.WriteLine("\tProcessed Features {0}", features);
                    }
                }
            }
        }
        connection.Close();
    }

    // Write the cross tab: one row per retained cluster, one abundance-sum
    // column per dataset with id <= minDatabase.
    Console.WriteLine("Creating cross tabs");
    var sumPath = crossPath + "-sum.csv";
    using (TextWriter writer = File.CreateText(sumPath))
    {
        var header = "Cluster ID, Total Members, Dataset Members, Tightness, Ambiguity, Mass, NET, DriftTime,";
        var builder = new StringBuilder();

        // Collect and sort the dataset ids FIRST, then build the header from the
        // sorted list so the header column order matches the per-row value order.
        // (Previously the header used the datasets' iteration order while the rows
        // used sorted order, misaligning columns when datasets were unsorted.)
        var ids = new List<int>();
        foreach (var information in datasets)
        {
            if (information.DatasetId > minDatabase)
            {
                continue;
            }
            ids.Add(information.DatasetId);
        }
        ids.Sort();

        foreach (var did in ids)
        {
            builder.AppendFormat("AbundanceSum-{0},", did);
        }
        writer.WriteLine(header + builder);
        builder.Clear();

        long total = clusterMap.Keys.Count;
        long totalFeatures = 0;
        var written = 0;
        foreach (var cluster in clusterMap.Values)
        {
            if (written > 100000)
            {
                Console.WriteLine("Written {0} of {1} clusters", totalFeatures, total);
                written = 0;
            }
            totalFeatures++;
            written++;

            builder.AppendFormat("{0},{1},{2},{3:.000},{4:.000},{5:.0000},{6:.0000},{7:.0000},",
                cluster.Id,
                cluster.MemberCount,
                cluster.DatasetCount,
                cluster.Tightness,
                cluster.Ambiguity,
                cluster.Mass,
                cluster.Net,
                cluster.Drift);

            foreach (var did in ids)
            {
                // If the cluster has no entry for this dataset, leave the cell empty.
                long abundance;
                if (cluster.abundances.TryGetValue(did, out abundance))
                {
                    builder.AppendFormat("{0},", abundance);
                }
                else
                {
                    builder.Append(',');
                }
            }
            writer.WriteLine(builder.ToString());
            builder.Clear();
        }
    }
}
/// <summary>
/// Writes a cross-tab CSV ("{crossPath}-sum.csv") of summed feature abundances per dataset
/// for every cluster with at least <paramref name="minimumClusterSize"/> members.
/// Feature rows are read from the T_LCMS_Features table, filtered by charge state.
/// </summary>
/// <param name="datasets">Datasets whose ids become cross-tab columns.</param>
/// <param name="clusters">Clusters to map; the list is cleared once mapped to release memory.</param>
/// <param name="minimumClusterSize">Clusters with fewer members are dropped (cut counts are reported to the console).</param>
/// <param name="charge">Charge state used to filter features in the database.</param>
/// <param name="crossPath">Base output path; "-sum.csv" is appended.</param>
/// <param name="databasePath">Path to the SQLite feature database.</param>
/// <param name="minDatabase">Only dataset ids &lt;= this value are included as columns.</param>
private void WriteClusters(List<DatasetInformation> datasets, List<UMCClusterLight> clusters, int minimumClusterSize, int charge, string crossPath, string databasePath, int minDatabase)
{
    Console.WriteLine("Mapping cluster ids");

    // Map clusters to a light-weight representation for quick id lookup,
    // tracking how many clusters were cut at each sub-threshold size.
    var clusterMap = new Dictionary<int, ClusterUltraLight>();
    var clustersCut = new Dictionary<int, int>();
    for (var i = 0; i < minimumClusterSize; i++)
    {
        clustersCut.Add(i, 0);
    }

    foreach (var cluster in clusters)
    {
        // Only keep clusters of a given size to cut down on file size.
        if (cluster.MemberCount < minimumClusterSize)
        {
            clustersCut[cluster.MemberCount]++;
            continue;
        }

        if (!clusterMap.ContainsKey(cluster.Id))
        {
            clusterMap.Add(cluster.Id, new ClusterUltraLight
            {
                Ambiguity = cluster.AmbiguityScore,
                DatasetCount = cluster.DatasetMemberCount,
                MemberCount = cluster.MemberCount,
                Drift = cluster.DriftTime,
                Id = cluster.Id,
                Mass = cluster.MassMonoisotopic,
                Net = cluster.Net,
                Tightness = cluster.Tightness,
                abundances = new Dictionary<int, long>()
            });
        }
    }

    // Release the (potentially huge) source list; the light map is all we need now.
    // (Re-assigning the parameter to null would have no effect on the caller, so it was removed.)
    clusters.Clear();

    // Let the user know how many clusters were cut at each size.
    Console.WriteLine("Clusters that were cut");
    foreach (var entry in clustersCut)
    {
        Console.WriteLine("\t{0}\t{1}", entry.Key, entry.Value);
    }
    clustersCut.Clear();

    Console.WriteLine("Find all features");
    using (var connection = new SQLiteConnection(string.Format("Data Source = {0}", databasePath)))
    {
        connection.Open();
        long features = 0;
        using (var command = connection.CreateCommand())
        {
            // Parameterized query instead of string-built SQL.
            command.CommandText =
                "SELECT Cluster_ID, Dataset_ID, Abundance_Sum, Abundance_Max FROM T_LCMS_Features WHERE Charge = @charge";
            command.CommandType = CommandType.Text;
            command.Parameters.AddWithValue("@charge", charge);

            var data = new object[4];
            using (var reader = command.ExecuteReader())
            {
                while (reader.Read())
                {
                    features++;
                    reader.GetValues(data);
                    var id = Convert.ToInt32(data[0]);
                    var did = Convert.ToInt32(data[1]);
                    var sum = Convert.ToInt64(data[2]);
                    // data[3] (Abundance_Max) is fetched but currently unused.

                    // Accumulate the abundance sum per dataset for this cluster.
                    // Clusters below the size threshold are absent from the map and skipped.
                    ClusterUltraLight lightCluster;
                    if (clusterMap.TryGetValue(id, out lightCluster))
                    {
                        var map = lightCluster.abundances;
                        if (map.ContainsKey(did))
                        {
                            map[did] += sum;
                        }
                        else
                        {
                            map.Add(did, sum);
                        }
                    }

                    // Periodic progress report for very large feature tables.
                    if ((features % 1000000) == 0)
                    {
                        Console.WriteLine("\tProcessed Features {0}", features);
                    }
                }
            }
        }
        connection.Close();
    }

    // Write the cross tab: one row per retained cluster, one abundance-sum
    // column per dataset with id <= minDatabase.
    Console.WriteLine("Creating cross tabs");
    var sumPath = crossPath + "-sum.csv";
    using (TextWriter writer = File.CreateText(sumPath))
    {
        var header = "Cluster ID, Total Members, Dataset Members, Tightness, Ambiguity, Mass, NET, DriftTime,";
        var builder = new StringBuilder();

        // Collect and sort the dataset ids FIRST, then build the header from the
        // sorted list so the header column order matches the per-row value order.
        // (Previously the header used the datasets' iteration order while the rows
        // used sorted order, misaligning columns when datasets were unsorted.)
        var ids = new List<int>();
        foreach (var information in datasets)
        {
            if (information.DatasetId > minDatabase)
            {
                continue;
            }
            ids.Add(information.DatasetId);
        }
        ids.Sort();

        foreach (var did in ids)
        {
            builder.AppendFormat("AbundanceSum-{0},", did);
        }
        writer.WriteLine(header + builder);
        builder.Clear();

        long total = clusterMap.Keys.Count;
        long totalFeatures = 0;
        var written = 0;
        foreach (var cluster in clusterMap.Values)
        {
            if (written > 100000)
            {
                Console.WriteLine("Written {0} of {1} clusters", totalFeatures, total);
                written = 0;
            }
            totalFeatures++;
            written++;

            builder.AppendFormat("{0},{1},{2},{3:.000},{4:.000},{5:.0000},{6:.0000},{7:.0000},",
                cluster.Id,
                cluster.MemberCount,
                cluster.DatasetCount,
                cluster.Tightness,
                cluster.Ambiguity,
                cluster.Mass,
                cluster.Net,
                cluster.Drift);

            foreach (var did in ids)
            {
                // If the cluster has no entry for this dataset, leave the cell empty.
                long abundance;
                if (cluster.abundances.TryGetValue(did, out abundance))
                {
                    builder.AppendFormat("{0},", abundance);
                }
                else
                {
                    builder.Append(',');
                }
            }
            writer.WriteLine(builder.ToString());
            builder.Clear();
        }
    }
}