/// <summary>
/// Checks that a cluster holding exactly one feature reports that feature's aligned
/// monoisotopic mass, NET, drift time and charge state once its statistics are calculated.
/// </summary>
/// <param name="umcMass">Aligned monoisotopic mass assigned to the feature.</param>
/// <param name="umcNet">NET assigned to the feature.</param>
/// <param name="umcDriftTime">Drift time assigned to the feature.</param>
/// <param name="umcCharge">Charge state assigned to the feature.</param>
/// <param name="umcAbundance">Abundance assigned to the feature.</param>
/// <param name="representation">Centroid representation passed to CalculateStatistics.</param>
public void CalculateStatisticsTestSingleUmc(double umcMass, double umcNet, float umcDriftTime, int umcCharge, int umcAbundance, ClusterCentroidRepresentation representation)
{
    // Arrange: an empty cluster and one fully populated feature.
    var singleton = new UMCClusterLight();
    singleton.UmcList = new List<UMCLight>();

    var feature = new UMCLight();
    feature.MassMonoisotopicAligned = umcMass;
    feature.Net = umcNet;
    feature.DriftTime = umcDriftTime;
    feature.ChargeState = umcCharge;
    feature.Abundance = umcAbundance;

    singleton.UmcList.Add(feature);

    // Act
    singleton.CalculateStatistics(representation);

    // Assert: with a single member the centroid must equal that member.
    Assert.AreEqual(feature.MassMonoisotopicAligned, singleton.MassMonoisotopic, "Monoisotopic Mass");
    Assert.AreEqual(feature.Net, singleton.Net, "NET");
    Assert.AreEqual(feature.DriftTime, singleton.DriftTime, "Drift Time");
    Assert.AreEqual(feature.ChargeState, singleton.ChargeState, "Charge State");
}
/// <summary>
/// Expects CalculateStatistics to throw a <see cref="NullReferenceException"/> when the
/// cluster's feature list has been set to null.
/// </summary>
public void CalculateStatisticsTestNullUMC()
{
    // Arrange: a cluster whose feature list is explicitly null.
    var cluster = new UMCClusterLight { UmcList = null };

    // Act + Assert
    Assert.Throws<NullReferenceException>(
        () => cluster.CalculateStatistics(ClusterCentroidRepresentation.Median));
}
/// <summary>
/// Expects CalculateStatistics to throw an <see cref="Exception"/> when the cluster's
/// feature list is empty.
/// </summary>
public void CalculateStatisticsTestEmptyUMC()
{
    // Arrange: a cluster with a feature list that contains nothing.
    var cluster = new UMCClusterLight { UmcList = new List<UMCLight>() };

    // Act + Assert
    Assert.Throws<Exception>(
        () => cluster.CalculateStatistics(ClusterCentroidRepresentation.Median));
}
/// <summary>
/// Calls CalculateStatistics (median representation) on a cluster whose feature list is empty.
/// </summary>
/// <remarks>
/// The method body contains no assertion; any expected outcome (for example an expected
/// exception) would have to be declared outside this body — not visible here, confirm.
/// </remarks>
public void CalculateStatisticsTestEmptyUMC()
{
    var emptyCluster = new UMCClusterLight { UmcList = new List<UMCLight>() };

    emptyCluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
}
/// <summary>
/// Checks that a cluster of two features that differ only in NET (100 and 200) reports a
/// cluster NET of 150 after its statistics are calculated.
/// </summary>
/// <param name="representation">Centroid representation passed to CalculateStatistics.</param>
public void CalculateStatisticsMultipleNet(ClusterCentroidRepresentation representation)
{
    var cluster = new UMCClusterLight { UmcList = new List<UMCLight>() };

    // Both features share every value except NET.
    foreach (var net in new[] { 100, 200 })
    {
        var member = new UMCLight
        {
            MassMonoisotopicAligned = 100,
            Net = net,
            DriftTime = 100,
            ChargeState = 2,
            Abundance = 100
        };
        cluster.UmcList.Add(member);
    }

    cluster.CalculateStatistics(representation);

    // Mean and median of {100, 200} coincide at 150.
    Assert.AreEqual(150, cluster.Net);
}
/// <summary>
/// Calls CalculateStatistics (median representation) on a cluster whose feature list is null.
/// </summary>
/// <remarks>
/// The method body contains no assertion; any expected outcome (for example an expected
/// exception) would have to be declared outside this body — not visible here, confirm.
/// </remarks>
public void CalculateStatisticsTestNullUMC()
{
    var clusterWithoutList = new UMCClusterLight { UmcList = null };

    clusterWithoutList.CalculateStatistics(ClusterCentroidRepresentation.Median);
}
/// <summary>
/// Loads the features stored in a test file, groups all of them into a single cluster,
/// prints the cluster's mass and NET standard deviations, and then prints the Euclidean
/// distance between each feature and the cluster.
/// </summary>
/// <param name="path">Path of the cluster data file, relative to the test directory.</param>
public void TestTwoClusters(string path)
{
    Console.WriteLine("Test: " + path);

    var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
    var features = GetClusterData(dataFile);
    Assert.IsNotEmpty(features);

    // Put every loaded feature into one cluster that takes the first feature's id.
    var cluster = new UMCClusterLight();
    cluster.Id = features[0].Id;
    foreach (var feature in features)
    {
        cluster.AddChildFeature(feature);
    }

    cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);

    Console.WriteLine("Cluster\tMass\tNET");
    Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
    Console.WriteLine();

    // Report how far each member lies from the cluster.
    var metric = new EuclideanDistanceMetric<FeatureLight>();
    foreach (var feature in features)
    {
        Console.WriteLine(metric.EuclideanDistance(feature, cluster));
    }
}
/// <summary>
/// Loads the features stored in a test file, groups all of them into a single cluster,
/// prints the cluster's mass and NET standard deviations, and runs the cluster through a
/// <c>MedianSplitReprocessor</c>.
/// </summary>
/// <param name="path">Path of the cluster data file, relative to the test directory.</param>
public void TestReprocessing(string path)
{
    Console.WriteLine("Test: " + path);

    var dataFile = Path.Combine(TestPathSingleton.TestDirectory, path);
    var features = GetClusterData(dataFile);
    Assert.IsNotEmpty(features);

    // Put every loaded feature into one cluster that takes the first feature's id.
    var cluster = new UMCClusterLight();
    cluster.Id = features[0].Id;
    foreach (var feature in features)
    {
        cluster.AddChildFeature(feature);
    }

    cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);

    Console.WriteLine("Cluster\tMass\tNET");
    Console.WriteLine("{0}\t{1}\t{2}\t", cluster.Id, cluster.MassStandardDeviation, cluster.NetStandardDeviation);
    Console.WriteLine();

    // The result of the reprocessing step is not inspected; the call only has to complete.
    var clustersToProcess = new List<UMCClusterLight>();
    clustersToProcess.Add(cluster);

    IClusterReprocessor<UMCLight, UMCClusterLight> reprocessor = new MedianSplitReprocessor<UMCLight, UMCClusterLight>();
    reprocessor.ProcessClusters(clustersToProcess);
}
/// <summary>
/// Creates a fresh database at <paramref name="databasePath"/>, stores ten dummy datasets plus
/// one hundred randomly generated clusters (and their features) in it, reads the clusters back
/// by charge state and passes them to WriteClusters.
/// </summary>
/// <param name="databasePath">Database file to (re)create; any existing file is deleted first.</param>
/// <param name="crossPath">Forwarded unchanged to WriteClusters.</param>
/// <param name="charge">Charge state given to every generated cluster and feature, and used to query the clusters back.</param>
/// <param name="minimumClusterSize">Forwarded unchanged to WriteClusters.</param>
public void TestClusterGeneration(string databasePath, string crossPath, int charge, int minimumClusterSize)
{
    File.Delete(databasePath);
    NHibernateUtil.ConnectToDatabase(databasePath, true);

    IDatasetDAO datasetCache = new DatasetDAOHibernate();
    IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
    IUmcDAO featureCache = new UmcDAOHibernate();

    // Creating a dataset
    Console.WriteLine("Creating dummy datasets");
    var datasets = new List<DatasetInformation>();
    var total = 10;
    for (var i = 0; i < total; i++)
    {
        var dataset = new DatasetInformation();
        dataset.DatasetId = i;
        dataset.DatasetName = "test" + i;
        datasets.Add(dataset);
    }

    // Round-trip the datasets through the database so later steps use the persisted copies.
    datasetCache.AddAll(datasets);
    datasets.Clear();
    datasets = datasetCache.FindAll();

    // Create features
    Console.WriteLine("Creating features");
    var features = new List<UMCLight>();
    var clusters = new List<UMCClusterLight>();
    var x = new Random();
    var featureId = 0;

    for (var i = 0; i < 100; i++)
    {
        var cluster = new UMCClusterLight();
        cluster.Id = i;
        cluster.AmbiguityScore = i;
        cluster.Tightness = i;
        cluster.ChargeState = charge;

        // Each cluster receives between 1 and total - 1 features, at most one per dataset.
        var N = x.Next(1, total);
        var hash = new HashSet<int>();
        for (var j = 0; j < N; j++)
        {
            // Draw dataset ids until one turns up that this cluster has not used yet;
            // HashSet.Add returns false for an id that is already present.
            int did;
            do
            {
                did = x.Next(0, total);
            }
            while (!hash.Add(did));

            var feature = new UMCLight();
            feature.GroupId = did;
            feature.Id = featureId++;
            feature.ChargeState = charge;
            feature.MassMonoisotopic = x.NextDouble();
            feature.Net = x.NextDouble();

            // The original code assigned Abundance to itself, which left it unset.
            // Give both abundance properties the same randomly drawn value instead.
            var abundance = x.Next(100, 200);
            feature.AbundanceSum = abundance;
            feature.Abundance = abundance;

            feature.ClusterId = cluster.Id;
            cluster.AddChildFeature(feature);
            features.Add(feature);
        }

        cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
        clusters.Add(cluster);
    }

    featureCache.AddAll(features);
    clusterCache.AddAll(clusters);

    // FindAll is exercised here; its result is replaced by the charge-filtered query below.
    clusters = clusterCache.FindAll();

    Console.WriteLine("Find all clusters");
    clusters = clusterCache.FindByCharge(charge);

    // NOTE(review): the meaning of the trailing 300000 is defined by WriteClusters, which is not visible here.
    WriteClusters(datasets, clusters, minimumClusterSize, charge, crossPath, databasePath, 300000);
}
/// <summary>
/// Fills a cluster with <paramref name="numUmCs"/> features whose mass, NET, drift time and
/// abundance grow by <paramref name="multiplier"/> per feature, and checks that
/// CalculateStatistics reports the centroid the test computes independently: the arithmetic
/// mean for the Mean representation, otherwise the middle feature (odd count) or the average
/// of the two middle features (even count), taken in insertion order.
/// </summary>
/// <param name="umcMass">Aligned monoisotopic mass of the first feature.</param>
/// <param name="umcNet">NET of the first feature.</param>
/// <param name="umcDrifTime">Drift time of the first feature.</param>
/// <param name="umcCharge">Charge state shared by all features.</param>
/// <param name="umcAbundance">Abundance of the first feature.</param>
/// <param name="multiplier">Step added to mass, NET, drift time and abundance for each subsequent feature.</param>
/// <param name="numUmCs">Number of features to add to the cluster.</param>
/// <param name="representation">Centroid representation passed to CalculateStatistics.</param>
public void CalculateStatisticsTestMultipleUmCs(double umcMass, double umcNet, float umcDrifTime, int umcCharge, int umcAbundance, int multiplier, int numUmCs, ClusterCentroidRepresentation representation)
{
    var cluster = new UMCClusterLight();
    cluster.UmcList = new List<UMCLight>();

    var useMean = representation == ClusterCentroidRepresentation.Mean;
    var oddCount = numUmCs % 2 == 1;
    var middle = numUmCs / 2;

    double expectedMass = 0;
    double expectedNet = 0;
    double expectedDriftTime = 0;

    for (var index = 0; index < numUmCs; index++)
    {
        var offset = multiplier * index;

        var umc = new UMCLight();
        umc.MassMonoisotopicAligned = umcMass + offset;
        umc.Net = umcNet + offset;
        umc.DriftTime = umcDrifTime + offset;
        umc.ChargeState = umcCharge;
        umc.Abundance = umcAbundance + offset;
        cluster.UmcList.Add(umc);

        if (useMean)
        {
            // Mean: keep running sums; they are divided by the count after the loop.
            expectedMass += umc.MassMonoisotopicAligned;
            expectedNet += umc.Net;
            expectedDriftTime += umc.DriftTime;
        }
        else if (oddCount)
        {
            // Odd count: the single middle feature is the expected value.
            if (index == middle)
            {
                expectedMass = umc.MassMonoisotopicAligned;
                expectedNet = umc.Net;
                expectedDriftTime = umc.DriftTime;
            }
        }
        else if (index == middle - 1)
        {
            // Even count: remember the lower of the two middle features
            // (middle - 1 because the index is zero based).
            expectedMass = umc.MassMonoisotopicAligned;
            expectedNet = umc.Net;
            expectedDriftTime = umc.DriftTime;
        }
        else if (index == middle)
        {
            // Even count: average the stored lower middle with the upper middle feature.
            expectedMass += umc.MassMonoisotopicAligned;
            expectedNet += umc.Net;
            expectedDriftTime += umc.DriftTime;

            expectedMass /= 2;
            expectedNet /= 2;
            expectedDriftTime /= 2;
        }
    }

    // Turn the running sums into means.
    if (useMean)
    {
        expectedMass /= numUmCs;
        expectedNet /= numUmCs;
        expectedDriftTime /= numUmCs;
    }

    cluster.CalculateStatistics(representation);

    Assert.AreEqual(expectedMass, cluster.MassMonoisotopic, "Monoisotopic Mass");
    Assert.AreEqual(expectedNet, cluster.Net, "NET");
    Assert.AreEqual(expectedDriftTime, cluster.DriftTime, "Drift Time");
    Assert.AreEqual(umcCharge, cluster.ChargeState, "Charge State");
}
/// <summary>
/// Creates a fresh database at <paramref name="databasePath"/>, stores ten dummy datasets plus
/// one hundred randomly generated clusters (and their features) in it, reads the clusters back
/// by charge state and passes them to WriteClusters.
/// </summary>
/// <param name="databasePath">Database file to (re)create; any existing file is deleted first.</param>
/// <param name="crossPath">Forwarded unchanged to WriteClusters.</param>
/// <param name="charge">Charge state given to every generated cluster and feature, and used to query the clusters back.</param>
/// <param name="minimumClusterSize">Forwarded unchanged to WriteClusters.</param>
public void TestClusterGeneration(string databasePath, string crossPath, int charge, int minimumClusterSize)
{
    File.Delete(databasePath);
    NHibernateUtil.ConnectToDatabase(databasePath, true);

    IDatasetDAO datasetCache = new DatasetDAOHibernate();
    IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
    IUmcDAO featureCache = new UmcDAOHibernate();

    // Creating a dataset
    Console.WriteLine("Creating dummy datasets");
    var datasets = new List<DatasetInformation>();
    var total = 10;
    for (var i = 0; i < total; i++)
    {
        var dataset = new DatasetInformation();
        dataset.DatasetId = i;
        dataset.DatasetName = "test" + i;
        datasets.Add(dataset);
    }

    // Round-trip the datasets through the database so later steps use the persisted copies.
    datasetCache.AddAll(datasets);
    datasets.Clear();
    datasets = datasetCache.FindAll();

    // Create features
    Console.WriteLine("Creating features");
    var features = new List<UMCLight>();
    var clusters = new List<UMCClusterLight>();
    var x = new Random();
    var featureId = 0;

    for (var i = 0; i < 100; i++)
    {
        var cluster = new UMCClusterLight();
        cluster.Id = i;
        cluster.AmbiguityScore = i;
        cluster.Tightness = i;
        cluster.ChargeState = charge;

        // Each cluster receives between 1 and total - 1 features, at most one per dataset.
        var N = x.Next(1, total);
        var hash = new HashSet<int>();
        for (var j = 0; j < N; j++)
        {
            // Draw dataset ids until one turns up that this cluster has not used yet;
            // HashSet.Add returns false for an id that is already present.
            int did;
            do
            {
                did = x.Next(0, total);
            }
            while (!hash.Add(did));

            var feature = new UMCLight();
            feature.GroupId = did;
            feature.Id = featureId++;
            feature.ChargeState = charge;
            feature.MassMonoisotopic = x.NextDouble();
            feature.Net = x.NextDouble();

            // The original code assigned Abundance to itself, which left it unset.
            // Give both abundance properties the same randomly drawn value instead.
            var abundance = x.Next(100, 200);
            feature.AbundanceSum = abundance;
            feature.Abundance = abundance;

            feature.ClusterId = cluster.Id;
            cluster.AddChildFeature(feature);
            features.Add(feature);
        }

        cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
        clusters.Add(cluster);
    }

    featureCache.AddAll(features);
    clusterCache.AddAll(clusters);

    // FindAll is exercised here; its result is replaced by the charge-filtered query below.
    clusters = clusterCache.FindAll();

    Console.WriteLine("Find all clusters");
    clusters = clusterCache.FindByCharge(charge);

    // NOTE(review): the meaning of the trailing 300000 is defined by WriteClusters, which is not visible here.
    WriteClusters(datasets, clusters, minimumClusterSize, charge, crossPath, databasePath, 300000);
}
/// <summary>
/// Compares MS1-level identifications (STAC AMT matching of clusters against the mass tags in
/// a LIQUID results file) with MS/MS-level identifications (scan-to-ID map from the same file)
/// for a single dataset. For each cluster the names found by only one of the two routes are
/// printed, followed by the total number of names found by both.
/// </summary>
/// <param name="liquidResultsPath">LIQUID results file used both as mass tag database and as source of MS/MS identifications.</param>
/// <param name="isosFile">Isos file holding the MS features.</param>
/// <param name="rawFile">Raw data file from which the spectra provider is obtained.</param>
public void CompareMs2IdsToMs1Ids(string liquidResultsPath, string isosFile, string rawFile)
{
    // Read mass tags.
    var massTagReader = new LiquidResultsFileLoader(liquidResultsPath);
    var massTags = massTagReader.LoadDatabase();

    // Get identifications - this rereads the liquid results file, but I'm leaving it that way
    // for now because this is just a test.
    var scansToIds = this.GetIds(liquidResultsPath);

    // Read raw data file.
    var spectraProviderCache = new SpectraProviderCache();
    var spectraProvider = spectraProviderCache.GetSpectraProvider(rawFile);

    // Read isos features
    var isosReader = new MsFeatureLightFileReader();
    isosReader.IsosFilteroptions = new DeconToolsIsosFilterOptions { MaximumIsotopicFit = 0.15 };
    var msFeatures = isosReader.ReadFile(isosFile).ToList();

    // Get LCMS features
    var msFeatureClusterer = new MsToLcmsFeatures(spectraProvider);
    var lcmsFeatures = msFeatureClusterer.Convert(msFeatures);

    // No alignment is performed here, so the aligned values are simply the observed ones.
    lcmsFeatures.ForEach(feature =>
    {
        feature.NetAligned = feature.Net;
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    });

    // Create clusters - Since this is only working on a single dataset, there should be a 1:1 mapping
    // between LCMS features and clusters.
    var clusters = new List<UMCClusterLight> { Capacity = lcmsFeatures.Count };
    foreach (var lcmsFeature in lcmsFeatures)
    {
        var cluster = new UMCClusterLight(lcmsFeature);
        cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
        clusters.Add(cluster);
    }

    // Do STAC AMT matching
    var stacAdapter = new STACAdapter<UMCClusterLight>
    {
        Options = new FeatureMatcherParameters
        {
            ShouldCalculateShiftFDR = false,
            UsePriors = true,
            UseEllipsoid = true,
            UseDriftTime = false,
            ShouldCalculateSTAC = true,
        }
    };
    var amtMatches = stacAdapter.PerformPeakMatching(clusters, massTags);

    // Group AMT matches by cluster, convert MassTags to Protein objects (represents lipid ID,
    // rather than Protein ID here) for simplicity in comparing them to the MS/MS IDs.
    var ms1Matches = clusters.ToDictionary(cluster => cluster, cluster => new List<Protein>());
    foreach (var amtMatch in amtMatches)
    {
        var cluster = amtMatch.Observed;
        var massTag = amtMatch.Target;
        ms1Matches[cluster].Add(new Protein
        {
            Name = massTag.ProteinName,
            Sequence = massTag.PeptideSequence,
            ChemicalFormula = massTag.PeptideSequence
        });
    }

    // Now we need to backtrack MS/MS identifications -> clusters
    var ms2Matches = new Dictionary<UMCClusterLight, List<Protein>>();
    foreach (var cluster in clusters)
    {
        ms2Matches.Add(cluster, new List<Protein>());
        foreach (var lcmsFeature in cluster.UmcList)
        {
            foreach (var msFeature in lcmsFeature.MsFeatures)
            {
                foreach (var msmsFeature in msFeature.MSnSpectra)
                {
                    if (scansToIds.ContainsKey(msmsFeature.Scan))
                    {
                        ms2Matches[cluster].AddRange(scansToIds[msmsFeature.Scan]);
                    }
                }
            }
        }
    }

    int overlapCount = 0; // Number of IDs that overlapped between MS1 and MS2 identifications.

    // Finally compare the MS1 IDs to the MS2 IDs.
    foreach (var cluster in clusters)
    {
        // For now only comparing by name. The name lists are materialized once because they
        // are enumerated several times below.
        var ms1Lipids = ms1Matches[cluster].Select(id => id.Name).ToList();
        var ms2Lipids = ms2Matches[cluster].Select(id => id.Name).ToList();

        // Compare MS1 IDs for the cluster vs MS2 IDs for the cluster.
        var ms1OnlyIds = ms1Lipids.Where(lipid => !ms2Lipids.Contains(lipid)).ToList();
        var ms2OnlyIds = ms2Lipids.Where(lipid => !ms1Lipids.Contains(lipid)).ToList();

        // The overlap is the set of names present on BOTH sides. Intersecting the two
        // "only" lists (as the code did before) is always empty because they are disjoint.
        overlapCount += ms1Lipids.Intersect(ms2Lipids).Count();

        // Write Results
        if (ms1OnlyIds.Count == 0 && ms2OnlyIds.Count == 0)
        {
            continue;
        }

        Console.WriteLine("Cluster {0}:", cluster.Id);
        if (ms1OnlyIds.Count > 0)
        {
            Console.WriteLine("\tMs1 Only IDs:");
            foreach (var id in ms1OnlyIds)
            {
                Console.WriteLine("\t\t{0}", id);
            }
        }

        if (ms2OnlyIds.Count > 0)
        {
            Console.WriteLine("\tMs2 Only IDs:");
            foreach (var id in ms2OnlyIds)
            {
                Console.WriteLine("\t\t{0}", id);
            }
        }
    }

    Console.WriteLine("Overlap: {0}", overlapCount);
}