/// <summary>
/// Initializes a new instance of the <see cref="T:ExFat.IO.ClusterStream" /> class,
/// capturing the cluster geometry described by <paramref name="dataDescriptor"/> and
/// positioning the stream at its first cluster.
/// </summary>
/// <param name="clusterReader">The cluster information reader.</param>
/// <param name="clusterWriter">The cluster writer.</param>
/// <param name="dataDescriptor">The data descriptor (first cluster, contiguity, lengths).</param>
/// <param name="onDisposed">Method invoked when the stream is disposed.</param>
/// <exception cref="T:System.ArgumentException">If contiguous is true, the length must be specified</exception>
/// <inheritdoc />
public ClusterStream(IClusterReader clusterReader, IClusterWriter clusterWriter, DataDescriptor dataDescriptor, Action<DataDescriptor> onDisposed)
{
    // Collaborators and the dispose callback.
    _clusterReader = clusterReader;
    _clusterWriter = clusterWriter;
    _onDisposed = onDisposed;

    // Geometry copied out of the descriptor.
    _startCluster = dataDescriptor.FirstCluster;
    _contiguous = dataDescriptor.Contiguous;
    _dataLength = (long)dataDescriptor.PhysicalLength;
    _validDataLength = (long)dataDescriptor.LogicalLength;

    // Start reading from the beginning of the chain.
    _position = 0;
    _currentCluster = _startCluster;
}
/// <summary>
/// The main entry point for the application.
/// Clusters LC-MS features of a single charge state from a MultiAlign database and
/// writes the resulting clusters to a cross-tab file.
/// </summary>
/// <param name="args">databasePath chargeState crossTabPath [datasetListPath]</param>
/// <returns>0 on success, 1 on any failure.</returns>
static int Main(string[] args)
{
    // NOTE(review): MainWindowHandle is the window handle, not a console I/O handle —
    // confirm SetConsoleMode actually needs GetStdHandle here.
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

    try
    {
        // BUG FIX: three positional arguments are required (databasePath, chargeState,
        // crossTabPath). The original guard was "< 2", which let a 2-argument invocation
        // through and crashed on args[2] below.
        if (args.Length < 3)
        {
            Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
            // BUG FIX: this was a verbatim string (@"\t..."), so a literal backslash-t
            // was printed instead of a tab.
            Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
            return 1;
        }

        // Setup the analysis processing
        var databasePath = args[0];
        var databaseName = Path.GetFileNameWithoutExtension(databasePath);
        var path = Path.GetDirectoryName(databasePath);
        var crossPath = args[2];
        var chargeState = Convert.ToInt32(args[1]);

        // Optional fourth argument: a file listing the dataset names to keep.
        List<string> datasetList = null;
        if (args.Length == 4)
        {
            datasetList = File.ReadAllLines(args[3]).ToList();
        }

        if (path == null)
        {
            Console.WriteLine(@"The directory path is invalid");
            return 1;
        }

        NHibernateUtil.ConnectToDatabase(databasePath, false);
        IDatasetDAO datasetCache = new DatasetDAOHibernate();
        var dateSuffix = AnalysisPathUtils.BuildDateSuffix();
        Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

        Logger.PrintMessage("Find all datasets", true);
        var datasets = datasetCache.FindAll();
        Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

        // Create the clustering algorithm - average linkage
        IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

        // Create the DAO object to extract the features
        var database = new UmcAdoDAO { DatabasePath = databasePath };
        IUmcDAO featureDao = database;

        Logger.PrintMessage(string.Format("Extracting Features"), true);
        var tempFeatures = featureDao.FindByCharge(chargeState);
        Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

        // If a dataset list was supplied, keep only features from those datasets;
        // an unknown dataset name is a hard failure.
        var features = new List<UMCLight>();
        if (datasetList != null)
        {
            var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());
            var focusedDatasetList = new Dictionary<int, DatasetInformation>();
            foreach (var name in datasetList)
            {
                var key = name.ToLower();
                if (featuremap.ContainsKey(key))
                {
                    Logger.PrintMessage("Using dataset: " + name);
                    focusedDatasetList.Add(featuremap[key].DatasetId, featuremap[key]);
                }
                else
                {
                    throw new Exception("Didn't find the dataset required..." + name);
                }
            }

            features.AddRange(from feature in tempFeatures
                              let use = focusedDatasetList.ContainsKey(feature.GroupId)
                              where use
                              select feature);
            Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
        }
        else
        {
            features = tempFeatures;
        }

        // Handle logging progress.
        clusterer.Progress += clusterer_Progress;
        clusterer.Parameters.Tolerances.DriftTime = .3;
        clusterer.Parameters.Tolerances.Mass = 16;
        clusterer.Parameters.Tolerances.Net = .014;
        clusterer.Parameters.OnlyClusterSameChargeStates = true;
        clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
        clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(DistanceMetric.WeightedEuclidean);

        // Then cluster, streaming clusters straight to the cross-tab writer.
        var clusterWriter = new UmcClusterWriter();
        IClusterWriter<UMCClusterLight> writer = clusterWriter; //new UMCClusterDummyWriter();
        try
        {
            clusterWriter.Open(crossPath);
            clusterWriter.WriteHeader(datasets);
            clusterer.ClusterAndProcess(features, writer);
            Logger.PrintMessage("", true);
            Logger.PrintMessage("ANALYSIS SUCCESS", true);
            return 0;
        }
        catch (Exception ex)
        {
            Logger.PrintMessage("Unhandled Error: " + ex.Message);
            // Walk the inner-exception chain so the root cause is visible in the log.
            var innerEx = ex.InnerException;
            while (innerEx != null)
            {
                Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                innerEx = innerEx.InnerException;
            }
            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return 1;
        }
        finally
        {
            clusterWriter.Close();
        }
    }
    catch (Exception ex)
    {
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
        // Walk the inner-exception chain so the root cause is visible in the log.
        var innerEx = ex.InnerException;
        while (innerEx != null)
        {
            Logger.PrintMessage("Inner Exception: " + innerEx.Message);
            innerEx = innerEx.InnerException;
        }
        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return 1;
    }
}
/// <summary>
/// Progress-reporting overload of cluster-and-write processing.
/// Not implemented for this type.
/// </summary>
/// <param name="data">Features to cluster.</param>
/// <param name="writer">Sink for completed clusters.</param>
/// <param name="progress">Optional progress reporter.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void ClusterAndProcess(List<UMCLight> data, IClusterWriter<UMCClusterLight> writer, IProgress<ProgressData> progress = null)
{
    throw new NotImplementedException();
}
/// <summary>
/// Clusters the data but does not store the results; instead each completed cluster is
/// immediately written to the stream writer provided.
/// </summary>
/// <param name="data">Features to cluster. Must be non-null and contain no null entries.</param>
/// <param name="writer">Sink that receives each cluster as soon as it is finished.</param>
/// <exception cref="NullReferenceException">
/// Thrown when <paramref name="data"/> is null or contains a null feature. (Kept as
/// NullReferenceException rather than ArgumentNullException so existing callers that
/// catch it are unaffected.)
/// </exception>
public void ClusterAndProcess(List<T> data, IClusterWriter<U> writer)
{
    /*
     * This clustering algorithm first sorts the list of input UMC's by mass. It then iterates
     * through this list partitioning the data into blocks of UMC's based on a mass tolerance.
     * When it finds gaps larger or equal to the mass (ppm) tolerance specified by the user,
     * it will process the data before the gap (a block) until the current index of the features in question.
     */

    // Make sure we have data to cluster first.
    if (data == null)
    {
        throw new NullReferenceException("The input feature data list was null. Cannot process this data.");
    }

    // Make sure there is no null UMC data in the input list.
    var nullIndex = data.FindIndex(x => x == null);
    // BUG FIX: List<T>.FindIndex returns -1 when nothing matches and 0 when the FIRST
    // element matches; the original "> 0" silently accepted a null at index 0.
    if (nullIndex >= 0)
    {
        throw new NullReferenceException("The feature at index " + nullIndex + " was null. Cannot process this data.");
    }

    OnNotify("Sorting cluster mass list");

    // Sort the features by mass first, since mass has the least variability across runs.
    data.Sort(m_massComparer);

    // Now partition the data based on mass ranges and the parameter values.
    var massTolerance = Parameters.Tolerances.Mass;

    // This is the index of the first feature of the current mass partition.
    var startUMCIndex = 0;
    var totalFeatures = data.Count;

    OnNotify("Detecting mass partitions");
    var tenPercent = Convert.ToInt32(totalFeatures * .1);
    var counter = 0;
    var percent = 0;
    var clusterId = 0;

    for (var i = 0; i < totalFeatures - 1; i++)
    {
        // Emit a progress notification roughly every 10% of the features.
        if (counter > tenPercent)
        {
            counter = 0;
            percent += 10;
            OnNotify(string.Format("Clustering Completed...{0}%", percent));
        }
        counter++;

        // Compute the ppm mass difference between consecutive features (based on mass).
        // This determines whether a partition boundary falls between them.
        var umcX = data[i];
        var umcY = data[i + 1];
        var ppm = Math.Abs(FeatureLight.ComputeMassPPMDifference(umcX.MassMonoisotopicAligned, umcY.MassMonoisotopicAligned));

        // If the difference is greater than the tolerance then we cluster
        // - we dont check the sign of the ppm because the data should be sorted based on mass.
        if (ppm > massTolerance)
        {
            if (startUMCIndex == i)
            {
                // Singleton partition: the feature at startUMCIndex found no neighbor
                // within the mass tolerance, so it becomes its own cluster.
                var cluster = new U();
                cluster.AmbiguityScore = m_maxDistance;
                umcX.SetParentFeature(cluster);
                cluster.AddChildFeature(umcX);
                cluster.CalculateStatistics(Parameters.CentroidRepresentation);
                cluster.Id = clusterId++;
                writer.WriteCluster(cluster);
            }
            else
            {
                // Otherwise we have more than one feature to consider.
                var distances = CalculatePairWiseDistances(startUMCIndex, i, data);
                var localClusters = CreateSingletonClusters(data, startUMCIndex, i);
                var blockClusters = LinkFeatures(distances, localClusters);
                CalculateAmbiguityScore(blockClusters);
                // NOTE(review): this branch writes localClusters.Values while the
                // leftover-partition code below writes blockClusters when there is more
                // than one cluster — confirm whether this asymmetry is intentional
                // (LinkFeatures may mutate localClusters in place).
                foreach (var cluster in localClusters.Values)
                {
                    cluster.Id = clusterId++;
                    CalculateStatistics(cluster);
                    writer.WriteCluster(cluster);
                }
            }
            startUMCIndex = i + 1;
        }
    }

    // Make sure that we cluster what is left over after the last detected gap.
    if (startUMCIndex < totalFeatures)
    {
        OnNotify(string.Format("Clustering last partition...{0}%", percent));
        var distances = CalculatePairWiseDistances(startUMCIndex, totalFeatures - 1, data);
        var localClusters = CreateSingletonClusters(data, startUMCIndex, totalFeatures - 1);
        var blockClusters = LinkFeatures(distances, localClusters);
        CalculateAmbiguityScore(blockClusters);
        if (localClusters.Count < 2)
        {
            foreach (var cluster in localClusters.Values)
            {
                cluster.Id = clusterId++;
                CalculateStatistics(cluster);
                writer.WriteCluster(cluster);
            }
        }
        else
        {
            foreach (var cluster in blockClusters)
            {
                cluster.Id = clusterId++;
                CalculateStatistics(cluster);
                writer.WriteCluster(cluster);
            }
        }
    }
}
/// <summary>
/// Progress-reporting overload of cluster-and-write processing.
/// Not implemented for this type.
/// </summary>
/// <param name="data">Child features to cluster.</param>
/// <param name="writer">Sink for completed parent clusters.</param>
/// <param name="progress">Optional progress reporter.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void ClusterAndProcess(List<TChildFeature> data, IClusterWriter<TParentFeature> writer, IProgress<ProgressData> progress = null)
{
    throw new NotImplementedException();
}