/// <summary>
/// Reads features from <paramref name="path"/>, clusters them with an average-linkage
/// clusterer, numbers the resulting clusters sequentially starting at zero, and hands
/// them to WriteClusters.
/// </summary>
/// <param name="path">Location passed to ReadFeatures.</param>
/// <param name="dist">Metric used to build the clusterer's distance function.</param>
/// <param name="useBoxMethod">Value assigned to ShouldTestClustersWithinTolerance.</param>
public void TestRestrictiveBoxMethod(string path, DistanceMetric dist, bool useBoxMethod)
{
    var features = ReadFeatures(path);

    // Configure the clusterer step by step, in the same order the settings are applied
    // by an object initializer.
    var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
    clusterer.ShouldTestClustersWithinTolerance = useBoxMethod;
    clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
    clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist);
    clusterer.Parameters.OnlyClusterSameChargeStates = true;
    clusterer.Parameters.Tolerances.Mass = 10;
    clusterer.Parameters.Tolerances.DriftTime = .3;
    clusterer.Parameters.Tolerances.Net = .03;

    var clusters = clusterer.Cluster(features);

    // Assign consecutive ids in enumeration order.
    var nextId = 0;
    foreach (var cluster in clusters)
    {
        cluster.Id = nextId;
        nextId++;
    }

    WriteClusters(clusters);
}
/// <summary>
/// Reads clusters from <paramref name="path"/>, breaks the cluster at index 1 into
/// singleton clusters, and prints the Euclidean distance between every unordered pair
/// of singletons together with each member's mass, NET and drift time.
/// </summary>
/// <param name="path">Location passed to ReadClusters.</param>
/// <param name="dist">
/// Not used by this method; the distance function is always built from
/// DistanceMetric.Euclidean.
/// </param>
public void TestDistancesEuclidean(string path, DistanceMetric dist)
{
    var euclidean = DistanceFactory<UMCClusterLight>.CreateDistanceFunction(DistanceMetric.Euclidean);
    var loaded = ReadClusters(path);

    // Only the second loaded cluster (index 1) is examined.
    var singletons = CreateSingletonClustersFromClusteredFeatures(
        new List<UMCClusterLight> { loaded[1] });

    Console.WriteLine("Distance, Mass, NET, DT, Mass, Net, DT");

    var first = 0;
    while (first < singletons.Count)
    {
        // Start at first + 1 so each pair is reported exactly once.
        var second = first + 1;
        while (second < singletons.Count)
        {
            var separation = euclidean(singletons[first], singletons[second]);
            Console.WriteLine(
                "{0},{1},{2},{3},{4},{5},{6}",
                separation,
                singletons[first].MassMonoisotopic,
                singletons[first].Net,
                singletons[first].DriftTime,
                singletons[second].MassMonoisotopic,
                singletons[second].Net,
                singletons[second].DriftTime);
            second++;
        }

        first++;
    }
}
/// <summary>
/// Restores every clustering parameter on this instance to its default:
/// a new <c>FeatureTolerances</c>, the default same-charge-state flag,
/// a weighted Euclidean distance function, the <c>WithinRange</c> range function,
/// and a median centroid representation.
/// </summary>
public virtual void Clear()
{
    this.Tolerances = new FeatureTolerances();
    this.OnlyClusterSameChargeStates = CONST_DEFAULT_ONLY_CLUSTER_SAME_CHARGE_STATES;
    this.DistanceFunction =
        DistanceFactory<T>.CreateDistanceFunction(DistanceMetric.WeightedEuclidean);
    this.RangeFunction = WithinRange;
    this.CentroidRepresentation = ClusterCentroidRepresentation.Median;
}
/// <summary>
/// Runs hierarchical clustering on the uploaded data and renders the outcome.
/// </summary>
/// <param name="model">
/// Supplies the uploaded file and its data type, the distance type, the cluster-union
/// type, and the number of unions performed per step.
/// </param>
/// <returns>The view bound to the clustering result.</returns>
public IActionResult ShowResult(HierarchicalCreateVM model)
{
    var dataset = _fileService.GetData(model.UploadFile, model.DataType);

    // Resolve the point-to-point and cluster-to-cluster distance strategies.
    IDistance pointMetric = DistanceFactory.GetDistance(model.DistanceType);
    IClusterDistance linkage = ClusterDistanceFactory.GetClusterDistance(model.ClusterUnionType);

    var outcome = _service.Clustering(dataset, pointMetric, linkage, model.CountOfUnionsInStep);
    return View(outcome);
}
/// <summary>
/// Runs k-means clustering on the uploaded data, exposes the centroids through
/// <c>ViewBag.Centroids</c>, and renders the clustering result.
/// </summary>
/// <param name="model">
/// Supplies the uploaded file and its data type, the distance type, and the number
/// of clusters.
/// </param>
/// <returns>The view bound to the <c>Result</c> member of the clustering output.</returns>
public IActionResult ShowResult(KMeansCreateVM model)
{
    var dataset = _fileService.GetData(model.UploadFile, model.DataType);
    IDistance pointMetric = DistanceFactory.GetDistance(model.DistanceType);

    var outcome = _kMeans.Clustering(dataset, pointMetric, model.ClustersCount);

    ViewBag.Centroids = outcome.Centroid;
    return View(outcome.Result);
}
/// <summary>
/// Translates LC-MS clustering options into a <c>FeatureClusterParameters</c> instance.
/// </summary>
/// <param name="options">Options supplying tolerances, charge handling, centroid
/// representation and the distance metric.</param>
/// <returns>
/// Parameters whose <c>OnlyClusterSameChargeStates</c> is true exactly when
/// <c>options.ShouldSeparateCharge</c> is false.
/// </returns>
public static FeatureClusterParameters<UMCLight> ConvertToOmics(LcmsClusteringOptions options)
{
    return new FeatureClusterParameters<UMCLight>
    {
        Tolerances = options.InstrumentTolerances,
        OnlyClusterSameChargeStates = (options.ShouldSeparateCharge == false),
        CentroidRepresentation = options.ClusterCentroidRepresentation,
        // Assigned last, matching the order in which the settings were applied before.
        DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(options.DistanceFunction)
    };
}
/// <summary>
/// Clusters the features read from <paramref name="path"/> and prints the sorted
/// distances between every feature and the centroid of the cluster it belongs to,
/// each paired with its 1-based rank in the sorted list.
/// </summary>
/// <param name="path">Location passed to ReadFeatures.</param>
/// <param name="dist">Metric used to build the clusterer's distance function.</param>
public void TestDistanceDistributions(string path, DistanceMetric dist)
{
    var features = ReadFeatures(path);
    var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>
    {
        ShouldTestClustersWithinTolerance = false,
        Parameters =
        {
            CentroidRepresentation = ClusterCentroidRepresentation.Mean,
            DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist),
            OnlyClusterSameChargeStates = true,
            Tolerances =
            {
                Mass = 10,
                DriftTime = .3,
                Net = .03
            }
        }
    };

    var clusters = clusterer.Cluster(features);

    // The distance function does not change between clusters, so fetch it once.
    var func = clusterer.Parameters.DistanceFunction;

    var distances = new List<double>();
    foreach (var cluster in clusters)
    {
        // Stand-in feature positioned at the cluster centroid so the feature-to-feature
        // distance function can be used for feature-to-centroid distances.
        var centroid = new UMCLight();
        centroid.MassMonoisotopicAligned = cluster.MassMonoisotopic;
        centroid.Net = cluster.Net;
        centroid.DriftTime = cluster.DriftTime;

        foreach (var feature in cluster.Features)
        {
            distances.Add(func(feature, centroid));
        }
    }

    // Sort and print once, after all clusters have contributed. Doing this inside the
    // cluster loop re-printed every previously collected distance for each cluster.
    distances.Sort();
    var rank = 0;
    foreach (var distance in distances)
    {
        rank++;
        Console.WriteLine("{0},{1}", distance, rank);
    }
}
/// <summary>
/// Determines whether <paramref name="input"/> is approximately equal to
/// <paramref name="parameter"/>: either the distance between their canonical forms is
/// below <paramref name="fuzzyness"/>, or one string contains the other.
/// </summary>
/// <param name="input">The string being tested.</param>
/// <param name="parameter">The string to compare against; must be non-empty.</param>
/// <param name="fuzzyness">Exclusive upper bound on the accepted distance; must be &gt;= 0.</param>
/// <param name="fuzzyAlgorithm">
/// Accepted for interface compatibility; this method does not read it.
/// </param>
/// <returns><c>true</c> when the strings are considered similar; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="parameter"/> is null or empty.</exception>
/// <exception cref="InvalidOperationException"><paramref name="fuzzyness"/> is negative.</exception>
public static bool IsFuzzySimilar(this string input, string parameter, int fuzzyness = 3, FuzzyAlgorithm fuzzyAlgorithm = FuzzyAlgorithm.LevenshteinDistance)
{
    if (string.IsNullOrEmpty(parameter))
    {
        // The single-string constructor treats its argument as the parameter NAME,
        // so pass the name and the message separately.
        throw new ArgumentNullException(nameof(parameter), "parameter can't be empty or null");
    }

    if (fuzzyness < 0)
    {
        throw new InvalidOperationException("fuzzyness can't be less than 0");
    }

    if (DistanceFactory.GetDistances(GetCanonicalForm(input), GetCanonicalForm(parameter)) < fuzzyness)
    {
        return true;
    }

    // Substring fallback. An extension method can be invoked on a null receiver, so
    // guard before dereferencing input (string.Contains(null) would also throw).
    return input != null && (input.Contains(parameter) || parameter.Contains(input));
}
/// <summary>
/// The main entry point for the application.
/// Clusters the features of a single charge state from a database and writes the
/// clusters to a cross-tab file.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> database path, <c>args[1]</c> charge state (integer),
/// <c>args[2]</c> cross-tab output path, and optionally <c>args[3]</c> a text file
/// listing one dataset name per line to restrict the analysis to.
/// </param>
/// <returns>0 when the analysis succeeds; 1 on invalid arguments or any failure.</returns>
static int Main(string[] args)
{
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

    try
    {
        // Three positional arguments are mandatory (args[2] is read below), so fewer
        // than three must show the usage text instead of failing on the index.
        if (args.Length < 3)
        {
            Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
            Console.WriteLine(@"\tThe cross-tab file will be placed in the same directory as the database path");
            return(1);
        }

        // Setup the analysis processing
        var databasePath = args[0];
        var databaseName = Path.GetFileNameWithoutExtension(databasePath);
        var path = Path.GetDirectoryName(databasePath);
        var crossPath = args[2];
        var chargeState = Convert.ToInt32(args[1]);

        // Optional fourth argument: file with the dataset names to keep.
        List<string> datasetList = null;
        if (args.Length == 4)
        {
            datasetList = File.ReadAllLines(args[3]).ToList();
        }

        if (path == null)
        {
            Console.WriteLine(@"The directory path is invalid");
            return(1);
        }

        NHibernateUtil.ConnectToDatabase(databasePath, false);

        IDatasetDAO datasetCache = new DatasetDAOHibernate();
        var dateSuffix = AnalysisPathUtils.BuildDateSuffix();
        Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

        Logger.PrintMessage("Find all datasets", true);
        var datasets = datasetCache.FindAll();
        Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

        // Create the clustering algorithm - average linkage
        IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

        // Create the DAO object to extract the features
        var database = new UmcAdoDAO { DatabasePath = databasePath };
        IUmcDAO featureDao = database;

        Logger.PrintMessage(string.Format("Extracting Features"), true);
        var tempFeatures = featureDao.FindByCharge(chargeState);
        Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

        var features = new List<UMCLight>();
        if (datasetList != null)
        {
            // Index the known datasets by lower-cased name for case-insensitive lookup.
            var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());

            var focusedDatasetList = new Dictionary<int, DatasetInformation>();
            foreach (var name in datasetList)
            {
                var key = name.ToLower();
                if (featuremap.ContainsKey(key))
                {
                    Logger.PrintMessage("Using dataset: " + name);
                    focusedDatasetList.Add(featuremap[key].DatasetId, featuremap[key]);
                }
                else
                {
                    throw new Exception("Didn't find the dataset required..." + name);
                }
            }

            // Keep only the features whose GroupId matches one of the requested datasets.
            features.AddRange(from feature in tempFeatures
                              let use = focusedDatasetList.ContainsKey(feature.GroupId)
                              where use
                              select feature);

            Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
        }
        else
        {
            features = tempFeatures;
        }

        // Handle logging progress.
        clusterer.Progress += clusterer_Progress;

        clusterer.Parameters.Tolerances.DriftTime = .3;
        clusterer.Parameters.Tolerances.Mass = 16;
        clusterer.Parameters.Tolerances.Net = .014;
        clusterer.Parameters.OnlyClusterSameChargeStates = true;
        clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
        clusterer.Parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(DistanceMetric.WeightedEuclidean);

        // Then cluster
        var clusterWriter = new UmcClusterWriter();
        IClusterWriter<UMCClusterLight> writer = clusterWriter; //new UMCClusterDummyWriter();
        try
        {
            clusterWriter.Open(crossPath);
            clusterWriter.WriteHeader(datasets);

            clusterer.ClusterAndProcess(features, writer);
            Logger.PrintMessage("", true);
            Logger.PrintMessage("ANALYSIS SUCCESS", true);
            return(0);
        }
        catch (Exception ex)
        {
            Logger.PrintMessage("Unhandled Error: " + ex.Message);

            // Walk the inner-exception chain so the root cause is logged too.
            var innerEx = ex.InnerException;
            while (innerEx != null)
            {
                Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                innerEx = innerEx.InnerException;
            }

            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return(1);
        }
        finally
        {
            // Always release the output writer, whether clustering succeeded or not.
            clusterWriter.Close();
        }
    }
    catch (Exception ex)
    {
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);

        var innerEx = ex.InnerException;
        while (innerEx != null)
        {
            Logger.PrintMessage("Inner Exception: " + innerEx.Message);
            innerEx = innerEx.InnerException;
        }

        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return(1);
    }
}
/// <summary>
/// Sets up the fixture: the mass comparer is <c>FeatureLight.MassAlignedComparison</c>
/// and the distance function is built from <c>DistanceMetric.Euclidean</c>.
/// </summary>
public LargeScaleClusterTests()
{
    this.m_massComparer = FeatureLight.MassAlignedComparison;
    this.DistanceFunction =
        DistanceFactory<UMCLight>.CreateDistanceFunction(DistanceMetric.Euclidean);
}