/// <summary>
/// Runs OPTICS on the feature column once and caches the clustering obtained for every epsilon.
/// The method is a no-op when <c>_Results</c> is already set. The cached state
/// (<c>_Results</c> and <c>_reversedMapping</c>) is only published once every epsilon has been
/// processed, so a failure midway does not leave partial results behind.
/// </summary>
void TrainTransform()
{
    lock (_lock)
    {
        // Results are computed only once and then reused.
        if (_Results != null)
        {
            return;
        }

        using (var ch = _host.Start("Optics"))
        {
            var sw = Stopwatch.StartNew();
            var points = new List<IPointIdFloat>();
            var index = SchemaHelper.GetColumnIndexDC(_input.Schema, _args.features);

            // Caching data.
            ch.Info(MessageSensitivity.None, "Caching the data.");
            using (var cursor = _input.GetRowCursor(_input.Schema.Where(c => c.Index == index.Index)))
            {
                var getter = cursor.GetGetter<VBuffer<float>>(index);
                var getterId = cursor.GetIdGetter();
                DataViewRowId id = new DataViewRowId();
                VBuffer<float> tmp = new VBuffer<float>();
                while (cursor.MoveNext())
                {
                    getter(ref tmp);
                    getterId(ref id);
                    // Only the low 64 bits of the row id are kept as the point id.
                    // NOTE(review): the values are handed over as a lazy sequence built on a buffer
                    // which is reused for the next row; presumably PointIdFloat copies them - confirm.
                    points.Add(new PointIdFloat((long)id.Low, tmp.DenseValues().Select(c => (float)c)));
                }
            }

            // Epsilons: either given by the user or a single estimated value.
            float[] distances;
            if (_args.epsilons == null || _args.epsilons.Count() == 0)
            {
                float mind, maxd;
                distances = new[] { EstimateDistance(ch, points, out mind, out maxd) };
                ch.Info(MessageSensitivity.UserData, "epsilon (=Radius) was estimating on random couples of points: {0} in [{1}, {2}]", distances[0], mind, maxd);
            }
            else
            {
                distances = _args.epsilonsDouble;
            }

            // The ordering is computed once with the largest epsilon and reused for every epsilon.
            var maxEpsilon = distances.Max();

            // Built locally and published at the end of the method.
            var allResults = new List<Dictionary<int, ClusteringResult>>();
            var allReversedMappings = new List<Dictionary<long, int>>();

            Optics opticsAlgo = new Optics(points, _args.seed);

            // Ordering.
            ch.Info(MessageSensitivity.UserData, "Generating OPTICS ordering for {0} points.", points.Count);

            int nPoints = points.Count;
            // At least 1: with fewer than 10 points the ratio is 0 and the modulo below would
            // raise a DivideByZeroException.
            int cyclesBetweenLogging = Math.Max(1, Math.Min(1000, nPoints / 10));
            int currentIteration = 0;
            Action progressLogger = () =>
            {
                if (++currentIteration % cyclesBetweenLogging == 0)
                {
                    ch.Info(MessageSensitivity.UserData, "Processing {0}/{1}", currentIteration, nPoints);
                }
            };

            OpticsOrdering opticsOrdering = opticsAlgo.Ordering(
                maxEpsilon,
                _args.minPoints,
                seed: _args.seed,
                onShuffle: msg => ch.Info(MessageSensitivity.UserData, msg),
                onPointProcessing: progressLogger);

            // Clustering.
            foreach (var epsilon in distances)
            {
                ch.Info(MessageSensitivity.UserData, "Clustering {0} points using epsilon={1}.", points.Count, epsilon);

                // long: id of a point, int: index of a cluster
                Dictionary<long, int> results = opticsOrdering.Cluster(epsilon);

                // One mapping per epsilon: sharing a single dictionary would make every entry of
                // the list point to the same instance, holding the last epsilon's clusters.
                var reversed = new Dictionary<long, int>();
                var clusterIds = new HashSet<int>();
                for (int i = 0; i < results.Count; ++i)
                {
                    var p = points[i];
                    int cluster = results[p.id];
                    reversed[p.id] = cluster;
                    if (cluster != DBScan.NOISE)
                    {
                        clusterIds.Add(cluster);
                    }
                }
                allReversedMappings.Add(reversed);

                // Cleaning small clusters.
                ch.Info(MessageSensitivity.UserData, "Removing clusters with less than {0} points.", _args.minPoints);
                var finalCounts = results.GroupBy(c => c.Value)
                                         .ToDictionary(g => g.Key, g => g.Count());
                results = results.ToDictionary(
                    c => c.Key,
                    c => finalCounts[c.Value] < _args.minPoints ? -1 : c.Value);

                // Cleaning.
                ch.Info(MessageSensitivity.None, "Cleaning.");

                // The output is indexed by row position; the clustering is keyed by point id,
                // so the lookup goes through the id of the i-th point.
                var runResults = new Dictionary<int, ClusteringResult>();
                for (int i = 0; i < results.Count; ++i)
                {
                    int cluster = results[points[i].id];
                    bool isNoise = cluster == DBScan.NOISE;
                    runResults[i] = new ClusteringResult()
                    {
                        cl = isNoise ? -1 : cluster,
                        score = isNoise ? 0f : 1f
                    };
                }

                allResults.Add(runResults);
                // The count was taken before the small clusters were removed.
                ch.Info(MessageSensitivity.UserData, "Found {0} clusters.", clusterIds.Count);
            }

            // Publishing: _Results is set last because it is what marks the work as done.
            _reversedMapping = allReversedMappings;
            _Results = allResults;

            sw.Stop();
            ch.Info(MessageSensitivity.UserData, "'Optics' finished in {0}.", sw.Elapsed);
        }
    }
}
/// <summary>
/// Computes the OPTICS ordering of the feature column once and caches, for every row, its
/// ordering entry together with its reachability and core distances.
/// The method is a no-op when <c>_Results</c> is already set. The cached state
/// (<c>_Results</c> and <c>_reversedMapping</c>) is only published once it is fully built.
/// An exception built by the channel is thrown when the feature column cannot be found or
/// when a row id is greater than <c>long.MaxValue</c>.
/// </summary>
void TrainTransform()
{
    lock (_lock)
    {
        // Results are computed only once and then reused.
        if (_Results != null)
        {
            return;
        }

        using (var ch = _host.Start("Starting Optics"))
        {
            var sw = Stopwatch.StartNew();
            var points = new List<IPointIdFloat>();

            int index;
            if (!_input.Schema.TryGetColumnIndex(_args.features, out index))
            {
                // Except only builds the exception, it has to be thrown explicitly.
                throw ch.Except("Unable to find column '{0}'", _args.features);
            }

            // Caching data.
            ch.Info("Caching the data.");
            using (var cursor = _input.GetRowCursor(col => col == index))
            {
                var getter = cursor.GetGetter<VBuffer<float>>(index);
                var getterId = cursor.GetIdGetter();
                UInt128 id = new UInt128();
                VBuffer<float> tmp = new VBuffer<float>();
                while (cursor.MoveNext())
                {
                    getter(ref tmp);
                    getterId(ref id);
                    if (id > long.MaxValue)
                    {
                        throw ch.Except("An id is outside the range for long {0}", id);
                    }
                    // NOTE(review): the values are handed over as a lazy sequence built on a buffer
                    // which is reused for the next row; presumably PointIdFloat copies them - confirm.
                    points.Add(new PointIdFloat((long)id, tmp.DenseValues().Select(c => (float)c)));
                }
            }

            // A non positive epsilon means it has to be estimated from the data.
            var distance = (float)_args.epsilon;
            if (distance <= 0)
            {
                float mind, maxd;
                distance = EstimateDistance(ch, points, out mind, out maxd);
                ch.Info("epsilon (=Radius) was estimating on random couples of points: {0} in [{1}, {2}]", distance, mind, maxd);
            }

            Optics opticsAlgo = new Optics(points, _args.seed);

            // Ordering.
            ch.Info("Generating OPTICS ordering for {0} points.", points.Count);

            int nPoints = points.Count;
            // At least 1: with fewer than 10 points the ratio is 0 and the modulo below would
            // raise a DivideByZeroException.
            int cyclesBetweenLogging = Math.Max(1, Math.Min(1000, nPoints / 10));
            int currentIteration = 0;
            Action progressLogger = () =>
            {
                if (++currentIteration % cyclesBetweenLogging == 0)
                {
                    ch.Info("Processing {0}/{1}", currentIteration, nPoints);
                }
            };

            OpticsOrdering opticsOrdering = opticsAlgo.Ordering(
                distance,
                _args.minPoints,
                seed: _args.seed,
                onShuffle: msg => ch.Info(msg),
                onPointProcessing: progressLogger);

            IReadOnlyDictionary<long, long> results = opticsOrdering.orderingMapping;
            var reachabilityDs = opticsOrdering.reachabilityDistances;
            var coreDs = opticsOrdering.coreDistancesCache;

            // Reversed mapping: value stored in the ordering for position i -> i.
            var reversed = new Dictionary<long, long>();
            for (int i = 0; i < results.Count; ++i)
            {
                reversed[results[i]] = i;
            }

            // Cleaning.
            ch.Info("Cleaning.");

            // Built locally and published once complete.
            var ordered = new OpticsOrderingResult[results.Count];
            for (int i = 0; i < results.Count; ++i)
            {
                long pId = points[i].id;
                float? rd;
                float? cd;
                // A missing distance leaves the nullable empty, it is then reported as +infinity.
                reachabilityDs.TryGetValue(pId, out rd);
                coreDs.TryGetValue(pId, out cd);

                long orderingId = results[i];
                ordered[i] = new OpticsOrderingResult()
                {
                    id = orderingId != DBScan.NOISE ? orderingId : -1,
                    reachability = rd.GetValueOrDefault(float.PositiveInfinity),
                    core = cd.GetValueOrDefault(float.PositiveInfinity)
                };
            }

            // Publishing: _Results is set last because it is what marks the work as done.
            _reversedMapping = reversed;
            _Results = ordered;

            ch.Info("Ordered {0} points.", ordered.Length);
            sw.Stop();
            ch.Info("'OpticsOrdering' finished in {0}.", sw.Elapsed);
        }
    }
}