// Runs the clustering pass over Data.
// Every point whose ClusterId is not yet positive seeds a cluster via InnerClustering;
// the cluster is then grown by calling InnerClustering on each member in turn (including
// members appended during the walk) until the cursor reaches the end of the list.
// Validation() is called after the status scope has been disposed.
public void Cluster()
{
    using (var progress = StatusWrapper.NewStatus("DBScan", this.Data.Count))
    {
        // Index ranges are derived from the time ticks of all points and RangeRadius.
        var timeTicks = this.Data.Select(point => point.TimeTick).ToList();
        this.RangeList = TimeCalculator.GetIndexRangeByTimeInterval(timeTicks, RangeRadius);

        for (int seed = 0; seed < this.Data.Count; seed++)
        {
            // Points that already carry a positive cluster id are not used as seeds.
            if (this.Data[seed].ClusterId > 0)
            {
                continue;
            }

            CurrentCluster = this.InnerClustering(seed);

            // CurrentCluster may grow while it is being walked, so the bound is re-read each pass.
            for (int cursor = 0; cursor < this.CurrentCluster.Count; cursor++)
            {
                var expansion = this.InnerClustering(this.CurrentCluster[cursor]);
                if (expansion.Any())
                {
                    this.CurrentCluster.AddRange(expansion);
                }
            }

            // Progress advances by the size of the cluster that was just completed.
            progress.PushProgress(this.CurrentCluster.Count);
        }
    }

    this.Validation();
}
// Appends one neighbor list per data point to Neighbors.
// For point i, the candidates are the slice of Data described by the i-th index range
// returned by TimeCalculator.GetIndexRangeByTimeInterval (start = Item1, length = Item2 - Item1).
// Candidates with a distance greater than zero are pushed onto a min-heap keyed by Distance.
// NOTE(review): the heap is created once and shared by all points; whether GetList()
// empties it between points is not visible here - confirm.
private void BuildNeighbors()
{
    using (var progress = StatusWrapper.NewStatus("Building Neighbors", this.Data.Count))
    {
        var nearest = new EasyHeapSmallFixedSize<NearestNeighbor>(HeapType.MinHeap, withTop, entry => entry.Distance);

        var timeTicks = this.Data.Select(point => point.TimeTick).ToList();
        var windows = TimeCalculator.GetIndexRangeByTimeInterval(timeTicks, Convert.ToInt64(kDisDiameter.TotalSeconds));

        for (int i = 0; i < this.Data.Count; i++)
        {
            var candidates = this.Data.GetRange(windows[i].Item1, windows[i].Item2 - windows[i].Item1);
            foreach (var candidate in candidates)
            {
                var distance = VectorPointHelper.GetDistance(this.Data[i], candidate, MathDistance);

                // Zero (or negative) distances are skipped, which also excludes the point itself
                // when its distance to itself is zero.
                if (distance > 0)
                {
                    nearest.Push(new NearestNeighbor { Distance = distance, NeighborIndex = candidate.Id });
                }
            }

            this.Neighbors.Add(nearest.GetList().ToList());
            progress.PushProgress();
        }
    }
}
// Calls EmbeddingModel.ReadModel with the model name derived from `filename`, reads the
// KeyFeatureClusteredCategorized records of `filename` through DataController, and passes
// both to a TSNEVisualization whose toSNE is invoked with a fixed CSV path.
// NOTE(review): the object returned by StatusWrapper.NewStatus is not kept or disposed here,
// unlike the two-argument calls elsewhere - confirm this is intended.
private void TSNETransformForFile(string filename)
{
    var modelName = this.EmbeddingModel.GetModelName(filename);
    this.EmbeddingModel.ReadModel(modelName);

    StatusWrapper.NewStatus("Preparing");

    // Populate the visualizer property by property, in the same order as before.
    var plotter = new TSNEVisualization();
    plotter.Data = this.DataController.UseData<KeyFeatureClusteredCategorized>(filename).ToList();
    plotter.Model = this.EmbeddingModel;

    //Console.WriteLine(measure.ExternalValidation.Print());
    plotter.toSNE(@"E:\EventExtractionExam\Debug\tSNEcate.csv");
}
// Fills ExternalValidation from ComparisonList.
// Every unordered pair of entries is classified by whether the two entries agree on Item1
// and on Item2:
//   same Item1, same Item2      -> TruePositive
//   same Item1, different Item2 -> FalseNegative
//   different Item1, same Item2 -> FalsePositive
//   different Item1 and Item2   -> TrueNegative
// Purity is then computed by grouping on Item2, taking the largest Item1 sub-group of each
// group, and dividing the summed sizes by the entry count. NMI comes from CalculateNMI().
// NOTE(review): presumably Item1 is the reference label and Item2 the assigned cluster - confirm.
private void CountErrors()
{
    // n * (n - 1) / 2 pairs; evaluated in long arithmetic.
    long pairCount = ComparisonList.Count * ((long)ComparisonList.Count - 1) / 2;

    using (var progress = StatusWrapper.NewStatus("Counting", pairCount))
    {
        for (int first = 0; first < ComparisonList.Count - 1; first++)
        {
            var left = ComparisonList[first];

            for (int second = first + 1; second < ComparisonList.Count; second++)
            {
                progress.PushProgress();

                var right = ComparisonList[second];
                bool sameItem1 = left.Item1 == right.Item1;
                bool sameItem2 = left.Item2 == right.Item2;

                if (sameItem1 && sameItem2)
                {
                    ExternalValidation.TruePositive++;
                }
                else if (sameItem1)
                {
                    ExternalValidation.FalseNegative++;
                }
                else if (sameItem2)
                {
                    ExternalValidation.FalsePositive++;
                }
                else
                {
                    ExternalValidation.TrueNegative++;
                }
            }
        }
    }

    // Sum, over the Item2 groups, of the size of the most frequent Item1 value in each group.
    var dominantTotal = ComparisonList
        .GroupBy(entry => entry.Item2)
        .Sum(byItem2 => byItem2.GroupBy(entry => entry.Item1).Max(byItem1 => byItem1.Count()));

    ExternalValidation.Purity = (double)dominantTotal / ComparisonList.Count;
    ExternalValidation.NMI = CalculateNMI();
}
// Reads the KeyFeatureOfRandomEvent records of `filename` and the KeyFeatureClustered records
// of the file named by EmbeddingModel.GetClusteredFilename(filename), feeds them to a
// ClusteringMeasurement as LabelList and ResultList, runs Measure(), and returns the
// resulting ExternalValidation.
private ClusterExternalValidation ClusteringMeasurementForFile(string filename)
{
    StatusWrapper.NewStatus("Preparing");

    var expected = DataController.UseData<KeyFeatureOfRandomEvent>(filename).ToList();
    var clusteredFilename = EmbeddingModel.GetClusteredFilename(filename);
    var actual = DataController.UseData<KeyFeatureClustered>(clusteredFilename).ToList();

    // Both lists are loaded before the measurement object is created, as before.
    var measurement = new ClusteringMeasurement();
    measurement.LabelList = expected;
    measurement.ResultList = actual;
    measurement.Measure();

    //Console.WriteLine(measure.ExternalValidation.Print());
    return measurement.ExternalValidation;
}
// Using AdaGradient.
// Runs NeVeSettings.EpochsCount training epochs. Each epoch resets Loss to zero, runs
// AdaGradient(id, status) in parallel for worker ids 0 .. MaximumParallelWorker - 1, and
// prints the accumulated Loss to the console.
// Throws InvalidOperationException if the parallel loop reports that it did not complete.
private void Train()
{
    // Status. The total is computed in long arithmetic: Count * EpochsCount can exceed
    // int.MaxValue for large co-occurrence lists, and int multiplication would wrap silently.
    using (var status = StatusWrapper.NewStatus(@"Train", (long)this.Cooccurrences.Count * this.NeVeSettings.EpochsCount))
    {
        // The worker count does not change between epochs, so the options are built once.
        int workerCount = this.NeVeSettings.MaximumParallelWorker;
        var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = workerCount };

        for (int epoch = 0; epoch < this.NeVeSettings.EpochsCount; epoch++)
        {
            this.Loss = 0d;

            var result = Parallel.For(0, workerCount, parallelOptions, id => this.AdaGradient(id, status));
            if (!result.IsCompleted)
            {
                throw new InvalidOperationException("Parallel training loop ended before all workers completed.");
            }

            Console.WriteLine("Loss = {0}", this.Loss);
        }
    }
}
// Trains the model for Ip2VecSettings.EpochsCount epochs over Data.
// One reusable TrainingElement is created per entry of ContextRelationDict. For every data
// item the elements are re-filled (target word from the relation key, context words from the
// relation values, both mapped through W2I) and then trained, either with
// TrainElementWithNegativeSampling (when NegativeSampling > 0) or with TrainElement.
// Iter and the status are advanced once per item; the loss is printed once per epoch.
// In DEBUG builds the Index2Word dictionary is additionally handed to Record on a new thread.
private void Train()
{
    // The total is computed in long arithmetic: item count * epoch count can exceed
    // int.MaxValue, and int multiplication would wrap silently.
    using (var status = StatusWrapper.NewStatus(@"Train", (long)this.Data.Count() * this.Ip2VecSettings.EpochsCount))
    {
        // Building training container
        var sets = new List<TrainingElement>();
        foreach (var relation in this.Ip2VecSettings.ContextRelationDict)
        {
            sets.Add(TrainingElement.NewContainer(this.WordCount, relation.Value.Count));
        }

        for (int epoch = 0; epoch < this.Ip2VecSettings.EpochsCount; epoch++)
        {
            this.Loss = 0d;

            foreach (var item in this.Data)
            {
                // Building training elements. The containers were created in the dictionary's
                // enumeration order, so they are matched to relations by position.
                var slot = 0;
                foreach (var relation in this.Ip2VecSettings.ContextRelationDict)
                {
                    sets[slot].RebuildTargetWordVector(W2I(relation.Key(item)));
                    sets[slot].SetContextWordVector(relation.Value.Select(x => W2I(x(item))).ToArray());
                    slot++;
                }

                if (this.Ip2VecSettings.NegativeSampling > 0)
                {
                    foreach (var element in sets)
                    {
                        this.TrainElementWithNegativeSampling(element);
                    }
                }
                else
                {
                    // TrainElement also receives the position of the element within `sets`.
                    for (int position = 0; position < sets.Count; position++)
                    {
                        this.TrainElement(sets[position], position);
                    }
                }

                Iter++;
                status.PushProgress();
            }

            Console.WriteLine("loss = " + this.Loss);
        }

#if DEBUG
        var sb = new StringBuilder();
        foreach (var item in this.Ip2VecModel.Index2Word)
        {
            sb.AppendLine(string.Format("{0}\t{1}", item.Key, item.Value));
        }

        (new Thread(Record)).Start(new KeyValuePair<string, string>("Dictionary", sb.ToString()));
#endif
    }
}
// Prepares the Ip2Vec model for training.
//  1. De-duplicates Data.
//  2. Creates Ip2VecModel if it does not exist yet.
//  3. Applies every distinct extractor function found in ContextRelationDict (keys and values)
//     to every data item; unseen values get the next index in Word2Index / Index2Word with a
//     frequency of 1, known values have their frequency incremented.
//  4. If W1 already exists and its row count equals WordCount, only the dice table is rebuilt.
//     Otherwise W1 and W2 are re-allocated, RandomInitializationParallelWorker is run for
//     worker ids 0 .. MaximumParallelWorker - 1, and the dice table is built.
// Throws InvalidOperationException if the parallel initialization reports non-completion.
// NOTE(review): when new words were added to an existing model, W1/W2 are re-allocated and the
// previous rows are not copied over - confirm that discarding them is intended.
private void PreTrain()
{
    // Distinct
    this.Data = this.Data.Distinct().ToList();

    using (var status = StatusWrapper.NewStatus(@"PreTrain", this.Data.Count))
    {
        // Model Initialization if needed
        if (this.Ip2VecModel == null)
        {
            this.Ip2VecModel = new Ip2VecModel();
        }

        // The extractor set depends only on the settings, so it is built once instead of
        // once per data item.
        var allfunc = this.Ip2VecSettings.ContextRelationDict.SelectMany(x => x.Value).ToList();
        allfunc.AddRange(this.Ip2VecSettings.ContextRelationDict.Keys.ToList());
        var extractors = allfunc.Distinct().ToList();

        // Generating dictionaries. New indices continue after the ones already in the model.
        int index = this.Ip2VecModel.Word2Index.Count;
        foreach (var item in this.Data)
        {
            foreach (var func in extractors)
            {
                var value = func.Invoke(item);

                // Single lookup instead of ContainsKey followed by the indexer.
                int knownIndex;
                if (this.Ip2VecModel.Word2Index.TryGetValue(value, out knownIndex))
                {
                    this.Ip2VecModel.Frequency[knownIndex]++;
                }
                else
                {
                    this.Ip2VecModel.Word2Index.Add(value, index);
                    this.Ip2VecModel.Index2Word.Add(index, value);
                    index++;
                    this.Ip2VecModel.Frequency.Add(1);
                }
            }

            status.PushProgress();
        }

        this.WordCount = index;
    }

    // Appending Learning Mode w/o new word
    if (this.Ip2VecModel.W1 != null && this.WordCount == this.Ip2VecModel.W1.RowCount)
    {
        this.BuildDiceTable();
        return;
    }

    this.Ip2VecModel.W1 = Matrix<double>.Build.Dense(this.WordCount, this.Ip2VecSettings.NeuronCount);
    this.Ip2VecModel.W2 = Matrix<double>.Build.Dense(this.WordCount, this.Ip2VecSettings.NeuronCount);

    var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = this.Ip2VecSettings.MaximumParallelWorker };
    var result = Parallel.For(0, this.Ip2VecSettings.MaximumParallelWorker, parallelOptions, this.RandomInitializationParallelWorker);
    if (!result.IsCompleted)
    {
        throw new InvalidOperationException("Parallel weight initialization ended before all workers completed.");
    }

    this.BuildDiceTable();
}
// Prepares the NeVe model for training.
//  1. De-duplicates Data and creates a fresh NeVeModel.
//  2. Builds Word2Index by applying every function in Contexts.ContextFuncDict to every data
//     item and numbering the distinct results from 0; WordCount is the number of entries.
//  3. Builds the co-occurrence list: for every item and every (relation key, distance function)
//     pair in ContextRelationDistanceDict, the cell (index of key word, index of context word)
//     is increased by 1 / distance. The cells are stored in Cooccurrences as (row, column, value).
//  4. Appends, per word, randomly initialized main/context vectors and biases, and gradient
//     accumulators initialized to 1.
// NOTE(review): the object returned by StatusWrapper.NewStatus is not kept or disposed here -
// confirm this is intended.
private void PreTrain()
{
    // Distinct
    this.Data = this.Data.Distinct().ToList();
    this.NeVeModel = new NeVeModel();
    StatusWrapper.NewStatus("PreTrain");

    // WordCount. The value selector numbers the words in enumeration order.
    int index = 0;
    this.NeVeModel.Word2Index = this.Data
        .SelectMany(x => Contexts.ContextFuncDict.Values.Select(y => y.Invoke(x)))
        .Distinct()
        .ToDictionary(x => x, x => index++);
    this.WordCount = index;

    // Build Cooccurrences
    var cooccurrenceMatrix = new Dictionary<Coordinate<int, int>, double>();
    foreach (var sentence in this.Data)
    {
        foreach (var relations in this.NeVeSettings.ContextRelationDistanceDict)
        {
            foreach (var distancefunc in relations.Value)
            {
                Coordinate<int, int> coordinate = Coordinate<int, int>.NewInstance(
                    this.NeVeModel.Word2Index[relations.Key.Invoke(sentence)],
                    this.NeVeModel.Word2Index[distancefunc.Key.Invoke(sentence)]);

                // Single lookup: a missing cell leaves `accumulated` at 0, so the same
                // assignment covers both the "new cell" and the "existing cell" case.
                double accumulated;
                cooccurrenceMatrix.TryGetValue(coordinate, out accumulated);
                cooccurrenceMatrix[coordinate] = accumulated + 1d / distancefunc.Value;
            }
        }
    }

    // Build Working Tuples
    this.Cooccurrences = cooccurrenceMatrix
        .Select(x => new Tuple<int, int, double>(x.Key.X, x.Key.Y, x.Value))
        .ToList();

    // Build Word Vectors
#if DEBUG
    var r = new Random(Constants.DebugSeed);
#else
    var r = new Random();
#endif
    for (int i = 0; i < index; i++)
    {
        this.NeVeModel.MainVectors.Add(
            Vector<double>.Build.Dense(this.NeVeSettings.VectorSize, x => (r.NextDouble() - 0.5) / (index + 1)));
        this.NeVeModel.ContextVectors.Add(
            Vector<double>.Build.Dense(this.NeVeSettings.VectorSize, x => (r.NextDouble() - 0.5) / (index + 1)));

        // Biases use the same small symmetric range as the vectors. The previous expression
        // "(...) / index + 1" parsed as "((...) / index) + 1", which produced biases close to 1.
        this.MainBias.Add((r.NextDouble() - 0.5) / (index + 1));
        this.ContextBias.Add((r.NextDouble() - 0.5) / (index + 1));

        // Gradient accumulators start at 1.
        this.MainGradientSquared.Add(Vector<double>.Build.Dense(this.NeVeSettings.VectorSize, 1));
        this.ContextGradientSquared.Add(Vector<double>.Build.Dense(this.NeVeSettings.VectorSize, 1));
        this.MainGradientSquaredBias.Add(1);
        this.ContextGradientSquaredBias.Add(1);
    }
}