Example #1
0
        /// <summary>
        /// Runs the clustering pass over <c>Data</c>, reporting progress under the "DBScan" status,
        /// and calls <c>Validation()</c> once the status scope has been closed.
        /// </summary>
        /// <remarks>
        /// For every point whose <c>ClusterId</c> is not greater than zero, a cluster is seeded with
        /// <c>InnerClustering</c> and then grown by calling <c>InnerClustering</c> on each member,
        /// including members appended while growing.
        /// NOTE(review): presumably <c>InnerClustering</c> assigns <c>ClusterId</c> to the points it
        /// returns, which is what would make the expansion loop terminate - confirm.
        /// </remarks>
        public void Cluster()
        {
            using (var progress = StatusWrapper.NewStatus("DBScan", this.Data.Count))
            {
                // Index ranges are derived from the time ticks of all points and the configured radius.
                var timeTicks = this.Data.Select(point => point.TimeTick).ToList();
                this.RangeList = TimeCalculator.GetIndexRangeByTimeInterval(timeTicks, RangeRadius);

                for (int seed = 0; seed < this.Data.Count; seed++)
                {
                    // Points that already carry a positive cluster id are not used as seeds.
                    if (this.Data[seed].ClusterId > 0)
                    {
                        continue;
                    }

                    CurrentCluster = this.InnerClustering(seed);

                    // The list grows while it is being walked, so the bound is re-read on every pass.
                    for (int cursor = 0; cursor < this.CurrentCluster.Count; cursor++)
                    {
                        var reached = this.InnerClustering(this.CurrentCluster[cursor]);
                        if (reached.Any())
                        {
                            this.CurrentCluster.AddRange(reached);
                        }
                    }

                    // Progress advances by the size of the cluster that was just completed.
                    progress.PushProgress(this.CurrentCluster.Count);
                }
            }

            this.Validation();
        }
Example #2
0
        /// <summary>
        /// Fills <c>Neighbors</c> with one neighbour list per element of <c>Data</c>, reporting
        /// progress under the "Building Neighbors" status.
        /// </summary>
        /// <remarks>
        /// Candidates for each point are the elements inside the index range returned by
        /// <c>TimeCalculator.GetIndexRangeByTimeInterval</c> for that point; candidates whose distance
        /// is not greater than zero are skipped.
        /// NOTE(review): a single heap instance is reused for every point; whether <c>GetList()</c>
        /// empties it between points is not visible here - confirm.
        /// </remarks>
        private void BuildNeighbors()
        {
            using (var progress = StatusWrapper.NewStatus("Building Neighbors", this.Data.Count))
            {
                var nearest = new EasyHeapSmallFixedSize<NearestNeighbor>(HeapType.MinHeap, withTop, entry => entry.Distance);

                var timeTicks = this.Data.Select(point => point.TimeTick).ToList();
                var windows = TimeCalculator.GetIndexRangeByTimeInterval(timeTicks, Convert.ToInt64(kDisDiameter.TotalSeconds));

                for (int center = 0; center < this.Data.Count; center++)
                {
                    // Item1 is used as the start index, Item2 - Item1 as the number of candidates.
                    var window = windows[center];
                    var candidates = this.Data.GetRange(window.Item1, window.Item2 - window.Item1);

                    foreach (var candidate in candidates)
                    {
                        var distance = VectorPointHelper.GetDistance(this.Data[center], candidate, MathDistance);
                        if (distance > 0)
                        {
                            nearest.Push(new NearestNeighbor
                            {
                                Distance = distance,
                                NeighborIndex = candidate.Id
                            });
                        }
                    }

                    // Snapshot the heap content as this point's neighbour list.
                    this.Neighbors.Add(nearest.GetList().ToList());
                    progress.PushProgress();
                }
            }
        }
Example #3
0
        /// <summary>
        /// Loads the embedding model associated with <paramref name="filename"/>, reads the
        /// categorized clustered records for that file and hands both to a
        /// <c>TSNEVisualization</c>, which writes its output to a fixed CSV path.
        /// </summary>
        /// <param name="filename">File name used both to resolve the model name and to load the data.</param>
        private void TSNETransformForFile(string filename)
        {
            var modelName = this.EmbeddingModel.GetModelName(filename);
            this.EmbeddingModel.ReadModel(modelName);

            // NOTE(review): the returned status object is not kept or disposed here - confirm this is intended.
            StatusWrapper.NewStatus("Preparing");

            var visualizer = new TSNEVisualization();
            visualizer.Data = this.DataController.UseData<KeyFeatureClusteredCategorized>(filename).ToList();
            visualizer.Model = this.EmbeddingModel;

            // The output location is hard-coded.
            visualizer.toSNE(@"E:\EventExtractionExam\Debug\tSNEcate.csv");
        }
Example #4
0
        /// <summary>
        /// Fills the pair-counting fields of <c>ExternalValidation</c> (true/false positives and
        /// negatives) by comparing every unordered pair of entries in <c>ComparisonList</c>, then
        /// computes <c>Purity</c> and <c>NMI</c>.
        /// </summary>
        /// <remarks>
        /// A pair that agrees on both <c>Item1</c> and <c>Item2</c> is a true positive, one that agrees
        /// only on <c>Item1</c> a false negative, one that agrees only on <c>Item2</c> a false positive,
        /// and one that agrees on neither a true negative.
        /// NOTE(review): this suggests <c>Item1</c> is the reference label and <c>Item2</c> the
        /// assigned cluster - confirm against the code that builds <c>ComparisonList</c>.
        /// </remarks>
        private void CountErrors()
        {
            // Number of unordered pairs, n * (n - 1) / 2, computed in 64-bit arithmetic.
            long entryCount = ComparisonList.Count;
            long pairCount = entryCount * (entryCount - 1) / 2;

            using (var progress = StatusWrapper.NewStatus("Counting", pairCount))
            {
                for (int first = 0; first < ComparisonList.Count - 1; first++)
                {
                    for (int second = first + 1; second < ComparisonList.Count; second++)
                    {
                        progress.PushProgress();

                        bool sameItem1 = ComparisonList[first].Item1 == ComparisonList[second].Item1;
                        bool sameItem2 = ComparisonList[first].Item2 == ComparisonList[second].Item2;

                        if (sameItem1 && sameItem2)
                        {
                            ExternalValidation.TruePositive++;
                        }
                        else if (sameItem1)
                        {
                            ExternalValidation.FalseNegative++;
                        }
                        else if (sameItem2)
                        {
                            ExternalValidation.FalsePositive++;
                        }
                        else
                        {
                            ExternalValidation.TrueNegative++;
                        }
                    }
                }
            }

            // Purity: for each Item2 group take the size of its largest Item1 sub-group,
            // sum those sizes and divide by the total number of entries.
            var majoritySum = ComparisonList
                              .GroupBy(entry => entry.Item2)
                              .Sum(group => group.GroupBy(entry => entry.Item1).Max(sub => sub.Count()));
            ExternalValidation.Purity = (double)majoritySum / ComparisonList.Count;

            ExternalValidation.NMI = CalculateNMI();
        }
Example #5
0
        /// <summary>
        /// Loads the labelled records for <paramref name="filename"/> and the clustered records
        /// stored under the corresponding clustered file name, runs a
        /// <c>ClusteringMeasurement</c> over both and returns its external validation result.
        /// </summary>
        /// <param name="filename">Name of the file holding the labelled records.</param>
        /// <returns>The <c>ExternalValidation</c> produced by the measurement.</returns>
        private ClusterExternalValidation ClusteringMeasurementForFile(string filename)
        {
            // NOTE(review): the returned status object is not kept or disposed here - confirm this is intended.
            StatusWrapper.NewStatus("Preparing");

            // Both data sets are loaded before the measurement object is created.
            var expected = DataController.UseData<KeyFeatureOfRandomEvent>(filename).ToList();
            var actual = DataController.UseData<KeyFeatureClustered>(EmbeddingModel.GetClusteredFilename(filename)).ToList();

            var measurement = new ClusteringMeasurement();
            measurement.LabelList = expected;
            measurement.ResultList = actual;

            measurement.Measure();

            return measurement.ExternalValidation;
        }
Example #6
0
        //using AdaGradient
        /// <summary>
        /// Trains for <c>NeVeSettings.EpochsCount</c> epochs. Each epoch resets <c>Loss</c>, runs
        /// <c>AdaGradient</c> on <c>NeVeSettings.MaximumParallelWorker</c> parallel workers and
        /// prints the loss to the console.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the parallel loop reports that it did not run to completion.
        /// </exception>
        private void Train()
        {
            // The progress total is widened to 64 bits before multiplying: the product of the
            // co-occurrence count and the epoch count can exceed Int32.MaxValue and would
            // otherwise wrap around silently.
            long totalSteps = (long)this.Cooccurrences.Count * this.NeVeSettings.EpochsCount;

            // Status
            using (var status = StatusWrapper.NewStatus(@"Train", totalSteps))
            {
                // The options do not depend on the epoch, so they are built once.
                var parallelOptions = new ParallelOptions
                {
                    MaxDegreeOfParallelism = this.NeVeSettings.MaximumParallelWorker
                };

                for (int epoch = 0; epoch < this.NeVeSettings.EpochsCount; epoch++)
                {
                    this.Loss = 0d;

                    // One worker per id in [0, MaximumParallelWorker); each worker receives the shared status.
                    var result = Parallel.For(0, this.NeVeSettings.MaximumParallelWorker, parallelOptions, id => this.AdaGradient(id, status));

                    if (!result.IsCompleted)
                    {
                        throw new InvalidOperationException();
                    }

                    Console.WriteLine("Loss = {0}", this.Loss);
                }
            }
        }
Example #7
0
        /// <summary>
        /// Trains for <c>Ip2VecSettings.EpochsCount</c> epochs over <c>Data</c>. For every data item
        /// one training element per entry of <c>Ip2VecSettings.ContextRelationDict</c> is refreshed
        /// and then trained, with negative sampling when <c>Ip2VecSettings.NegativeSampling</c> is
        /// greater than zero and with <c>TrainElement</c> otherwise. The loss is printed after
        /// each epoch.
        /// </summary>
        /// <remarks>
        /// In DEBUG builds the index-to-word dictionary is additionally handed to <c>Record</c> on a
        /// new thread once training has finished.
        /// </remarks>
        private void Train()
        {
            // The progress total is widened to 64 bits before multiplying: item count times
            // epoch count can exceed Int32.MaxValue and would otherwise wrap around silently.
            long totalSteps = (long)this.Data.Count() * this.Ip2VecSettings.EpochsCount;

            using (var status = StatusWrapper.NewStatus(@"Train", totalSteps))
            {
                // Building training container: one reusable element per context relation.
                var sets = new List<TrainingElement>();
                foreach (var set in this.Ip2VecSettings.ContextRelationDict)
                {
                    sets.Add(TrainingElement.NewContainer(this.WordCount, set.Value.Count));
                }

                for (int i = 0; i < this.Ip2VecSettings.EpochsCount; i++)
                {
                    this.Loss = 0d;
                    foreach (var item in this.Data)
                    {
                        // Building training elements: the containers are matched to the relations by
                        // position, so the dictionary is walked in the same order used to create them.
                        var c = 0;
                        foreach (var set in this.Ip2VecSettings.ContextRelationDict)
                        {
                            sets[c].RebuildTargetWordVector(W2I(set.Key(item)));
                            sets[c].SetContextWordVector(set.Value.Select(x => W2I(x(item))).ToArray());
                            c++;
                        }

                        if (this.Ip2VecSettings.NegativeSampling > 0)
                        {
                            foreach (var set in sets)
                            {
                                this.TrainElementWithNegativeSampling(set);
                            }
                        }
                        else
                        {
                            // The position of the element within the list is passed along to TrainElement.
                            var index = 0;
                            foreach (var set in sets)
                            {
                                this.TrainElement(set, index++);
                            }
                        }

                        Iter++;
                        status.PushProgress();
                    }
                    Console.WriteLine("loss = " + this.Loss);
                }
#if DEBUG
                // Dump the index-to-word dictionary as tab-separated lines.
                var sb = new StringBuilder();
                foreach (var item in this.Ip2VecModel.Index2Word)
                {
                    sb.AppendLine(string.Format("{0}\t{1}", item.Key, item.Value));
                }
                (new Thread(Record)).Start(new KeyValuePair<string, string>("Dictionary", sb.ToString()));
#endif
            }
        }
Example #8
0
        /// <summary>
        /// Prepares the model for training: de-duplicates <c>Data</c>, extends the word/index
        /// dictionaries and frequency list of <c>Ip2VecModel</c> with every value produced by the
        /// configured context functions, allocates and randomly initializes the weight matrices
        /// when needed, and builds the dice table.
        /// </summary>
        /// <remarks>
        /// When the model already has a <c>W1</c> matrix whose row count equals the resulting word
        /// count, the existing weights are kept and only the dice table is rebuilt.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the parallel initialization loop reports that it did not run to completion.
        /// </exception>
        private void PreTrain()
        {
            // Distinct
            this.Data = this.Data.Distinct().ToList();

            using (var status = StatusWrapper.NewStatus(@"PreTrain", this.Data.Count))
            {
                // Model Initialization if needed
                if (this.Ip2VecModel == null)
                {
                    this.Ip2VecModel = new Ip2VecModel();
                }

                // The set of extractor functions (all relation values plus all relation keys) does
                // not depend on the data item, so it is collected and de-duplicated once up front
                // instead of once per item.
                var allfunc = this.Ip2VecSettings.ContextRelationDict.SelectMany(x => x.Value).ToList();
                allfunc.AddRange(this.Ip2VecSettings.ContextRelationDict.Keys.ToList());
                var extractors = allfunc.Distinct().ToList();

                // Generating dictionaries: new words get the next free index, known words only
                // have their frequency incremented.
                int index = this.Ip2VecModel.Word2Index.Count;
                foreach (var item in this.Data)
                {
                    foreach (var func in extractors)
                    {
                        var value = func.Invoke(item);

                        int knownIndex;
                        if (this.Ip2VecModel.Word2Index.TryGetValue(value, out knownIndex))
                        {
                            this.Ip2VecModel.Frequency[knownIndex]++;
                        }
                        else
                        {
                            this.Ip2VecModel.Word2Index.Add(value, index);
                            this.Ip2VecModel.Index2Word.Add(index, value);
                            index++;

                            // NOTE(review): assumes Frequency is positionally aligned with the word indices - confirm.
                            this.Ip2VecModel.Frequency.Add(1);
                        }
                    }
                    status.PushProgress();
                }

                this.WordCount = index;
            }

            // Appending Learning Mode w/o new word: the existing weights already match the vocabulary.
            if (this.Ip2VecModel.W1 != null && this.WordCount == this.Ip2VecModel.W1.RowCount)
            {
                this.BuildDiceTable();
                return;
            }

            this.Ip2VecModel.W1 = Matrix<double>.Build.Dense(this.WordCount, this.Ip2VecSettings.NeuronCount);
            this.Ip2VecModel.W2 = Matrix<double>.Build.Dense(this.WordCount, this.Ip2VecSettings.NeuronCount);

            // Fill the freshly allocated matrices using one initialization worker per id.
            var parallelOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = this.Ip2VecSettings.MaximumParallelWorker
            };
            var result = Parallel.For(0, this.Ip2VecSettings.MaximumParallelWorker, parallelOptions, this.RandomInitializationParallelWorker);

            if (!result.IsCompleted)
            {
                throw new InvalidOperationException();
            }

            this.BuildDiceTable();
        }
Example #9
0
        /// <summary>
        /// Prepares a fresh <c>NeVeModel</c> for training: de-duplicates <c>Data</c>, builds the
        /// word-to-index dictionary, accumulates the weighted co-occurrence counts into
        /// <c>Cooccurrences</c>, and initializes the word vectors, biases and squared-gradient
        /// accumulators.
        /// </summary>
        /// <remarks>
        /// Vector components and biases are all drawn as <c>(random - 0.5) / (wordCount + 1)</c>.
        /// The bias expressions previously lacked the parentheses around <c>index + 1</c>, which
        /// made them evaluate to <c>(random - 0.5) / wordCount + 1</c>, unlike the vectors next to them.
        /// In DEBUG builds the random generator is seeded with <c>Constants.DebugSeed</c>.
        /// </remarks>
        private void PreTrain()
        {
            // Distinct
            this.Data = this.Data.Distinct().ToList();

            this.NeVeModel = new NeVeModel();

            // NOTE(review): the returned status object is not kept or disposed here - confirm this is intended.
            StatusWrapper.NewStatus("PreTrain");

            // WordCount: every distinct value produced by the context functions gets the next
            // index; ToDictionary runs eagerly, so index holds the word count afterwards.
            int index = 0;

            this.NeVeModel.Word2Index = this.Data
                                        .SelectMany(x => Contexts.ContextFuncDict.Values.Select(y => y.Invoke(x)))
                                        .Distinct()
                                        .ToDictionary(x => x, x => index++);

            this.WordCount = index;

            // Build Cooccurrences: each (relation word, related word) pair contributes
            // 1 / distance to its cell.
            var cooccurrenceMatrix = new Dictionary<Coordinate<int, int>, double>();

            foreach (var sentence in this.Data)
            {
                foreach (var relations in this.NeVeSettings.ContextRelationDistanceDict)
                {
                    foreach (var distancefunc in relations.Value)
                    {
                        Coordinate<int, int> coordinate = Coordinate<int, int>.NewInstance(
                            this.NeVeModel.Word2Index[relations.Key.Invoke(sentence)],
                            this.NeVeModel.Word2Index[distancefunc.Key.Invoke(sentence)]);

                        // TryGetValue leaves 0 in 'accumulated' for an unseen cell, so a single
                        // write covers both the insert and the update case.
                        double accumulated;
                        cooccurrenceMatrix.TryGetValue(coordinate, out accumulated);
                        cooccurrenceMatrix[coordinate] = accumulated + 1d / distancefunc.Value;
                    }
                }
            }

            // Build Working Tuples
            this.Cooccurrences = cooccurrenceMatrix
                                 .Select(x => new Tuple<int, int, double>(x.Key.X, x.Key.Y, x.Value)).ToList();

            // Build Word Vectors
#if DEBUG
            var r = new Random(Constants.DebugSeed);
#else
            var r = new Random();
#endif
            for (int i = 0; i < index; i++)
            {
                this.NeVeModel.MainVectors.Add(Vector<double>.Build.Dense(this.NeVeSettings.VectorSize, x => (r.NextDouble() - 0.5) / (index + 1)));
                this.NeVeModel.ContextVectors.Add(Vector<double>.Build.Dense(this.NeVeSettings.VectorSize, x => (r.NextDouble() - 0.5) / (index + 1)));

                // Same scaling as the vector components above.
                this.MainBias.Add((r.NextDouble() - 0.5) / (index + 1));
                this.ContextBias.Add((r.NextDouble() - 0.5) / (index + 1));

                // Squared-gradient accumulators start at 1.
                this.MainGradientSquared.Add(Vector<double>.Build.Dense(this.NeVeSettings.VectorSize, 1));
                this.ContextGradientSquared.Add(Vector<double>.Build.Dense(this.NeVeSettings.VectorSize, 1));
                this.MainGradientSquaredBias.Add(1);
                this.ContextGradientSquaredBias.Add(1);
            }
        }