Esempio n. 1
0
        /// <summary>
        /// Updates W1 for every context word index of <paramref name="trainset"/> using sampled updates.
        /// For each context word, one step is run with label 1 against the context word itself,
        /// followed by up to <c>NegativeSampling</c> steps with label 0 against indices drawn from
        /// <c>DiceTable</c>. The delta accumulated by those steps is then added to the context
        /// word's row of W1.
        /// </summary>
        /// <param name="trainset">Training element whose <c>ContextWordIndexList</c> is iterated.</param>
        private void TrainElementWithNegativeSampling(TrainingElement trainset)
        {
            foreach (var contextWordIndex in trainset.ContextWordIndexList)
            {
                // Accumulator handed by reference to every sampling step for this context word.
                var rowDelta = Vector<double>.Build.Dense(Ip2VecSettings.NeuronCount, 0);

                for (int draw = 0; draw < this.Ip2VecSettings.NegativeSampling + 1; draw++)
                {
                    // The first pass is the positive sample; every later pass draws from the table.
                    bool isPositive = draw == 0;

                    int sampledIndex = isPositive
                        ? contextWordIndex
                        : this.DiceTable[random.Next(0, tableSize)];

                    // A drawn sample that collides with the positive word is dropped, not redrawn,
                    // so fewer than NegativeSampling label-0 steps may run.
                    if (!isPositive && sampledIndex == contextWordIndex)
                    {
                        continue;
                    }

                    // NOTE(review): the positive pair is (contextWordIndex, contextWordIndex) and
                    // trainset's target word is never read here - confirm this is intended.
                    this.NegativeSamplingIteration(contextWordIndex, isPositive ? 1 : 0, sampledIndex, ref rowDelta);
                }

                var updatedRow = this.Ip2VecModel.W1.Row(contextWordIndex).Add(rowDelta);
                this.Ip2VecModel.W1.SetRow(contextWordIndex, updatedRow);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Creates a <see cref="TrainingElement"/> with <c>Count</c> set to <paramref name="count"/>,
        /// a target word vector built for index 0, and <paramref name="contextCount"/> context
        /// word vectors, each added for index 0.
        /// </summary>
        /// <param name="count">Value assigned to the new element's <c>Count</c>.</param>
        /// <param name="contextCount">Number of context word vectors to add; values of zero or less add none.</param>
        /// <returns>The element returned by the last builder call in the chain.</returns>
        public static TrainingElement NewContainer(int count, int contextCount)
        {
            var element = new TrainingElement();
            element.Count = count;
            element = element.BuildTargetWordVector(0);

            // Each builder call returns the element to continue with, so keep re-assigning it.
            int remaining = contextCount;
            while (remaining > 0)
            {
                element = element.AddContextWordVector(0);
                remaining--;
            }

            return element;
        }
Esempio n. 3
0
        /// <summary>
        /// Runs one softmax-based update of W1 and W2 for a single training element.
        /// Computes h = W1^T * target, u = W2 * h and y_pred = Softmax(u), sums
        /// (y_pred - context) over all context word vectors into EI, and then subtracts the
        /// LearningRate-scaled outer products from W1 and W2.
        /// </summary>
        /// <param name="trainset">Element providing <c>TargetWordVector</c> and <c>ContextWordVectorList</c>.</param>
        /// <param name="subIter">Index of this element within the current iteration; only used to key the DEBUG trace.</param>
        /// <remarks>
        /// The textual trace of all intermediate vectors and both weight matrices is only built in
        /// DEBUG builds. Previously it was assembled on every call and discarded in Release builds.
        /// </remarks>
        private void TrainElement(TrainingElement trainset, int subIter)
        {
            var h      = this.Ip2VecModel.W1.Transpose().Multiply(trainset.TargetWordVector);
            var u      = this.Ip2VecModel.W2.Multiply(h);
            var y_pred = this.Softmax(u);

            // Summed prediction error over all context words.
            var EI = Vector<double>.Build.Dense(this.WordCount, 0);

            foreach (var item in trainset.ContextWordVectorList)
            {
                EI = EI.Add(y_pred.Subtract(item));
            }

#if DEBUG
            var sb = new StringBuilder("********************new iteration starts************************").AppendLine();
            sb.Append("h   >>>").Append(h.ToString(int.MaxValue, int.MaxValue)).AppendLine();
            sb.Append("u   >>>").Append(u.ToString(int.MaxValue, int.MaxValue)).AppendLine();
            sb.Append("y_p >>>").Append(y_pred.ToString(int.MaxValue, int.MaxValue)).AppendLine();
            sb.Append("EI  >>>").Append(EI.ToString(int.MaxValue, int.MaxValue)).AppendLine();
#endif

            // Both deltas are computed from the weights as they were before this update;
            // delta_dw1 must read W2 before W2 is overwritten below.
            var delta_dw2 = h.OuterProduct(EI);
            var delta_dw1 = trainset.TargetWordVector.OuterProduct(this.Ip2VecModel.W2.Transpose().Multiply(EI.ToColumnMatrix()).Column(0));

            this.Ip2VecModel.W1 -= delta_dw1.Multiply(this.Ip2VecSettings.LearningRate);
            // delta_dw2 has the shape of W2 transposed, so the subtraction is done in that orientation.
            this.Ip2VecModel.W2  = (this.Ip2VecModel.W2.Transpose() - delta_dw2.Multiply(this.Ip2VecSettings.LearningRate)).Transpose();

#if DEBUG
            // Weights are dumped after the update, matching the original trace layout.
            sb.AppendLine("W1   >>>").Append(Ip2VecModel.W1.ToString(int.MaxValue, int.MaxValue)).AppendLine();
            sb.AppendLine("W2   >>>").Append(Ip2VecModel.W2.ToString(int.MaxValue, int.MaxValue)).AppendLine();

            sb.AppendLine("************************iteration ends***************************");

            // The trace is handed to Record on its own thread, keyed by "<Iter>-<subIter>".
            var data = new KeyValuePair<string, string>(string.Format("{0}-{1}", this.Iter.ToString(), subIter.ToString()), sb.ToString());
            new Thread(Record).Start(data);
#endif
        }
Esempio n. 4
0
        /// <summary>
        /// Trains the model for <c>EpochsCount</c> passes over <c>Data</c>.
        /// One reusable <see cref="TrainingElement"/> is allocated per entry of
        /// <c>ContextRelationDict</c>. For every data item the containers are refilled from that
        /// item and then trained, using negative sampling when <c>NegativeSampling</c> is greater
        /// than zero and <c>TrainElement</c> otherwise. Progress is pushed once per data item and
        /// the loss is written to the console after each epoch.
        /// </summary>
        private void Train()
        {
            using (var status = StatusWrapper.NewStatus(@"Train", this.Data.Count() * this.Ip2VecSettings.EpochsCount))
            {
                // Allocate the containers once; they are refilled in place for every data item.
                var containers = new List<TrainingElement>();
                foreach (var relation in this.Ip2VecSettings.ContextRelationDict)
                {
                    containers.Add(TrainingElement.NewContainer(this.WordCount, relation.Value.Count));
                }

                for (int epoch = 0; epoch < this.Ip2VecSettings.EpochsCount; epoch++)
                {
                    this.Loss = 0d;

                    foreach (var record in this.Data)
                    {
                        // Refill containers; slot order follows ContextRelationDict enumeration order,
                        // which is the same order the containers were created in.
                        var slot = 0;
                        foreach (var relation in this.Ip2VecSettings.ContextRelationDict)
                        {
                            var container = containers[slot];
                            container.RebuildTargetWordVector(W2I(relation.Key(record)));
                            container.SetContextWordVector(relation.Value.Select(extract => W2I(extract(record))).ToArray());
                            slot++;
                        }

                        // Pick the training routine once per data item.
                        bool useNegativeSampling = this.Ip2VecSettings.NegativeSampling > 0;
                        for (int position = 0; position < containers.Count; position++)
                        {
                            if (useNegativeSampling)
                            {
                                this.TrainElementWithNegativeSampling(containers[position]);
                            }
                            else
                            {
                                this.TrainElement(containers[position], position);
                            }
                        }

                        Iter++;
                        status.PushProgress();
                    }

                    Console.WriteLine("loss = " + this.Loss);
                }
#if DEBUG
                // Dump the index-to-word table through Record on a separate thread.
                var dump = new StringBuilder();
                foreach (var entry in this.Ip2VecModel.Index2Word)
                {
                    dump.AppendFormat("{0}\t{1}", entry.Key, entry.Value).AppendLine();
                }
                (new Thread(Record)).Start(new KeyValuePair<string, string>("Dictionary", dump.ToString()));
#endif
            }
        }