/// <summary>
/// Creates a decoder over the supplied weight vector and tag set, starting with
/// empty <c>Pi</c> and <c>Bp</c> lists.
/// </summary>
/// <param name="weightVector">Stored in <c>WeightVector</c>.</param>
/// <param name="tags">Stored in <c>Tags</c>.</param>
public ViterbiForGlobalLinearModel(WeightVector weightVector, Tags tags)
 {
     // Both lists start out empty; they do not depend on the arguments.
     Bp = new List<Dictionary<string, string>>();
     Pi = new List<Dictionary<string, double>>();

     Tags = tags;
     WeightVector = weightVector;
 }
        /// <summary>
        /// Prepares the state for one sentence: stores the inputs, allocates empty
        /// alpha/beta/u/Uab dictionaries, enables scaling (log mode off) and precomputes the
        /// bigram keys of the tag list.
        /// </summary>
        /// <param name="inputSentence">Sentence to process; its count pre-sizes <c>cList</c> and <c>dList</c>.</param>
        /// <param name="wc">Weight vector, also handed to the <c>WeightedFeatureSum</c> helper.</param>
        /// <param name="tagList">Tags from which the 2-gram keys are generated.</param>
        public ForwardBackwordAlgo(List<string> inputSentence, WeightVector wc, List<string> tagList)
        {
            _inputSentence = inputSentence;
            _wc = wc;
            _tagList = tagList;
            _alphaDictionary = new Dictionary<int, Dictionary<string, double>>();
            _betaDictionary = new Dictionary<int, Dictionary<string, double>>();
            _uDictionary = new Dictionary<int, Dictionary<string, double>>();
            UabDictionary = new Dictionary<int, Dictionary<string, double>>();
            Z = 0;
            _weightedFeaturesum = new WeightedFeatureSum(wc, inputSentence, true);
            cList = new List<double>(_inputSentence.Count);
            dList = new List<double>(_inputSentence.Count);
            _useScaling = true;
            _useLog = false;

            // Collect the keys in a list first: growing the array by one element per n-gram
            // (Array.Resize) re-copies the whole array on every iteration.
            var twoGrams = new List<string>();
            var ngramTags = new Tags(_tagList);
            foreach (var ngram in ngramTags.GetNGramTags(2))
            {
                // Each n-gram is "first:second"; the stored key joins the two tags with "@#".
                string[] split = ngram.Split(':');
                twoGrams.Add(split[0] + "@#" + split[1]);
            }

            // The array has always had at least 4 slots (unused ones stay null); keep that shape.
            _twoGramsList = new string[Math.Max(4, twoGrams.Count)];
            twoGrams.CopyTo(_twoGramsList, 0);
        }
 /// <summary>
 /// Stores the training data and hyper-parameters and precomputes, for every tag 2-gram,
 /// both its "first@#second" key and the (first, second) pair.
 /// </summary>
 /// <param name="inputSentence">Training sentences.</param>
 /// <param name="tagsList">Tag sequences stored in <c>_outputTagsList</c>.</param>
 /// <param name="tagList">Tags from which the 2-grams are generated.</param>
 /// <param name="lambda">Stored in <c>_lambda</c>.</param>
 /// <param name="learningParam">Stored in <c>_learningParam</c>.</param>
 /// <param name="cache">Feature cache stored in <c>_cache</c>.</param>
 /// <param name="logger">Stored in <c>Logger</c>.</param>
 public ComputeGradient(List<List<string>> inputSentence, List<List<string>> tagsList,
     List<string> tagList, double lambda, double learningParam, FeatureCache cache, WriteModel logger)
 {
     Logger = logger;
     _inputSentence = inputSentence;
     _outputTagsList = tagsList;
     _tagList = tagList;
     _lambda = lambda;
     _learningParam = learningParam;
     _cache = cache;
     forwardBackwordAlgos = new List<ForwardBackwordAlgo>();
     _weightVector = null;

     // Gather the pairs in a list first: resizing both arrays by one element per n-gram
     // re-copies them on every iteration.
     var pairs = new List<KeyValuePair<string, string>>();
     var ngramTags = new Tags(_tagList);
     foreach (var ngram in ngramTags.GetNGramTags(2))
     {
         // Each n-gram is "first:second".
         string[] split = ngram.Split(':');
         pairs.Add(new KeyValuePair<string, string>(split[0], split[1]));
     }

     // Both arrays have always had at least 4 slots (unused ones keep their default value).
     int size = Math.Max(4, pairs.Count);
     _twoGramsList = new string[size];
     _twoGramPair = new KeyValuePair<string, string>[size];
     for (int index = 0; index < pairs.Count; index++)
     {
         _twoGramsList[index] = pairs[index].Key + "@#" + pairs[index].Value;
         _twoGramPair[index] = pairs[index];
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Loads the model and its feature map, builds the Viterbi decoder, then decodes every
        /// sentence of the test file and writes the result to the output file.
        /// </summary>
        /// <param name="debug">
        /// Passed to the decoder; when true the debug list returned by the decoder is written
        /// alongside the tags.
        /// </param>
        public void Setup(bool debug)
        {
            var modelReader = new ReadModel(InputModelFile);
            var featureMapReader = new ReadModel(InputModelFile + ".featuresToK");
            _weightVector = new WeightVector(featureMapReader.GetFeatureToKdDictionary());

            // Copy every entry of the model file into the weight vector.
            foreach (var entry in modelReader.ModelIterator())
            {
                _weightVector.Add(entry);
            }

            _tags = new Tags(_tagList);
            _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

            // Decode the test file sentence by sentence.
            var testData = new ReadInputData(InputTestFile);
            var output = new WriteModel(_outputTestFile);
            foreach (var sentence in testData.GetSentence())
            {
                List<string> debugInfo;
                var predictedTags = _viterbiForGlobalLinearModel.Decode(sentence, debug, out debugInfo);
                if (!debug)
                {
                    output.WriteDataWithTag(sentence, predictedTags);
                    continue;
                }

                output.WriteDataWithTagDebug(sentence, predictedTags, debugInfo);
            }

            output.Flush();
        }
 /// <summary>
 /// Bundles everything one work item needs: the gradient computer, the index range,
 /// the weight vector to fill and the event used for completion signalling.
 /// </summary>
 /// <param name="cg">Stored in <c>Gradient</c>.</param>
 /// <param name="start">Stored in <c>Start</c>.</param>
 /// <param name="end">Stored in <c>End</c>.</param>
 /// <param name="wc">Stored in <c>NewWeightVector</c>.</param>
 /// <param name="resetEvent">Stored in <c>ResetEvent</c>.</param>
 public ThreadInfoObject(ComputeGradient cg, int start, int end, WeightVector wc, ManualResetEvent resetEvent)
 {
     // Index range assigned to this work item.
     Start = start;
     End = end;

     // Collaborators.
     Gradient = cg;
     NewWeightVector = wc;
     ResetEvent = resetEvent;
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Stores the training data, tag list, lambda and feature cache; starts with an empty
 /// forward-backward list and no weight vector.
 /// </summary>
 /// <param name="inputSentence">Training sentences.</param>
 /// <param name="tagsList">Tag sequences stored in <c>_outputTagsList</c>.</param>
 /// <param name="tagList">Stored in <c>_tagList</c>.</param>
 /// <param name="lambda">Stored in <c>_lambda</c>.</param>
 /// <param name="cache">Stored in <c>_cache</c>.</param>
 public ComputeGradient(List<List<string>> inputSentence, List<List<string>> tagsList,
     List<string> tagList, double lambda, FeatureCache cache)
 {
     // Training data.
     _outputTagsList = tagsList;
     _inputSentence = inputSentence;
     _tagList = tagList;

     // Parameters and shared cache.
     _cache = cache;
     _lambda = lambda;

     // Filled in later.
     _weightVector = null;
     forwardBackwordAlgos = new List<ForwardBackwordAlgo>();
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Compacts the feature index space: every feature whose absolute weight is greater
        /// than zero is renumbered consecutively from 0, and the weight array, feature count
        /// and both feature/index dictionaries are replaced accordingly.
        /// </summary>
        /// <param name="weightVector">
        /// Weight vector to compact; its <c>WeightArray</c>, <c>FeatureCount</c> and
        /// <c>FeatureKDictionary</c> are overwritten.
        /// </param>
        /// <param name="normalize">When true, <c>AvgNormalize</c> is called on the compacted vector.</param>
        public void ReMappingFromWeightVector(WeightVector weightVector, bool normalize = true)
        {
            // Features whose absolute weight does not exceed this threshold are dropped.
            const double limit = 0;

            var newDictKtoF = new Dictionary<int, string>();
            var newDictFtoK = new Dictionary<string, int>();

            // Sized for the case where every feature survives. A new array is already
            // zero-filled, so unused trailing slots stay 0.
            var newWeights = new double[weightVector.FeatureCount];
            int featureCount = 0;

            for (int i = 0; i < weightVector.FeatureCount; i++)
            {
                double weight = weightVector.WeightArray[i];
                if (Math.Abs(weight) > limit)
                {
                    newWeights[featureCount] = weight;
                    var feature = DictKToFeatures[i];
                    newDictFtoK[feature] = featureCount;
                    newDictKtoF[featureCount] = feature;
                    featureCount++;
                }
            }

            weightVector.WeightArray = newWeights;
            weightVector.FeatureCount = featureCount;
            DictFeaturesToK = weightVector.FeatureKDictionary = newDictFtoK;
            DictKToFeatures = newDictKtoF;
            if (normalize)
            {
                weightVector.AvgNormalize();
            }
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Maps the features of <paramref name="inputFile"/>, allocates a weight vector of matching
 /// size, builds the Viterbi decoder and reads the inputs.
 /// </summary>
 /// <param name="inputFile">Stored in <c>_inputFile</c> and passed to the feature mapper.</param>
 /// <param name="outputFile">Stored in <c>_outputFile</c>; the feature map path is this value plus ".featuresToK".</param>
 /// <param name="tagList">Tags used by the feature mapper and the decoder.</param>
 public Perceptron(string inputFile, string outputFile, List<string> tagList)
 {
     _outputFile = outputFile;
     _inputFile = inputFile;

     var tagSet = new Tags(tagList);

     // Feature mapping runs first because the weight vector is sized from its result.
     var featureMapPath = outputFile + ".featuresToK";
     MapFeatures = new MapFeaturesToK(inputFile, featureMapPath, tagList);
     MapFeatures.StartMapping();

     WeightVector = new WeightVector(MapFeatures.DictFeaturesToK, MapFeatures.FeatureCount);
     _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(WeightVector, tagSet);

     TagsList = new List<List<string>>();
     InputSentences = new List<List<string>>();
     ReadInputs();
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Prepares the state for one sentence: stores the inputs, wraps the tag list in a
 /// <c>Tags</c> object and allocates empty alpha/beta/u/Uab dictionaries with Z at 0.
 /// </summary>
 /// <param name="inputSentence">Sentence to process.</param>
 /// <param name="wc">Weight vector, also handed to the <c>WeightedFeatureSum</c> helper.</param>
 /// <param name="tagList">Tag list stored in <c>_tagList</c> and wrapped in <c>_tags</c>.</param>
 public ForwardBackwordAlgo(List<string> inputSentence, WeightVector wc, List<string> tagList)
 {
     // Inputs.
     _wc = wc;
     _inputSentence = inputSentence;
     _tagList = tagList;
     _tags = new Tags(tagList);

     // Empty tables, keyed by int at the outer level.
     _alphaDictionary = new Dictionary<int, Dictionary<string, double>>();
     _betaDictionary = new Dictionary<int, Dictionary<string, double>>();
     _uDictionary = new Dictionary<int, Dictionary<string, double>>();
     UabDictionary = new Dictionary<int, Dictionary<string, double>>();
     Z = 0;

     _weightedFeaturesum = new WeightedFeatureSum(wc, inputSentence, true);
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Maps the features of all <paramref name="inputFiles"/>, allocates the working and
        /// averaged weight vectors and builds the Viterbi decoder. Inputs are not read here.
        /// </summary>
        /// <param name="inputFiles">Files handed to the feature mapper.</param>
        /// <param name="outputFile">Stored in <c>_outputFile</c>; the feature map path is this value plus ".featuresToK".</param>
        /// <param name="tagList">Tags used by the feature mapper and the decoder.</param>
        /// <param name="useAvg">Stored in <c>_useAvg</c>.</param>
        public Perceptron(List<string> inputFiles, string outputFile, List<string> tagList, bool useAvg = false)
        {
            _useAvg = useAvg;
            _outputFile = outputFile;

            var tagSet = new Tags(tagList);

            // Feature mapping runs first because both weight vectors are sized from its result.
            var featureMapPath = outputFile + ".featuresToK";
            MapFeatures = new MapFeaturesToK(featureMapPath, tagList);
            MapFeatures.StartMapping(inputFiles);

            WeightVector = new WeightVector(MapFeatures.DictFeaturesToK, MapFeatures.FeatureCount);
            AvgWeightVector = new WeightVector(MapFeatures.DictFeaturesToK, MapFeatures.FeatureCount);
            _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(WeightVector, tagSet);

            TagsList = new List<List<string>>();
            InputSentences = new List<List<string>>();
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Loads the ".preceptron" model file and its ".featuresToK" feature map, fills the
        /// weight vector from the model entries and creates the Viterbi decoder.
        /// </summary>
        public void Init()
        {
            var modelReader = new ReadModel(string.Concat(InputModelFile, ".preceptron"));
            var featureMapReader = new ReadModel(InputModelFile + ".featuresToK");

            // The weight vector is sized by the number of mapped features.
            var featureToK = featureMapReader.GetFeatureToKdDictionary();
            _weightVector = new WeightVector(featureToK, featureToK.Count);

            foreach (var entry in modelReader.ModelIterator())
            {
                _weightVector.Add(entry);
            }

            _tags = new Tags(_tagList);
            ViterbiForGLM = new ViterbiForGlobalLinearModel(_weightVector, _tags);
        }
 /// <summary>
 /// Calls <c>Compute(k)</c> for every k in [start, end) and stores each result in
 /// <paramref name="newWeightVector"/>. NaN or infinite results are logged but still stored.
 /// </summary>
 /// <param name="start">First index (inclusive).</param>
 /// <param name="end">Last index (exclusive).</param>
 /// <param name="newWeightVector">Receives the computed value for each index.</param>
 /// <param name="threadIndex">Only used in the progress message.</param>
 public void ComputeRange(int start, int end, WeightVector newWeightVector, int threadIndex = 0)
 {
     var k = start;
     while (k < end)
     {
         // Progress line every 100 indices.
         var reportProgress = (k % 100) == 0;
         if (reportProgress)
         {
             Console.WriteLine(DateTime.Now + "threadIndex: " + threadIndex +
                 " running iteration for k " + k);
         }

         var computed = Compute(k);
         var isFinite = !double.IsNaN(computed) && !double.IsInfinity(computed);
         if (!isFinite)
         {
             Logger.WriteLine("k: "+ k + "wk is infiity of nana"+ computed);
             Logger.Flush(false);
         }

         newWeightVector.SetKey(k, computed);
         k++;
     }
 }
Ejemplo n.º 13
0
 /// <summary>
 /// Runs <paramref name="iterationCount"/> update rounds. Each round builds a fresh weight
 /// vector in which every index k holds <c>old[k] + _lambda * Compute(k, old)</c>.
 /// </summary>
 /// <param name="weightVector">Starting weights.</param>
 /// <param name="iterationCount">Number of rounds to run.</param>
 /// <returns>The weights after the last round (also stored in <c>_weightVector</c>).</returns>
 public WeightVector RunIterations(WeightVector weightVector, int iterationCount)
 {
     var current = weightVector;
     var iter = 0;
     while (iter < iterationCount)
     {
         Console.WriteLine(DateTime.Now + " running iteration " + iter);
         var next = new WeightVector(current.FeatureKDictionary);
         SetForwardBackwordAlgo(current);

         // Feature indices are visited from the highest down to 0.
         var k = current.FeatureKDictionary.Count;
         while (--k >= 0)
         {
             if (k % 100 == 0)
             {
                 Console.WriteLine(DateTime.Now + " running iteration for k " + k);
             }

             var step = Compute(k, current);
             next.SetKey(k, current.Get(k) + _lambda * step);
         }

         current = next;
         iter++;
     }

     _weightVector = current;
     return current;
 }
Ejemplo n.º 14
0
        /// <summary>
        /// Keeps only the weights whose absolute value is greater than 2, renumbers them from 0
        /// in order of decreasing absolute value, and replaces the weight dictionary and both
        /// feature/index dictionaries with the renumbered versions.
        /// </summary>
        /// <param name="weightVector">
        /// Weight vector to compact; its <c>WDictionary</c> and <c>FeatureKDictionary</c> are overwritten.
        /// </param>
        public void ReMappingFromWeightVector(WeightVector weightVector)
        {
            var indexToFeature = new Dictionary<int, string>();
            var featureToIndex = new Dictionary<string, int>();
            var remappedWeights = new Dictionary<int, double>();

            var keptWeights = weightVector.WDictionary
                .Where(entry => Math.Abs(entry.Value) > 2)
                .OrderByDescending(entry => Math.Abs(entry.Value));

            int nextIndex = 0;
            foreach (var entry in keptWeights)
            {
                // Look the feature up under its old index, then register it under the new one.
                var feature = DictKToFeatures[entry.Key];
                featureToIndex[feature] = nextIndex;
                indexToFeature[nextIndex] = feature;
                remappedWeights[nextIndex] = entry.Value;
                nextIndex++;
            }

            weightVector.WDictionary = remappedWeights;
            DictFeaturesToK = weightVector.FeatureKDictionary = featureToIndex;
            DictKToFeatures = indexToFeature;
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Re-maps the feature indices from the averaged weight vector when <c>_useAvg</c> is set,
 /// otherwise from the working one. In the averaged case the averaged vector then
 /// becomes <c>WeightVector</c>.
 /// </summary>
 /// <param name="normalize">Forwarded to <c>ReMappingFromWeightVector</c>.</param>
 public void ReMapFeatureToK(bool normalize = true)
 {
     if (!_useAvg)
     {
         MapFeatures.ReMappingFromWeightVector(WeightVector, normalize);
         return;
     }

     MapFeatures.ReMappingFromWeightVector(AvgWeightVector, normalize);
     WeightVector = AvgWeightVector;
 }
Ejemplo n.º 16
0
        /// <summary>
        /// For every sentence, sums <c>GetAllFeatureKFromCacheWithWeights</c> over all feature
        /// indices for the tag sequence in <c>_outputTagsList</c> and subtracts the log of that
        /// sentence's <c>Z</c>. The per-sentence values are added up and the negated total is returned.
        /// </summary>
        /// <param name="weightVector">Supplies the feature count and is forwarded to the per-feature helper.</param>
        /// <returns>The negated total. The console line prints the total before negation.</returns>
        public double ComputeFunctionValue(WeightVector weightVector)
        {
            double total = 0;

            var sentenceIndex = 0;
            while (sentenceIndex < _inputSentence.Count)
            {
                var sentenceTags = _outputTagsList[sentenceIndex];

                // Per-sentence value is accumulated separately before joining the total.
                double sentenceValue = 0;
                var featureIndex = 0;
                while (featureIndex < weightVector.FeatureCount)
                {
                    sentenceValue += GetAllFeatureKFromCacheWithWeights(sentenceTags, featureIndex,
                        sentenceIndex, weightVector);
                    featureIndex++;
                }
                sentenceValue -= Math.Log(forwardBackwordAlgos[sentenceIndex].Z);

                total += sentenceValue;
                sentenceIndex++;
            }

            Console.WriteLine(DateTime.Now+": new function value is: "+total);
            return -total;
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Fills <paramref name="gradient"/> for every feature index, either on the calling
        /// thread or split across <paramref name="threadCount"/> thread-pool work items.
        /// </summary>
        /// <remarks>
        /// Previously the full serial pass also ran after the threaded pass, recomputing every
        /// entry. The threaded path now stands alone, and the last work item takes the
        /// remainder that integer division would otherwise leave unassigned.
        /// NOTE(review): this assumes <c>StartLBFGGradientComputing</c> fills
        /// <c>gradient[start..end)</c> and signals its event — confirm against ThreadInfoObject.
        /// <c>WaitHandle.WaitAll</c> accepts at most 64 handles, which caps threadCount.
        /// </remarks>
        /// <param name="weightVector">Weights the gradient is evaluated at.</param>
        /// <param name="gradient">Output array indexed by feature index.</param>
        /// <param name="threadCount">Number of work items; values of 1 or less run serially.</param>
        public void ComputeGradientMultiThread(WeightVector weightVector, double[] gradient,
            int threadCount)
        {
            var featureCount = weightVector.FeatureCount;

            if (threadCount <= 1)
            {
                ComputeGradientValues(weightVector, gradient, 0, featureCount);
                return;
            }

            var doneEvents = new ManualResetEvent[threadCount];
            var partition = featureCount / threadCount;

            for (int threadIndex = 0; threadIndex < threadCount; threadIndex++)
            {
                var start = threadIndex * partition;
                // The last work item also covers the featureCount % threadCount leftovers.
                var end = threadIndex == threadCount - 1 ? featureCount : start + partition;
                doneEvents[threadIndex] = new ManualResetEvent(false);

                var info = new ThreadInfoObject(this, start, end, weightVector,
                    doneEvents[threadIndex], gradient);
                ThreadPool.QueueUserWorkItem(info.StartLBFGGradientComputing, threadIndex);
            }

            WaitHandle.WaitAll(doneEvents);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Computes <c>gradient[k]</c> for every k in [startIndex, endIndex). For each sentence
        /// the result of <c>CalculateGradient</c> is subtracted from the result of
        /// <c>GetAllFeatureKFromCache</c>; the per-sentence values are summed and
        /// <c>_lambda * weightVector.Get(k)</c> is subtracted at the end.
        /// </summary>
        /// <param name="weightVector">Supplies the weight used in the lambda term.</param>
        /// <param name="gradient">Output array; only indices in the given range are written.</param>
        /// <param name="startIndex">First feature index (inclusive).</param>
        /// <param name="endIndex">Last feature index (exclusive).</param>
        public void ComputeGradientValues(WeightVector weightVector, double[] gradient,
            int startIndex, int endIndex )
        {
            var k = startIndex;
            while (k < endIndex)
            {
                // Key suffix for this feature index, formatted culture-independently.
                var featureKey = "@#" + k.ToString(CultureInfo.InvariantCulture);

                double featureTotal = 0;
                var sentenceIndex = 0;
                while (sentenceIndex < _inputSentence.Count)
                {
                    var sentenceTags = _outputTagsList[sentenceIndex];

                    double sentenceValue = 0;
                    sentenceValue += GetAllFeatureKFromCache(sentenceTags, k, sentenceIndex);
                    sentenceValue -= CalculateGradient(sentenceTags, k,
                        sentenceIndex, featureKey);

                    featureTotal += sentenceValue;
                    sentenceIndex++;
                }

                gradient[k] = featureTotal - (_lambda * weightVector.Get(k));
                k++;
            }
        }
Ejemplo n.º 19
0
 /// <summary>
 /// Stores the weight vector, the sentence and the CRF flag for later use.
 /// </summary>
 /// <param name="weightVector">Stored in <c>WeightVector</c>.</param>
 /// <param name="sentence">Stored in <c>_sentence</c>.</param>
 /// <param name="crf">Stored in <c>_crf</c>; defaults to false.</param>
 public WeightedFeatureSum(WeightVector weightVector, List<string> sentence, bool crf = false)
 {
     WeightVector = weightVector;
     _crf = crf;
     _sentence = sentence;
 }
Ejemplo n.º 20
0
 /// <summary>
 /// Rebuilds <c>forwardBackwordAlgos</c> with one <c>ForwardBackwordAlgo</c> per training
 /// sentence, after checking that sentences and tag sequences line up.
 /// </summary>
 /// <remarks>
 /// The third constructor argument of <c>ForwardBackwordAlgo</c> is its tag list. The
 /// sentence's own tag sequence used to be passed there; the label set <c>_tagList</c> is
 /// passed now, so the tag list no longer varies from sentence to sentence.
 /// </remarks>
 /// <param name="weightVector">Weights handed to every <c>ForwardBackwordAlgo</c>.</param>
 /// <exception cref="Exception">
 /// The number of sentences and tag sequences differ, or a sentence and its tag sequence
 /// have different lengths.
 /// </exception>
 private void SetForwardBackwordAlgo(WeightVector weightVector)
 {
     if (_inputSentence.Count != _outputTagsList.Count)
     {
         throw new Exception("counts dont match " + _inputSentence.Count + "with " + _outputTagsList.Count);
     }
     int counter = 0;
     forwardBackwordAlgos.Clear();
     foreach (var sentence in _inputSentence)
     {
         // The tag sequence is only needed for the length check.
         var outputTags = _outputTagsList[counter];
         if (sentence.Count != outputTags.Count)
         {
             throw new Exception("counts dont match " + sentence.Count + "with " + outputTags.Count);
         }
         forwardBackwordAlgos.Add(new ForwardBackwordAlgo(sentence, weightVector, _tagList));
         counter++;
     }
 }
Ejemplo n.º 21
0
 /// <summary>
 /// Adds the weight of feature <paramref name="k"/> once for every position of
 /// <paramref name="tags"/> at which the cache reports the feature for the
 /// (previous tag, current tag) pair.
 /// </summary>
 /// <remarks>
 /// The weight is now read from <paramref name="weightVector"/>. It used to come from the
 /// <c>_weightVector</c> field, so the argument was ignored. The field is still used when
 /// the argument is null.
 /// </remarks>
 /// <param name="tags">Tag sequence of the sentence.</param>
 /// <param name="k">Feature index.</param>
 /// <param name="lineIndex">Sentence index used for the cache lookup.</param>
 /// <param name="weightVector">Weights to read feature <paramref name="k"/> from.</param>
 /// <returns>The weight of feature k multiplied by the number of positions where it is cached.</returns>
 public double GetAllFeatureKFromCacheWithWeights(List<string> tags, int k,
     int lineIndex, WeightVector weightVector)
 {
     var weights = weightVector ?? _weightVector;

     double sum = 0;
     for (var pos = 0; pos < tags.Count; pos++)
     {
         // "*" stands in for the previous tag at the first position.
         var prevTag = pos > 0 ? tags[pos - 1] : "*";
         if (_cache.Contains(prevTag, tags[pos], k, pos, lineIndex))
         {
             sum += weights.Get(k);
         }
     }
     return sum;
 }
Ejemplo n.º 22
0
        /// <summary>
        /// Sums <c>CalculateGradient</c> for feature <paramref name="k"/> over all training
        /// sentences and subtracts <c>_lambda * weightVector.Get(k)</c>.
        /// </summary>
        /// <param name="k">Feature index.</param>
        /// <param name="weightVector">Supplies the weight used in the lambda term.</param>
        /// <returns>The summed value minus the lambda term.</returns>
        /// <exception cref="Exception">
        /// The number of sentences and tag sequences differ, or a sentence and its tag sequence
        /// have different lengths.
        /// </exception>
        private double Compute(int k, WeightVector weightVector)
        {
            if (_inputSentence.Count != _outputTagsList.Count)
            {
                throw new Exception("counts dont match "+ _inputSentence.Count + "with "+ _outputTagsList.Count);
            }

            var tagNGrams = new Tags(_tagList);
            double accumulated = 0;

            for (int lineIndex = 0; lineIndex < _inputSentence.Count; lineIndex++)
            {
                var sentence = _inputSentence[lineIndex];
                var sentenceTags = _outputTagsList[lineIndex];

                if (sentence.Count != sentenceTags.Count)
                {
                    throw new Exception("compute counts dont match " + sentence.Count + "with " + sentenceTags.Count);
                }

                accumulated += CalculateGradient(sentenceTags, k,
                    tagNGrams, lineIndex);
            }

            return accumulated - (_lambda*weightVector.Get(k));
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Runs <paramref name="iterationCount"/> update rounds. Each round deep-copies the
        /// current weights, rebuilds the forward-backward state, recomputes every feature
        /// (serially or on thread-pool work items) and normalizes the result with
        /// <c>AvgNormalize</c>.
        /// </summary>
        /// <remarks>
        /// In the threaded path the last work item now takes the remainder of the integer
        /// division. Previously the trailing <c>FeatureCount % threadCount</c> features were
        /// assigned to no worker and kept their copied values.
        /// <c>WaitHandle.WaitAll</c> accepts at most 64 handles, which caps threadCount.
        /// </remarks>
        /// <param name="weightVector">Starting weights.</param>
        /// <param name="iterationCount">Number of rounds; with 0 rounds the input is only normalized.</param>
        /// <param name="threadCount">Number of work items per round; 1 or less runs serially.</param>
        /// <returns>The weights after the last round (also stored in <c>_weightVector</c>).</returns>
        public WeightVector RunIterations(WeightVector weightVector, int iterationCount, int threadCount = 1)
        {
            _weightVector = weightVector;

            for (var iter = 0; iter < iterationCount; iter++)
            {
                Console.WriteLine(DateTime.Now + " running iteration " + iter);

                var newWeightVector = _weightVector.DeepCopy();
                SetForwardBackwordAlgo(newWeightVector);
                if (threadCount > 1)
                {
                    var doneEvents = new ManualResetEvent[threadCount];
                    var featureCount = newWeightVector.FeatureCount;
                    var partition = featureCount / threadCount;

                    for (int threadIndex = 0; threadIndex < threadCount; threadIndex++)
                    {
                        var start = threadIndex * partition;
                        // The last work item also covers the featureCount % threadCount leftovers.
                        var end = threadIndex == threadCount - 1 ? featureCount : start + partition;
                        doneEvents[threadIndex] = new ManualResetEvent(false);

                        var info = new ThreadInfoObject(this, start, end, newWeightVector,
                            doneEvents[threadIndex], null);
                        ThreadPool.QueueUserWorkItem(info.StartGradientComputing, threadIndex);
                    }

                    WaitHandle.WaitAll(doneEvents);
                }
                else
                {
                    ComputeRange(0, _weightVector.FeatureCount, newWeightVector);
                }
                _weightVector = newWeightVector;

                // The last round is normalized by the call after the loop instead.
                if (iter + 1 < iterationCount)
                {
                    _weightVector.AvgNormalize();
                }
            }
            _weightVector.AvgNormalize();
            return _weightVector;
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Adds every element of <paramref name="weightVector"/>'s weight array to the element at
 /// the same index of this instance's <c>WeightArray</c>.
 /// </summary>
 /// <param name="weightVector">Source of the values to add.</param>
 public void AddWeightVector(WeightVector weightVector)
 {
     var addend = weightVector.WeightArray;
     var index = 0;
     while (index < addend.Length)
     {
         WeightArray[index] += addend[index];
         index++;
     }
 }
Ejemplo n.º 25
0
        /// <summary>
        /// Optimizes the weights with ALGLIB's L-BFGS routines, starting from the current
        /// weight array and using <c>GetFunctionValueAndGradient</c> as the callback. The
        /// result replaces <c>weightVector.WeightArray</c>.
        /// </summary>
        /// <param name="weightVector">Weights to optimize; also stored in <c>_weightVector</c>.</param>
        public void RunLBFGAlgo(WeightVector weightVector)
        {
            // Stopping conditions handed to minlbfgssetcond.
            const double epsg = 0.0000000001;
            const double epsf = 0;
            const double epsx = 0;
            const int maxits = 50;

            _weightVector = weightVector;
            this.FeatureKDictionary = weightVector.FeatureKDictionary;

            alglib.minlbfgsstate state;
            alglib.minlbfgscreate(5, weightVector.WeightArray, out state);
            alglib.minlbfgssetcond(state, epsg, epsf, epsx, maxits);
            alglib.minlbfgsoptimize(state, GetFunctionValueAndGradient, null, this);

            double[] optimized;
            alglib.minlbfgsreport report;
            alglib.minlbfgsresults(state, out optimized, out report);
            weightVector.WeightArray = optimized;

            System.Console.WriteLine(DateTime.Now+": terminationtype {0}", report.terminationtype); // EXPECTED: 4
        }
Ejemplo n.º 26
0
 /// <summary>
 /// Rebuilds <c>forwardBackwordAlgos</c>: for every training sentence a
 /// <c>ForwardBackwordAlgo</c> is created with the given weights and <c>_tagList</c>,
 /// run, and appended to the list.
 /// </summary>
 /// <param name="weightVector">Weights handed to every <c>ForwardBackwordAlgo</c>.</param>
 /// <exception cref="Exception">The number of sentences and tag sequences differ.</exception>
 private void SetForwardBackwordAlgo(WeightVector weightVector)
 {
     if (_inputSentence.Count != _outputTagsList.Count)
     {
         throw new Exception("counts dont match " + _inputSentence.Count + "with " + _outputTagsList.Count);
     }

     forwardBackwordAlgos.Clear();
     for (int sentenceIndex = 0; sentenceIndex < _inputSentence.Count; sentenceIndex++)
     {
         // Progress line every 100 sentences.
         if (sentenceIndex % 100 == 0)
         {
             Console.WriteLine(DateTime.Now + "running fw/backword iteration: "+sentenceIndex);
         }

         var algo = new ForwardBackwordAlgo(_inputSentence[sentenceIndex], weightVector, _tagList);
         algo.Run();
         forwardBackwordAlgos.Add(algo);
     }
 }