/// <summary>
/// Initializes the per-sentence state: stores the sentence, weight vector and tag list,
/// creates empty lookup dictionaries and scaling lists, enables scaling (log mode off),
/// and precomputes the tag-bigram keys.
/// </summary>
/// <param name="inputSentence">Sentence to process; its count presizes the scaling lists.</param>
/// <param name="wc">Weight vector, also handed to the <c>WeightedFeatureSum</c> helper.</param>
/// <param name="tagList">Tags from which the 2-gram keys are generated.</param>
public ForwardBackwordAlgo(List<string> inputSentence, WeightVector wc, List<string> tagList)
        {
            _inputSentence = inputSentence;
            _wc = wc;
            _tagList = tagList;
            _alphaDictionary = new Dictionary<int, Dictionary<string, double>>();
            _betaDictionary = new Dictionary<int, Dictionary<string, double>>();
            _uDictionary = new Dictionary<int, Dictionary<string, double>>();
            UabDictionary = new Dictionary<int, Dictionary<string, double>>();
            Z = 0;
            _weightedFeaturesum = new WeightedFeatureSum(wc, inputSentence, true);
            cList = new List<double>(_inputSentence.Count);
            dList = new List<double>(_inputSentence.Count);
            _useScaling = true;
            _useLog = false;

            // Collect the bigram keys in a growable list and materialise the array once.
            // Growing a fixed array one slot at a time re-copies it on every element, and a
            // fixed initial size leaves null slots when fewer bigrams are produced.
            var twoGrams = new List<string>();
            var ngramTags = new Tags(_tagList);
            foreach (var ngram in ngramTags.GetNGramTags(2))
            {
                // Each n-gram is split on ':'; the first two parts are re-joined with "@#".
                string[] split = ngram.Split(new[] { ':' });
                twoGrams.Add(split[0] + "@#" + split[1]);
            }
            _twoGramsList = twoGrams.ToArray();
        }
 /// <summary>
 /// Stores the training inputs and hyper-parameters, resets the weight vector reference and
 /// the list of forward-backward instances, and precomputes the tag bigrams both as
 /// "first@#second" keys and as (first, second) pairs.
 /// </summary>
 /// <param name="inputSentence">Training sentences.</param>
 /// <param name="tagsList">Tag sequences stored as the expected outputs.</param>
 /// <param name="tagList">Tags from which the 2-gram keys are generated.</param>
 /// <param name="lambda">Value stored in <c>_lambda</c>.</param>
 /// <param name="learningParam">Value stored in <c>_learningParam</c>.</param>
 /// <param name="cache">Feature cache used by later computations.</param>
 /// <param name="logger">Model writer assigned to <c>Logger</c>.</param>
 public ComputeGradient(List<List<string>> inputSentence, List<List<string>> tagsList,
     List<string> tagList, double lambda, double learningParam, FeatureCache cache, WriteModel logger)
 {
     Logger = logger;
     _inputSentence = inputSentence;
     _outputTagsList = tagsList;
     _tagList = tagList;
     _lambda = lambda;
     _learningParam = learningParam;
     _cache = cache;
     forwardBackwordAlgos = new List<ForwardBackwordAlgo>();
     _weightVector = null;

     // Build both bigram views in growable lists and materialise the arrays once.
     // Growing fixed arrays one slot at a time re-copies them on every element, and a
     // fixed initial size leaves empty trailing slots when fewer bigrams are produced.
     var twoGrams = new List<string>();
     var twoGramPairs = new List<KeyValuePair<string, string>>();
     var ngramTags = new Tags(_tagList);
     foreach (var ngram in ngramTags.GetNGramTags(2))
     {
         // Each n-gram is split on ':'; the first two parts form the key and the pair.
         string[] split = ngram.Split(new[] { ':' });
         twoGrams.Add(split[0] + "@#" + split[1]);
         twoGramPairs.Add(new KeyValuePair<string, string>(split[0], split[1]));
     }
     _twoGramsList = twoGrams.ToArray();
     _twoGramPair = twoGramPairs.ToArray();
 }
예제 #3
0
 /// <summary>
 /// Walks every tag bigram over every position of every sentence and calls
 /// <c>StoreFeature</c> for each combination, logging a timestamp before and after.
 /// </summary>
 public void CreateCache()
 {
     Console.WriteLine(DateTime.Now + "creating Cache begin");
     var bigramSource = new Tags(_tags);
     foreach (var bigram in bigramSource.GetNGramTags(2))
     {
         string[] parts = bigram.Split(':');
         for (var sentenceIdx = 0; sentenceIdx < Sentences.Count; sentenceIdx++)
         {
             for (var tokenIdx = 0; tokenIdx < Sentences[sentenceIdx].Count; tokenIdx++)
             {
                 // Position 0 uses "*" in place of the bigram's first tag.
                 string previousTag = tokenIdx == 0 ? "*" : parts[0];
                 StoreFeature(previousTag, parts[1], tokenIdx, sentenceIdx);
             }
         }
     }
     Console.WriteLine(DateTime.Now + "creating Cache end");
 }
예제 #4
0
        /// <summary>
        /// Accumulates, over all tag bigrams, the product of <c>GetQ</c> for the given
        /// sentence and position with the weight of feature <paramref name="k"/>, counting
        /// only bigrams for which the feature cache reports a hit.
        /// </summary>
        /// <param name="ngramTags">Source of the tag bigrams (":"-separated).</param>
        /// <param name="lineIndex">Index of the sentence and of its forward-backward instance.</param>
        /// <param name="pos">Position within the sentence.</param>
        /// <param name="k">Feature index.</param>
        /// <returns>The accumulated sum; 0 when no bigram hits the cache.</returns>
        private double GetSecondTerm(Tags ngramTags, 
            int lineIndex, int pos, int k)
        {
            double total = 0;
            foreach (var bigram in ngramTags.GetNGramTags(2))
            {
                string[] parts = bigram.Split(':');
                string firstTag = parts[0];
                string secondTag = parts[1];

                // Only bigrams present in the cache for this feature/position contribute.
                if (!_cache.Contains(firstTag, secondTag, k, pos, lineIndex))
                {
                    continue;
                }

                var q = forwardBackwordAlgos[lineIndex].GetQ(pos, firstTag, secondTag);
                total += (q * _weightVector.Get(k));
            }
            return total;
        }