public ForwardBackwordAlgo(List<string> inputSentence, WeightVector wc, List<string> tagList)
        {
            // Capture the sentence, weight vector and tag inventory the
            // forward/backward passes will operate on.
            _inputSentence = inputSentence;
            _wc = wc;
            _tagList = tagList;

            // Per-position score tables, filled lazily by the passes.
            _alphaDictionary = new Dictionary<int, Dictionary<string, double>>();
            _betaDictionary = new Dictionary<int, Dictionary<string, double>>();
            _uDictionary = new Dictionary<int, Dictionary<string, double>>();
            UabDictionary = new Dictionary<int, Dictionary<string, double>>();

            Z = 0;
            _weightedFeaturesum = new WeightedFeatureSum(wc, inputSentence, true);
            cList = new List<double>(_inputSentence.Count);
            dList = new List<double>(_inputSentence.Count);
            _useScaling = true;
            _useLog = false;

            // Pre-compute every tag bigram as a "prev@#cur" key. The array
            // starts with 4 slots and is grown one slot at a time on demand,
            // so its final length is max(4, bigram count) — preserved as-is.
            _twoGramsList = new string[4];
            var bigramSource = new Tags(_tagList);
            var slot = 0;
            foreach (var bigram in bigramSource.GetNGramTags(2))
            {
                if (slot >= _twoGramsList.Length)
                {
                    Array.Resize(ref _twoGramsList, slot + 1);
                }
                var parts = bigram.Split(new[] { ':' });
                _twoGramsList[slot] = string.Concat(parts[0], "@#", parts[1]);
                slot++;
            }
        }
예제 #2
0
        public void Setup(bool debug)
        {
            // Rebuild the weight vector: the feature-to-index mapping comes
            // from the ".featuresToK" side file, the weights from the model.
            var modelReader = new ReadModel(InputModelFile);
            var mapReader = new ReadModel(string.Concat(InputModelFile, ".featuresToK"));
            _weightVector = new WeightVector(mapReader.GetFeatureToKdDictionary());

            foreach (var entry in modelReader.ModelIterator())
            {
                _weightVector.Add(entry);
            }

            _tags = new Tags(_tagList);
            _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

            // Decode every sentence of the test file and write the tagged
            // output (with per-line debug info when requested).
            var input = new ReadInputData(InputTestFile);
            var output = new WriteModel(_outputTestFile);
            foreach (var sentence in input.GetSentence())
            {
                List<string> debugInfo;
                var decodedTags = _viterbiForGlobalLinearModel.Decode(sentence, debug, out debugInfo);
                if (debug)
                {
                    output.WriteDataWithTagDebug(sentence, decodedTags, debugInfo);
                }
                else
                {
                    output.WriteDataWithTag(sentence, decodedTags);
                }
            }
            output.Flush();
        }
 public ViterbiForGlobalLinearModel(WeightVector weightVector, Tags tags)
 {
     // Store collaborators and start with empty Viterbi tables:
     // Pi holds per-position scores, Bp the matching back-pointers.
     Pi = new List<Dictionary<string, double>>();
     Bp = new List<Dictionary<string, string>>();
     WeightVector = weightVector;
     Tags = tags;
 }
 public ComputeGradient(List<List<string>> inputSentence, List<List<string>> tagsList,
     List<string> tagList, double lambda, double learningParam, FeatureCache cache, WriteModel logger)
 {
     // Keep references to the training data, hyper-parameters and caches.
     Logger = logger;
     _inputSentence = inputSentence;
     _outputTagsList = tagsList;
     _tagList = tagList;
     _lambda = lambda;
     _learningParam = learningParam;
     _cache = cache;
     forwardBackwordAlgos = new List<ForwardBackwordAlgo>();
     _weightVector = null;

     // Enumerate all tag bigrams once, both as the "prev@#cur" key string
     // and as a (prev, cur) pair. Both arrays start at 4 slots and grow one
     // slot at a time, so the final length max(4, count) is preserved.
     _twoGramsList = new string[4];
     _twoGramPair = new KeyValuePair<string, string>[4];
     var bigramSource = new Tags(_tagList);
     var slot = 0;
     foreach (var bigram in bigramSource.GetNGramTags(2))
     {
         if (slot >= _twoGramsList.Length)
         {
             Array.Resize(ref _twoGramsList, slot + 1);
             Array.Resize(ref _twoGramPair, slot + 1);
         }
         var parts = bigram.Split(new[] { ':' });
         _twoGramsList[slot] = string.Concat(parts[0], "@#", parts[1]);
         _twoGramPair[slot] = new KeyValuePair<string, string>(parts[0], parts[1]);
         slot++;
     }
 }
        public MapFeaturesToK(string outputFile, List<string> tagList)
        {
            // Start with empty feature<->index mappings; features are
            // numbered from 0 as they are discovered during mapping.
            _writeModel = new WriteModel(outputFile);
            _tagList = tagList;
            _tags = new Tags(tagList);
            FeatureCount = 0;
            DictFeaturesToK = new Dictionary<string, int>();
            DictKToFeatures = new Dictionary<int, string>();
        }
예제 #6
0
 public ForwardBackwordAlgo(List<string> inputSentence, WeightVector wc, List<string> tagList)
 {
     // Capture inputs for this sentence.
     _inputSentence = inputSentence;
     _wc = wc;
     _tagList = tagList;
     _tags = new Tags(tagList);

     // Empty per-position score tables, filled by the forward/backward passes.
     _alphaDictionary = new Dictionary<int, Dictionary<string, double>>();
     _betaDictionary = new Dictionary<int, Dictionary<string, double>>();
     _uDictionary = new Dictionary<int, Dictionary<string, double>>();
     UabDictionary = new Dictionary<int, Dictionary<string, double>>();

     Z = 0;
     _weightedFeaturesum = new WeightedFeatureSum(wc, inputSentence, true);
 }
예제 #7
0
 public Perceptron(string inputFile, string outputFile, List<string> tagList)
 {
     _inputFile = inputFile;
     _outputFile = outputFile;

     // Build the feature-to-index map first; the weight vector is sized
     // from the number of features it discovers.
     MapFeatures = new MapFeaturesToK(inputFile, string.Concat(outputFile, ".featuresToK"), tagList);
     MapFeatures.StartMapping();
     WeightVector = new WeightVector(MapFeatures.DictFeaturesToK, MapFeatures.FeatureCount);

     var tagSet = new Tags(tagList);
     _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(WeightVector, tagSet);

     InputSentences = new List<List<string>>();
     TagsList = new List<List<string>>();
     ReadInputs();
 }
예제 #8
0
        public Perceptron(List<string> inputFiles, string outputFile, List<string> tagList, bool useAvg = false)
        {
            _outputFile = outputFile;
            _useAvg = useAvg;

            // Map features across every input file, then size both weight
            // vectors (current and running average) from the resulting map.
            MapFeatures = new MapFeaturesToK(string.Concat(outputFile, ".featuresToK"), tagList);
            MapFeatures.StartMapping(inputFiles);
            WeightVector = new WeightVector(MapFeatures.DictFeaturesToK, MapFeatures.FeatureCount);
            AvgWeightVector = new WeightVector(MapFeatures.DictFeaturesToK, MapFeatures.FeatureCount);

            var tagSet = new Tags(tagList);
            _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(WeightVector, tagSet);

            InputSentences = new List<List<string>>();
            TagsList = new List<List<string>>();
            // NOTE(review): unlike the single-file overload, ReadInputs() is
            // deliberately not called here (it was commented out upstream).
        }
        public void Init()
        {
            // Load the perceptron model plus its feature-to-index mapping.
            // (".preceptron" — misspelled — matches the extension used when
            // the model file was written, so it must stay as-is.)
            var modelReader = new ReadModel(InputModelFile + ".preceptron");
            var mapReader = new ReadModel(string.Concat(InputModelFile, ".featuresToK"));
            var featureToK = mapReader.GetFeatureToKdDictionary();
            _weightVector = new WeightVector(featureToK, featureToK.Count);

            foreach (var entry in modelReader.ModelIterator())
            {
                _weightVector.Add(entry);
            }

            _tags = new Tags(_tagList);
            ViterbiForGLM = new ViterbiForGlobalLinearModel(_weightVector, _tags);
        }
예제 #10
0
 public void CreateCache()
 {
     Console.WriteLine(DateTime.Now + "creating Cache begin");

     // For every tag bigram "prev:cur", store a feature at each position of
     // every sentence. Position 0 always pairs the start symbol "*" with
     // the current tag instead of the bigram's previous tag.
     var bigramSource = new Tags(_tags);
     foreach (var bigram in bigramSource.GetNGramTags(2))
     {
         var parts = bigram.Split(new[] { ':' });
         for (var line = 0; line < Sentences.Count; line++)
         {
             var length = Sentences[line].Count;
             for (var pos = 0; pos < length; pos++)
             {
                 var prevTag = pos == 0 ? "*" : parts[0];
                 StoreFeature(prevTag, parts[1], pos, line);
             }
         }
     }
     Console.WriteLine(DateTime.Now + "creating Cache end");
 }
예제 #11
0
        /*
                static void Test()
                {
                    const string inputFile = "../../data/tag.model";
                    //const string outputFile = "../../test.output1";
                    var readModel = new ReadModel(inputFile);
                    //var writeModel = new WriteModel(outputFile);
                    var weightVector = new WeightVector();
                    var tags = new List<string> {"I-GENE", "O"};

                    PrintFeatureList(tags);

                    foreach (var pair in readModel.ModelIterator())
                    {
                        weightVector.Add(pair);

                    }
                    //writeModel.WriteLine(line);
                    //writeModel.Flush();
                }
        */
        static void PrintFeatureList(List<string> tags)
        {
            // Debug helper: dump all tag 3-grams built from the given tags.
            new Tags(tags).Dump(3);
        }
예제 #12
0
        private double CalculateGradient(List<string> outputTags,
            int k, Tags ngramTags, int lineIndex)
        {
            // Per-sentence gradient contribution for feature k: the cached
            // feature-k sum for the given tag sequence, minus the second
            // term accumulated over every tag position.
            var firstTerm = GetAllFeatureKFromCache(outputTags, k, lineIndex);

            double secondTerm = 0;
            for (var position = 0; position < outputTags.Count; position++)
            {
                secondTerm += GetSecondTerm(ngramTags, lineIndex, position, k);
            }

            return firstTerm - secondTerm;
        }
예제 #13
0
        private double GetSecondTerm(Tags ngramTags, 
            int lineIndex, int pos, int k)
        {
            // Sum q(prev, cur) * w_k over every tag bigram that the feature
            // cache reports as active for (k, pos, lineIndex).
            double total = 0;
            foreach (var bigram in ngramTags.GetNGramTags(2))
            {
                var parts = bigram.Split(new[] { ':' });
                if (!_cache.Contains(parts[0], parts[1], k, pos, lineIndex))
                {
                    continue;
                }
                total += forwardBackwordAlgos[lineIndex].GetQ(pos, parts[0], parts[1])
                         * _weightVector.Get(k);
            }
            return total;
        }
예제 #14
0
        private double Compute(int k, WeightVector weightVector)
        {
            // Regularised gradient for weight k: the per-sentence gradients
            // summed over the whole training set, minus lambda * w_k.
            if (_inputSentence.Count != _outputTagsList.Count)
            {
                throw new Exception("counts dont match " + _inputSentence.Count + "with " + _outputTagsList.Count);
            }

            var ngramTags = new Tags(_tagList);
            double total = 0;

            for (var lineIndex = 0; lineIndex < _inputSentence.Count; lineIndex++)
            {
                var sentence = _inputSentence[lineIndex];
                var outputTags = _outputTagsList[lineIndex];

                // Every token must have exactly one gold tag.
                if (sentence.Count != outputTags.Count)
                {
                    throw new Exception("compute counts dont match " + sentence.Count + "with " + outputTags.Count);
                }

                total += CalculateGradient(outputTags, k, ngramTags, lineIndex);
            }

            return total - (_lambda * weightVector.Get(k));
        }