Beispiel #1
0
        /// <summary>
        /// Loads the model weights, builds the Viterbi decoder and tags every sentence
        /// read from <c>InputTestFile</c>, writing the results through a
        /// <c>WriteModel</c> created for <c>_outputTestFile</c>.
        /// </summary>
        /// <param name="debug">
        /// Passed through to the decoder; when true the debug list returned by the
        /// decoder is written alongside the tags.
        /// </param>
        public void Setup(bool debug)
        {
            var modelReader = new ReadModel(InputModelFile);
            // The feature dictionary comes from a companion file next to the model file.
            var featureMapReader = new ReadModel(InputModelFile + ".featuresToK");

            _weightVector = new WeightVector(featureMapReader.GetFeatureToKdDictionary());
            foreach (var weightEntry in modelReader.ModelIterator())
            {
                _weightVector.Add(weightEntry);
            }

            _tags = new Tags(_tagList);
            _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

            // Decode the test file one sentence at a time.
            var testData = new ReadInputData(InputTestFile);
            var writer = new WriteModel(_outputTestFile);
            foreach (var sentence in testData.GetSentence())
            {
                List<string> debugInfo;
                var predictedTags = _viterbiForGlobalLinearModel.Decode(sentence, debug, out debugInfo);

                if (!debug)
                {
                    writer.WriteDataWithTag(sentence, predictedTags);
                    continue;
                }

                writer.WriteDataWithTagDebug(sentence, predictedTags, debugInfo);
            }

            writer.Flush();
        }
Beispiel #2
0
        /// <summary>
        /// Reads every sentence from <c>_inputFile</c>, reduces each token to the text
        /// before its first space and hands the sentence to
        /// <c>GenerateMappingForSentence</c>.
        /// </summary>
        /// <remarks>
        /// Tokens appear to be space-separated "word tag" pairs (TODO confirm against
        /// the input format). Only the first part is needed here, so the second part
        /// is no longer collected into a throwaway list; as a consequence a token
        /// without a second part no longer raises <see cref="IndexOutOfRangeException"/>.
        /// </remarks>
        public void StartMapping()
        {
            var inputData = new ReadInputData(_inputFile);
            foreach (var line in inputData.GetSentence())
            {
                for (var j = 0; j < line.Count; j++)
                {
                    // Keep only the part before the first space; the sentence is edited in place.
                    line[j] = line[j].Split(new[] { ' ' })[0];
                }

                GenerateMappingForSentence(line);
            }

            inputData.Reset();
        }
 /// <summary>
 /// Reads all sentences from <paramref name="inputFile"/> and appends, per sentence,
 /// the list of first token parts to <c>InputSentences</c> and the list of second
 /// token parts to <c>TagsList</c>.
 /// </summary>
 /// <param name="inputFile">Path handed to <c>ReadInputData</c>.</param>
 public void ReadInputs(string inputFile)
 {
     var reader = new ReadInputData(inputFile);
     foreach (var sentence in reader.GetSentence())
     {
         var tokenCount = sentence.Count;
         var tags = new List<string>(tokenCount);
         var words = new List<string>(tokenCount);

         for (var index = 0; index < tokenCount; index++)
         {
             // Each token is split on single spaces: part 0 goes to the word list, part 1 to the tag list.
             var parts = sentence[index].Split(new[] { ' ' });
             words.Add(parts[0]);
             tags.Add(parts[1]);
         }

         InputSentences.Add(words);
         TagsList.Add(tags);
     }

     reader.Reset();
 }
        /// <summary>
        /// Calls <c>Init</c>, then decodes every sentence read from <c>InputTestFile</c>
        /// with <c>ViterbiForGLM.DecodeNew</c> and writes the tagged output through a
        /// <c>WriteModel</c> created for <c>_outputTestFile</c>.
        /// </summary>
        /// <param name="debug">
        /// Passed through to the decoder; when true the debug list returned by the
        /// decoder is written alongside the tags.
        /// </param>
        public void Setup(bool debug)
        {
            Init();

            // Decode the test file one sentence at a time.
            var testData = new ReadInputData(InputTestFile);
            var writer = new WriteModel(_outputTestFile);
            foreach (var sentence in testData.GetSentence())
            {
                List<string> debugInfo;
                var predictedTags = ViterbiForGLM.DecodeNew(sentence, debug, out debugInfo);

                if (!debug)
                {
                    writer.WriteDataWithTag(sentence, predictedTags);
                    continue;
                }

                writer.WriteDataWithTagDebug(sentence, predictedTags, debugInfo);
            }

            writer.Flush();
        }
        /// <summary>
        /// Trains on each file in <paramref name="inputFiles"/> for a fixed number of
        /// passes. For every sentence the decoder's tags are compared with the tags
        /// read from the file; on a mismatch, weights of the features that differ are
        /// raised for the file's tagging and lowered for the decoder's tagging.
        /// After each pass the current <c>WeightVector</c> is added to
        /// <c>AvgWeightVector</c>.
        /// </summary>
        /// <param name="inputFiles">Paths of the training files, processed in order.</param>
        public void Train(string[] inputFiles)
        {
            const int iterationCount = 10;

            foreach (var inputFile in inputFiles)
            {
                Console.WriteLine(DateTime.Now + " " + inputFile + " training  ");

                for (var iteration = 0; iteration < iterationCount; iteration++)
                {
                    var reader = new ReadInputData(inputFile);
                    foreach (var sentence in reader.GetSentence())
                    {
                        // Split every token into its two parts: the first replaces the
                        // token in the sentence, the second is kept as the reference tag.
                        var goldTags = new List<string>(sentence.Count);
                        for (var pos = 0; pos < sentence.Count; pos++)
                        {
                            var parts = sentence[pos].Split(new[] { ' ' });
                            sentence[pos] = parts[0];
                            goldTags.Add(parts[1]);
                        }

                        List<string> ignoredDebug;
                        var predictedTags = _viterbiForGlobalLinearModel.DecodeNew(sentence, false, out ignoredDebug);
                        if (Match(goldTags, predictedTags))
                        {
                            continue;
                        }

                        // Walk both feature sequences in lock-step; stops at the shorter one.
                        var goldFeatures = new FeatureWrapper(goldTags, sentence).NextFeature().GetEnumerator();
                        var predictedFeatures = new FeatureWrapper(predictedTags, sentence).NextFeature().GetEnumerator();
                        while (goldFeatures.MoveNext() && predictedFeatures.MoveNext())
                        {
                            if (!goldFeatures.Current.Key.Equals(predictedFeatures.Current.Key))
                            {
                                // Both amounts are computed before either weight is changed.
                                var reward = 1 * Features.GetWeight(goldFeatures.Current.Value);
                                var penalty = -1 * Features.GetWeight(predictedFeatures.Current.Value);
                                WeightVector.AddToKey(goldFeatures.Current.Value, reward);
                                WeightVector.AddToKey(predictedFeatures.Current.Value, penalty);
                            }
                        }
                    }

                    AvgWeightVector.AddWeightVector(WeightVector);
                    reader.Reset();
                }

                // NOTE(review): this runs once per file, so with several files the
                // accumulated vector is divided more than once - confirm this is intended.
                AvgWeightVector.DividebyNum(iterationCount);
            }

            Console.WriteLine(DateTime.Now + " training is complete");
        }
Beispiel #6
0
        /// <summary>
        /// Runs a single training pass over <c>_inputFile</c>. For every sentence the
        /// decoder's tags are compared with the tags read from the file; on a mismatch,
        /// weights of the features that differ are raised for the file's tagging and
        /// lowered for the decoder's tagging.
        /// </summary>
        /// <remarks>
        /// The amount subtracted for a decoder feature is now derived from that same
        /// decoder feature. Previously it was derived from the reference feature, a
        /// copy-paste slip that disagreed with the multi-file <c>Train</c> overload.
        /// </remarks>
        public void Train()
        {
            const int iterationCount = 1;

            for (var i = 0; i < iterationCount; i++)
            {
                Console.WriteLine(DateTime.Now + " training iteration: " + i);
                var inputData = new ReadInputData(_inputFile);
                foreach (var line in inputData.GetSentence())
                {
                    // Split every token into its two parts: the first replaces the token
                    // in the sentence, the second is kept as the reference tag.
                    var inputTags = new List<string>(line.Count);
                    for (var j = 0; j < line.Count; j++)
                    {
                        var split = line[j].Split(new[] { ' ' });
                        line[j] = split[0];
                        inputTags.Add(split[1]);
                    }

                    List<string> temp;
                    var outputTags = _viterbiForGlobalLinearModel.Decode(line, false, out temp);
                    if (Match(inputTags, outputTags))
                    {
                        continue;
                    }

                    // Walk both feature sequences in lock-step; stops at the shorter one.
                    var inputFeature = new FeatureWrapper(inputTags, line).NextFeature().GetEnumerator();
                    var outputFeature = new FeatureWrapper(outputTags, line).NextFeature().GetEnumerator();
                    while (inputFeature.MoveNext() && outputFeature.MoveNext())
                    {
                        if (inputFeature.Current.Key.Equals(outputFeature.Current.Key))
                        {
                            continue;
                        }

                        WeightVector.AddToKey(inputFeature.Current.Value,
                            1 * Features.GetWeight(inputFeature.Current.Value));
                        // Fix: penalize the decoder's feature by its own weight.
                        WeightVector.AddToKey(outputFeature.Current.Value,
                            -1 * Features.GetWeight(outputFeature.Current.Value));
                    }
                }

                inputData.Reset();
            }

            //  _weightVector.NormalizeAllWeights(100);

            Console.WriteLine(DateTime.Now + " training is complete");
        }