/// <summary>
/// Loads the trained weight vector and feature map from disk, builds the
/// Viterbi decoder, then tags every sentence of the test file and writes
/// the result to the output file.
/// </summary>
/// <param name="debug">When true, per-sentence debug details are written alongside the tags.</param>
public void Setup(bool debug)
{
    var modelReader = new ReadModel(InputModelFile);
    var featureReader = new ReadModel(string.Concat(InputModelFile, ".featuresToK"));

    _weightVector = new WeightVector(featureReader.GetFeatureToKdDictionary());
    foreach (var entry in modelReader.ModelIterator())
    {
        _weightVector.Add(entry);
    }

    _tags = new Tags(_tagList);
    _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

    // Decode the test input sentence-by-sentence and emit tagged output.
    var reader = new ReadInputData(InputTestFile);
    var writer = new WriteModel(_outputTestFile);
    foreach (var sentence in reader.GetSentence())
    {
        List<string> debugInfo;
        var predictedTags = _viterbiForGlobalLinearModel.Decode(sentence, debug, out debugInfo);
        if (debug)
        {
            writer.WriteDataWithTagDebug(sentence, predictedTags, debugInfo);
        }
        else
        {
            writer.WriteDataWithTag(sentence, predictedTags);
        }
    }
    writer.Flush();
}
/// <summary>
/// Reads a training file where each token line is "word tag" (space separated)
/// and fills <c>InputSentences</c> with the words and <c>TagsList</c> with the
/// gold tags, one list per sentence.
/// </summary>
/// <param name="inputFile">Path of the training file to read.</param>
public void ReadInputs(string inputFile)
{
    var reader = new ReadInputData(inputFile);
    foreach (var sentence in reader.GetSentence())
    {
        var goldTags = new List<string>(sentence.Count);
        var words = new List<string>(sentence.Count);
        foreach (var token in sentence)
        {
            // First field is the word, second the tag.
            var parts = token.Split(' ');
            words.Add(parts[0]);
            goldTags.Add(parts[1]);
        }
        InputSentences.Add(words);
        TagsList.Add(goldTags);
    }
    reader.Reset();
}
/// <summary>
/// Initializes the model via <c>Init</c>, then decodes every sentence of the
/// test file with <c>ViterbiForGLM.DecodeNew</c> and writes the tagged output.
/// </summary>
/// <param name="debug">When true, per-sentence debug details are written alongside the tags.</param>
public void Setup(bool debug)
{
    Init();

    // Decode the test input sentence-by-sentence and emit tagged output.
    var reader = new ReadInputData(InputTestFile);
    var writer = new WriteModel(_outputTestFile);
    foreach (var sentence in reader.GetSentence())
    {
        List<string> debugInfo;
        var predictedTags = ViterbiForGLM.DecodeNew(sentence, debug, out debugInfo);
        if (debug)
        {
            writer.WriteDataWithTagDebug(sentence, predictedTags, debugInfo);
        }
        else
        {
            writer.WriteDataWithTag(sentence, predictedTags);
        }
    }
    writer.Flush();
}
/// <summary>
/// Averaged-perceptron training over every input file: for each of 10 passes,
/// decodes each sentence with the current weights and, on a mismatch with the
/// gold tags, promotes the gold-feature weights and demotes the predicted-feature
/// weights. The running weight vector is accumulated into <c>AvgWeightVector</c>
/// once per pass.
/// </summary>
/// <param name="inputFiles">Training files; each token line is "word tag", space separated.</param>
public void Train(string[] inputFiles)
{
    for (int fileNum = 0; fileNum < inputFiles.Length; fileNum++)
    {
        var inputFile = inputFiles[fileNum];
        Console.WriteLine(DateTime.Now + " " + inputFile + " training ");
        const int iterationCount = 10;
        for (var i = 0; i < iterationCount; i++)
        {
            // Console.WriteLine(DateTime.Now + " " + inputFile + " training iteration: " + i);
            var inputData = new ReadInputData(inputFile);
            foreach (var line in inputData.GetSentence())
            {
                // Split each "word tag" token in place: keep the word in `line`,
                // collect the gold tag into `inputTags`.
                var inputTags = new List<string>(line.Count);
                for (var j = 0; j < line.Count; j++)
                {
                    var split = line[j].Split(new char[] {' '});
                    line[j] = split[0];
                    inputTags.Add(split[1]);
                }
                List<string> temp;
                var outputTags = _viterbiForGlobalLinearModel.DecodeNew(line, false, out temp);
                // No update when the prediction already matches the gold tags.
                if (Match(inputTags, outputTags)) continue;
                var inputFeature = (new FeatureWrapper(inputTags, line)).NextFeature().GetEnumerator();
                var outputFeature = new FeatureWrapper(outputTags, line).NextFeature().GetEnumerator();
                while (inputFeature.MoveNext() && outputFeature.MoveNext())
                {
                    // Positions where gold and predicted features agree contribute nothing.
                    if (inputFeature.Current.Key.Equals(outputFeature.Current.Key)) continue;
                    // Perceptron update: +weight for the gold feature, -weight for the predicted one.
                    var inputAdd = 1*Features.GetWeight(inputFeature.Current.Value);
                    var outputRemove = -1*Features.GetWeight(outputFeature.Current.Value);
                    WeightVector.AddToKey(inputFeature.Current.Value, inputAdd);
                    WeightVector.AddToKey(outputFeature.Current.Value, outputRemove);
                }
            }
            // Accumulate the current weights once per pass for averaging.
            AvgWeightVector.AddWeightVector(WeightVector);
            inputData.Reset();
        }
        // NOTE(review): this runs once per FILE, so with several input files the
        // contributions accumulated from earlier files get divided repeatedly —
        // confirm whether the division was meant to happen only after the file loop.
        AvgWeightVector.DividebyNum(iterationCount);
    }
    Console.WriteLine(DateTime.Now+" training is complete");
}
/// <summary>
/// Reads the configured input file, strips the tag from every "word tag"
/// token (keeping only the word in the sentence), and feeds each sentence
/// to <c>GenerateMappingForSentence</c>.
/// </summary>
public void StartMapping()
{
    var reader = new ReadInputData(_inputFile);
    foreach (var sentence in reader.GetSentence())
    {
        var goldTags = new List<string>(sentence.Count);
        for (var idx = 0; idx < sentence.Count; idx++)
        {
            // First field is the word, second the tag; overwrite the token with the word.
            var parts = sentence[idx].Split(' ');
            sentence[idx] = parts[0];
            goldTags.Add(parts[1]);
        }
        GenerateMappingForSentence(sentence);
    }
    reader.Reset();
}
/// <summary>
/// Runs a single perceptron training pass over the input file: decodes each
/// sentence with the current weights and, when the prediction disagrees with
/// the gold tags, promotes the gold-feature weights and demotes the
/// predicted-feature weights.
/// </summary>
public void Train()
{
    for (var i = 0; i < 1; i++)
    {
        Console.WriteLine(DateTime.Now+" training iteration: "+ i);
        var inputData = new ReadInputData(_inputFile);
        foreach (var line in inputData.GetSentence())
        {
            // Split each "word tag" token in place: keep the word in `line`,
            // collect the gold tag into `inputTags`.
            var inputTags = new List<string>(line.Count);
            for(var j = 0; j < line.Count;j++)
            {
                var split = line[j].Split(new char[] {' '});
                line[j] = split[0];
                inputTags.Add(split[1]);
            }
            List<string> temp;
            var outputTags = _viterbiForGlobalLinearModel.Decode(line, false, out temp);
            // No update when the prediction already matches the gold tags.
            if (Match(inputTags, outputTags)) continue;
            var inputFeature = (new FeatureWrapper(inputTags, line)).NextFeature().GetEnumerator();
            var outputFeature = new FeatureWrapper(outputTags, line).NextFeature().GetEnumerator();
            while (inputFeature.MoveNext() && outputFeature.MoveNext())
            {
                // Positions where gold and predicted features agree contribute nothing.
                if (inputFeature.Current.Key.Equals(outputFeature.Current.Key)) continue;
                WeightVector.AddToKey(inputFeature.Current.Value,
                    1 * Features.GetWeight(inputFeature.Current.Value));
                // BUG FIX: demote the weight of the *predicted* (output) feature.
                // The original looked up Features.GetWeight(inputFeature.Current.Value)
                // here, which is inconsistent with the parallel Train(string[])
                // implementation and breaks the perceptron update.
                WeightVector.AddToKey(outputFeature.Current.Value,
                    -1 * Features.GetWeight(outputFeature.Current.Value));
            }
        }
        inputData.Reset();
    }
    // _weightVector.NormalizeAllWeights(100);
    Console.WriteLine(DateTime.Now+" training is complete");
}