/// <summary>
/// Trains <c>WeightVector</c> on each of the given inputs in turn and
/// accumulates the result into <c>AvgWeightVector</c>.
/// </summary>
/// <remarks>
/// For every input, ten passes are made over its sentences. Each token of a
/// sentence is split on a single space: the first part replaces the token in
/// the sentence (the sentence is modified in place) and the second part is
/// collected as its reference tag, so a token without a space throws
/// <see cref="IndexOutOfRangeException"/>. The sentence is then decoded with
/// <c>_viterbiForGlobalLinearModel.DecodeNew</c>. When <c>Match</c> accepts the
/// decoded tags nothing is updated; otherwise the features of both taggings
/// are walked in parallel and, wherever their keys differ, <c>WeightVector</c>
/// is raised for the reference feature and lowered for the decoded feature by
/// that feature's own <c>Features.GetWeight</c> value.
/// After every pass <c>WeightVector</c> is added into <c>AvgWeightVector</c>,
/// and after the last pass over an input <c>AvgWeightVector</c> is divided by
/// the number of passes.
/// </remarks>
/// <param name="inputFiles">
/// Training inputs, each handed to the <c>ReadInputData</c> constructor
/// (presumably file paths - confirm against <c>ReadInputData</c>).
/// </param>
public void Train(string[] inputFiles)
{
    const int iterationCount = 10;

    foreach (var inputFile in inputFiles)
    {
        Console.WriteLine(DateTime.Now + " " + inputFile + " training ");

        for (var pass = 0; pass < iterationCount; pass++)
        {
            // Per-pass progress logging is currently disabled:
            // Console.WriteLine(DateTime.Now + " " + inputFile + " training iteration: " + pass);
            var reader = new ReadInputData(inputFile);

            foreach (var sentence in reader.GetSentence())
            {
                // Separate every "word tag" token: keep the word in the
                // sentence and remember the tag as the reference answer.
                var referenceTags = new List<string>(sentence.Count);
                for (var position = 0; position < sentence.Count; position++)
                {
                    var parts = sentence[position].Split(new char[] { ' ' });
                    sentence[position] = parts[0];
                    referenceTags.Add(parts[1]);
                }

                // The extra out value of the decoder is not used here.
                List<string> unused;
                var decodedTags = _viterbiForGlobalLinearModel.DecodeNew(sentence, false, out unused);

                if (!Match(referenceTags, decodedTags))
                {
                    var referenceFeatures = new FeatureWrapper(referenceTags, sentence).NextFeature().GetEnumerator();
                    var decodedFeatures = new FeatureWrapper(decodedTags, sentence).NextFeature().GetEnumerator();

                    // Advance both feature streams together; stop as soon as either ends.
                    while (referenceFeatures.MoveNext() && decodedFeatures.MoveNext())
                    {
                        var reference = referenceFeatures.Current;
                        var decoded = decodedFeatures.Current;

                        if (!reference.Key.Equals(decoded.Key))
                        {
                            var reward = 1 * Features.GetWeight(reference.Value);
                            var penalty = -1 * Features.GetWeight(decoded.Value);
                            WeightVector.AddToKey(reference.Value, reward);
                            WeightVector.AddToKey(decoded.Value, penalty);
                        }
                    }
                }
            }

            AvgWeightVector.AddWeightVector(WeightVector);
            reader.Reset();
        }

        // NOTE(review): this division runs once per input, so with more than
        // one input the totals accumulated for earlier inputs are divided
        // again (unless AvgWeightVector is reset elsewhere) - confirm this is
        // the intended averaging.
        AvgWeightVector.DividebyNum(iterationCount);
    }

    Console.WriteLine(DateTime.Now + " training is complete");
}
/// <summary>
/// Trains <c>WeightVector</c> with a single pass over the sentences read from
/// <c>_inputFile</c>.
/// </summary>
/// <remarks>
/// Each token of a sentence is split on a single space: the first part
/// replaces the token in the sentence (the sentence is modified in place) and
/// the second part is collected as its reference tag, so a token without a
/// space throws <see cref="IndexOutOfRangeException"/>. The sentence is then
/// decoded with <c>_viterbiForGlobalLinearModel.Decode</c>. When <c>Match</c>
/// accepts the decoded tags nothing is updated; otherwise the features of both
/// taggings are walked in parallel and, wherever their keys differ,
/// <c>WeightVector</c> is raised for the reference feature and lowered for the
/// decoded feature, each by its own <c>Features.GetWeight</c> value.
/// </remarks>
public void Train()
{
    const int iterationCount = 1;

    for (var i = 0; i < iterationCount; i++)
    {
        Console.WriteLine(DateTime.Now + " training iteration: " + i);
        var inputData = new ReadInputData(_inputFile);

        foreach (var line in inputData.GetSentence())
        {
            // Separate every "word tag" token: keep the word in the sentence
            // and remember the tag as the reference answer.
            var inputTags = new List<string>(line.Count);
            for (var j = 0; j < line.Count; j++)
            {
                var split = line[j].Split(new char[] { ' ' });
                line[j] = split[0];
                inputTags.Add(split[1]);
            }

            // The extra out value of the decoder is not used here.
            List<string> temp;
            var outputTags = _viterbiForGlobalLinearModel.Decode(line, false, out temp);
            if (Match(inputTags, outputTags))
            {
                continue;
            }

            var inputFeature = new FeatureWrapper(inputTags, line).NextFeature().GetEnumerator();
            var outputFeature = new FeatureWrapper(outputTags, line).NextFeature().GetEnumerator();

            // Advance both feature streams together; stop as soon as either ends.
            while (inputFeature.MoveNext() && outputFeature.MoveNext())
            {
                if (inputFeature.Current.Key.Equals(outputFeature.Current.Key))
                {
                    continue;
                }

                var inputAdd = 1 * Features.GetWeight(inputFeature.Current.Value);
                // The penalty is looked up for the decoded feature itself.
                // It was previously taken from the reference feature, so the
                // decoded feature was lowered by another feature's weight.
                var outputRemove = -1 * Features.GetWeight(outputFeature.Current.Value);
                WeightVector.AddToKey(inputFeature.Current.Value, inputAdd);
                WeightVector.AddToKey(outputFeature.Current.Value, outputRemove);
            }
        }

        inputData.Reset();
    }

    // Weight normalisation is currently disabled:
    // _weightVector.NormalizeAllWeights(100);
    Console.WriteLine(DateTime.Now + " training is complete");
}