/// <summary>
/// Reads every line supplied by <c>_reader</c>, accumulates its space-separated tokens into
/// word groups, decodes each group with a <c>TestGLMViterbi</c> model and writes the group to
/// <c>_writer</c> with "{LOCATION}" appended to every token whose output tag is "LOCATION".
/// </summary>
/// <remarks>
/// A word group is closed by a token that ends with "." and for which
/// <c>IsSalutationAbbr</c> returns false (presumably a sentence end; confirm against
/// <c>IsSalutationAbbr</c>). Groups may span several input lines. Tokens still buffered when
/// the input ends are decoded and written as a final group instead of being discarded.
/// </remarks>
public void Process()
{
    var tags = new List<string> { "LOCATION", "OTHER" };
    var words = new List<string>();
    const string modelFile = "../../../LocationProjectWithFeatureTemplate/" + "data/training/tag.model.trial1";

    var testGLMViterbi = new TestGLMViterbi(modelFile, "", "", tags);
    testGLMViterbi.Init();

    foreach (var line in _reader.GetNextLine())
    {
        // Skip lines that are empty once ReplaceTags has been applied and the result trimmed.
        if (string.IsNullOrEmpty(ReplaceTags(line).Trim()))
        {
            continue;
        }

        // NOTE(review): tokens are taken from the raw line, not from the ReplaceTags result.
        // Kept as in the original; confirm this is intended.
        foreach (var split in line.Split(new[] { ' ' }))
        {
            if (string.IsNullOrEmpty(split.Trim()))
            {
                continue;
            }

            words.Add(split);

            // Ordinal comparison: "." is a literal marker, not linguistic text.
            if (!IsSalutationAbbr(split) && split.EndsWith(".", System.StringComparison.Ordinal))
            {
                WriteTaggedWords(testGLMViterbi, words);
                words.Clear();
            }
        }
    }

    // Input that does not finish on a closing token would otherwise be lost.
    if (words.Count > 0)
    {
        WriteTaggedWords(testGLMViterbi, words);
        words.Clear();
    }

    _writer.Flush();
}

/// <summary>
/// Decodes <paramref name="words"/> and writes them to <c>_writer</c> as one line, each token
/// followed by a space and tokens tagged "LOCATION" suffixed with "{LOCATION}", then writes
/// an empty line.
/// </summary>
/// <param name="testGLMViterbi">Initialised model wrapper whose <c>ViterbiForGLM</c> performs the decoding.</param>
/// <param name="words">Tokens of the word group to decode; not modified here.</param>
private void WriteTaggedWords(TestGLMViterbi testGLMViterbi, List<string> words)
{
    // The third (out) result of DecodeNew is not used by this method.
    List<string> unused;
    var outputTags = testGLMViterbi.ViterbiForGLM.DecodeNew(words, false, out unused);

    var str = new StringBuilder();
    for (var i = 0; i < outputTags.Count; i++)
    {
        str.Append(words[i]);
        if (outputTags[i].Equals("LOCATION"))
        {
            str.Append("{LOCATION}");
        }
        str.Append(" ");
    }

    _writer.WriteLine(str.ToString());
    _writer.WriteLine("");
}
/// <summary>
/// Runs a <c>TestGLMViterbi</c> pass over a fixed list of input file prefixes using the
/// model file "../../data/tag.model.withoutsecondPassImpFor407" and, when requested,
/// evaluates each result and prints it to the console.
/// </summary>
/// <param name="tags">Tag list passed to every <c>TestGLMViterbi</c> instance.</param>
/// <param name="debug">Forwarded to <c>Setup</c>; when true, evaluation is skipped.</param>
/// <param name="eval">When true and <paramref name="debug"/> is false, <c>EvaluateModel</c> is
/// called for each prefix, its result is printed and the method waits for a console line.</param>
static void Test1(List<string> tags, bool debug, bool eval)
{
    const string modelPath = "../../data/tag.model.withoutsecondPassImpFor407";

    string[] prefixes =
    {
        "../../data/training/NYT_19980403_parsed",
        "../../data/training/APW_19980314_parsed",
        "../../data/training/APW_19980424_parsed",
        "../../data/training/APW_19980429_parsed",
        "../../data/training/NYT_19980315_parsed",
        "../../data/training/NYT_19980407_parsed",
        "../../data/travelTraining/InputToCRF6",
        "../../data/travelTraining/InputToCRF7",
        "../../data/travelTraining/InputToCRF8",
        "../../data/travelTraining/InputToCRF9",
        "../../data/travelTraining/InputToCRF10",
        "../../data/travelTraining/InputToCRF11",
        "../../data/travelTraining/InputToCRF12"
    };

    foreach (string prefix in prefixes)
    {
        // Every file handled in this iteration is derived from the prefix by suffix.
        string decodedPath = prefix + ".dev.output1";

        var runner = new TestGLMViterbi(modelPath, prefix + ".key.dev", decodedPath, tags);
        runner.Setup(debug);

        // Evaluation only happens for non-debug runs that asked for it.
        if (!eval || debug)
        {
            continue;
        }

        var report = EvaluateModel(prefix + ".key", decodedPath, prefix + ".dev.evalDump");
        Console.WriteLine("training for: " + prefix);
        Console.WriteLine(report);
        Console.ReadLine();
    }
}
/// <summary>
/// Runs a <c>TestGLMViterbi</c> pass over a fixed list of input file prefixes using the
/// model file "../../data/training/tag.model.trial1" and, when requested, evaluates each
/// result and prints it to the console.
/// </summary>
/// <param name="tags">Tag list passed to every <c>TestGLMViterbi</c> instance.</param>
/// <param name="debug">Forwarded to <c>Setup</c>; it does not affect whether evaluation runs.</param>
/// <param name="eval">When true, <c>EvaluateModel</c> is called for each prefix, its result is
/// printed and the method waits for a console line.</param>
static void Test1(List<string> tags, bool debug, bool eval)
{
    const string modelPath = "../../data/training/tag.model.trial1";

    string[] prefixes =
    {
        "../../data/training/NYT_19980403_parsed",
        "../../data/training/APW_19980314_parsed",
        "../../data/training/APW_19980424_parsed",
        "../../data/training/APW_19980429_parsed",
        "../../data/training/NYT_19980315_parsed",
        "../../data/training/NYT_19980407_parsed"
    };

    foreach (string prefix in prefixes)
    {
        // Every file handled in this iteration is derived from the prefix by suffix.
        string decodedPath = prefix + ".dev.output1";

        var runner = new TestGLMViterbi(modelPath, prefix + ".key.dev", decodedPath, tags);
        runner.Setup(debug);

        if (!eval)
        {
            continue;
        }

        var report = EvaluateModel(prefix + ".key", decodedPath, prefix + ".dev.evalDump");
        Console.WriteLine(report);
        Console.ReadLine();
    }
}