public void Process()
        {
            var tags = new List<string> { "LOCATION", "OTHER" };
            var words = new List<string>();

            const string modelFile = "../../../LocationProjectWithFeatureTemplate/"+
                                    "data/training/tag.model.trial1";

            var testGLMViterbi = new TestGLMViterbi(modelFile, "", "", tags);
            testGLMViterbi.Init();

            foreach (var line in _reader.GetNextLine())
            {
                var temp = ReplaceTags(line).Trim();
                if (string.IsNullOrEmpty(temp))
                    continue;
                var splits = line.Split(new[] {' '});

                foreach (var split in splits)
                {
                    if (string.IsNullOrEmpty(split.Trim()))
                        continue;
                    if (!IsSalutationAbbr(split) && split.EndsWith("."))
                    {
                        words.Add(split);
                        var tempList = new List<string>();
                        var outputTags = testGLMViterbi.ViterbiForGLM.DecodeNew(words, false, out tempList);
                        var str = new StringBuilder();
                        for (var i = 0; i < outputTags.Count; i++)
                        {
                            str.Append(words[i]);
                            if (outputTags[i].Equals("LOCATION"))
                            {
                                str.Append("{LOCATION}");
                            }
                            str.Append(" ");
                        }
                        _writer.WriteLine(str.ToString());
                        _writer.WriteLine("");
                        words.Clear();
                    }
                    else
                    {
                        words.Add(split);
                    }
                }

            }
            _writer.Flush();
        }
// Example #2
// 0
        /// <summary>
        /// Runs the tagger over each hard-coded corpus and, when requested, prints an
        /// evaluation dump for it.
        /// </summary>
        /// <param name="tags">Tag set passed to every <c>TestGLMViterbi</c> instance.</param>
        /// <param name="debug">Forwarded to <c>Setup</c>; evaluation is skipped when this is true.</param>
        /// <param name="eval">When true (and <paramref name="debug"/> is false), the output is evaluated against the key file.</param>
        static void Test1(List<string> tags, bool debug, bool eval)
        {
            // The same model is used for every corpus.
            const string modelPath = "../../data/tag.model.withoutsecondPassImpFor407";

            string[] corpora =
            {
                "../../data/training/NYT_19980403_parsed",
                "../../data/training/APW_19980314_parsed",
                "../../data/training/APW_19980424_parsed",
                "../../data/training/APW_19980429_parsed",
                "../../data/training/NYT_19980315_parsed",
                "../../data/training/NYT_19980407_parsed",
                "../../data/travelTraining/InputToCRF6",
                "../../data/travelTraining/InputToCRF7",
                "../../data/travelTraining/InputToCRF8",
                "../../data/travelTraining/InputToCRF9",
                "../../data/travelTraining/InputToCRF10",
                "../../data/travelTraining/InputToCRF11",
                "../../data/travelTraining/InputToCRF12",
            };

            bool shouldEvaluate = eval && !debug;

            for (int index = 0; index < corpora.Length; index++)
            {
                string corpus = corpora[index];
                string devInputPath = corpus + ".key.dev";
                string taggedOutputPath = corpus + ".dev.output1";

                TestGLMViterbi tagger = new TestGLMViterbi(modelPath, devInputPath, taggedOutputPath, tags);
                tagger.Setup(debug);

                if (!shouldEvaluate)
                {
                    continue;
                }

                string keyPath = corpus + ".key";
                string evalDumpPath = corpus + ".dev.evalDump";

                var report = EvaluateModel(keyPath, taggedOutputPath, evalDumpPath);
                Console.WriteLine("training for: "+ corpus);
                Console.WriteLine(report);

                // Wait for a line of console input before moving on to the next corpus.
                Console.ReadLine();
            }
        }
        /// <summary>
        /// Tags the development split of each hard-coded training corpus with the
        /// <c>tag.model.trial1</c> model and optionally prints an evaluation dump.
        /// </summary>
        /// <param name="tags">Tag set passed to every <c>TestGLMViterbi</c> instance.</param>
        /// <param name="debug">Forwarded unchanged to <c>Setup</c>.</param>
        /// <param name="eval">When true, the output is evaluated against the key file and the dump is printed.</param>
        static void Test1(List<string> tags, bool debug, bool eval)
        {
            // One model file serves all corpora.
            const string trialModel = "../../data/training/tag.model.trial1";

            var trainingSets = new List<string>
            {
                "../../data/training/NYT_19980403_parsed",
                "../../data/training/APW_19980314_parsed",
                "../../data/training/APW_19980424_parsed",
                "../../data/training/APW_19980429_parsed",
                "../../data/training/NYT_19980315_parsed",
                "../../data/training/NYT_19980407_parsed"
            };

            foreach (string basePath in trainingSets)
            {
                string devPath = basePath + ".key.dev";
                string resultPath = basePath + ".dev.output1";

                var runner = new TestGLMViterbi(trialModel, devPath, resultPath, tags);
                runner.Setup(debug);

                if (!eval)
                {
                    continue;
                }

                var summary = EvaluateModel(basePath + ".key", resultPath, basePath + ".dev.evalDump");
                Console.WriteLine(summary);

                // Wait for a line of console input before processing the next corpus.
                Console.ReadLine();
            }
        }