Ejemplo n.º 1
0
        /// <summary>
        /// Fills a fresh weight vector with every entry yielded by the model read from
        /// <c>InputModelFile</c>, builds the Viterbi decoder on top of it, then decodes each
        /// sentence of <c>InputTestFile</c> and writes the tagged result to <c>_outputTestFile</c>.
        /// </summary>
        /// <param name="debug">
        /// Forwarded to the decoder; when true the extra debug list returned by the decoder
        /// is written next to the tags, otherwise only the sentence and its tags are written.
        /// </param>
        public void Setup(bool debug)
        {
            var modelReader = new ReadModel(InputModelFile);
            _weightVector = new WeightVector();
            foreach (var entry in modelReader.ModelIterator())
            {
                _weightVector.Add(entry);
            }

            _tags = new Tags(_tagList);
            _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

            // Decode the test input one sentence at a time.
            var sentenceReader = new ReadInputData(InputTestFile);
            var tagWriter = new WriteModel(_outputTestFile);
            foreach (var sentence in sentenceReader.GetSentence())
            {
                List<string> decoderTrace;
                var decodedTags = _viterbiForGlobalLinearModel.Decode(sentence, debug, out decoderTrace);

                if (!debug)
                {
                    tagWriter.WriteDataWithTag(sentence, decodedTags);
                    continue;
                }

                tagWriter.WriteDataWithTagDebug(sentence, decodedTags, decoderTrace);
            }

            // Nothing is flushed until every sentence has been handed to the writer.
            tagWriter.Flush();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Walks the entries of <paramref name="keyFile"/> and <paramref name="devFile"/> in
        /// lock-step and computes precision, recall and F1 for entries whose value contains
        /// "LOCATION". The report is written to <paramref name="dumpFile"/> and also returned.
        /// </summary>
        /// <param name="keyFile">File whose entries supply the expected values.</param>
        /// <param name="devFile">File whose entries supply the values being scored.</param>
        /// <param name="dumpFile">File that receives the report (or the mismatch message).</param>
        /// <returns>
        /// The report text. If the keys of a pair of entries differ, evaluation stops and the
        /// returned (and written) text is the mismatch message for that line instead.
        /// </returns>
        /// <remarks>
        /// Iteration ends as soon as either file runs out of entries; a length difference is
        /// not reported. A score whose denominator is zero is reported as 0 rather than NaN.
        /// </remarks>
        public string Evalulate(string keyFile, string devFile, string dumpFile)
        {
            var keyModel = new ReadModel(keyFile);
            var devModel = new ReadModel(devFile);

            var dumpOutputModel = new WriteModel(dumpFile);

            // Integer counters: a float stops changing under ++ once it reaches 2^24
            // and would print large counts in scientific notation.
            int expected = 0;
            int correct = 0;
            int found = 0;
            int line = 0;

            using (var keyIter = keyModel.ModelIterator().GetEnumerator())
            using (var devIter = devModel.ModelIterator().GetEnumerator())
            {
                while (keyIter.MoveNext() && devIter.MoveNext())
                {
                    var key = keyIter.Current;
                    var dev = devIter.Current;
                    line++;

                    if (!key.Key.Equals(dev.Key))
                    {
                        // The two files are out of sync; further counting would be meaningless.
                        string mismatch = "line: " + line + " " + key.Key + " doesn't match " + dev.Key + "\r\n";
                        dumpOutputModel.WriteLine(mismatch);
                        dumpOutputModel.Flush();
                        return mismatch;
                    }

                    bool keyIsLocation = key.Value.Contains("LOCATION");
                    bool devIsLocation = dev.Value.Contains("LOCATION");

                    if (keyIsLocation)
                    {
                        expected++;
                    }
                    if (devIsLocation)
                    {
                        found++;
                    }
                    if (keyIsLocation && devIsLocation)
                    {
                        correct++;
                    }
                }
            }

            // Guard every division: 0/0 on floats silently yields NaN, which would
            // otherwise propagate into the F1 score and the report.
            float precision = found > 0 ? (float) correct / found : 0f;
            float recall = expected > 0 ? (float) correct / expected : 0f;
            float scoreSum = precision + recall;
            float f1Score = scoreSum > 0f ? (2 * precision * recall) / scoreSum : 0f;

            string counts = "found: " + found + " expected: " + expected + " correct: " + correct + "\r\n";
            const string header = "precision\t recall \t f1score\t";
            string scores = precision.ToString(CultureInfo.InvariantCulture) + "\t" +
                            recall.ToString(CultureInfo.InvariantCulture) + "\t" +
                            f1Score.ToString(CultureInfo.InvariantCulture);

            dumpOutputModel.WriteLine(counts);
            dumpOutputModel.WriteLine(header);
            dumpOutputModel.WriteLine(scores);
            dumpOutputModel.Flush();

            return counts + header + "\r\n" + scores + "\r\n";
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates a mapper with an empty feature dictionary and a feature count of zero.
        /// </summary>
        /// <param name="inputFile">Stored as the input file for later use.</param>
        /// <param name="outputFile">Passed to the <c>WriteModel</c> created here.</param>
        /// <param name="tagList">Stored as-is and also used to build the <c>Tags</c> instance.</param>
        public MapFeaturesToK(string inputFile, string outputFile, List<string> tagList)
        {
            // The writer is created first, exactly as before, in case its constructor has side effects.
            this.writeModel = new WriteModel(outputFile);

            this._featureCount = 0;
            this.DictFeaturesToK = new Dictionary<string, int>();

            this._inputFile = inputFile;
            this._tagList = tagList;
            this._tags = new Tags(tagList);
        }
        /// <summary>
        /// Converts the text read from <paramref name="input"/> into one-token-per-line output.
        /// Two files are written: <paramref name="output"/> with each token followed by its
        /// label ("LOCATION" or "OTHER"), and <paramref name="output"/> + ".dev" with the bare
        /// tokens. An empty line is written to both files at each detected sentence boundary.
        /// </summary>
        /// <param name="input">File to read lines from.</param>
        /// <param name="output">Labelled output file; the unlabelled copy gets the ".dev" suffix.</param>
        /// <remarks>
        /// Tokens between "##LOCATIONSTARTTAG##" and "##ENDTAG##" are labelled LOCATION; the
        /// marker tokens themselves are never written. The markers are presumably inserted by
        /// ReplaceTags - confirm against that helper.
        /// </remarks>
        public void Parse(string input, string output)
        {
            var readModel = new ReadModel(input);
            var writeModel = new WriteModel(output);
            var writeDevModel = new WriteModel(output + ".dev");
            var tokens = new List<string>();

            // Pass 1: clean every line with the tag helpers and collect its space-separated tokens.
            foreach (var line in readModel.GetNextLine())
            {
                var newLine = RemoveTags(line);
                newLine = ReplaceTags(newLine);
                newLine = RemoveAllTags(newLine);
                if (string.IsNullOrEmpty(newLine))
                {
                    continue;
                }

                tokens.AddRange(newLine.Split(' '));
            }

            // Pass 2: label the tokens and insert sentence separators.
            bool location = false;
            var lastStr = string.Empty;

            foreach (var token in tokens)
            {
                var str = token.Trim();
                if (string.IsNullOrEmpty(str))
                {
                    lastStr = string.Empty;
                    continue;
                }

                // Sentence boundary: the previous written token ended with "." and is not a
                // salutation abbreviation. Emit the separator and then keep processing the
                // current token; skipping it here would drop the first token of every sentence.
                if (!location && lastStr.EndsWith(".") && !IsSalutationAbbr(lastStr))
                {
                    lastStr = string.Empty;
                    writeModel.WriteLine("");
                    writeDevModel.WriteLine("");
                }

                // An end marker always leaves location mode and is never written.
                if (str.Equals("##ENDTAG##"))
                {
                    location = false;
                    lastStr = string.Empty;
                    continue;
                }

                // A start marker only opens a span when outside one; inside a span it is
                // written like any other token, as the original code did.
                if (!location && str.Equals("##LOCATIONSTARTTAG##"))
                {
                    location = true;
                    lastStr = string.Empty;
                    continue;
                }

                writeModel.WriteLine(str + " " + (location ? "LOCATION" : "OTHER"));
                writeDevModel.WriteLine(str);
                lastStr = str;
            }

            writeModel.Flush();
            writeDevModel.Flush();
        }