示例#1
0
        /// <summary>
        /// Builds the weight vector from the model files, creates the Viterbi decoder and
        /// tags every sentence read from <c>InputTestFile</c>, writing the results to
        /// <c>_outputTestFile</c>.
        /// </summary>
        /// <param name="debug">
        /// Forwarded to the decoder; when true the debug list returned by the decoder is
        /// written next to each tagged sentence.
        /// </param>
        public void Setup(bool debug)
        {
            // Both readers are created up front, in the same order as before.
            var weightReader = new ReadModel(InputModelFile);
            var featureIndexReader = new ReadModel(InputModelFile + ".featuresToK");

            _weightVector = new WeightVector(featureIndexReader.GetFeatureToKdDictionary());
            foreach (var weightEntry in weightReader.ModelIterator())
            {
                _weightVector.Add(weightEntry);
            }

            _tags = new Tags(_tagList);
            _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

            // Decode the test input sentence by sentence and write each result out.
            var sentenceSource = new ReadInputData(InputTestFile);
            var resultWriter = new WriteModel(_outputTestFile);
            foreach (var sentence in sentenceSource.GetSentence())
            {
                List<string> decodeTrace;
                var predictedTags = _viterbiForGlobalLinearModel.Decode(sentence, debug, out decodeTrace);

                if (!debug)
                {
                    resultWriter.WriteDataWithTag(sentence, predictedTags);
                }
                else
                {
                    resultWriter.WriteDataWithTagDebug(sentence, predictedTags, decodeTrace);
                }
            }

            resultWriter.Flush();
        }
示例#2
0
        /// <summary>
        /// Walks the entries of <paramref name="keyFile"/> and <paramref name="devFile"/> in
        /// lock-step and scores how well the "LOCATION" values in the dev file match the key file.
        /// </summary>
        /// <param name="keyFile">Path of the reference (key) model file.</param>
        /// <param name="devFile">Path of the model file being evaluated.</param>
        /// <param name="dumpFile">Path of the report file that receives the same text that is returned.</param>
        /// <returns>
        /// The report text: either a single "doesn't match" line when the entry keys of the two
        /// files diverge, or the found/expected/correct counts followed by precision, recall and F1.
        /// </returns>
        /// <remarks>
        /// Iteration stops as soon as either file runs out of entries; a length difference
        /// between the two files is not reported.
        /// A ratio whose denominator is zero is reported as 0 rather than NaN.
        /// </remarks>
        public string Evalulate(string keyFile, string devFile, string dumpFile)
        {
            var keyModel = new ReadModel(keyFile);
            var devModel = new ReadModel(devFile);

            var dumpOutputModel = new WriteModel(dumpFile);

            // Integer counters: these are counts, and float loses exactness above 2^24.
            int expected = 0;
            int correct = 0;
            int found = 0;
            int line = 0;

            string dump;

            // Dispose both enumerators on every exit path (including the early return below)
            // so whatever the iterators hold open is released deterministically.
            using (var keyIter = keyModel.ModelIterator().GetEnumerator())
            using (var devIter = devModel.ModelIterator().GetEnumerator())
            {
                while (keyIter.MoveNext() && devIter.MoveNext())
                {
                    var key = keyIter.Current;
                    var dev = devIter.Current;
                    line++;

                    // The two files must list the same entry keys in the same order.
                    if (!key.Key.Equals(dev.Key))
                    {
                        dump = "line: " + line + " " + key.Key + " doesn't match " + dev.Key + "\r\n";
                        dumpOutputModel.WriteLine(dump);
                        dumpOutputModel.Flush();
                        return dump;
                    }

                    bool keyIsLocation = key.Value.Contains("LOCATION");
                    bool devIsLocation = dev.Value.Contains("LOCATION");

                    if (keyIsLocation)
                    {
                        expected++;
                    }

                    if (devIsLocation)
                    {
                        found++;
                        if (keyIsLocation)
                        {
                            correct++;
                        }
                    }
                }
            }

            dump = "found: " + found + " expected: " + expected + " correct: " + correct + "\r\n";
            dumpOutputModel.WriteLine(dump);

            // Guard the divisions: with no predictions or no reference locations the plain
            // quotient would be NaN and poison the F1 score as well.
            float precision = found == 0 ? 0f : (float)correct / found;
            float recall = expected == 0 ? 0f : (float)correct / expected;
            // correct == 0 implies precision + recall == 0, so F1 is defined as 0 there.
            float f1Score = correct == 0 ? 0f : (2 * precision * recall) / (precision + recall);

            const string header = "precision\t recall \t f1score\t";
            string scores = precision.ToString(CultureInfo.InvariantCulture) + "\t" +
                            recall.ToString(CultureInfo.InvariantCulture) + "\t" +
                            f1Score.ToString(CultureInfo.InvariantCulture);

            dump += header + "\r\n";
            dumpOutputModel.WriteLine(header);
            dump += scores + "\r\n";
            dumpOutputModel.WriteLine(scores);
            dumpOutputModel.Flush();
            return dump;
        }
        /// <summary>
        /// Loads the perceptron weights and the feature index that belong to
        /// <c>InputModelFile</c>, then prepares the tag set and the Viterbi decoder.
        /// </summary>
        public void Init()
        {
            var weightReader = new ReadModel(string.Concat(InputModelFile, ".preceptron"));
            var featureIndexReader = new ReadModel(InputModelFile + ".featuresToK");

            // The weight vector is created from the feature dictionary and its entry count.
            var featureIndex = featureIndexReader.GetFeatureToKdDictionary();
            _weightVector = new WeightVector(featureIndex, featureIndex.Count);

            foreach (var weightEntry in weightReader.ModelIterator())
            {
                _weightVector.Add(weightEntry);
            }

            _tags = new Tags(_tagList);
            ViterbiForGLM = new ViterbiForGlobalLinearModel(_weightVector, _tags);
        }
        /// <summary>
        /// Converts text annotated inline with <c>{LOCATION}</c> markers into two one-token-per-line
        /// files: <c>output + ".key"</c> (token followed by its LOCATION/OTHER label) and
        /// <c>output + ".key.dev"</c> (token only).
        /// </summary>
        /// <param name="input">Path of the annotated input file.</param>
        /// <param name="output">Base path of the two output files.</param>
        /// <remarks>
        /// Lines that split into fewer than four space-separated pieces are skipped entirely.
        /// An empty line is written to both files after every processed input line.
        /// </remarks>
        internal static void CreateInputForCRF(string input, string output)
        {
            const string locationTag = "{LOCATION}";
            const string locationTagWithPeriod = "{LOCATION}.";

            var reader = new ReadModel(input);
            var keyWriter = new WriteModel(string.Concat(output, ".key"));
            var devWriter = new WriteModel(string.Concat(output, ".key.dev"));

            foreach (var line in reader.GetNextLine())
            {
                var words = line.Split(new[] {' '});

                if (words.Length < 4)
                {
                    continue;
                }

                foreach (var word in words)
                {
                    if (string.IsNullOrEmpty(word.Trim()))
                    {
                        continue;
                    }

                    // The markers are fixed machine tokens, so they are matched ordinally; this
                    // also agrees with string.Replace, which is always ordinal.
                    if (word.EndsWith(locationTag, System.StringComparison.Ordinal))
                    {
                        var token = word.Replace(locationTag, "");
                        keyWriter.WriteLine(token + " " + "LOCATION");
                        devWriter.WriteLine(token);
                    }
                    else if (word.EndsWith(locationTagWithPeriod, System.StringComparison.Ordinal))
                    {
                        // Keep the sentence-final period attached to the token.
                        var token = word.Replace(locationTagWithPeriod, ".");
                        keyWriter.WriteLine(token + " " + "LOCATION");
                        devWriter.WriteLine(token);
                    }
                    else
                    {
                        keyWriter.WriteLine(word + " " + "OTHER");
                        devWriter.WriteLine(word);
                    }
                }

                // Blank line separates the tokens of consecutive input lines.
                keyWriter.WriteLine("");
                devWriter.WriteLine("");
            }

            keyWriter.Flush();
            devWriter.Flush();
        }
 /// <summary>
 /// Creates the processor with a <c>ReadModel</c> over <paramref name="input"/> and a
 /// <c>WriteModel</c> over <paramref name="output"/>.
 /// </summary>
 /// <param name="input">Path handed to the reader.</param>
 /// <param name="output">Path handed to the writer.</param>
 public ProcessRawText(string input, string output)
 {
     this._reader = new ReadModel(input);
     this._writer = new WriteModel(output);
 }
        /// <summary>
        /// Converts raw tagged text into one-token-per-line training data.
        /// <c>output</c> receives "token LABEL" lines (LABEL is LOCATION or OTHER) and
        /// <c>output + ".dev"</c> receives the bare tokens; an empty line marks a sentence boundary.
        /// </summary>
        /// <param name="input">Path of the raw input file.</param>
        /// <param name="output">Base path of the two output files.</param>
        /// <remarks>
        /// Each input line is passed through <c>RemoveTags</c>, <c>ReplaceTags</c> and
        /// <c>RemoveAllTags</c> and then split on spaces. Tokens between
        /// <c>##LOCATIONSTARTTAG##</c> and <c>##ENDTAG##</c> are labelled LOCATION; the marker
        /// tokens themselves are never written.
        /// </remarks>
        public void Parse(string input, string output)
        {
            var readModel = new ReadModel(input);
            var writeModel = new WriteModel(output);
            var writeDevModel = new WriteModel(output + ".dev");
            var tokens = new List<string>();

            // Pass 1: normalise every line and collect its space-separated tokens.
            foreach (var line in readModel.GetNextLine())
            {
                var newLine = RemoveTags(line);
                newLine = ReplaceTags(newLine);
                newLine = RemoveAllTags(newLine);
                if (string.IsNullOrEmpty(newLine))
                {
                    continue;
                }

                tokens.AddRange(newLine.Split(new char[] {' '}));
            }

            // Pass 2: label the tokens while tracking whether we are inside a location span.
            bool location = false;
            var lastStr = string.Empty;

            foreach (var token in tokens)
            {
                var str = token.Trim();
                if (string.IsNullOrEmpty(str))
                {
                    lastStr = "";
                    continue;
                }

                // Sentence boundary: the previous written token ended with a period and is not a
                // salutation abbreviation. Emit the separator and then keep processing the
                // current token. The earlier code skipped the current token here, which dropped
                // the first word of every new sentence and could swallow a
                // ##LOCATIONSTARTTAG## marker.
                if (!location
                    && lastStr.EndsWith(".", System.StringComparison.Ordinal)
                    && !IsSalutationAbbr(lastStr))
                {
                    lastStr = string.Empty;
                    writeModel.WriteLine("");
                    writeDevModel.WriteLine("");
                }

                if (location)
                {
                    if (str.Equals("##ENDTAG##"))
                    {
                        location = false;
                        lastStr = "";
                        continue;
                    }

                    writeModel.WriteLine(str + " " + "LOCATION");
                    writeDevModel.WriteLine(str);
                    lastStr = str;
                    continue;
                }

                if (str.Equals("##LOCATIONSTARTTAG##"))
                {
                    lastStr = "";
                    location = true;
                    continue;
                }

                // An end tag outside a location span is dropped without being written.
                if (str.Equals("##ENDTAG##"))
                {
                    lastStr = "";
                    continue;
                }

                writeModel.WriteLine(str + " " + "OTHER");
                writeDevModel.WriteLine(str);
                lastStr = str;
            }

            writeModel.Flush();
            writeDevModel.Flush();
        }
示例#7
0
        /// <summary>
        /// Loads every word list used by the configuration into its corresponding set.
        /// Each list is read with <see cref="LoadNormalizedWordSet"/>, so all entries are
        /// lower-cased (invariant culture), trimmed, and blank lines are ignored.
        /// </summary>
        private Config()
        {
            BlackList = LoadNormalizedWordSet(blackList);
            PronounSet = LoadNormalizedWordSet(PronounList);
            ConjunctionSet = LoadNormalizedWordSet(ConjuctionList);
            VerbSet = LoadNormalizedWordSet(VerbList);
            ArticleSet = LoadNormalizedWordSet(ArticleList);
            PrepositionSet = LoadNormalizedWordSet(PrepositionList);
            SuffixSet = LoadNormalizedWordSet(SuffixList);
            AdjectiveSet = LoadNormalizedWordSet(AdjectiveList);
        }

        /// <summary>
        /// Reads the word list at <paramref name="path"/> into a set, one entry per line.
        /// </summary>
        /// <param name="path">Path of the word-list file, handed to <c>ReadModel</c>.</param>
        /// <returns>The distinct, lower-cased and trimmed non-empty lines of the file.</returns>
        private static HashSet<string> LoadNormalizedWordSet(string path)
        {
            var words = new HashSet<string>();
            var reader = new ReadModel(path);
            foreach (var line in reader.GetNextLine())
            {
                var word = line.ToLowerInvariant().Trim();
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                words.Add(word);
            }

            return words;
        }