/// <summary>
/// Loads the model named by InputModelFile into a weight vector, builds the Viterbi decoder
/// and writes the decoded tags for every sentence of InputTestFile to the output test file.
/// </summary>
/// <param name="debug">
/// Passed through to the decoder; when true the decoder's debug list is written together
/// with the tags (WriteDataWithTagDebug), otherwise only the tags are written.
/// </param>
public void Setup(bool debug)
{
    var modelReader = new ReadModel(InputModelFile);
    var featureReader = new ReadModel(string.Concat(InputModelFile, ".featuresToK"));

    // The weight vector is seeded with the dictionary from the ".featuresToK" companion file
    // and then filled with every entry the model file yields.
    _weightVector = new WeightVector(featureReader.GetFeatureToKdDictionary());
    foreach (var entry in modelReader.ModelIterator())
    {
        _weightVector.Add(entry);
    }

    _tags = new Tags(_tagList);
    _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

    // Decode the test file sentence by sentence.
    var testData = new ReadInputData(InputTestFile);
    var resultWriter = new WriteModel(_outputTestFile);
    foreach (var sentence in testData.GetSentence())
    {
        List<string> decodeTrace;
        var predictedTags = _viterbiForGlobalLinearModel.Decode(sentence, debug, out decodeTrace);

        if (!debug)
        {
            resultWriter.WriteDataWithTag(sentence, predictedTags);
            continue;
        }

        resultWriter.WriteDataWithTagDebug(sentence, predictedTags, decodeTrace);
    }

    resultWriter.Flush();
}
/// <summary>
/// Walks a key file and a tagged file in lock-step and reports precision, recall and F1
/// for entries whose value contains "LOCATION".
/// </summary>
/// <param name="keyFile">Path of the file with the expected tags.</param>
/// <param name="devFile">Path of the file with the tags being evaluated.</param>
/// <param name="dumpFile">Path of the report file; it receives the same lines that are returned.</param>
/// <returns>
/// The report text. If the keys of the two files differ at some entry, only a message naming
/// that entry is written and returned, and no scores are computed.
/// </returns>
public string Evalulate(string keyFile, string devFile, string dumpFile)
{
    var keyModel = new ReadModel(keyFile);
    var devModel = new ReadModel(devFile);
    var dumpOutputModel = new WriteModel(dumpFile);

    // Integer counters: a float stops changing under ++ once it reaches 2^24.
    int expected = 0;
    int correct = 0;
    int found = 0;
    int line = 0;
    string dump;

    // The enumerators are obtained by hand, so they must be disposed by hand as well;
    // 'using' also covers the early return on a key mismatch.
    using (var keyIter = keyModel.ModelIterator().GetEnumerator())
    using (var devIter = devModel.ModelIterator().GetEnumerator())
    {
        // Stops at the end of the shorter file; surplus entries in the other one are ignored.
        while (keyIter.MoveNext() && devIter.MoveNext())
        {
            var key = keyIter.Current;
            var dev = devIter.Current;
            line++;

            if (!key.Key.Equals(dev.Key))
            {
                dump = "line: " + line + " " + key.Key + " doesn't match " + dev.Key + "\r\n";
                dumpOutputModel.WriteLine(dump);
                dumpOutputModel.Flush();
                return dump;
            }

            bool keyIsLocation = key.Value.Contains("LOCATION");
            bool devIsLocation = dev.Value.Contains("LOCATION");

            if (keyIsLocation)
            {
                expected++;
            }

            if (devIsLocation)
            {
                found++;
                if (keyIsLocation)
                {
                    correct++;
                }
            }
        }
    }

    dump = "found: " + found + " expected: " + expected + " correct: " + correct + "\r\n";
    dumpOutputModel.WriteLine(dump);

    // Floating-point division by zero yields NaN rather than throwing, so an empty
    // denominator is reported as a score of 0 instead of "NaN".
    float precision = found > 0 ? (float) correct / found : 0f;
    float recall = expected > 0 ? (float) correct / expected : 0f;
    float f1Score = precision + recall > 0f
        ? (2 * precision * recall) / (precision + recall)
        : 0f;

    const string header = "precision\t recall \t f1score\t";
    string scores = precision.ToString(CultureInfo.InvariantCulture) + "\t" +
                    recall.ToString(CultureInfo.InvariantCulture) + "\t" +
                    f1Score.ToString(CultureInfo.InvariantCulture);

    dumpOutputModel.WriteLine(header);
    dumpOutputModel.WriteLine(scores);
    dumpOutputModel.Flush();

    dump += header + "\r\n" + scores + "\r\n";
    return dump;
}
/// <summary>
/// Builds the weight vector from the ".preceptron" and ".featuresToK" files that accompany
/// InputModelFile and creates the Viterbi decoder exposed through ViterbiForGLM.
/// </summary>
public void Init()
{
    var weightReader = new ReadModel(string.Concat(InputModelFile, ".preceptron"));
    var featureReader = new ReadModel(string.Concat(InputModelFile, ".featuresToK"));

    // The weight vector is constructed from the dictionary together with its entry count.
    var featureDictionary = featureReader.GetFeatureToKdDictionary();
    _weightVector = new WeightVector(featureDictionary, featureDictionary.Count);

    foreach (var entry in weightReader.ModelIterator())
    {
        _weightVector.Add(entry);
    }

    _tags = new Tags(_tagList);
    ViterbiForGLM = new ViterbiForGlobalLinearModel(_weightVector, _tags);
}
/// <summary>
/// Converts text whose words carry an inline "{LOCATION}" suffix into two one-token-per-line
/// files: "<paramref name="output"/>.key" (token followed by its label) and
/// "<paramref name="output"/>.key.dev" (token only).
/// </summary>
/// <param name="input">Path of the annotated text file.</param>
/// <param name="output">Path prefix for the two generated files.</param>
internal static void CreateInputForCRF(string input, string output)
{
    var source = new ReadModel(input);
    var labelledWriter = new WriteModel(string.Concat(output, ".key"));
    var plainWriter = new WriteModel(string.Concat(output, ".key.dev"));

    foreach (var textLine in source.GetNextLine())
    {
        var tokens = textLine.Split(' ');

        // Lines that split into fewer than four pieces are skipped entirely.
        if (tokens.Length < 4)
        {
            continue;
        }

        foreach (var raw in tokens)
        {
            if (raw.Trim().Length == 0)
            {
                continue;
            }

            string token;
            string label;
            if (raw.EndsWith("{LOCATION}"))
            {
                token = raw.Replace("{LOCATION}", "");
                label = "LOCATION";
            }
            else if (raw.EndsWith("{LOCATION}."))
            {
                // Keep the trailing full stop attached to the token.
                token = raw.Replace("{LOCATION}.", ".");
                label = "LOCATION";
            }
            else
            {
                token = raw;
                label = "OTHER";
            }

            labelledWriter.WriteLine(token + " " + label);
            plainWriter.WriteLine(token);
        }

        // An empty line is written to both files after each processed input line.
        labelledWriter.WriteLine("");
        plainWriter.WriteLine("");
    }

    labelledWriter.Flush();
    plainWriter.Flush();
}
/// <summary>
/// Creates a processor with a reader over <paramref name="input"/> and a writer
/// for <paramref name="output"/>.
/// </summary>
/// <param name="input">Path handed to the ReadModel stored in _reader.</param>
/// <param name="output">Path handed to the WriteModel stored in _writer.</param>
public ProcessRawText(string input, string output)
{
    this._reader = new ReadModel(input);
    this._writer = new WriteModel(output);
}
/// <summary>
/// Turns tagged raw text into two one-token-per-line files: <paramref name="output"/>
/// (token followed by " LOCATION" or " OTHER") and <paramref name="output"/> + ".dev"
/// (token only). An empty line is written between sentences.
/// </summary>
/// <param name="input">Path of the raw text file.</param>
/// <param name="output">Path of the labelled file; the unlabelled file gets the ".dev" suffix.</param>
public void Parse(string input, string output)
{
    var readModel = new ReadModel(input);
    var writeModel = new WriteModel(output);
    var writeDevModel = new WriteModel(output + ".dev");

    // Pass 1: run every line through the tag helpers and collect all tokens in order.
    var tokens = new List<string>();
    foreach (var line in readModel.GetNextLine())
    {
        var newLine = RemoveTags(line);
        newLine = ReplaceTags(newLine);
        newLine = RemoveAllTags(newLine);
        if (string.IsNullOrEmpty(newLine))
        {
            continue;
        }

        tokens.AddRange(newLine.Split(' '));
    }

    // Pass 2: label the tokens. 'location' is true between a ##LOCATIONSTARTTAG## marker
    // and the next ##ENDTAG## marker.
    bool location = false;
    var lastStr = string.Empty;
    foreach (var token in tokens)
    {
        var str = token.Trim();
        if (string.IsNullOrEmpty(str))
        {
            lastStr = "";
            continue;
        }

        // Sentence boundary: the previous token ended with '.' and is not a salutation
        // abbreviation. Emit the separator and then fall through, so the current token is
        // still processed. Skipping it here would lose the first token of every sentence,
        // and would miss a ##LOCATIONSTARTTAG## marker that opens a sentence.
        if (!location && lastStr.EndsWith(".") && !IsSalutationAbbr(lastStr))
        {
            lastStr = string.Empty;
            writeModel.WriteLine("");
            writeDevModel.WriteLine("");
        }

        if (location)
        {
            if (str.Equals("##ENDTAG##"))
            {
                location = false;
                lastStr = "";
                continue;
            }

            writeModel.WriteLine(str + " " + "LOCATION");
            writeDevModel.WriteLine(str);
            lastStr = str;
            continue;
        }

        if (str.Equals("##LOCATIONSTARTTAG##"))
        {
            lastStr = "";
            location = true;
            continue;
        }

        // An end marker seen outside a location span is dropped from the output.
        if (str.Equals("##ENDTAG##"))
        {
            lastStr = "";
            continue;
        }

        writeModel.WriteLine(str + " " + "OTHER");
        writeDevModel.WriteLine(str);
        lastStr = str;
    }

    writeModel.Flush();
    writeDevModel.Flush();
}
/// <summary>
/// Loads every word list (black list, pronouns, conjunctions, verbs, articles,
/// prepositions, suffixes, adjectives) into its corresponding set.
/// </summary>
private Config()
{
    BlackList = LoadWordSet(blackList);
    PronounSet = LoadWordSet(PronounList);
    ConjunctionSet = LoadWordSet(ConjuctionList);
    VerbSet = LoadWordSet(VerbList);
    ArticleSet = LoadWordSet(ArticleList);
    PrepositionSet = LoadWordSet(PrepositionList);
    SuffixSet = LoadWordSet(SuffixList);
    AdjectiveSet = LoadWordSet(AdjectiveList);
}

/// <summary>
/// Reads a word list and returns its entries lower-cased (invariant culture) and trimmed;
/// lines that are empty after trimming are skipped.
/// </summary>
/// <param name="path">Path of the word-list file handed to ReadModel.</param>
/// <returns>The set of normalized words found in the file.</returns>
private static HashSet<string> LoadWordSet(string path)
{
    var words = new HashSet<string>();
    var reader = new ReadModel(path);
    foreach (var line in reader.GetNextLine())
    {
        var word = line.ToLowerInvariant().Trim();
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        words.Add(word);
    }

    return words;
}