/// <summary>
/// Builds the decoder from the model file and tags every sentence of the test input,
/// writing the tagged sentences to the configured output file.
/// </summary>
/// <param name="debug">
/// Forwarded to the decoder; when <c>true</c> each sentence is written together with the
/// debug list returned by the decoder, otherwise only the sentence and its tags are written.
/// </param>
public void Setup(bool debug)
{
    // Fill a fresh weight vector with every entry the model file yields.
    var modelReader = new ReadModel(InputModelFile);
    _weightVector = new WeightVector();
    foreach (var entry in modelReader.ModelIterator())
    {
        _weightVector.Add(entry);
    }

    _tags = new Tags(_tagList);
    _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

    // Sentences are pulled one at a time from the test input and decoded in order.
    var testInput = new ReadInputData(InputTestFile);
    var resultWriter = new WriteModel(_outputTestFile);
    foreach (var sentence in testInput.GetSentence())
    {
        List<string> debugInfo;
        var predictedTags = _viterbiForGlobalLinearModel.Decode(sentence, debug, out debugInfo);

        if (!debug)
        {
            resultWriter.WriteDataWithTag(sentence, predictedTags);
            continue;
        }

        resultWriter.WriteDataWithTagDebug(sentence, predictedTags, debugInfo);
    }

    resultWriter.Flush();
}
/// <summary>
/// Compares the entries of a key file with the entries of a dev file, pair by pair, and
/// reports precision, recall and F1 for entries whose value contains "LOCATION".
/// </summary>
/// <param name="keyFile">Path of the reference (key) file.</param>
/// <param name="devFile">Path of the file being scored.</param>
/// <param name="dumpFile">Path of the file the report is written to.</param>
/// <returns>
/// The report text that was written to <paramref name="dumpFile"/>. If the keys of a pair of
/// entries differ, evaluation stops and the returned text is the mismatch message instead.
/// </returns>
/// <remarks>
/// Evaluation stops silently at the end of the shorter file; surplus entries in the other
/// file are not counted. A ratio whose denominator is zero is reported as 0 rather than NaN.
/// </remarks>
public string Evalulate(string keyFile, string devFile, string dumpFile)
{
    var keyModel = new ReadModel(keyFile);
    var devModel = new ReadModel(devFile);
    var dumpOutputModel = new WriteModel(dumpFile);

    // Integer counters: exact for any realistic file size and printed without exponent notation.
    int expected = 0;
    int correct = 0;
    int found = 0;
    int line = 0;
    string dump;

    // Dispose both enumerators on every exit path, including the early mismatch return.
    using (var keyIter = keyModel.ModelIterator().GetEnumerator())
    using (var devIter = devModel.ModelIterator().GetEnumerator())
    {
        while (keyIter.MoveNext() && devIter.MoveNext())
        {
            var key = keyIter.Current;
            var dev = devIter.Current;
            line++;

            if (!key.Key.Equals(dev.Key))
            {
                dump = "line: " + line + " " + key.Key + " doesn't match " + dev.Key + "\r\n";
                dumpOutputModel.WriteLine(dump);
                dumpOutputModel.Flush();
                return dump;
            }

            bool keyIsLocation = key.Value.Contains("LOCATION");
            bool devIsLocation = dev.Value.Contains("LOCATION");

            if (keyIsLocation)
            {
                expected++;
            }

            if (devIsLocation)
            {
                found++;
                if (keyIsLocation)
                {
                    correct++;
                }
            }
        }
    }

    // Guard every division: 0/0 would otherwise yield NaN and poison the F1 score.
    float precision = found == 0 ? 0f : (float)correct / found;
    float recall = expected == 0 ? 0f : (float)correct / expected;
    // correct > 0 implies found > 0 and expected > 0, so precision + recall is non-zero here.
    float f1Score = correct == 0 ? 0f : (2 * precision * recall) / (precision + recall);

    string counts = "found: " + found + " expected: " + expected + " correct: " + correct + "\r\n";
    string header = "precision\t recall \t f1score\t";
    string metrics = precision.ToString(CultureInfo.InvariantCulture) + "\t" +
                     recall.ToString(CultureInfo.InvariantCulture) + "\t" +
                     f1Score.ToString(CultureInfo.InvariantCulture);

    dumpOutputModel.WriteLine(counts);
    dumpOutputModel.WriteLine(header);
    dumpOutputModel.WriteLine(metrics);
    dumpOutputModel.Flush();

    dump = counts + header + "\r\n" + metrics + "\r\n";
    return dump;
}
/// <summary>
/// Converts the text read from <paramref name="input"/> into one-token-per-line output:
/// a labelled file at <paramref name="output"/> ("token LOCATION" / "token OTHER") and an
/// unlabelled file at <paramref name="output"/> + ".dev" containing only the tokens.
/// </summary>
/// <param name="input">Path of the source text.</param>
/// <param name="output">Path of the labelled output file; the dev file path is derived from it.</param>
/// <remarks>
/// Tokens between "##LOCATIONSTARTTAG##" and "##ENDTAG##" are labelled LOCATION; the marker
/// tokens themselves are not written. Presumably the markers are introduced by ReplaceTags —
/// confirm against that helper. A blank line is written to both files when the previous
/// token ends with "." and is not a salutation abbreviation.
/// </remarks>
public void Parse(string input, string output)
{
    var readModel = new ReadModel(input);
    var writeModel = new WriteModel(output);
    var writeDevModel = new WriteModel(output + ".dev");

    // Flatten all cleaned lines into a single token stream.
    var temp = new List<string>();
    foreach (var line in readModel.GetNextLine())
    {
        var newLine = RemoveTags(line);
        newLine = ReplaceTags(newLine);
        newLine = RemoveAllTags(newLine);
        if (string.IsNullOrEmpty(newLine))
        {
            continue;
        }

        temp.AddRange(newLine.Split(' '));
    }

    bool location = false;
    var lastStr = string.Empty;
    foreach (var tempStr in temp)
    {
        var str = tempStr.Trim();
        if (string.IsNullOrEmpty(str))
        {
            lastStr = "";
            continue;
        }

        // Sentence boundary: emit a blank separator line, then fall through so the current
        // token (the first token of the next sentence, possibly a location marker) is still
        // processed instead of being dropped.
        if (!location && lastStr.EndsWith(".", System.StringComparison.Ordinal) && !IsSalutationAbbr(lastStr))
        {
            lastStr = string.Empty;
            writeModel.WriteLine("");
            writeDevModel.WriteLine("");
        }

        if (location)
        {
            if (str.Equals("##ENDTAG##"))
            {
                location = false;
                lastStr = "";
                continue;
            }

            writeModel.WriteLine(str + " " + "LOCATION");
            writeDevModel.WriteLine(str);
            lastStr = str;
            continue;
        }

        if (str.Equals("##LOCATIONSTARTTAG##"))
        {
            lastStr = "";
            location = true;
            continue;
        }

        // An end marker outside a location span closes some other tag; skip it.
        if (str.Equals("##ENDTAG##"))
        {
            lastStr = "";
            continue;
        }

        writeModel.WriteLine(str + " " + "OTHER");
        writeDevModel.WriteLine(str);
        lastStr = str;
    }

    writeModel.Flush();
    writeDevModel.Flush();
}