/// <summary>
/// Console entry point: classifies one hard-coded sample question with
/// <c>CClassifyQuestion.GetScategory</c> and writes the result to standard output.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Disabled pipeline, kept for reference: attribute/category frequency extraction.
    /*
    CAttributeClassification client = new CAttributeClassification();
    client.ReadRealname(@"D:\cuiwanyun\qa\data\SatoriCategoryToRealname.txt");
    client.ReadRDF(@"\\graph012\e$\v-wacui\NewSatori\qaFullShort_sampleRDF.txt");
    client.OutputAttributeFrequency(@"D:\cuiwanyun\qa\data\AttributeCategoryFrequency_Satorinew.txt");
    client.FindMostcategory(@"D:\cuiwanyun\qa\data\AttributeCategoryFrequency_Satorinew.txt", @"D:\cuiwanyun\qa\data\AttributeMaxcategoryFrequency_Satorinew.txt");
    client.FindOccurredMaxcategory(@"D:\cuiwanyun\qa\data\AttributeMaxcategoryFrequency_Satorinew.txt", @"D:\cuiwanyun\qa\data\AttributeOccurredmaxcategoryFrequency_Satorinew.txt");
    */

    // Disabled pipeline, kept for reference: path classification spot checks.
    /*
    CPathClassification client = new CPathClassification();
    Console.WriteLine(client.GetPathCategory("-population-year"));
    Console.WriteLine(client.GetPathCategory("-population-number"));
    Console.WriteLine(client.GetPathCategory("-political appointer"));
    */

    // Active code path: classify a single sample question and print its category.
    CClassifyQuestion classifier = new CClassifyQuestion();
    string sampleCategory = classifier.GetScategory("What is the area of south africa?");
    Console.WriteLine(sampleCategory);
}
/// <summary>
/// Measures how often <c>CClassifyQuestion.GetScategory</c> agrees with the labels in the
/// labeled-question file and prints, for each expected category, the fraction of questions
/// whose predicted category matched.
/// </summary>
/// <remarks>
/// Each input line is parsed as <c>LABEL question text</c>: the coarse label is the text
/// before the first ':' and the question is everything after the first space. The coarse
/// label is mapped to the expected category through a fixed table; the only fine-grained
/// label treated specially is <c>NUM:date</c>. Lines whose label is not in the table are
/// ignored. Lines that contain no ':' or no space cannot be parsed and are skipped
/// (previously they caused an <see cref="ArgumentOutOfRangeException"/>).
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    CClassifyQuestion client = new CClassifyQuestion();

    // Labeled class -> category string that GetScategory is expected to return.
    Dictionary<string, string> categoryToScategory = new Dictionary<string, string>();
    categoryToScategory.Add("ENTY", "Text");
    categoryToScategory.Add("LOC", "Text");
    categoryToScategory.Add("NUM", "Decimal");
    categoryToScategory.Add("NUM:date", "Datetime");
    categoryToScategory.Add("HUM", "Text");

    // Per expected category: number of correct predictions and number of questions seen.
    Dictionary<string, int> trueFrequency = new Dictionary<string, int>();
    Dictionary<string, int> totalFrequency = new Dictionary<string, int>();

    // 'using' guarantees the file handle is released even if classification throws.
    using (StreamReader reader = new StreamReader(@"E:\v-wacui\QuestionAnswering\ParsingData\LabeledQuestion.txt"))
    {
        string currentLine;
        while ((currentLine = reader.ReadLine()) != null)
        {
            int colonIndex = currentLine.IndexOf(':');
            int spaceIndex = currentLine.IndexOf(' ');
            if (colonIndex < 0 || spaceIndex < 0)
            {
                // Blank or malformed line: no label/question can be extracted.
                continue;
            }

            string category = currentLine.Substring(0, colonIndex);
            if (category == "NUM")
            {
                // Dates are the one NUM sub-label with its own expected category.
                string fineCategory = currentLine.Substring(0, spaceIndex);
                if (fineCategory == "NUM:date")
                {
                    category = fineCategory;
                }
            }

            string scategory;
            if (!categoryToScategory.TryGetValue(category, out scategory))
            {
                continue;
            }

            // Both counters are always created together, so one check covers both.
            if (!totalFrequency.ContainsKey(scategory))
            {
                trueFrequency.Add(scategory, 0);
                totalFrequency.Add(scategory, 0);
            }

            string question = currentLine.Substring(spaceIndex + 1);
            if (client.GetScategory(question) == scategory)
            {
                trueFrequency[scategory]++;
            }
            totalFrequency[scategory]++;
        }
    }

    // A category only appears here after at least one question was counted,
    // so the divisor is never zero.
    foreach (KeyValuePair<string, int> entry in totalFrequency)
    {
        Console.WriteLine(entry.Key + "\t" + Convert.ToDouble(trueFrequency[entry.Key]) / entry.Value);
    }
}
/// <summary>
/// Reads a tab-separated question/answer file line by line and passes each retained
/// pair, together with the question's category, to <c>Understand</c>.
/// </summary>
/// <remarks>
/// A line is dropped when it was already seen (tracked in <c>visitedLine</c>), when it has
/// fewer than two tab-separated fields, when <c>CClassifyQuestion.GetScategory</c> returns
/// "DESC" or "ABBR" for the raw question, or when the formalized question contains
/// " your " or " you ".
/// </remarks>
/// <param name="filePath">Path of the input file; field 0 is the question, field 1 the answer.</param>
public void ReadQA(string filePath)
{
    // 'using' guarantees the file handle is released even if processing a line throws.
    using (StreamReader reader = new StreamReader(filePath))
    {
        string currentLine;
        int lineCount = 0;

        // Resource tables consumed by CStringTool (presumably stemming data, judging by the
        // file names - confirm against CStringTool).
        string resourcefilePath = @"D:\cuiwanyun\qa\data\ResourceData\";
        CStringTool.ReadPluralToSingual(resourcefilePath + @"StemmingVerb.txt");
        CStringTool.ReadPluralToSingual2(resourcefilePath + @"StemmingNoun.txt");
        //NERKBclient.ReadCategoryId(resourcefilePath + @"CategoryId_Satorinew.txt");
        //NERKBclient.ReadNER(resourcefilePath + @"qaFullShort_PatternFrequencyNER.txt");

        CClassifyQuestion client = new CClassifyQuestion();

        while ((currentLine = reader.ReadLine()) != null)
        {
            lineCount++;

            // Progress indicator every 100000 lines.
            if (lineCount % 100000 == 0)
            {
                //break;
                Console.WriteLine(lineCount);
            }

            // NOTE(review): the first line of the file is always replaced by this fixed
            // sample. It looks like a leftover debugging aid - confirm whether it should stay.
            if (lineCount == 1)
            {
                currentLine = "What year was america established? In 1776, when America signed the Declaration of Independence stating their independence from England.";
            }

            // Process each distinct line only once.
            if (visitedLine.Contains(currentLine))
            {
                continue;
            }
            visitedLine.Add(currentLine);

            string[] tokens = currentLine.Split('\t');
            if (tokens.Length < 2)
            {
                // No answer field: skip instead of failing with IndexOutOfRangeException.
                continue;
            }

            string question = CStringTool.FormalizeString(tokens[0]);
            string answer = CStringTool.FormalizeStringNumber(tokens[1]);

            // The category is computed from the raw question text, not the formalized one.
            string questionCategory = client.GetScategory(tokens[0]);
            if (questionCategory == "DESC" || questionCategory == "ABBR")
            {
                continue;
            }

            if (question.Contains(" your ") || question.Contains(" you "))
            {
                continue;
            }

            Understand(question, answer, questionCategory);
        }
    }
}