/// <summary>
/// Feeds every training question dialog into a <c>PatternGeneralizer</c> as a
/// (question, answer node) example and then asks the generalizer a single sample question.
/// The answer is only held in a local, so it is meant to be inspected in a debugger.
/// </summary>
internal static void RunAnswerGeneralizationDev()
{
    var trainSet = Configuration.GetQuestionDialogsTrain();
    // The dev dataset is loaded here but not consumed anywhere in this method.
    var devSet = Configuration.GetQuestionDialogsDev();
    var questionsDump = Configuration.GetSimpleQuestionsDump();
    var db = Configuration.Db;

    var trainDialogs = trainSet.Dialogs.ToArray();
    var linkedTrainUtterances = cachedLinkedUtterancesTrain(questionsDump, db, trainDialogs);

    // Disabled alternative graph source:
    //var graph = cachedEntityGraph(simpleQuestions, trainDialogs, linkedUtterancesTrain);
    var graph = new ComposedGraph(new FreebaseGraphLayer(db));

    var baseLinker = new GraphDisambiguatedLinker(db, "./verbs.lex");
    var trainQuestions = trainDialogs.Select(d => d.Question).ToArray();
    var cachedLinker = new CachedLinker(trainQuestions, linkedTrainUtterances, baseLinker);
    var generalizer = new PatternGeneralizer(graph, cachedLinker.LinkUtterance);

    // Number of trailing dialogs excluded from training (currently none).
    var heldOutDialogs = 0;

    //train
    foreach (var example in trainDialogs.Take(trainDialogs.Length - heldOutDialogs))
    {
        var exampleQuestion = example.Question;
        var answerId = FreebaseDbProvider.GetId(example.AnswerMid);
        generalizer.AddExample(exampleQuestion, graph.GetNode(answerId));
    }

    // Disabled evaluation loop:
    ////evaluation on dev set
    //foreach (var devDialog in trainDialogs)
    //{
    //    writeLine(devDialog.Question);
    //    writeLine("\t" + cachedLinker.LinkUtterance(devDialog.Question));
    //    var desiredAnswerLabel = db.GetLabel(devDialog.AnswerMid);
    //    writeLine("\tDesired answer: {0} ({1})", desiredAnswerLabel, devDialog.AnswerMid);
    //    var answer = generalizer.GetAnswer(devDialog.Question);
    //    if (answer == null)
    //    {
    //        writeLine("\tNo answer.");
    //    }
    //    else
    //    {
    //        var answerLabel = db.GetLabel(FreebaseLoader.GetMid(answer.Value.Data));
    //        writeLine("\tGeneralizer output: {0} {1}", answerLabel, answer);
    //    }
    //    writeLine();
    //}

    var sampleAnswer = generalizer.GetAnswer("What county is ovens auditorium in");
    //var result = generalizer.GetAnswer("What is Obama gender?");
    //var result = generalizer.GetAnswer("is mir khasim ali of the male or female gender");
}
/// <summary>
/// Collects the distinct n-grams of every order from 2 up to <paramref name="n"/>
/// taken from the linked form of the dialog's question.
/// </summary>
/// <param name="dialog">Dialog whose <c>Question</c> is linked and split into n-grams.</param>
/// <param name="n">Largest n-gram order to collect; for values below 2 the result is empty.</param>
/// <param name="linker">Linker used to turn the question into a linked utterance.</param>
/// <returns>The set of distinct n-grams gathered over all requested orders.</returns>
private static IEnumerable<string> getQuestionNgrams(QuestionDialog dialog, int n, CachedLinker linker)
{
    var ngrams = new HashSet<string>();
    if (n < 2)
    {
        // No order would be processed, so skip linking entirely.
        return ngrams;
    }

    // Linking does not depend on the n-gram order, so it is done once up front
    // instead of once per order.
    var linkedQuestion = linker.LinkUtterance(dialog.Question);
    for (var order = 2; order <= n; ++order)
    {
        ngrams.UnionWith(linkedQuestion.GetNgrams(order));
    }

    // Disabled alternatives kept for reference:
    //result.UnionWith(getNgrams(question, n));
    //foreach (var explanation in dialog.ExplanationTurns)
    //{
    //    result.UnionWith(getNgrams(explanation.InputChat, i));
    //}

    return ngrams;
}
/// <summary>
/// For every training dialog, counts how often a question n-gram (with the mentioned
/// entity id replaced by <c>$</c>) co-occurs with a graph edge leading from the answer
/// node to that entity. Afterwards logs every (n-gram, edge) pair together with its
/// joint count, the n-gram total, the edge total and its <c>getPmi</c> score,
/// ordered by ascending score.
/// </summary>
internal static void RunGraphMIExperiment()
{
    // Upper bound on the n-gram order passed to getQuestionNgrams.
    const int maxNgramOrder = 4;

    var trainDataset = Configuration.GetQuestionDialogsTrain();
    var db = Configuration.Db;
    var graph = new ComposedGraph(new FreebaseGraphLayer(db));
    var trainDialogs = trainDataset.Dialogs.ToArray();
    var simpleQuestions = Configuration.GetSimpleQuestionsDump();

    // Linked utterances are obtained once; they only feed the cached linker below.
    var linkedUtterancesTrain = cachedLinkedUtterancesTrain(simpleQuestions, db, trainDialogs);
    var linker = new GraphDisambiguatedLinker(db, "./verbs.lex");
    var cachedLinker = new CachedLinker(trainDialogs.Select(d => d.Question).ToArray(), linkedUtterancesTrain, linker);

    var totalNgramCounts = new Dictionary<string, int>();
    var totalEdgeCounts = new Dictionary<Edge, int>();
    var ngramEdgeCounts = new Dictionary<Tuple<string, Edge>, int>();

    // Iterate the materialized array (the same dialogs the cached linker was built from)
    // rather than enumerating the dataset a second time.
    foreach (var dialog in trainDialogs)
    {
        var questionNgrams = getQuestionNgrams(dialog, maxNgramOrder, cachedLinker);
        var linkedQuestion = cachedLinker.LinkUtterance(dialog.Question);
        Console.WriteLine(dialog.Question);

        var answerNode = graph.GetNode(db.GetFreebaseId(dialog.AnswerMid));
        // NOTE(review): the meaning of the second argument (100) is defined by
        // ComposedGraph.GetNeighbours, which is not visible here - confirm.
        var targets = graph.GetNeighbours(answerNode, 100);
        var questionEntities = linkedQuestion.Parts
            .SelectMany(p => p.Entities.Select(e => db.GetFreebaseId(e.Mid)))
            .ToArray();

        var edges = new HashSet<Edge>();
        foreach (var target in targets)
        {
            var edge = target.Item1;
            var targetId = target.Item2.Data;

            // Each distinct edge is processed only for the first neighbour it is seen with.
            // NOTE(review): the edge is marked as seen before the entity check, so an edge
            // first met on a non-question entity is skipped for later neighbours too -
            // confirm this is intended.
            if (!edges.Add(edge))
            {
                continue;
            }

            // Only neighbours that are entities mentioned in the question are counted.
            if (!questionEntities.Contains(targetId))
            {
                continue;
            }

            foreach (var rawNgram in questionNgrams)
            {
                if (!rawNgram.Contains(targetId))
                {
                    continue;
                }

                // Abstract the concrete entity away so n-grams generalize across questions.
                var ngram = rawNgram.Replace(targetId, "$");
                var key = Tuple.Create(ngram, edge);

                // TryGetValue leaves count at 0 for a missing key, so "count + 1"
                // both initializes and increments.
                int count;
                ngramEdgeCounts.TryGetValue(key, out count);
                ngramEdgeCounts[key] = count + 1;

                totalNgramCounts.TryGetValue(ngram, out count);
                totalNgramCounts[ngram] = count + 1;

                totalEdgeCounts.TryGetValue(edge, out count);
                totalEdgeCounts[edge] = count + 1;
            }
        }
    }

    var orderedCounts = ngramEdgeCounts.OrderBy(p => getPmi(p.Key, totalNgramCounts, totalEdgeCounts, ngramEdgeCounts));
    foreach (var pair in orderedCounts)
    {
        logWriteLine(
            "{0} -> [{1},{2},{3}] {4:0.00}",
            pair.Key,
            pair.Value,
            totalNgramCounts[pair.Key.Item1],
            totalEdgeCounts[pair.Key.Item2],
            getPmi(pair.Key, totalNgramCounts, totalEdgeCounts, ngramEdgeCounts));
    }
}