/// <summary>
/// Gathers the distinct n-grams of the dialog's linked question for every
/// n-gram order from 2 up to <paramref name="n"/> (inclusive).
/// </summary>
/// <param name="dialog">Dialog whose <c>Question</c> is linked and split into n-grams.</param>
/// <param name="n">Largest n-gram order to collect; for values below 2 nothing is collected.</param>
/// <param name="linker">Linker used to link the question before n-grams are extracted.</param>
/// <returns>The set of all n-grams collected over the requested orders.</returns>
/// <remarks>
/// Only the question itself contributes n-grams; explanation turns of the dialog are not used.
/// </remarks>
private static IEnumerable<string> getQuestionNgrams(QuestionDialog dialog, int n, CachedLinker linker)
{
    var ngrams = new HashSet<string>();

    var order = 2;
    while (order <= n)
    {
        // The question is linked on every iteration, so the linker is never
        // invoked when there is no order to process (n < 2).
        var utterance = dialog.Question;
        var linked = linker.LinkUtterance(utterance);
        ngrams.UnionWith(linked.GetNgrams(order));

        ++order;
    }

    return ngrams;
}
/// <summary>
/// Counts, over all training dialogs, how often a question n-gram (with the linked
/// entity replaced by the <c>$</c> placeholder) co-occurs with a graph edge that connects
/// the dialog's answer node to an entity mentioned in the question. The
/// (n-gram, edge) pairs are then logged in ascending order of their <c>getPmi</c> score.
/// </summary>
/// <remarks>
/// Each logged line has the form
/// <c>key -> [pairCount,ngramTotal,edgeTotal] score</c>.
/// Every question is also echoed to the console while it is processed.
/// </remarks>
internal static void RunGraphMIExperiment()
{
    var trainDataset = Configuration.GetQuestionDialogsTrain();
    var db = Configuration.Db;
    var graph = new ComposedGraph(new FreebaseGraphLayer(db));

    // Materialize the dialogs once; the same array feeds the linker cache and the counting loop.
    var trainDialogs = trainDataset.Dialogs.ToArray();
    var simpleQuestions = Configuration.GetSimpleQuestionsDump();

    var linkedUtterancesTrain = cachedLinkedUtterancesTrain(simpleQuestions, db, trainDialogs);
    var linker = new GraphDisambiguatedLinker(db, "./verbs.lex");
    var cachedLinker = new CachedLinker(trainDialogs.Select(d => d.Question).ToArray(), linkedUtterancesTrain, linker);

    var totalNgramCounts = new Dictionary<string, int>();
    var totalEdgeCounts = new Dictionary<Edge, int>();
    var ngramEdgeCounts = new Dictionary<Tuple<string, Edge>, int>();

    foreach (var dialog in trainDialogs)
    {
        // N-grams of order 2..4 of the linked question.
        var questionNgrams = getQuestionNgrams(dialog, 4, cachedLinker);
        var linkedQuestion = cachedLinker.LinkUtterance(dialog.Question);
        Console.WriteLine(dialog.Question);

        var answerNode = graph.GetNode(db.GetFreebaseId(dialog.AnswerMid));
        // NOTE(review): the meaning of the second argument (100) is defined by GetNeighbours - confirm.
        var targets = graph.GetNeighbours(answerNode, 100);
        var questionEntities = linkedQuestion.Parts
            .SelectMany(p => p.Entities.Select(e => db.GetFreebaseId(e.Mid)))
            .ToArray();

        // Edges already counted for this dialog.
        var edges = new HashSet<Edge>();
        foreach (var target in targets)
        {
            var edge = target.Item1;
            var targetId = target.Item2.Data;

            // Only neighbours that are mentioned in the question are relevant.
            if (!questionEntities.Contains(targetId))
            {
                continue;
            }

            // Count every edge at most once per dialog. This check deliberately comes
            // after the entity filter: otherwise an edge whose first enumerated
            // neighbour is not a question entity would be marked as seen and a later
            // neighbour over the same edge that is a question entity would be lost.
            if (!edges.Add(edge))
            {
                continue;
            }

            foreach (var rawNgram in questionNgrams)
            {
                if (!rawNgram.Contains(targetId))
                {
                    continue;
                }

                // Replace the entity id by the "$" placeholder inside the n-gram.
                var ngram = rawNgram.Replace(targetId, "$");
                var key = Tuple.Create(ngram, edge);

                // TryGetValue leaves count at 0 for keys that are not present yet.
                int count;
                ngramEdgeCounts.TryGetValue(key, out count);
                ngramEdgeCounts[key] = count + 1;

                totalNgramCounts.TryGetValue(ngram, out count);
                totalNgramCounts[ngram] = count + 1;

                totalEdgeCounts.TryGetValue(edge, out count);
                totalEdgeCounts[edge] = count + 1;
            }
        }
    }

    // Score every pair exactly once and reuse the score for ordering and for the log line.
    var scoredCounts = ngramEdgeCounts
        .Select(p => new { Entry = p, Pmi = getPmi(p.Key, totalNgramCounts, totalEdgeCounts, ngramEdgeCounts) })
        .OrderBy(s => s.Pmi);

    foreach (var scored in scoredCounts)
    {
        var pair = scored.Entry;
        logWriteLine("{0} -> [{1},{2},{3}] {4:0.00}", pair.Key, pair.Value, totalNgramCounts[pair.Key.Item1], totalEdgeCounts[pair.Key.Item2], scored.Pmi);
    }
}