Пример #1
0
        /// <summary>
        /// Feeds every training dialog into a pattern generalizer as a (question, answer node)
        /// example and then asks the generalizer a single hard-coded probe question.
        /// </summary>
        /// <remarks>
        /// The probe answer is only stored in a local; this method neither prints nor returns it.
        /// </remarks>
        internal static void RunAnswerGeneralizationDev()
        {
            var trainingSet = Configuration.GetQuestionDialogsTrain();
            // NOTE(review): the dev set is loaded but never read in this method - confirm whether the load is still needed.
            var devSet = Configuration.GetQuestionDialogsDev();

            var questionsDump = Configuration.GetSimpleQuestionsDump();
            var database      = Configuration.Db;

            var dialogs = trainingSet.Dialogs.ToArray();
            var linkedTrainUtterances = cachedLinkedUtterancesTrain(questionsDump, database, dialogs);

            // Alternative graph source that is currently disabled:
            //var graph = cachedEntityGraph(simpleQuestions, trainDialogs, linkedUtterancesTrain);
            var entityGraph = new ComposedGraph(new FreebaseGraphLayer(database));

            var baseLinker = new GraphDisambiguatedLinker(database, "./verbs.lex");
            var questions  = dialogs.Select(d => d.Question).ToArray();
            var linkerWithCache = new CachedLinker(questions, linkedTrainUtterances, baseLinker);
            var patternGeneralizer = new PatternGeneralizer(entityGraph, linkerWithCache.LinkUtterance);

            // Number of trailing dialogs excluded from training; zero means every dialog is used.
            var heldOutDialogCount = 0;

            // Training: register each question together with the graph node of its answer.
            foreach (var dialog in dialogs.Take(dialogs.Length - heldOutDialogCount))
            {
                patternGeneralizer.AddExample(dialog.Question, entityGraph.GetNode(FreebaseDbProvider.GetId(dialog.AnswerMid)));
            }

            // Disabled evaluation loop, kept for reference:
            //
            // foreach (var devDialog in trainDialogs)
            // {
            //     writeLine(devDialog.Question);
            //     writeLine("\t" + cachedLinker.LinkUtterance(devDialog.Question));
            //     var desiredAnswerLabel = db.GetLabel(devDialog.AnswerMid);
            //     writeLine("\tDesired answer: {0} ({1})", desiredAnswerLabel, devDialog.AnswerMid);
            //     var answer = generalizer.GetAnswer(devDialog.Question);
            //     if (answer == null)
            //     {
            //         writeLine("\tNo answer.");
            //     }
            //     else
            //     {
            //         var answerLabel = db.GetLabel(FreebaseLoader.GetMid(answer.Value.Data));
            //         writeLine("\tGeneralizer output: {0} {1}", answerLabel, answer);
            //     }
            //     writeLine();
            // }

            // Single probe query; other probes that were tried are listed below.
            var probeAnswer = patternGeneralizer.GetAnswer("What county is ovens auditorium in");
            //var result = generalizer.GetAnswer("What is Obama gender?");
            //var result = generalizer.GetAnswer("is mir khasim ali of the male or female gender");
        }
Пример #2
0
        /// <summary>
        /// Collects the distinct n-grams of the linked form of a dialog's question for every
        /// n-gram length from 2 up to <paramref name="n"/> (inclusive).
        /// </summary>
        /// <param name="dialog">Dialog whose <c>Question</c> is linked and split into n-grams.</param>
        /// <param name="n">Largest n-gram length to collect; values below 2 produce an empty result.</param>
        /// <param name="linker">Linker used to turn the question into its linked form.</param>
        /// <returns>The collected n-grams without duplicates.</returns>
        private static IEnumerable<string> getQuestionNgrams(QuestionDialog dialog, int n, CachedLinker linker)
        {
            var result = new HashSet<string>();
            if (n < 2)
            {
                // No n-gram length is requested, so the question is not linked at all.
                return result;
            }

            // The linked question does not depend on the n-gram length, so link it once
            // instead of once per length.
            var linkedQuestion = linker.LinkUtterance(dialog.Question);
            for (var length = 2; length <= n; ++length)
            {
                result.UnionWith(linkedQuestion.GetNgrams(length));
            }

            // Disabled variants, kept for reference:
            //   result.UnionWith(getNgrams(question, n));
            //   foreach (var explanation in dialog.ExplanationTurns)
            //   {
            //       result.UnionWith(getNgrams(explanation.InputChat, i));
            //   }

            return result;
        }
Пример #3
0
        /// <summary>
        /// Counts co-occurrences of question n-grams and graph edges over the training dialogs and
        /// logs every (n-gram, edge) pair ordered by the score returned from <c>getPmi</c>.
        /// </summary>
        /// <remarks>
        /// For each dialog, edges returned by <c>GetNeighbours</c> for the answer node are considered.
        /// An edge is counted when its target is an entity linked in the question; it is paired with
        /// every question n-gram (lengths 2 to 4) containing that entity id, with the id replaced by
        /// the "$" placeholder. Each dialog question is also written to the console.
        /// </remarks>
        internal static void RunGraphMIExperiment()
        {
            var trainDataset = Configuration.GetQuestionDialogsTrain();
            // NOTE(review): devDataset is never read below - confirm whether this load is still needed.
            var devDataset   = Configuration.GetQuestionDialogsDev();

            var db    = Configuration.Db;
            var graph = new ComposedGraph(new FreebaseGraphLayer(db));

            var trainDialogs          = trainDataset.Dialogs.ToArray();
            var simpleQuestions       = Configuration.GetSimpleQuestionsDump();
            var linkedUtterancesTrain = cachedLinkedUtterancesTrain(simpleQuestions, db, trainDialogs);
            var linker       = new GraphDisambiguatedLinker(db, "./verbs.lex");
            var cachedLinker = new CachedLinker(trainDialogs.Select(d => d.Question).ToArray(), linkedUtterancesTrain, linker);

            var totalNgramCounts = new Dictionary<string, int>();
            var totalEdgeCounts  = new Dictionary<Edge, int>();
            var ngramEdgeCounts  = new Dictionary<Tuple<string, Edge>, int>();

            // Iterate the already materialized array so the dialogs are enumerated only once.
            foreach (var dialog in trainDialogs)
            {
                var questionNgrams = getQuestionNgrams(dialog, 4, cachedLinker);
                var linkedQuestion = cachedLinker.LinkUtterance(dialog.Question);

                Console.WriteLine(dialog.Question);
                var answerNode = graph.GetNode(db.GetFreebaseId(dialog.AnswerMid));
                var targets    = graph.GetNeighbours(answerNode, 100);

                // Ids of all entities that were linked anywhere in the question.
                var questionEntities = linkedQuestion.Parts.SelectMany(p => p.Entities.Select(e => db.GetFreebaseId(e.Mid))).ToArray();
                var edges            = new HashSet<Edge>();
                foreach (var target in targets)
                {
                    var edge     = target.Item1;
                    var targetId = target.Item2.Data;

                    // Each edge is processed at most once per dialog.
                    // NOTE(review): the edge is marked as seen before the entity check, so only the
                    // first target of an edge can ever contribute - confirm this is intended.
                    if (!edges.Add(edge))
                    {
                        continue;
                    }

                    if (!questionEntities.Contains(targetId))
                    {
                        continue;
                    }

                    foreach (var rawNgram in questionNgrams)
                    {
                        if (!rawNgram.Contains(targetId))
                        {
                            continue;
                        }

                        // Abstract the concrete entity away so n-grams generalize across dialogs.
                        var ngram = rawNgram.Replace(targetId, "$");

                        // TryGetValue leaves count at 0 for a missing key, so each update is "previous + 1".
                        int count;
                        var key = Tuple.Create(ngram, edge);
                        ngramEdgeCounts.TryGetValue(key, out count);
                        ngramEdgeCounts[key] = count + 1;

                        totalNgramCounts.TryGetValue(ngram, out count);
                        totalNgramCounts[ngram] = count + 1;

                        totalEdgeCounts.TryGetValue(edge, out count);
                        totalEdgeCounts[edge] = count + 1;
                    }
                }
            }

            // Score every pair once and reuse the score for both ordering and logging.
            var scoredPairs = ngramEdgeCounts
                .Select(p => new { Pair = p, Pmi = getPmi(p.Key, totalNgramCounts, totalEdgeCounts, ngramEdgeCounts) })
                .OrderBy(s => s.Pmi);

            foreach (var scored in scoredPairs)
            {
                var pairKey = scored.Pair.Key;
                logWriteLine("{0} -> [{1},{2},{3}] {4:0.00}", pairKey, scored.Pair.Value, totalNgramCounts[pairKey.Item1], totalEdgeCounts[pairKey.Item2], scored.Pmi);
            }
        }