예제 #1
0
        /// <summary>
        /// Creates a <c>DiskCachedLinker</c> that forwards linking requests to a
        /// <c>GraphDisambiguatedLinker</c> constructed with graph disambiguation switched on.
        /// </summary>
        /// <param name="db">Database provider passed to both the inner linker and the caching wrapper.</param>
        /// <param name="storage">Name inserted between "../" and ".link" to form the cache path.</param>
        /// <returns>The caching linker, with <c>CacheResult</c> set to <c>true</c>.</returns>
        internal static DiskCachedLinker CreateCachedLinker(FreebaseDbProvider db, string storage)
        {
            var disambiguatingLinker = new GraphDisambiguatedLinker(db, "./verbs.lex", useGraphDisambiguation: true);
            var cachePath = "../" + storage + ".link";

            // The wrapper only receives a delegate, so every linking request ends up in the inner linker.
            return new DiskCachedLinker(cachePath, 1, (utterance, other) => disambiguatingLinker.LinkUtterance(utterance, other), db)
            {
                CacheResult = true
            };
        }
예제 #2
0
        /// <summary>
        /// Feeds every training dialog (question plus answer node) into a
        /// <c>PatternGeneralizer</c> and then asks it one hard-coded question.
        /// </summary>
        /// <remarks>
        /// The answer obtained at the end is stored in a local only; nothing is printed or
        /// returned by the active code.
        /// </remarks>
        internal static void RunAnswerGeneralizationDev()
        {
            var trainDataset = Configuration.GetQuestionDialogsTrain();
            // NOTE(review): the dev dataset is loaded here but nothing below reads it.
            var devDataset = Configuration.GetQuestionDialogsDev();

            var simpleQuestions = Configuration.GetSimpleQuestionsDump();
            var db = Configuration.Db;

            var trainDialogs = trainDataset.Dialogs.ToArray();
            var linkedTrain = cachedLinkedUtterancesTrain(simpleQuestions, db, trainDialogs);

            // Disabled alternative graph source:
            // var graph = cachedEntityGraph(simpleQuestions, trainDialogs, linkedTrain);
            var graph = new ComposedGraph(new FreebaseGraphLayer(db));

            // Training questions are served from the precomputed links; the plain linker is
            // handed to the cache wrapper as well.
            var coreLinker = new GraphDisambiguatedLinker(db, "./verbs.lex");
            var trainQuestions = trainDialogs.Select(dialog => dialog.Question).ToArray();
            var cachedLinker = new CachedLinker(trainQuestions, linkedTrain, coreLinker);
            var generalizer = new PatternGeneralizer(graph, cachedLinker.LinkUtterance);

            // Number of trailing dialogs excluded from training; zero means all of them are used.
            const int heldOutDialogs = 0;

            // Training: register each question together with the graph node of its answer.
            foreach (var dialog in trainDialogs.Take(trainDialogs.Length - heldOutDialogs))
            {
                generalizer.AddExample(dialog.Question, graph.GetNode(FreebaseDbProvider.GetId(dialog.AnswerMid)));
            }

            // Disabled evaluation loop, kept for reference. It is labelled as a dev-set
            // evaluation but iterates over trainDialogs.
            //
            // foreach (var devDialog in trainDialogs)
            // {
            //     writeLine(devDialog.Question);
            //     writeLine("\t" + cachedLinker.LinkUtterance(devDialog.Question));
            //     var desiredAnswerLabel = db.GetLabel(devDialog.AnswerMid);
            //     writeLine("\tDesired answer: {0} ({1})", desiredAnswerLabel, devDialog.AnswerMid);
            //     var answer = generalizer.GetAnswer(devDialog.Question);
            //     if (answer == null)
            //     {
            //         writeLine("\tNo answer.");
            //     }
            //     else
            //     {
            //         var answerLabel = db.GetLabel(FreebaseLoader.GetMid(answer.Value.Data));
            //         writeLine("\tGeneralizer output: {0} {1}", answerLabel, answer);
            //     }
            //     writeLine();
            // }

            var generalizedAnswer = generalizer.GetAnswer("What county is ovens auditorium in");
            // Other probe questions tried here:
            // var generalizedAnswer = generalizer.GetAnswer("What is Obama gender?");
            // var generalizedAnswer = generalizer.GetAnswer("is mir khasim ali of the male or female gender");
        }
예제 #3
0
        /// <summary>
        /// Creates the experiment's linker: a <c>GraphDisambiguatedLinker</c> (graph
        /// disambiguation on) with the given phrases set as blacklist labels, wrapped in a
        /// <c>DiskCachedLinker</c>.
        /// </summary>
        /// <param name="excludedPhrases">Phrases passed to <c>SetBlacklistLabels</c> on the inner linker.</param>
        /// <returns>The caching linker, with <c>CacheResult</c> set to <c>true</c>.</returns>
        private ILinker createLinker(IEnumerable <string> excludedPhrases)
        {
            var disambiguatingLinker = new GraphDisambiguatedLinker(_db, "./verbs.lex", useGraphDisambiguation: true);
            disambiguatingLinker.SetBlacklistLabels(excludedPhrases);

            // The cache file is placed directly under the experiment root.
            var cachePath = ExperimentRootPath + "/experiment_linker.link";

            return new DiskCachedLinker(cachePath, 1, (utterance, other) => disambiguatingLinker.LinkUtterance(utterance, other), _db)
            {
                CacheResult = true
            };
        }
예제 #4
0
        /// <summary>
        /// Creates a linker pre-populated with already linked utterances.
        /// </summary>
        /// <param name="utterances">Utterance texts used as cache keys.</param>
        /// <param name="linkedUtterances">
        /// Linked results, matched to <paramref name="utterances"/> by position; it must contain
        /// at least as many items, because it is indexed once per utterance.
        /// </param>
        /// <param name="linker">Linker stored in <c>_linker</c>.</param>
        internal CachedLinker(string[] utterances, LinkedUtterance[] linkedUtterances, GraphDisambiguatedLinker linker)
        {
            _linker = linker;

            // Walk both arrays in lockstep. Entries are written through the indexer, so
            // (assuming a standard dictionary) a repeated utterance keeps its last result.
            var position = 0;
            foreach (var utterance in utterances)
            {
                _cachedUtterances[utterance] = linkedUtterances[position];
                ++position;
            }
        }
예제 #5
0
        /// <summary>
        /// Returns one linked utterance per training dialog, obtained through
        /// <c>ComputationCache.Load</c> under the key "linked_all_train".
        /// </summary>
        /// <param name="simpleQuestions">Not used by this method.</param>
        /// <param name="db">Database provider for the linker created inside the factory delegate.</param>
        /// <param name="trainDialogs">Dialogs whose questions are linked, in order.</param>
        /// <returns>An array built from the value returned by <c>ComputationCache.Load</c>.</returns>
        private static LinkedUtterance[] cachedLinkedUtterancesTrain(SimpleQuestionDumpProcessor simpleQuestions, FreebaseDbProvider db, QuestionDialog[] trainDialogs)
        {
            // NOTE(review): presumably the delegate only runs when no cached value exists — confirm
            // against ComputationCache.Load.
            return ComputationCache.Load("linked_all_train", 1, () =>
            {
                var linker = new GraphDisambiguatedLinker(db, "./verbs.lex");
                var linked = new List <LinkedUtterance>(trainDialogs.Length);

                for (var index = 0; index < trainDialogs.Length; ++index)
                {
                    // Only the first linking returned for each question is kept.
                    linked.Add(linker.LinkUtterance(trainDialogs[index].Question, 1).First());
                }

                return linked;
            }).ToArray();
        }
예제 #6
0
        /// <summary>
        /// Counts how often question n-grams co-occur with graph edges around the answer node
        /// of each training dialog, then logs every (n-gram, edge) pair ordered by the value
        /// returned from <c>getPmi</c>.
        /// </summary>
        /// <remarks>
        /// For each dialog the neighbours of the answer node are scanned. A neighbour counts
        /// only when it is one of the entities linked in the question; n-grams containing that
        /// entity id have the id replaced by "$" before being counted.
        /// </remarks>
        internal static void RunGraphMIExperiment()
        {
            var trainDataset = Configuration.GetQuestionDialogsTrain();
            // NOTE(review): the dev dataset is loaded but never read in this method; the call is
            // kept in case loading has side effects — confirm and remove if it has none.
            var devDataset = Configuration.GetQuestionDialogsDev();

            var db    = Configuration.Db;
            var graph = new ComposedGraph(new FreebaseGraphLayer(db));

            var trainDialogs          = trainDataset.Dialogs.ToArray();
            var simpleQuestions       = Configuration.GetSimpleQuestionsDump();
            var linkedUtterancesTrain = cachedLinkedUtterancesTrain(simpleQuestions, db, trainDialogs);
            var linker       = new GraphDisambiguatedLinker(db, "./verbs.lex");
            var cachedLinker = new CachedLinker(trainDialogs.Select(d => d.Question).ToArray(), linkedUtterancesTrain, linker);

            var totalNgramCounts = new Dictionary <string, int>();
            var totalEdgeCounts  = new Dictionary <Edge, int>();
            var ngramEdgeCounts  = new Dictionary <Tuple <string, Edge>, int>();

            // Iterate the materialised array the cached linker was built from, rather than
            // enumerating trainDataset.Dialogs a second time.
            foreach (var dialog in trainDialogs)
            {
                var questionNgrams = getQuestionNgrams(dialog, 4, cachedLinker);
                var linkedQuestion = cachedLinker.LinkUtterance(dialog.Question);

                Console.WriteLine(dialog.Question);
                var answerNode = graph.GetNode(db.GetFreebaseId(dialog.AnswerMid));
                var targets    = graph.GetNeighbours(answerNode, 100);

                var questionEntities = linkedQuestion.Parts.SelectMany(p => p.Entities.Select(e => db.GetFreebaseId(e.Mid))).ToArray();
                var edges            = new HashSet <Edge>();
                foreach (var target in targets)
                {
                    var edge     = target.Item1;
                    var targetId = target.Item2.Data;

                    // Each distinct edge is examined once per dialog: only the first target
                    // reached through it is considered.
                    if (!edges.Add(edge))
                    {
                        continue;
                    }

                    // Only neighbours that are entities mentioned in the question are counted.
                    if (!questionEntities.Contains(targetId))
                    {
                        continue;
                    }

                    foreach (var rawNgram in questionNgrams)
                    {
                        if (!rawNgram.Contains(targetId))
                        {
                            continue;
                        }

                        // Abstract the concrete entity id away so that n-grams generalise across dialogs.
                        var ngram = rawNgram.Replace(targetId, "$");

                        // TryGetValue leaves count at 0 for an unseen key, so "count + 1" also
                        // covers the first occurrence.
                        int count;
                        var key = Tuple.Create(ngram, edge);
                        ngramEdgeCounts.TryGetValue(key, out count);
                        ngramEdgeCounts[key] = count + 1;

                        totalNgramCounts.TryGetValue(ngram, out count);
                        totalNgramCounts[ngram] = count + 1;

                        totalEdgeCounts.TryGetValue(edge, out count);
                        totalEdgeCounts[edge] = count + 1;
                    }
                }
            }

            // Compute the score once per pair and reuse it for both ordering and logging.
            var scoredPairs = ngramEdgeCounts
                .Select(pair => new { pair.Key, Count = pair.Value, Pmi = getPmi(pair.Key, totalNgramCounts, totalEdgeCounts, ngramEdgeCounts) })
                .OrderBy(scored => scored.Pmi);

            foreach (var scored in scoredPairs)
            {
                logWriteLine("{0} -> [{1},{2},{3}] {4:0.00}", scored.Key, scored.Count, totalNgramCounts[scored.Key.Item1], totalEdgeCounts[scored.Key.Item2], scored.Pmi);
            }
        }