/// <summary>
        /// Links the question via <c>_linker</c> and produces a lower-cased,
        /// space-separated signature together with the graph nodes of all linked entities.
        /// Parts without entities contribute their token; parts with entities contribute
        /// "$" followed by the number of entity nodes collected so far.
        /// </summary>
        /// <param name="question">Question text passed to the linker.</param>
        /// <param name="questionSignature">Receives the lower-cased signature.</param>
        /// <param name="questionEntities">Receives the nodes of all linked entities, in part order.</param>
        private void extractSignature(string question, out string questionSignature, out IEnumerable <NodeReference> questionEntities)
        {
            var collectedNodes   = new List <NodeReference>();
            var signatureBuilder = new StringBuilder();

            foreach (var part in _linker(question).Parts)
            {
                var partNodes = part.Entities
                                .Select(entity => _graph.GetNode(FreebaseDbProvider.GetId(entity.Mid)))
                                .ToArray();
                collectedNodes.AddRange(partNodes);

                // The separator is only written once something has already been emitted.
                if (signatureBuilder.Length > 0)
                {
                    signatureBuilder.Append(' ');
                }

                if (partNodes.Length > 0)
                {
                    // Placeholder index is the running total of entity nodes, this part included.
                    signatureBuilder.Append("$" + collectedNodes.Count);
                }
                else
                {
                    signatureBuilder.Append(part.Token);
                }
            }

            questionSignature = signatureBuilder.ToString().ToLowerInvariant();
            questionEntities  = collectedNodes;
        }
示例#2
0
        /// <summary>
        /// Renders the database page. Reads the "query" request value, exposes it as a
        /// parameter and, when it is present, either exposes the single entry whose id the
        /// query resolves to or the scored search results together with their count.
        /// </summary>
        public void database()
        {
            var provider  = Configuration.Db;
            var queryText = GET("query");

            SetParam("query", queryText);

            if (queryText != null)
            {
                var knowledgeId = FreebaseDbProvider.TryGetId(queryText);
                if (!provider.ContainsId(knowledgeId))
                {
                    // The query is not a known id: fall back to the scored document search.
                    var foundEntries = provider.GetScoredDocs(queryText)
                                       .Select(doc => provider.GetEntry(doc))
                                       .ToArray();

                    SetParam("result_entries", foundEntries);
                    SetParam("result_entries_count", foundEntries.Length);
                }
                else
                {
                    SetParam("result_entry", provider.GetEntryFromId(knowledgeId));
                }
            }

            Layout("layout.haml");
            Render("database.haml");
        }
示例#3
0
        /// <summary>
        /// Builds a report over the questions of the given knowledge. Questions without
        /// answer hints are ignored. When <paramref name="fruitOnly"/> is set, a question is
        /// also dropped if its top denotation evidence is below 2 or does not exceed half of
        /// its collected denotations. Remaining reports are ordered by descending number of
        /// collected denotations.
        /// </summary>
        /// <param name="knowledge">Source of the storage path and the questions.</param>
        /// <param name="extractor">Extractor handed to every <c>QuestionReport</c>.</param>
        /// <param name="questions">Used to look up the answer mid of each question sentence.</param>
        /// <param name="fruitOnly">Enables the evidence filter described above.</param>
        internal KnowledgeReport(ExtractionKnowledge knowledge, LinkBasedExtractor extractor, QuestionCollection questions, bool fruitOnly)
        {
            StoragePath   = knowledge.StoragePath;
            QuestionCount = knowledge.Questions.Count();

            var acceptedReports = new List <QuestionReport>();

            foreach (var hintedQuestion in knowledge.Questions.Where(q => q.AnswerHints.Any()))
            {
                var sentence = hintedQuestion.Utterance.OriginalSentence;
                var answerId = FreebaseDbProvider.GetId(questions.GetAnswerMid(sentence));
                var report   = new QuestionReport(hintedQuestion, answerId, extractor);

                // Evidence is only inspected when the filter is requested.
                var lacksEvidence = fruitOnly &&
                                    (report.TopDenotationEvidence < 2 || report.CollectedDenotations.Count() - report.TopDenotationEvidence * 2 >= 0);

                if (!lacksEvidence)
                {
                    acceptedReports.Add(report);
                }
            }

            Questions = acceptedReports.OrderByDescending(r => r.CollectedDenotations.Count());
        }
示例#4
0
        /// <summary>
        /// Creates a <c>DiskCachedLinker</c> (version 1) stored at "../" + storage + ".link"
        /// that delegates to a <c>GraphDisambiguatedLinker</c> using the "./verbs.lex" lexicon
        /// with graph disambiguation enabled. <c>CacheResult</c> is set to true.
        /// </summary>
        /// <param name="db">Database handed to both linkers.</param>
        /// <param name="storage">Name used to build the cache file path.</param>
        /// <returns>The configured linker.</returns>
        internal static DiskCachedLinker CreateCachedLinker(FreebaseDbProvider db, string storage)
        {
            var innerLinker = new GraphDisambiguatedLinker(db, "./verbs.lex", useGraphDisambiguation: true);
            var cachePath   = string.Concat("../", storage, ".link");

            return new DiskCachedLinker(cachePath, 1, (u, c) => innerLinker.LinkUtterance(u, c), db)
            {
                CacheResult = true
            };
        }
示例#5
0
 /// <summary>
 /// Stores the database, link provider, cache version and cache path, and then
 /// calls <c>loadCache()</c>.
 /// </summary>
 /// <param name="cachePath">Value stored in <c>_cachePath</c>.</param>
 /// <param name="version">Value stored in <c>_version</c>.</param>
 /// <param name="provider">Value stored in <c>_provider</c>.</param>
 /// <param name="db">Value stored in <c>_db</c>.</param>
 internal DiskCachedLinker(string cachePath, int version, LinkProvider provider, FreebaseDbProvider db)
 {
     _db       = db;
     _provider = provider;

     _version   = version;
     _cachePath = cachePath;

     // Runs last, after all fields have been assigned.
     loadCache();
 }
示例#6
0
        /// <summary>
        /// Creates a <c>DiskCachedLinker</c> (version 1) stored at "../full.link" that
        /// delegates to a <c>GraphDisambiguatedLinker</c> using the "./verbs.lex" lexicon
        /// with graph disambiguation enabled. <c>CacheResult</c> is set to false.
        /// </summary>
        /// <param name="db">Database handed to both linkers.</param>
        /// <returns>The configured linker.</returns>
        private static ILinker getFullDataLinker(FreebaseDbProvider db)
        {
            var innerLinker = new GraphDisambiguatedLinker(db, "./verbs.lex", useGraphDisambiguation: true);

            var fullLinker = new DiskCachedLinker("../full.link", 1, (u, c) => innerLinker.LinkUtterance(u, c), db)
            {
                CacheResult = false
            };

            return fullLinker;
        }
        /// <summary>
        /// Looks up the entity info for the given entry in the linker's database,
        /// converting the entry id to a mid first.
        /// </summary>
        /// <param name="entity">Entry whose <c>Id</c> is looked up.</param>
        /// <returns>Whatever <c>GetEntityInfoFromMid</c> returns for the entry's mid.</returns>
        private EntityInfo entityInfo(FreebaseEntry entity)
        {
            return(_linker.GetDb().GetEntityInfoFromMid(FreebaseDbProvider.GetMid(entity.Id)));
        }
        /// <summary>
        /// Development experiment: feeds every training dialog (question plus answer node)
        /// into a <c>PatternGeneralizer</c> built over the Freebase graph layer and then
        /// asks it a single hard-coded question. The answer is not used further.
        /// </summary>
        internal static void RunAnswerGeneralizationDev()
        {
            var trainingSet = Configuration.GetQuestionDialogsTrain();
            // Loaded but not used below; the call is kept as it was.
            var devSet = Configuration.GetQuestionDialogsDev();

            var questionsDump = Configuration.GetSimpleQuestionsDump();
            var freebase      = Configuration.Db;

            var dialogs     = trainingSet.Dialogs.ToArray();
            var linkedTrain = cachedLinkedUtterancesTrain(questionsDump, freebase, dialogs);

            // Alternative graph source, currently disabled:
            // var graph = cachedEntityGraph(simpleQuestions, trainDialogs, linkedUtterancesTrain);
            var graph = new ComposedGraph(new FreebaseGraphLayer(freebase));

            var baseLinker     = new GraphDisambiguatedLinker(freebase, "./verbs.lex");
            var trainQuestions = dialogs.Select(d => d.Question).ToArray();
            var cachedLinker   = new CachedLinker(trainQuestions, linkedTrain, baseLinker);
            var generalizer    = new PatternGeneralizer(graph, cachedLinker.LinkUtterance);

            // Number of trailing dialogs excluded from training.
            var heldOutDialogs = 0;

            //train
            foreach (var dialog in dialogs.Take(dialogs.Length - heldOutDialogs))
            {
                var answerNode = graph.GetNode(FreebaseDbProvider.GetId(dialog.AnswerMid));
                generalizer.AddExample(dialog.Question, answerNode);
            }

            // Disabled evaluation on the dev set, kept for reference:
            //
            // foreach (var devDialog in trainDialogs)
            // {
            //     writeLine(devDialog.Question);
            //     writeLine("\t" + cachedLinker.LinkUtterance(devDialog.Question));
            //     var desiredAnswerLabel = db.GetLabel(devDialog.AnswerMid);
            //     writeLine("\tDesired answer: {0} ({1})", desiredAnswerLabel, devDialog.AnswerMid);
            //     var answer = generalizer.GetAnswer(devDialog.Question);
            //     if (answer == null)
            //     {
            //         writeLine("\tNo answer.");
            //     }
            //     else
            //     {
            //         var answerLabel = db.GetLabel(FreebaseLoader.GetMid(answer.Value.Data));
            //         writeLine("\tGeneralizer output: {0} {1}", answerLabel, answer);
            //     }
            //     writeLine();
            // }

            var result = generalizer.GetAnswer("What county is ovens auditorium in");
            //var result = generalizer.GetAnswer("What is Obama gender?");
            //var result = generalizer.GetAnswer("is mir khasim ali of the male or female gender");
        }
        /// <summary>
        /// Writes the substitution's original trace, its rank, the labelled substitution
        /// node and up to 20 labelled current nodes of the trace to the console.
        /// </summary>
        /// <param name="substitution">Substitution to describe.</param>
        internal static void DebugInfo(PathSubstitution substitution)
        {
            var freebase = Configuration.Db;
            var trace    = substitution.OriginalTrace;

            Console.WriteLine("Substitution trace: " + trace.ToString());
            Console.WriteLine("Rank: " + substitution.Rank);

            var substitutedNode  = substitution.Substitution;
            var substitutedLabel = freebase.GetLabel(FreebaseDbProvider.GetMid(substitutedNode.Data));
            Console.WriteLine("Substitution node: {0} ({1})", substitutedLabel, substitutedNode);

            // Only the first 20 nodes are listed.
            foreach (var traceNode in trace.CurrentNodes.Take(20))
            {
                var nodeLabel = freebase.GetLabel(FreebaseDbProvider.GetMid(traceNode.Data));
                Console.WriteLine("\t{0} ({1})", nodeLabel, traceNode);
            }
        }
示例#10
0
        /// <summary>
        /// For every id prints "id label", then a tab-indented description and an empty
        /// line, using the label and description the database holds for the id's mid.
        /// </summary>
        /// <param name="graph">Not used by this method.</param>
        /// <param name="db">Database queried for labels and descriptions.</param>
        /// <param name="ids">Ids to print.</param>
        private static void printInfo(ComposedGraph graph, FreebaseDbProvider db, params string[] ids)
        {
            for (var index = 0; index < ids.Length; ++index)
            {
                var entityId  = ids[index];
                var entityMid = FreebaseDbProvider.GetMid(entityId);

                // Both lookups happen before anything is written.
                var entityLabel = db.GetLabel(entityMid);
                var entityText  = db.GetDescription(entityMid);

                Console.WriteLine(entityId + " " + entityLabel);
                Console.WriteLine("\t" + entityText);
                Console.WriteLine();
            }
        }
示例#11
0
        /// <summary>
        /// Collects entity ids: first the answer id of every dialog, then the id of every
        /// entity linked in any part of the given utterances. Duplicates are kept.
        /// </summary>
        /// <param name="trainDialogs">Dialogs providing answer mids.</param>
        /// <param name="utterances">Linked utterances providing entity mids.</param>
        /// <returns>The collected ids in the order described above.</returns>
        private static IEnumerable <string> getQAEntities(QuestionDialog[] trainDialogs, LinkedUtterance[] utterances)
        {
            var collectedIds = new List <string>();

            collectedIds.AddRange(trainDialogs.Select(dialog => FreebaseDbProvider.GetId(dialog.AnswerMid)));

            foreach (var utterance in utterances)
            {
                foreach (var part in utterance.Parts)
                {
                    foreach (var entity in part.Entities)
                    {
                        collectedIds.Add(FreebaseDbProvider.GetId(entity.Mid));
                    }
                }
            }

            return(collectedIds);
        }
示例#12
0
        /// <summary>
        /// Scores an entity by how many of its database targets point to one of the
        /// entities found in the question. Every matching target adds 1 to the score.
        /// </summary>
        /// <param name="entity">Entity whose database entry is inspected.</param>
        /// <param name="questionEntities">Entities of the question; their ids form the match set.</param>
        /// <returns>
        /// The number of matching targets, or 0 when the database has no entry for the entity.
        /// </returns>
        private double questionContextScore(EntityInfo entity, IEnumerable <EntityInfo> questionEntities)
        {
            var entityIds = new HashSet <string>(questionEntities.Select(e => FreebaseDbProvider.GetId(e.Mid)));

            var entry = Db.GetEntryFromId(FreebaseDbProvider.GetId(entity.Mid));
            if (entry == null)
            {
                // The lookup can come back empty; such an entity has no targets to
                // match, so it gets the neutral score instead of a NullReferenceException.
                return(0.0);
            }

            var score = 0.0;
            foreach (var target in entry.Targets)
            {
                if (entityIds.Contains(target.Item2))
                {
                    score += 1;
                }
            }

            return(score);
        }
示例#13
0
        /// <summary>
        /// Returns the graph stored in the computation cache under "knowledge_all_train"
        /// (version 1). The factory registers the mid of every question/answer entity as a
        /// target of the dump processor, runs one iteration and wraps the layer built from
        /// all ids of the processor into a <c>ComposedGraph</c>.
        /// </summary>
        /// <param name="simpleQuestions">Dump processor used to build the layer.</param>
        /// <param name="trainDialogs">Dialogs passed to <c>getQAEntities</c>.</param>
        /// <param name="linkedUtterances">Linked utterances passed to <c>getQAEntities</c>.</param>
        /// <returns>The value produced by <c>ComputationCache.Load</c>.</returns>
        private static ComposedGraph cachedEntityGraph(SimpleQuestionDumpProcessor simpleQuestions, QuestionDialog[] trainDialogs, LinkedUtterance[] linkedUtterances)
        {
            return(ComputationCache.Load("knowledge_all_train", 1, () =>
            {
                foreach (var trainEntityId in getQAEntities(trainDialogs, linkedUtterances))
                {
                    simpleQuestions.AddTargetMid(FreebaseDbProvider.GetMid(trainEntityId));
                }

                simpleQuestions.RunIteration();

                // The layer covers every id known to the processor, not only the train entities.
                return new ComposedGraph(simpleQuestions.GetLayerFromIds(simpleQuestions.AllIds));
            }));
        }
示例#14
0
        /// <summary>
        /// Renders the utterance as a space-separated string in which every part that has
        /// linked entities is replaced by the id of its first entity and every other part
        /// is represented by its token.
        /// </summary>
        /// <param name="linkedUtterance">Utterance whose parts are rendered.</param>
        /// <returns>The rendered parts joined by single spaces.</returns>
        static string getEntityUtterance(LinkedUtterance linkedUtterance)
        {
            var renderedParts = linkedUtterance.Parts.Select(part =>
                                                             part.Entities.Any()
                    ? FreebaseDbProvider.GetId(part.Entities.First().Mid)
                    : part.Token);

            return(string.Join(" ", renderedParts.ToList()));
        }
示例#15
0
        /// <summary>
        /// Measures the average time of <c>GetEntryFromId</c> on randomly chosen ids of the
        /// simple-questions dump. Runs 1000 samples of 1000 lookups each and prints the
        /// per-lookup time of every sample, followed by an accumulated checksum of label
        /// lengths.
        /// </summary>
        internal static void BenchmarkFreebaseProviderNodes()
        {
            var db = new FreebaseDbProvider(Configuration.FreebaseDB_Path);

            // Result is not used; NOTE(review): presumably a warm-up lookup - confirm.
            db.GetEntryFromId("02rwv9s");

            var dump = Configuration.GetSimpleQuestionsDump();

            dump.RunIteration(100000);
            Console.WriteLine("Dump prepared.");
            var ids = dump.AllIds.ToArray();

            Console.WriteLine("Id count " + ids.Length);

            if (ids.Length == 0)
            {
                // Indexing into an empty array below would throw; there is nothing to measure.
                Console.WriteLine("No ids available, nothing to benchmark.");
                return;
            }

            var idRepetitionCount = 1000;

            var rnd    = new Random();
            var output = 0;

            // Stopwatch is monotonic and high-resolution, unlike DateTime.Now.
            var stopwatch = new System.Diagnostics.Stopwatch();

            for (var sample = 0; sample < 1000; ++sample)
            {
                stopwatch.Restart();
                for (var i = 0; i < idRepetitionCount; ++i)
                {
                    var rndIndex = rnd.Next(ids.Length);
                    var id       = ids[rndIndex];

                    var info = db.GetEntryFromId(id);
                    if (info == null)
                    {
                        continue;
                    }

                    // Accumulated and printed at the end so the lookups have an observable result.
                    output += info.Label.Length;
                }
                stopwatch.Stop();
                Console.WriteLine("Time for entity: {0:0.000}ms", stopwatch.Elapsed.TotalMilliseconds / idRepetitionCount);
            }

            Console.WriteLine(output);
        }
示例#16
0
        /// <summary>
        /// Sets up the graph navigation experiment: loads the phrases from the seed dialogs,
        /// opens the navigation data stored in "navigation_data.nvd" under the experiment
        /// root, creates the linker for the phrases and generates <paramref name="taskCount"/>
        /// tasks through a <c>CrowdFlowerCodeWriter</c>.
        /// </summary>
        /// <param name="experimentsRoot">Passed to the base constructor.</param>
        /// <param name="experimentId">Passed to the base constructor and to the code writer.</param>
        /// <param name="taskCount">Number of tasks to generate.</param>
        /// <param name="seedDialogs">Dialogs from which the phrases are loaded.</param>
        public GraphNavigationExperiment(string experimentsRoot, string experimentId, int taskCount, QuestionDialogDatasetReader seedDialogs)
            : base(experimentsRoot, experimentId)
        {
            _db = Configuration.Db;
            var phrases = LoadPhrases(seedDialogs, _db);

            _phrases = phrases.ToArray();
            var navigationDataPath = Path.Combine(ExperimentRootPath, "navigation_data.nvd");

            _data = new NavigationData(navigationDataPath);

            _linker = createLinker(_phrases);

            var writer = new CrowdFlowerCodeWriter(ExperimentRootPath, experimentId);

            try
            {
                //generate all tasks
                for (var taskIndex = 0; taskIndex < taskCount; ++taskIndex)
                {
                    add(taskIndex, writer);
                }
            }
            finally
            {
                // Close the writer even when task generation throws, so it is not left open.
                writer.Close();
            }
        }
示例#17
0
        /// <summary>
        /// Builds the report for one question: links the question, looks up the entry of the
        /// expected answer and, for every answer hint, links the hint and extracts its first
        /// answer denotation. Afterwards the most frequent denotation is determined and
        /// compared with the expected answer.
        /// </summary>
        /// <param name="info">Question together with its answer hints.</param>
        /// <param name="answerId">Id of the expected answer.</param>
        /// <param name="extractor">Provides the linker, the database and the answer extraction.</param>
        internal QuestionReport(QuestionInfo info, string answerId, LinkBasedExtractor extractor)
        {
            var linker = extractor.Linker;

            Question = linker.LinkUtterance(info.Utterance.OriginalSentence);

            AnswerLabel = extractor.Db.GetEntryFromId(answerId);
            var denotations = new List <Tuple <LinkedUtterance, EntityInfo, bool> >();

            foreach (var answerHint in info.AnswerHints)
            {
                var linkedHint = linker.LinkUtterance(answerHint.OriginalSentence, Question.Entities);
                var denotation = extractor.ExtractAnswerEntity(info.Utterance.OriginalSentence, answerHint.OriginalSentence).FirstOrDefault();

                // FirstOrDefault yields the default value when the hint produced no answer
                // entity. Such a hint carries no denotation, so it is skipped instead of
                // failing on denotation.Mid here and again in the grouping below.
                // ReferenceEquals is used so the check does not rely on an == overload.
                if (ReferenceEquals(denotation, null))
                {
                    continue;
                }

                var item = Tuple.Create(linkedHint, denotation, answerId == FreebaseDbProvider.GetId(denotation.Mid));
                denotations.Add(item);
            }

            CollectedDenotations = denotations;

            // Count how many hints agreed on each denotation id.
            var denotationCounts = from denotation in denotations
                                   group denotation by FreebaseDbProvider.GetId(denotation.Item2.Mid)
                                   into grouped
                                   select Tuple.Create(grouped.Key, grouped.Count());

            var maxDenotation = denotationCounts.OrderByDescending(t => t.Item2).FirstOrDefault();

            if (maxDenotation != null && AnswerLabel != null)
            {
                HasCorrectDenotation = maxDenotation.Item1 == AnswerLabel.Id;
            }

            if (maxDenotation != null)
            {
                TopDenotationEvidence = maxDenotation.Item2;
            }
        }
示例#18
0
        /// <summary>
        /// Renders a linked utterance as an HTML fragment. Parts without entities are written
        /// as their token; parts with entities become a link to "/database?query=ID" for the
        /// first entity, labelled with its best alias match in square brackets. Parts are
        /// separated by single spaces.
        /// </summary>
        /// <param name="utterance">Utterance to render.</param>
        /// <returns>
        /// The HTML fragment. Tokens and alias text are HTML-encoded and the id is
        /// URL-escaped, so text containing markup characters cannot break out of the
        /// generated markup.
        /// </returns>
        public static string LinkedUtteranceLink(LinkedUtterance utterance)
        {
            var builder = new StringBuilder();

            foreach (var part in utterance.Parts)
            {
                if (builder.Length > 0)
                {
                    builder.Append(' ');
                }

                if (!part.Entities.Any())
                {
                    // Tokens come from the utterance text, so they are encoded before
                    // being placed into the HTML output.
                    builder.Append(System.Net.WebUtility.HtmlEncode(part.Token));
                    continue;
                }

                var entity = part.Entities.First();

                // The id goes into a query string inside a quoted attribute: escape it for
                // the URL first and then for HTML.
                var entityId = FreebaseDbProvider.GetId(entity.Mid) ?? string.Empty;
                var hrefId   = System.Net.WebUtility.HtmlEncode(System.Uri.EscapeDataString(entityId));
                var caption  = System.Net.WebUtility.HtmlEncode(entity.BestAliasMatch);

                builder.AppendFormat("<a href='/database?query={0}'>[{1}]</a>", hrefId, caption);
            }

            return(builder.ToString());
        }
示例#19
0
 /// <summary>
 /// Stores the database and extends <c>_nonInformativeWords2</c> with the contents of
 /// <c>_nonInformativeWords1</c> and with the verbs returned by <c>loadVerbs</c>.
 /// </summary>
 /// <param name="db">Database assigned to <c>Db</c>.</param>
 /// <param name="verbsLexicon">Value handed to <c>loadVerbs</c>; defaults to null.</param>
 internal UtteranceLinker(FreebaseDbProvider db, string verbsLexicon = null)
 {
     Db = db;

     _nonInformativeWords2.UnionWith(_nonInformativeWords1);

     var lexiconVerbs = loadVerbs(verbsLexicon);
     _nonInformativeWords2.UnionWith(lexiconVerbs);
 }
示例#20
0
        /// <summary>
        /// Renders an HTML link to "/database?query=ID" for the entity, labelled with the
        /// entity label followed by the id in parentheses.
        /// </summary>
        /// <param name="entity">Entity to link to.</param>
        /// <returns>
        /// The HTML fragment. Label and id are HTML-encoded and the id in the address is
        /// URL-escaped, so text containing markup characters cannot break out of the
        /// generated markup.
        /// </returns>
        public static string EntityLink(EntityInfo entity)
        {
            var id = FreebaseDbProvider.GetId(entity.Mid);

            // The id is used twice with different escaping needs: inside the query string
            // (URL-escaped, then HTML-encoded) and as visible text (HTML-encoded only).
            var hrefId    = System.Net.WebUtility.HtmlEncode(System.Uri.EscapeDataString(id ?? string.Empty));
            var visibleId = System.Net.WebUtility.HtmlEncode(id);
            var label     = System.Net.WebUtility.HtmlEncode(entity.Label);

            return(string.Format("<a href='/database?query={0}'>{1} ({2})</a>", hrefId, label, visibleId));
        }
示例#21
0
        /// <summary>
        /// Joins the names the database holds for the id with commas and removes every
        /// double-quote character from the result.
        /// </summary>
        /// <param name="freebaseId">Id whose names are requested.</param>
        /// <param name="db">Database queried for the names.</param>
        /// <returns>The comma-separated names without double quotes.</returns>
        private static string getNamesRepresentation(string freebaseId, FreebaseDbProvider db)
        {
            var nameArray   = db.GetNames(freebaseId).ToArray();
            var joinedNames = string.Join(",", nameArray);

            return(joinedNames.Replace("\"", ""));
        }
示例#22
0
        /// <summary>
        /// Builds the key for an utterance and its optional context. Without a context the
        /// key is the utterance itself; otherwise it is the utterance, a "|" and the
        /// comma-separated ids of the context entities ordered by their mid.
        /// </summary>
        /// <param name="utterance">Utterance text.</param>
        /// <param name="context">Context entities, or null for no context.</param>
        /// <returns>The key string.</returns>
        private string getKey(string utterance, IEnumerable <EntityInfo> context)
        {
            if (context != null)
            {
                var orderedIds = from entity in context
                                 orderby entity.Mid
                                 select FreebaseDbProvider.GetId(entity.Mid);

                return(utterance + "|" + string.Join(",", orderedIds));
            }

            return(utterance);
        }
示例#23
0
        /// <summary>
        /// Converts the Freebase mid to an id and returns the graph node for that id.
        /// </summary>
        /// <param name="freebaseMid">Mid to convert.</param>
        /// <param name="graph">Graph queried for the node.</param>
        /// <returns>The node returned by <c>graph.GetNode</c>.</returns>
        static NodeReference getNode(string freebaseMid, ComposedGraph graph)
        {
            return(graph.GetNode(FreebaseDbProvider.GetId(freebaseMid)));
        }
 /// <summary>
 /// Creates the linker on top of a new <c>UtteranceLinker</c> built from the same arguments.
 /// </summary>
 /// <param name="db">Database handed to the inner linker.</param>
 /// <param name="verbsLexicon">Verbs lexicon handed to the inner linker; defaults to null.</param>
 internal PopularityMaximizationLinker(FreebaseDbProvider db, string verbsLexicon = null)
 {
     var innerLinker = new UtteranceLinker(db, verbsLexicon);

     _linker = innerLinker;
 }
示例#25
0
        /// <summary>
        /// Returns the linked form of every training question, taken from the computation
        /// cache under "linked_all_train" (version 1). The factory links each dialog
        /// question with a <c>GraphDisambiguatedLinker</c> ("./verbs.lex") and keeps the
        /// first of the returned links.
        /// </summary>
        /// <param name="simpleQuestions">Not used by this method.</param>
        /// <param name="db">Database handed to the linker.</param>
        /// <param name="trainDialogs">Dialogs whose questions are linked.</param>
        /// <returns>The linked utterances as an array.</returns>
        private static LinkedUtterance[] cachedLinkedUtterancesTrain(SimpleQuestionDumpProcessor simpleQuestions, FreebaseDbProvider db, QuestionDialog[] trainDialogs)
        {
            return(ComputationCache.Load("linked_all_train", 1, () =>
            {
                var trainLinker = new GraphDisambiguatedLinker(db, "./verbs.lex");

                // Materialized as a list so the cached value keeps the same type.
                return trainDialogs
                       .Select(dialog => trainLinker.LinkUtterance(dialog.Question, 1).First())
                       .ToList();
            }).ToArray());
        }
 /// <summary>
 /// Passes the database and verbs lexicon to the base constructor and records whether
 /// graph disambiguation is to be used.
 /// </summary>
 /// <param name="db">Database handed to the base constructor.</param>
 /// <param name="verbsLexicon">Verbs lexicon handed to the base constructor.</param>
 /// <param name="useGraphDisambiguation">Value stored in <c>_useDisambiguation</c>; defaults to true.</param>
 internal GraphDisambiguatedLinker(FreebaseDbProvider db, string verbsLexicon, bool useGraphDisambiguation = true)
     : base(db, verbsLexicon)
 {
     this._useDisambiguation = useGraphDisambiguation;
 }
示例#27
0
 /// <summary>
 /// Stores the linker and the database used by the extractor.
 /// </summary>
 /// <param name="linker">Value assigned to <c>Linker</c>.</param>
 /// <param name="db">Value assigned to <c>Db</c>.</param>
 internal LinkBasedExtractor(ILinker linker, FreebaseDbProvider db)
 {
     Linker = linker;
     Db     = db;
 }
示例#28
0
        /// <summary>
        /// Collects phrases from the answers of the seed dialogs. For every answer that has
        /// a database entry with at least two aliases, the first alias is taken if it passes
        /// <c>meetsPhraseRequirements</c>. Duplicates are removed.
        /// </summary>
        /// <param name="seedDialogs">Dialogs whose answer mids are looked up.</param>
        /// <param name="db">Database queried for the answer entries.</param>
        /// <returns>The distinct phrases in order of first occurrence.</returns>
        internal static IEnumerable <string> LoadPhrases(QuestionDialogDatasetReader seedDialogs, FreebaseDbProvider db)
        {
            // All entries are fetched up front, before any alias is inspected.
            var answerEntries = seedDialogs.Dialogs
                                .Select(dialog => db.GetEntryFromMid(dialog.AnswerMid))
                                .Where(entry => entry != null)
                                .ToArray();

            var firstAliases = answerEntries
                               .Where(entry => entry.Aliases.Count() >= 2)
                               .Select(entry => entry.Aliases.First())
                               .Where(alias => meetsPhraseRequirements(alias));

            return(firstAliases.Distinct().ToArray());
        }