/// <summary>
/// Links the question and produces both its signature and the graph nodes of every linked entity.
/// </summary>
/// <param name="question">Question text handed to the linker.</param>
/// <param name="questionSignature">
/// Lower-cased signature built from the linked parts, separated by single spaces. A part without
/// entities contributes its token; a part with entities contributes "$" followed by the number of
/// entities collected so far (including the ones of that part).
/// </param>
/// <param name="questionEntities">Graph nodes for all entities of all parts, in part order.</param>
private void extractSignature(string question, out string questionSignature, out IEnumerable<NodeReference> questionEntities)
{
    var collectedNodes = new List<NodeReference>();
    var builder = new StringBuilder();

    foreach (var part in _linker(question).Parts)
    {
        var partNodes = part.Entities
            .Select(entity => _graph.GetNode(FreebaseDbProvider.GetId(entity.Mid)))
            .ToArray();
        collectedNodes.AddRange(partNodes);

        // Separator is only written once something has already been appended.
        if (builder.Length > 0)
        {
            builder.Append(' ');
        }

        var signaturePart = partNodes.Length == 0
            ? part.Token
            : "$" + collectedNodes.Count;
        builder.Append(signaturePart);
    }

    questionEntities = collectedNodes;
    questionSignature = builder.ToString().ToLowerInvariant();
}
/// <summary>
/// Renders the database page. The "query" GET parameter is echoed back as a parameter; when it is
/// present it is either resolved directly as an id ("result_entry") or, if the database does not
/// contain that id, used for a scored document search ("result_entries", "result_entries_count").
/// </summary>
public void database()
{
    var store = Configuration.Db;
    var queryText = GET("query");
    SetParam("query", queryText);

    if (queryText != null)
    {
        var knowledgeId = FreebaseDbProvider.TryGetId(queryText);
        if (store.ContainsId(knowledgeId))
        {
            // The query names a known id: show that single entry.
            SetParam("result_entry", store.GetEntryFromId(knowledgeId));
        }
        else
        {
            // Otherwise fall back to the scored search and show all hits.
            var entries = store.GetScoredDocs(queryText)
                .Select(scored => store.GetEntry(scored))
                .ToArray();
            SetParam("result_entries", entries);
            SetParam("result_entries_count", entries.Length);
        }
    }

    Layout("layout.haml");
    Render("database.haml");
}
/// <summary>
/// Builds a report over the questions stored in the given knowledge.
/// </summary>
/// <param name="knowledge">Knowledge whose storage path and questions are reported.</param>
/// <param name="extractor">Extractor passed on to every <c>QuestionReport</c>.</param>
/// <param name="questions">Collection used to look up the answer mid of a question sentence.</param>
/// <param name="fruitOnly">
/// When true, a report is kept only if its top denotation evidence is at least 2 and is more than
/// half of the collected denotations.
/// </param>
internal KnowledgeReport(ExtractionKnowledge knowledge, LinkBasedExtractor extractor, QuestionCollection questions, bool fruitOnly)
{
    StoragePath = knowledge.StoragePath;
    QuestionCount = knowledge.Questions.Count();

    var acceptedReports = new List<QuestionReport>();

    // Questions without any answer hint are not reported at all.
    foreach (var question in knowledge.Questions.Where(q => q.AnswerHints.Any()))
    {
        var answerMid = questions.GetAnswerMid(question.Utterance.OriginalSentence);
        var report = new QuestionReport(question, FreebaseDbProvider.GetId(answerMid), extractor);

        var hasEnoughEvidence =
            !fruitOnly
            || (report.TopDenotationEvidence >= 2
                && report.CollectedDenotations.Count() - report.TopDenotationEvidence * 2 < 0);

        if (hasEnoughEvidence)
        {
            acceptedReports.Add(report);
        }
    }

    Questions = acceptedReports.OrderByDescending(r => r.CollectedDenotations.Count());
}
/// <summary>
/// Creates a disk cached linker (version 1) backed by a graph disambiguated linker.
/// The cache file is "../" + <paramref name="storage"/> + ".link" and result caching is switched on.
/// </summary>
/// <param name="db">Database handed to both linkers.</param>
/// <param name="storage">Name used to build the cache file path.</param>
/// <returns>The configured disk cached linker.</returns>
internal static DiskCachedLinker CreateCachedLinker(FreebaseDbProvider db, string storage)
{
    var graphLinker = new GraphDisambiguatedLinker(db, "./verbs.lex", useGraphDisambiguation: true);
    var cachePath = "../" + storage + ".link";

    return new DiskCachedLinker(cachePath, 1, (u, c) => graphLinker.LinkUtterance(u, c), db)
    {
        CacheResult = true
    };
}
/// <summary>
/// Stores the cache path, cache version, link provider and database, then loads the cache.
/// </summary>
/// <param name="cachePath">Path stored in <c>_cachePath</c>.</param>
/// <param name="version">Version stored in <c>_version</c>.</param>
/// <param name="provider">Provider stored in <c>_provider</c>.</param>
/// <param name="db">Database stored in <c>_db</c>.</param>
internal DiskCachedLinker(string cachePath, int version, LinkProvider provider, FreebaseDbProvider db)
{
    _db = db;
    _provider = provider;
    _version = version;
    _cachePath = cachePath;

    // Every field is assigned before loadCache() runs.
    loadCache();
}
/// <summary>
/// Creates the linker for the full data: a disk cached linker (version 1) over "../full.link",
/// backed by a graph disambiguated linker, with result caching switched off.
/// </summary>
/// <param name="db">Database handed to both linkers.</param>
/// <returns>The configured linker.</returns>
private static ILinker getFullDataLinker(FreebaseDbProvider db)
{
    var graphLinker = new GraphDisambiguatedLinker(db, "./verbs.lex", useGraphDisambiguation: true);

    return new DiskCachedLinker("../full.link", 1, (u, c) => graphLinker.LinkUtterance(u, c), db)
    {
        CacheResult = false
    };
}
/// <summary>
/// Looks up the entity info for an entry: the entry id is converted to a mid and resolved
/// through the database of the linker.
/// </summary>
/// <param name="entity">Entry whose <c>Id</c> is resolved.</param>
/// <returns>Whatever <c>GetEntityInfoFromMid</c> returns for the entry's mid.</returns>
private EntityInfo entityInfo(FreebaseEntry entity)
{
    return _linker.GetDb().GetEntityInfoFromMid(FreebaseDbProvider.GetMid(entity.Id));
}
/// <summary>
/// Development run of the answer generalization: trains a <c>PatternGeneralizer</c> on the train
/// dialogs and asks it a single hard-coded question. The evaluation loop is currently disabled.
/// </summary>
internal static void RunAnswerGeneralizationDev()
{
    var trainDataset = Configuration.GetQuestionDialogsTrain();
    // Loaded but not used while the evaluation below stays disabled.
    var devDataset = Configuration.GetQuestionDialogsDev();
    var simpleQuestions = Configuration.GetSimpleQuestionsDump();
    var db = Configuration.Db;

    var trainDialogs = trainDataset.Dialogs.ToArray();
    var linkedUtterancesTrain = cachedLinkedUtterancesTrain(simpleQuestions, db, trainDialogs);

    //var graph = cachedEntityGraph(simpleQuestions, trainDialogs, linkedUtterancesTrain);
    var graph = new ComposedGraph(new FreebaseGraphLayer(db));

    var linker = new GraphDisambiguatedLinker(db, "./verbs.lex");
    var trainQuestions = trainDialogs.Select(d => d.Question).ToArray();
    var cachedLinker = new CachedLinker(trainQuestions, linkedUtterancesTrain, linker);

    var generalizer = new PatternGeneralizer(graph, cachedLinker.LinkUtterance);

    // Number of trailing dialogs excluded from training.
    var testDialogs = 0;

    //train
    foreach (var trainDialog in trainDialogs.Take(trainDialogs.Length - testDialogs))
    {
        var answerNode = graph.GetNode(FreebaseDbProvider.GetId(trainDialog.AnswerMid));
        generalizer.AddExample(trainDialog.Question, answerNode);
    }

    // Disabled evaluation on dev set:
    //
    // foreach (var devDialog in trainDialogs)
    // {
    //     writeLine(devDialog.Question);
    //     writeLine("\t" + cachedLinker.LinkUtterance(devDialog.Question));
    //     var desiredAnswerLabel = db.GetLabel(devDialog.AnswerMid);
    //     writeLine("\tDesired answer: {0} ({1})", desiredAnswerLabel, devDialog.AnswerMid);
    //     var answer = generalizer.GetAnswer(devDialog.Question);
    //     if (answer == null)
    //     {
    //         writeLine("\tNo answer.");
    //     }
    //     else
    //     {
    //         var answerLabel = db.GetLabel(FreebaseLoader.GetMid(answer.Value.Data));
    //         writeLine("\tGeneralizer output: {0} {1}", answerLabel, answer);
    //     }
    //     writeLine();
    // }

    var result = generalizer.GetAnswer("What county is ovens auditorium in");
    //var result = generalizer.GetAnswer("What is Obama gender?");
    //var result = generalizer.GetAnswer("is mir khasim ali of the male or female gender");
}
/// <summary>
/// Writes debugging information about a path substitution to the console: its original trace,
/// its rank, the substitution node and up to 20 current nodes of the original trace, each with
/// the label looked up in the configured database.
/// </summary>
/// <param name="substitution">Substitution to describe.</param>
internal static void DebugInfo(PathSubstitution substitution)
{
    var db = Configuration.Db;
    var trace = substitution.OriginalTrace;

    Console.WriteLine("Substitution trace: " + trace.ToString());
    Console.WriteLine("Rank: " + substitution.Rank);

    var substitutionLabel = db.GetLabel(FreebaseDbProvider.GetMid(substitution.Substitution.Data));
    Console.WriteLine("Substitution node: {0} ({1})", substitutionLabel, substitution.Substitution);

    // Only the first 20 nodes are printed.
    foreach (var node in trace.CurrentNodes.Take(20))
    {
        var nodeLabel = db.GetLabel(FreebaseDbProvider.GetMid(node.Data));
        Console.WriteLine("\t{0} ({1})", nodeLabel, node);
    }
}
/// <summary>
/// Prints the label and description of every given id to the console, followed by an empty line.
/// </summary>
/// <param name="graph">Not used by this method.</param>
/// <param name="db">Database used for the label and description lookups.</param>
/// <param name="ids">Ids to print; each is converted to a mid before the lookup.</param>
private static void printInfo(ComposedGraph graph, FreebaseDbProvider db, params string[] ids)
{
    for (var index = 0; index < ids.Length; ++index)
    {
        var id = ids[index];
        var mid = FreebaseDbProvider.GetMid(id);

        // Both lookups happen before anything is written.
        var label = db.GetLabel(mid);
        var description = db.GetDescription(mid);

        Console.WriteLine(id + " " + label);
        Console.WriteLine("\t" + description);
        Console.WriteLine();
    }
}
/// <summary>
/// Collects entity ids from questions and answers: first the answer id of every dialog, then the
/// ids of all entities linked in the utterances. Duplicates are not removed.
/// </summary>
/// <param name="trainDialogs">Dialogs whose answer mids are converted to ids.</param>
/// <param name="utterances">Linked utterances whose part entities are converted to ids.</param>
/// <returns>List of ids, answers first.</returns>
private static IEnumerable<string> getQAEntities(QuestionDialog[] trainDialogs, LinkedUtterance[] utterances)
{
    var ids = trainDialogs
        .Select(dialog => FreebaseDbProvider.GetId(dialog.AnswerMid))
        .ToList();

    var linkedEntityIds =
        from utterance in utterances
        from part in utterance.Parts
        from entity in part.Entities
        select FreebaseDbProvider.GetId(entity.Mid);
    ids.AddRange(linkedEntityIds);

    return ids;
}
/// <summary>
/// Scores an entity by its connection to the question entities: every target of the entity's
/// database entry whose <c>Item2</c> is the id of a question entity adds one point.
/// </summary>
/// <param name="entity">Entity being scored.</param>
/// <param name="questionEntities">Entities whose ids form the question context.</param>
/// <returns>Number of matching targets as a double.</returns>
private double questionContextScore(EntityInfo entity, IEnumerable<EntityInfo> questionEntities)
{
    var contextIds = new HashSet<string>(questionEntities.Select(q => FreebaseDbProvider.GetId(q.Mid)));
    var entry = Db.GetEntryFromId(FreebaseDbProvider.GetId(entity.Mid));

    var matchingTargets = entry.Targets.Count(target => contextIds.Contains(target.Item2));
    return (double)matchingTargets;
}
/// <summary>
/// Loads the entity graph through <c>ComputationCache</c> (key "knowledge_all_train", version 1).
/// The builder registers the mid of every question/answer entity as a target of the dump
/// processor, runs one iteration and composes a graph from the layer over all processor ids.
/// </summary>
/// <param name="simpleQuestions">Dump processor that receives the targets and provides the layer.</param>
/// <param name="trainDialogs">Dialogs passed to <c>getQAEntities</c>.</param>
/// <param name="linkedUtterances">Linked utterances passed to <c>getQAEntities</c>.</param>
/// <returns>The graph returned by the cache.</returns>
private static ComposedGraph cachedEntityGraph(SimpleQuestionDumpProcessor simpleQuestions, QuestionDialog[] trainDialogs, LinkedUtterance[] linkedUtterances)
{
    return ComputationCache.Load("knowledge_all_train", 1, () =>
    {
        //var layer = simpleQuestions.GetLayerFromIds(trainEntities);
        var targetMids = getQAEntities(trainDialogs, linkedUtterances)
            .Select(entityId => FreebaseDbProvider.GetMid(entityId));
        foreach (var mid in targetMids)
        {
            simpleQuestions.AddTargetMid(mid);
        }

        simpleQuestions.RunIteration();

        return new ComposedGraph(simpleQuestions.GetLayerFromIds(simpleQuestions.AllIds));
    });
}
/// <summary>
/// Renders a linked utterance as text where every part with entities is replaced by the id of
/// its first entity and every other part keeps its token. Parts are joined by single spaces.
/// </summary>
/// <param name="linkedUtterance">Utterance whose parts are rendered.</param>
/// <returns>The space-joined representation.</returns>
static string getEntityUtterance(LinkedUtterance linkedUtterance)
{
    var words = linkedUtterance.Parts.Select(part =>
        part.Entities.Any()
            ? FreebaseDbProvider.GetId(part.Entities.First().Mid)
            : part.Token);

    return string.Join(" ", words);
}
/// <summary>
/// Benchmarks <c>GetEntryFromId</c> on random ids of the simple questions dump. Runs 1000 samples
/// of 1000 lookups each and prints the average time per lookup for every sample, followed by the
/// accumulated label length (printed so the lookups contribute to an observable result).
/// </summary>
/// <remarks>
/// Samples are timed with <c>System.Diagnostics.Stopwatch</c> rather than <c>DateTime.Now</c>,
/// which has coarse resolution and follows wall-clock adjustments.
/// </remarks>
internal static void BenchmarkFreebaseProviderNodes()
{
    var db = new FreebaseDbProvider(Configuration.FreebaseDB_Path);

    // The result of this lookup is not needed; the call itself is kept because it ran before
    // the measurement in the original code (presumably a warm-up — TODO confirm).
    db.GetEntryFromId("02rwv9s");

    var dump = Configuration.GetSimpleQuestionsDump();
    dump.RunIteration(100000);
    Console.WriteLine("Dump prepared.");

    var ids = dump.AllIds.ToArray();
    Console.WriteLine("Id count " + ids.Length);
    if (ids.Length == 0)
    {
        // Without ids there is nothing to sample; indexing the empty array would throw.
        Console.WriteLine("No ids available, benchmark skipped.");
        return;
    }

    const int sampleCount = 1000;
    const int idRepetitionCount = 1000;

    var rnd = new Random();
    var output = 0;
    for (var sample = 0; sample < sampleCount; ++sample)
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        for (var i = 0; i < idRepetitionCount; ++i)
        {
            var id = ids[rnd.Next(ids.Length)];
            var info = db.GetEntryFromId(id);
            if (info == null)
            {
                // Missing entries still count as a lookup in the average.
                continue;
            }

            output += info.Label.Length;
        }
        stopwatch.Stop();

        Console.WriteLine("Time for entity: {0:0.000}ms", stopwatch.Elapsed.TotalMilliseconds / idRepetitionCount);
    }

    Console.WriteLine(output);
}
/// <summary>
/// Creates the graph navigation experiment: loads the phrases from the seed dialogs, opens the
/// navigation data stored in "navigation_data.nvd" under the experiment root, creates the linker
/// and generates <paramref name="taskCount"/> tasks through a <c>CrowdFlowerCodeWriter</c>.
/// </summary>
/// <param name="experimentsRoot">Passed to the base constructor.</param>
/// <param name="experimentId">Passed to the base constructor and to the code writer.</param>
/// <param name="taskCount">Number of tasks to generate (indexes 0 .. taskCount - 1).</param>
/// <param name="seedDialogs">Dialogs from which the phrases are loaded.</param>
public GraphNavigationExperiment(string experimentsRoot, string experimentId, int taskCount, QuestionDialogDatasetReader seedDialogs)
    : base(experimentsRoot, experimentId)
{
    _db = Configuration.Db;

    var phrases = LoadPhrases(seedDialogs, _db);
    _phrases = phrases.ToArray();

    var navigationDataPath = Path.Combine(ExperimentRootPath, "navigation_data.nvd");
    _data = new NavigationData(navigationDataPath);

    _linker = createLinker(_phrases);

    var writer = new CrowdFlowerCodeWriter(ExperimentRootPath, experimentId);
    try
    {
        //generate all tasks
        for (var taskIndex = 0; taskIndex < taskCount; ++taskIndex)
        {
            add(taskIndex, writer);
        }
    }
    finally
    {
        // Close the writer even when task generation throws, so it is not left open.
        writer.Close();
    }
}
/// <summary>
/// Builds the report of a single question: links the question, looks up the answer entry and,
/// for every answer hint, records the linked hint, the first extracted denotation and whether
/// that denotation has the expected answer id. Afterwards the most frequent denotation id
/// determines <c>TopDenotationEvidence</c> and (when the answer entry exists)
/// <c>HasCorrectDenotation</c>.
/// </summary>
/// <param name="info">Question with its utterance and answer hints.</param>
/// <param name="answerId">Id of the expected answer.</param>
/// <param name="extractor">Provides the linker, the database and the answer extraction.</param>
internal QuestionReport(QuestionInfo info, string answerId, LinkBasedExtractor extractor)
{
    var linker = extractor.Linker;
    var questionText = info.Utterance.OriginalSentence;

    Question = linker.LinkUtterance(questionText);
    AnswerLabel = extractor.Db.GetEntryFromId(answerId);

    var denotations = new List<Tuple<LinkedUtterance, EntityInfo, bool>>();
    foreach (var answerHint in info.AnswerHints)
    {
        var hintText = answerHint.OriginalSentence;
        var linkedHint = linker.LinkUtterance(hintText, Question.Entities);

        // NOTE(review): FirstOrDefault yields the default value when nothing is extracted and
        // denotation.Mid is read without a check — confirm this cannot be a null reference.
        var denotation = extractor.ExtractAnswerEntity(questionText, hintText).FirstOrDefault();
        var isExpectedAnswer = answerId == FreebaseDbProvider.GetId(denotation.Mid);

        denotations.Add(Tuple.Create(linkedHint, denotation, isExpectedAnswer));
    }
    CollectedDenotations = denotations;

    // Most frequent denotation id together with its number of occurrences.
    var maxDenotation = denotations
        .GroupBy(d => FreebaseDbProvider.GetId(d.Item2.Mid))
        .Select(grouped => Tuple.Create(grouped.Key, grouped.Count()))
        .OrderByDescending(t => t.Item2)
        .FirstOrDefault();

    if (maxDenotation != null)
    {
        if (AnswerLabel != null)
        {
            HasCorrectDenotation = maxDenotation.Item1 == AnswerLabel.Id;
        }

        TopDenotationEvidence = maxDenotation.Item2;
    }
}
/// <summary>
/// Renders a linked utterance as HTML. Parts without entities are written as their token; parts
/// with entities become a link to "/database?query=ID" for the first entity, showing its best
/// alias match in square brackets. Parts are separated by single spaces.
/// </summary>
/// <remarks>
/// Tokens and alias text are HTML-encoded and the id is URL-escaped, so text containing
/// characters such as '&lt;', '&amp;' or quotes cannot break or inject markup.
/// </remarks>
/// <param name="utterance">Utterance whose parts are rendered.</param>
/// <returns>HTML fragment for the utterance.</returns>
public static string LinkedUtteranceLink(LinkedUtterance utterance)
{
    var builder = new StringBuilder();
    foreach (var part in utterance.Parts)
    {
        if (builder.Length > 0)
        {
            builder.Append(' ');
        }

        if (!part.Entities.Any())
        {
            builder.Append(System.Net.WebUtility.HtmlEncode(part.Token));
            continue;
        }

        var entity = part.Entities.First();
        var id = FreebaseDbProvider.GetId(entity.Mid);

        builder.AppendFormat(
            "<a href='/database?query={0}'>[{1}]</a>",
            // EscapeDataString rejects null, so a missing id is rendered as empty as before.
            Uri.EscapeDataString(id ?? string.Empty),
            System.Net.WebUtility.HtmlEncode(entity.BestAliasMatch));
    }

    return builder.ToString();
}
/// <summary>
/// Creates the linker over the given database and extends the second set of non-informative
/// words by the first set and by the verbs loaded from the lexicon.
/// </summary>
/// <param name="db">Database stored in <c>Db</c>.</param>
/// <param name="verbsLexicon">Lexicon passed to <c>loadVerbs</c>; defaults to null.</param>
internal UtteranceLinker(FreebaseDbProvider db, string verbsLexicon = null)
{
    Db = db;

    _nonInformativeWords2.UnionWith(_nonInformativeWords1);

    var lexiconVerbs = loadVerbs(verbsLexicon);
    _nonInformativeWords2.UnionWith(lexiconVerbs);
}
/// <summary>
/// Renders an entity as an HTML link to "/database?query=ID" with the text "LABEL (ID)".
/// </summary>
/// <remarks>
/// The label and the displayed id are HTML-encoded and the id in the URL is URL-escaped, so
/// labels containing characters such as '&amp;' or '&lt;' produce well-formed markup.
/// </remarks>
/// <param name="entity">Entity whose mid and label are rendered.</param>
/// <returns>HTML anchor for the entity.</returns>
public static string EntityLink(EntityInfo entity)
{
    var id = FreebaseDbProvider.GetId(entity.Mid);

    return string.Format(
        "<a href='/database?query={0}'>{1} ({2})</a>",
        // EscapeDataString rejects null, so a missing id is rendered as empty as before.
        Uri.EscapeDataString(id ?? string.Empty),
        System.Net.WebUtility.HtmlEncode(entity.Label),
        System.Net.WebUtility.HtmlEncode(id));
}
/// <summary>
/// Builds a comma separated representation of the names the database returns for the given id,
/// with all double quotes removed.
/// </summary>
/// <param name="freebaseId">Id whose names are looked up.</param>
/// <param name="db">Database providing the names.</param>
/// <returns>Names joined by "," without any '"' characters.</returns>
private static string getNamesRepresentation(string freebaseId, FreebaseDbProvider db)
{
    var nameArray = db.GetNames(freebaseId).ToArray();
    var joinedNames = string.Join(",", nameArray);

    return joinedNames.Replace("\"", "");
}
/// <summary>
/// Builds the key for an utterance and its optional context. Without context the key is the
/// utterance itself; otherwise it is the utterance, "|" and the comma separated ids of the
/// context entities ordered by their mid.
/// </summary>
/// <param name="utterance">Utterance the key is built for.</param>
/// <param name="context">Context entities, or null when there is no context.</param>
/// <returns>The key.</returns>
private string getKey(string utterance, IEnumerable<EntityInfo> context)
{
    if (context != null)
    {
        var orderedIds = context
            .OrderBy(entity => entity.Mid)
            .Select(entity => FreebaseDbProvider.GetId(entity.Mid));

        return utterance + "|" + string.Join(",", orderedIds);
    }

    return utterance;
}
/// <summary>
/// Gets the graph node of a freebase mid: the mid is converted to an id which is looked up in the graph.
/// </summary>
/// <param name="freebaseMid">Mid to convert.</param>
/// <param name="graph">Graph providing the node.</param>
/// <returns>Node returned by the graph for the id.</returns>
static NodeReference getNode(string freebaseMid, ComposedGraph graph)
{
    return graph.GetNode(FreebaseDbProvider.GetId(freebaseMid));
}
/// <summary>
/// Creates the linker; the work is delegated to an internal <c>UtteranceLinker</c> built from the same arguments.
/// </summary>
/// <param name="db">Database passed to the internal linker.</param>
/// <param name="verbsLexicon">Verbs lexicon passed to the internal linker; defaults to null.</param>
internal PopularityMaximizationLinker(FreebaseDbProvider db, string verbsLexicon = null)
{
    _linker = new UtteranceLinker(db: db, verbsLexicon: verbsLexicon);
}
/// <summary>
/// Loads the linked train utterances through <c>ComputationCache</c> (key "linked_all_train",
/// version 1). The builder links the question of every dialog with a graph disambiguated linker
/// and keeps the first result of each.
/// </summary>
/// <param name="simpleQuestions">Not used by this method.</param>
/// <param name="db">Database for the linker.</param>
/// <param name="trainDialogs">Dialogs whose questions are linked.</param>
/// <returns>One linked utterance per dialog, in dialog order.</returns>
private static LinkedUtterance[] cachedLinkedUtterancesTrain(SimpleQuestionDumpProcessor simpleQuestions, FreebaseDbProvider db, QuestionDialog[] trainDialogs)
{
    return ComputationCache.Load("linked_all_train", 1, () =>
    {
        var linker = new GraphDisambiguatedLinker(db, "./verbs.lex");

        return trainDialogs
            .Select(dialog => linker.LinkUtterance(dialog.Question, 1).First())
            .ToList();
    }).ToArray();
}
/// <summary>
/// Creates the linker; database and verbs lexicon go to the base constructor, the flag is
/// remembered in <c>_useDisambiguation</c>.
/// </summary>
/// <param name="db">Database passed to the base constructor.</param>
/// <param name="verbsLexicon">Verbs lexicon passed to the base constructor.</param>
/// <param name="useGraphDisambiguation">Value stored in <c>_useDisambiguation</c>; defaults to true.</param>
internal GraphDisambiguatedLinker(FreebaseDbProvider db, string verbsLexicon, bool useGraphDisambiguation = true)
    : base(db, verbsLexicon)
{
    this._useDisambiguation = useGraphDisambiguation;
}
/// <summary>
/// Creates the extractor from the linker and database it works with.
/// </summary>
/// <param name="linker">Linker stored in <c>Linker</c>.</param>
/// <param name="db">Database stored in <c>Db</c>.</param>
internal LinkBasedExtractor(ILinker linker, FreebaseDbProvider db)
{
    Linker = linker;
    Db = db;
}
/// <summary>
/// Loads phrases from the answers of the seed dialogs. Every answer mid is resolved to a database
/// entry; entries that are missing, have fewer than two aliases, or whose first alias does not
/// meet the phrase requirements are skipped. The first alias of each remaining entry is a phrase.
/// </summary>
/// <param name="seedDialogs">Dialogs whose answer mids are resolved.</param>
/// <param name="db">Database used to resolve the entries.</param>
/// <returns>Distinct phrases in order of first occurrence.</returns>
internal static IEnumerable<string> LoadPhrases(QuestionDialogDatasetReader seedDialogs, FreebaseDbProvider db)
{
    // All entries are resolved up front, before any filtering.
    var answerEntries = seedDialogs.Dialogs
        .Select(dialog => db.GetEntryFromMid(dialog.AnswerMid))
        .Where(entry => entry != null)
        .ToArray();

    return answerEntries
        .Where(entry => entry.Aliases.Count() >= 2)
        .Where(entry => meetsPhraseRequirements(entry.Aliases.First()))
        .Select(entry => entry.Aliases.First())
        .Distinct()
        .ToArray();
}