/// <summary>
/// Creates a new LexiconPredicate carrying the same values as the given token.
/// URI, label, QuestionMatch and score are copied by assignment; ranges and
/// domains are copied into new lists (via ToList), so adding to or removing from
/// the clone's lists does not change the lists of the original token.
/// </summary>
/// <param name="token">the token to copy; must be a LexiconPredicate instance</param>
/// <returns>a new LexiconPredicate populated from <paramref name="token"/></returns>
/// <exception cref="ArgumentNullException">when <paramref name="token"/> is null</exception>
/// <exception cref="ArgumentException">when <paramref name="token"/> is not a LexiconPredicate</exception>
public override LexiconToken getClone(LexiconToken token)
{
    if (token == null)
    {
        throw new ArgumentNullException("token");
    }

    // Cast once and validate, instead of dereferencing the result of "as" unchecked.
    LexiconPredicate source = token as LexiconPredicate;
    if (source == null)
    {
        throw new ArgumentException("token must be a LexiconPredicate", "token");
    }

    LexiconPredicate clone = new LexiconPredicate();
    clone.URI = source.URI;
    clone.label = source.label;
    clone.ranges = source.ranges.ToList();
    clone.QuestionMatch = source.QuestionMatch;
    clone.score = source.score;
    clone.domains = source.domains.ToList();
    return clone;
}
/// <summary>
/// Gets the predicates whose labels match words of the question.
/// While predicateFilled is false, every trimmed permutation of the question
/// (plus variants in which a word is replaced by one of its stems) is searched on
/// the SPARQL endpoint at http://localhost:8890/sparql. Predicates that occur in at
/// least one triple are collected, passed through scorePredicates and
/// addDomainAndRange, stored in predicateList, and predicateFilled is set.
/// Once predicateFilled is true, no query is executed: the stored predicates whose
/// QuestionMatch is one of the permutations of the given question are returned, and
/// topN and Limit are not used.
/// </summary>
/// <param name="question">question to get matched predicates of</param>
/// <param name="topN">number of top results requested from scorePredicates, default = 20</param>
/// <param name="Limit">value of the "limit" clause of each label-search query, default = 30</param>
/// <returns>list of matching LexiconPredicates</returns>
public List<LexiconPredicate> getPredicates(string question, int topN = 20, int Limit = 30)
{
    // Stopwatch is monotonic; the elapsed time is only used for the log lines below.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    // All permutations of the question words, minus those trimPermutations discards.
    List<string> permutationList = getPermutations(question);
    permutationList = trimPermutations(permutationList);

    // Predicates were collected before: only filter the stored list.
    if (predicateFilled)
    {
        HashSet<string> wantedMatches = new HashSet<string>(permutationList);
        List<LexiconPredicate> cachedMatches = new List<LexiconPredicate>();
        foreach (LexiconPredicate predicate in predicateList)
        {
            if (wantedMatches.Contains(predicate.QuestionMatch))
            {
                cachedMatches.Add(predicate);
            }
        }
        return cachedMatches;
    }

    // Stems are only needed when queries are actually built.
    Dictionary<string, List<string>> stemmedWords = GetStemmedWords(question);
    List<LexiconPredicate> collected = new List<LexiconPredicate>();

    // The endpoint does not depend on the permutation, so it is created once.
    SparqlRemoteEndpoint remoteEndPoint = new SparqlRemoteEndpoint(new Uri("http://localhost:8890/sparql"));

    foreach (string questionleft in permutationList)
    {
        string bifContainsValue = buildBifContainsValue(questionleft, stemmedWords);
        string query = buildPredicateSearchQuery(bifContainsValue, Limit);

        try
        {
            SparqlResultSet resultSet = remoteEndPoint.QueryWithResultSet(query);

            foreach (SparqlResult result in resultSet)
            {
                INode predicateURI = result.Value("predicate");
                INode predicateLabel = result.Value("label");
                string uri = predicateURI.ToString();

                // QuestionMatch is part of the identity: the same predicate may be kept
                // once per permutation so that scoring can pick the better match.
                bool exists = false;
                foreach (LexiconPredicate known in collected)
                {
                    if (known.URI == uri && known.QuestionMatch == questionleft)
                    {
                        exists = true;
                        break;
                    }
                }

                // Local duplicate check first: it avoids a remote round trip for
                // predicates that would be rejected anyway.
                if (exists || !isPredicateUsed(uri))
                {
                    continue;
                }

                LexiconPredicate found = new LexiconPredicate();
                found.URI = uri;
                found.QuestionMatch = questionleft;
                found.label = predicateLabel.ToString();
                collected.Add(found);
            }
        }
        catch (Exception ex)
        {
            // Best effort: a failing permutation is skipped, not fatal. The actual cause
            // is appended because not every failure here is a timeout.
            util.log("skipped : " + questionleft + " ---- due to time out " + "(" + ex.GetType().Name + ": " + ex.Message + ")");
        }
    }

    util.log(" finished getting " + collected.Count + " predicates " + " Time taken : " + timer.Elapsed.TotalMilliseconds + " msec");

    // Score the collected predicates, keep the best ones and attach domain/range.
    this.predicateList = scorePredicates(collected, topN);
    this.predicateList = addDomainAndRange(this.predicateList);

    util.log("total time taken :" + timer.Elapsed.TotalMilliseconds.ToString() + " mSecs");
    predicateFilled = true;
    return this.predicateList;
}

// Builds the bif:contains expression: the quoted phrase itself, followed by one
// "or"-joined quoted variant for every stem of every word contained in the phrase.
private string buildBifContainsValue(string questionleft, Dictionary<string, List<string>> stemmedWords)
{
    // NOTE(review): questionleft is embedded unescaped; a quote character inside it
    // would end up verbatim in the generated query - confirm inputs are sanitized upstream.
    string bifContainsValue = "\'" + questionleft + "\'";

    foreach (string word in stemmedWords.Keys)
    {
        if (!questionleft.Contains(word))
        {
            continue;
        }

        // A word can have several stems (rare case).
        foreach (string stem in stemmedWords[word])
        {
            bifContainsValue += "or\'" + questionleft.Replace(word, stem) + "\'";
        }
    }

    return bifContainsValue;
}

// Builds the query selecting datatype, object and rdf properties whose rdfs:label
// matches the given bif:contains expression, limited to "limit" rows.
private string buildPredicateSearchQuery(string bifContainsValue, int limit)
{
    return "SELECT * WHERE { { " +
           "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
           "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty>." +
           "?label bif:contains \"" + bifContainsValue + "\" } " +
           "union {" +
           "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ." +
           "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
           "?label bif:contains \"" + bifContainsValue + "\" } " +
           "union {" +
           "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property> ." +
           "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
           "?label bif:contains \"" + bifContainsValue + "\" } " +
           "} limit " + limit;
}

// Returns true when at least one triple uses the given predicate. The connection
// opened through QueryHandler is closed even when the query throws.
private bool isPredicateUsed(string predicateUri)
{
    string checkQuery = "select distinct * where { ?x <" + predicateUri + "> ?y } limit 1 ";

    QueryHandler.startConnection();
    try
    {
        SparqlResultSet checkResults = QueryHandler.ExecuteQueryWithString(checkQuery);
        return checkResults.Count != 0;
    }
    finally
    {
        QueryHandler.closeConnection();
    }
}