/// <summary>
/// Creates a copy of the given predicate token.
/// </summary>
/// <param name="token">the token to copy; must be a <see cref="LexiconPredicate"/></param>
/// <returns>a new <see cref="LexiconPredicate"/> carrying the same URI, label, question match,
/// score, ranges and domains as <paramref name="token"/></returns>
/// <exception cref="ArgumentNullException">when <paramref name="token"/> is null</exception>
/// <exception cref="ArgumentException">when <paramref name="token"/> is not a <see cref="LexiconPredicate"/></exception>
public override LexiconToken getClone(LexiconToken token)
{
    if (token == null)
    {
        throw new ArgumentNullException("token");
    }

    // Fail with a descriptive error instead of a NullReferenceException on a failed "as" cast.
    LexiconPredicate source = token as LexiconPredicate;
    if (source == null)
    {
        throw new ArgumentException("token must be a LexiconPredicate", "token");
    }

    LexiconPredicate predicateToReplace = new LexiconPredicate();
    predicateToReplace.URI = source.URI;
    predicateToReplace.label = source.label;
    // ToList() builds new list instances, so the clone does not share its
    // ranges/domains lists with the source token.
    predicateToReplace.ranges = source.ranges.ToList();
    predicateToReplace.QuestionMatch = source.QuestionMatch;
    predicateToReplace.score = source.score;
    predicateToReplace.domains = source.domains.ToList();
    return predicateToReplace;
}
/// <summary>
/// Creates a copy of the given predicate token.
/// </summary>
/// <param name="token">the token to copy; must be a <see cref="LexiconPredicate"/></param>
/// <returns>a new <see cref="LexiconPredicate"/> carrying the same URI, label, question match,
/// score, ranges and domains as <paramref name="token"/></returns>
/// <exception cref="ArgumentNullException">when <paramref name="token"/> is null</exception>
/// <exception cref="ArgumentException">when <paramref name="token"/> is not a <see cref="LexiconPredicate"/></exception>
public override LexiconToken getClone(LexiconToken token)
{
    if (token == null)
    {
        throw new ArgumentNullException("token");
    }

    // Fail with a descriptive error instead of a NullReferenceException on a failed "as" cast.
    LexiconPredicate source = token as LexiconPredicate;
    if (source == null)
    {
        throw new ArgumentException("token must be a LexiconPredicate", "token");
    }

    LexiconPredicate predicateToReplace = new LexiconPredicate();
    predicateToReplace.URI = source.URI;
    predicateToReplace.label = source.label;
    // ToList() builds new list instances, so the clone does not share its
    // ranges/domains lists with the source token.
    predicateToReplace.ranges = source.ranges.ToList();
    predicateToReplace.QuestionMatch = source.QuestionMatch;
    predicateToReplace.score = source.score;
    predicateToReplace.domains = source.domains.ToList();
    return predicateToReplace;
}
/// <summary>
/// Gets the predicates whose labels match words of the question.
/// On the first call the predicates are fetched from the SPARQL endpoint, scored, enriched with
/// domain and range and cached; later calls only filter the cached list by the permutations
/// of the given question.
/// </summary>
/// <param name="question">question to get the matching predicates for</param>
/// <param name="topN">the number of top matching results to keep, default = 20</param>
/// <param name="Limit">the limit of returned results in each label query, default = 30</param>
/// <returns>list of top matching LexiconPredicates</returns>
public List<LexiconPredicate> getPredicates(string question, int topN = 20, int Limit = 30)
{
    DateTime dt = DateTime.Now; // start time, only used for the timing log lines below

    // all word permutations of the question, trimmed by trimPermutations before querying
    List<string> permutationList = getPermutations(question);
    permutationList = trimPermutations(permutationList);

    // predicates were already collected by an earlier call: only filter the cached list
    if (predicateFilled)
    {
        return predicateList
            .Where(cached => permutationList.Contains(cached.QuestionMatch))
            .ToList();
    }

    // stemmed forms of the question words; only needed when the endpoint is actually queried
    Dictionary<string, List<string>> stemmedWords = GetStemmedWords(question);

    List<LexiconPredicate> collected = new List<LexiconPredicate>();
    SparqlRemoteEndpoint remoteEndPoint = new SparqlRemoteEndpoint(new Uri("http://localhost:8890/sparql"));

    foreach (string questionleft in permutationList)
    {
        // the bif:contains expression starts with the original phrase ...
        string bifContainsValue = "\'" + questionleft + "\'";

        // ... followed by one alternative per stem of every word contained in the phrase
        foreach (string word in stemmedWords.Keys)
        {
            if (questionleft.Contains(word))
            {
                foreach (string stem in stemmedWords[word]) // a word can have several stems (rare case)
                {
                    bifContainsValue += "or\'" + questionleft.Replace(word, stem) + "\'";
                }
            }
        }

        // NOTE(review): the question text is concatenated into the query unescaped; a quote
        // character in the question would break the query - confirm the input is sanitized upstream.
        string Query = "SELECT * WHERE { { " +
                       "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                       "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty>." +
                       "?label bif:contains \"" + bifContainsValue + "\" } " +
                       "union {" +
                       "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ." +
                       "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                       "?label bif:contains \"" + bifContainsValue + "\" } " +
                       "union {" +
                       "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property> ." +
                       "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                       "?label bif:contains \"" + bifContainsValue + "\" } " +
                       "} limit " + Limit;

        try
        {
            SparqlResultSet resultSet = remoteEndPoint.QueryWithResultSet(Query);

            foreach (SparqlResult result in resultSet)
            {
                INode predicateURI = result.Value("predicate");
                INode predicateLabel = result.Value("label");
                string uri = predicateURI.ToString();

                // The same URI may be collected again for another permutation (it can score
                // differently), so a duplicate is the same URI with the same QuestionMatch.
                // Checked first so the usage query below is not sent for known predicates.
                bool exists = collected.Any(known => known.URI == uri && known.QuestionMatch == questionleft);
                if (exists)
                {
                    continue;
                }

                // keep only predicates that are used by at least one triple
                string checkQuery = "select distinct * where { ?x <" + predicateURI + "> ?y } limit 1 ";
                SparqlResultSet checkResults;
                QueryHandler.startConnection();
                try
                {
                    checkResults = QueryHandler.ExecuteQueryWithString(checkQuery);
                }
                finally
                {
                    // close the connection even when the check query throws
                    QueryHandler.closeConnection();
                }

                if (checkResults.Count == 0)
                {
                    continue;
                }

                LexiconPredicate tmplexiconpredicate = new LexiconPredicate();
                tmplexiconpredicate.URI = uri;
                tmplexiconpredicate.QuestionMatch = questionleft;
                tmplexiconpredicate.label = predicateLabel.ToString();
                collected.Add(tmplexiconpredicate);
            }
        }
        catch (Exception ex)
        {
            // Best effort: a failing permutation is skipped. The original message blamed a
            // time out for every failure, so the actual exception is appended to the log line.
            util.log("skipped : " + questionleft + " ---- due to time out " +
                     "(" + ex.GetType().Name + ": " + ex.Message + ")");
        }
    }

    util.log(" finished getting " + collected.Count + " predicates " + " Time taken : " + DateTime.Now.Subtract(dt).TotalMilliseconds + " msec");

    // score the collected predicates, keep the best topN and attach their domains and ranges
    this.predicateList = scorePredicates(collected, topN);
    this.predicateList = addDomainAndRange(this.predicateList);

    util.log("total time taken :" + DateTime.Now.Subtract(dt).TotalMilliseconds.ToString() + " mSecs");

    predicateFilled = true;
    return this.predicateList;
}
/// <summary>
/// Removes the buckets that still have unmatched question text, then prunes from each remaining
/// bucket the predicate domains and the literal owner types that have no counterpart on the
/// other side, and finally collapses identical domain / owner-type lists to their first entry.
/// </summary>
/// <param name="queryBuckets">buckets to clean; the list and the tokens of its buckets are modified in place</param>
/// <returns>the same <paramref name="queryBuckets"/> list after cleaning</returns>
private List<QueryBucket> cleanBucket(List<QueryBucket> queryBuckets)
{
    #region remove buckets that still have question text left
    queryBuckets.RemoveAll(candidate => candidate.questionLeft.Length > 0);
    #endregion

    #region remove predicate domains and literal owner types that are not used
    // iterate over a snapshot because buckets may be removed from queryBuckets inside the loop
    foreach (QueryBucket bucket in queryBuckets.ToList())
    {
        // split the bucket tokens into predicates and literals
        List<LexiconPredicate> predicateList = new List<LexiconPredicate>();
        List<LexiconLiteral> literalList = new List<LexiconLiteral>();
        foreach (LexiconToken token in bucket.tokens)
        {
            if (token is LexiconPredicate)
            {
                predicateList.Add(token as LexiconPredicate);
            }
            if (token is LexiconLiteral)
            {
                literalList.Add(token as LexiconLiteral);
            }
        }

        if (predicateList.Count == 0)
        {
            bucket.literalOnly = true;
            continue;
        }

        // snapshot again: tokens are replaced (removed and re-added) while being processed
        foreach (LexiconToken token in bucket.tokens.ToList())
        {
            if (token is LexiconPredicate)
            {
                LexiconPredicate oldPredicate = token as LexiconPredicate;

                // the domains are enumerated from the token as it was before any pruning
                foreach (string oldPredDomain in oldPredicate.domains.ToList())
                {
                    bool exist = literalList.Any(literal => literal.typeOfOwner.Contains(oldPredDomain));
                    if (exist)
                    {
                        continue;
                    }

                    // No literal is owned by this domain, so it cannot join with anything.
                    // The token is replaced by a pruned clone instead of being edited in place
                    // (the original comment cites reference issues).
                    LexiconPredicate predicateToReplace = oldPredicate.getClone(oldPredicate) as LexiconPredicate;
                    predicateToReplace.domains.Remove(oldPredDomain);
                    bucket.tokens.Remove(oldPredicate);
                    bucket.tokens.Add(predicateToReplace);
                    oldPredicate = predicateToReplace;

                    // drop the predicate entirely once it has no domains left
                    if (oldPredicate.domains.Count == 0)
                    {
                        bucket.tokens.Remove(oldPredicate);
                        // NOTE(review): oldPredicate is the clone here and the clone was never added
                        // to predicateList, so unless LexiconPredicate overrides Equals this Remove
                        // has no effect - confirm the intent before relying on it.
                        predicateList.Remove(oldPredicate);

                        // remove the bucket when no tokens are left in it
                        if (bucket.tokens.Count == 0)
                        {
                            queryBuckets.Remove(bucket);
                        }
                    }
                }
            }

            if (token is LexiconLiteral)
            {
                LexiconLiteral oldLiteral = token as LexiconLiteral;

                foreach (string typeofowner in oldLiteral.typeOfOwner.ToList())
                {
                    bool exist = predicateList.Any(predicate => predicate.domains.Contains(typeofowner));
                    if (exist)
                    {
                        continue;
                    }

                    // no predicate has this owner type as a domain: replace the literal by a pruned clone
                    LexiconLiteral newLiteral = oldLiteral.getClone(oldLiteral) as LexiconLiteral;
                    newLiteral.typeOfOwner.Remove(typeofowner);
                    bucket.tokens.Remove(oldLiteral);
                    bucket.tokens.Add(newLiteral);
                    oldLiteral = newLiteral;

                    if (oldLiteral.typeOfOwner.Count == 0)
                    {
                        bucket.tokens.Remove(oldLiteral);
                        // NOTE(review): same situation as predicateList.Remove above - the clone
                        // was never added to literalList.
                        literalList.Remove(oldLiteral);

                        // remove the bucket when no tokens are left in it
                        if (bucket.tokens.Count == 0)
                        {
                            queryBuckets.Remove(bucket);
                        }
                    }
                }
            }
        }
    }
    #endregion

    #region reduce identical domain / owner-type lists to a single entry
    foreach (QueryBucket bucket in queryBuckets)
    {
        foreach (LexiconToken predicateToken in bucket.tokens)
        {
            LexiconPredicate predicate = predicateToken as LexiconPredicate;
            if (predicate == null)
            {
                continue;
            }

            foreach (LexiconToken literalToken in bucket.tokens)
            {
                LexiconLiteral literal = literalToken as LexiconLiteral;
                if (literal == null || !Enumerable.SequenceEqual(predicate.domains, literal.typeOfOwner))
                {
                    continue;
                }

                // Two empty lists also compare equal; RemoveRange(1, -1) would then throw
                // ArgumentOutOfRangeException, so only lists with more than one entry are trimmed.
                if (predicate.domains.Count > 1)
                {
                    predicate.domains.RemoveRange(1, predicate.domains.Count - 1);
                }
                if (literal.typeOfOwner.Count > 1)
                {
                    literal.typeOfOwner.RemoveRange(1, literal.typeOfOwner.Count - 1);
                }
            }
        }
    }
    #endregion

    return queryBuckets;
}
/// <summary>
/// Gets the predicates whose labels match words of the question.
/// On the first call the predicates are fetched over HTTP from the SPARQL endpoint, scored,
/// enriched with domain and range and cached; later calls only filter the cached list by the
/// permutations of the given question.
/// </summary>
/// <param name="question">question to get the matching predicates for</param>
/// <param name="topN">the number of top matching results to keep, default = 20</param>
/// <param name="Limit">the limit of returned results in each label query, default = 30</param>
/// <returns>list of top matching LexiconPredicates</returns>
public List<LexiconPredicate> getPredicates(string question, int topN = 20, int Limit = 30)
{
    DateTime dt = DateTime.Now; // start time, only used for the timing log lines below

    // all word permutations of the question, trimmed by trimPermutations before querying
    List<string> permutationList = getPermutations(question);
    permutationList = trimPermutations(permutationList);

    // predicates were already collected by an earlier call: only filter the cached list
    if (predicateFilled)
    {
        return predicateList
            .Where(cached => permutationList.Contains(cached.QuestionMatch))
            .ToList();
    }

    // stemmed forms of the question words; only needed when the endpoint is actually queried
    Dictionary<string, List<string>> stemmedWords = GetStemmedWords(question);

    List<LexiconPredicate> collected = new List<LexiconPredicate>();

    foreach (string questionleft in permutationList)
    {
        // the bif:contains expression starts with the original phrase ...
        string bifContainsValue = "\'" + questionleft + "\'";

        // ... followed by one alternative per stem of every word contained in the phrase
        foreach (string word in stemmedWords.Keys)
        {
            if (questionleft.Contains(word))
            {
                foreach (string stem in stemmedWords[word]) // a word can have several stems (rare case)
                {
                    bifContainsValue += "or\'" + questionleft.Replace(word, stem) + "\'";
                }
            }
        }

        // NOTE(review): the question text is concatenated into the query unescaped; a quote
        // character in the question would break the query - confirm the input is sanitized upstream.
        string Query = "SELECT * WHERE { { " +
                       "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                       "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty>." +
                       "?label bif:contains \"" + bifContainsValue + "\" } " +
                       "union {" +
                       "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ." +
                       "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                       "?label bif:contains \"" + bifContainsValue + "\" } " +
                       "union {" +
                       "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property> ." +
                       "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                       "?label bif:contains \"" + bifContainsValue + "\" } " +
                       "} limit " + Limit;

        try
        {
            SparqlResultSet resultSet = Request.RequestWithHTTP(Query);

            foreach (SparqlResult result in resultSet)
            {
                INode predicateURI = result.Value("predicate");
                INode predicateLabel = result.Value("label");
                string uri = predicateURI.ToString();

                // The same URI may be collected again for another permutation (it can score
                // differently), so a duplicate is the same URI with the same QuestionMatch.
                // Checked first so the usage query below is not sent for known predicates.
                bool exists = collected.Any(known => known.URI == uri && known.QuestionMatch == questionleft);
                if (exists)
                {
                    continue;
                }

                // keep only predicates that are used by at least one triple
                string checkQuery = "select distinct * where { ?x <" + predicateURI + "> ?y } limit 1 ";
                SparqlResultSet checkResults = Request.RequestWithHTTP(checkQuery);
                if (checkResults.Count == 0)
                {
                    continue;
                }

                LexiconPredicate tmplexiconpredicate = new LexiconPredicate();
                tmplexiconpredicate.URI = uri;
                tmplexiconpredicate.QuestionMatch = questionleft;
                tmplexiconpredicate.label = predicateLabel.ToString();
                collected.Add(tmplexiconpredicate);
            }
        }
        catch (Exception ex)
        {
            // Best effort: a failing permutation is skipped. The original message blamed a
            // time out for every failure, so the actual exception is appended to the log line.
            util.log("skipped : " + questionleft + " ---- due to time out " +
                     "(" + ex.GetType().Name + ": " + ex.Message + ")");
        }
    }

    util.log(" finished getting " + collected.Count + " predicates " + " Time taken : " + DateTime.Now.Subtract(dt).TotalMilliseconds + " msec");

    // score the collected predicates, keep the best topN and attach their domains and ranges
    this.predicateList = scorePredicates(collected, topN);
    this.predicateList = addDomainAndRange(this.predicateList);

    util.log("total time taken :" + DateTime.Now.Subtract(dt).TotalMilliseconds.ToString() + " mSecs");

    predicateFilled = true;
    return this.predicateList;
}