Esempio n. 1
0
        /// <summary>
        /// Creates a copy of the given predicate token.
        /// The URI, label, QuestionMatch and score values are copied as-is, while
        /// <c>ranges</c> and <c>domains</c> are copied into new lists so that adding to or
        /// removing from the clone's lists does not change the lists of the original token.
        /// </summary>
        /// <param name="token">the token to copy; it must be a <c>LexiconPredicate</c></param>
        /// <returns>a new <c>LexiconPredicate</c> holding the copied values</returns>
        /// <exception cref="System.ArgumentNullException">thrown when <paramref name="token"/> is null</exception>
        /// <exception cref="System.ArgumentException">thrown when <paramref name="token"/> is not a <c>LexiconPredicate</c></exception>
        public override LexiconToken getClone(LexiconToken token)
        {
            if (token == null)
            {
                throw new System.ArgumentNullException("token");
            }

            // Cast once and validate up front instead of dereferencing an unchecked "as" result,
            // so a wrong token type fails with a clear message before any copying starts.
            LexiconPredicate source = token as LexiconPredicate;
            if (source == null)
            {
                throw new System.ArgumentException("token must be a LexiconPredicate", "token");
            }

            LexiconPredicate predicateToReplace = new LexiconPredicate();
            predicateToReplace.URI = token.URI;
            predicateToReplace.label = token.label;
            predicateToReplace.QuestionMatch = token.QuestionMatch;
            predicateToReplace.score = token.score;

            // ToList() builds new list instances; the elements themselves are not copied.
            predicateToReplace.ranges = source.ranges.ToList();
            predicateToReplace.domains = source.domains.ToList();

            return predicateToReplace;
        }
Esempio n. 2
0
        /// <summary>
        /// Gets the predicates whose labels match some words of the question.
        /// On the first call the predicates are queried from the SPARQL endpoint, scored,
        /// enriched with domain and range, and cached in <c>predicateList</c>.
        /// Once the cache is filled, later calls only filter the cached predicates by the
        /// permutations of the given question; <paramref name="topN"/> and
        /// <paramref name="Limit"/> are not used on that path.
        /// </summary>
        /// <param name="question">question to get matched predicates of it</param>
        /// <param name="topN">the number of top matching results to be returned, default = 20</param>
        /// <param name="Limit">the limit of the number of returned results in each query, default = 30</param>
        /// <returns>list of top matching LexiconPredicates</returns>
        public List<LexiconPredicate> getPredicates(string question, int topN = 20, int Limit = 30)
        {
            // capturing time for testing; UTC so the measured duration is not affected by local clock shifts
            DateTime startedAt = DateTime.UtcNow;

            //getting all permutation of words formed from the question string
            List<string> permutationList = getPermutations(question);

            //removing permutations that most probably won't return results and will take time in querying
            permutationList = trimPermutations(permutationList);

            //Get the stemmed version of the question words
            Dictionary<string, List<string>> stemmedWords = GetStemmedWords(question);

            // the predicates were filled before - so only the cached ones matching this question are returned
            if (predicateFilled)
            {
                // set lookup instead of scanning the permutation list once per cached predicate
                HashSet<string> wantedMatches = new HashSet<string>(permutationList);
                List<LexiconPredicate> cachedMatches = new List<LexiconPredicate>();

                foreach (LexiconPredicate predicate in predicateList)
                {
                    if (wantedMatches.Contains(predicate.QuestionMatch))
                    {
                        cachedMatches.Add(predicate);
                    }
                }

                return cachedMatches;
            }

            List<LexiconPredicate> collectedPredicates = new List<LexiconPredicate>();

            // the endpoint does not depend on the permutation, so it is created once
            SparqlRemoteEndpoint remoteEndPoint = new SparqlRemoteEndpoint(new Uri("http://localhost:8890/sparql"));

            // iterating over each permutation of the question, querying it from virtuoso and collecting the predicates
            foreach (string questionleft in permutationList)
            {
                string bifContainsValue = buildLabelSearchExpression(questionleft, stemmedWords);
                string labelQuery = buildPredicateLabelQuery(bifContainsValue, Limit);

                try
                {
                    //executing the Query and finding results
                    SparqlResultSet resultSet = remoteEndPoint.QueryWithResultSet(labelQuery);

                    //iterating over matched predicates in the resultset
                    foreach (SparqlResult result in resultSet)
                    {
                        INode predicateURI = result.Value("predicate");
                        INode predicateLabel = result.Value("label");

                        // a row without both values cannot become a predicate; skip it instead of
                        // failing and losing the remaining rows of this permutation
                        if (predicateURI == null || predicateLabel == null)
                        {
                            continue;
                        }

                        string uri = predicateURI.ToString();

                        // check that the predicate doesn't exist in the list already.
                        // QuestionMatch is compared too because the same predicate may be added
                        // for another permutation with a better score than the old one.
                        // This check runs first so duplicates do not cost a usage query.
                        bool exists = collectedPredicates.Exists(
                            known => known.URI == uri && known.QuestionMatch == questionleft);
                        if (exists)
                        {
                            continue;
                        }

                        // check that the property is used .. not a non-used property
                        if (!isPredicateInUse(uri))
                        {
                            continue;
                        }

                        LexiconPredicate tmplexiconpredicate = new LexiconPredicate();
                        tmplexiconpredicate.URI = uri;
                        tmplexiconpredicate.QuestionMatch = questionleft;
                        tmplexiconpredicate.label = predicateLabel.ToString();
                        collectedPredicates.Add(tmplexiconpredicate);
                    }
                }
                // best effort: a failing permutation (time out or any other error) is skipped,
                // but the actual reason is logged instead of always claiming a time out
                catch (Exception ex)
                {
                    util.log("skipped : " + questionleft + " ---- due to error : " + ex.Message);
                }
            }

            util.log(" finished getting " + collectedPredicates.Count + " predicates " + " Time taken : " + DateTime.UtcNow.Subtract(startedAt).TotalMilliseconds + " msec");

            // now done of collecting predicates: score them, keep the best n ones and add domain and range
            this.predicateList = scorePredicates(collectedPredicates, topN);
            this.predicateList = addDomainAndRange(this.predicateList);

            util.log("total time taken :" + DateTime.UtcNow.Subtract(startedAt).TotalMilliseconds.ToString() + " mSecs");

            predicateFilled = true;
            return this.predicateList;
        }

        /// <summary>
        /// Builds the text expression passed to bif:contains for one permutation: the
        /// permutation itself followed by every variant obtained by replacing a word
        /// with one of its stems.
        /// </summary>
        /// <param name="questionleft">the permutation of question words to search for</param>
        /// <param name="stemmedWords">the stems of each question word</param>
        /// <returns>the quoted alternatives joined with "or"</returns>
        private string buildLabelSearchExpression(string questionleft, Dictionary<string, List<string>> stemmedWords)
        {
            // NOTE(review): questionleft is placed into the query text without escaping, so a
            // quote character in the question would break the query - confirm the permutations
            // are sanitized upstream.
            string expression = "\'" + questionleft + "\'";  //the original questionleft

            foreach (string word in stemmedWords.Keys)
            {
                if (!questionleft.Contains(word))
                {
                    continue;
                }

                foreach (string stem in stemmedWords[word]) //a word can have many stems (rare case)
                {
                    // NOTE(review): there is no whitespace around "or" - confirm the endpoint
                    // parses 'a'or'b' as an OR of two phrases.
                    expression += "or\'" + questionleft.Replace(word, stem) + "\'";
                }
            }

            return expression;
        }

        /// <summary>
        /// Builds the SPARQL query selecting every predicate typed as owl:DatatypeProperty,
        /// owl:ObjectProperty or rdf:Property whose label matches the given bif:contains expression.
        /// </summary>
        /// <param name="bifContainsValue">the text expression to match the labels against</param>
        /// <param name="limit">the maximum number of rows the query returns</param>
        /// <returns>the query text</returns>
        private string buildPredicateLabelQuery(string bifContainsValue, int limit)
        {
            string labelFilter = "?label bif:contains \"" + bifContainsValue + "\" } ";

            return "SELECT  * WHERE { { " +
                   "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                   "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty>." +
                   labelFilter +
                   "union {" +
                   "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ." +
                   "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                   labelFilter +
                   "union {" +
                   "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>  ." +
                   "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                   labelFilter +
                   "} limit " + limit;
        }

        /// <summary>
        /// Checks whether at least one triple uses the given predicate.
        /// </summary>
        /// <param name="predicateUri">the URI of the predicate to look for</param>
        /// <returns>true when the check query returns at least one row</returns>
        private bool isPredicateInUse(string predicateUri)
        {
            string checkQuery = "select distinct * where { ?x <" + predicateUri + "> ?y } limit 1 ";

            QueryHandler.startConnection();
            try
            {
                SparqlResultSet checkResults = QueryHandler.ExecuteQueryWithString(checkQuery);
                return checkResults.Count != 0;
            }
            finally
            {
                // close the connection even when the query throws
                QueryHandler.closeConnection();
            }
        }