Пример #1
0
        /// <summary>
        /// Creates a copy of the given predicate token. The <c>ranges</c> and <c>domains</c>
        /// lists are copied into new lists, so adding or removing entries on the clone
        /// leaves the lists of the source token untouched.
        /// </summary>
        /// <param name="token">the token to copy; it has to be a <see cref="LexiconPredicate"/></param>
        /// <returns>a new <see cref="LexiconPredicate"/> with the same URI, label, question match, score, ranges and domains</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="token"/> is null</exception>
        /// <exception cref="System.ArgumentException"><paramref name="token"/> is not a <see cref="LexiconPredicate"/></exception>
        public override LexiconToken getClone(LexiconToken token)
        {
            if (token == null)
            {
                throw new System.ArgumentNullException("token");
            }

            // Cast once and fail with a clear message instead of the NullReferenceException
            // that an unchecked "as" cast would produce further down.
            LexiconPredicate source = token as LexiconPredicate;
            if (source == null)
            {
                throw new System.ArgumentException("token must be a LexiconPredicate", "token");
            }

            LexiconPredicate predicateToReplace = new LexiconPredicate();
            predicateToReplace.URI = source.URI;
            predicateToReplace.label = source.label;
            predicateToReplace.QuestionMatch = source.QuestionMatch;
            predicateToReplace.score = source.score;

            // ToList() builds new list instances so the clone does not share them with the source.
            predicateToReplace.ranges = source.ranges.ToList();
            predicateToReplace.domains = source.domains.ToList();

            return predicateToReplace;
        }
Пример #2
0
        /// <summary>
        /// Returns a copy of the given predicate token. Scalar members are copied as they are;
        /// the <c>ranges</c> and <c>domains</c> lists are duplicated with <c>ToList()</c>, so
        /// the copy owns its own list instances.
        /// </summary>
        /// <param name="token">the token to copy; it has to be a <see cref="LexiconPredicate"/></param>
        /// <returns>a new <see cref="LexiconPredicate"/> holding the same values as <paramref name="token"/></returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="token"/> is null</exception>
        /// <exception cref="System.ArgumentException"><paramref name="token"/> is not a <see cref="LexiconPredicate"/></exception>
        public override LexiconToken getClone(LexiconToken token)
        {
            if (token == null)
            {
                throw new System.ArgumentNullException("token");
            }

            // A single checked cast replaces the two unchecked "as" casts, which would have
            // surfaced a wrong token type as a NullReferenceException.
            LexiconPredicate original = token as LexiconPredicate;

            if (original == null)
            {
                throw new System.ArgumentException("token must be a LexiconPredicate", "token");
            }

            LexiconPredicate predicateToReplace = new LexiconPredicate();

            predicateToReplace.URI           = original.URI;
            predicateToReplace.label         = original.label;
            predicateToReplace.ranges        = original.ranges.ToList();
            predicateToReplace.QuestionMatch = original.QuestionMatch;
            predicateToReplace.score         = original.score;
            predicateToReplace.domains       = original.domains.ToList();

            return(predicateToReplace);
        }
Пример #3
0
        /// <summary>
        /// Gets the predicates whose labels match some words of the question.
        /// If the predicates were already collected by an earlier call (predicateFilled is set),
        /// no query is sent: only the stored predicates whose QuestionMatch is one of the
        /// permutations of <paramref name="question"/> are returned.
        /// Otherwise every permutation is looked up on the SPARQL endpoint, the collected
        /// predicates are passed through scorePredicates and addDomainAndRange, stored in
        /// predicateList and returned.
        /// </summary>
        /// <param name="question">question to get matched predicates of it</param>
        /// <param name="topN">the number of top matching results to be returned (forwarded to scorePredicates), default = 20</param>
        /// <param name="Limit">the limit of the number of returned results in each label query, default = 30</param>
        /// <returns>list of top matching LexiconPredicates</returns>
        public List<LexiconPredicate> getPredicates(string question, int topN = 20, int Limit = 30)
        {
            // Only used for the timing figures written to the log.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            List<LexiconPredicate> __predicateList = new List<LexiconPredicate>();

            //getting all permutation of words formed from the question string
            List<string> permutationList = getPermutations(question);

            //removing permutations that most probably wont return results and will take time in querying
            permutationList = trimPermutations(permutationList);

            //Get the stemmed version of the question words
            Dictionary<string, List<string>> stemmedWords = GetStemmedWords(question);

            // predicates were filled before: return only the stored ones matching this question
            if (predicateFilled)
            {
                foreach (LexiconPredicate predicate in predicateList)
                {
                    if (permutationList.Contains(predicate.QuestionMatch))
                    {
                        __predicateList.Add(predicate);
                    }
                }
                return __predicateList;
            }

            // The endpoint does not depend on the permutation, so it is created once.
            SparqlRemoteEndpoint remoteEndPoint = new SparqlRemoteEndpoint(new Uri("http://localhost:8890/sparql"));

            // iterating over each permutation of the question, querying it and collecting the matching predicates
            foreach (string questionleft in permutationList)
            {
                // NOTE(review): questionleft is concatenated into the query text unescaped; a question
                // containing quote characters would change or break the query - TODO confirm that
                // callers sanitize the question.

                // The search expression starts with the original permutation ...
                string bifContainsValue = "\'" + questionleft + "\'";

                // ... followed by one alternative per stem of every word it contains
                // (a word can have many stems - rare case).
                foreach (KeyValuePair<string, List<string>> stemEntry in stemmedWords)
                {
                    if (questionleft.Contains(stemEntry.Key))
                    {
                        foreach (string stem in stemEntry.Value)
                        {
                            bifContainsValue += "or\'" + questionleft.Replace(stemEntry.Key, stem) + "\'";
                        }
                    }
                }

                string Query = "SELECT  * WHERE { { " +
                                "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                                "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty>." +
                                "?label bif:contains \"" + bifContainsValue + "\" } " +
                                "union {" +
                                "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ." +
                                "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                                "?label bif:contains \"" + bifContainsValue + "\" } " +
                                "union {" +
                                "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>  ." +
                                "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                                "?label bif:contains \"" + bifContainsValue + "\" } " +
                                "} limit " + Limit;

                try
                {
                    //executing the Query and finding results
                    SparqlResultSet resultSet = remoteEndPoint.QueryWithResultSet(Query);

                    //iterating over matched predicates in the resultset
                    foreach (SparqlResult result in resultSet)
                    {
                        string predicateUri = result.Value("predicate").ToString();
                        INode predicateLabel = result.Value("label");

                        // Skip predicates already collected for this permutation. QuestionMatch is part
                        // of the comparison because the same predicate may be added again for another
                        // permutation with a better score. Doing this first avoids a useless remote
                        // usage check for duplicates.
                        bool exists = __predicateList.Exists(
                            known => known.URI == predicateUri && known.QuestionMatch == questionleft);
                        if (exists)
                        {
                            continue;
                        }

                        // check that the property is used .. not a non-used property
                        string checkQuery = "select distinct * where { ?x <" + predicateUri + "> ?y } limit 1 ";
                        SparqlResultSet checkResults;
                        QueryHandler.startConnection();
                        try
                        {
                            checkResults = QueryHandler.ExecuteQueryWithString(checkQuery);
                        }
                        finally
                        {
                            // close the connection even when the check query throws
                            QueryHandler.closeConnection();
                        }

                        // adding the new predicate to the __predicatelist
                        if (checkResults.Count != 0)
                        {
                            LexiconPredicate tmplexiconpredicate = new LexiconPredicate();
                            tmplexiconpredicate.URI = predicateUri;
                            tmplexiconpredicate.QuestionMatch = questionleft;
                            tmplexiconpredicate.label = predicateLabel.ToString();
                            __predicateList.Add(tmplexiconpredicate);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a failing permutation (e.g. a query time out) is logged with the
                    // actual reason and skipped, so the remaining permutations are still queried.
                    util.log("skipped : " + questionleft + " ---- due to : " + ex.Message);
                }
            }

            util.log(" finished getting " + __predicateList.Count + " predicates " + " Time taken : " + timer.Elapsed.TotalMilliseconds + " msec");

            // now done of collecting predicates scoring them down and get the best n ones
            this.predicateList = scorePredicates(__predicateList, topN);
            this.predicateList = addDomainAndRange(this.predicateList);

            util.log("total time taken :" + timer.Elapsed.TotalMilliseconds.ToString() + " mSecs");

            predicateFilled = true;
            return this.predicateList;
        }
Пример #4
0
        /// <summary>
        /// Cleans the given buckets in three steps:
        /// 1) removes the buckets that still have some question left;
        /// 2) in every bucket that contains predicates, removes the predicate domains that no
        ///    literal of the bucket has as type of owner and the literal types of owner that no
        ///    predicate of the bucket has as domain; a token that loses its last entry is removed
        ///    from the bucket, and a bucket that loses its last token is removed from the list.
        ///    Buckets without predicates are only flagged as literalOnly;
        /// 3) when the domains of a predicate equal the types of owner of a literal of the same
        ///    bucket, keeps only the first entry of both lists.
        /// </summary>
        /// <param name="queryBuckets">list of buckets to clean; the list and the tokens of its buckets are modified in place</param>
        /// <returns>the cleaned <paramref name="queryBuckets"/> list (same instance)</returns>
        private List <QueryBucket> cleanBucket(List <QueryBucket> queryBuckets)
        {
            #region removing Buckets which still have question left  >1

            queryBuckets.RemoveAll(candidate => candidate.questionLeft.Length > 0);

            #endregion

            #region remove Predicates domains and type of owners

            // iterate over a snapshot because buckets may be removed from queryBuckets below
            foreach (QueryBucket bucket in queryBuckets.ToList())
            {
                // Lookup lists of the predicates and literals the bucket holds at this point.
                List <LexiconPredicate> predicateList = bucket.tokens.OfType <LexiconPredicate>().ToList();
                List <LexiconLiteral>   literalList   = bucket.tokens.OfType <LexiconLiteral>().ToList();

                if (predicateList.Count == 0)
                {
                    bucket.literalOnly = true;
                    continue;
                }

                // iterate over a snapshot because tokens are replaced/removed inside the loop
                foreach (LexiconToken token in bucket.tokens.ToList())
                {
                    if (token is LexiconPredicate)
                    {
                        LexiconPredicate oldPredicate = token as LexiconPredicate;

                        foreach (string oldPredDomain in oldPredicate.domains.ToList())
                        {
                            bool exist = literalList.Any(literal => literal.typeOfOwner.Contains(oldPredDomain));
                            if (exist)
                            {
                                continue;
                            }

                            // The domain is not contained in any literal's type of owners, so it wont
                            // match|join with anything. It is removed on a clone, and the clone takes
                            // the place of the token in the bucket, so the token object that was
                            // handed in is not modified.
                            LexiconPredicate predicateToReplace = oldPredicate.getClone(oldPredicate) as LexiconPredicate;
                            predicateToReplace.domains.Remove(oldPredDomain);

                            bucket.tokens.Remove(oldPredicate);
                            bucket.tokens.Add(predicateToReplace);

                            oldPredicate = predicateToReplace;

                            //remove the predicate if it doesnt have any domains left
                            if (oldPredicate.domains.Count == 0)
                            {
                                bucket.tokens.Remove(oldPredicate);
                                predicateList.Remove(oldPredicate);

                                //remove the bucket if it's free from tokens
                                if (bucket.tokens.Count == 0)
                                {
                                    queryBuckets.Remove(bucket);
                                }
                            }
                        }
                    }

                    if (token is LexiconLiteral)
                    {
                        LexiconLiteral oldLiteral = token as LexiconLiteral;

                        foreach (string typeofowner in oldLiteral.typeOfOwner.ToList())
                        {
                            bool exist = predicateList.Any(predicate => predicate.domains.Contains(typeofowner));
                            if (exist)
                            {
                                continue;
                            }

                            // Same replace-by-clone approach as for the predicates above.
                            LexiconLiteral newLiteral = oldLiteral.getClone(oldLiteral) as LexiconLiteral;
                            newLiteral.typeOfOwner.Remove(typeofowner);

                            bucket.tokens.Remove(oldLiteral);
                            bucket.tokens.Add(newLiteral);

                            oldLiteral = newLiteral;

                            //remove the literal if it doesnt have any type of owner left
                            if (oldLiteral.typeOfOwner.Count == 0)
                            {
                                bucket.tokens.Remove(oldLiteral);
                                literalList.Remove(oldLiteral);

                                //remove the bucket if it's free from tokens
                                if (bucket.tokens.Count == 0)
                                {
                                    queryBuckets.Remove(bucket);
                                }
                            }
                        }
                    }
                }
            }

            #endregion

            #region remove the multiple domains and multiple ranges
            foreach (QueryBucket bucket in queryBuckets)
            {
                foreach (LexiconToken predicateToken in bucket.tokens)
                {
                    LexiconPredicate predicate = predicateToken as LexiconPredicate;
                    if (predicate == null)
                    {
                        continue;
                    }

                    foreach (LexiconToken literalToken in bucket.tokens)
                    {
                        LexiconLiteral literal = literalToken as LexiconLiteral;
                        if (literal == null || !Enumerable.SequenceEqual(predicate.domains, literal.typeOfOwner))
                        {
                            continue;
                        }

                        // Keep only the first entry of each list. The Count guards matter when both
                        // lists are empty (empty sequences are equal): RemoveRange(1, -1) would throw
                        // an ArgumentOutOfRangeException.
                        if (predicate.domains.Count > 1)
                        {
                            predicate.domains.RemoveRange(1, predicate.domains.Count - 1);
                        }

                        if (literal.typeOfOwner.Count > 1)
                        {
                            literal.typeOfOwner.RemoveRange(1, literal.typeOfOwner.Count - 1);
                        }
                    }
                }
            }
            #endregion

            return(queryBuckets);
        }
Пример #5
0
        /// <summary>
        /// Gets the predicates whose labels match some words of the question.
        /// If the predicates were already collected by an earlier call (predicateFilled is set),
        /// no request is sent: only the stored predicates whose QuestionMatch is one of the
        /// permutations of <paramref name="question"/> are returned.
        /// Otherwise every permutation is looked up through Request.RequestWithHTTP, the collected
        /// predicates are passed through scorePredicates and addDomainAndRange, stored in
        /// predicateList and returned.
        /// </summary>
        /// <param name="question">question to get matched predicates of it</param>
        /// <param name="topN">the number of top matching results to be returned (forwarded to scorePredicates), default = 20</param>
        /// <param name="Limit">the limit of the number of returned results in each label query, default = 30</param>
        /// <returns>list of top matching LexiconPredicates</returns>
        public List <LexiconPredicate> getPredicates(string question, int topN = 20, int Limit = 30)
        {
            // Only used for the timing figures written to the log.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            List <LexiconPredicate> __predicateList = new List <LexiconPredicate>();

            //getting all permutation of words formed from the question string
            List <string> permutationList = getPermutations(question);

            //removing permutations that most probably wont return results and will take time in querying
            permutationList = trimPermutations(permutationList);

            //Get the stemmed version of the question words
            Dictionary <string, List <string> > stemmedWords = GetStemmedWords(question);

            // predicates were filled before: return only the stored ones matching this question
            if (predicateFilled)
            {
                foreach (LexiconPredicate predicate in predicateList)
                {
                    if (permutationList.Contains(predicate.QuestionMatch))
                    {
                        __predicateList.Add(predicate);
                    }
                }
                return(__predicateList);
            }

            // iterating over each permutation of the question, querying it and collecting the matching predicates
            foreach (string questionleft in permutationList)
            {
                // NOTE(review): questionleft is concatenated into the query text unescaped; a question
                // containing quote characters would change or break the query - TODO confirm that
                // callers sanitize the question.

                // The search expression starts with the original permutation ...
                string bifContainsValue = "\'" + questionleft + "\'";

                // ... followed by one alternative per stem of every word it contains
                // (a word can have many stems - rare case).
                foreach (KeyValuePair <string, List <string> > stemEntry in stemmedWords)
                {
                    if (questionleft.Contains(stemEntry.Key))
                    {
                        foreach (string stem in stemEntry.Value)
                        {
                            bifContainsValue += "or\'" + questionleft.Replace(stemEntry.Key, stem) + "\'";
                        }
                    }
                }

                string Query = "SELECT  * WHERE { { " +
                               "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                               "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty>." +
                               "?label bif:contains \"" + bifContainsValue + "\" } " +
                               "union {" +
                               "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ." +
                               "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                               "?label bif:contains \"" + bifContainsValue + "\" } " +
                               "union {" +
                               "?predicate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>  ." +
                               "?predicate <http://www.w3.org/2000/01/rdf-schema#label> ?label ." +
                               "?label bif:contains \"" + bifContainsValue + "\" } " +
                               "} limit " + Limit;

                try
                {
                    //executing the Query and finding results
                    SparqlResultSet resultSet = Request.RequestWithHTTP(Query);

                    //iterating over matched predicates in the resultset
                    foreach (SparqlResult result in resultSet)
                    {
                        string predicateUri   = result.Value("predicate").ToString();
                        INode  predicateLabel = result.Value("label");

                        // Skip predicates already collected for this permutation. QuestionMatch is part
                        // of the comparison because the same predicate may be added again for another
                        // permutation with a better score. Doing this first avoids a useless remote
                        // usage check for duplicates.
                        bool exists = __predicateList.Exists(
                            known => known.URI == predicateUri && known.QuestionMatch == questionleft);
                        if (exists)
                        {
                            continue;
                        }

                        // check that the property is used .. not a non-used property
                        string          checkQuery   = "select distinct * where { ?x <" + predicateUri + "> ?y } limit 1 ";
                        SparqlResultSet checkResults = Request.RequestWithHTTP(checkQuery);

                        // adding the new predicate to the __predicatelist
                        if (checkResults.Count != 0)
                        {
                            LexiconPredicate tmplexiconpredicate = new LexiconPredicate();
                            tmplexiconpredicate.URI           = predicateUri;
                            tmplexiconpredicate.QuestionMatch = questionleft;
                            tmplexiconpredicate.label         = predicateLabel.ToString();
                            __predicateList.Add(tmplexiconpredicate);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a failing permutation (e.g. a request time out) is logged with the
                    // actual reason and skipped, so the remaining permutations are still queried.
                    util.log("skipped : " + questionleft + " ---- due to : " + ex.Message);
                }
            }

            util.log(" finished getting " + __predicateList.Count + " predicates " + " Time taken : " + timer.Elapsed.TotalMilliseconds + " msec");

            // now done of collecting predicates scoring them down and get the best n ones
            this.predicateList = scorePredicates(__predicateList, topN);
            this.predicateList = addDomainAndRange(this.predicateList);

            util.log("total time taken :" + timer.Elapsed.TotalMilliseconds.ToString() + " mSecs");

            predicateFilled = true;
            return(this.predicateList);
        }