コード例 #1
0
        /// <summary>
        /// Retrieves the postings for this component's <c>Term</c>.
        /// </summary>
        /// <param name="index">Index that is queried for the postings.</param>
        /// <param name="processor">Token processor applied to <c>Term</c> before the lookup.</param>
        /// <returns>Whatever <c>index.GetPostings</c> returns for the processed term(s).</returns>
        public IList<Posting> GetPostings(IIndex index, ITokenProcessor processor)
        {
            // Processing one raw term yields a list of terms; all of them are handed
            // to the index in a single call (per the original author's note, the index
            // or-merges the postings of those terms).
            List<string> normalizedTerms = processor.ProcessToken(Term);
            IList<Posting> postings = index.GetPostings(normalizedTerms);

            return postings;
        }
コード例 #2
0
        /// <summary>
        /// Retrieves postings in which <c>firstTerm</c> and <c>secondTerm</c> satisfy a
        /// positional merge with a gap of 1 up to <c>k</c>.
        /// </summary>
        /// <param name="index">Index that supplies the positional postings.</param>
        /// <param name="processor">Token processor applied to both terms before the lookup.</param>
        /// <returns>The result of <c>Merge.OrMerge</c> over the per-gap positional merges.</returns>
        public IList<Posting> GetPostings(IIndex index, ITokenProcessor processor)
        {
            // Normalize both terms first, then fetch their positional postings.
            List<string> leftTerms = processor.ProcessToken(firstTerm);
            List<string> rightTerms = processor.ProcessToken(secondTerm);
            IList<Posting> leftPostings = index.GetPositionalPostings(leftTerms);
            IList<Posting> rightPostings = index.GetPositionalPostings(rightTerms);

            // One positional merge per allowed gap: 1, 2, ..., k.
            var mergedPerGap = new List<IList<Posting>>();
            int gap = 1;
            while (gap <= k)
            {
                IList<Posting> mergedAtGap = Merge.PositionalMerge(leftPostings, rightPostings, gap);
                mergedPerGap.Add(mergedAtGap);
                gap++;
            }

            // A hit at any of the gaps counts, so the per-gap results are or-merged.
            return Merge.OrMerge(mergedPerGap);
        }
コード例 #3
0
        /// <summary>
        /// Parses the expression element at <paramref name="index"/>, dispatches the parse
        /// result to the processor registered for its runtime type, and runs the action
        /// that processor returns, if any.
        /// </summary>
        /// <param name="index">Position of the element within <c>_expression</c> to process.</param>
        private void ProcessExpression(int index)
        {
            string rawToken = _expression[index];
            IParseResult parsed = TokenParser.Parse(rawToken);

            // Processors are keyed by the concrete type of the parse result.
            ITokenProcessor<IParseResult> handler = _tokenProcessorDictionary[parsed.GetType()];
            Action followUp = handler.ProcessToken(parsed, _resultTokens, OutputProcessor);

            // A processor may return no action at all; nothing is run in that case.
            if (followUp == null)
            {
                return;
            }

            followUp();
        }
コード例 #4
0
 /// <summary>
 /// Retrieves the postings for this phrase by positionally merging the postings
 /// of its individual terms.
 /// </summary>
 /// <param name="index">Index that supplies positional postings for each term.</param>
 /// <param name="processor">Token processor applied to every term of the phrase.</param>
 /// <returns>The result of <c>Merge.PositionalMerge</c> over the per-term posting lists.</returns>
 public IList<Posting> GetPostings(IIndex index, ITokenProcessor processor)
 {
     // One positional posting list per phrase term, in the iteration order of mTerms.
     var perTermPostings = new List<IList<Posting>>();

     foreach (string phraseTerm in mTerms)
     {
         // Normalize the raw term, then look up its positional postings.
         List<string> normalizedTerms = processor.ProcessToken(phraseTerm);
         IList<Posting> termPostings = index.GetPositionalPostings(normalizedTerms);
         perTermPostings.Add(termPostings);
     }

     // Positional merge across all collected posting lists.
     return Merge.PositionalMerge(perTermPostings);
 }
コード例 #5
0
        /// <summary>
        /// Resolves this wildcard token to vocabulary terms and returns their postings.
        /// </summary>
        /// <remarks>
        /// The token is split on <c>*</c> into literals. The first literal is prefixed and
        /// the last literal is suffixed with <c>$</c>, which marks a word boundary. For each
        /// literal the vocabularies of all its k-grams are intersected and then post-filtered
        /// against the literal itself. The per-literal candidate sets are intersected, the
        /// survivors are stemmed and de-duplicated, and the index is queried with the result.
        /// </remarks>
        /// <param name="index">Inverted index queried with the stemmed candidate terms.</param>
        /// <param name="processor">Token processor; its runtime type must be <c>NormalTokenProcessor</c>.</param>
        /// <returns>Whatever <c>index.GetPostings</c> returns for the stemmed candidate terms.</returns>
        /// <exception cref="InvalidCastException">
        /// <paramref name="processor"/> is not a <c>NormalTokenProcessor</c>.
        /// </exception>
        public IList<Posting> GetPostings(IIndex index, ITokenProcessor processor)
        {
            // The cast only validates the runtime type; the variable keeps its interface type.
            processor = (NormalTokenProcessor)processor;

            // Split the token into literals on '*' and normalize each literal.
            // NOTE(review): a token without any '*' yields a single literal that is only
            // start-anchored, so it behaves like a prefix query - confirm callers never
            // pass such tokens.
            string[] literals = this.token.Split('*');
            int lastLiteralIndex = literals.Length - 1;
            for (int i = 0; i < literals.Length; i++)
            {
                List<string> processedToken = processor.ProcessToken(literals[i]);
                if (processedToken.Count == 0)
                {
                    // Nothing came back from the processor: the raw literal is kept as is.
                    continue;
                }

                if (i == 0)
                {
                    literals[i] = "$" + processedToken[0];
                }
                else if (i == lastLiteralIndex)
                {
                    literals[i] = processedToken[0] + "$";
                }
                else
                {
                    literals[i] = processedToken[0];
                }
            }

            // Drop literals that carry no characters to match (empty, or a bare boundary marker).
            literals = literals.Where(x => !string.IsNullOrEmpty(x) && x != "$").ToArray();

            // Gather the candidate vocabulary terms for each literal.
            List<List<string>> candidatesList = new List<List<string>>();

            foreach (string literal in literals)
            {
                // AND-merge the vocabularies of every k-gram of the literal: the first
                // k-gram seeds the set, each further k-gram narrows it.
                List<string> candidates = new List<string>();
                bool isFirstKGram = true;
                foreach (string kGramTerm in this.KGramSplitter(literal))
                {
                    var vocabulary = this.kGram.getVocabularies(kGramTerm);
                    candidates = isFirstKGram
                        ? candidates.Union(vocabulary).ToList()
                        : candidates.Intersect(vocabulary).ToList();
                    isFirstKGram = false;
                }

                // Post-filtering step: sharing all k-grams does not guarantee that the
                // literal itself occurs in the word, so check the word against the literal.
                if (candidates.Count > 0)
                {
                    bool anchoredAtStart = literal[0] == '$';
                    bool anchoredAtEnd = literal[literal.Length - 1] == '$';

                    if (anchoredAtStart && !anchoredAtEnd)
                    {
                        // $literal* : the word must begin with the literal.
                        string prefix = literal.Substring(1);
                        candidates = candidates
                            .Where(s => s.StartsWith(prefix, StringComparison.Ordinal))
                            .ToList();
                    }
                    else if (!anchoredAtStart && anchoredAtEnd)
                    {
                        // *literal$ : the word must end with the literal.
                        string suffix = literal.Substring(0, literal.Length - 1);
                        candidates = candidates
                            .Where(s => s.EndsWith(suffix, StringComparison.Ordinal))
                            .ToList();
                    }
                    else if (!anchoredAtStart && !anchoredAtEnd)
                    {
                        // *literal* : '*' may match the empty string, so a word that begins
                        // or ends with the literal is still a valid match; only containment
                        // is required.
                        candidates = candidates.Where(s => s.Contains(literal)).ToList();
                    }

                    // A literal anchored at both ends is left unfiltered.
                }

                candidatesList.Add(candidates);
            }

            // A word must satisfy every literal, so intersect the per-literal candidates.
            List<string> finalCandidates = new List<string>();
            for (int i = 0; i < candidatesList.Count; i++)
            {
                finalCandidates = i == 0
                    ? finalCandidates.Union(candidatesList[i]).ToList()
                    : finalCandidates.Intersect(candidatesList[i]).ToList();
            }

            // Stem the final candidates; the set removes duplicate stems.
            HashSet<string> stemmedFinalCandidates = new HashSet<string>();
            foreach (string candidate in finalCandidates)
            {
                stemmedFinalCandidates.Add(stemmer.Stem(candidate).Value);
            }

            return index.GetPostings(stemmedFinalCandidates.ToList());
        }