/// <summary>
/// Looks up the postings for this component's term.
/// </summary>
/// <param name="index">Index that is queried for postings.</param>
/// <param name="processor">Token processor applied to the term before the lookup.</param>
/// <returns>
/// The result of <c>index.GetPostings</c> for the processed term(s); per the
/// original author's note this is an OR-merge over all of them.
/// </returns>
public IList<Posting> GetPostings(IIndex index, ITokenProcessor processor)
{
    // Processing a single term may produce several index terms, so the
    // whole processed list is handed to the index in one call.
    return index.GetPostings(processor.ProcessToken(Term));
}
/// <summary>
/// Finds postings where the second term occurs within <c>k</c> positions of the first term.
/// </summary>
/// <param name="index">Index that is queried for positional postings.</param>
/// <param name="processor">Token processor applied to both terms before the lookup.</param>
/// <returns>
/// The OR-merge of the positional merges computed for every gap from 1 up to and
/// including <c>k</c>. When <c>k</c> is less than 1 no positional merge is performed
/// and <c>Merge.OrMerge</c> receives an empty list.
/// </returns>
public IList<Posting> GetPostings(IIndex index, ITokenProcessor processor)
{
    // Normalise both terms first, then fetch their positional postings
    // (same call order as before: process, process, lookup, lookup).
    List<string> leftTerms = processor.ProcessToken(firstTerm);
    List<string> rightTerms = processor.ProcessToken(secondTerm);
    IList<Posting> leftPostings = index.GetPositionalPostings(leftTerms);
    IList<Posting> rightPostings = index.GetPositionalPostings(rightTerms);

    // One positional merge per allowed distance 1..k.
    var mergedPerGap = new List<IList<Posting>>();
    int gap = 1;
    while (gap <= k)
    {
        mergedPerGap.Add(Merge.PositionalMerge(leftPostings, rightPostings, gap));
        gap++;
    }

    // A document qualifies if it matched at any of the distances.
    return Merge.OrMerge(mergedPerGap);
}
/// <summary>
/// Parses the expression entry at <paramref name="index"/>, hands the parse result to
/// the token processor registered for its runtime type, and runs the action that the
/// processor returns, if any.
/// </summary>
/// <param name="index">Position of the entry to process inside <c>_expression</c>.</param>
private void ProcessExpression(int index)
{
    IParseResult parsed = TokenParser.Parse(_expression[index]);

    // The processor is selected by the concrete type of the parse result.
    // NOTE(review): presumably _tokenProcessorDictionary is a dictionary whose
    // indexer throws for an unregistered type - confirm.
    ITokenProcessor<IParseResult> handler = _tokenProcessorDictionary[parsed.GetType()];

    Action followUp = handler.ProcessToken(parsed, _resultTokens, OutputProcessor);
    if (followUp == null)
    {
        // The processor produced no follow-up work for this token.
        return;
    }

    followUp();
}
/// <summary>
/// Looks up the postings for this phrase by positionally merging the postings of its terms.
/// </summary>
/// <param name="index">Index that is queried for positional postings.</param>
/// <param name="processor">Token processor applied to every term of the phrase.</param>
/// <returns>The result of <c>Merge.PositionalMerge</c> over the per-term posting lists.</returns>
public IList<Posting> GetPostings(IIndex index, ITokenProcessor processor)
{
    // One posting list per phrase term, kept in phrase order.
    var perTermPostings = new List<IList<Posting>>();

    foreach (string phraseTerm in mTerms)
    {
        List<string> normalized = processor.ProcessToken(phraseTerm);
        IList<Posting> positional = index.GetPositionalPostings(normalized);
        perTermPostings.Add(positional);
    }

    return Merge.PositionalMerge(perTermPostings);
}
/// <summary>
/// Gets the postings for the vocabulary terms that match this wildcard token.
/// </summary>
/// <remarks>
/// Steps: (1) split the token on <c>*</c> into literals and process each one,
/// marking the first literal with a leading <c>$</c> and the last with a trailing
/// <c>$</c>; (2) for every literal, intersect the vocabularies returned by the
/// k-gram index for each of its k-grams; (3) post-filter those candidates against
/// the literal itself; (4) intersect the candidate sets of all literals; (5) stem
/// the surviving words, drop duplicates and query the index.
/// String matching in the post-filter is ordinal, so results do not depend on the
/// current culture.
/// </remarks>
/// <param name="index">Inverted index that is queried with the final, stemmed candidates.</param>
/// <param name="processor">Token processor; must be a <c>NormalTokenProcessor</c>.</param>
/// <returns>The result of <c>index.GetPostings</c> for the stemmed candidate terms.</returns>
/// <exception cref="System.InvalidCastException">
/// <paramref name="processor"/> is not a <c>NormalTokenProcessor</c>.
/// </exception>
public IList<Posting> GetPostings(IIndex index, ITokenProcessor processor)
{
    // Kept as a type assertion: only a NormalTokenProcessor is accepted here.
    processor = (NormalTokenProcessor)processor;

    // Split the token on '*' and normalise every piece, anchoring the first
    // and last pieces with '$' so they can be told apart later.
    string[] literals = this.token.Split("*").ToArray();
    int lastIndex = literals.Length - 1;
    for (int i = 0; i < literals.Length; i++)
    {
        List<string> processedToken = processor.ProcessToken(literals[i]);
        if (processedToken.Count == 0)
        {
            // Nothing came out of the processor; the raw piece is left untouched.
            continue;
        }

        if (i == 0)
        {
            literals[i] = "$" + processedToken[0];
        }
        else if (i == lastIndex)
        {
            literals[i] = processedToken[0] + "$";
        }
        else
        {
            literals[i] = processedToken[0];
        }
    }

    // Pieces that are empty or only an anchor carry no constraint.
    literals = literals.Where(x => !string.IsNullOrEmpty(x) && x != "$").ToArray();

    // Gather the candidate vocabulary words for each literal.
    List<List<string>> candidatesList = new List<List<string>>();
    foreach (string literal in literals)
    {
        List<string> candidates = new List<string>();
        bool isFirstGram = true;

        // The first k-gram seeds the candidate set (Union on the empty list also
        // removes duplicates); every further k-gram narrows it down.
        foreach (string kGramTerm in this.KGramSplitter(literal))
        {
            candidates = isFirstGram
                ? candidates.Union(this.kGram.getVocabularies(kGramTerm)).ToList()
                : candidates.Intersect(this.kGram.getVocabularies(kGramTerm)).ToList();
            isFirstGram = false;
        }

        // Post-filtering step: k-gram intersection can yield false positives,
        // so each candidate is checked against the literal itself.
        if (candidates.Count > 0)
        {
            bool anchoredAtStart = literal[0] == '$';
            bool anchoredAtEnd = literal[literal.Length - 1] == '$';

            if (anchoredAtStart && !anchoredAtEnd)
            {
                // $literal*
                string prefix = literal.Substring(1);
                candidates = candidates
                    .Where(s => s.StartsWith(prefix, StringComparison.Ordinal))
                    .ToList();
            }
            else if (!anchoredAtStart && anchoredAtEnd)
            {
                // *literal$
                string suffix = literal.Substring(0, literal.Length - 1);
                candidates = candidates
                    .Where(s => s.EndsWith(suffix, StringComparison.Ordinal))
                    .ToList();
            }
            else if (!anchoredAtStart && !anchoredAtEnd)
            {
                // *literal*
                // NOTE(review): words that begin or end with the literal are rejected,
                // so a query such as "*ab*" would not match "abc" - confirm this is intended.
                candidates = candidates
                    .Where(s => s.Contains(literal)
                                && !s.StartsWith(literal, StringComparison.Ordinal)
                                && !s.EndsWith(literal, StringComparison.Ordinal))
                    .ToList();
            }
        }

        // An empty list is added as well, so that the final intersection yields nothing.
        candidatesList.Add(candidates);
    }

    // Generate the final candidates: a word must be a candidate for every literal.
    List<string> finalCandidates = new List<string>();
    for (int i = 0; i < candidatesList.Count; i++)
    {
        finalCandidates = i == 0
            ? finalCandidates.Union(candidatesList[i]).ToList()
            : finalCandidates.Intersect(candidatesList[i]).ToList();
    }

    // Stem the final candidates; the set removes words that share a stem.
    HashSet<string> stemmedFinalCandidates = new HashSet<string>();
    foreach (string candidate in finalCandidates)
    {
        stemmedFinalCandidates.Add(stemmer.Stem(candidate).Value);
    }

    return index.GetPostings(stemmedFinalCandidates.ToList());
}