Esempio n. 1
0
        /// <summary>
        /// Evaluates a chain of query clauses against the field indexes and returns the matching documents.
        /// </summary>
        /// <remarks>
        /// Clauses are visited in order through <c>Next</c>. A clause is ignored when its key has no index
        /// or when its closest match scores below <c>VectorNode.TrueAngle</c>. The first clause that
        /// matches seeds the result set regardless of its And/Not flags; later clauses intersect (And),
        /// subtract (Not) or merge (neither flag set) their postings into it.
        /// </remarks>
        /// <param name="query">First clause of the chain; may be null.</param>
        /// <returns>
        /// The result of <c>ReadDocs</c> for the collected document ids, or an empty sequence when
        /// no clause produced a match.
        /// </returns>
        public IEnumerable <IDictionary> Read(Query query)
        {
            IDictionary <ulong, double> result = null;

            // Advancing in the loop header guarantees progress for every clause, including
            // those skipped because their key has no index (previously an endless loop).
            for (var clause = query; clause != null; clause = clause.Next)
            {
                var keyHash = clause.Term.Key.ToString().ToHash();
                var ix      = GetIndex(keyHash);

                if (ix == null)
                {
                    continue;
                }

                var match = ix.ClosestMatch(clause.Term.Value.ToString());

                if (match.Highscore >= VectorNode.TrueAngle)
                {
                    // Every posting of the matched term gets the score of the match.
                    var docIds = _postingsReader.Read(match.PostingsOffset)
                                 .ToDictionary(x => x, y => match.Highscore);

                    if (result == null)
                    {
                        result = docIds;
                    }
                    else if (clause.And)
                    {
                        // Intersection: keep the already collected entries (and their scores)
                        // whose id is also present in this clause's postings.
                        result = result
                                 .Where(doc => docIds.ContainsKey(doc.Key))
                                 .ToDictionary(x => x.Key, y => y.Value);
                    }
                    else if (clause.Not)
                    {
                        foreach (var id in docIds)
                        {
                            // Remove by key. Passing the whole pair would resolve to
                            // ICollection<KeyValuePair<,>>.Remove, which only removes when the
                            // stored score also equals this clause's score.
                            result.Remove(id.Key);
                        }
                    }
                    else // Or
                    {
                        foreach (var id in docIds)
                        {
                            result[id.Key] = id.Value;
                        }
                    }
                }
            }

            if (result == null)
            {
                return(Enumerable.Empty <IDictionary>());
            }

            return(ReadDocs(result));
        }
Esempio n. 2
0
        /// <summary>
        /// Flags the postings of the supplied documents as deleted, for every field that has an index.
        /// </summary>
        /// <param name="data">
        /// Documents to remove. Each one is read for a boxed <c>ulong</c> id under the "_docid" key;
        /// every other value is cast to <see cref="IComparable"/>.
        /// </param>
        /// <param name="tokenizer">Splits string field values into the tokens that are looked up in the field index.</param>
        public void Remove(IEnumerable <IDictionary> data, ITokenizer tokenizer)
        {
            var postingsWriter = new PagedPostingsWriter(PostingsStream);

            foreach (var model in data)
            {
                var docId = (ulong)model["_docid"];

                foreach (var key in model.Keys)
                {
                    var fieldIndex = GetIndex(key.ToString().ToHash());

                    if (fieldIndex == null)
                    {
                        // No index exists for this field, so there is nothing to flag.
                        continue;
                    }

                    var val = (IComparable)model[key];

                    if (val == null)
                    {
                        // A null field value yields no tokens; skipping it avoids a
                        // NullReferenceException from val.ToString() below.
                        continue;
                    }

                    // String values are tokenized; any other value contributes its string
                    // representation as a single token. The set removes duplicate tokens.
                    var tokens = new HashSet <string>();
                    var str    = val as string;

                    if (str != null)
                    {
                        foreach (var token in tokenizer.Tokenize(str))
                        {
                            tokens.Add(token);
                        }
                    }
                    else
                    {
                        tokens.Add(val.ToString());
                    }

                    foreach (var token in tokens)
                    {
                        // 1. find node
                        // 2. get postings list
                        // 3. find docId offset
                        // 4. flag document as deleted

                        // NOTE(review): unlike Read, the closest match is used without a score
                        // threshold -- confirm a non-identical match cannot occur here.
                        var  match    = fieldIndex.ClosestMatch(token);
                        var  postings = _postingsReader.Read(match.PostingsOffset);

                        // Advances by one BLOCK_SIZE per posting that precedes the document's own.
                        // NOTE(review): starts at 0 rather than match.PostingsOffset -- confirm
                        // this is the offset FlagAsDeleted expects.
                        long offset   = 0;

                        foreach (var posting in postings)
                        {
                            if (posting == docId)
                            {
                                postingsWriter.FlagAsDeleted(offset, docId);
                                break;
                            }
                            offset += PagedPostingsWriter.BLOCK_SIZE;
                        }
                    }
                }
            }
        }