// Computes the relevance rank of one keyword within one document using a
// TF/IDF-style score: inverse document frequency multiplied by an occurrence
// density that is normalized by document length, then summed per occurrence
// with a kind-specific weight and compressed through a logarithm.
internal double calculateKwdRank(InverseList list, DocumentOccurrences d, int[] occ)
{
    int occCount = occ.Length;
    int docsTotal = impl.documents.Count;
    int docsWithKwd = list.Count;
    int wordsTotal = impl.inverseIndex.Count;

    // Rarer keywords (present in fewer documents) score higher.
    double idf = System.Math.Log((double)docsTotal / docsWithKwd);
    double avgWords = (double)wordsTotal / docsTotal;
    // Occurrence density, damped for long documents.
    double density = occCount * System.Math.Log(1 + (DENSITY_MAGIC * avgWords / d.nWordsInDocument));
    double kwdWeight = density * idf;

    double score = 1;
    foreach (int o in occ)
    {
        // The high bits of each packed occurrence select a kind-specific weight.
        score += kwdWeight * occurrenceKindWeight[(uint)o >> OCC_KIND_OFFSET];
    }
    return System.Math.Log(score);
}
/// <summary>
/// Locates documents containing any keyword starting with the given prefix.
/// </summary>
/// <param name="prefix">keyword prefix to match against the inverse index</param>
/// <param name="maxResults">maximum number of hits to collect</param>
/// <param name="timeLimit">search time budget in milliseconds</param>
/// <param name="sort">when true, hits are ranked and sorted before return</param>
/// <returns>collected hits (possibly truncated by maxResults or timeLimit)
/// together with an estimation of the total number of matching documents</returns>
internal virtual FullTextSearchResult SearchPrefix(string prefix, int maxResults, int timeLimit, bool sort)
{
    const int TICKS_PER_MSEC = 10000;
    // Guard against a zero/negative budget: without it the first match below
    // would write hits[0] into a zero-length array and throw
    // IndexOutOfRangeException.
    if (maxResults <= 0)
    {
        return(new FullTextSearchResult(new FullTextSearchHit[0], 0));
    }
    FullTextSearchHit[] hits = new FullTextSearchHit[maxResults];
    int nResults = 0;
    int estimation = 0;
    // UtcNow rather than Now: the local wall clock can jump on DST or manual
    // clock changes, which would corrupt the deadline comparison below.
    long stop = DateTime.UtcNow.Ticks + (long)timeLimit * TICKS_PER_MSEC;
    foreach (InverseList list in impl.inverseIndex.StartsWith(prefix))
    {
        IDictionaryEnumerator occurrences = list.GetDictionaryEnumerator(0);
        // Every document in this keyword's list is a potential match, even if
        // we stop early, so count it toward the estimation up front.
        estimation += list.Count;
        while (occurrences.MoveNext())
        {
            int doc = (int)occurrences.Key;
            float rank = 1.0f;
            if (sort)
            {
                // Ranking is only computed when the caller asked for sorting.
                DocumentOccurrences d = (DocumentOccurrences)occurrences.Value;
                rank = (float)calculateKwdRank(list, d, d.occurrences);
            }
            hits[nResults] = new FullTextSearchHit(impl.Storage, doc, rank);
            if (++nResults >= maxResults || DateTime.UtcNow.Ticks >= stop)
            {
                goto Done;
            }
        }
    }
Done:
    if (nResults < maxResults)
    {
        // Shrink the result array to the number of hits actually collected.
        FullTextSearchHit[] realHits = new FullTextSearchHit[nResults];
        Array.Copy(hits, 0, realHits, 0, nResults);
        hits = realHits;
    }
    if (sort)
    {
        Array.Sort(hits);
    }
    return(new FullTextSearchResult(hits, estimation));
}
// Creates the inverse (per-keyword) list with its first document entry.
// The base-class initializer differs by build flavor: generic collections
// under USE_GENERICS, otherwise the non-generic variant keyed by int.
// NOTE(review): the constructor body and the closing #endif lie outside this
// chunk — the fragment below is intentionally left exactly as found.
internal InverseList(Storage db, int oid, DocumentOccurrences doc)
#if USE_GENERICS
    : base(true)
#else
    : base(typeof(int), true)
// Recursively evaluates a parsed full-text query tree against the document
// identified by <doc>, returning its rank. A negative result means the
// document does not satisfy the (sub)query; 0 or positive is a match score.
// NOTE(review): this method mutates shared evaluator state (nOccurrences,
// occurrences, kwd.occ) across recursive calls — presumably to support
// phrase/proximity matching for StrictMatch; confirm against the Near handler
// elsewhere in the file before refactoring.
internal virtual double evaluate(int doc, FullTextQuery query)
{
    double left, right;
    switch (query.op)
    {
    case FullTextQuery.Operator.Near:
    case FullTextQuery.Operator.And:
        // Both operands must match; the combined rank is their sum.
        left = evaluate(doc, ((FullTextQueryBinaryOp)query).left);
        right = evaluate(doc, ((FullTextQueryBinaryOp)query).right);
        // Reset the strict-match position chain once both sides are evaluated.
        nOccurrences = 0;
        return(left < 0 || right < 0 ? -1 : left + right);
    case FullTextQuery.Operator.Or:
        // Either operand may match; take the better rank of the two.
        left = evaluate(doc, ((FullTextQueryBinaryOp)query).left);
        right = evaluate(doc, ((FullTextQueryBinaryOp)query).right);
        return(left > right ? left : right);
    case FullTextQuery.Operator.Match:
    case FullTextQuery.Operator.StrictMatch:
    {
        KeywordList kwd = kwds[((FullTextQueryMatchOp)query).wno];
        // The keyword's cursor must currently be positioned on this document.
        if (kwd.currDoc != doc)
        {
            return(-1);
        }
        DocumentOccurrences d = (DocumentOccurrences)kwd.currEntry.Value;
        int[] occ = d.occurrences;
        kwd.occ = occ;
        int frequency = occ.Length;
        if (query.op == FullTextQuery.Operator.StrictMatch)
        {
            if (nOccurrences == 0)
            {
                // First keyword of the strict phrase: seed the position buffer
                // with this keyword's positions (kind bits masked off).
                nOccurrences = frequency;
                if (occurrences == null || occurrences.Length < frequency)
                {
                    occurrences = new int[frequency];
                }
                for (int i = 0; i < frequency; i++)
                {
                    occurrences[i] = occ[i] & OCC_POSITION_MASK;
                }
            }
            else
            {
                // Subsequent keyword: merge-join its positions against the
                // accumulated positions, keeping only those that continue the
                // phrase (i.e. follow a previous position at the expected
                // offset). Both position lists appear to be assumed sorted —
                // TODO confirm the index stores occurrences in position order.
                int nPairs = 0;
                int[] dst = occurrences;
                int occ1 = dst[0];
                int occ2 = occ[0] & OCC_POSITION_MASK;
                int i = 0, j = 0;
                int offs = kwd.kwdOffset;
                while (true)
                {
                    if (occ1 + offs <= occ2)
                    {
                        // Adjacent (within one position of the expected slot):
                        // this occurrence extends the phrase.
                        if (occ1 + offs + 1 >= occ2)
                        {
                            dst[nPairs++] = occ2;
                        }
                        if (++j == nOccurrences)
                        {
                            break;
                        }
                        occ1 = dst[j];
                    }
                    else
                    {
                        if (++i == frequency)
                        {
                            break;
                        }
                        occ2 = occ[i] & OCC_POSITION_MASK;
                    }
                }
                nOccurrences = nPairs;
                // No surviving positions means the strict phrase is broken.
                if (nPairs == 0)
                {
                    return(-1);
                }
            }
        }
        // Rank the keyword by its raw occurrence list for this document.
        return(calculateKwdRank(kwd.list, d, occ));
    }
    case FullTextQuery.Operator.Not:
    {
        // NOT inverts the match: a matching operand rejects the document.
        double rank = evaluate(doc, ((FullTextQueryUnaryOp)query).opd);
        return((rank >= 0) ? -1 : 0);
    }
    default:
        return(-1);
    }
}