/// <summary>
/// Wraps the index reader of one query word and precomputes the values
/// derived from it: the ranks, Sum_d_t, Idf_t and the cursor fields.
/// </summary>
/// <param name="wordIndex">Index reader of the word. It is dereferenced without a null check.</param>
/// <param name="totalDocuments">Document total used as the numerator of the Idf_t ratio.</param>
/// <param name="wordRank">Word rank; a value of zero or less is stored as 1.</param>
/// <param name="fieldRank">Field rank; a value of zero or less is stored as 1.</param>
public WordIndexForQuery(Index.WordIndexReader wordIndex, int totalDocuments, int wordRank, int fieldRank)
{
    FieldRank = fieldRank > 0 ? fieldRank : 1;
    WordRank = wordRank > 0 ? wordRank : 1;
    RelTotalCount = wordIndex.RelDocCount;

    // _CurIndex is -1 when the index reports no entries, otherwise 0.
    if (wordIndex.Count <= 0)
    {
        _CurIndex = -1;
    }
    else
    {
        _CurIndex = 0;
    }

    _WordIndex = wordIndex;

    Sum_d_t = (int)Math.Sqrt(_WordIndex.WordCount);

    // Count may be zero (see the _CurIndex handling above). Dividing by zero
    // would feed Infinity/NaN into Log10, and converting that to int yields an
    // unspecified value, so the denominator is clamped to at least 1.
    double documentCount = _WordIndex.Count;
    if (documentCount < 1)
    {
        documentCount = 1;
    }

    Idf_t = (int)Math.Log10((double)totalDocuments / documentCount + 1) + 1;

    CurDocIdIndex = 0;
    WordIndexesLength = _WordIndex.Count;
}
/// <summary>
/// Wraps the index reader of one query word, records the word flags and
/// precomputes the values derived from the reader: the ranks, Sum_d_t,
/// Idf_t and the cursor fields.
/// </summary>
/// <param name="wordIndex">Index reader of the word. It is dereferenced without a null check.</param>
/// <param name="totalDocuments">Document total used as the numerator of the Idf_t ratio.</param>
/// <param name="wordRank">Word rank; a value of zero or less is stored as 1.</param>
/// <param name="fieldRank">Field rank; a value of zero or less is stored as 1.</param>
/// <param name="flags">Flags stored in <c>Flags</c>.</param>
public WordIndexForQuery(Index.WordIndexReader wordIndex, int totalDocuments, int wordRank, int fieldRank, WordInfo.Flag flags)
{
    FieldRank = fieldRank > 0 ? fieldRank : 1;
    WordRank = wordRank > 0 ? wordRank : 1;
    RelTotalCount = wordIndex.RelDocCount;
    this.Flags = flags;

    _WordIndex = wordIndex;

    Sum_d_t = (int)Math.Sqrt(_WordIndex.WordCount);

    // Idf_t is based on RelDocCount. According to the note left in the
    // original code, the old "contains" implementation divided by
    // _WordIndex.Count instead.
    // A zero denominator would feed Infinity/NaN into Log10, and converting
    // that to int yields an unspecified value, so it is clamped to at least 1.
    double relatedDocuments = _WordIndex.RelDocCount;
    if (relatedDocuments < 1)
    {
        relatedDocuments = 1;
    }

    Idf_t = (int)Math.Log10((double)totalDocuments / relatedDocuments + 1) + 1;

    CurDocIdIndex = 0;
    WordIndexesLength = _WordIndex.Count;
}
/// <summary>
/// Creates the query wrapper without word flags by forwarding every
/// argument to the flag-aware constructor together with
/// <c>WordInfo.Flag.None</c>.
/// </summary>
/// <param name="wordIndex">Index reader of the word.</param>
/// <param name="totalDocuments">Document total passed through unchanged.</param>
/// <param name="wordRank">Word rank passed through unchanged.</param>
/// <param name="fieldRank">Field rank passed through unchanged.</param>
public WordIndexForQuery(
    Index.WordIndexReader wordIndex,
    int totalDocuments,
    int wordRank,
    int fieldRank)
    : this(wordIndex, totalDocuments, wordRank, fieldRank, WordInfo.Flag.None)
{
}
/// <summary>
/// Walks the documents of the first query word (<c>WordIndexes[0]</c>) and
/// keeps the best ones according to the ORDER BY clause. Per the original
/// note this is used for every ordering except the plain
/// "order by score desc" case.
/// </summary>
/// <remarks>
/// The payload share lock is held for the whole call and is released in a
/// finally block. Documents rejected by the untokenized conditions or listed
/// as deleted are skipped. The result is written to
/// <paramref name="docIdRank"/> only when an index reader is available.
/// </remarks>
/// <param name="upDict">Not read by this method.</param>
/// <param name="docIdRank">
/// Receives the group-by samples, the count of accepted documents
/// (<c>RelTotalCount</c>) and the selected documents, and is then marked as sorted.
/// </param>
public unsafe void CalculateOptimizeNormalOrderBy(Core.SFQL.Parse.DocumentResultWhereDictionary upDict,
    ref Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank)
{
    // Value substituted for a score whose computation overflowed to a negative
    // number. NOTE(review): the reason for the 4000000 margin is not visible here.
    const long overflowScore = long.MaxValue - 4000000;

    DBProvider dBProvider = Argument.DBProvider;

    dBProvider.SharedPayloadProvider.EnterPayloladShareLock();

    try
    {
        // Everything after taking the lock runs inside the try so that an
        // exception thrown by any of these reads cannot leak the lock.
        bool needFilterUntokenizedConditions = this.Argument.NeedFilterUntokenizedConditions;
        ExpressionTree untokenizedTree = this.Argument.UntokenizedTreeOnRoot;

        Query.DocumentResult documentResult;
        Query.DocumentResult* drp = &documentResult;

        bool orderByIncludingScore = Argument.OrderByIncludingScore();

        Field[] orderByFields;

        DocId2LongComparer comparer = DocId2LongComparer.Generate(
            dBProvider, Argument.OrderBys, out orderByFields);

        bool needGroupBy = Argument.NeedGroupBy;

        WordIndexForQuery wifq = WordIndexes[0];

        _IndexReader = wifq.WordIndex.IndexReader;

        Data.Field rankField = dBProvider.GetField("Rank");

        if (rankField != null)
        {
            if (rankField.DataType == Hubble.Core.Data.DataType.Int &&
                rankField.IndexType == Hubble.Core.Data.Field.Index.Untokenized)
            {
                _HasRandField = true;
                _RankTab = rankField.TabIndex;

                _DocidPayloads = new OriginalDocumentPositionList[2 * 1024];
                _CurDocidPayloadIndex = _DocidPayloads.Length;
            }
        }

        if (_IndexReader != null)
        {
            int top;

            //vars for delete
            bool haveRecordsDeleted = dBProvider.DelProvider.Count > 0;
            int[] delDocs = null;
            int curDelIndex = 0;
            int curDelDocid = 0;

            int groupByCount = 0;
            int groupByLen = dBProvider.Table.GroupByLimit;
            int groupByStep = 1;
            int groupByIndex = 0;

            if (needGroupBy)
            {
                // Sample roughly every groupByStep-th accepted document.
                groupByStep = wifq.RelTotalCount / groupByLen;

                if (groupByStep <= 0)
                {
                    groupByStep = 1;
                }
            }

            if (haveRecordsDeleted)
            {
                delDocs = dBProvider.DelProvider.DelDocs;
                curDelDocid = delDocs[curDelIndex];
            }

            //calculate top
            //Round End up to the next multiple of 100 (at least 100).
            if (this.Argument.End >= 0)
            {
                // Computed in long and saturated: the int expression wrapped to
                // a negative value for very large End and then fell back to 100.
                long roundedTop = (1L + this.Argument.End / 100) * 100L;

                if (roundedTop > int.MaxValue)
                {
                    top = int.MaxValue;
                }
                else
                {
                    top = (int)roundedTop;
                }
            }
            else
            {
                top = int.MaxValue;
            }

            PriorQueue<Docid2Long> priorQueue = new PriorQueue<Docid2Long>(top, comparer);
            int rows = 0;

            Entity.OriginalDocumentPositionList docList = new OriginalDocumentPositionList();

            bool notEOF = GetNext(ref docList);

            Docid2Long last = new Docid2Long();
            last.DocId = -1;

            int relCount = 0;

            while (notEOF)
            {
                //Process untokenized conditions.
                //If is not matched, get the next one.
                if (needFilterUntokenizedConditions)
                {
                    int docId = docList.DocumentId;
                    drp->DocId = docId;
                    drp->PayloadData = dBProvider.GetPayloadDataWithShareLock(docId);

                    if (!ParseWhere.GetComparisionExpressionValue(dBProvider, drp, untokenizedTree))
                    {
                        notEOF = GetNext(ref docList);
                        continue;
                    }
                }

                //Process deleted records
                if (haveRecordsDeleted)
                {
                    if (curDelIndex < delDocs.Length)
                    {
                        //If docid deleted, get next
                        if (docList.DocumentId == curDelDocid)
                        {
                            notEOF = GetNext(ref docList);
                            continue;
                        }
                        else if (docList.DocumentId > curDelDocid)
                        {
                            // Advance the deletion cursor until it reaches or
                            // passes the current document id.
                            while (curDelIndex < delDocs.Length && curDelDocid < docList.DocumentId)
                            {
                                curDelIndex++;

                                if (curDelIndex >= delDocs.Length)
                                {
                                    // Deletion list exhausted; stop checking it.
                                    haveRecordsDeleted = false;
                                    break;
                                }

                                curDelDocid = delDocs[curDelIndex];
                            }

                            if (curDelIndex < delDocs.Length)
                            {
                                if (docList.DocumentId == curDelDocid)
                                {
                                    notEOF = GetNext(ref docList);
                                    continue;
                                }
                            }
                        }
                    }
                }

                if (needGroupBy)
                {
                    if (groupByCount < groupByLen)
                    {
                        if (groupByIndex >= groupByStep)
                        {
                            groupByIndex = 0;
                        }

                        if (groupByIndex == 0)
                        {
                            docIdRank.AddToGroupByCollection(docList.DocumentId);
                            groupByCount++;
                        }

                        groupByIndex++;
                    }
                }

                relCount++;

                // The score is computed once for both paths below, so the
                // overflow clamp also applies after the queue has filled up.
                long score = 1;

                if (orderByIncludingScore)
                {
                    int wordCount = docList.CountAndWordCount / 8; //one word, score = count

                    score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)wordCount * (long)1000000 /
                        ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);
                }

                if (score < 0)
                {
                    //Overflow
                    score = overflowScore;
                }

                Docid2Long cur = new Docid2Long();
                cur.DocId = docList.DocumentId;
                cur.Rank = docList.TotalWordsInThisDocument; //use Rank store TotalWordsInThisDocument
                Docid2Long.Generate(ref cur, dBProvider, orderByFields, score);

                if (rows >= top)
                {
                    // Queue is full: only documents that the comparer ranks
                    // ahead of the current last entry are inserted.
                    if (comparer.Compare(last, cur) > 0)
                    {
                        priorQueue.Add(cur);
                        last = priorQueue.Last;
                    }
                }
                else
                {
                    priorQueue.Add(cur);
                    rows++;

                    if (rows == top)
                    {
                        last = priorQueue.Last;
                    }
                }

                notEOF = GetNext(ref docList);
            }

            docIdRank.RelTotalCount = relCount;

            foreach (Docid2Long docid2Long in priorQueue.ToArray())
            {
                long score = comparer.GetScore(docid2Long);

                if (score < 0)
                {
                    //Overflow
                    score = overflowScore;
                }

                docIdRank.Add(docid2Long.DocId, new DocumentResult(docid2Long.DocId, score));
            }

            docIdRank.Sorted = true;
        }
    }
    finally
    {
        dBProvider.SharedPayloadProvider.LeavePayloadShareLock();
    }
}