/// <summary>
/// Scores every document that contains all of the query words and adds the result to
/// <paramref name="docIdRank"/>, optionally filtered by <paramref name="upDict"/>.
/// </summary>
/// <remarks>
/// <para>
/// Each word contributes
/// <c>FieldRank * WordRank * Idf_t * Count * 1000000 / (Sum_d_t * TotalWordsInThisDocument)</c>.
/// A contribution (or a running total) that wraps to a negative value is treated as an
/// overflow and replaced by <c>long.MaxValue - 4000000</c>.
/// </para>
/// <para>
/// When the query parameter reports <c>CanLoadPartOfDocs</c> and <c>NoAndExpression</c> and there
/// is exactly one word, a single-word fast path is used; otherwise the first word (after sorting)
/// drives the iteration and every other word is probed for the same document id.
/// </para>
/// <para>
/// After scoring, documents are passed through the delete provider's filter and
/// <c>docIdRank.RelTotalCount</c> is set.
/// </para>
/// </remarks>
/// <param name="upDict">
/// Optional filter. When <c>null</c> every matching document is added. Otherwise a document is
/// added only if its presence in <paramref name="upDict"/> differs from <c>upDict.Not</c>
/// (i.e. present when <c>Not</c> is false, absent when <c>Not</c> is true). When <c>Not</c> is
/// false the score stored in <paramref name="upDict"/> is also added to the new score.
/// </param>
/// <param name="docIdRank">
/// Result dictionary. If it is empty on entry it may be replaced by a larger pre-sized instance.
/// </param>
/// <param name="wordIndexes">Query words; sorted in place by this method.</param>
unsafe private void Calculate(DocumentResultWhereDictionary upDict,
    ref DocumentResultWhereDictionary docIdRank, WordIndexForQuery[] wordIndexes)
{
    // Saturation value used whenever score arithmetic wraps around to a negative number.
    const long ScoreCap = long.MaxValue - 4000000;

    Array.Sort(wordIndexes);

    MinResultCount = _DBProvider.Table.GroupByLimit;

    // Smallest word doc list count among the query words, starting from an upper bound of 1M.
    int minWordDocListCount = 1 * 1024 * 1024;

    foreach (WordIndexForQuery wifq in wordIndexes)
    {
        minWordDocListCount = Math.Min(minWordDocListCount, wifq.WordIndex.WordDocList.Count);
    }

    // Only an empty result dictionary may be swapped for a larger pre-sized one.
    if (docIdRank.Count == 0 && minWordDocListCount > DocumentResultWhereDictionary.DefaultSize)
    {
        docIdRank = new Core.SFQL.Parse.DocumentResultWhereDictionary(minWordDocListCount);
    }

    Query.PerformanceReport performanceReport = new Hubble.Core.Query.PerformanceReport("Calculate");

    bool oneWordOptimize = this._QueryParameter.CanLoadPartOfDocs
        && this._QueryParameter.NoAndExpression
        && wordIndexes.Length == 1;

    if (oneWordOptimize)
    {
        // Single-word fast path.
        WordIndexForQuery wifq = wordIndexes[0];
        int oneWordMaxCount = 0;

        Entity.DocumentPositionList docList = wifq.WordIndex.GetNext();

        // A negative DocumentId ends the iteration. The iterator clause also runs on
        // 'continue', so skipped documents still advance both the counter and the list.
        for (int j = 0; docList.DocumentId >= 0; j++, docList = wifq.WordIndex.GetNext())
        {
            if (j > MinResultCount)
            {
                // Past the first MinResultCount + 1 entries: skip documents whose Count is
                // below the largest Count recorded so far.
                if (oneWordMaxCount > docList.Count)
                {
                    continue;
                }
            }
            else if (oneWordMaxCount < docList.Count)
            {
                oneWordMaxCount = docList.Count;
            }

            long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t *
                (long)docList.Count * (long)1000000 /
                ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

            if (score < 0)
            {
                // Overflow
                score = ScoreCap;
            }

            // Present-in-upDict must differ from upDict.Not for the document to pass.
            bool accepted = upDict == null
                || upDict.Not != upDict.ContainsKey(docList.DocumentId);

            if (accepted)
            {
                docIdRank.Add(docList.DocumentId, score);
            }
        }
    }
    else
    {
        int wordIndexesLen = wordIndexes.Length;

        WordIndexForQuery fstWifq = wordIndexes[0]; // first word after sorting
        Entity.DocumentPositionList fstDocList = fstWifq.WordIndex.GetNext();

        Entity.DocumentPositionList[] docListArr =
            new Hubble.Core.Entity.DocumentPositionList[wordIndexesLen];
        docListArr[0] = fstDocList;

        while (fstDocList.DocumentId >= 0)
        {
            int firstDocId = fstDocList.DocumentId;

            // Probe every other word for the same document; stop at the first miss.
            int curWord = 1;

            while (curWord < wordIndexesLen)
            {
                docListArr[curWord] = wordIndexes[curWord].WordIndex.Get(firstDocId);

                if (docListArr[curWord].DocumentId < 0)
                {
                    break;
                }

                curWord++;
            }

            if (curWord >= wordIndexesLen)
            {
                // Every word matched this document.
                long totalScore = 0;

                for (int i = 0; i < wordIndexesLen; i++)
                {
                    WordIndexForQuery wifq = wordIndexes[i];
                    Entity.DocumentPositionList docList = docListArr[i];

                    long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t *
                        (long)docList.Count * (long)1000000 /
                        ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

                    if (score < 0)
                    {
                        // Overflow
                        score = ScoreCap;
                    }

                    totalScore += score;

                    if (totalScore < 0)
                    {
                        // Every term is non-negative, so a negative sum can only come from
                        // wrap-around (e.g. two saturated terms); saturate the sum as well.
                        totalScore = ScoreCap;
                    }
                }

                bool accepted = upDict == null
                    || upDict.Not != upDict.ContainsKey(firstDocId);

                if (accepted)
                {
                    docIdRank.Add(firstDocId, totalScore);
                }
            }

            fstDocList = fstWifq.WordIndex.GetNext();
            docListArr[0] = fstDocList;
        }
    }

    // Merge the scores carried by upDict into the new results (only for a non-negated upDict).
    if (upDict != null && !upDict.Not)
    {
        foreach (int docid in docIdRank.Keys)
        {
            DocumentResult* upDrp;

            if (!upDict.TryGetValue(docid, out upDrp))
            {
                continue;
            }

            DocumentResult* drpResult;

            if (docIdRank.TryGetValue(docid, out drpResult))
            {
                drpResult->Score += upDrp->Score;
            }
        }
    }

    DeleteProvider delProvider = _DBProvider.DelProvider;
    int delCount = delProvider.Filter(docIdRank);

    // oneWordOptimize already implies CanLoadPartOfDocs, so it is not re-checked here.
    if (oneWordOptimize && upDict == null)
    {
        docIdRank.RelTotalCount = wordIndexes[0].RelTotalCount - delCount;
    }
    else
    {
        docIdRank.RelTotalCount = docIdRank.Count;
    }

    performanceReport.Stop();
}
/// <summary>
/// Scores the documents produced by a <c>MultiWordsDocIdEnumerator</c> over
/// <paramref name="wordIndexes"/>, weighting each word by how closely its position relative to
/// the previously selected word matches the query, and stores the results in
/// <paramref name="docIdRank"/> ordered through <c>DocIdLongComparer(false)</c>.
/// </summary>
/// <remarks>
/// <para>
/// When <c>Argument.End</c> is non-negative only the best <c>(1 + End / 100) * 100</c> documents
/// are kept (100 if that expression is not positive) using a bounded <c>PriorQueue</c>;
/// otherwise all documents are collected in a list and sorted at the end.
/// </para>
/// <para>
/// Any score that wraps to a negative value is treated as an overflow and replaced by
/// <c>long.MaxValue - 4000000</c>. The saturation is applied before the document is ranked so
/// that an overflowed document is never compared using a wrapped negative score.
/// </para>
/// <para>
/// On return <c>docIdRank.RelTotalCount</c> is the enumerator's <c>TotalDocIdCount</c> minus the
/// number of documents rejected by the untokenized conditions, and <c>docIdRank.Sorted</c> is true.
/// </para>
/// </remarks>
/// <param name="upDict">Must be <c>null</c>; this method does not support an upstream filter.</param>
/// <param name="docIdRank">
/// Receives the scored documents. It is also handed to the enumerator as the group-by dictionary
/// when <c>Argument.NeedGroupBy</c> is set.
/// </param>
/// <param name="wordIndexes">Query words to match.</param>
/// <exception cref="ParseException">Thrown when <paramref name="upDict"/> is not <c>null</c>.</exception>
unsafe private void CalculateWithPositionOrderByScoreDesc(Core.SFQL.Parse.DocumentResultWhereDictionary upDict,
    ref Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank, WordIndexForQuery[] wordIndexes)
{
    // Saturation value used whenever score arithmetic wraps around to a negative number.
    const long ScoreCap = long.MaxValue - 4000000;

    DBProvider dbProvider = Argument.DBProvider;
    bool needFilterUntokenizedConditions = this.Argument.NeedFilterUntokenizedConditions;
    ExpressionTree untokenizedTree = this.Argument.UntokenizedTreeOnRoot;

    if (upDict != null)
    {
        throw new ParseException("UpDict is not null!");
    }

    // Calculate top: (1 + End / 100) * 100, or "unbounded" when End is negative.
    int top;

    if (Argument.End >= 0)
    {
        top = (1 + Argument.End / 100) * 100;

        if (top <= 0)
        {
            top = 100;
        }
    }
    else
    {
        top = int.MaxValue;
    }

    bool unbounded = top == int.MaxValue;

    // Exactly one of the two collectors is used, depending on whether 'top' is bounded.
    PriorQueue<Docid2Long> priorQueue = null;
    List<Docid2Long> docid2longList = null;

    if (unbounded)
    {
        docid2longList = new List<Docid2Long>();
    }
    else
    {
        priorQueue = new PriorQueue<Docid2Long>(top, new DocIdLongComparer(false));
    }

    long lastMinScore = 0;
    int rows = 0;

    Core.SFQL.Parse.DocumentResultWhereDictionary groupByDict = Argument.NeedGroupBy ? docIdRank : null;

    // Changed at 2012-3-18: passing 'top' to the enumerator (top optimization) affected the
    // search result, so it is disabled by passing -1 instead.
    MultiWordsDocIdEnumerator mwde = new MultiWordsDocIdEnumerator(wordIndexes, dbProvider, groupByDict, -1,
        needFilterUntokenizedConditions);

    Entity.OriginalDocumentPositionList odpl = new Hubble.Core.Entity.OriginalDocumentPositionList();
    Entity.DocumentPositionList lastDocList = new Hubble.Core.Entity.DocumentPositionList();

    // Exponent applied to the inverse position distance; shrinks as the query gets longer.
    double ratio = 1;

    if (wordIndexes.Length > 1)
    {
        ratio = (double)2 / (double)(wordIndexes.Length - 1);
    }

    // Scratch record handed to the untokenized-condition evaluator.
    Query.DocumentResult documentResult;
    Query.DocumentResult* drp = &documentResult;

    int skipCount = 0; // documents rejected by the untokenized conditions

    // A negative DocumentId ends the iteration. The iterator clause also runs on 'continue',
    // so rejected documents still advance the enumerator.
    for (mwde.GetNextOriginal(ref odpl); odpl.DocumentId >= 0; mwde.GetNextOriginal(ref odpl))
    {
        // Process untokenized conditions; a document that does not match is skipped.
        if (needFilterUntokenizedConditions)
        {
            int docId = odpl.DocumentId;
            drp->DocId = docId;
            drp->PayloadData = dbProvider.GetPayloadDataWithShareLock(docId);

            if (!ParseWhere.GetComparisionExpressionValue(dbProvider, drp, untokenizedTree))
            {
                skipCount++;
                continue;
            }
        }

        // Matched: calculate the score.
        long totalScore = 0;
        lastDocList.Count = 0;
        lastDocList.FirstPosition = 0;
        int lastWifqIndex = 0;

        for (int i = 0; i < mwde.SelectedCount; i++)
        {
            int index = mwde.SelectedIndexes[i];
            WordIndexForQuery wifq = mwde.WordIndexes[index];

            Int16 count = (Int16)mwde.SelectedDocLists[i].Count;
            int firstPosition = mwde.SelectedDocLists[i].FirstPosition;
            int totalWordsInThisDocument = mwde.SelectedDocLists[i].TotalWordsInThisDocument;

            long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t *
                (long)count * (long)1000000 /
                ((long)wifq.Sum_d_t * (long)totalWordsInThisDocument);

            if (score < 0)
            {
                // Overflow
                score = ScoreCap;
            }

            double delta = 1;

            if (i > 0)
            {
                // Calculate with position: compare the distance between this word and the
                // previously selected word in the query with the same distance in the document.
                // NOTE(review): the previous word is looked up in 'wordIndexes' while the current
                // one comes from 'mwde.WordIndexes' - presumably the same ordering; confirm.
                double queryPositionDelta = wifq.FirstPosition - wordIndexes[lastWifqIndex].FirstPosition;
                double positionDelta = firstPosition - lastDocList.FirstPosition;

                delta = Math.Abs(queryPositionDelta - positionDelta);

                // Bucket the distance before inverting it.
                if (delta < 0.031)
                {
                    delta = 0.031;
                }
                else if (delta <= 1.1)
                {
                    delta = 0.5;
                }
                else if (delta <= 2.1)
                {
                    delta = 1;
                }

                delta = Math.Pow((1 / delta), ratio) * count * lastDocList.Count /
                    (double)(wifq.QueryCount * wordIndexes[lastWifqIndex].QueryCount);
            }

            lastDocList.Count = count;
            lastDocList.FirstPosition = firstPosition;
            lastWifqIndex = index;

            totalScore += (long)(score * delta);

            if (totalScore < 0)
            {
                // The weighted term can exceed the long range and the sum can wrap; a negative
                // total is treated as overflow and saturated here so that ranking below never
                // sees a wrapped value.
                totalScore = ScoreCap;
            }
        }

        if (_HasRankField)
        {
            int rank = dbProvider.SharedPayloadProvider.GetPayloadRank(odpl.DocumentId);
            totalScore *= rank;

            if (totalScore < 0)
            {
                totalScore = ScoreCap;
            }
        }

        // All of the words matched: 10 times.
        if (mwde.SelectedCount == wordIndexes.Length)
        {
            totalScore *= 10;

            if (totalScore < 0)
            {
                totalScore = ScoreCap;
            }
        }

        // Record the document.
        if (unbounded)
        {
            docid2longList.Add(new Docid2Long(odpl.DocumentId, totalScore));
        }
        else if (rows < top)
        {
            // Still filling the queue.
            priorQueue.Add(new Docid2Long(odpl.DocumentId, totalScore));
            rows++;

            if (rows == top)
            {
                lastMinScore = priorQueue.Last.Value1;
            }
        }
        else if (lastMinScore < totalScore)
        {
            // Queue is full: only a score above the current minimum is inserted.
            priorQueue.Add(new Docid2Long(odpl.DocumentId, totalScore));
            lastMinScore = priorQueue.Last.Value1;
        }
    }

    docIdRank.RelTotalCount = mwde.TotalDocIdCount - skipCount;

    Docid2Long[] docid2longArr;

    if (unbounded)
    {
        docid2longList.Sort(new DocIdLongComparer(false));
        docid2longArr = docid2longList.ToArray();
    }
    else
    {
        docid2longArr = priorQueue.ToArray();
    }

    foreach (Docid2Long docid2Long in docid2longArr)
    {
        long score = docid2Long.Value1;

        if (score < 0)
        {
            // Overflow (kept as a final safeguard)
            score = ScoreCap;
        }

        docIdRank.Add(docid2Long.DocId, new DocumentResult(docid2Long.DocId, score));
    }

    docIdRank.Sorted = true;
}