/// <summary>
/// Scores the documents that contain every word in <paramref name="wordIndexes"/>
/// and adds them to <paramref name="docIdRank"/>.
/// </summary>
/// <param name="upDict">
/// Optional filter. When null, every matching document is added. When its Not flag is
/// false, only documents it contains are added and their score from this dictionary is
/// added on top afterwards. When its Not flag is true, documents it contains are excluded.
/// </param>
/// <param name="docIdRank">
/// Result dictionary (document id to score). If it is empty on entry it may be replaced
/// by a new instance pre-sized for the expected number of results. Its RelTotalCount is
/// set before returning.
/// </param>
/// <param name="wordIndexes">
/// Per-word index cursors. The array is sorted in place and must contain at least one
/// element, because element 0 is read unconditionally.
/// </param>
unsafe private void Calculate(DocumentResultWhereDictionary upDict,
    ref DocumentResultWhereDictionary docIdRank, WordIndexForQuery[] wordIndexes)
{
    Array.Sort(wordIndexes);

    MinResultCount = _DBProvider.Table.GroupByLimit;

    // Smallest doc-list size among the words, capped at 1M. Only documents present in
    // every list can match, so this bounds the number of results.
    int minWordDocListCount = 1 * 1024 * 1024; //1M

    foreach (WordIndexForQuery wifq in wordIndexes)
    {
        minWordDocListCount = Math.Min(minWordDocListCount, wifq.WordIndex.WordDocList.Count);
    }

    // Pre-size the result dictionary only when the caller handed in an empty one.
    if (docIdRank.Count == 0 && minWordDocListCount > DocumentResultWhereDictionary.DefaultSize)
    {
        docIdRank = new Core.SFQL.Parse.DocumentResultWhereDictionary(minWordDocListCount);
    }

    Query.PerformanceReport performanceReport = new Hubble.Core.Query.PerformanceReport("Calculate");

    bool oneWordOptimize = this._QueryParameter.CanLoadPartOfDocs
        && this._QueryParameter.NoAndExpression
        && wordIndexes.Length == 1;

    if (oneWordOptimize)
    {
        // Single word: walk its list once; iteration ends at the first entry whose
        // DocumentId is negative.
        WordIndexForQuery onlyWord = wordIndexes[0];
        int oneWordMaxCount = 0;
        int j = 0;

        for (Entity.DocumentPositionList docList = onlyWord.WordIndex.GetNext();
            docList.DocumentId >= 0;
            docList = onlyWord.WordIndex.GetNext(), j++)
        {
            if (j > MinResultCount)
            {
                // Past the first MinResultCount entries: skip entries whose Count is
                // below the highest Count seen among the earlier entries.
                if (oneWordMaxCount > docList.Count)
                {
                    continue;
                }
            }
            else if (oneWordMaxCount < docList.Count)
            {
                oneWordMaxCount = docList.Count;
            }

            long score = (long)onlyWord.FieldRank * (long)onlyWord.WordRank * (long)onlyWord.Idf_t
                * (long)docList.Count * (long)1000000
                / ((long)onlyWord.Sum_d_t * (long)docList.TotalWordsInThisDocument);

            if (score < 0)
            {
                //Overflow
                score = long.MaxValue - 4000000;
            }

            int docId = docList.DocumentId;

            // Keep the document when there is no filter, or when its membership in
            // upDict differs from the Not flag (present and !Not, or absent and Not).
            if (upDict == null || upDict.ContainsKey(docId) != upDict.Not)
            {
                docIdRank.Add(docId, score);
            }
        }
    }
    else
    {
        // Several words: drive the iteration from the first word after sorting and
        // look the same document up in every other word.
        int wordIndexesLen = wordIndexes.Length;
        WordIndexForQuery fstWifq = wordIndexes[0];

        Entity.DocumentPositionList fstDocList = fstWifq.WordIndex.GetNext();
        Entity.DocumentPositionList[] docListArr = new Hubble.Core.Entity.DocumentPositionList[wordIndexesLen];
        docListArr[0] = fstDocList;

        while (fstDocList.DocumentId >= 0)
        {
            int curWord = 1;
            int firstDocId = fstDocList.DocumentId;

            while (curWord < wordIndexesLen)
            {
                docListArr[curWord] = wordIndexes[curWord].WordIndex.Get(firstDocId);

                // A negative DocumentId is treated as "this word is not in the document".
                if (docListArr[curWord].DocumentId < 0)
                {
                    break;
                }

                curWord++;
            }

            if (curWord >= wordIndexesLen)
            {
                // Every word matched: the document score is the sum of per-word scores.
                long totalScore = 0;

                for (int i = 0; i < wordIndexesLen; i++)
                {
                    WordIndexForQuery wifq = wordIndexes[i];
                    Entity.DocumentPositionList docList = docListArr[i];

                    long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t
                        * (long)docList.Count * (long)1000000
                        / ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

                    if (score < 0)
                    {
                        //Overflow
                        score = long.MaxValue - 4000000;
                    }

                    totalScore += score;

                    if (totalScore < 0)
                    {
                        // Each term is non-negative after the clamp above, so a negative
                        // sum means the addition wrapped; saturate with the same sentinel.
                        totalScore = long.MaxValue - 4000000;
                    }
                }

                if (upDict == null || upDict.ContainsKey(firstDocId) != upDict.Not)
                {
                    docIdRank.Add(firstDocId, totalScore);
                }
            }

            fstDocList = fstWifq.WordIndex.GetNext();
            docListArr[0] = fstDocList;
        }
    }

    // Add the score carried by upDict for documents kept by a non-negated filter.
    if (upDict != null && !upDict.Not)
    {
        foreach (int docid in docIdRank.Keys)
        {
            DocumentResult* upDrp;

            if (upDict.TryGetValue(docid, out upDrp))
            {
                DocumentResult* drpResult;

                if (docIdRank.TryGetValue(docid, out drpResult))
                {
                    drpResult->Score += upDrp->Score;
                }
            }
        }
    }

    // NOTE(review): Filter presumably removes deleted documents from docIdRank and
    // returns how many it removed - confirm against DeleteProvider.
    DeleteProvider delProvider = _DBProvider.DelProvider;
    int delCount = delProvider.Filter(docIdRank);

    // oneWordOptimize already implies CanLoadPartOfDocs, so it is not re-tested here.
    if (oneWordOptimize && upDict == null)
    {
        // Entries may have been skipped above, so report the word's own total.
        docIdRank.RelTotalCount = wordIndexes[0].RelTotalCount - delCount;
    }
    else
    {
        docIdRank.RelTotalCount = docIdRank.Count;
    }

    performanceReport.Stop();
}
/// <summary>
/// Scores every document that contains at least one word in
/// <paramref name="wordIndexes"/>, accumulating the per-word scores of documents that
/// contain several of them, and stores the result in <paramref name="docIdRank"/>.
/// </summary>
/// <param name="upDict">
/// Optional filter. When null, every document is eligible. When its Not flag is false,
/// only documents it contains are added and their score from this dictionary is added on
/// top afterwards. When its Not flag is true, documents it contains are excluded.
/// </param>
/// <param name="docIdRank">
/// Result dictionary (document id to score). If it is empty on entry it may be replaced
/// by a new instance pre-sized for the expected number of results.
/// </param>
/// <param name="wordIndexes">
/// Per-word index cursors. The array is sorted in place.
/// </param>
unsafe private void Calculate(DocumentResultWhereDictionary upDict,
    ref DocumentResultWhereDictionary docIdRank, WordIndexForQuery[] wordIndexes)
{
    Array.Sort(wordIndexes);

    MinResultCount = _DBProvider.Table.GroupByLimit;

    // Sum of RelDocCount over all words, used as the capacity hint for the result
    // dictionary.
    int maxWordDocListCount = 0;

    foreach (WordIndexForQuery wifq in wordIndexes)
    {
        maxWordDocListCount += wifq.WordIndex.RelDocCount;
    }

    // Pre-size the result dictionary only when the caller handed in an empty one.
    if (docIdRank.Count == 0 && maxWordDocListCount > DocumentResultWhereDictionary.DefaultSize)
    {
        docIdRank = new Core.SFQL.Parse.DocumentResultWhereDictionary(maxWordDocListCount);
    }

    Query.PerformanceReport performanceReport = new Hubble.Core.Query.PerformanceReport("Calculate");

    bool oneWordOptimize = this._QueryParameter.CanLoadPartOfDocs
        && this._QueryParameter.NoAndExpression
        && wordIndexes.Length == 1;

    for (int i = 0; i < wordIndexes.Length; i++)
    {
        WordIndexForQuery wifq = wordIndexes[i];
        int oneWordMaxCount = 0;
        int j = 0;

        // Iteration ends at the first entry whose DocumentId is negative.
        for (Entity.DocumentPositionList docList = wifq.WordIndex.GetNext();
            docList.DocumentId >= 0;
            docList = wifq.WordIndex.GetNext(), j++)
        {
            if (oneWordOptimize)
            {
                // The original nested the same "j > MinResultCount" test twice, which
                // made the else branch unreachable: oneWordMaxCount stayed 0 and no entry
                // was ever skipped. The guard is now tested once so the maximum is
                // tracked for the first entries and used to skip later ones.
                if (j > MinResultCount)
                {
                    if (oneWordMaxCount > docList.Count)
                    {
                        continue;
                    }
                }
                else if (oneWordMaxCount < docList.Count)
                {
                    oneWordMaxCount = docList.Count;
                }
            }

            long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t
                * (long)docList.Count * (long)1000000
                / ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

            if (score < 0)
            {
                //Overflow
                score = long.MaxValue - 4000000;
            }

            int docId = docList.DocumentId;

            // From the second word on, a document may already have been added by an
            // earlier word; in that case only its score is increased.
            Core.SFQL.Parse.DocumentResultPoint drp;

            if (i > 0 && docIdRank.TryGetValue(docId, out drp))
            {
                drp.pDocumentResult->Score += score;
                continue;
            }

            // Documents listed in _NotInDict are never added.
            if (_NotInDict != null && _NotInDict.ContainsKey(docId))
            {
                continue;
            }

            // Keep the document when there is no filter, or when its membership in
            // upDict differs from the Not flag (present and !Not, or absent and Not).
            if (upDict == null || upDict.ContainsKey(docId) != upDict.Not)
            {
                docIdRank.Add(docId, score);
            }
        }
    }

    // Add the score carried by upDict for documents kept by a non-negated filter.
    if (upDict != null && !upDict.Not)
    {
        foreach (int docid in docIdRank.Keys)
        {
            DocumentResult* upDrp;

            if (upDict.TryGetValue(docid, out upDrp))
            {
                DocumentResult* drpResult;

                if (docIdRank.TryGetValue(docid, out drpResult))
                {
                    drpResult->Score += upDrp->Score;
                }
            }
        }
    }

    // Result count before deleted documents are filtered out, capped at _TotalDocuments.
    int documentSum = docIdRank.Count;

    if (documentSum > _TotalDocuments)
    {
        documentSum = _TotalDocuments;
    }

    // NOTE(review): Filter presumably removes deleted documents from docIdRank and
    // returns how many it removed - confirm against DeleteProvider.
    DeleteProvider delProvider = _DBProvider.DelProvider;
    int deleteCount = delProvider.Filter(docIdRank);

    if (_QueryParameter.CanLoadPartOfDocs && upDict == null)
    {
        // Last word after sorting; its RelTotalCount drives the reported total.
        WordIndexForQuery lastWord = wordIndexes[wordIndexes.Length - 1];

        if (docIdRank.Count < lastWord.RelTotalCount)
        {
            if (wordIndexes.Length > 1)
            {
                // Add the part of the last word's total beyond MaxReturnCount.
                if (lastWord.RelTotalCount > _DBProvider.MaxReturnCount)
                {
                    documentSum += lastWord.RelTotalCount - _DBProvider.MaxReturnCount;
                }

                if (documentSum > _TotalDocuments)
                {
                    documentSum = _TotalDocuments;
                }

                docIdRank.RelTotalCount = documentSum;
            }
            else
            {
                docIdRank.RelTotalCount = lastWord.RelTotalCount;
            }
        }
    }

    // NOTE(review): when none of the branches above assigns RelTotalCount, this
    // subtracts from whatever value the dictionary already reports - confirm that is
    // intended.
    docIdRank.RelTotalCount -= deleteCount;

    performanceReport.Stop();
}