/// <summary>
/// Creates an instance by reading <c>DocId</c>, <c>Score</c> and <c>PayloadData</c>
/// from the <see cref="DocumentResult"/> that <paramref name="pDocResult"/> points to
/// and forwarding them to the three-argument constructor.
/// </summary>
/// <param name="pDocResult">
/// Pointer to the source result. It is dereferenced without a null check.
/// </param>
public unsafe DocumentResultForSort(DocumentResult* pDocResult)
    : this(pDocResult->DocId, pDocResult->Score, pDocResult->PayloadData)
{
}
/// <summary>
/// Scores the documents matched by <paramref name="wordIndexes"/> and accumulates the
/// results into <paramref name="docIdRank"/>, weighting documents in which consecutive
/// query words appear at positions consistent with their positions in the query.
/// </summary>
/// <param name="upDict">
/// Optional filter. When <c>null</c>, every matched document is a candidate. When
/// non-null and <c>Not</c> is <c>false</c>, only documents contained in it are added and
/// its scores are merged into the result; when <c>Not</c> is <c>true</c>, documents
/// contained in it are excluded.
/// </param>
/// <param name="docIdRank">
/// Result dictionary keyed by document id. If it is empty on entry and the estimated
/// size exceeds <c>DocumentResultWhereDictionary.DefaultSize</c>, it is replaced by a new
/// instance created with that estimated capacity. Its <c>RelTotalCount</c> is updated
/// before returning.
/// </param>
/// <param name="wordIndexes">
/// Per-word index readers for the query. The array is sorted in place.
/// </param>
private unsafe void CalculateWithPosition(Core.SFQL.Parse.DocumentResultWhereDictionary upDict,
    ref Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank, WordIndexForQuery[] wordIndexes)
{
    Array.Sort(wordIndexes);

    MinResultCount = _DBProvider.Table.GroupByLimit;

    // Exponent applied to the position-distance factor; shrinks as the query gets longer.
    double ratio = 1;

    if (wordIndexes.Length > 1)
    {
        ratio = (double)2 / (double)(wordIndexes.Length - 1);
    }

    // Estimate the result capacity: sum of all posting-list sizes plus 50%,
    // capped at 1024 * 1024 entries.
    int maxWordDocListCount = 0;
    int documentSum = 0;

    foreach (WordIndexForQuery wifq in wordIndexes)
    {
        maxWordDocListCount += wifq.WordIndex.Count;
    }

    maxWordDocListCount += maxWordDocListCount / 2;

    if (maxWordDocListCount > 1024 * 1024)
    {
        maxWordDocListCount = 1024 * 1024;
    }

    if (docIdRank.Count == 0)
    {
        if (maxWordDocListCount > DocumentResultWhereDictionary.DefaultSize)
        {
            docIdRank = new Core.SFQL.Parse.DocumentResultWhereDictionary(maxWordDocListCount);
        }
    }

    Query.PerformanceReport performanceReport = new Hubble.Core.Query.PerformanceReport("Calculate");

    // Merge
    bool oneWordOptimize = this._QueryParameter.CanLoadPartOfDocs
        && this._QueryParameter.NoAndExpression
        && wordIndexes.Length == 1;

    for (int i = 0; i < wordIndexes.Length; i++)
    {
        WordIndexForQuery wifq = wordIndexes[i];

        Entity.DocumentPositionList docList = wifq.WordIndex.GetNext();
        int j = 0;
        int oneWordMaxCount = 0;

        // A negative DocumentId marks the end of this word's posting list.
        while (docList.DocumentId >= 0)
        {
            if (oneWordOptimize)
            {
                if (j > MinResultCount)
                {
                    // Past the first MinResultCount postings: skip any posting whose
                    // Count is below the largest Count recorded so far.
                    if (oneWordMaxCount > docList.Count)
                    {
                        docList = wifq.WordIndex.GetNext();
                        j++;
                        continue;
                    }
                }
                else
                {
                    if (oneWordMaxCount < docList.Count)
                    {
                        oneWordMaxCount = docList.Count;
                    }
                }
            }

            if (j > wifq.RelTotalCount)
            {
                break;
            }

            long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)docList.Count * (long)1000000
                / ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

            if (score < 0)
            {
                // Overflow
                score = long.MaxValue - 4000000;
            }

            Core.SFQL.Parse.DocumentResultPoint drp;

            // The first word (i == 0) can only create entries, so the lookup is skipped for it.
            if (i > 0 && docIdRank.TryGetValue(docList.DocumentId, out drp))
            {
                DocumentResult* hit = drp.pDocumentResult;

                hit->Score += score;

                // Compare the distance between this word and the previously matched word
                // in the query with the corresponding distance in the document.
                double queryPositionDelta = wifq.FirstPosition - hit->LastWordIndexFirstPosition;
                double positionDelta = docList.FirstPosition - hit->LastPosition;

                double delta = Math.Abs(queryPositionDelta - positionDelta);

                // Bucket the mismatch; the lower bound keeps 1 / delta finite.
                if (delta < 0.031)
                {
                    delta = 0.031;
                }
                else if (delta <= 1.1)
                {
                    delta = 0.5;
                }
                else if (delta <= 2.1)
                {
                    delta = 1;
                }

                delta = Math.Pow((1 / delta), ratio) * docList.Count * hit->LastCount
                    / (double)(wifq.QueryCount * hit->LastWordIndexQueryCount);

                hit->Score = (long)(hit->Score * delta);
                hit->LastIndex = (UInt16)i;
                hit->LastPosition = docList.FirstPosition;
                hit->LastCount = (UInt16)docList.Count;
                hit->LastWordIndexFirstPosition = (UInt16)wifq.FirstPosition;
            }
            else
            {
                bool notInDict = _NotInDict != null && _NotInDict.ContainsKey(docList.DocumentId);

                if (!notInDict)
                {
                    // No filter: always add. Otherwise add when membership in upDict
                    // differs from upDict.Not (contained for a positive filter, absent
                    // for a negated one).
                    bool admit = upDict == null
                        || upDict.Not != upDict.ContainsKey(docList.DocumentId);

                    if (admit)
                    {
                        DocumentResult docResult = new DocumentResult(docList.DocumentId, score,
                            wifq.FirstPosition, wifq.QueryCount, docList.FirstPosition, docList.Count, i);
                        docIdRank.Add(docList.DocumentId, docResult);
                    }
                }
            }

            docList = wifq.WordIndex.GetNext();
            j++;

            if (j > wifq.WordIndex.Count)
            {
                break;
            }
        }
    }

    // Merge score if upDict != null
    if (upDict != null && !upDict.Not)
    {
        foreach (int docid in docIdRank.Keys)
        {
            DocumentResult* upDrp;

            if (upDict.TryGetValue(docid, out upDrp))
            {
                DocumentResult* drpResult;

                if (docIdRank.TryGetValue(docid, out drpResult))
                {
                    drpResult->Score += upDrp->Score;
                }
            }
        }
    }

    if (wordIndexes.Length > 1)
    {
        int lstIndex = wordIndexes.Length - 1;

        foreach (Core.SFQL.Parse.DocumentResultPoint drp in docIdRank.Values)
        {
            DocumentResult* dr = drp.pDocumentResult;

            // The document did not match the trailing word(s): divide its score by
            // 10 plus the summed WordRank of every word after the last one it matched.
            if (dr->LastIndex != lstIndex)
            {
                int sumWordRank = 10;

                for (int k = dr->LastIndex + 1; k <= lstIndex; k++)
                {
                    sumWordRank += wordIndexes[k].WordRank;
                }

                double delta = 1 / (double)sumWordRank;

                dr->Score = (long)((double)dr->Score * delta);
            }

            if (dr->Score < 0)
            {
                dr->Score = long.MaxValue / 10;
            }
        }
    }

    performanceReport.Stop();

    documentSum += docIdRank.Count;

    if (documentSum > _TotalDocuments)
    {
        documentSum = _TotalDocuments;
    }

    DeleteProvider delProvider = _DBProvider.DelProvider;
    int deleteCount = delProvider.Filter(docIdRank);

    if (_QueryParameter.CanLoadPartOfDocs && upDict == null)
    {
        WordIndexForQuery lastWord = wordIndexes[wordIndexes.Length - 1];

        if (docIdRank.Count < lastWord.RelTotalCount)
        {
            if (wordIndexes.Length > 1)
            {
                if (lastWord.RelTotalCount > _DBProvider.MaxReturnCount)
                {
                    documentSum += lastWord.RelTotalCount - _DBProvider.MaxReturnCount;
                }

                if (documentSum > _TotalDocuments)
                {
                    documentSum = _TotalDocuments;
                }

                docIdRank.RelTotalCount = documentSum;
            }
            else
            {
                docIdRank.RelTotalCount = lastWord.RelTotalCount;
            }
        }
    }

    docIdRank.RelTotalCount -= deleteCount;
}