unsafe public DocumentResultWhereDictionary OrMerge(DocumentResultWhereDictionary fst, DocumentResultWhereDictionary sec) { if (fst == null) { return(sec); } if (sec == null) { return(fst); } foreach (int docid in sec.Keys) { if (fst.ContainsKey(docid)) { fst.Update(docid, fst[docid].Score + sec[docid].Score); } else { fst.Add(docid, sec[docid]); } } return(fst); }
/// <summary> /// And merge when the Not property is false both of fst and sec /// </summary> /// <param name="and"></param> /// <param name="or"></param> /// <returns></returns> unsafe public Core.SFQL.Parse.DocumentResultWhereDictionary AndMergeDict(Core.SFQL.Parse.DocumentResultWhereDictionary fst, Core.SFQL.Parse.DocumentResultWhereDictionary sec) { if (fst == null) { return(new Core.SFQL.Parse.DocumentResultWhereDictionary()); } if (sec == null) { return(new DocumentResultWhereDictionary()); } Core.SFQL.Parse.DocumentResultWhereDictionary src; Core.SFQL.Parse.DocumentResultWhereDictionary dest; if (fst.Count > sec.Count) { src = sec; dest = fst; } else { src = fst; dest = sec; } Core.SFQL.Parse.DocumentResultWhereDictionary result = new DocumentResultWhereDictionary(); result.Not = dest.Not; foreach (Core.SFQL.Parse.DocumentResultPoint drp in src.Values) { Query.DocumentResult *dr; if (dest.TryGetValue(drp.pDocumentResult->DocId, out dr)) { dr->Score += drp.pDocumentResult->Score; if (dr->PayloadData == null && drp.pDocumentResult->PayloadData != null) { dr->PayloadData = drp.pDocumentResult->PayloadData; } result.Add(drp.pDocumentResult->DocId, *dr); } } return(result); }
public unsafe void CalculateOptimize(Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary upDict, ref Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank) { Init(); Argument.DBProvider.SharedPayloadProvider.EnterPayloladShareLock(); try { if (Argument.IsOrderByScoreDesc()) { CalculateOrderByScore(upDict, ref docIdRank); } else { CalculateNormal(upDict, ref docIdRank); } } finally { Argument.DBProvider.SharedPayloadProvider.LeavePayloadShareLock(); } }
public Core.SFQL.Parse.DocumentResultWhereDictionary GetDocumentResults(int end, string where, string orderby) { string sql; //if (end >= 0) //{ // sql = string.Format("select top {0} ", end + 1); //} //else //{ sql = "select "; //} if (string.IsNullOrEmpty(where)) { if (DocIdReplaceField == null) { sql += string.Format(" docid from {0} ", Table.DBTableName); } else { sql += string.Format(" {0} from {1} ", DocIdReplaceField, Table.DBTableName); } } else { if (DocIdReplaceField == null) { sql += string.Format(" docid from {0} where {1}", Table.DBTableName, where); } else { sql += string.Format(" {0} from {1} where {2}", DocIdReplaceField, Table.DBTableName, where); } } if (!string.IsNullOrEmpty(orderby)) { sql += " order by " + orderby; } if (end > 0) { sql += " limit " + end + ";"; } Core.SFQL.Parse.DocumentResultWhereDictionary result = new Core.SFQL.Parse.DocumentResultWhereDictionary(); using (SQLiteDataProvider sqlData = new SQLiteDataProvider()) { sqlData.Connect(Table.ConnectionString); foreach (System.Data.DataRow row in sqlData.QuerySql(sql).Tables[0].Rows) { int docId; if (DocIdReplaceField == null) { docId = int.Parse(row[0].ToString()); } else { docId = DBProvider.GetDocIdFromDocIdReplaceFieldValue(long.Parse(row[DocIdReplaceField].ToString())); if (docId < 0) { continue; } } result.Add(docId, new Hubble.Core.Query.DocumentResult(docId)); } System.Data.DataSet ds; if (string.IsNullOrEmpty(where)) { ds = sqlData.QuerySql(string.Format("select count(*) cnt from {0}", Table.DBTableName)); } else { ds = sqlData.QuerySql(string.Format("select count(*) cnt from {0} where {1}", Table.DBTableName, where)); } result.RelTotalCount = int.Parse(ds.Tables[0].Rows[0][0].ToString()); } return(result); }
/// <summary> /// And merge when the Not property is ture of fst or sec. /// </summary> /// <param name="fst"></param> /// <param name="sec"></param> /// <returns></returns> public DocumentResultWhereDictionary AndMergeForNot(DocumentResultWhereDictionary fst, DocumentResultWhereDictionary sec) { if (fst == null) { return(sec); } if (sec == null) { return(fst); } if (fst.Count > sec.Count) { //Swap input dictionaries //Let fst count less than sec DocumentResultWhereDictionary temp; temp = fst; fst = sec; sec = temp; } if (fst.Not && sec.Not) { foreach (int key in fst.Keys) { if (!sec.ContainsKey(key)) { sec.Add(key, fst[key]); } } sec.RelTotalCount = sec.Count; return(sec); } else { DocumentResultWhereDictionary yes; DocumentResultWhereDictionary not; if (fst.Not) { yes = sec; not = fst; } else { yes = fst; not = sec; } foreach (int key in not.Keys) { yes.Remove(key); } yes.RelTotalCount = yes.Count; return(yes); } }
private unsafe void CalculateNormal(Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary upDict, ref Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank) { DBProvider dBProvider = Argument.DBProvider; bool needGroupBy = Argument.NeedGroupBy; bool needFilterUntokenizedConditions = this.Argument.NeedFilterUntokenizedConditions; ExpressionTree untokenizedTree = this.Argument.UntokenizedTreeOnRoot; Query.DocumentResult documentResult; Query.DocumentResult *drp = &documentResult; //vars for delete bool haveRecordsDeleted = dBProvider.DelProvider.Count > 0; int[] delDocs = null; int curDelIndex = 0; int curDelDocid = 0; Field[] orderByFields; DocId2LongComparer comparer = DocId2LongComparer.Generate( dBProvider, Argument.OrderBys, out orderByFields); if (haveRecordsDeleted) { delDocs = dBProvider.DelProvider.DelDocs; curDelDocid = delDocs[curDelIndex]; } double ratio = 1; WordIndexForQuery[] wordIndexes = WordIndexes; if (wordIndexes.Length > 1) { ratio = (double)2 / (double)(wordIndexes.Length - 1); } int wordIndexesLen = wordIndexes.Length; WordIndexForQuery fstWifq = wordIndexes[0]; //first word OriginalDocumentPositionList fstODPL = new OriginalDocumentPositionList(); //Entity.DocumentPositionList fstDocList = fstWifq.WordIndex.GetNext(); Entity.DocumentPositionList[] docListArr = new Hubble.Core.Entity.DocumentPositionList[wordIndexesLen]; //docListArr[0] = fstDocList; //fstWifq.WordIndex.GetNextOriginal(ref fstODPL); //fstODPL.ToDocumentPositionList(ref docListArr[0]); GetNext(fstWifq, ref fstODPL, ref docListArr[0]); OriginalDocumentPositionList odpl = new OriginalDocumentPositionList(); Entity.DocumentPositionList lastDocList = new Hubble.Core.Entity.DocumentPositionList(); int top; //calculate top //If less than 100, set to 100 if (this.Argument.End >= 0) { top = (1 + this.Argument.End / 100) * 100; if (top <= 0) { top = 100; } //if (this.Argument.End * 2 > top) //{ // top *= 2; //} } else { top = int.MaxValue; } PriorQueue <Docid2Long> priorQueue = new PriorQueue <Docid2Long>(top, comparer); int rows = 0; Docid2Long cur = new Docid2Long(); Docid2Long last = new Docid2Long(); last.DocId = -1; while (fstODPL.DocumentId >= 0) { int curWord = 1; int firstDocId = fstODPL.DocumentId; while (curWord < wordIndexesLen) { //docListArr[curWord] = wordIndexes[curWord].WordIndex.Get(firstDocId); wordIndexes[curWord].WordIndex.GetNextOriginalWithDocId(ref odpl, firstDocId); odpl.ToDocumentPositionList(ref docListArr[curWord]); if (docListArr[curWord].DocumentId < 0) { if ((wordIndexes[curWord].Flags & WordInfo.Flag.Or) != 0) { curWord++; continue; } else { break; } } curWord++; } //While if (curWord >= wordIndexesLen) { //Process untokenized conditions. //If is not matched, get the next one. if (needFilterUntokenizedConditions) { int docId = firstDocId; drp->DocId = docId; drp->PayloadData = dBProvider.GetPayloadDataWithShareLock(docId); if (!ParseWhere.GetComparisionExpressionValue(dBProvider, drp, untokenizedTree)) { GetNext(fstWifq, ref fstODPL, ref docListArr[0]); continue; } } //Matched //Caculate score long totalScore = 0; lastDocList.Count = 0; lastDocList.FirstPosition = 0; for (int i = 0; i < wordIndexesLen; i++) { WordIndexForQuery wifq = wordIndexes[i]; if (wifq.WordIndex.Count == 0) { //a^5000^0 b^5000^2^1 //if has a and hasn't b but b can be or //2010-09-30 eaglet continue; } Entity.DocumentPositionList docList = docListArr[i]; long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)docList.Count * (long)1000000 / ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument); if (score < 0) { //Overflow score = long.MaxValue - 4000000; } double delta = 1; if (i > 0) { //Calculate with position double queryPositionDelta = wifq.FirstPosition - wordIndexes[i - 1].FirstPosition; double positionDelta = docList.FirstPosition - lastDocList.FirstPosition; delta = Math.Abs(queryPositionDelta - positionDelta); if (delta < 0.031) { delta = 0.031; } else if (delta <= 1.1) { delta = 0.5; } else if (delta <= 2.1) { delta = 1; } delta = Math.Pow((1 / delta), ratio) * docList.Count * lastDocList.Count / (double)(wifq.QueryCount * wordIndexes[i - 1].QueryCount); } lastDocList.Count = docList.Count; lastDocList.FirstPosition = docList.FirstPosition; totalScore += (long)(score * delta); } //End for cycle if (haveRecordsDeleted) { if (curDelIndex < delDocs.Length) { //If docid deleted, get next if (firstDocId == curDelDocid) { GetNext(fstWifq, ref fstODPL, ref docListArr[0]); continue; } else if (firstDocId > curDelDocid) { while (curDelIndex < delDocs.Length && curDelDocid < firstDocId) { curDelIndex++; if (curDelIndex >= delDocs.Length) { haveRecordsDeleted = false; break; } curDelDocid = delDocs[curDelIndex]; } if (curDelIndex < delDocs.Length) { if (firstDocId == curDelDocid) { GetNext(fstWifq, ref fstODPL, ref docListArr[0]); continue; } } } } } if (needGroupBy) { docIdRank.AddToGroupByCollection(firstDocId); } if (_HasRankField) { int rank = dBProvider.SharedPayloadProvider.GetPayloadRank(firstDocId); totalScore *= rank; if (totalScore < 0) { totalScore = long.MaxValue - 4000000; } } if (rows >= top) { rows++; cur.DocId = firstDocId; Docid2Long.Generate(ref cur, dBProvider, orderByFields, totalScore); if (comparer.Compare(last, cur) > 0) { priorQueue.Add(cur); last = priorQueue.Last; } } else { cur.DocId = firstDocId; Docid2Long.Generate(ref cur, dBProvider, orderByFields, totalScore); priorQueue.Add(cur); rows++; if (rows == top) { last = priorQueue.Last; } } //docIdRank.Add(firstDocId, totalScore); }//if (curWord >= wordIndexesLen) GetNext(fstWifq, ref fstODPL, ref docListArr[0]); //fstWifq.WordIndex.GetNextOriginal(ref fstODPL); //fstODPL.ToDocumentPositionList(ref docListArr[0]); } docIdRank.RelTotalCount = rows; foreach (Docid2Long docid2Long in priorQueue.ToArray()) { long score = comparer.GetScore(docid2Long); if (score < 0) { //Overflow score = long.MaxValue - 4000000; } docIdRank.Add(docid2Long.DocId, new DocumentResult(docid2Long.DocId, score)); } docIdRank.Sorted = true; }