コード例 #1
0
        /// <summary>
        /// Or-merges <paramref name="sec"/> into <paramref name="fst"/>.
        /// Doc ids found in both dictionaries get their two scores summed through <c>fst.Update</c>;
        /// doc ids found only in <paramref name="sec"/> are added to <paramref name="fst"/>.
        /// </summary>
        /// <param name="fst">Dictionary that is modified in place and returned; may be null.</param>
        /// <param name="sec">Dictionary whose entries are folded into <paramref name="fst"/>; may be null.</param>
        /// <returns>
        /// <paramref name="sec"/> when <paramref name="fst"/> is null, otherwise <paramref name="fst"/>.
        /// </returns>
        unsafe public DocumentResultWhereDictionary OrMerge(DocumentResultWhereDictionary fst, DocumentResultWhereDictionary sec)
        {
            // With one side missing, the other side already is the merged result.
            if (fst == null)
            {
                return sec;
            }

            if (sec == null)
            {
                return fst;
            }

            foreach (int id in sec.Keys)
            {
                if (!fst.ContainsKey(id))
                {
                    // Present only in sec: carry the entry over as it is.
                    fst.Add(id, sec[id]);
                    continue;
                }

                // Present on both sides: the merged score is the sum of both scores.
                fst.Update(id, fst[id].Score + sec[id].Score);
            }

            return fst;
        }
コード例 #2
0
        /// <summary>
        /// And-merges two dictionaries; intended for the case where the Not property is false on both.
        /// The operand with fewer entries (fst on a tie) is enumerated and each of its doc ids is
        /// looked up in the other operand. For every doc id found on both sides the enumerated
        /// entry's score is added to the looked-up entry (in place, through its pointer), a null
        /// payload pointer on the looked-up entry is filled from the enumerated entry, and the
        /// looked-up entry is copied into a new result dictionary.
        /// </summary>
        /// <param name="fst">First operand; null produces an empty result.</param>
        /// <param name="sec">Second operand; null produces an empty result.</param>
        /// <returns>
        /// A new dictionary holding the doc ids present in both operands. Its Not flag is copied
        /// from the operand that was looked up in.
        /// </returns>
        unsafe public Core.SFQL.Parse.DocumentResultWhereDictionary AndMergeDict(Core.SFQL.Parse.DocumentResultWhereDictionary fst, Core.SFQL.Parse.DocumentResultWhereDictionary sec)
        {
            // An intersection with a missing operand is empty.
            if (fst == null || sec == null)
            {
                return new Core.SFQL.Parse.DocumentResultWhereDictionary();
            }

            // Enumerate the side with fewer entries and probe the other one.
            bool fstIsLarger = fst.Count > sec.Count;
            Core.SFQL.Parse.DocumentResultWhereDictionary scanned = fstIsLarger ? sec : fst;
            Core.SFQL.Parse.DocumentResultWhereDictionary probed  = fstIsLarger ? fst : sec;

            Core.SFQL.Parse.DocumentResultWhereDictionary merged = new Core.SFQL.Parse.DocumentResultWhereDictionary();
            merged.Not = probed.Not;

            foreach (Core.SFQL.Parse.DocumentResultPoint point in scanned.Values)
            {
                Query.DocumentResult *hit;

                if (!probed.TryGetValue(point.pDocumentResult->DocId, out hit))
                {
                    // Not present on the probed side, so not part of the intersection.
                    continue;
                }

                // The probed entry is updated in place before it is copied into the result.
                hit->Score += point.pDocumentResult->Score;

                if (hit->PayloadData == null && point.pDocumentResult->PayloadData != null)
                {
                    hit->PayloadData = point.pDocumentResult->PayloadData;
                }

                merged.Add(point.pDocumentResult->DocId, *hit);
            }

            return merged;
        }
コード例 #3
0
        /// <summary>
        /// Calls <c>Init()</c> and then runs one of the two calculation routines between
        /// <c>EnterPayloladShareLock()</c> and <c>LeavePayloadShareLock()</c> on the shared payload provider:
        /// <c>CalculateOrderByScore</c> when <c>Argument.IsOrderByScoreDesc()</c> is true,
        /// <c>CalculateNormal</c> otherwise.
        /// </summary>
        /// <param name="upDict">Passed through unchanged to the selected routine.</param>
        /// <param name="docIdRank">Passed by reference to the selected routine.</param>
        public unsafe void CalculateOptimize(Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary upDict, ref Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank)
        {
            Init();

            Argument.DBProvider.SharedPayloadProvider.EnterPayloladShareLock();

            try
            {
                if (!Argument.IsOrderByScoreDesc())
                {
                    CalculateNormal(upDict, ref docIdRank);
                }
                else
                {
                    CalculateOrderByScore(upDict, ref docIdRank);
                }
            }
            finally
            {
                // The leave call runs on every exit path, including when the calculation throws.
                Argument.DBProvider.SharedPayloadProvider.LeavePayloadShareLock();
            }
        }
コード例 #4
0
        /// <summary>
        /// Queries <c>Table.DBTableName</c> for document ids and returns them as a result dictionary.
        /// The selected column is <c>docid</c>, or <c>DocIdReplaceField</c> when that is set; in the
        /// latter case each value is mapped through <c>DBProvider.GetDocIdFromDocIdReplaceFieldValue</c>
        /// and rows that map to a negative doc id are skipped.
        /// A second <c>count(*)</c> query with the same where clause (and no limit) fills
        /// <c>RelTotalCount</c>.
        /// </summary>
        /// <param name="end">When greater than zero, appended to the statement as <c>limit end</c>.</param>
        /// <param name="where">SQL condition placed after <c>where</c>; null or empty means no condition.</param>
        /// <param name="orderby">SQL expression placed after <c>order by</c>; null or empty means no ordering.</param>
        /// <returns>Dictionary with one entry per accepted row and RelTotalCount set from the count query.</returns>
        public Core.SFQL.Parse.DocumentResultWhereDictionary GetDocumentResults(int end, string where, string orderby)
        {
            bool hasWhere       = !string.IsNullOrEmpty(where);
            bool useDocIdColumn = DocIdReplaceField == null;

            // NOTE(review): where and orderby are spliced into the statement verbatim;
            // callers must only hand in trusted SQL fragments.
            string sql = "select ";

            if (hasWhere)
            {
                sql += useDocIdColumn
                    ? string.Format(" docid from {0} where {1}", Table.DBTableName, where)
                    : string.Format(" {0} from {1} where {2}", DocIdReplaceField, Table.DBTableName, where);
            }
            else
            {
                sql += useDocIdColumn
                    ? string.Format(" docid from {0} ", Table.DBTableName)
                    : string.Format(" {0} from {1} ", DocIdReplaceField, Table.DBTableName);
            }

            if (!string.IsNullOrEmpty(orderby))
            {
                sql += " order by " + orderby;
            }

            // NOTE(review): no limit is emitted for end == 0 and the limit is end, not end + 1;
            // confirm whether end is a row count or an inclusive index.
            if (end > 0)
            {
                sql += " limit " + end + ";";
            }

            Core.SFQL.Parse.DocumentResultWhereDictionary result = new Core.SFQL.Parse.DocumentResultWhereDictionary();

            using (SQLiteDataProvider sqlData = new SQLiteDataProvider())
            {
                sqlData.Connect(Table.ConnectionString);

                System.Data.DataSet docIdSet = sqlData.QuerySql(sql);

                foreach (System.Data.DataRow row in docIdSet.Tables[0].Rows)
                {
                    int docId;

                    if (!useDocIdColumn)
                    {
                        long replaceValue = long.Parse(row[DocIdReplaceField].ToString());
                        docId = DBProvider.GetDocIdFromDocIdReplaceFieldValue(replaceValue);

                        // A negative id means the value could not be mapped; leave the row out.
                        if (docId < 0)
                        {
                            continue;
                        }
                    }
                    else
                    {
                        docId = int.Parse(row[0].ToString());
                    }

                    result.Add(docId, new Hubble.Core.Query.DocumentResult(docId));
                }

                // The total is counted separately so that it is not capped by the limit above.
                string countSql = hasWhere
                    ? string.Format("select count(*) cnt from {0} where {1}",
                                    Table.DBTableName, where)
                    : string.Format("select count(*) cnt from {0}",
                                    Table.DBTableName);

                System.Data.DataSet countSet = sqlData.QuerySql(countSql);

                result.RelTotalCount = int.Parse(countSet.Tables[0].Rows[0][0].ToString());
            }

            return result;
        }
コード例 #5
0
        /// <summary>
        /// And merge for the case where the Not property is true on fst or on sec.
        /// When both operands are Not, the doc ids of the smaller one are added to the larger one
        /// and the larger one is returned. Otherwise the doc ids of one operand are removed from
        /// the other and the reduced operand is returned. In both cases the returned dictionary is
        /// modified in place and its RelTotalCount is set to its Count.
        /// </summary>
        /// <param name="fst">First operand; when null, <paramref name="sec"/> is returned as is.</param>
        /// <param name="sec">Second operand; when null, <paramref name="fst"/> is returned as is.</param>
        /// <returns>One of the two input dictionaries, updated in place.</returns>
        public DocumentResultWhereDictionary AndMergeForNot(DocumentResultWhereDictionary fst, DocumentResultWhereDictionary sec)
        {
            if (fst == null)
            {
                return sec;
            }

            if (sec == null)
            {
                return fst;
            }

            // Name the operands by size; on a tie fst counts as the smaller one.
            bool fstIsLarger = fst.Count > sec.Count;
            DocumentResultWhereDictionary smaller = fstIsLarger ? sec : fst;
            DocumentResultWhereDictionary larger  = fstIsLarger ? fst : sec;

            if (smaller.Not && larger.Not)
            {
                // Both sides are exclusion sets: collect every excluded id in the larger one.
                foreach (int docId in smaller.Keys)
                {
                    if (!larger.ContainsKey(docId))
                    {
                        larger.Add(docId, smaller[docId]);
                    }
                }

                larger.RelTotalCount = larger.Count;
                return larger;
            }

            // The smaller operand is the kept set unless it is the Not one.
            DocumentResultWhereDictionary kept     = smaller;
            DocumentResultWhereDictionary excluded = larger;

            if (smaller.Not)
            {
                kept     = larger;
                excluded = smaller;
            }

            foreach (int docId in excluded.Keys)
            {
                kept.Remove(docId);
            }

            kept.RelTotalCount = kept.Count;
            return kept;
        }
コード例 #6
0
        /// <summary>
        /// Scans the document list of the first query word, checks every further query word
        /// against each candidate document, scores the documents that match, and collects them
        /// in a <c>PriorQueue</c> built with capacity <c>top</c> and the order-by comparer.
        /// The queued entries are finally written to <paramref name="docIdRank"/>.
        /// </summary>
        /// <param name="upDict">Not read anywhere in this method.</param>
        /// <param name="docIdRank">
        /// Receives the group-by doc ids (when grouping is needed), the number of matched rows in
        /// RelTotalCount, one DocumentResult per queued entry, and Sorted = true.
        /// </param>
        private unsafe void CalculateNormal(Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary upDict, ref Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank)
        {
            DBProvider dBProvider  = Argument.DBProvider;
            bool       needGroupBy = Argument.NeedGroupBy;

            bool           needFilterUntokenizedConditions = this.Argument.NeedFilterUntokenizedConditions;
            ExpressionTree untokenizedTree = this.Argument.UntokenizedTreeOnRoot;

            // Scratch DocumentResult on the stack; drp is handed to the untokenized filter below.
            Query.DocumentResult  documentResult;
            Query.DocumentResult *drp = &documentResult;

            //vars for delete
            bool haveRecordsDeleted = dBProvider.DelProvider.Count > 0;

            int[] delDocs     = null;
            int   curDelIndex = 0;
            int   curDelDocid = 0;

            Field[]            orderByFields;
            DocId2LongComparer comparer = DocId2LongComparer.Generate(
                dBProvider, Argument.OrderBys, out orderByFields);

            if (haveRecordsDeleted)
            {
                // Start the deleted-doc cursor at the first entry.
                delDocs     = dBProvider.DelProvider.DelDocs;
                curDelDocid = delDocs[curDelIndex];
            }


            // ratio is the exponent applied to (1 / delta) in the position factor below.
            double ratio = 1;

            WordIndexForQuery[] wordIndexes = WordIndexes;

            if (wordIndexes.Length > 1)
            {
                ratio = (double)2 / (double)(wordIndexes.Length - 1);
            }

            int wordIndexesLen = wordIndexes.Length;

            // NOTE(review): assumes at least one word index; an empty array would throw here.
            WordIndexForQuery fstWifq = wordIndexes[0]; //first word

            OriginalDocumentPositionList fstODPL = new OriginalDocumentPositionList();

            //Entity.DocumentPositionList fstDocList = fstWifq.WordIndex.GetNext();

            // One slot per query word; slot 0 belongs to the first word.
            Entity.DocumentPositionList[] docListArr = new Hubble.Core.Entity.DocumentPositionList[wordIndexesLen];

            //docListArr[0] = fstDocList;
            //fstWifq.WordIndex.GetNextOriginal(ref fstODPL);
            //fstODPL.ToDocumentPositionList(ref docListArr[0]);

            // Presumably loads the first entry of the first word into fstODPL / docListArr[0];
            // GetNext is defined outside this block.
            GetNext(fstWifq, ref fstODPL, ref docListArr[0]);

            OriginalDocumentPositionList odpl = new OriginalDocumentPositionList();

            // Count / FirstPosition of the previous word, used by the position factor.
            Entity.DocumentPositionList lastDocList
                = new Hubble.Core.Entity.DocumentPositionList();
            int top;

            //calculate top
            //The next multiple of 100 strictly above End (End = 0..99 gives 100, End = 100 gives 200).
            if (this.Argument.End >= 0)
            {
                top = (1 + this.Argument.End / 100) * 100;

                // With End >= 0 this can only be reached when the multiplication above wrapped around.
                if (top <= 0)
                {
                    top = 100;
                }

                //if (this.Argument.End * 2 > top)
                //{
                //    top *= 2;
                //}
            }
            else
            {
                // A negative End means no cap.
                top = int.MaxValue;
            }

            PriorQueue <Docid2Long> priorQueue = new PriorQueue <Docid2Long>(top, comparer);

            // Counts every matched document, including those that are not queued.
            int rows = 0;

            Docid2Long cur  = new Docid2Long();
            Docid2Long last = new Docid2Long();

            last.DocId = -1;

            // The first word drives the scan; a negative DocumentId ends it.
            while (fstODPL.DocumentId >= 0)
            {
                int curWord    = 1;
                int firstDocId = fstODPL.DocumentId;

                // Look the candidate doc id up in every other word's index.
                while (curWord < wordIndexesLen)
                {
                    //docListArr[curWord] = wordIndexes[curWord].WordIndex.Get(firstDocId);

                    wordIndexes[curWord].WordIndex.GetNextOriginalWithDocId(ref odpl, firstDocId);
                    odpl.ToDocumentPositionList(ref docListArr[curWord]);

                    // A negative DocumentId presumably means this word has no entry for the doc.
                    if (docListArr[curWord].DocumentId < 0)
                    {
                        if ((wordIndexes[curWord].Flags & WordInfo.Flag.Or) != 0)
                        {
                            // Words flagged Or may be missing; go on with the next word.
                            curWord++;
                            continue;
                        }
                        else
                        {
                            // A word without the Or flag is missing: leave the loop early so the
                            // check below fails and the document is not scored.
                            break;
                        }
                    }

                    curWord++;
                } //While

                // True only when the loop above ran through all words without breaking.
                if (curWord >= wordIndexesLen)
                {
                    //Process untokenized conditions.
                    //If is not matched, get the next one.
                    if (needFilterUntokenizedConditions)
                    {
                        int docId = firstDocId;
                        drp->DocId       = docId;
                        drp->PayloadData = dBProvider.GetPayloadDataWithShareLock(docId);
                        if (!ParseWhere.GetComparisionExpressionValue(dBProvider, drp,
                                                                      untokenizedTree))
                        {
                            GetNext(fstWifq, ref fstODPL, ref docListArr[0]);
                            continue;
                        }
                    }

                    //Matched
                    //Calculate score

                    long totalScore = 0;
                    lastDocList.Count         = 0;
                    lastDocList.FirstPosition = 0;

                    for (int i = 0; i < wordIndexesLen; i++)
                    {
                        WordIndexForQuery wifq = wordIndexes[i];

                        if (wifq.WordIndex.Count == 0)
                        {
                            //a^5000^0 b^5000^2^1
                            //if has a and hasn't b but b can be or
                            //2010-09-30 eaglet
                            continue;
                        }

                        Entity.DocumentPositionList docList = docListArr[i];


                        // NOTE(review): the divisor is Sum_d_t * TotalWordsInThisDocument; confirm
                        // that neither can be zero here.
                        long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)docList.Count * (long)1000000 / ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

                        if (score < 0)
                        {
                            //Overflow
                            score = long.MaxValue - 4000000;
                        }

                        // Position factor; stays 1 for the first word.
                        double delta = 1;

                        if (i > 0)
                        {
                            //Calculate with position
                            double queryPositionDelta = wifq.FirstPosition - wordIndexes[i - 1].FirstPosition;
                            double positionDelta      = docList.FirstPosition - lastDocList.FirstPosition;

                            // Difference between the word distance in the query and in the document.
                            delta = Math.Abs(queryPositionDelta - positionDelta);

                            // Map small differences to fixed values: below 0.031 -> 0.031,
                            // up to 1.1 -> 0.5, up to 2.1 -> 1; larger differences are kept as they are.
                            if (delta < 0.031)
                            {
                                delta = 0.031;
                            }
                            else if (delta <= 1.1)
                            {
                                delta = 0.5;
                            }
                            else if (delta <= 2.1)
                            {
                                delta = 1;
                            }

                            delta = Math.Pow((1 / delta), ratio) * docList.Count * lastDocList.Count /
                                    (double)(wifq.QueryCount * wordIndexes[i - 1].QueryCount);
                        }

                        // Remember this word's values for the next word's position factor.
                        lastDocList.Count         = docList.Count;
                        lastDocList.FirstPosition = docList.FirstPosition;

                        totalScore += (long)(score * delta);
                    } //End for cycle

                    if (haveRecordsDeleted)
                    {
                        if (curDelIndex < delDocs.Length)
                        {
                            //If docid deleted, get next
                            if (firstDocId == curDelDocid)
                            {
                                GetNext(fstWifq, ref fstODPL, ref docListArr[0]);
                                continue;
                            }
                            else if (firstDocId > curDelDocid)
                            {
                                // Advance the deleted-doc cursor until it reaches or passes the
                                // candidate; presumably delDocs is sorted ascending - TODO confirm.
                                while (curDelIndex < delDocs.Length && curDelDocid < firstDocId)
                                {
                                    curDelIndex++;

                                    if (curDelIndex >= delDocs.Length)
                                    {
                                        // Deleted list exhausted: skip the delete check from now on.
                                        haveRecordsDeleted = false;
                                        break;
                                    }

                                    curDelDocid = delDocs[curDelIndex];
                                }

                                if (curDelIndex < delDocs.Length)
                                {
                                    if (firstDocId == curDelDocid)
                                    {
                                        GetNext(fstWifq, ref fstODPL, ref docListArr[0]);
                                        continue;
                                    }
                                }
                            }
                        }
                    }

                    if (needGroupBy)
                    {
                        docIdRank.AddToGroupByCollection(firstDocId);
                    }

                    if (_HasRankField)
                    {
                        int rank = dBProvider.SharedPayloadProvider.GetPayloadRank(firstDocId);
                        totalScore *= rank;
                        if (totalScore < 0)
                        {
                            // A negative product is treated as overflow, like the score above.
                            totalScore = long.MaxValue - 4000000;
                        }
                    }

                    if (rows >= top)
                    {
                        // top entries have already been queued: the row is still counted, but it
                        // is only queued when the comparer orders it before the current last entry.
                        rows++;

                        cur.DocId = firstDocId;

                        Docid2Long.Generate(ref cur, dBProvider, orderByFields, totalScore);

                        if (comparer.Compare(last, cur) > 0)
                        {
                            priorQueue.Add(cur);
                            last = priorQueue.Last;
                        }
                    }
                    else
                    {
                        // Fewer than top entries so far: queue unconditionally.
                        cur.DocId = firstDocId;

                        Docid2Long.Generate(ref cur, dBProvider, orderByFields, totalScore);
                        priorQueue.Add(cur);

                        rows++;

                        if (rows == top)
                        {
                            // From here on new entries are compared against the queue's last entry.
                            last = priorQueue.Last;
                        }
                    }

                    //docIdRank.Add(firstDocId, totalScore);
                }//if (curWord >= wordIndexesLen)

                GetNext(fstWifq, ref fstODPL, ref docListArr[0]);

                //fstWifq.WordIndex.GetNextOriginal(ref fstODPL);
                //fstODPL.ToDocumentPositionList(ref docListArr[0]);
            }

            // Total number of matched documents, not just the queued ones.
            docIdRank.RelTotalCount = rows;

            foreach (Docid2Long docid2Long in priorQueue.ToArray())
            {
                long score = comparer.GetScore(docid2Long);

                if (score < 0)
                {
                    //Overflow
                    score = long.MaxValue - 4000000;
                }

                docIdRank.Add(docid2Long.DocId, new DocumentResult(docid2Long.DocId, score));
            }

            docIdRank.Sorted = true;
        }