Example #1
0
        /// <summary>
        /// Indexes the value of this field for every document in <paramref name="docs"/>.
        /// </summary>
        /// <param name="docs">Documents whose value for this field is passed to the single-value Index overload.</param>
        /// <param name="fieldIndex">
        /// Expected position of this field inside each document's FieldValues. It is only a hint:
        /// when the entry at that position has a different name (rows supplied with a different
        /// field order) the position is looked up again by name.
        /// </param>
        internal void Index(IList <Document> docs, int fieldIndex)
        {
            Field  field      = _DBProvider.GetField(this.FieldName);
            string targetName = FieldName.Trim();

            // Position currently assumed to hold this field. It follows the last successful
            // lookup so that a batch with a consistent (but unexpected) order is scanned only once.
            int position = fieldIndex;

            foreach (Document doc in docs)
            {
                // Bounds are checked first: a row may carry fewer field values than the hint implies.
                bool positionMatches =
                    position >= 0 &&
                    position < doc.FieldValues.Count &&
                    string.Equals(doc.FieldValues[position].FieldName.Trim(), targetName,
                                  StringComparison.OrdinalIgnoreCase);

                if (!positionMatches)
                {
                    // Field index does not match; search this row for the field by name.
                    int found = -1;

                    for (int i = 0; i < doc.FieldValues.Count; i++)
                    {
                        if (string.Equals(doc.FieldValues[i].FieldName.Trim(), targetName,
                                          StringComparison.OrdinalIgnoreCase))
                        {
                            found = i;
                            break;
                        }
                    }

                    if (found < 0)
                    {
                        // No data for this field in this row: fall back to the caller's hint and skip the row.
                        position = fieldIndex;
                        continue;
                    }

                    position = found;
                }

                this.Index(doc.FieldValues[position].Value, doc.DocId, field.GetAnalyzer());
            }
        }
Example #2
0
        /// <summary>
        /// Times one call of DBProvider.FillPayloadRank over a synthetic array of one million
        /// payload entries and reports the elapsed time through the Times / Elapse(ms) /
        /// ElapseOneTime(ms) output columns.
        /// </summary>
        /// <param name="tableName">
        /// Not read by this method; the table is resolved from Parameters[0].
        /// NOTE(review): presumably callers pass the same value - confirm before switching over.
        /// </param>
        /// <exception cref="DataException">The table, or its "Rank" field, cannot be found.</exception>
        unsafe private void TestFillPayloadRank(string tableName)
        {
            // NOTE(review): this header names a different test ("TestGetDocIdReplaceFieldValue");
            // it looks like a copy-paste leftover but is kept because it is emitted at runtime.
            OutputMessage("TestGetDocIdReplaceFieldValue");

            AddColumn("Times");
            AddColumn("Elapse(ms)");
            AddColumn("ElapseOneTime(ms)");

            Data.DBProvider dbProvider = Data.DBProvider.GetDBProvider(Parameters[0], false);

            if (dbProvider == null)
            {
                throw new DataException(string.Format("Table name {0} does not exist!", Parameters[0]));
            }

            Data.Field rankField = dbProvider.GetField("Rank");

            if (rankField == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException below.
                throw new DataException(string.Format("Field name {0} does not exist!", "Rank"));
            }

            int tab   = rankField.TabIndex;
            int count = 1000000;

            OriginalDocumentPositionList[] payloads = new OriginalDocumentPositionList[count];

            // Elements are written through the array slot on purpose so the code stays correct
            // whether the element type is a struct or a class.
            for (int i = 0; i < payloads.Length; i++)
            {
                payloads[i] = new OriginalDocumentPositionList(i * 10);
                payloads[i].CountAndWordCount = 1;
            }

            payloads[0].DocumentId = 8 * payloads.Length;

            // count / payloads.Length evaluates to 1 with the current sizes, i.e. a single timed call.
            int iterations = count / payloads.Length;

            Stopwatch sw = new Stopwatch();
            sw.Start();

            for (int j = 0; j < iterations; j++)
            {
                dbProvider.FillPayloadRank(tab, count, payloads);
            }

            sw.Stop();

            OutputValue("Times", count);
            OutputValue("Elapse(ms)", sw.ElapsedMilliseconds);
            OutputValue("ElapseOneTime(ms)", (double)sw.ElapsedMilliseconds / count);
        }
Example #3
0
        /// <summary>
        /// Builds a <see cref="DocId2LongComparer"/> for the given ORDER BY list and resolves the
        /// field descriptor of every ORDER BY entry.
        /// </summary>
        /// <param name="dbProvider">Provider used to look up fields other than "docid" and "score".</param>
        /// <param name="orderBys">ORDER BY entries; an entry is descending only when its Order is "desc" (a null Order means ascending).</param>
        /// <param name="orderByFields">
        /// Receives one field per entry: a synthetic Int field for "docid", a synthetic BigInt
        /// field for "score", otherwise whatever <c>dbProvider.GetField</c> returns for the name.
        /// </param>
        /// <returns>A comparer created from the ascending flags and the index of the "score" entry (-1 when there is none).</returns>
        public static DocId2LongComparer Generate(Data.DBProvider dbProvider,
                                                  OrderBy[] orderBys, out Data.Field[] orderByFields)
        {
            int entryCount = orderBys.Length;

            bool[] ascs = new bool[entryCount];
            orderByFields = new Hubble.Core.Data.Field[entryCount];

            int scoreFieldIndex = -1;

            for (int i = 0; i < entryCount; i++)
            {
                OrderBy orderBy = orderBys[i];

                // Keywords and built-in names are matched ordinally so the result does not depend on
                // the current culture (e.g. Turkish casing of 'i'). The static Equals also yields
                // "ascending" for a null Order.
                ascs[i] = !string.Equals(orderBy.Order, "desc", StringComparison.OrdinalIgnoreCase);

                if (orderBy.Name.Equals("docid", StringComparison.OrdinalIgnoreCase))
                {
                    orderByFields[i] = new Hubble.Core.Data.Field("docid", Hubble.Core.Data.DataType.Int);
                }
                else if (orderBy.Name.Equals("score", StringComparison.OrdinalIgnoreCase))
                {
                    // When "score" appears more than once the last occurrence wins.
                    scoreFieldIndex  = i;
                    orderByFields[i] = new Hubble.Core.Data.Field("score", Hubble.Core.Data.DataType.BigInt);
                }
                else
                {
                    orderByFields[i] = dbProvider.GetField(orderBy.Name);
                }
            }

            return new DocId2LongComparer(ascs, scoreFieldIndex);
        }
Example #4
0
        /// <summary>
        /// Get IdFields list from the rows of a trigger table.
        /// </summary>
        /// <remarks>
        /// Rows are split into two groups: rows that touch at least one tokenized field and rows
        /// that do not. Every entry of a group receives the same Fields string, built from the
        /// union of the known field names seen in that group. After sorting, an entry that equals
        /// the preceding kept entry is replaced by <c>null</c> in the returned list; the slot is
        /// not removed, so callers have to skip null entries.
        /// </remarks>
        /// <param name="dbProvider">dbProvider that used to get field info</param>
        /// <param name="dbAdapterName">DBAdapter name, forwarded to GetFieldsStringFromHashSet</param>
        /// <param name="table">table that read from trigger table; the columns "id", "Serial" and "Fields" are read</param>
        /// <param name="lastSerial">receives the "Serial" value of the last row processed, or -1 when the table has no rows</param>
        /// <returns>Sorted list with one slot per row; merged duplicates are null.</returns>
        internal static List <IdFields> GetIdFieldsList(Data.DBProvider dbProvider, string dbAdapterName,
                                                        System.Data.DataTable table, out long lastSerial)
        {
            lastSerial = -1;

            HashSet <string> tokenizedGroupFields   = new HashSet <string>();
            HashSet <string> untokenizedGroupFields = new HashSet <string>();
            List <string>    rowFields = new List <string>(128);
            List <IdFields>  result    = new List <IdFields>(table.Rows.Count);

            foreach (System.Data.DataRow row in table.Rows)
            {
                long id = long.Parse(row["id"].ToString());
                lastSerial = long.Parse(row["Serial"].ToString());
                string fields = row["Fields"].ToString();

                rowFields.Clear();
                bool hasTokenized = false;

                // Keep only the names that are real fields of the table.
                foreach (string rawName in fields.Split(','))
                {
                    // Invariant lower-casing: identifiers must not change with the current culture.
                    string name = rawName.Trim().ToLowerInvariant();

                    if (name.Length == 0)
                    {
                        continue;
                    }

                    Data.Field dbField = dbProvider.GetField(name);

                    if (dbField == null)
                    {
                        continue;
                    }

                    if (dbField.IndexType == Hubble.Core.Data.Field.Index.Tokenized)
                    {
                        hasTokenized = true;
                    }

                    rowFields.Add(name);
                }

                // Merge this row's fields into the set of its group.
                if (hasTokenized)
                {
                    tokenizedGroupFields.UnionWith(rowFields);
                }
                else
                {
                    untokenizedGroupFields.UnionWith(rowFields);
                }

                result.Add(new IdFields(id, hasTokenized));
            }

            // Build the fields string of each group once and hand it to every entry of that group.
            string fieldsWithTokenized    = GetFieldsStringFromHashSet(tokenizedGroupFields, dbAdapterName);
            string fieldsWithoutTokenized = GetFieldsStringFromHashSet(untokenizedGroupFields, dbAdapterName);

            foreach (IdFields idFields in result)
            {
                idFields.Fields = idFields.HasTokenizedFields ? fieldsWithTokenized : fieldsWithoutTokenized;
            }

            // Merge same Id: after sorting, an entry equal to the last kept one is nulled out.
            result.Sort();

            if (result.Count > 0)
            {
                IdFields last = result[0];

                for (int i = 1; i < result.Count; i++)
                {
                    if (result[i].Equals(last))
                    {
                        result[i] = null;
                    }
                    else
                    {
                        last = result[i];
                    }
                }
            }

            return result;
        }
Example #5
0
        /// <summary>
        /// Sorts <paramref name="docResults"/> in place according to the ORDER BY list of this instance.
        /// </summary>
        /// <remarks>
        /// When <see cref="QueryResultHeapSort"/> reports that it can handle the ORDER BY list, the
        /// work is delegated to it (Prepare + TopSort). Otherwise one SortInfo per ORDER BY entry is
        /// appended to every result and the whole array is sorted with Array.Sort.
        /// </remarks>
        /// <param name="docResults">Results to sort; modified in place.</param>
        /// <param name="top">Number of leading results wanted; only used by the heap-sort path.</param>
        /// <exception cref="ParseException">
        /// An ORDER BY name is neither a field, "DocId" nor "Score"; the field is not an Untokenized
        /// index; or a document has no payload.
        /// </exception>
        unsafe internal void Sort(Query.DocumentResultForSort[] docResults, int top)
        {
            if (_OrderBys.Count <= 0)
            {
                return;
            }

            QueryResultHeapSort heapSort = new QueryResultHeapSort(_OrderBys, _DBProvider);

            if (heapSort.CanDo)
            {
                heapSort.Prepare(docResults);
                heapSort.TopSort(docResults, top);
                return;
            }

            foreach (SyntaxAnalysis.Select.OrderBy orderBy in _OrderBys)
            {
                Data.Field field = _DBProvider.GetField(orderBy.Name);

                // A missing Order is treated as ascending in this method. Keywords are compared
                // ordinally so the match does not depend on the current culture.
                bool isAsc = orderBy.Order == null ||
                             orderBy.Order.Equals("ASC", StringComparison.OrdinalIgnoreCase);

                Query.SortType sortType = Hubble.Core.Query.SortType.None;
                bool           isDocId  = false;
                bool           isScore  = false;

                if (field == null)
                {
                    // Not a table field: only the built-in DocId and Score names are accepted.
                    if (orderBy.Name.Equals("DocId", StringComparison.OrdinalIgnoreCase))
                    {
                        sortType = Hubble.Core.Query.SortType.Long;
                        isDocId  = true;
                    }
                    else if (orderBy.Name.Equals("Score", StringComparison.OrdinalIgnoreCase))
                    {
                        sortType = Hubble.Core.Query.SortType.Long;
                        isScore  = true;
                    }
                    else
                    {
                        throw new ParseException(string.Format("Unknown field name:{0}", orderBy.Name));
                    }
                }
                else if (field.IndexType != Hubble.Core.Data.Field.Index.Untokenized)
                {
                    throw new ParseException(string.Format("Order by field name:{0} is not Untokenized Index!", orderBy.Name));
                }

                // Elements are accessed through the array slot so the writes also work for a struct element type.
                for (int i = 0; i < docResults.Length; i++)
                {
                    if (docResults[i].SortInfoList == null)
                    {
                        docResults[i].SortInfoList = new List <Hubble.Core.Query.SortInfo>(2);
                    }

                    if (isDocId)
                    {
                        docResults[i].SortInfoList.Add(new Hubble.Core.Query.SortInfo(isAsc, sortType, docResults[i].DocId));
                        continue;
                    }

                    if (isScore)
                    {
                        docResults[i].SortInfoList.Add(new Hubble.Core.Query.SortInfo(isAsc, sortType, docResults[i].Score));
                        continue;
                    }

                    // Payload is loaded on first use and cached on the result for later ORDER BY entries.
                    if (docResults[i].PayloadData == null)
                    {
                        int *payloadData = _DBProvider.GetPayloadData(docResults[i].DocId);

                        if (payloadData == null)
                        {
                            throw new ParseException(string.Format("DocId={0} has not payload!", docResults[i].DocId));
                        }

                        docResults[i].PayloadData = payloadData;
                    }

                    docResults[i].SortInfoList.Add(Data.DataTypeConvert.GetSortInfo(isAsc,
                                                                                    field.DataType, docResults[i].PayloadData, field.TabIndex, field.SubTabIndex, field.DataLength));
                }
            }

            Array.Sort(docResults);

            // The partial (top-N) sort has a known bug, so a full sort is always used here.
            // The disabled code is kept below until that bug is fixed.
            //if (top <= 0 || top >= docResults.Length/2)
            //{
            //    Array.Sort(docResults);
            //}
            //else
            //{
            //    QuickSort<Query.DocumentResult>.TopSort(docResults, top, new Query.DocumentResultComparer());
            //}
        }
Example #6
0
        /// <summary>
        /// Captures the ORDER BY list and decides whether this heap sort can handle it.
        /// </summary>
        /// <remarks>
        /// CanDo ends up false when the list is null, empty, has more than two entries, or names a
        /// field whose data type is not one of Date, SmallDateTime, Int, SmallInt, TinyInt, BigInt,
        /// DateTime or Float. A name that is neither "DocId", "Score" nor a field known to
        /// <paramref name="dbProvider"/> does not clear CanDo.
        /// </remarks>
        /// <param name="orderBys">ORDER BY entries; a null Order is treated as ascending.</param>
        /// <param name="dbProvider">Provider used to look up the ORDER BY fields.</param>
        /// <exception cref="ParseException">An ORDER BY field exists but is not an Untokenized index.</exception>
        public QueryResultHeapSort(List <SyntaxAnalysis.Select.OrderBy> orderBys, Data.DBProvider dbProvider)
        {
            _DBProvider = dbProvider;
            _OrderBys   = orderBys;
            _CanDo      = true;

            if (orderBys == null || orderBys.Count <= 0 || orderBys.Count > 2)
            {
                _CanDo = false;
                return;
            }

            _SortFieldsCount = orderBys.Count;

            for (int i = 0; i < orderBys.Count; i++)
            {
                SyntaxAnalysis.Select.OrderBy orderBy = orderBys[i];

                // The direction is recorded the same way for every kind of entry. Ordinal matching
                // keeps keyword recognition independent of the current culture.
                bool asc = orderBy.Order == null ||
                           orderBy.Order.Equals("ASC", StringComparison.OrdinalIgnoreCase);

                if (i == 0)
                {
                    _Asc1 = asc;
                }
                else
                {
                    _Asc2 = asc;
                }

                // Built-in sort keys need no field validation.
                if (orderBy.Name.Equals("DocId", StringComparison.OrdinalIgnoreCase) ||
                    orderBy.Name.Equals("Score", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                Data.Field field = _DBProvider.GetField(orderBy.Name);

                if (field == null)
                {
                    continue;
                }

                if (field.IndexType != Hubble.Core.Data.Field.Index.Untokenized)
                {
                    throw new ParseException(string.Format("Order by field name:{0} is not Untokenized Index!", orderBy.Name));
                }

                switch (field.DataType)
                {
                case Hubble.Core.Data.DataType.Date:
                case Hubble.Core.Data.DataType.SmallDateTime:
                case Hubble.Core.Data.DataType.Int:
                case Hubble.Core.Data.DataType.SmallInt:
                case Hubble.Core.Data.DataType.TinyInt:
                case Hubble.Core.Data.DataType.BigInt:
                case Hubble.Core.Data.DataType.DateTime:
                case Hubble.Core.Data.DataType.Float:
                    break;

                default:
                    // Unsupported type: stop inspecting the remaining entries.
                    _CanDo = false;
                    return;
                }
            }
        }
Example #7
0
        /// <summary>
        /// Fills SortValue (first ORDER BY entry) and SortValue1 (second entry) of every element
        /// of <paramref name="docResults"/>.
        /// </summary>
        /// <remarks>
        /// "DocId" and "Score" entries copy the corresponding member of the result. For a table
        /// field the payload is loaded through FillPayloadData and converted with
        /// DataTypeConvert.GetSortInfo: integer-like types use IntValue, BigInt and DateTime use
        /// LongValue, and Float uses DoubleValue * 1000 truncated to long. Entries naming an
        /// unknown field or a field of any other data type leave the sort values untouched.
        /// </remarks>
        /// <param name="docResults">Results to prepare; modified in place.</param>
        /// <exception cref="ParseException">An ORDER BY field exists but is not an Untokenized index.</exception>
        unsafe public void Prepare(DocumentResultForSort[] docResults)
        {
            for (int index = 0; index < _SortFieldsCount; index++)
            {
                bool   isFirstKey = index == 0;
                bool   asc        = isFirstKey ? _Asc1 : _Asc2;
                string name       = _OrderBys[index].Name;

                // Built-in names are matched ordinally so recognition does not depend on the current culture.
                if (name.Equals("DocId", StringComparison.OrdinalIgnoreCase))
                {
                    for (int i = 0; i < docResults.Length; i++)
                    {
                        if (isFirstKey)
                        {
                            docResults[i].SortValue = docResults[i].DocId;
                        }
                        else
                        {
                            docResults[i].SortValue1 = docResults[i].DocId;
                        }
                    }

                    continue;
                }

                if (name.Equals("Score", StringComparison.OrdinalIgnoreCase))
                {
                    for (int i = 0; i < docResults.Length; i++)
                    {
                        if (isFirstKey)
                        {
                            docResults[i].SortValue = docResults[i].Score;
                        }
                        else
                        {
                            docResults[i].SortValue1 = docResults[i].Score;
                        }
                    }

                    continue;
                }

                Data.Field field = _DBProvider.GetField(name);

                if (field == null)
                {
                    continue;
                }

                if (field.IndexType != Hubble.Core.Data.Field.Index.Untokenized)
                {
                    throw new ParseException(string.Format("Order by field name:{0} is not Untokenized Index!", name));
                }

                // Only these data types are converted; for anything else the payload is not even loaded.
                switch (field.DataType)
                {
                case Hubble.Core.Data.DataType.Date:
                case Hubble.Core.Data.DataType.SmallDateTime:
                case Hubble.Core.Data.DataType.Int:
                case Hubble.Core.Data.DataType.SmallInt:
                case Hubble.Core.Data.DataType.TinyInt:
                case Hubble.Core.Data.DataType.BigInt:
                case Hubble.Core.Data.DataType.DateTime:
                case Hubble.Core.Data.DataType.Float:
                    break;

                default:
                    continue;
                }

                _DBProvider.FillPayloadData(docResults);

                for (int i = 0; i < docResults.Length; i++)
                {
                    int *payLoadData = docResults[i].PayloadData;

                    Query.SortInfo sortInfo = Data.DataTypeConvert.GetSortInfo(asc, field.DataType,
                                                                               payLoadData, field.TabIndex, field.SubTabIndex, field.DataLength);

                    // NOTE(review): assumes SortValue / SortValue1 are of type long - confirm.
                    long sortValue;

                    switch (field.DataType)
                    {
                    case Hubble.Core.Data.DataType.BigInt:
                    case Hubble.Core.Data.DataType.DateTime:
                        sortValue = sortInfo.LongValue;
                        break;

                    case Hubble.Core.Data.DataType.Float:
                        // Scaled by 1000 and truncated, i.e. three decimal places take part in the ordering.
                        sortValue = (long)(sortInfo.DoubleValue * 1000);
                        break;

                    default:
                        // Date, SmallDateTime, Int, SmallInt, TinyInt (everything else was skipped above).
                        sortValue = sortInfo.IntValue;
                        break;
                    }

                    if (isFirstKey)
                    {
                        docResults[i].SortValue = sortValue;
                    }
                    else
                    {
                        docResults[i].SortValue1 = sortValue;
                    }
                }
            }
        }
Example #8
0
        /// <summary>
        /// Tokenizes a test text with the analyzer configured for a field and outputs one row
        /// (Word, Position, Rank) per produced word.
        /// </summary>
        /// <remarks>
        /// Parameters: [0] table name, [1] field name, [2] text to analyze, [3] optional analyzer
        /// type. When exactly four parameters are given and the fourth is "sqlclient",
        /// TokenizeForSqlClient is used instead of Tokenize.
        /// </remarks>
        /// <exception cref="ArgumentException">Fewer than three parameters were supplied.</exception>
        /// <exception cref="Data.DataException">
        /// The table, the field or the analyzer cannot be found, or the field has no analyzer name.
        /// </exception>
        public void Run()
        {
            if (Parameters.Count < 3)
            {
                throw new ArgumentException("Parameter 1 is table name, Parameter 2 is field name, Parameter 3 is a text for test, Parameter 4 is analyzer type(optional)");
            }

            string tableName = Parameters[0];
            string fieldName = Parameters[1];
            string text      = Parameters[2];

            Data.DBProvider dbProvider = Data.DBProvider.GetDBProvider(tableName);

            if (dbProvider == null)
            {
                throw new Data.DataException(string.Format("Can't find table name : {0}", tableName));
            }

            Data.Field field = dbProvider.GetField(fieldName);

            if (field == null)
            {
                // Report the field name that was looked up, not the text to analyze.
                throw new Data.DataException(string.Format("Can't find field name : {0}", fieldName));
            }

            if (field.AnalyzerName == null)
            {
                throw new Data.DataException(string.Format("Field: {0} in {1} is not tokenized", fieldName,
                                                           dbProvider.Table.Name));
            }

            Analysis.IAnalyzer analyzer = Data.DBProvider.GetAnalyzer(field.AnalyzerName);

            if (analyzer == null)
            {
                throw new Data.DataException(string.Format("Can't find analyzer name : {0}", field.AnalyzerName));
            }

            // The option keyword is compared ordinally so it is recognised under every culture.
            bool clientAnalyzer = Parameters.Count == 4 &&
                                  Parameters[3].Equals("sqlclient", StringComparison.OrdinalIgnoreCase);

            AddColumn("Word");
            AddColumn("Position");
            AddColumn("Rank");

            if (clientAnalyzer)
            {
                foreach (Entity.WordInfo word in analyzer.TokenizeForSqlClient(text))
                {
                    NewRow();
                    OutputValue("Word", word.Word);
                    OutputValue("Position", word.Position.ToString());
                    OutputValue("Rank", word.Rank.ToString());
                }
            }
            else
            {
                foreach (Entity.WordInfo word in analyzer.Tokenize(text))
                {
                    NewRow();
                    OutputValue("Word", word.Word);
                    OutputValue("Position", word.Position.ToString());
                    OutputValue("Rank", word.Rank.ToString());
                }
            }
        }