/// <summary>
/// Indexes the value of this index's field for every document in <paramref name="docs"/>.
/// </summary>
/// <param name="docs">Documents whose field values are indexed.</param>
/// <param name="fieldIndex">
/// Hint for the position of this field inside each document's FieldValues.
/// When the name found at that position differs (rows may list fields in a
/// different order) the position is looked up again by name for that document.
/// </param>
internal void Index(IList<Document> docs, int fieldIndex)
{
    int originalFieldIndex = fieldIndex;
    Field field = _DBProvider.GetField(this.FieldName);
    string targetName = FieldName.Trim();

    foreach (Document doc in docs)
    {
        // The hint is only usable when it is inside this document's range;
        // documents may carry fewer values than the hint position.
        bool hintMatches = fieldIndex >= 0
            && fieldIndex < doc.FieldValues.Count
            && string.Equals(doc.FieldValues[fieldIndex].FieldName.Trim(), targetName, System.StringComparison.OrdinalIgnoreCase);

        if (!hintMatches)
        {
            // Field index does not match: search this document by name.
            fieldIndex = 0;
            while (fieldIndex < doc.FieldValues.Count
                && !string.Equals(doc.FieldValues[fieldIndex].FieldName.Trim(), targetName, System.StringComparison.OrdinalIgnoreCase))
            {
                fieldIndex++;
            }

            if (fieldIndex >= doc.FieldValues.Count)
            {
                // No data of this field in this row: restore the caller's hint
                // for the next document and skip this one.
                fieldIndex = originalFieldIndex;
                continue;
            }
        }

        // A position found by name is kept as the hint for the following documents.
        this.Index(doc.FieldValues[fieldIndex].Value, doc.DocId, field.GetAnalyzer());
    }
}
/// <summary>
/// Times one bulk call to DBProvider.FillPayloadRank over one million
/// generated payload entries and reports the elapsed time.
/// </summary>
/// <param name="tableName">
/// Not read by this method; the table is resolved from Parameters[0].
/// </param>
/// <exception cref="DataException">No DBProvider exists for Parameters[0].</exception>
unsafe private void TestFillPayloadRank(string tableName)
{
    OutputMessage("TestFillPayloadRank");
    AddColumn("Times");
    AddColumn("Elapse(ms)");
    AddColumn("ElapseOneTime(ms)");

    Data.DBProvider dbProvider = Data.DBProvider.GetDBProvider(Parameters[0], false);
    if (dbProvider == null)
    {
        throw new DataException(string.Format("Table name {0} does not exist!", Parameters[0]));
    }

    int count = 1000000;

    OriginalDocumentPositionList[] payloads = new OriginalDocumentPositionList[count];
    for (int i = 0; i < payloads.Length; i++)
    {
        payloads[i] = new OriginalDocumentPositionList(i * 10);
        payloads[i].CountAndWordCount = 1;
    }

    // NOTE(review): the first entry's DocumentId is deliberately overwritten;
    // the reason is not visible here - confirm against FillPayloadRank.
    payloads[0].DocumentId = 8 * payloads.Length;

    Data.Field rankField = dbProvider.GetField("Rank");
    int tab = rankField.TabIndex;

    Stopwatch sw = new Stopwatch();
    sw.Start();

    // payloads.Length equals count, so this loop body runs exactly once.
    for (int j = 0; j < count / payloads.Length; j++)
    {
        dbProvider.FillPayloadRank(tab, count, payloads);
    }

    sw.Stop();

    OutputValue("Times", count);
    OutputValue("Elapse(ms)", sw.ElapsedMilliseconds);
    OutputValue("ElapseOneTime(ms)", (double)sw.ElapsedMilliseconds / count);
}
/// <summary>
/// Builds a DocId2LongComparer for the given order-by clauses and resolves
/// the field that each clause refers to.
/// </summary>
/// <param name="dbProvider">Provider used to look up table fields by name.</param>
/// <param name="orderBys">Order-by clauses, in priority order.</param>
/// <param name="orderByFields">
/// Receives one field per clause: a synthetic Int field for "docid", a
/// synthetic BigInt field for "score", otherwise whatever
/// <c>dbProvider.GetField</c> returns for the clause name.
/// </param>
/// <returns>
/// A comparer created from the per-clause ascending flags and the index of the
/// "score" clause (-1 when there is none; the last one when there are several).
/// </returns>
public static DocId2LongComparer Generate(Data.DBProvider dbProvider, OrderBy[] orderBys, out Data.Field[] orderByFields)
{
    bool[] ascs = new bool[orderBys.Length];
    orderByFields = new Hubble.Core.Data.Field[orderBys.Length];
    int scoreFieldIndex = -1;

    for (int i = 0; i < orderBys.Length; i++)
    {
        string order = orderBys[i].Order;
        string name = orderBys[i].Name;

        // A missing direction means ascending; only an explicit "desc" reverses it.
        // Keywords are compared ordinally so the match does not depend on the
        // thread culture (e.g. "DOCID" under Turkish casing rules).
        ascs[i] = order == null || !order.Equals("desc", StringComparison.OrdinalIgnoreCase);

        if (name.Equals("docid", StringComparison.OrdinalIgnoreCase))
        {
            orderByFields[i] = new Hubble.Core.Data.Field("docid", Hubble.Core.Data.DataType.Int);
        }
        else if (name.Equals("score", StringComparison.OrdinalIgnoreCase))
        {
            scoreFieldIndex = i;
            orderByFields[i] = new Hubble.Core.Data.Field("score", Hubble.Core.Data.DataType.BigInt);
        }
        else
        {
            orderByFields[i] = dbProvider.GetField(name);
        }
    }

    return new DocId2LongComparer(ascs, scoreFieldIndex);
}
/// <summary>
/// Get IdFields List from trigger table
/// </summary>
/// <param name="dbProvider">dbProvider that used to get field info</param>
/// <param name="dbAdapterName">DBAdapter name</param>
/// <param name="table">table that read from trigger table; must have "id", "Serial" and "Fields" columns</param>
/// <param name="lastSerial">receives the "Serial" value of the last row read, or -1 when the table has no rows</param>
/// <returns>
/// One entry per trigger row, sorted. Entries equal to the preceding kept entry
/// are replaced by null (not removed), so the list may contain null items.
/// </returns>
internal static List<IdFields> GetIdFieldsList(Data.DBProvider dbProvider, string dbAdapterName, System.Data.DataTable table, out long lastSerial)
{
    lastSerial = -1;

    HashSet<string> fieldsSetWithTokenizedFields = new HashSet<string>();
    HashSet<string> fieldsSetWithoutTokenizedFields = new HashSet<string>();
    List<string> tempFields = new List<string>(128);
    List<IdFields> result = new List<IdFields>(table.Rows.Count);

    foreach (System.Data.DataRow row in table.Rows)
    {
        long id = long.Parse(row["id"].ToString());
        lastSerial = long.Parse(row["Serial"].ToString());
        string fields = row["Fields"].ToString();

        tempFields.Clear();
        bool hasTokenized = false;

        // Keep only the names that are known fields of the table.
        foreach (string field in fields.Split(new char[] { ',' }))
        {
            // Invariant lower-casing: field names are identifiers, so the
            // result must not depend on the thread culture.
            string f = field.Trim().ToLowerInvariant();

            if (f == "")
            {
                continue;
            }

            Data.Field dbField = dbProvider.GetField(f);

            if (dbField == null)
            {
                continue;
            }

            if (dbField.IndexType == Hubble.Core.Data.Field.Index.Tokenized)
            {
                hasTokenized = true;
            }

            tempFields.Add(f);
        }

        // Rows that touch a tokenized field and rows that do not are collected
        // into separate field sets.
        HashSet<string> targetSet = hasTokenized ? fieldsSetWithTokenizedFields : fieldsSetWithoutTokenizedFields;
        targetSet.UnionWith(tempFields);

        result.Add(new IdFields(id, hasTokenized));
    }

    // Every entry of a group gets the union of the fields seen for that group.
    string fieldsWithTokenized = GetFieldsStringFromHashSet(fieldsSetWithTokenizedFields, dbAdapterName);
    string fieldsWithoutTokenized = GetFieldsStringFromHashSet(fieldsSetWithoutTokenizedFields, dbAdapterName);

    foreach (IdFields idFields in result)
    {
        idFields.Fields = idFields.HasTokenizedFields ? fieldsWithTokenized : fieldsWithoutTokenized;
    }

    // Merge same Id: after sorting, an entry equal to the last kept entry is
    // blanked out with null instead of being removed.
    result.Sort();

    if (result.Count > 0)
    {
        IdFields last = result[0];

        for (int i = 1; i < result.Count; i++)
        {
            if (result[i].Equals(last))
            {
                result[i] = null;
            }
            else
            {
                last = result[i];
            }
        }
    }

    return result;
}
/// <summary>
/// Sorts <paramref name="docResults"/> in place according to the order-by
/// clauses held in _OrderBys. Does nothing when there are no clauses.
/// </summary>
/// <param name="docResults">Results to sort.</param>
/// <param name="top">
/// Passed to QueryResultHeapSort.TopSort when the heap sort path is usable.
/// The fallback path ignores it and sorts the whole array.
/// </param>
/// <exception cref="ParseException">
/// A clause names an unknown field, a field that is not an Untokenized index,
/// or a document has no payload data.
/// </exception>
unsafe internal void Sort(Query.DocumentResultForSort[] docResults, int top)
{
    if (_OrderBys.Count <= 0)
    {
        return;
    }

    // Preferred path: let QueryResultHeapSort do the work when it reports it can.
    QueryResultHeapSort heapSort = new QueryResultHeapSort(_OrderBys, _DBProvider);

    if (heapSort.CanDo)
    {
        heapSort.Prepare(docResults);
        heapSort.TopSort(docResults, top);
        return;
    }

    // Fallback path: attach one SortInfo per clause to every result, then sort.
    foreach (SyntaxAnalysis.Select.OrderBy orderBy in _OrderBys)
    {
        Data.Field field = _DBProvider.GetField(orderBy.Name);

        Query.SortType sortType = Hubble.Core.Query.SortType.None;
        bool isDocId = false;
        bool isScore = false;

        // A clause without a direction is treated as ascending in this method.
        // Keywords are compared ordinally so matching is culture independent.
        bool isAsc = orderBy.Order == null || orderBy.Order.Equals("ASC", StringComparison.OrdinalIgnoreCase);

        if (field == null)
        {
            if (orderBy.Name.Equals("DocId", StringComparison.OrdinalIgnoreCase))
            {
                sortType = Hubble.Core.Query.SortType.Long;
                isDocId = true;
            }
            else if (orderBy.Name.Equals("Score", StringComparison.OrdinalIgnoreCase))
            {
                sortType = Hubble.Core.Query.SortType.Long;
                isScore = true;
            }
            else
            {
                throw new ParseException(string.Format("Unknown field name:{0}", orderBy.Name));
            }
        }
        else
        {
            if (field.IndexType != Hubble.Core.Data.Field.Index.Untokenized)
            {
                throw new ParseException(string.Format("Order by field name:{0} is not Untokenized Index!", orderBy.Name));
            }
        }

        for (int i = 0; i < docResults.Length; i++)
        {
            if (docResults[i].SortInfoList == null)
            {
                docResults[i].SortInfoList = new List<Hubble.Core.Query.SortInfo>(2);
            }

            if (isDocId)
            {
                docResults[i].SortInfoList.Add(new Hubble.Core.Query.SortInfo(isAsc, sortType, docResults[i].DocId));
            }
            else if (isScore)
            {
                docResults[i].SortInfoList.Add(new Hubble.Core.Query.SortInfo(isAsc, sortType, docResults[i].Score));
            }
            else
            {
                // Payload data is fetched on first use and cached on the result.
                if (docResults[i].PayloadData == null)
                {
                    int* payloadData = _DBProvider.GetPayloadData(docResults[i].DocId);

                    if (payloadData == null)
                    {
                        throw new ParseException(string.Format("DocId={0} has not payload!", docResults[i].DocId));
                    }

                    docResults[i].PayloadData = payloadData;
                }

                docResults[i].SortInfoList.Add(Data.DataTypeConvert.GetSortInfo(isAsc, field.DataType,
                    docResults[i].PayloadData, field.TabIndex, field.SubTabIndex, field.DataLength));
            }
        }
    }

    // Full sort. A partial (top-N) sort via QuickSort.TopSort was previously
    // disabled here because of a known bug in it; do not re-enable it until
    // that bug is fixed.
    Array.Sort(docResults);
}
/// <summary>
/// Inspects the order-by clauses and records whether the heap sort can
/// handle them (see _CanDo), together with the direction of each sort key.
/// </summary>
/// <remarks>
/// The heap sort is declared usable only for one or two clauses, each of which
/// is "DocId", "Score", or an Untokenized field of type Date, SmallDateTime,
/// Int, SmallInt, TinyInt, BigInt, DateTime or Float.
/// </remarks>
/// <param name="orderBys">Order-by clauses; null or empty disables the heap sort.</param>
/// <param name="dbProvider">Provider used to look up table fields by name.</param>
/// <exception cref="ParseException">A clause names a field that is not an Untokenized index.</exception>
public QueryResultHeapSort(List<SyntaxAnalysis.Select.OrderBy> orderBys, Data.DBProvider dbProvider)
{
    _DBProvider = dbProvider;
    _OrderBys = orderBys;
    _CanDo = true;

    if (orderBys == null || orderBys.Count <= 0 || orderBys.Count > 2)
    {
        _CanDo = false;
        return;
    }

    _SortFieldsCount = orderBys.Count;

    for (int i = 0; i < orderBys.Count; i++)
    {
        string order = orderBys[i].Order;
        string name = orderBys[i].Name;

        // A clause without a direction is treated as ascending. Keywords are
        // compared ordinally so matching is culture independent.
        bool asc = order == null || order.Equals("ASC", StringComparison.OrdinalIgnoreCase);

        if (i == 0)
        {
            _Asc1 = asc;
        }
        else
        {
            _Asc2 = asc;
        }

        if (name.Equals("DocId", StringComparison.OrdinalIgnoreCase)
            || name.Equals("Score", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        Data.Field field = _DBProvider.GetField(name);

        if (field == null)
        {
            // Unknown field: there is nothing this sorter could sort on, so
            // decline and leave error reporting to the caller's general path.
            _CanDo = false;
            break;
        }

        if (field.IndexType != Hubble.Core.Data.Field.Index.Untokenized)
        {
            throw new ParseException(string.Format("Order by field name:{0} is not Untokenized Index!", name));
        }

        switch (field.DataType)
        {
            case Hubble.Core.Data.DataType.Date:
            case Hubble.Core.Data.DataType.SmallDateTime:
            case Hubble.Core.Data.DataType.Int:
            case Hubble.Core.Data.DataType.SmallInt:
            case Hubble.Core.Data.DataType.TinyInt:
            case Hubble.Core.Data.DataType.BigInt:
            case Hubble.Core.Data.DataType.DateTime:
            case Hubble.Core.Data.DataType.Float:
                break;

            default:
                _CanDo = false;
                break;
        }

        if (!_CanDo)
        {
            break;
        }
    }
}
/// <summary>
/// Fills the sort value slots of every result: SortValue for the first
/// order-by clause and SortValue1 for the second.
/// </summary>
/// <remarks>
/// "DocId" and "Score" clauses copy the corresponding member of the result.
/// Any other clause is resolved to a field; its value is read from the payload
/// data after <c>_DBProvider.FillPayloadData</c> has been called. Float values
/// are multiplied by 1000 and truncated to a long. Clauses whose field is
/// unknown or whose data type is not handled leave the slot untouched.
/// </remarks>
/// <param name="docResults">Results whose sort values are filled in place.</param>
/// <exception cref="ParseException">A clause names a field that is not an Untokenized index.</exception>
unsafe public void Prepare(DocumentResultForSort[] docResults)
{
    for (int keyIndex = 0; keyIndex < _SortFieldsCount; keyIndex++)
    {
        bool isFirstKey = keyIndex == 0;
        bool ascending = isFirstKey ? _Asc1 : _Asc2;
        string keyName = _OrderBys[keyIndex].Name;

        if (keyName.Equals("DocId", StringComparison.CurrentCultureIgnoreCase))
        {
            for (int d = 0; d < docResults.Length; d++)
            {
                if (isFirstKey)
                {
                    docResults[d].SortValue = docResults[d].DocId;
                }
                else
                {
                    docResults[d].SortValue1 = docResults[d].DocId;
                }
            }

            continue;
        }

        if (keyName.Equals("Score", StringComparison.CurrentCultureIgnoreCase))
        {
            for (int d = 0; d < docResults.Length; d++)
            {
                if (isFirstKey)
                {
                    docResults[d].SortValue = docResults[d].Score;
                }
                else
                {
                    docResults[d].SortValue1 = docResults[d].Score;
                }
            }

            continue;
        }

        Data.Field field = _DBProvider.GetField(keyName);

        if (field == null)
        {
            continue;
        }

        if (field.IndexType != Hubble.Core.Data.Field.Index.Untokenized)
        {
            throw new ParseException(string.Format("Order by field name:{0} is not Untokenized Index!", keyName));
        }

        // Which SortInfo member carries the value: 0 = IntValue, 1 = LongValue, 2 = DoubleValue.
        int valueKind;

        switch (field.DataType)
        {
            case Hubble.Core.Data.DataType.Date:
            case Hubble.Core.Data.DataType.SmallDateTime:
            case Hubble.Core.Data.DataType.Int:
            case Hubble.Core.Data.DataType.SmallInt:
            case Hubble.Core.Data.DataType.TinyInt:
                valueKind = 0;
                break;

            case Hubble.Core.Data.DataType.BigInt:
            case Hubble.Core.Data.DataType.DateTime:
                valueKind = 1;
                break;

            case Hubble.Core.Data.DataType.Float:
                valueKind = 2;
                break;

            default:
                // Data type not handled here: leave the slot as it is.
                continue;
        }

        _DBProvider.FillPayloadData(docResults);

        for (int d = 0; d < docResults.Length; d++)
        {
            int* payload = docResults[d].PayloadData;

            Query.SortInfo info = Data.DataTypeConvert.GetSortInfo(ascending, field.DataType,
                payload, field.TabIndex, field.SubTabIndex, field.DataLength);

            if (valueKind == 0)
            {
                if (isFirstKey)
                {
                    docResults[d].SortValue = info.IntValue;
                }
                else
                {
                    docResults[d].SortValue1 = info.IntValue;
                }
            }
            else if (valueKind == 1)
            {
                if (isFirstKey)
                {
                    docResults[d].SortValue = info.LongValue;
                }
                else
                {
                    docResults[d].SortValue1 = info.LongValue;
                }
            }
            else
            {
                if (isFirstKey)
                {
                    docResults[d].SortValue = (long)(info.DoubleValue * 1000);
                }
                else
                {
                    docResults[d].SortValue1 = (long)(info.DoubleValue * 1000);
                }
            }
        }
    }
}
/// <summary>
/// Tokenizes a test text with the analyzer configured for a table field and
/// outputs one row (Word, Position, Rank) per resulting word.
/// </summary>
/// <remarks>
/// Parameters: [0] table name, [1] field name, [2] text to analyze,
/// [3] optional analyzer type; the value "sqlclient" selects
/// <c>TokenizeForSqlClient</c>, anything else selects <c>Tokenize</c>.
/// </remarks>
/// <exception cref="ArgumentException">Fewer than three parameters were supplied.</exception>
/// <exception cref="Data.DataException">
/// The table, the field or the analyzer cannot be found, or the field has no analyzer.
/// </exception>
public void Run()
{
    if (Parameters.Count < 3)
    {
        throw new ArgumentException("Parameter 1 is table name, Parameter 2 is field name, Parameter 3 is a text for test, Parameter 4 is analyzer type(optional)");
    }

    Data.DBProvider dbProvider = Data.DBProvider.GetDBProvider(Parameters[0]);

    if (dbProvider == null)
    {
        throw new Data.DataException(string.Format("Can't find table name : {0}", Parameters[0]));
    }

    Data.Field field = dbProvider.GetField(Parameters[1]);

    if (field == null)
    {
        // Report the field name (parameter 2), not the test text.
        throw new Data.DataException(string.Format("Can't find field name : {0}", Parameters[1]));
    }

    if (field.AnalyzerName == null)
    {
        throw new Data.DataException(string.Format("Field: {0} in {1} is not tokenized", Parameters[1], dbProvider.Table.Name));
    }

    Analysis.IAnalyzer analyzer = Data.DBProvider.GetAnalyzer(field.AnalyzerName);

    if (analyzer == null)
    {
        throw new Data.DataException(string.Format("Can't find analyzer name : {0}", field.AnalyzerName));
    }

    // The analyzer type is the fourth parameter whenever it is present; the
    // keyword is compared ordinally so matching is culture independent.
    bool clientAnalyzer = Parameters.Count >= 4
        && Parameters[3].Equals("sqlclient", StringComparison.OrdinalIgnoreCase);

    AddColumn("Word");
    AddColumn("Position");
    AddColumn("Rank");

    if (clientAnalyzer)
    {
        foreach (Entity.WordInfo word in analyzer.TokenizeForSqlClient(Parameters[2]))
        {
            OutputWordRow(word);
        }
    }
    else
    {
        foreach (Entity.WordInfo word in analyzer.Tokenize(Parameters[2]))
        {
            OutputWordRow(word);
        }
    }
}

// Writes one output row describing a single tokenized word.
private void OutputWordRow(Entity.WordInfo word)
{
    NewRow();
    OutputValue("Word", word.Word);
    OutputValue("Position", word.Position.ToString());
    OutputValue("Rank", word.Rank.ToString());
}