/// <summary>
/// Binary-search based fuzzy (prefix) lookup.
/// Walks the same probe sequence as an ordinary binary search for <paramref name="word"/>
/// and records every probed position whose word starts with <paramref name="word"/>
/// (both sides lower-cased with <c>ToLower()</c>).
/// </summary>
/// <param name="dictIndexList">
/// Index entries to search. Presumably sorted consistently with <c>CompareTo</c> — TODO confirm against caller.
/// </param>
/// <param name="word">Word or prefix to look for. A null value yields an empty result.</param>
/// <returns>
/// Positions in <paramref name="dictIndexList"/> of the probed entries that start with
/// <paramref name="word"/>, in probe order; an empty list when none matched.
/// </returns>
private List<int> BinaryQuery(List<DictIndex> dictIndexList, string word)
{
    List<int> simVal = new List<int>();
    if (word == null)
    {
        // Nothing can match a null query; avoids a NullReferenceException on ToLower().
        return simVal;
    }

    // Lower-case the query once instead of on every probe.
    string loweredWord = word.ToLower();

    int low = 0;
    int high = dictIndexList.Count - 1;

    // high only ever shrinks from Count - 1, so "low <= high" is the only bound needed.
    while (low <= high)
    {
        // Overflow-safe midpoint.
        int middle = low + ((high - low) >> 1);
        DictIndex midVal = dictIndexList[middle];

        // NOTE(review): only probed midpoints are collected, so prefix matches that the
        // search never lands on are not reported — confirm this is the intended behavior.
        if (midVal.word.ToLower().StartsWith(loweredWord))
        {
            simVal.Add(middle);
        }

        // Compare once per probe and branch on the result.
        int order = CompareTo(midVal.word, word);
        if (order < 0)
        {
            low = middle + 1;
        }
        else if (order > 0)
        {
            high = middle - 1;
        }
        else
        {
            // Exact match: stop probing.
            return simVal;
        }
    }

    return simVal;
}
/// <summary>
/// Binary search for an entry whose word compares equal to <paramref name="word"/>
/// according to <c>CompareTo</c>.
/// </summary>
/// <param name="dictIndexList">
/// Index entries to search. Presumably sorted consistently with <c>CompareTo</c> — TODO confirm against caller.
/// </param>
/// <param name="word">Word to look for.</param>
/// <returns>
/// The position of a matching entry in <paramref name="dictIndexList"/>, or -1 when no entry matches.
/// </returns>
private int BinarySearch(List<DictIndex> dictIndexList, string word)
{
    int low = 0;
    int high = dictIndexList.Count - 1;

    // high only ever shrinks from Count - 1, so "low <= high" is the only bound needed.
    while (low <= high)
    {
        // Overflow-safe midpoint.
        int middle = low + ((high - low) >> 1);

        // Compare once per probe and branch on the result.
        int order = CompareTo(dictIndexList[middle].word, word);
        if (order < 0)
        {
            low = middle + 1;
        }
        else if (order > 0)
        {
            high = middle - 1;
        }
        else
        {
            return middle;
        }
    }

    return -1;
}
/// <summary>
/// Parses the index stream (<c>IdxStream</c>) entry by entry and appends one
/// <c>DictIndex</c> per entry to the <c>dictIndexList</c> member.
/// </summary>
/// <remarks>
/// Each entry is laid out as: the word bytes (decoded as UTF-8), a single '\0' terminator,
/// a 4-byte offset of the word in the data file and a 4-byte size of the word's definition.
/// Both 4-byte fields are decoded with <c>ByteConverter.Unsigned4BytesToInt</c>.
/// Parsing stops at the end of the stream, or at the first entry that is truncated or whose
/// terminator is not found within the read buffer.
/// </remarks>
/// <returns>The <c>dictIndexList</c> member with the parsed entries appended.</returns>
private List<DictIndex> GetIndex()
{
    // Entries are appended to the dictIndexList member, which is declared outside this method.

    // The maximum length of a word is 256 bytes, so one entry never exceeds
    // 256 bytes (word) + 1 byte ('\0') + 4 bytes (offset) + 4 bytes (definition size).
    const int MaxWordBytes = 256;
    const int TrailerBytes = 1 + 4 + 4;
    byte[] bytes = new byte[MaxWordBytes + TrailerBytes];

    // Absolute position of the next entry. Seeks below are relative to the stream start,
    // so this assumes the stream is positioned at 0 on entry — TODO confirm.
    int currentPos = 0;

    while (true)
    {
        // Stream.Read may return fewer bytes than requested, so keep reading until the
        // buffer is full or the stream ends. Only the first bytesRead bytes are valid;
        // anything beyond is left over from the previous entry.
        int bytesRead = 0;
        int chunk;
        while (bytesRead < bytes.Length
               && (chunk = IdxStream.Read(bytes, bytesRead, bytes.Length - bytesRead)) > 0)
        {
            bytesRead += chunk;
        }

        if (bytesRead == 0)
        {
            // End of stream.
            break;
        }

        // The word ends at the first '\0' within the valid part of the buffer.
        int wordLength = System.Array.IndexOf(bytes, (byte)0, 0, bytesRead);
        if (wordLength < 0 || wordLength + TrailerBytes > bytesRead)
        {
            // Missing terminator or truncated offset/size fields: the entry cannot be
            // decoded without reading stale or out-of-range bytes, so stop here.
            break;
        }

        int offsetStart = wordLength + 1; // skip the '\0' terminator
        int sizeStart = offsetStart + 4;

        string word = System.Text.Encoding.UTF8.GetString(bytes, 0, wordLength);
        long offset = ByteConverter.Unsigned4BytesToInt(bytes, offsetStart); // offset of the word in the data file
        long size = ByteConverter.Unsigned4BytesToInt(bytes, sizeStart);     // size of the word's definition

        DictIndex dictIndex = new DictIndex();
        dictIndex.word = word;
        dictIndex.offset = offset;
        dictIndex.size = size;
        dictIndexList.Add(dictIndex);

        // The buffer usually holds more than one entry; rewind the stream to the start
        // of the next entry so the following read begins exactly there.
        int indexSize = wordLength + TrailerBytes;
        currentPos += indexSize;
        IdxStream.Seek(currentPos, SeekOrigin.Begin);
    }

    return dictIndexList;
}