/// <summary>
        /// Reads a <see cref="TrieIndexHeader"/> from a UTF-8 text stream in which every
        /// line has the form "PropertyName&lt;separator&gt;Value".
        /// </summary>
        /// <param name="stream">
        /// Stream to read from. It is closed when this method returns because the
        /// wrapping <see cref="StreamReader"/> is disposed.
        /// </param>
        /// <returns>A new header whose properties were populated from the stream.</returns>
        /// <exception cref="InvalidDataException">
        /// A line has no key/value separator, or names a property that does not exist on the header.
        /// </exception>
        public TrieIndexHeader Deserialize(Stream stream)
        {
            var header     = new TrieIndexHeader();
            var properties = GetProperties(header);

            using (var reader = new StreamReader(stream, Encoding.UTF8))
            {
                // ReadLine() returning null is the reliable end-of-stream signal;
                // Peek() may report -1 early on streams that cannot seek.
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Blank lines carry no property data.
                    if (line.Length == 0)
                    {
                        continue;
                    }

                    string[] keyValue = line.Split(KeyValueSeperator);
                    if (keyValue.Length < 2)
                    {
                        throw new InvalidDataException(
                                  string.Format("Malformed header line (missing key/value separator): '{0}'", line));
                    }

                    string key = keyValue[0];

                    var property = properties.SingleOrDefault(f => f.Name == key);
                    if (property == null)
                    {
                        throw new InvalidDataException(
                                  string.Format("Property not found: '{0}'", key));
                    }

                    // NOTE(review): only keyValue[1] is used, so a value that itself contains
                    // the separator would be truncated - confirm values never contain it.
                    object propertyValue = DeserializeValue(keyValue[1], property.PropertyType);
                    property.SetValue(header, propertyValue);
                }
            }

            return(header);
        }
Пример #2
0
        public void get_character_index_must_be_null()
        {
            // Arrange: a header that has had no characters added to it.
            TrieIndexHeader emptyHeader = new TrieIndexHeader();

            // Act: look up a character in that header.
            var lookupResult = TrieIndexHeaderCharacterReader.Instance.GetCharacterIndex(emptyHeader, 'a');

            // Assert: the lookup reports "not found" as null.
            Assert.AreEqual(null, lookupResult);
        }
Пример #3
0
        /// <summary>
        /// Computes the positions of the child records of the node stored at
        /// <paramref name="parentPosition"/>.
        /// </summary>
        /// <param name="binaryReader">Not used by this method.</param>
        /// <param name="Header">Header supplying the record size (LENGTH_OF_STRUCT).</param>
        /// <param name="parentPosition">Position of the parent node record.</param>
        /// <returns>
        /// One position per flagged child, or null when the stored children offset is 0.
        /// </returns>
        internal long[] GetChildrenPositionsFromNode(BinaryReader binaryReader, TrieIndexHeader Header, long parentPosition)
        {
            int offsetToChildren = ReadChildrenOffset(parentPosition);

            // A stored offset of 0 is treated as "this node has no children".
            if (offsetToChildren == 0)
            {
                return(null);
            }

            bool[] flags     = ReadChildrenFlags(parentPosition);
            int    slotCount = GetFlaggedCount(flags, true);

            long[] positions          = new long[slotCount];
            long   firstChildPosition = parentPosition + offsetToChildren;

            // Children are laid out back to back, one record per flagged child.
            for (int slot = 0; slot < slotCount; slot++)
            {
                positions[slot] = firstChildPosition + (slot * Header.LENGTH_OF_STRUCT);
            }

            return(positions);
        }
Пример #4
0
        public void character_list_must_be_initialized_when_new_instance_was_created()
        {
            // Arrange / Act: construct a header with default state.
            TrieIndexHeader freshHeader = new TrieIndexHeader();

            // Assert: the instance exists and exposes an empty, non-null character list.
            Assert.IsNotNull(freshHeader);
            Assert.IsNotNull(freshHeader.CharacterList);
            Assert.AreEqual(0, freshHeader.CharacterList.Count);
        }
Пример #5
0
        /// <summary>
        /// Sorts the builder's character list in place using <see cref="TrieCharacterComparer"/>.
        /// </summary>
        /// <param name="header">Header being built; only checked for null here.</param>
        /// <returns>This builder, so calls can be chained.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="header"/> is null, or the builder's character list is null.
        /// </exception>
        private TrieIndexHeaderBuilder SortCharacterList(ref TrieIndexHeader header)
        {
            if (header == null)
            {
                throw new ArgumentNullException("header");
            }

            // Reported separately so the exception names the value that is actually null.
            if (_characterList == null)
            {
                throw new ArgumentNullException("_characterList", "The builder's character list has not been initialized.");
            }

            _characterList.Sort(new TrieCharacterComparer());

            return(this);
        }
Пример #6
0
        /// <summary>
        /// Creates an index builder over the three given streams and initializes
        /// empty working state (header, trie, keyword set and keyword dictionary).
        /// </summary>
        /// <param name="headerStream">Stream stored as the builder's header stream.</param>
        /// <param name="indexStream">Stream stored as the builder's index stream.</param>
        /// <param name="tailStream">Stream stored as the builder's tail stream.</param>
        public IndexBuilder(Stream headerStream, Stream indexStream, Stream tailStream)
        {
            // Streams supplied by the caller.
            this._headerStream = headerStream;
            this._indexStream  = indexStream;
            this._tailStream   = tailStream;

            // Fresh in-memory structures.
            this._header            = new TrieIndexHeader();
            this._trie              = new Trie();
            this._keywords          = new HashSet <string>();
            this._keywordDictionary = new Dictionary <string, uint>();
        }
Пример #7
0
        /// <summary>
        /// Gets the index of the character in the header's character cache.
        /// Returns null when the character is not found.
        /// </summary>
        /// <returns>The character index, or null when <paramref name="c"/> is not cached for the header.</returns>
        /// <param name="header">Header whose character cache is consulted; the cache is initialized on first use.</param>
        /// <param name="c">Character to look up.</param>
        internal ushort? GetCharacterIndex(TrieIndexHeader header, char c)
        {
            InitCharacterCache(header);

            // Single lookup instead of ContainsKey followed by the indexer.
            ushort index;
            if (_characterIndexDictionary[header].TryGetValue(c, out index))
            {
                return(index);
            }

            return(null);
        }
Пример #8
0
        /// <summary>
        /// Produces a header from the characters collected by this builder:
        /// assigns the character list, sorts it, then calculates the header metrics.
        /// </summary>
        /// <returns>The newly built header.</returns>
        internal TrieIndexHeader Build()
        {
            var result = new TrieIndexHeader
            {
                CharacterList = _characterList
            };

            // Sorting must precede the metric calculation, as in the original sequence.
            SortCharacterList(ref result);
            CalculateMetrics(ref result);

            return(result);
        }
        /// <summary>
        /// Serializes the trie rooted at <paramref name="node"/> into the file at <paramref name="path"/>.
        /// </summary>
        /// <param name="instance">Extension receiver; not used by this method.</param>
        /// <param name="header">Header passed through to the index serializer.</param>
        /// <param name="node">Root node to serialize.</param>
        /// <param name="path">Path of the index file to open or create.</param>
        /// <param name="readBufferSizeInBytes">Buffer size handed to the file stream.</param>
        /// <returns>The value returned by <c>TrieIndexSerializer.Serialize</c>.</returns>
        public static int CreateIndexFile(this TrieBinaryReader instance, TrieIndexHeader header, TrieNode node, string path, int readBufferSizeInBytes)
        {
            // NOTE(review): OpenOrCreate does not truncate an existing file, so bytes of a
            // longer previous index would remain after the new content - confirm this is intended.
            //
            // The using block guarantees buffered bytes are flushed and the file handle is
            // released even when serialization throws; previously the stream was never disposed.
            using (Stream stream = new FileStream(
                       path,
                       FileMode.OpenOrCreate,
                       FileAccess.Write,
                       FileShare.None,
                       readBufferSizeInBytes,
                       FileOptions.RandomAccess
                       ))
            {
                return(TrieIndexSerializer.Serialize(node, header, stream));
            }
        }
        /// <summary>
        /// Writes <paramref name="header"/> to the file at <paramref name="path"/>
        /// using a <see cref="TrieIndexHeaderSerializer"/>.
        /// </summary>
        /// <param name="header">Header to persist.</param>
        /// <param name="path">Path of the header file to open or create.</param>
        public static void CreateHeaderFile(this TrieIndexHeader header, string path)
        {
            var headerSerializer = new TrieIndexHeaderSerializer();

            // Exclusive write access; the file is opened if present, otherwise created.
            using (Stream output = new FileStream(path, FileMode.OpenOrCreate, FileAccess.Write, FileShare.None))
            {
                headerSerializer.Serialize(output, header);
            }
        }
Пример #11
0
        /// <summary>
        /// Collects the positions of all bits in <paramref name="bitArray"/> whose value
        /// equals <paramref name="flag"/>.
        /// </summary>
        /// <param name="bitArray">Bits to scan.</param>
        /// <param name="Header">Header supplying the initial capacity of the result list.</param>
        /// <param name="flag">Bit value to match.</param>
        /// <returns>The matching bit positions, in ascending order.</returns>
        /// <exception cref="OverflowException">
        /// A matching bit sits at a position that does not fit in a <see cref="UInt16"/>.
        /// </exception>
        internal ICollection <UInt16> GetFlagedCharCodes(BitArray bitArray, TrieIndexHeader Header, bool flag)
        {
            ICollection <UInt16> charCodeList = new List <UInt16>(Header.COUNT_OF_CHILDREN_FLAGS_IN_BYTES); // TODO: use different constant

            // An int counter is used on purpose: a UInt16 counter wraps back to 0 after 65535
            // and would never terminate for a bit array of 65536 or more bits.
            for (int i = 0; i < bitArray.Length; i++)
            {
                if (bitArray.Get(i) == flag)
                {
                    charCodeList.Add(checked((UInt16)i)); // TODO: use mapping
                }
            }

            return(charCodeList);
        }
Пример #12
0
        /// <summary>
        /// Writes <paramref name="header"/> to the file at <paramref name="path"/> via
        /// <c>TrieSerializer.SerializeHeaderWithJsonSerializer</c>.
        /// </summary>
        /// <param name="header">Header to persist.</param>
        /// <param name="path">Path of the header file to open or create.</param>
        public static void CreateHeaderFile(this TrieIndexHeader header, string path)
        {
            // The using block releases the file handle even when serialization throws;
            // the previous manual Dispose() call was skipped on exceptions.
            using (Stream stream = new FileStream(
                       path,
                       FileMode.OpenOrCreate,
                       FileAccess.Write,
                       FileShare.None
                       ))
            {
                TrieSerializer.SerializeHeaderWithJsonSerializer(stream, header);
            }
        }
Пример #13
0
        /// <summary>
        /// Resolves every bit of <paramref name="bitArray"/> that equals <paramref name="flag"/>
        /// to the character stored at that index in the header.
        /// </summary>
        /// <param name="bitArray">Bits to scan.</param>
        /// <param name="Header">Header used to translate indexes into characters.</param>
        /// <param name="flag">Bit value to match.</param>
        /// <returns>The characters for the matching positions; empty when there are none.</returns>
        internal ICollection <char> GetFlagedChars(BitArray bitArray, TrieIndexHeader Header, bool flag)
        {
            ICollection <char>   resolved = new List <char>();
            ICollection <UInt16> indexes  = GetFlagedCharCodes(bitArray, Header, flag);

            // Nothing to translate when no index collection came back.
            if (indexes == null)
            {
                return(resolved);
            }

            foreach (UInt16 index in indexes)
            {
                resolved.Add(TrieIndexHeaderCharacterReader.Instance.GetCharacterAtIndex(Header, index));
            }

            return(resolved);
        }
Пример #14
0
        /// <summary>
        /// Walks the trie breadth-first from <paramref name="rootNode"/>, assigning each node
        /// its visiting order and each child its index among its siblings, feeds every node's
        /// character to a header builder, and stores the built header in <c>_header</c>.
        /// </summary>
        /// <param name="rootNode">Root of the trie to process.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="rootNode"/> is null, or a null child node is encountered.
        /// </exception>
        private void ReorderTrieAndLoadHeader(TrieNode rootNode)
        {
            Queue <TrieNode> indexerQueue = new Queue <TrieNode>();

            indexerQueue.Enqueue(rootNode);

            int order   = 0;
            var builder = new TrieIndexHeaderBuilder();

            while (indexerQueue.Count > 0)
            {
                TrieNode currentNode = indexerQueue.Dequeue();

                if (currentNode == null)
                {
                    // Parameter name and message are passed in their proper slots.
                    throw new ArgumentNullException("rootNode", "Root node or one of its descendants is null");
                }

                currentNode.Order = order;
                builder.AddChar(currentNode.Character);

                // When the current node is the first child (ChildIndex == 0) of a parent,
                // record on the parent the distance, in visiting order, to this child.
                if (currentNode.Parent != null && currentNode.ChildIndex == 0)
                {
                    currentNode.Parent.ChildrenCount = (currentNode.Order - currentNode.Parent.Order);
                }

                if (currentNode.Children != null)
                {
                    int childIndex = 0;

                    foreach (var childNode in currentNode.Children)
                    {
                        childNode.Value.ChildIndex = childIndex++;
                        indexerQueue.Enqueue(childNode.Value);
                    }
                }

                ++order;
            }

            _header = builder.Build();
        }
Пример #15
0
        /// <summary>
        /// Searches the index for <c>options.Term</c> and builds a result from the
        /// last node matched for that term.
        /// </summary>
        /// <param name="options">Search options; must not be null.</param>
        /// <returns>The result created from the matched node.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
        public SearchResult Search(SearchOptions options)
        {
            if (options == null)
            {
                // ArgumentNullException derives from ArgumentException, so existing
                // catch blocks keep working while the parameter name is reported correctly.
                throw new ArgumentNullException("options");
            }

            // Header is loaded on first use.
            if (_header == null)
            {
                _header = GetHeader();
            }

            var reader = CreateTrieBinaryReader();
            var node   = reader.SearchLastNode(0, options.Term);

            return(CreateResultFromNode(reader, node, options.Term, options));
        }
        /// <summary>
        /// Writes every property of <paramref name="header"/> to <paramref name="stream"/>
        /// as UTF-8 text, one "Name&lt;separator&gt;Value" line per property.
        /// </summary>
        /// <param name="stream">
        /// Destination stream. It is closed when this method returns because the
        /// wrapping <see cref="StreamWriter"/> is disposed.
        /// </param>
        /// <param name="header">Header whose properties are written.</param>
        public void Serialize(Stream stream, TrieIndexHeader header)
        {
            var headerProperties = GetProperties(header);

            using (var output = new StreamWriter(stream, Encoding.UTF8))
            {
                foreach (var headerProperty in headerProperties)
                {
                    // Key and separator first ...
                    output.Write(headerProperty.Name);
                    output.Write(KeyValueSeperator);

                    // ... then the value, rendered according to the property's declared type ...
                    var currentValue = headerProperty.GetValue(header);
                    SerializePropertyValue(currentValue, headerProperty.PropertyType, output);

                    // ... and finally the line terminator.
                    output.Write(Environment.NewLine);
                }
            }
        }
Пример #17
0
        /// <summary>
        /// Returns the header for <c>_headerFileName</c>, reading it from the header file on
        /// first request and serving it from <c>_headerDictionary</c> afterwards.
        /// </summary>
        /// <returns>The cached or freshly read header.</returns>
        internal override TrieIndexHeader GetHeader()
        {
            // Every access to the dictionary happens under the lock. The previous unlocked
            // ContainsKey / indexer reads could run while another thread was inside Add,
            // which is not safe for a non-concurrent dictionary.
            lock (_lockObject)
            {
                TrieIndexHeader header;

                if (!_headerDictionary.TryGetValue(_headerFileName, out header))
                {
                    header = TrieNodeHelperFileSystemExtensions.ReadHeaderFile(_headerFileName);

                    _headerDictionary.Add(_headerFileName, header);
                }

                return(header);
            }
        }
Пример #18
0
        /// <summary>
        /// Derives the size and offset fields of <paramref name="header"/> from the number
        /// of characters held by this builder and from the header's fixed field sizes.
        /// </summary>
        /// <param name="header">Header whose metric properties are assigned.</param>
        private void CalculateMetrics(ref TrieIndexHeader header)
        {
            int charsetSize = _characterList.Count;

            header.COUNT_OF_CHARSET = charsetSize;

            // Charset size divided by 8, rounded up.
            int roundedUpBy8 = charsetSize / 8;
            if (charsetSize % 8 != 0)
            {
                roundedUpBy8 += 1;
            }
            header.COUNT_OF_CHILDREN_FLAGS = roundedUpBy8;

            // Charset size divided by 32, rounded up. Despite the property name this is a
            // count of 32-bit groups; the next property multiplies it by 4.
            int roundedUpBy32 = charsetSize / 32;
            if (charsetSize % 32 != 0)
            {
                roundedUpBy32 += 1;
            }
            header.COUNT_OF_CHILDREN_FLAGS_IN_BYTES = roundedUpBy32;

            header.COUNT_OF_CHILDREN_FLAGS_BIT_ARRAY_IN_BYTES = header.COUNT_OF_CHILDREN_FLAGS_IN_BYTES * 4;

            // Each value below adds one more field size to the previous running total.
            header.LENGTH_OF_CHILDREN_FLAGS =
                header.COUNT_OF_CHARACTER_IN_BYTES + header.COUNT_TERMINAL_SIZE_IN_BYTES;

            header.LENGTH_OF_CHILDREN_OFFSET =
                header.LENGTH_OF_CHILDREN_FLAGS + header.COUNT_OF_CHILDREN_FLAGS_BIT_ARRAY_IN_BYTES;

            header.LENGHT_OF_TEXT_FILE_START_POSITION_IN_BYTES =
                header.LENGTH_OF_CHILDREN_OFFSET + header.COUNT_OF_TEXT_FILE_START_POSITION_IN_BYTES;

            header.LENGTH_OF_STRUCT =
                header.LENGHT_OF_TEXT_FILE_START_POSITION_IN_BYTES + header.COUNT_OF_CHILDREN_OFFSET_IN_BYTES;
        }
Пример #19
0
        /// <summary>
        /// Builds, once per header, the character-to-index map used by the character lookups.
        /// Entries equal to '\0' in the header's character list are skipped.
        /// </summary>
        /// <param name="header">Header whose character list is indexed.</param>
        /// <exception cref="OverflowException">
        /// A character sits at a list position that does not fit in a <see cref="UInt16"/>.
        /// </exception>
        internal void InitCharacterCache(TrieIndexHeader header)
        {
            if (!_isCharacterIndexCacheInitialized.ContainsKey(header))
            {
                // NOTE(review): locking on 'this' lets outside code contend for the same
                // monitor; a private lock object would be safer but needs a new field.
                lock (this)
                {
                    if (!_isCharacterIndexCacheInitialized.ContainsKey(header))
                    {
                        // Fill a local map first so a partially populated map is never visible.
                        var characterIndexes = new Dictionary <char, UInt16>();

                        // int counter: a UInt16 counter would wrap and never terminate
                        // for a list with 65536 or more entries.
                        int characterCount = header.CharacterList.Count;
                        for (int i = 0; i < characterCount; i++)
                        {
                            char character = header.CharacterList[i];
                            if (character != '\0')
                            {
                                characterIndexes.Add(character, checked((UInt16)i));
                            }
                        }

                        // Publish the finished map, and only then mark the header as initialized.
                        // Previously the flag was set before the map was filled.
                        _characterIndexDictionary.Add(header, characterIndexes);
                        _isCharacterIndexCacheInitialized.Add(header, true);
                    }
                }
            }
        }
Пример #20
0
        /// <summary>
        /// Returns the character stored at <paramref name="index"/> in the header's character list.
        /// </summary>
        /// <param name="header">Header to read from; its character cache is initialized first.</param>
        /// <param name="index">Position within the header's character list.</param>
        /// <returns>The character at the given position.</returns>
        internal char GetCharacterAtIndex(TrieIndexHeader header, UInt16 index)
        {
            InitCharacterCache(header);

            char characterAtIndex = header.CharacterList[index];

            return(characterAtIndex);
        }
Пример #21
0
 /// <summary>
 /// Creates a trie reader over the given binary reader and index header.
 /// </summary>
 /// <param name="binaryReader">Reader stored for later use by this instance.</param>
 /// <param name="header">Header stored for later use by this instance.</param>
 public TrieBinaryReader(BinaryReader binaryReader, TrieIndexHeader header)
 {
     this._header       = header;
     this._binaryReader = binaryReader;
 }
 /// <summary>
 /// Returns the runtime properties of the header's actual type.
 /// </summary>
 /// <param name="header">Instance whose type is inspected.</param>
 /// <returns>The properties reported by <c>GetRuntimeProperties()</c>.</returns>
 private IEnumerable <PropertyInfo> GetProperties(TrieIndexHeader header)
 {
     var headerType = header.GetType();

     return(headerType.GetRuntimeProperties());
 }
Пример #23
0
        /// <summary>
        /// Writes the trie rooted at <paramref name="rootNode"/> to <paramref name="index"/>
        /// in breadth-first order, one record per node: character index, terminal flag,
        /// children flags, children offset and text-file position.
        /// </summary>
        /// <param name="rootNode">Root node of the trie to serialize.</param>
        /// <param name="trieIndexHeader">Header supplying the character mapping and record sizes.</param>
        /// <param name="index">Destination stream for the binary index.</param>
        /// <remarks>Don't forget to dispose stream; this method flushes it but leaves it open.</remarks>
        /// <returns>The number of nodes written.</returns>
        /// <exception cref="InvalidDataException">A null node is encountered.</exception>
        /// <exception cref="InvalidOperationException">
        /// A child character is not present in the header's character mapping.
        /// </exception>
        public static int Serialize(TrieNode rootNode, TrieIndexHeader trieIndexHeader, Stream index)
        {
            int processedNodeCount = 0;

            Queue <TrieNode> serializerQueue = new Queue <TrieNode>();

            serializerQueue.Enqueue(rootNode);

            BinaryWriter binaryWriter = new BinaryWriter(index);

            while (serializerQueue.Count > 0)
            {
                TrieNode currentNode = serializerQueue.Dequeue();

                if (currentNode == null)
                {
                    // The format string now has a placeholder, so the count is actually reported.
                    throw new InvalidDataException(string.Format("Value cannot be null (nodes processed so far: {0})", processedNodeCount));
                }

                // Write the character as its index in the header; a character without an
                // index (the root) is written as 0.
                UInt16?characterIndex = TrieIndexHeaderCharacterReader.Instance.GetCharacterIndex(trieIndexHeader, currentNode.Character);
                binaryWriter.Write(characterIndex.GetValueOrDefault());

                binaryWriter.Write(currentNode.IsTerminal);

                // Write children flags: one bit per charset entry, set for each child present.
                BitArray baChildren = new BitArray(trieIndexHeader.COUNT_OF_CHARSET);
                if (currentNode.Children != null)
                {
                    foreach (var item in currentNode.Children)
                    {
                        UInt16?itemIndex = TrieIndexHeaderCharacterReader.Instance.GetCharacterIndex(trieIndexHeader, item.Key);
                        if (!itemIndex.HasValue)
                        {
                            // Same exception type that reading .Value of an empty Nullable
                            // raised before, but with a message naming the offending character.
                            throw new InvalidOperationException(string.Format("Child character '{0}' is not present in the index header.", item.Key));
                        }

                        baChildren.Set(itemIndex.Value, true);
                    }
                }

                // Pack the bits into 32-bit integers before writing them out.
                int[] childrenFlags = new int[trieIndexHeader.COUNT_OF_CHILDREN_FLAGS_IN_BYTES];
                BitArrayHelper.CopyToInt32Array(baChildren, childrenFlags, 0);

                for (int i = 0; i < childrenFlags.Length; i++)
                {
                    binaryWriter.Write(childrenFlags[i]);
                }

                // Write children offset.
                binaryWriter.Write(currentNode.ChildrenCount * trieIndexHeader.LENGTH_OF_STRUCT);

                // todo:position of text file
                if (currentNode.PositionOnTextFile.HasValue)
                {
                    binaryWriter.Write((uint)currentNode.PositionOnTextFile.Value);
                }
                else
                {
                    binaryWriter.Write((uint)0);
                }

                if (currentNode.Children != null)
                {
                    foreach (var childNode in currentNode.Children)
                    {
                        serializerQueue.Enqueue(childNode.Value);
                    }
                }

                ++processedNodeCount;
            }

            // The writer is deliberately not disposed (that would close the caller's stream),
            // so flush explicitly to push everything written through to the stream.
            binaryWriter.Flush();

            return(processedNodeCount);
        }