AddLabelOffset() private method

private AddLabelOffset ( int hash, int cid, int knownOffset ) : void
hash int
cid int
knownOffset int
return void
        /// <summary>
        /// Opens the file and reloads the CompactLabelToOrdinal. The file it expects
        /// is generated by the flush command of this class (referenced as
        /// <c>flush(File)</c> in the original documentation).
        /// </summary>
        /// <param name="file">Path of the file to open for reading.</param>
        /// <param name="loadFactor">Load factor stored on the returned instance; also used to compute its threshold.</param>
        /// <param name="numHashArrays">Number of hash arrays to allocate; 2^numHashArrays is passed to DetermineCapacity.</param>
        /// <returns>The CompactLabelToOrdinal rebuilt from the file contents.</returns>
        /// <exception cref="IOException">
        /// Thrown with the message "Invalid file format. Cannot deserialize." when
        /// loading fails with the exception type handled below.
        /// </exception>
        public static CompactLabelToOrdinal Open(string file, float loadFactor, int numHashArrays)
        {
            // Part of the file is the labelRepository, which needs to be rehashed
            // and label offsets re-added to the object. I am unsure as to why we
            // can't just store these off in the file as well, but in keeping with
            // the spirit of the original code, I did it this way. (ssuppe)
            CompactLabelToOrdinal l2o = new CompactLabelToOrdinal();
            l2o.loadFactor = loadFactor;
            l2o.hashArrays = new HashArray[numHashArrays];

            try
            {
                // The using statement disposes the reader (and its underlying stream)
                // on every exit path, replacing the manual null-check in a finally block.
                using (BinaryReader dis = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read)))
                {
                    // TaxiReader needs to load the "counter" or occupancy (L2O) to know
                    // the next unique facet. we used to load the delimiter too, but
                    // never used it.
                    l2o.counter = dis.ReadInt32();

                    l2o.capacity = DetermineCapacity((int)Math.Pow(2, l2o.hashArrays.Length), l2o.counter);
                    l2o.Init();

                    // now read the chars
                    l2o.labelRepository = CharBlockArray.Open(dis);

                    l2o.collisionMap = new CollisionMap(l2o.labelRepository);

                    // Calculate hash on the fly based on how CategoryPath hashes
                    // itself. Maybe in the future we can call some static based methods
                    // in CategoryPath so that this doesn't break again? I don't like
                    // having code in two different places...
                    int cid = 0;
                    // Skip the initial offset, it's the CategoryPath(0,0), which isn't
                    // a hashed value.
                    int offset = 1;
                    int lastStartOffset = offset;
                    // This loop really relies on a well-formed input (assumes pretty blindly
                    // that array offsets will work).  Since the initial file is machine
                    // generated, I think this should be OK.
                    while (offset < l2o.labelRepository.Length)
                    {
                        // identical code to CategoryPath.hashFromSerialized. since we need to
                        // advance offset, we cannot call the method directly. perhaps if we
                        // could pass a mutable Integer or something...
                        // NOTE(review): the (short) cast makes char values above 0x7FFF
                        // negative; kept unchanged so the computed hashes stay the same.
                        // Confirm that such lengths cannot occur in the file.
                        int length = (short)l2o.labelRepository.CharAt(offset++);
                        int hash = length;
                        if (length != 0)
                        {
                            for (int i = 0; i < length; i++)
                            {
                                int len = (short)l2o.labelRepository.CharAt(offset++);
                                hash = hash * 31 + l2o.labelRepository.SubSequence(offset, offset + len).GetHashCode();
                                offset += len;
                            }
                        }
                        // Now that we've hashed the components of the label, do the
                        // final part of the hash algorithm.
                        hash = hash ^ (((int)((uint)hash >> 20)) ^ ((int)((uint)hash >> 12)));
                        hash = hash ^ ((int)((uint)hash >> 7)) ^ ((int)((uint)hash >> 4));
                        // Add the label, and let's keep going
                        l2o.AddLabelOffset(hash, cid, lastStartOffset);
                        cid++;
                        lastStartOffset = offset;
                    }
                }
            }
            catch (DllNotFoundException e)
            {
                // NOTE(review): DllNotFoundException looks like a mechanical translation of
                // Java's ClassNotFoundException and may never be raised by the reads above;
                // confirm which exception the deserialization code actually throws.
                // The original exception is kept as the inner exception so the cause is not lost.
                throw new IOException("Invalid file format. Cannot deserialize.", e);
            }

            l2o.threshold = (int)(l2o.loadFactor * l2o.capacity);
            return l2o;
        }
Example #2
0
        /// <summary>
        /// Rebuilds a CompactLabelToOrdinal from a file on disk. The file is expected
        /// to be the one produced by the <see cref="Flush(Stream)"/> command.
        /// </summary>
        /// <param name="file">The file to open for reading.</param>
        /// <param name="loadFactor">Load factor assigned to the returned instance and used to compute its threshold.</param>
        /// <param name="numHashArrays">Number of hash arrays to allocate; 2^numHashArrays is passed to DetermineCapacity.</param>
        /// <returns>The instance populated from the file.</returns>
        /// <exception cref="IOException">
        /// Wraps a <c>SerializationException</c> raised while loading, with the
        /// message "Invalid file format. Cannot deserialize.".
        /// </exception>
        internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int numHashArrays)
        {
            // The labelRepository stored in the file has to be rehashed and its label
            // offsets re-added to the object; the offsets are not read from the file
            // here, in keeping with the spirit of the original code. (ssuppe)
            var restored = new CompactLabelToOrdinal();
            restored.loadFactor = loadFactor;
            restored.hashArrays = new HashArray[numHashArrays];

            try
            {
                // Disposing the reader on every exit path is handled by the using statement.
                using (var reader = new BinaryReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read)))
                {
                    // TaxiReader needs the "counter" (occupancy, L2O) to know the next
                    // unique facet. The delimiter used to be loaded too, but was never used.
                    restored.m_counter = reader.ReadInt32();

                    int initialCapacity = (int)Math.Pow(2, restored.hashArrays.Length);
                    restored.capacity = DetermineCapacity(initialCapacity, restored.m_counter);
                    restored.Init();

                    // The characters of all labels follow the counter.
                    restored.labelRepository = CharBlockArray.Open(reader.BaseStream);
                    restored.collisionMap = new CollisionMap(restored.labelRepository);

                    // The hash is computed on the fly, mirroring how CategoryPath hashes
                    // itself (same code as CategoryPath.hashFromSerialized, duplicated here
                    // because the read position must be advanced while hashing).
                    int ordinal = 0;
                    // Position 0 is skipped: it is the CategoryPath(0,0), which isn't a
                    // hashed value.
                    int pos = 1;
                    int labelStart = pos;

                    // This walk blindly trusts the offsets in the input; the file is
                    // machine generated, so that is considered acceptable.
                    while (pos < restored.labelRepository.Length)
                    {
                        int componentCount = (ushort)restored.labelRepository[pos];
                        pos++;

                        int hash = componentCount;
                        for (int remaining = componentCount; remaining > 0; remaining--)
                        {
                            int componentLength = (ushort)restored.labelRepository[pos];
                            pos++;

                            // LUCENENET: Corrected 2nd Subsequence parameter
                            int componentHash = restored.labelRepository.Subsequence(pos, componentLength).GetHashCode();
                            hash = hash * 31 + componentHash;
                            pos += componentLength;
                        }

                        // Final mixing step of the hash algorithm, applied once all
                        // components of the label have been folded in.
                        hash ^= ((int)((uint)hash >> 20)) ^ ((int)((uint)hash >> 12));
                        hash ^= ((int)((uint)hash >> 7)) ^ ((int)((uint)hash >> 4));

                        // Register this label and move on to the next one.
                        restored.AddLabelOffset(hash, ordinal, labelStart);
                        ordinal++;
                        labelStart = pos;
                    }
                }
            }
            catch (SerializationException ex)
            {
                throw new IOException("Invalid file format. Cannot deserialize.", ex);
            }

            restored.threshold = (int)(restored.loadFactor * restored.capacity);
            return restored;
        }