/// <summary>
/// Opens the given file and rebuilds a <see cref="CompactLabelToOrdinal"/> from it.
/// The file is expected to be the one written by this class's <c>Flush</c> operation:
/// a 32-bit counter followed by the serialized label repository.
/// </summary>
/// <param name="file">Path of the file to open for reading.</param>
/// <param name="loadFactor">
/// Load factor stored on the returned instance; the resize threshold is set to
/// <c>loadFactor * capacity</c>.
/// </param>
/// <param name="numHashArrays">
/// Number of <c>HashArray</c> slots to allocate; <c>2^numHashArrays</c> is also passed
/// to <c>DetermineCapacity</c> together with the counter read from the file.
/// </param>
/// <returns>The reloaded <see cref="CompactLabelToOrdinal"/>.</returns>
/// <exception cref="IOException">
/// Thrown with the message "Invalid file format. Cannot deserialize." when a
/// <see cref="DllNotFoundException"/> is raised while loading; the original exception
/// is attached as the inner exception. Other exceptions propagate unchanged.
/// </exception>
public static CompactLabelToOrdinal Open(string file, float loadFactor, int numHashArrays)
{
    // Part of the file is the labelRepository, which needs to be rehashed
    // and label offsets re-added to the object. I am unsure as to why we
    // can't just store these off in the file as well, but in keeping with
    // the spirit of the original code, I did it this way. (ssuppe)
    CompactLabelToOrdinal l2o = new CompactLabelToOrdinal();
    l2o.loadFactor = loadFactor;
    l2o.hashArrays = new HashArray[numHashArrays];

    try
    {
        // The using statement disposes the reader (and the underlying stream)
        // on every exit path, including when an exception is thrown.
        using (BinaryReader dis = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read)))
        {
            // TaxiReader needs to load the "counter" or occupancy (L2O) to know
            // the next unique facet. we used to load the delimiter too, but
            // never used it.
            l2o.counter = dis.ReadInt32();

            l2o.capacity = DetermineCapacity((int)Math.Pow(2, l2o.hashArrays.Length), l2o.counter);
            l2o.Init();

            // now read the chars
            l2o.labelRepository = CharBlockArray.Open(dis);

            l2o.collisionMap = new CollisionMap(l2o.labelRepository);

            // Calculate hash on the fly based on how CategoryPath hashes
            // itself. Maybe in the future we can call some static based methods
            // in CategoryPath so that this doesn't break again? I don't like
            // having code in two different places...
            int cid = 0;

            // Skip the initial offset, it's the CategoryPath(0,0), which isn't
            // a hashed value.
            int offset = 1;
            int lastStartOffset = offset;

            // This loop really relies on a well-formed input (assumes pretty blindly
            // that array offsets will work). Since the initial file is machine
            // generated, I think this should be OK.
            while (offset < l2o.labelRepository.Length)
            {
                // identical code to CategoryPath.hashFromSerialized. since we need to
                // advance offset, we cannot call the method directly. perhaps if we
                // could pass a mutable Integer or something...
                // NOTE(review): the (short) casts turn stored values above 0x7FFF into
                // negative numbers, which would move offset backwards - confirm such
                // lengths cannot occur in a generated file.
                int length = (short)l2o.labelRepository.CharAt(offset++);
                int hash = length;
                if (length != 0)
                {
                    for (int i = 0; i < length; i++)
                    {
                        int len = (short)l2o.labelRepository.CharAt(offset++);
                        hash = hash * 31 + l2o.labelRepository.SubSequence(offset, offset + len).GetHashCode();
                        offset += len;
                    }
                }

                // Now that we've hashed the components of the label, do the
                // final part of the hash algorithm.
                hash = hash ^ (((int)((uint)hash >> 20)) ^ ((int)((uint)hash >> 12)));
                hash = hash ^ ((int)((uint)hash >> 7)) ^ ((int)((uint)hash >> 4));

                // Add the label, and let's keep going
                l2o.AddLabelOffset(hash, cid, lastStartOffset);
                cid++;
                lastStartOffset = offset;
            }
        }
    }
    catch (DllNotFoundException e)
    {
        // Keep the root cause attached so a failed load can be diagnosed.
        // NOTE(review): DllNotFoundException is an unusual failure for reading a
        // data file - confirm this is the exception the deserializer really raises.
        throw new IOException("Invalid file format. Cannot deserialize.", e);
    }

    l2o.threshold = (int)(l2o.loadFactor * l2o.capacity);
    return l2o;
}
/// <summary>
/// Rebuilds a <see cref="CompactLabelToOrdinal"/> from a file that was previously
/// written by <see cref="Flush(Stream)"/>.
/// </summary>
/// <param name="file">The file to open for reading.</param>
/// <param name="loadFactor">
/// Load factor stored on the returned instance; the threshold is set to
/// <c>loadFactor * capacity</c>.
/// </param>
/// <param name="numHashArrays">Number of <c>HashArray</c> slots to allocate.</param>
/// <returns>The reloaded instance.</returns>
/// <exception cref="IOException">
/// Thrown (wrapping the original error) when a <see cref="SerializationException"/>
/// is raised while loading the file.
/// </exception>
internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int numHashArrays)
{
    // The file only carries the counter and the label repository; the hash
    // tables are not persisted, so every label is re-hashed below and its
    // offset registered again on the new instance.
    CompactLabelToOrdinal result = new CompactLabelToOrdinal();
    result.loadFactor = loadFactor;
    result.hashArrays = new HashArray[numHashArrays];

    try
    {
        using (BinaryReader reader = new BinaryReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read)))
        {
            // First value in the file: the counter (occupancy), needed to know
            // the next unique ordinal and to size the tables.
            result.m_counter = reader.ReadInt32();

            int baseCapacity = (int)Math.Pow(2, result.hashArrays.Length);
            result.capacity = DetermineCapacity(baseCapacity, result.m_counter);
            result.Init();

            // The remainder of the stream is the character repository.
            result.labelRepository = CharBlockArray.Open(reader.BaseStream);
            result.collisionMap = new CollisionMap(result.labelRepository);

            // Position 0 holds the CategoryPath(0,0) entry, which is not hashed,
            // so scanning starts at 1. The walk trusts the machine-generated
            // layout and performs no bounds validation of its own.
            int position = 1;
            for (int ordinal = 0; position < result.labelRepository.Length; ordinal++)
            {
                int labelStart = position;

                // Mirrors CategoryPath.hashFromSerialized; duplicated here because
                // the scan position has to advance while hashing.
                int componentCount = (ushort)result.labelRepository[position++];
                int hash = componentCount;
                for (int remaining = componentCount; remaining > 0; remaining--)
                {
                    int componentLength = (ushort)result.labelRepository[position++];
                    // Second argument of Subsequence is the length being hashed.
                    hash = hash * 31 + result.labelRepository.Subsequence(position, componentLength).GetHashCode();
                    position += componentLength;
                }

                // Final mixing step of the hash algorithm.
                hash ^= (int)((uint)hash >> 20) ^ (int)((uint)hash >> 12);
                hash ^= (int)((uint)hash >> 7) ^ (int)((uint)hash >> 4);

                // Register this label under the running ordinal.
                result.AddLabelOffset(hash, ordinal, labelStart);
            }
        }
    }
    catch (SerializationException se)
    {
        throw new IOException("Invalid file format. Cannot deserialize.", se);
    }

    result.threshold = (int)(result.loadFactor * result.capacity);
    return result;
}