/// <summary>
/// Opens the file and reloads the <see cref="CompactLabelToOrdinal"/>. The file it expects
/// is generated from the <see cref="Flush(Stream)"/> command.
/// </summary>
/// <param name="file">The file to read; it is opened read-only via its <see cref="FileSystemInfo.FullName"/>.</param>
/// <param name="loadFactor">Load factor stored on the returned instance and used to compute its threshold.</param>
/// <param name="numHashArrays">Number of hash arrays to allocate; also feeds the capacity calculation.</param>
/// <returns>A new instance whose counter, label repository, collision map and label offsets were rebuilt from the file.</returns>
/// <exception cref="IOException">
/// Thrown (wrapping the original error) when a <see cref="SerializationException"/> is raised while reading the file.
/// </exception>
internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int numHashArrays)
{
    // Part of the file is the labelRepository, which needs to be rehashed
    // and label offsets re-added to the object. I am unsure as to why we
    // can't just store these off in the file as well, but in keeping with
    // the spirit of the original code, I did it this way. (ssuppe)
    CompactLabelToOrdinal l2o = new CompactLabelToOrdinal();
    l2o.loadFactor = loadFactor;
    l2o.hashArrays = new HashArray[numHashArrays];

    try
    {
        // Both the stream and the reader are owned by using-statements so they are
        // released on every exit path, including a failure while constructing the reader.
        using (FileStream stream = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
        using (BinaryReader dis = new BinaryReader(stream))
        {
            // TaxiReader needs to load the "counter" or occupancy (L2O) to know
            // the next unique facet. we used to load the delimiter too, but
            // never used it.
            l2o.m_counter = dis.ReadInt32();

            l2o.capacity = DetermineCapacity((int)Math.Pow(2, l2o.hashArrays.Length), l2o.m_counter);
            l2o.Init();

            // now read the chars
            l2o.labelRepository = CharBlockArray.Open(dis.BaseStream);

            l2o.collisionMap = new CollisionMap(l2o.labelRepository);

            // Calculate hash on the fly based on how CategoryPath hashes
            // itself. Maybe in the future we can call some static based methods
            // in CategoryPath so that this doesn't break again? I don't like
            // having code in two different places...
            int cid = 0;
            // Skip the initial offset, it's the CategoryPath(0,0), which isn't
            // a hashed value.
            int offset = 1;
            int lastStartOffset = offset;
            // This loop really relies on a well-formed input (assumes pretty blindly
            // that array offsets will work). Since the initial file is machine
            // generated, I think this should be OK.
            while (offset < l2o.labelRepository.Length)
            {
                // identical code to CategoryPath.hashFromSerialized. since we need to
                // advance offset, we cannot call the method directly. perhaps if we
                // could pass a mutable Integer or something...
                int length = (ushort)l2o.labelRepository[offset++];
                int hash = length;
                if (length != 0)
                {
                    for (int i = 0; i < length; i++)
                    {
                        int len = (ushort)l2o.labelRepository[offset++];
                        hash = hash * 31 + l2o.labelRepository.Subsequence(offset, len).GetHashCode(); // LUCENENET: Corrected 2nd Subsequence parameter
                        offset += len;
                    }
                }
                // Now that we've hashed the components of the label, do the
                // final part of the hash algorithm.
                hash = hash ^ (((int)((uint)hash >> 20)) ^ ((int)((uint)hash >> 12)));
                hash = hash ^ ((int)((uint)hash >> 7)) ^ ((int)((uint)hash >> 4));
                // Add the label, and let's keep going
                l2o.AddLabelOffset(hash, cid, lastStartOffset);
                cid++;
                lastStartOffset = offset;
            }
        }
    }
    catch (SerializationException se)
    {
        throw new IOException("Invalid file format. Cannot deserialize.", se);
    }

    l2o.threshold = (int)(l2o.loadFactor * l2o.capacity);
    return l2o;
}
/// <summary>
/// Appends the same randomly generated text to a <c>CharBlockArray</c> and a
/// <see cref="StringBuilder"/>, checks that both hold equal content, then writes the
/// array to a temporary file, reads it back and checks equality again.
/// </summary>
public virtual void TestArray()
{
    CharBlockArray array = new CharBlockArray();
    StringBuilder builder = new StringBuilder();

    const int n = 100 * 1000;

    byte[] buffer = new byte[50];

    // This is essentially the equivalent of Java's
    //   StandardCharsets.UTF_8.newDecoder()
    //       .onUnmappableCharacter(CodingErrorAction.REPLACE)
    //       .onMalformedInput(CodingErrorAction.REPLACE);
    // The decoder does not depend on the loop variables, so it is created once here
    // instead of on every one of the 3 * n iterations below.
    Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
        new EncoderReplacementFallback("?"),
        new DecoderReplacementFallback("?"));

    // Pass 1: append whole strings.
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(buffer);
        int size = 1 + Random().Next(50);
        // This test is turning random bytes into a string,
        // this is asking for trouble.
        string s = decoder.GetString(buffer, 0, size);
        array.Append(s);
        builder.Append(s);
    }

    // Pass 2: append whole strings again (same code path as pass 1).
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(buffer);
        int size = 1 + Random().Next(50);
        // This test is turning random bytes into a string,
        // this is asking for trouble.
        string s = decoder.GetString(buffer, 0, size);
        array.Append(s);
        builder.Append(s);
    }

    // Pass 3: append to the array one char at a time, to the builder as a whole string.
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(buffer);
        int size = 1 + Random().Next(50);
        // This test is turning random bytes into a string,
        // this is asking for trouble.
        string s = decoder.GetString(buffer, 0, size);
        for (int j = 0; j < s.Length; j++)
        {
            array.Append(s[j]);
        }
        builder.Append(s);
    }

    AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", builder, array);

    DirectoryInfo tempDir = CreateTempDir("growingchararray");
    FileInfo f = new FileInfo(Path.Combine(tempDir.FullName, "GrowingCharArrayTest.tmp"));
    using (var @out = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.Write))
    {
        array.Flush(@out);
        @out.Flush();
    }

    using (var @in = new FileStream(f.FullName, FileMode.Open, FileAccess.Read))
    {
        array = CharBlockArray.Open(@in);
        AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch after flush/load.", builder, array);
    }
    f.Delete();
}
/// <summary>
/// Feeds identical random text into a <c>CharBlockArray</c> and a <see cref="StringBuilder"/>,
/// verifies the two agree, then flushes the array to a temporary file, loads it back
/// and verifies agreement once more.
/// </summary>
public virtual void TestArray()
{
    const int n = 100 * 1000;

    CharBlockArray array = new CharBlockArray();
    StringBuilder expected = new StringBuilder();
    byte[] scratch = new byte[50];

    // Two back-to-back rounds of n whole-string appends.
    for (int round = 0; round < 2; round++)
    {
        for (int i = 0; i < n; i++)
        {
            Random().NextBytes(scratch);
            int byteCount = 1 + Random().Next(50);
            // Random bytes are decoded as UTF-8 text here, which is asking for trouble.
            string text = Encoding.UTF8.GetString(scratch, 0, byteCount);
            array.Append(text);
            expected.Append(text);
        }
    }

    // One more round of n strings, this time pushed into the array char by char.
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(scratch);
        int byteCount = 1 + Random().Next(50);
        // Random bytes are decoded as UTF-8 text here, which is asking for trouble.
        string text = Encoding.UTF8.GetString(scratch, 0, byteCount);
        foreach (char c in text)
        {
            array.Append(c);
        }
        expected.Append(text);
    }

    AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", expected, array);

    // Round-trip the array through a file inside a fresh temp directory.
    string tempPath = Path.Combine(CreateTempDir("growingchararray").FullName, "GrowingCharArrayTest.tmp");
    FileInfo tempFile = new FileInfo(tempPath);

    using (FileStream writer = new FileStream(tempFile.FullName, FileMode.OpenOrCreate, FileAccess.Write))
    {
        array.Flush(writer);
        writer.Flush();
    }

    using (FileStream reader = new FileStream(tempFile.FullName, FileMode.Open, FileAccess.Read))
    {
        array = CharBlockArray.Open(reader);
        AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch after flush/load.", expected, array);
    }

    tempFile.Delete();
}