/// <summary>
/// Checks whether <paramref name="cp"/> is equal to the <see cref="FacetLabel"/>
/// serialized in the <see cref="CharBlockArray"/> starting at <paramref name="offset"/>.
/// </summary>
public static bool EqualsToSerialized(FacetLabel cp, CharBlockArray charBlockArray, int offset)
{
    // The first char encodes the number of components; mismatch means not equal.
    if (cp.Length != charBlockArray.CharAt(offset++))
    {
        return false;
    }
    for (int i = 0; i < cp.Length; i++)
    {
        // Each component is stored as a length char followed by its characters.
        string component = cp.Components[i];
        int len = charBlockArray.CharAt(offset++);
        if (len != component.Length
            || !component.Equals(charBlockArray.SubSequence(offset, offset + len), StringComparison.Ordinal))
        {
            return false;
        }
        offset += len;
    }
    // Zero components (empty label) falls through here and matches trivially.
    return true;
}
/// <summary>
/// Checks whether <paramref name="cp"/> equals the <see cref="FacetLabel"/> that was
/// serialized into <paramref name="charBlockArray"/> at <paramref name="offset"/>.
/// </summary>
public static bool EqualsToSerialized(FacetLabel cp, CharBlockArray charBlockArray, int offset)
{
    // First serialized char holds the component count.
    int componentCount = charBlockArray[offset++];
    if (componentCount != cp.Length)
    {
        return false;
    }
    if (componentCount == 0)
    {
        // Empty label: nothing further to compare.
        return true;
    }
    int index = 0;
    while (index < componentCount)
    {
        // Each component is a length char followed by that many characters.
        int serializedLength = charBlockArray[offset++];
        string component = cp.Components[index];
        if (serializedLength != component.Length)
        {
            return false;
        }
        if (!component.Equals(charBlockArray.SubSequence(offset, offset + serializedLength), StringComparison.Ordinal))
        {
            return false;
        }
        offset += serializedLength;
        index++;
    }
    return true;
}
/// <summary>
/// Hashes the label serialized at <paramref name="offset"/>, then applies a final
/// bit-spreading mix so high-order bits influence the low-order bits used for bucketing.
/// </summary>
internal static int StringHashCode(CharBlockArray labelRepository, int offset)
{
    // Do the mixing in unsigned arithmetic: uint >> is the logical (>>>) shift.
    uint h = (uint)CategoryPathUtils.HashCodeOfSerialized(labelRepository, offset);
    h ^= (h >> 20) ^ (h >> 12);
    h ^= (h >> 7) ^ (h >> 4);
    return (int)h;
}
/// <summary>
/// Hashes the label serialized at <paramref name="offset"/> and applies a final
/// avalanche step that folds higher-order bits down into the lower bits.
/// </summary>
internal static int StringHashCode(CharBlockArray labelRepository, int offset)
{
    int hash = CategoryPathUtils.HashCodeOfSerialized(labelRepository, offset);
    // TripleShift is the unsigned (>>>) right shift; XOR is associative, so the
    // compound form below is equivalent to the two-statement original.
    hash ^= hash.TripleShift(20) ^ hash.TripleShift(12);
    hash ^= hash.TripleShift(7) ^ hash.TripleShift(4);
    return hash;
}
/// <summary>
/// Asserts that <paramref name="actual"/> contains exactly the characters of
/// <paramref name="expected"/>, comparing length first and then each position.
/// </summary>
/// <param name="msg">Message reported on assertion failure.</param>
/// <param name="expected">The reference character sequence.</param>
/// <param name="actual">The <see cref="CharBlockArray"/> under test.</param>
private static void AssertEqualsInternal(string msg, StringBuilder expected, CharBlockArray actual)
{
    // Materialize the StringBuilder once: a string's indexer is O(1), while
    // StringBuilder's indexer may walk its internal chunk list on every access,
    // making the loop below accidentally O(n^2) (see the #295 fix elsewhere in
    // this codebase).
    string expectedStr = expected.ToString();
    Assert.AreEqual(expectedStr.Length, actual.Length, msg);
    for (int i = 0; i < expectedStr.Length; i++)
    {
        Assert.AreEqual(expectedStr[i], actual[i], msg);
    }
}
/// <summary>
/// Initializes a <see cref="CollisionMap"/> over the shared label repository,
/// sizing the entry table from <paramref name="initialCapacity"/> and computing
/// the resize threshold from <paramref name="loadFactor"/>.
/// </summary>
private CollisionMap(int initialCapacity, float loadFactor, CharBlockArray labelRepository)
{
    this.labelRepository = labelRepository;
    this.loadFactor = loadFactor;
    // DetermineCapacity rounds the requested capacity to the table size the
    // implementation actually uses.
    int capacity = CompactLabelToOrdinal.DetermineCapacity(2, initialCapacity);
    this.capacity_Renamed = capacity;
    this.entries = new Entry[capacity];
    // Grow once the number of entries crosses capacity * loadFactor.
    this.threshold = (int)(capacity * loadFactor);
}
/// <summary>
/// Hashes the label serialized at <paramref name="offset"/> and applies a final
/// bit-spreading mix before returning it.
/// </summary>
internal static int StringHashCode(CharBlockArray labelRepository, int offset)
{
    int hash = CategoryPathUtils.HashCodeOfSerialized(labelRepository, offset);
    // Unsigned right shifts (via uint casts) spread high bits into the low bits.
    // Using compound assignment here also satisfies IDE0054, so no suppression
    // pragma is needed.
    hash ^= ((int)((uint)hash >> 20)) ^ ((int)((uint)hash >> 12));
    hash ^= ((int)((uint)hash >> 7)) ^ ((int)((uint)hash >> 4));
    return hash;
}
/// <summary>
/// Asserts that <paramref name="actual"/> has the same length and characters as
/// <paramref name="expected"/>.
/// </summary>
private static void AssertEqualsInternal(string msg, StringBuilder expected, CharBlockArray actual)
{
    // LUCENENET specific - Indexing a string is much faster than StringBuilder (#295)
    string snapshot = expected.ToString();
    Assert.AreEqual(snapshot.Length, actual.Length, msg);
    for (int i = 0; i < snapshot.Length; i++)
    {
        Assert.AreEqual(snapshot[i], actual[i], msg);
    }
}
/// <summary>
/// Serializes the given <see cref="FacetLabel"/> to the <see cref="CharBlockArray"/>:
/// first a char holding the component count, then each component as a length char
/// followed by its characters.
/// </summary>
public static void Serialize(FacetLabel cp, CharBlockArray charBlockArray)
{
    int numComponents = cp.Length;
    charBlockArray.Append((char)numComponents);
    // An empty label writes only its (zero) component count.
    for (int i = 0; i < numComponents; i++)
    {
        string component = cp.Components[i];
        charBlockArray.Append((char)component.Length);
        charBlockArray.Append(component);
    }
}
/// <summary>
/// Creates a fresh label repository (seeded with the empty <see cref="FacetLabel"/>)
/// and allocates the chain of hash arrays, each half the size of the previous one.
/// </summary>
private void Init()
{
    labelRepository = new CharBlockArray();
    // Slot 0 of the repository holds the serialized empty (root) label.
    CategoryPathUtils.Serialize(new FacetLabel(), labelRepository);

    for (int i = 0, size = this.capacity; i < this.hashArrays.Length; i++, size /= 2)
    {
        this.hashArrays[i] = new HashArray(size);
    }
}
/// <summary>
/// Calculates a hash code for a path previously written with
/// <see cref="Serialize(FacetLabel, CharBlockArray)"/>, without deserializing it.
/// </summary>
public static int HashCodeOfSerialized(CharBlockArray charBlockArray, int offset)
{
    // The first char holds the component count; an empty path hashes to 0.
    int componentCount = charBlockArray.CharAt(offset++);
    if (componentCount == 0)
    {
        return 0;
    }
    int hash = componentCount;
    for (int i = 0; i < componentCount; i++)
    {
        // Each component is length-prefixed; fold its sequence hash in.
        int len = charBlockArray.CharAt(offset++);
        hash = hash * 31 + charBlockArray.SubSequence(offset, offset + len).GetHashCode();
        offset += len;
    }
    return hash;
}
/// <summary>
/// Calculates a hash function of a path that was serialized with
/// <see cref="Serialize(FacetLabel, CharBlockArray)"/>.
/// </summary>
public static int HashCodeOfSerialized(CharBlockArray charBlockArray, int offset)
{
    // Leading char is the number of components; with zero components the loop
    // body never runs and the hash is simply 0.
    int componentCount = charBlockArray[offset++];
    int hash = componentCount;
    for (int i = 0; i < componentCount; i++)
    {
        // Length-prefixed component: combine the hash of its char sequence.
        int componentLength = charBlockArray[offset++];
        hash = hash * 31 + charBlockArray.SubSequence(offset, offset + componentLength).GetHashCode();
        offset += componentLength;
    }
    return hash;
}
/// <summary>
/// Calculates a hash function of a path that was serialized with
/// <see cref="Serialize(FacetLabel, CharBlockArray)"/>.
/// </summary>
public static int HashCodeOfSerialized(CharBlockArray charBlockArray, int offset)
{
    // The leading char stores the component count; an empty path hashes to 0
    // (the loop below would produce 0 anyway, but the early return keeps parity
    // with the serializer's empty-path short circuit).
    int componentCount = charBlockArray[offset++];
    if (componentCount == 0)
    {
        return 0;
    }
    int hash = componentCount;
    for (int i = 0; i < componentCount; i++)
    {
        int componentLength = charBlockArray[offset++];
        // LUCENENET: Subsequence takes (startIndex, length), not an exclusive end.
        hash = hash * 31 + charBlockArray.Subsequence(offset, componentLength).GetHashCode();
        offset += componentLength;
    }
    return hash;
}
/// <summary>
/// Deserializes a <see cref="CharBlockArray"/> from JSON, reading its block size
/// and contents properties (case-insensitively). Returns <c>null</c> when either
/// property is missing or invalid.
/// </summary>
public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
{
    var jsonObject = JObject.Load(reader);

    int blockSize = -1;
    string contents = null;

    foreach (var property in jsonObject.Properties())
    {
        if (property.Name.Equals(CharBlockArrayConverter.BLOCK_SIZE, StringComparison.OrdinalIgnoreCase))
        {
            blockSize = property.Value.Value<int>();
        }
        else if (property.Name.Equals(CharBlockArrayConverter.CONTENTS, StringComparison.OrdinalIgnoreCase))
        {
            contents = property.Value.Value<string>();
        }

        // Stop scanning as soon as both properties have usable values.
        if (blockSize > 0 && !string.IsNullOrEmpty(contents))
        {
            break;
        }
    }

    if (blockSize <= 0 || string.IsNullOrEmpty(contents))
    {
        return null;
    }

    var deserialized = new CharBlockArray(blockSize);
    deserialized.Append(contents);
    return deserialized;
}
/// <summary>
/// Asserts that <paramref name="actual"/> contains exactly the characters of
/// <paramref name="expected"/>, comparing length first and then each position.
/// </summary>
/// <param name="msg">Message reported on assertion failure.</param>
/// <param name="expected">The reference character sequence.</param>
/// <param name="actual">The <see cref="CharBlockArray"/> under test.</param>
private static void AssertEqualsInternal(string msg, StringBuilder expected, CharBlockArray actual)
{
    // Materialize the StringBuilder once: a string's indexer is O(1), whereas
    // StringBuilder's indexer may walk its internal chunk list per access,
    // making a per-character comparison loop accidentally O(n^2) (see the
    // #295 fix elsewhere in this codebase).
    string expectedStr = expected.ToString();
    Assert.AreEqual(expectedStr.Length, actual.Length, msg);
    for (int i = 0; i < expectedStr.Length; i++)
    {
        Assert.AreEqual(expectedStr[i], actual.CharAt(i), msg);
    }
}
/// <summary>
/// Appends randomly-generated strings to both a <see cref="CharBlockArray"/> and a
/// <see cref="StringBuilder"/> in three passes (whole string twice, then one char at
/// a time), asserts the two agree, then round-trips the array through a file and
/// re-verifies the contents.
/// </summary>
public virtual void TestArray()
{
    CharBlockArray array = new CharBlockArray();
    StringBuilder builder = new StringBuilder();

    const int n = 100 * 1000;

    byte[] buffer = new byte[50];

    // This is essentially the equivalent of
    // CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
    //     .onUnmappableCharacter(CodingErrorAction.REPLACE)
    //     .onMalformedInput(CodingErrorAction.REPLACE);
    //
    // Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
    //     new EncoderReplacementFallback("?"),
    //     new DecoderReplacementFallback("?"));

    // Pass 1: append each random string as a whole via Append(string).
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(buffer);
        int size = 1 + Random().Next(50);
        // This test is turning random bytes into a string,
        // this is asking for trouble.
        Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
            new EncoderReplacementFallback("?"),
            new DecoderReplacementFallback("?"));
        string s = decoder.GetString(buffer, 0, size);
        array.Append(s);
        builder.Append(s);
    }

    // Pass 2: same as pass 1 (presumably mirrors a distinct CharSequence overload
    // in the upstream Java test — verify against Lucene's original).
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(buffer);
        int size = 1 + Random().Next(50);
        // This test is turning random bytes into a string,
        // this is asking for trouble.
        Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
            new EncoderReplacementFallback("?"),
            new DecoderReplacementFallback("?"));
        string s = decoder.GetString(buffer, 0, size);
        array.Append(s);
        builder.Append(s);
    }

    // Pass 3: append the random strings one character at a time via Append(char).
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(buffer);
        int size = 1 + Random().Next(50);
        // This test is turning random bytes into a string,
        // this is asking for trouble.
        Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
            new EncoderReplacementFallback("?"),
            new DecoderReplacementFallback("?"));
        string s = decoder.GetString(buffer, 0, size);
        for (int j = 0; j < s.Length; j++)
        {
            array.Append(s[j]);
        }
        builder.Append(s);
    }

    AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", builder, array);

    // Persist the array to a temp file, reload it, and verify the contents
    // survive the flush/load round trip.
    DirectoryInfo tempDir = CreateTempDir("growingchararray");
    FileInfo f = new FileInfo(Path.Combine(tempDir.FullName, "GrowingCharArrayTest.tmp"));
    using (var @out = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.Write))
    {
        array.Flush(@out);
        @out.Flush();
    }

    using (var @in = new FileStream(f.FullName, FileMode.Open, FileAccess.Read))
    {
        array = CharBlockArray.Open(@in);
        AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch after flush/load.", builder, array);
    }
    f.Delete();
}
/// <summary>
/// Creates a <see cref="CollisionMap"/> with the given initial capacity and the
/// default load factor of 0.75.
/// </summary>
internal CollisionMap(int initialCapacity, CharBlockArray labelRepository)
    : this(initialCapacity, 0.75f, labelRepository)
{
}
/// <summary>
/// Appends randomly-generated UTF-8 strings to both a <see cref="CharBlockArray"/>
/// and a <see cref="StringBuilder"/> in three passes (whole string twice, then one
/// char at a time), asserts the two agree, then round-trips the array through a
/// file and re-verifies the contents.
/// </summary>
public virtual void TestArray()
{
    CharBlockArray array = new CharBlockArray();
    StringBuilder builder = new StringBuilder();

    const int n = 100 * 1000;

    byte[] buffer = new byte[50];

    // Pass 1: append each random string as a whole via Append(string).
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(buffer);
        int size = 1 + Random().Next(50);
        // This test is turning random bytes into a string,
        // this is asking for trouble.
        string s = Encoding.UTF8.GetString(buffer, 0, size);
        array.Append(s);
        builder.Append(s);
    }

    // Pass 2: same as pass 1 (presumably mirrors a distinct CharSequence overload
    // in the upstream Java test — verify against Lucene's original).
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(buffer);
        int size = 1 + Random().Next(50);
        // This test is turning random bytes into a string,
        // this is asking for trouble.
        string s = Encoding.UTF8.GetString(buffer, 0, size);
        array.Append(s);
        builder.Append(s);
    }

    // Pass 3: append the random strings one character at a time via Append(char).
    for (int i = 0; i < n; i++)
    {
        Random().NextBytes(buffer);
        int size = 1 + Random().Next(50);
        // This test is turning random bytes into a string,
        // this is asking for trouble.
        string s = Encoding.UTF8.GetString(buffer, 0, size);
        for (int j = 0; j < s.Length; j++)
        {
            array.Append(s[j]);
        }
        builder.Append(s);
    }

    AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", builder, array);

    // Persist the array to a temp file, reload it, and verify the contents
    // survive the flush/load round trip.
    DirectoryInfo tempDir = CreateTempDir("growingchararray");
    FileInfo f = new FileInfo(Path.Combine(tempDir.FullName, "GrowingCharArrayTest.tmp"));
    using (var @out = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.Write))
    {
        array.Flush(@out);
        @out.Flush();
    }

    using (var @in = new FileStream(f.FullName, FileMode.Open, FileAccess.Read))
    {
        array = CharBlockArray.Open(@in);
        AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch after flush/load.", builder, array);
    }
    f.Delete();
}
/// <summary>
/// Creates a <see cref="CollisionMap"/> with a default initial capacity of
/// 16K entries and a load factor of 0.75.
/// </summary>
internal CollisionMap(CharBlockArray labelRepository)
    : this(16 * 1024, 0.75f, labelRepository)
{
}
/// <summary>
/// Hashes the label serialized at <paramref name="offset"/> and applies a final
/// mixing step that spreads high-order bits into the low-order bits.
/// </summary>
internal static int StringHashCode(CharBlockArray labelRepository, int offset)
{
    int h = CategoryPathUtils.HashCodeOfSerialized(labelRepository, offset);
    // First fold: bits 20 and 12 down (uint cast gives the logical >>> shift).
    int highFold = ((int)((uint)h >> 20)) ^ ((int)((uint)h >> 12));
    h ^= highFold;
    // Second fold: bits 7 and 4 down.
    int lowFold = ((int)((uint)h >> 7)) ^ ((int)((uint)h >> 4));
    return h ^ lowFold;
}
/// <summary>
/// Opens the file and reloads the CompactLabelToOrdinal. The file it expects
/// is generated from the <see cref="Flush(Stream)"/> command.
/// </summary>
/// <param name="file">File previously written by <see cref="Flush(Stream)"/>.</param>
/// <param name="loadFactor">Load factor applied to the rebuilt hash structures.</param>
/// <param name="numHashArrays">Number of cascaded hash arrays to allocate.</param>
/// <exception cref="IOException">If the file cannot be deserialized.</exception>
internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int numHashArrays)
{
    // Part of the file is the labelRepository, which needs to be rehashed
    // and label offsets re-added to the object. I am unsure as to why we
    // can't just store these off in the file as well, but in keeping with
    // the spirit of the original code, I did it this way. (ssuppe)
    CompactLabelToOrdinal l2o = new CompactLabelToOrdinal();
    l2o.loadFactor = loadFactor;
    l2o.hashArrays = new HashArray[numHashArrays];

    BinaryReader dis = null;
    try
    {
        dis = new BinaryReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read));

        // TaxiReader needs to load the "counter" or occupancy (L2O) to know
        // the next unique facet. we used to load the delimiter too, but
        // never used it.
        l2o.m_counter = dis.ReadInt32();

        l2o.capacity = DetermineCapacity((int)Math.Pow(2, l2o.hashArrays.Length), l2o.m_counter);
        l2o.Init();

        // now read the chars
        l2o.labelRepository = CharBlockArray.Open(dis.BaseStream);

        l2o.collisionMap = new CollisionMap(l2o.labelRepository);

        // Calculate hash on the fly based on how CategoryPath hashes
        // itself. Maybe in the future we can call some static based methods
        // in CategoryPath so that this doesn't break again? I don't like
        // having code in two different places...
        int cid = 0;
        // Skip the initial offset, it's the CategoryPath(0,0), which isn't
        // a hashed value.
        int offset = 1;
        int lastStartOffset = offset;
        // This loop really relies on a well-formed input (assumes pretty blindly
        // that array offsets will work). Since the initial file is machine
        // generated, I think this should be OK.
        while (offset < l2o.labelRepository.Length)
        {
            // identical code to CategoryPath.hashFromSerialized. since we need to
            // advance offset, we cannot call the method directly. perhaps if we
            // could pass a mutable Integer or something...
            // The leading char of each serialized label is its component count.
            int length = (ushort)l2o.labelRepository[offset++];
            int hash = length;
            if (length != 0)
            {
                for (int i = 0; i < length; i++)
                {
                    // Each component is length-prefixed; fold its sequence hash in.
                    int len = (ushort)l2o.labelRepository[offset++];
                    hash = hash * 31 + l2o.labelRepository.Subsequence(offset, len).GetHashCode(); // LUCENENET: Corrected 2nd Subsequence parameter
                    offset += len;
                }
            }
            // Now that we've hashed the components of the label, do the
            // final part of the hash algorithm.
            hash = hash ^ (((int)((uint)hash >> 20)) ^ ((int)((uint)hash >> 12)));
            hash = hash ^ ((int)((uint)hash >> 7)) ^ ((int)((uint)hash >> 4));
            // Add the label, and let's keep going
            l2o.AddLabelOffset(hash, cid, lastStartOffset);
            cid++;
            lastStartOffset = offset;
        }
    }
    catch (SerializationException se)
    {
        // Wrap deserialization failures in an IOException so callers see a
        // single failure type for a corrupt/invalid file.
        throw new IOException("Invalid file format. Cannot deserialize.", se);
    }
    finally
    {
        if (dis != null)
        {
            dis.Dispose();
        }
    }

    l2o.threshold = (int)(l2o.loadFactor * l2o.capacity);
    return(l2o);
}