/// <summary>
        /// Tests whether <paramref name="cp"/> matches the label serialized in
        /// <paramref name="charBlockArray"/> starting at <paramref name="offset"/>.
        /// </summary>
        /// <param name="cp">The label to compare.</param>
        /// <param name="charBlockArray">The array holding the serialized label.</param>
        /// <param name="offset">Index of the serialized component count.</param>
        /// <returns>
        /// <c>true</c> when the component count and every component match; otherwise <c>false</c>.
        /// </returns>
        public static bool EqualsToSerialized(FacetLabel cp, CharBlockArray charBlockArray, int offset)
        {
            // The char at the offset is the serialized component count.
            int storedCount = charBlockArray.CharAt(offset++);
            if (storedCount != cp.Length)
            {
                return false;
            }

            // With zero components the loop is skipped and the labels are equal.
            int index = 0;
            while (index < cp.Length)
            {
                // Each component is stored as a length char followed by its characters.
                int storedLength = charBlockArray.CharAt(offset++);
                var component = cp.Components[index];

                if (storedLength != component.Length
                    || !component.Equals(charBlockArray.SubSequence(offset, offset + storedLength), StringComparison.Ordinal))
                {
                    return false;
                }

                offset += storedLength;
                index++;
            }
            return true;
        }
        /// <summary>
        /// Compares a <see cref="FacetLabel"/> with the label serialized in a
        /// <see cref="CharBlockArray"/> at the given offset.
        /// </summary>
        /// <param name="cp">The label to compare.</param>
        /// <param name="charBlockArray">The array holding the serialized label.</param>
        /// <param name="offset">Index of the serialized component count.</param>
        /// <returns><c>true</c> if both labels have the same components; otherwise <c>false</c>.</returns>
        public static bool EqualsToSerialized(FacetLabel cp, CharBlockArray charBlockArray, int offset)
        {
            // Serialized layout: component count, then (length, characters) per component.
            int serializedCount = charBlockArray[offset++];
            if (serializedCount != cp.Length)
            {
                return false;
            }

            // An empty label has nothing more to compare; the loop simply does not run.
            for (int position = 0; position < cp.Length; position++)
            {
                int serializedLength = charBlockArray[offset++];
                var expected = cp.Components[position];

                bool sameLength = serializedLength == expected.Length;
                if (!sameLength)
                {
                    return false;
                }

                bool sameText = expected.Equals(charBlockArray.SubSequence(offset, offset + serializedLength), StringComparison.Ordinal);
                if (!sameText)
                {
                    return false;
                }

                offset += serializedLength;
            }
            return true;
        }
        /// <summary>
        /// Hashes the label serialized in <paramref name="labelRepository"/> at
        /// <paramref name="offset"/> and then mixes the result with unsigned right shifts.
        /// </summary>
        internal static int StringHashCode(CharBlockArray labelRepository, int offset)
        {
            int mixed = CategoryPathUtils.HashCodeOfSerialized(labelRepository, offset);

            // The casts through uint make the shifts logical (zero-filling) rather than arithmetic.
            mixed ^= (int)((uint)mixed >> 20) ^ (int)((uint)mixed >> 12);
            mixed ^= (int)((uint)mixed >> 7) ^ (int)((uint)mixed >> 4);
            return mixed;
        }
Exemple #4
0
        /// <summary>
        /// Hashes the label serialized in <paramref name="labelRepository"/> at
        /// <paramref name="offset"/> and then mixes the result using <c>TripleShift</c>.
        /// </summary>
        internal static int StringHashCode(CharBlockArray labelRepository, int offset)
        {
            int seed = CategoryPathUtils.HashCodeOfSerialized(labelRepository, offset);

            // First mixing round uses shifts of 20 and 12, the second uses 7 and 4.
            int firstRound = seed ^ seed.TripleShift(20) ^ seed.TripleShift(12);
            int secondRound = firstRound ^ firstRound.TripleShift(7) ^ firstRound.TripleShift(4);
            return secondRound;
        }
Exemple #5
0
 /// <summary>
 /// Asserts that <paramref name="actual"/> has the same length and the same characters
 /// as <paramref name="expected"/>, using <paramref name="msg"/> as the failure message.
 /// </summary>
 private static void AssertEqualsInternal(string msg, StringBuilder expected, CharBlockArray actual)
 {
     // Lengths are checked before any character is compared.
     Assert.AreEqual(expected.Length, actual.Length, msg);

     int position = 0;
     while (position < expected.Length)
     {
         Assert.AreEqual(expected[position], actual[position], msg);
         position++;
     }
 }
Exemple #6
0
        /// <summary>
        /// Creates a collision map over <paramref name="labelRepository"/>. The table size comes from
        /// <c>CompactLabelToOrdinal.DetermineCapacity(2, initialCapacity)</c>, and the threshold is that
        /// size multiplied by the load factor, truncated to an <see cref="int"/>.
        /// </summary>
        private CollisionMap(int initialCapacity, float loadFactor, CharBlockArray labelRepository)
        {
            // Size the entry table first.
            this.capacity_Renamed = CompactLabelToOrdinal.DetermineCapacity(2, initialCapacity);
            this.entries = new Entry[this.capacity_Renamed];

            // The threshold is derived from the stored load factor, so assign that before computing it.
            this.loadFactor = loadFactor;
            this.threshold = (int)(this.capacity_Renamed * this.loadFactor);

            this.labelRepository = labelRepository;
        }
Exemple #7
0
        /// <summary>
        /// Hashes the label serialized in <paramref name="labelRepository"/> at
        /// <paramref name="offset"/> and then mixes the result with unsigned right shifts.
        /// </summary>
        internal static int StringHashCode(CharBlockArray labelRepository, int offset)
        {
            int h = CategoryPathUtils.HashCodeOfSerialized(labelRepository, offset);

            // The casts through uint make the shifts logical (zero-filling) rather than arithmetic.
#pragma warning disable IDE0054 // Use compound assignment
            h = h ^ ((int)((uint)h >> 20) ^ (int)((uint)h >> 12));
            h = h ^ (int)((uint)h >> 7) ^ (int)((uint)h >> 4);
#pragma warning restore IDE0054 // Use compound assignment
            return h;
        }
Exemple #8
0
        /// <summary>
        /// Asserts that <paramref name="actual"/> has the same length and the same characters
        /// as <paramref name="expected"/>, using <paramref name="msg"/> as the failure message.
        /// </summary>
        private static void AssertEqualsInternal(string msg, StringBuilder expected, CharBlockArray actual)
        {
            // LUCENENET specific - the builder is converted to a string once because
            // indexing a string is much faster than indexing a StringBuilder (#295).
            string expectedText = expected.ToString();
            int expectedLength = expectedText.Length;

            Assert.AreEqual(expectedLength, actual.Length, msg);

            int position = 0;
            while (position < expectedLength)
            {
                Assert.AreEqual(expectedText[position], actual[position], msg);
                position++;
            }
        }
 /// <summary>
 /// Appends <paramref name="cp"/> to <paramref name="charBlockArray"/> as a component-count
 /// char followed, for each component, by a length char and the component itself.
 /// </summary>
 /// <param name="cp">The <see cref="FacetLabel"/> to serialize.</param>
 /// <param name="charBlockArray">The <see cref="CharBlockArray"/> to append to.</param>
 public static void Serialize(FacetLabel cp, CharBlockArray charBlockArray)
 {
     // Counts and lengths are written as a single char each.
     int componentCount = cp.Length;
     charBlockArray.Append((char)componentCount);

     // An empty label writes only the count; the loop body never runs.
     for (int index = 0; index < componentCount; index++)
     {
         var component = cp.Components[index];
         charBlockArray.Append((char)component.Length);
         charBlockArray.Append(component);
     }
 }
 /// <summary>
 /// Writes the given <see cref="FacetLabel"/> into the <see cref="CharBlockArray"/>:
 /// first the number of components, then each component prefixed by its length.
 /// </summary>
 /// <param name="cp">The label to write.</param>
 /// <param name="charBlockArray">The destination array.</param>
 public static void Serialize(FacetLabel cp, CharBlockArray charBlockArray)
 {
     int total = cp.Length;

     // Header: the component count stored as one char.
     charBlockArray.Append((char)total);

     int written = 0;
     while (written < total)
     {
         // Per component: a one-char length, then the component text.
         var part = cp.Components[written];
         charBlockArray.Append((char)part.Length);
         charBlockArray.Append(part);
         written++;
     }
 }
        /// <summary>
        /// Creates a fresh label repository containing one serialized empty <see cref="FacetLabel"/>
        /// and fills every slot of <c>hashArrays</c>, halving the size from one slot to the next.
        /// </summary>
        private void Init()
        {
            labelRepository = new CharBlockArray();
            CategoryPathUtils.Serialize(new FacetLabel(), labelRepository);

            // The first hash array gets the full capacity; each following one gets half of the previous.
            int size = capacity;
            int slot = 0;
            while (slot < hashArrays.Length)
            {
                hashArrays[slot] = new HashArray(size);
                size = size / 2;
                slot++;
            }
        }
        /// <summary>
        /// Computes the hash of a path that was written with
        /// <see cref="Serialize(FacetLabel, CharBlockArray)"/>.
        /// </summary>
        /// <param name="charBlockArray">The array holding the serialized path.</param>
        /// <param name="offset">Index of the serialized component count.</param>
        /// <returns>
        /// <c>0</c> for a path with no components; otherwise the component count combined
        /// with each component's hash code using a multiplier of 31.
        /// </returns>
        public static int HashCodeOfSerialized(CharBlockArray charBlockArray, int offset)
        {
            int componentCount = charBlockArray.CharAt(offset++);

            // The count seeds the hash; with zero components the result stays 0.
            int result = componentCount;
            for (int remaining = componentCount; remaining > 0; remaining--)
            {
                int componentLength = charBlockArray.CharAt(offset++);
                var component = charBlockArray.SubSequence(offset, offset + componentLength);
                result = result * 31 + component.GetHashCode();
                offset += componentLength;
            }
            return result;
        }
        /// <summary>
        /// Computes the hash of a path that was written with
        /// <see cref="Serialize(FacetLabel, CharBlockArray)"/>.
        /// </summary>
        /// <param name="charBlockArray">The array holding the serialized path.</param>
        /// <param name="offset">Index of the serialized component count.</param>
        /// <returns>
        /// <c>0</c> for a path with no components; otherwise the component count combined
        /// with each component's hash code using a multiplier of 31.
        /// </returns>
        public static int HashCodeOfSerialized(CharBlockArray charBlockArray, int offset)
        {
            int componentCount = charBlockArray[offset++];

            // Seeded with the count, so an empty path hashes to 0 without entering the loop.
            int accumulator = componentCount;
            int processed = 0;
            while (processed < componentCount)
            {
                int componentLength = charBlockArray[offset++];
                int componentEnd = offset + componentLength;
                accumulator = accumulator * 31 + charBlockArray.SubSequence(offset, componentEnd).GetHashCode();
                offset = componentEnd;
                processed++;
            }
            return accumulator;
        }
        /// <summary>
        /// Computes the hash of a path that was written with
        /// <see cref="Serialize(FacetLabel, CharBlockArray)"/>.
        /// </summary>
        /// <param name="charBlockArray">The array holding the serialized path.</param>
        /// <param name="offset">Index of the serialized component count.</param>
        /// <returns>
        /// <c>0</c> for a path with no components; otherwise the component count combined
        /// with each component's hash code using a multiplier of 31.
        /// </returns>
        public static int HashCodeOfSerialized(CharBlockArray charBlockArray, int offset)
        {
            int componentCount = charBlockArray[offset++];

            // Seeded with the count, so an empty path hashes to 0 without entering the loop.
            int result = componentCount;
            for (int remaining = componentCount; remaining > 0; remaining--)
            {
                int componentLength = charBlockArray[offset++];

                // LUCENENET: the second Subsequence argument is a length, not an end index.
                var component = charBlockArray.Subsequence(offset, componentLength);
                result = result * 31 + component.GetHashCode();
                offset += componentLength;
            }
            return result;
        }
Exemple #15
0
        /// <summary>
        /// Reads a JSON object and rebuilds a <see cref="CharBlockArray"/> from its block-size and
        /// contents properties (property names are matched case-insensitively).
        /// </summary>
        /// <returns>
        /// The rebuilt array, or <c>null</c> when no positive block size or no non-empty
        /// contents value was found.
        /// </returns>
        public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
        {
            var source = JObject.Load(reader);

            int blockSize = -1;
            string contents = null;

            foreach (var candidate in source.Properties().ToArray())
            {
                string name = candidate.Name;

                if (name.Equals(CharBlockArrayConverter.BLOCK_SIZE, StringComparison.OrdinalIgnoreCase))
                {
                    blockSize = candidate.Value.Value<int>();
                }
                else if (name.Equals(CharBlockArrayConverter.CONTENTS, StringComparison.OrdinalIgnoreCase))
                {
                    contents = candidate.Value.Value<string>();
                }

                // Stop scanning as soon as both values are available.
                if (blockSize > 0 && !string.IsNullOrEmpty(contents))
                {
                    break;
                }
            }

            // Either value missing (or empty / non-positive) means nothing can be rebuilt.
            if (blockSize <= 0 || string.IsNullOrEmpty(contents))
            {
                return null;
            }

            var rebuilt = new CharBlockArray(blockSize);
            rebuilt.Append(contents);
            return rebuilt;
        }
 /// <summary>
 /// Asserts that <paramref name="actual"/> has the same length and the same characters
 /// as <paramref name="expected"/>, using <paramref name="msg"/> as the failure message.
 /// </summary>
 private static void AssertEqualsInternal(string msg, StringBuilder expected, CharBlockArray actual)
 {
     // Lengths are checked before any character is compared.
     Assert.AreEqual(expected.Length, actual.Length, msg);

     int position = 0;
     while (position < expected.Length)
     {
         Assert.AreEqual(expected[position], actual.CharAt(position), msg);
         position++;
     }
 }
Exemple #17
0
        /// <summary>
        /// Appends randomly generated text to a <see cref="CharBlockArray"/> and to a
        /// <see cref="StringBuilder"/> in parallel and asserts that both hold the same characters,
        /// then flushes the array to a temporary file, reads it back and asserts again.
        /// </summary>
        public virtual void TestArray()
        {
            CharBlockArray array   = new CharBlockArray();
            StringBuilder  builder = new StringBuilder();

            const int n = 100 * 1000;

            byte[] buffer = new byte[50];

            // This test is turning random bytes into a string, this is asking for trouble,
            // so the bytes are decoded with "?" replacement fallbacks. This is essentially
            // the equivalent of
            // CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
            //     .onUnmappableCharacter(CodingErrorAction.REPLACE)
            //     .onMalformedInput(CodingErrorAction.REPLACE);
            //
            // The encoding does not depend on the loop variables, so it is created once
            // here instead of once per iteration.
            Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
                                                    new EncoderReplacementFallback("?"),
                                                    new DecoderReplacementFallback("?"));

            // First pass: append each decoded string as a whole.
            for (int i = 0; i < n; i++)
            {
                Random().NextBytes(buffer);
                int size = 1 + Random().Next(50);
                string s = decoder.GetString(buffer, 0, size);
                array.Append(s);
                builder.Append(s);
            }

            // Second pass: same as the first.
            for (int i = 0; i < n; i++)
            {
                Random().NextBytes(buffer);
                int size = 1 + Random().Next(50);
                string s = decoder.GetString(buffer, 0, size);
                array.Append(s);
                builder.Append(s);
            }

            // Third pass: append to the array one char at a time.
            for (int i = 0; i < n; i++)
            {
                Random().NextBytes(buffer);
                int size = 1 + Random().Next(50);
                string s = decoder.GetString(buffer, 0, size);
                for (int j = 0; j < s.Length; j++)
                {
                    array.Append(s[j]);
                }
                builder.Append(s);
            }

            AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", builder, array);

            DirectoryInfo tempDir = CreateTempDir("growingchararray");
            FileInfo      f       = new FileInfo(Path.Combine(tempDir.FullName, "GrowingCharArrayTest.tmp"));

            // Write the array out, then read it back and compare against the builder again.
            using (var @out = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.Write))
            {
                array.Flush(@out);
                @out.Flush();
            }

            using (var @in = new FileStream(f.FullName, FileMode.Open, FileAccess.Read))
            {
                array = CharBlockArray.Open(@in);
                AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch after flush/load.", builder, array);
            }
            f.Delete();
        }
Exemple #18
0
 /// <summary>
 /// Creates a collision map with the given initial capacity and a load factor of 0.75.
 /// </summary>
 internal CollisionMap(int initialCapacity, CharBlockArray labelRepository)
     : this(initialCapacity, 0.75F, labelRepository) { }
Exemple #19
0
        /// <summary>
        /// Appends randomly generated text to a <see cref="CharBlockArray"/> and to a
        /// <see cref="StringBuilder"/> in parallel and asserts that both hold the same characters,
        /// then flushes the array to a temporary file, reads it back and asserts again.
        /// </summary>
        public virtual void TestArray()
        {
            const int iterations = 100 * 1000;

            var blocks = new CharBlockArray();
            var reference = new StringBuilder();
            var scratch = new byte[50];

            // Passes one and two: append every decoded string in a single call.
            for (int pass = 0; pass < 2; pass++)
            {
                for (int i = 0; i < iterations; i++)
                {
                    Random().NextBytes(scratch);
                    int byteCount = 1 + Random().Next(50);

                    // This test is turning random bytes into a string,
                    // this is asking for trouble.
                    string text = Encoding.UTF8.GetString(scratch, 0, byteCount);
                    blocks.Append(text);
                    reference.Append(text);
                }
            }

            // Pass three: append to the array one char at a time.
            for (int i = 0; i < iterations; i++)
            {
                Random().NextBytes(scratch);
                int byteCount = 1 + Random().Next(50);
                string text = Encoding.UTF8.GetString(scratch, 0, byteCount);
                foreach (char ch in text)
                {
                    blocks.Append(ch);
                }
                reference.Append(text);
            }

            AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", reference, blocks);

            var tempFile = new FileInfo(Path.Combine(CreateTempDir("growingchararray").FullName, "GrowingCharArrayTest.tmp"));

            // Write the array out, then read it back and compare against the builder again.
            using (var output = new FileStream(tempFile.FullName, FileMode.OpenOrCreate, FileAccess.Write))
            {
                blocks.Flush(output);
                output.Flush();
            }

            using (var input = new FileStream(tempFile.FullName, FileMode.Open, FileAccess.Read))
            {
                blocks = CharBlockArray.Open(input);
                AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch after flush/load.", reference, blocks);
            }
            tempFile.Delete();
        }
Exemple #20
0
 /// <summary>
 /// Creates a collision map with an initial capacity of 16 * 1024 and a load factor of 0.75.
 /// </summary>
 internal CollisionMap(CharBlockArray labelRepository)
     : this(16 * 1024, 0.75F, labelRepository) { }
 /// <summary>
 /// Hashes the label serialized in <paramref name="labelRepository"/> at
 /// <paramref name="offset"/> and then mixes the result with unsigned right shifts.
 /// </summary>
 internal static int StringHashCode(CharBlockArray labelRepository, int offset)
 {
     int seed = CategoryPathUtils.HashCodeOfSerialized(labelRepository, offset);

     // The casts through uint make the shifts logical (zero-filling) rather than arithmetic.
     int firstRound = seed ^ ((int)((uint)seed >> 20) ^ (int)((uint)seed >> 12));
     int secondRound = firstRound ^ (int)((uint)firstRound >> 7) ^ (int)((uint)firstRound >> 4);
     return secondRound;
 }
        /// <summary>
        /// Creates a fresh label repository containing one serialized empty <see cref="FacetLabel"/>
        /// and fills every slot of <c>hashArrays</c>, halving the size from one slot to the next.
        /// </summary>
        private void Init()
        {
            labelRepository = new CharBlockArray();
            CategoryPathUtils.Serialize(new FacetLabel(), labelRepository);

            // Start at the full capacity and halve it after each hash array is created.
            int nextSize = capacity;
            for (int slot = 0; slot < hashArrays.Length; slot++, nextSize = nextSize / 2)
            {
                hashArrays[slot] = new HashArray(nextSize);
            }
        }
        /// <summary>
        /// Appends randomly generated text to a <see cref="CharBlockArray"/> and to a
        /// <see cref="StringBuilder"/> in parallel and asserts that both hold the same characters,
        /// then flushes the array to a temporary file, reads it back and asserts again.
        /// </summary>
        public virtual void TestArray()
        {
            const int rounds = 100 * 1000;

            var charBlocks = new CharBlockArray();
            var expected = new StringBuilder();
            var randomBytes = new byte[50];

            // The first two passes are identical: each decoded string is appended whole.
            int pass = 0;
            while (pass < 2)
            {
                for (int round = 0; round < rounds; round++)
                {
                    Random().NextBytes(randomBytes);
                    int length = 1 + Random().Next(50);

                    // This test is turning random bytes into a string,
                    // this is asking for trouble.
                    string decoded = Encoding.UTF8.GetString(randomBytes, 0, length);
                    charBlocks.Append(decoded);
                    expected.Append(decoded);
                }
                pass++;
            }

            // The last pass feeds the array one char at a time.
            for (int round = 0; round < rounds; round++)
            {
                Random().NextBytes(randomBytes);
                int length = 1 + Random().Next(50);
                string decoded = Encoding.UTF8.GetString(randomBytes, 0, length);
                foreach (char c in decoded)
                {
                    charBlocks.Append(c);
                }
                expected.Append(decoded);
            }

            AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch.", expected, charBlocks);

            DirectoryInfo directory = CreateTempDir("growingchararray");
            var file = new FileInfo(Path.Combine(directory.FullName, "GrowingCharArrayTest.tmp"));

            // Round-trip the array through the file and compare with the builder once more.
            using (var writeStream = new FileStream(file.FullName, FileMode.OpenOrCreate, FileAccess.Write))
            {
                charBlocks.Flush(writeStream);
                writeStream.Flush();
            }

            using (var readStream = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
            {
                charBlocks = CharBlockArray.Open(readStream);
                AssertEqualsInternal("GrowingCharArray<->StringBuilder mismatch after flush/load.", expected, charBlocks);
            }
            file.Delete();
        }
        /// <summary>
        /// Opens the file and reloads the CompactLabelToOrdinal. The file it expects
        /// is generated from the <see cref="Flush(Stream)"/> command.
        /// </summary>
        /// <param name="file">The file to read.</param>
        /// <param name="loadFactor">Load factor assigned to the returned instance.</param>
        /// <param name="numHashArrays">Number of hash arrays allocated for the returned instance.</param>
        /// <returns>The reloaded instance.</returns>
        /// <exception cref="IOException">
        /// A <see cref="SerializationException"/> was raised while reading the file.
        /// </exception>
        internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int numHashArrays)
        {
            // Part of the file is the label repository; the hashes and label offsets are
            // not stored, so every label is re-hashed and re-added while loading.
            CompactLabelToOrdinal loaded = new CompactLabelToOrdinal
            {
                loadFactor = loadFactor,
                hashArrays = new HashArray[numHashArrays]
            };

            try
            {
                using (BinaryReader input = new BinaryReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read)))
                {
                    // The file starts with the "counter" (occupancy), which is needed to
                    // know the next unique facet.
                    loaded.m_counter = input.ReadInt32();

                    loaded.capacity = DetermineCapacity((int)Math.Pow(2, loaded.hashArrays.Length), loaded.m_counter);
                    loaded.Init();

                    // The characters of the label repository follow the counter.
                    loaded.labelRepository = CharBlockArray.Open(input.BaseStream);
                    loaded.collisionMap = new CollisionMap(loaded.labelRepository);

                    // Skip the initial offset: it holds the CategoryPath(0,0), which isn't
                    // a hashed value.
                    int offset = 1;

                    // This loop relies on well-formed input (array offsets are not validated);
                    // the file is expected to be machine generated.
                    for (int ordinal = 0; offset < loaded.labelRepository.Length; ordinal++)
                    {
                        int labelStart = offset;

                        // Same computation as hashing a serialized label, inlined here because
                        // the offset has to advance past the label as it is hashed.
                        int componentCount = (ushort)loaded.labelRepository[offset++];
                        int hash = componentCount;
                        for (int component = 0; component < componentCount; component++)
                        {
                            int componentLength = (ushort)loaded.labelRepository[offset++];
                            // LUCENENET: the second Subsequence argument is a length, not an end index.
                            hash = hash * 31 + loaded.labelRepository.Subsequence(offset, componentLength).GetHashCode();
                            offset += componentLength;
                        }

                        // Final mixing step of the hash algorithm.
                        hash ^= (int)((uint)hash >> 20) ^ (int)((uint)hash >> 12);
                        hash ^= (int)((uint)hash >> 7) ^ (int)((uint)hash >> 4);

                        // Register the label under its ordinal and starting offset.
                        loaded.AddLabelOffset(hash, ordinal, labelStart);
                    }
                }
            }
            catch (SerializationException se)
            {
                throw new IOException("Invalid file format. Cannot deserialize.", se);
            }

            loaded.threshold = (int)(loaded.loadFactor * loaded.capacity);
            return loaded;
        }