This is a very efficient LabelToOrdinal implementation that uses a CharBlockArray to store all labels and a configurable number of HashArrays to reference the labels.

Since the HashArrays don't handle collisions, a CollisionMap is used to store the colliding labels.

This data structure grows by adding a new HashArray whenever the number of collisions in the CollisionMap exceeds loadFactor * getMaxOrdinal(). Growing also includes reinserting all colliding labels into the HashArrays to possibly reduce the number of collisions. To set the loadFactor, see the constructor CompactLabelToOrdinal(int, float, int).

This data structure has a much lower memory footprint (~30%) compared to a Java HashMap<String, Integer>. It also uses only a small fraction of the objects a HashMap would use, thus limiting the GC overhead. Ingestion speed was also ~50% faster compared to a HashMap for 3M unique labels. This API is experimental (@lucene.experimental).

Inheritance: Lucene.Net.Facet.Taxonomy.WriterCache.LabelToOrdinal
 /// <summary>
 /// Drops the reference held in <c>cache</c> while holding the monitor on this instance.
 /// </summary>
 public virtual void Dispose()
 {
     // NOTE(review): locking on 'this' lets any external holder of this instance contend
     // for the same monitor; a private lock object would avoid that, but it needs a field
     // declared outside this method.
     lock (this)
     {
         cache = null;
     }
 }
 /// <summary>
 /// Releases the cache reference and disposes the reader/writer lock guarding it.
 /// </summary>
 /// <param name="disposing">
 /// <c>true</c> when invoked from an explicit dispose call; when <c>false</c> this method does nothing.
 /// </param>
 /// <exception cref="ArgumentException">
 /// The write lock could not be entered within <c>LOCK_TIMEOUT</c>.
 /// </exception>
 protected virtual void Dispose(bool disposing) // LUCENENET specific - use proper dispose pattern
 {
     if (!disposing || isDisposed)
     {
         return;
     }

     if (!@lock.TryEnterWriteLock(LOCK_TIMEOUT))
     {
         // Timed out waiting for the write lock. ArgumentException is what this method
         // has always thrown here, so the exception type is kept for existing callers.
         throw new ArgumentException();
     }

     bool disposeLock = false;
     try
     {
         // Re-check under the write lock: another caller may have completed the
         // disposal between the unsynchronized check above and acquiring the lock.
         if (!isDisposed)
         {
             cache = null;
             isDisposed = true;
             disposeLock = true;
         }
     }
     finally
     {
         // The write lock must be released on every path. Previously the
         // "already disposed" path returned while still holding it.
         @lock.ExitWriteLock();
     }

     // Only the caller that actually performed the disposal tears down the lock object.
     if (disposeLock)
     {
         @lock.Dispose();
     }
 }
Beispiel #3
0
        /// <summary>
        /// Clears the cache reference under the write lock and then disposes that lock.
        /// </summary>
        /// <param name="disposing">
        /// <c>true</c> when invoked from an explicit dispose call; when <c>false</c> this method does nothing.
        /// </param>
        protected virtual void Dispose(bool disposing) // LUCENENET specific - use proper dispose pattern
        {
            // Nothing to do for the non-disposing path, or when disposal already happened.
            if (!disposing || isDisposed)
            {
                return;
            }

            // LUCENENET: The extra monitor guarantees that only the first caller
            // gets to dispose our ReaderWriterLockSlim.
            lock (disposalLock)
            {
                // Second check, now that we hold disposalLock.
                if (isDisposed)
                {
                    return;
                }

                syncLock.EnterWriteLock();
                try
                {
                    cache = null;
                }
                finally
                {
                    // Leave the write lock before disposing it, and record the disposal in between.
                    syncLock.ExitWriteLock();
                    isDisposed = true;
                    syncLock.Dispose();
                }
            }
        }
 /// <summary>
 /// Drops the reference held in <c>cache</c> while holding the monitor on this instance.
 /// </summary>
 public virtual void Close()
 {
     // NOTE(review): locking on 'this' lets any external holder of this instance contend
     // for the same monitor; a private lock object would avoid that, but it needs a field
     // declared outside this method.
     lock (this)
     {
         cache = null;
     }
 }
 /// <summary>
 /// Sole constructor. Stores the three sizing arguments in fields and builds the
 /// underlying <see cref="CompactLabelToOrdinal"/> from them.
 /// </summary>
 /// <param name="initialCapcity">Initial capacity passed to <see cref="CompactLabelToOrdinal"/>.</param>
 /// <param name="loadFactor">Load factor passed to <see cref="CompactLabelToOrdinal"/>.</param>
 /// <param name="numHashArrays">Number of hash arrays passed to <see cref="CompactLabelToOrdinal"/>.</param>
 public Cl2oTaxonomyWriterCache(int initialCapcity, float loadFactor, int numHashArrays)
 {
     // Keep the arguments in fields first...
     this.initialCapcity = initialCapcity;
     this.loadFactor = loadFactor;
     this.numHashArrays = numHashArrays;

     // ...then create the cache itself.
     this.cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
 }
 /// <summary>
 /// Sole constructor. Creates the backing <see cref="CompactLabelToOrdinal"/> and
 /// records the arguments it was created with.
 /// </summary>
 /// <param name="initialCapcity">Initial capacity for the backing <see cref="CompactLabelToOrdinal"/>.</param>
 /// <param name="loadFactor">Load factor for the backing <see cref="CompactLabelToOrdinal"/>.</param>
 /// <param name="numHashArrays">Number of hash arrays for the backing <see cref="CompactLabelToOrdinal"/>.</param>
 public Cl2oTaxonomyWriterCache(int initialCapcity, float loadFactor, int numHashArrays)
 {
     cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);

     // Record the construction arguments in the same-named fields.
     this.numHashArrays = numHashArrays;
     this.loadFactor = loadFactor;
     this.initialCapcity = initialCapcity;
 }
Beispiel #7
0
        /// <summary>
        /// Creates a collision map over the given label repository, sizing its entry
        /// table via <c>CompactLabelToOrdinal.DetermineCapacity(2, initialCapacity)</c>.
        /// </summary>
        /// <param name="initialCapacity">Requested capacity, passed to <c>DetermineCapacity</c>.</param>
        /// <param name="loadFactor">Factor multiplied with the capacity to obtain the threshold.</param>
        /// <param name="labelRepository">Character storage that this map keeps a reference to.</param>
        private CollisionMap(int initialCapacity, float loadFactor, CharBlockArray labelRepository)
        {
            // Size the entry table.
            this.capacity_Renamed = CompactLabelToOrdinal.DetermineCapacity(2, initialCapacity);
            this.entries = new Entry[this.capacity_Renamed];

            // threshold = capacity * loadFactor, truncated to int.
            this.loadFactor = loadFactor;
            this.threshold = (int)(this.capacity_Renamed * this.loadFactor);

            this.labelRepository = labelRepository;
        }
 /// <summary>
 /// Replaces the current cache with a freshly constructed <see cref="CompactLabelToOrdinal"/>
 /// while holding the writer lock.
 /// </summary>
 public virtual void Clear()
 {
     @lock.AcquireWriterLock(LockTimeOut);
     try
     {
         // Build the replacement from the stored sizing fields, then publish it.
         CompactLabelToOrdinal replacement = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
         cache = replacement;
     }
     finally
     {
         // Release the writer lock even if construction throws.
         @lock.ReleaseWriterLock();
     }
 }
 /// <summary>
 /// Swaps in a newly created <see cref="CompactLabelToOrdinal"/> for the current cache.
 /// The swap happens under the writer lock.
 /// </summary>
 public virtual void Clear()
 {
     @lock.AcquireWriterLock(LockTimeOut);
     try
     {
         // The new instance is built from the initialCapcity, loadFactor and numHashArrays fields.
         CompactLabelToOrdinal freshCache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
         cache = freshCache;
     }
     finally
     {
         // Always hand the writer lock back.
         @lock.ReleaseWriterLock();
     }
 }
 /// <summary>
 /// Replaces the current cache with a freshly constructed <see cref="CompactLabelToOrdinal"/>
 /// while holding the write lock of <c>syncLock</c>.
 /// </summary>
 public virtual void Clear()
 {
     syncLock.EnterWriteLock();
     try
     {
         // Build the replacement from the stored sizing fields, then publish it.
         CompactLabelToOrdinal replacement = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
         cache = replacement;
     }
     finally
     {
         // Leave the write lock even if construction throws.
         syncLock.ExitWriteLock();
     }
 }
        /// <summary>
        /// Tries to record an already-known label offset and its ordinal in the slot of
        /// <paramref name="a"/> selected by <paramref name="hash"/>.
        /// </summary>
        /// <param name="a">Hash array whose <c>offsets</c> and <c>cids</c> are updated.</param>
        /// <param name="hash">Hash used to pick the slot via <c>CompactLabelToOrdinal.IndexFor</c>.</param>
        /// <param name="ordinal">Value written to <c>cids</c> for the slot.</param>
        /// <param name="knownOffset">Value written to <c>offsets</c> for the slot.</param>
        /// <returns><c>true</c> if the slot held offset 0 and was filled; <c>false</c> otherwise.</returns>
        private bool AddLabelOffsetToHashArray(HashArray a, int hash, int ordinal, int knownOffset)
        {
            int slot = CompactLabelToOrdinal.IndexFor(hash, a.offsets.Length);

            // A non-zero offset means the slot is already taken; leave it untouched.
            if (a.offsets[slot] != 0)
            {
                return false;
            }

            a.offsets[slot] = knownOffset;
            a.cids[slot] = ordinal;
            return true;
        }
Beispiel #12
0
        /// <summary>
        /// Stores <paramref name="knownOffset"/> and <paramref name="ordinal"/> in the slot of
        /// <paramref name="a"/> chosen by <paramref name="hash"/>, provided that slot's offset is 0.
        /// </summary>
        /// <param name="a">Hash array whose <c>offsets</c> and <c>cids</c> are updated.</param>
        /// <param name="hash">Hash used to pick the slot via <c>CompactLabelToOrdinal.IndexFor</c>.</param>
        /// <param name="ordinal">Value written to <c>cids</c> for the slot.</param>
        /// <param name="knownOffset">Value written to <c>offsets</c> for the slot.</param>
        /// <returns><c>true</c> when the slot was filled; <c>false</c> when its offset was non-zero.</returns>
        private static bool AddLabelOffsetToHashArray(HashArray a, int hash, int ordinal, int knownOffset) // LUCENENET: CA1822: Mark members as static
        {
            int position = CompactLabelToOrdinal.IndexFor(hash, a.offsets.Length);
            bool slotIsFree = a.offsets[position] == 0;

            if (slotIsFree)
            {
                a.offsets[position] = knownOffset;
                a.cids[position] = ordinal;
            }

            return slotIsFree;
        }
        /// <summary>
        /// Tries to place <paramref name="label"/> in the slot of <paramref name="a"/> selected by
        /// <paramref name="hash"/>; on success the label is serialized into <c>labelRepository</c>.
        /// </summary>
        /// <param name="a">Hash array whose <c>offsets</c> and <c>cids</c> are updated.</param>
        /// <param name="label">Label to serialize.</param>
        /// <param name="hash">Hash used to pick the slot via <c>CompactLabelToOrdinal.IndexFor</c>.</param>
        /// <param name="ordinal">Value written to <c>cids</c> for the slot.</param>
        /// <returns><c>true</c> if the slot held offset 0 and was filled; <c>false</c> otherwise.</returns>
        private bool AddLabel(HashArray a, FacetLabel label, int hash, int ordinal)
        {
            int slot = CompactLabelToOrdinal.IndexFor(hash, a.offsets.Length);

            // Occupied slot: nothing is written.
            if (a.offsets[slot] != 0)
            {
                return false;
            }

            // Capture the repository length BEFORE serializing, so the stored offset is the
            // value the length had prior to the Serialize call.
            a.offsets[slot] = this.labelRepository.Length;
            CategoryPathUtils.Serialize(label, labelRepository);
            a.cids[slot] = ordinal;
            return true;
        }
 /// <summary>
 /// Replaces the current cache with a freshly constructed <see cref="CompactLabelToOrdinal"/>
 /// while holding the write lock.
 /// </summary>
 /// <exception cref="ArgumentException">
 /// The write lock could not be entered within <c>LOCK_TIMEOUT</c>.
 /// </exception>
 public virtual void Clear()
 {
     if (!@lock.TryEnterWriteLock(LOCK_TIMEOUT))
     {
         // Timed out: an ArgumentException is thrown, which the original code explains
         // as keeping the behavior of the earlier ReaderWriterLock-based implementation.
         throw new ArgumentException();
     }

     try
     {
         cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
     }
     finally
     {
         // Leave the write lock even if construction throws.
         @lock.ExitWriteLock();
     }
 }
        /// <summary>
        /// Looks up the ordinal stored for <paramref name="label"/>.
        /// </summary>
        /// <param name="label">Label to look up; may be <c>null</c>.</param>
        /// <returns>
        /// <c>LabelToOrdinal.INVALID_ORDINAL</c> for a <c>null</c> label; otherwise the first
        /// hash-array result that is not <c>COLLISION</c>, or the collision map's answer when
        /// every hash array reports <c>COLLISION</c>.
        /// </returns>
        public override int GetOrdinal(FacetLabel label)
        {
            if (label == null)
            {
                return LabelToOrdinal.INVALID_ORDINAL;
            }

            int labelHash = CompactLabelToOrdinal.StringHashCode(label);

            // Ask each hash array in order; anything other than COLLISION is a final answer.
            foreach (HashArray candidate in this.hashArrays)
            {
                int found = GetOrdinal(candidate, label, labelHash);
                if (found != COLLISION)
                {
                    return found;
                }
            }

            // Every hash array reported COLLISION: defer to the collision map.
            return this.collisionMap.Get(label, labelHash);
        }
        /// <summary>
        /// Adds <paramref name="label"/> with the given <paramref name="ordinal"/>, first trying
        /// each hash array and falling back to the collision map.
        /// </summary>
        /// <param name="label">Label to add.</param>
        /// <param name="ordinal">Ordinal to associate with the label.</param>
        /// <exception cref="ArgumentException">
        /// The collision map returned an ordinal different from <paramref name="ordinal"/>.
        /// </exception>
        public override void AddLabel(FacetLabel label, int ordinal)
        {
            // Grow before inserting once the collision map holds more than threshold entries.
            if (collisionMap.Count > threshold)
            {
                Grow();
            }

            int labelHash = CompactLabelToOrdinal.StringHashCode(label);

            // The first hash array that accepts the label ends the operation.
            foreach (HashArray target in this.hashArrays)
            {
                if (AddLabel(target, label, labelHash, ordinal))
                {
                    return;
                }
            }

            // No hash array accepted the label: hand it to the collision map.
            int storedOrdinal = collisionMap.AddLabel(label, labelHash, ordinal);
            if (storedOrdinal == ordinal)
            {
                return;
            }

            throw new ArgumentException("Label already exists: " + label + " prev ordinal " + storedOrdinal);
        }
Beispiel #17
0
        // Randomized test: feeds the same labels to a LabelToOrdinalMap and a
        // CompactLabelToOrdinal and asserts that both always report the same ordinal.
        // Along the way the compact structure is flushed to a temp file and reopened.
        public virtual void TestL2O()
        {
            LabelToOrdinal map = new LabelToOrdinalMap();

            CompactLabelToOrdinal compact = new CompactLabelToOrdinal(2000000, 0.15f, 3);

            int       n = AtLeast(10 * 1000);
            const int numUniqueValues = 50 * 1000;

            string[] uniqueValues = new string[numUniqueValues];
            byte[]   buffer       = new byte[50];

            Random random = Random();

            // Fill uniqueValues with random strings. Note that i is only advanced at the
            // bottom of the loop, so a rejected candidate is regenerated into the same slot.
            for (int i = 0; i < numUniqueValues;)
            {
                random.NextBytes(buffer);
                int size = 1 + random.Next(buffer.Length);

                // This test is turning random bytes into a string,
                // this is asking for trouble.
                uniqueValues[i] = Encoding.UTF8.GetString(buffer, 0, size);
                // we cannot have empty path components, so eliminate all prefix as well
                // as middle consecutive delimiter chars.
                uniqueValues[i] = Regex.Replace(uniqueValues[i], "/+", "/");
                if (uniqueValues[i].StartsWith("/", StringComparison.Ordinal))
                {
                    uniqueValues[i] = uniqueValues[i].Substring(1);
                }
                // Accept the candidate only if it does not contain the terminator char.
                if (uniqueValues[i].IndexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1)
                {
                    i++;
                }
            }

            var tmpDir        = CreateTempDir("testLableToOrdinal");
            var f             = new FileInfo(Path.Combine(tmpDir.FullName, "CompactLabelToOrdinalTest.tmp"));
            int flushInterval = 10;

            for (int i = 0; i < n; i++)
            {
                // Every flushInterval iterations: flush to disk, reopen from the file,
                // then delete the file and check that it is gone.
                if (i > 0 && i % flushInterval == 0)
                {
                    using (var fileStream = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite))
                    {
                        compact.Flush(fileStream);
                    }
                    compact = CompactLabelToOrdinal.Open(f, 0.15f, 3);
                    // FileInfo.Delete() returns void, so the deletion is verified through
                    // File.Exists below instead of asserting on a return value.
                    //assertTrue(f.Delete());
                    f.Delete();
                    assertFalse(File.Exists(f.FullName));
                    // Flush less often as the run progresses: the interval grows tenfold
                    // while it is still below n / 10.
                    if (flushInterval < (n / 10))
                    {
                        flushInterval *= 10;
                    }
                }

                // Pick a random value and turn it into a label, one component per '/'-separated part.
                int        index = random.Next(numUniqueValues);
                FacetLabel label;
                string     s = uniqueValues[index];
                if (s.Length == 0)
                {
                    label = new FacetLabel();
                }
                else
                {
                    label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                }

                int ord1 = map.GetOrdinal(label);
                int ord2 = compact.GetOrdinal(label);

                if (VERBOSE)
                {
                    Console.WriteLine("Testing label: " + label.ToString());
                }

                assertEquals(ord1, ord2);

                // Label not known yet: take the next ordinal from the compact structure
                // and register the label under it in both structures.
                if (ord1 == LabelToOrdinal.INVALID_ORDINAL)
                {
                    ord1 = compact.GetNextOrdinal();
                    map.AddLabel(label, ord1);
                    compact.AddLabel(label, ord1);
                }
            }

            // Final pass over every generated value (whether or not it was picked above):
            // both structures must agree on the ordinal.
            for (int i = 0; i < numUniqueValues; i++)
            {
                FacetLabel label;
                string     s = uniqueValues[i];
                if (s.Length == 0)
                {
                    label = new FacetLabel();
                }
                else
                {
                    label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                }
                int ord1 = map.GetOrdinal(label);
                int ord2 = compact.GetOrdinal(label);

                if (VERBOSE)
                {
                    Console.WriteLine("Testing label 2: " + label.ToString());
                }

                assertEquals(ord1, ord2);
            }
        }
Beispiel #18
0
        // Deterministic variant of the label-to-ordinal test: a fixed list of strings is fed
        // to a LabelToOrdinalMap and a CompactLabelToOrdinal, asserting that both always
        // report the same ordinal. The compact structure is periodically flushed and reopened.
        public virtual void TestL2OBasic()
        {
            LabelToOrdinal map = new LabelToOrdinalMap();

            CompactLabelToOrdinal compact = new CompactLabelToOrdinal(200, 0.15f, 3);

            int n = 50;

            // NOTE(review): the first two entries are verbatim (@) strings containing U+FFFD
            // replacement characters; in a verbatim string sequences such as \u0382 or \r are
            // literal backslash text, not escapes. Presumably remnants of an encoding
            // round-trip - confirm they are intended.
            string[] uniqueValues = new string[]
            {
                @"�",
                @"�r�G��F�\u0382�7\u0019�h�\u0015���#\u001d3\r{��q�_���Ԃ������",
                "foo bar one",
                new string(new char[] { (char)65533, (char)65533, (char)65, (char)65533, (char)45, (char)106, (char)40, (char)643, (char)65533, (char)11, (char)65533, (char)88, (char)65533, (char)78, (char)126, (char)56, (char)12, (char)71 }),
                "foo bar two",
                "foo bar three",
                "foo bar four",
                "foo bar five",
                "foo bar six",
                "foo bar seven",
                "foo bar eight",
                "foo bar nine",
                "foo bar ten",
                "foo/bar/one",
                "foo/bar/two",
                "foo/bar/three",
                "foo/bar/four",
                "foo/bar/five",
                "foo/bar/six",
                "foo/bar/seven",
                "foo/bar/eight",
                "foo/bar/nine",
                "foo/bar/ten",
                ""
            };

            var tmpDir        = CreateTempDir("testLableToOrdinal");
            var f             = new FileInfo(Path.Combine(tmpDir.FullName, "CompactLabelToOrdinalTest.tmp"));
            int flushInterval = 10;

            for (int i = 0; i < n; i++)
            {
                // Every flushInterval iterations: flush to disk, reopen from the file,
                // then delete the file and check that it is gone.
                if (i > 0 && i % flushInterval == 0)
                {
                    using (var fileStream = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite))
                    {
                        compact.Flush(fileStream);
                    }
                    compact = CompactLabelToOrdinal.Open(f, 0.15f, 3);
                    // FileInfo.Delete() returns void, so the deletion is verified through
                    // File.Exists below instead of asserting on a return value.
                    //assertTrue(f.Delete());
                    f.Delete();
                    assertFalse(File.Exists(f.FullName));
                    // With n = 50, n / 10 is 5, which the initial interval of 10 already
                    // exceeds, so the interval is never enlarged in this test.
                    if (flushInterval < (n / 10))
                    {
                        flushInterval *= 10;
                    }
                }

                // The initial value is overwritten on the first pass of the loop below.
                FacetLabel label = new FacetLabel();
                foreach (string s in uniqueValues)
                {
                    // Empty string -> label built with the parameterless constructor;
                    // otherwise one component per '/'-separated part.
                    if (s.Length == 0)
                    {
                        label = new FacetLabel();
                    }
                    else
                    {
                        label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                    }

                    int ord1 = map.GetOrdinal(label);
                    int ord2 = compact.GetOrdinal(label);

                    if (VERBOSE)
                    {
                        Console.WriteLine("Testing label: " + label.ToString());
                    }

                    assertEquals(ord1, ord2);

                    // Label not known yet: take the next ordinal from the compact structure
                    // and register the label under it in both structures.
                    if (ord1 == LabelToOrdinal.INVALID_ORDINAL)
                    {
                        ord1 = compact.GetNextOrdinal();
                        map.AddLabel(label, ord1);
                        compact.AddLabel(label, ord1);
                    }
                }
            }

            // Final pass over every value: both structures must agree on the ordinal.
            for (int i = 0; i < uniqueValues.Length; i++)
            {
                FacetLabel label;
                string     s = uniqueValues[i];
                if (s.Length == 0)
                {
                    label = new FacetLabel();
                }
                else
                {
                    label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                }
                int ord1 = map.GetOrdinal(label);
                int ord2 = compact.GetOrdinal(label);

                if (VERBOSE)
                {
                    Console.WriteLine("Testing label 2: " + label.ToString());
                }

                assertEquals(ord1, ord2);
            }
        }
        // Deterministic variant of the label-to-ordinal test: a fixed list of strings is fed
        // to a LabelToOrdinalMap and a CompactLabelToOrdinal, asserting that both always
        // report the same ordinal. The compact structure is periodically flushed and reopened.
        public virtual void TestL2OBasic()
        {
            LabelToOrdinal map = new LabelToOrdinalMap();

            CompactLabelToOrdinal compact = new CompactLabelToOrdinal(200, 0.15f, 3);

            int n = 50;

            // NOTE(review): the first two entries are verbatim (@) strings containing U+FFFD
            // replacement characters; in a verbatim string sequences such as \u0382 or \r are
            // literal backslash text, not escapes. Presumably remnants of an encoding
            // round-trip - confirm they are intended.
            string[] uniqueValues = new string[]
            {
                @"�",
                @"�r�G��F�\u0382�7\u0019�h�\u0015���#\u001d3\r{��q�_���Ԃ������",
                "foo bar one",
                new string(new char[] { (char)65533, (char)65533, (char)65, (char)65533, (char)45, (char)106, (char)40, (char)643, (char)65533, (char)11, (char)65533, (char)88, (char)65533, (char)78, (char)126, (char)56, (char)12, (char)71 }),
                "foo bar two",
                "foo bar three",
                "foo bar four",
                "foo bar five",
                "foo bar six",
                "foo bar seven",
                "foo bar eight",
                "foo bar nine",
                "foo bar ten",
                "foo/bar/one",
                "foo/bar/two",
                "foo/bar/three",
                "foo/bar/four",
                "foo/bar/five",
                "foo/bar/six",
                "foo/bar/seven",
                "foo/bar/eight",
                "foo/bar/nine",
                "foo/bar/ten",
                ""
            };

            var tmpDir = CreateTempDir("testLableToOrdinal");
            var f = new FileInfo(Path.Combine(tmpDir.FullName, "CompactLabelToOrdinalTest.tmp"));
            int flushInterval = 10;

            for (int i = 0; i < n; i++)
            {
                // Every flushInterval iterations: flush to disk, reopen from the file,
                // then delete the file and check that it is gone.
                if (i > 0 && i % flushInterval == 0)
                {
                    using (var fileStream = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite))
                    {
                        compact.Flush(fileStream);
                    }
                    compact = CompactLabelToOrdinal.Open(f, 0.15f, 3);
                    // FileInfo.Delete() returns void, so the deletion is verified through
                    // File.Exists below instead of asserting on a return value.
                    //assertTrue(f.Delete());
                    f.Delete();
                    assertFalse(File.Exists(f.FullName));
                    // With n = 50, n / 10 is 5, which the initial interval of 10 already
                    // exceeds, so the interval is never enlarged in this test.
                    if (flushInterval < (n / 10))
                    {
                        flushInterval *= 10;
                    }
                }

                // The initial value is overwritten on the first pass of the loop below.
                FacetLabel label = new FacetLabel();
                foreach (string s in uniqueValues)
                {
                    // Empty string -> label built with the parameterless constructor;
                    // otherwise one component per '/'-separated part.
                    if (s.Length == 0)
                    {
                        label = new FacetLabel();
                    }
                    else
                    {
                        label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                    }

                    int ord1 = map.GetOrdinal(label);
                    int ord2 = compact.GetOrdinal(label);

                    if (VERBOSE)
                    {
                        Console.WriteLine("Testing label: " + label.ToString());
                    }

                    assertEquals(ord1, ord2);

                    // Label not known yet: take the next ordinal from the compact structure
                    // and register the label under it in both structures.
                    if (ord1 == LabelToOrdinal.INVALID_ORDINAL)
                    {
                        ord1 = compact.GetNextOrdinal();
                        map.AddLabel(label, ord1);
                        compact.AddLabel(label, ord1);
                    }
                }
            }

            // Final pass over every value: both structures must agree on the ordinal.
            for (int i = 0; i < uniqueValues.Length; i++)
            {
                FacetLabel label;
                string s = uniqueValues[i];
                if (s.Length == 0)
                {
                    label = new FacetLabel();
                }
                else
                {
                    label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                }
                int ord1 = map.GetOrdinal(label);
                int ord2 = compact.GetOrdinal(label);

                if (VERBOSE)
                {
                    Console.WriteLine("Testing label 2: " + label.ToString());
                }

                assertEquals(ord1, ord2);
            }
        }
        // Randomized test: feeds the same labels to a LabelToOrdinalMap and a
        // CompactLabelToOrdinal and asserts that both always report the same ordinal.
        // Along the way the compact structure is flushed to a temp file and reopened.
        public virtual void TestL2O()
        {
            LabelToOrdinal map = new LabelToOrdinalMap();

            CompactLabelToOrdinal compact = new CompactLabelToOrdinal(2000000, 0.15f, 3);

            int n = AtLeast(10 * 1000);
            const int numUniqueValues = 50 * 1000;

            string[] uniqueValues = new string[numUniqueValues];
            byte[] buffer = new byte[50];

            Random random = Random();
            // Fill uniqueValues with random strings. Note that i is only advanced at the
            // bottom of the loop, so a rejected candidate is regenerated into the same slot.
            for (int i = 0; i < numUniqueValues;)
            {
                random.NextBytes(buffer);
                int size = 1 + random.Next(buffer.Length);

                // This test is turning random bytes into a string,
                // this is asking for trouble.
                uniqueValues[i] = Encoding.UTF8.GetString(buffer, 0, size);
                // we cannot have empty path components, so eliminate all prefix as well
                // as middle consecutive delimiter chars.
                uniqueValues[i] = Regex.Replace(uniqueValues[i], "/+", "/");
                if (uniqueValues[i].StartsWith("/", StringComparison.Ordinal))
                {
                    uniqueValues[i] = uniqueValues[i].Substring(1);
                }
                // Accept the candidate only if it does not contain the terminator char.
                if (uniqueValues[i].IndexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1)
                {
                    i++;
                }
            }

            var tmpDir = CreateTempDir("testLableToOrdinal");
            var f = new FileInfo(Path.Combine(tmpDir.FullName, "CompactLabelToOrdinalTest.tmp"));
            int flushInterval = 10;

            for (int i = 0; i < n; i++)
            {
                // Every flushInterval iterations: flush to disk, reopen from the file,
                // then delete the file and check that it is gone.
                if (i > 0 && i % flushInterval == 0)
                {
                    using (var fileStream = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite))
                    {
                        compact.Flush(fileStream);
                    }
                    compact = CompactLabelToOrdinal.Open(f, 0.15f, 3);
                    // FileInfo.Delete() returns void, so the deletion is verified through
                    // File.Exists below instead of asserting on a return value.
                    //assertTrue(f.Delete());
                    f.Delete();
                    assertFalse(File.Exists(f.FullName));
                    // Flush less often as the run progresses: the interval grows tenfold
                    // while it is still below n / 10.
                    if (flushInterval < (n / 10))
                    {
                        flushInterval *= 10;
                    }
                }

                // Pick a random value and turn it into a label, one component per '/'-separated part.
                int index = random.Next(numUniqueValues);
                FacetLabel label;
                string s = uniqueValues[index];
                if (s.Length == 0)
                {
                    label = new FacetLabel();
                }
                else
                {
                    label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                }

                int ord1 = map.GetOrdinal(label);
                int ord2 = compact.GetOrdinal(label);

                if (VERBOSE)
                {
                    Console.WriteLine("Testing label: " + label.ToString());
                }

                assertEquals(ord1, ord2);

                // Label not known yet: take the next ordinal from the compact structure
                // and register the label under it in both structures.
                if (ord1 == LabelToOrdinal.INVALID_ORDINAL)
                {
                    ord1 = compact.GetNextOrdinal();
                    map.AddLabel(label, ord1);
                    compact.AddLabel(label, ord1);
                }
            }

            // Final pass over every generated value (whether or not it was picked above):
            // both structures must agree on the ordinal.
            for (int i = 0; i < numUniqueValues; i++)
            {
                FacetLabel label;
                string s = uniqueValues[i];
                if (s.Length == 0)
                {
                    label = new FacetLabel();
                }
                else
                {
                    label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                }
                int ord1 = map.GetOrdinal(label);
                int ord2 = compact.GetOrdinal(label);

                if (VERBOSE)
                {
                    Console.WriteLine("Testing label 2: " + label.ToString());
                }

                assertEquals(ord1, ord2);
            }
        }
        /// <summary>
        /// Rebuilds a <see cref="CompactLabelToOrdinal"/> from a file previously written by
        /// <see cref="Flush(Stream)"/>.
        /// </summary>
        /// <param name="file">File to read.</param>
        /// <param name="loadFactor">Load factor assigned to the new instance; also used for its threshold.</param>
        /// <param name="numHashArrays">Number of hash arrays allocated for the new instance.</param>
        /// <returns>The reloaded instance.</returns>
        /// <exception cref="IOException">
        /// A <see cref="SerializationException"/> occurred while reading; it is attached as the inner exception.
        /// </exception>
        internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int numHashArrays)
        {
            // The file holds the counter and the label repository only. Hashes and label
            // offsets are not persisted, so they are recomputed below and re-added to the
            // new object - kept this way in the spirit of the original code. (ssuppe)
            CompactLabelToOrdinal result = new CompactLabelToOrdinal();
            result.loadFactor = loadFactor;
            result.hashArrays = new HashArray[numHashArrays];

            using (BinaryReader reader = new BinaryReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read)))
            {
                try
                {
                    // TaxiReader needs to load the "counter" or occupancy (L2O) to know
                    // the next unique facet. The delimiter used to be loaded too, but
                    // was never used.
                    result.m_counter = reader.ReadInt32();

                    result.capacity = DetermineCapacity((int)Math.Pow(2, result.hashArrays.Length), result.m_counter);
                    result.Init();

                    // Next in the stream: the characters of the label repository.
                    result.labelRepository = CharBlockArray.Open(reader.BaseStream);
                    result.collisionMap = new CollisionMap(result.labelRepository);

                    // The hash is computed on the fly, mirroring how CategoryPath hashes
                    // itself (same code as CategoryPath.hashFromSerialized, duplicated here
                    // because the position has to be advanced while hashing).
                    int ordinal = 0;
                    // Position 0 is skipped: it is the CategoryPath(0,0), which isn't a hashed value.
                    int position = 1;
                    int labelStart = position;

                    // This loop relies on well-formed input (array offsets are trusted blindly);
                    // the file is machine generated, so that is considered acceptable.
                    while (position < result.labelRepository.Length)
                    {
                        // First char of a label: its number of components.
                        int componentCount = (ushort)result.labelRepository[position++];
                        int hash = componentCount;

                        for (int c = 0; c < componentCount; c++)
                        {
                            // Each component is a length char followed by that many chars.
                            int componentLength = (ushort)result.labelRepository[position++];
                            hash = hash * 31 + result.labelRepository.Subsequence(position, componentLength).GetHashCode(); // LUCENENET: Corrected 2nd Subsequence parameter
                            position += componentLength;
                        }

                        // Final mixing step of the hash algorithm.
                        hash ^= (int)((uint)hash >> 20) ^ (int)((uint)hash >> 12);
                        hash ^= (int)((uint)hash >> 7) ^ (int)((uint)hash >> 4);

                        // Register the label and move on to the next one.
                        result.AddLabelOffset(hash, ordinal, labelStart);
                        ordinal++;
                        labelStart = position;
                    }
                }
                catch (SerializationException se)
                {
                    throw new IOException("Invalid file format. Cannot deserialize.", se);
                }
            }

            result.threshold = (int)(result.loadFactor * result.capacity);
            return result;
        }
        /// <summary>
        /// Opens the file and reloads the CompactLabelToOrdinal. The file it expects
        /// is the one generated by the corresponding flush command.
        /// </summary>
        /// <param name="file">Path of the file to read.</param>
        /// <param name="loadFactor">Load factor assigned to the new instance; also used for its threshold.</param>
        /// <param name="numHashArrays">Number of hash arrays allocated for the new instance.</param>
        /// <returns>The reloaded instance.</returns>
        /// <exception cref="IOException">
        /// The exception caught while reading indicated an invalid file; it is attached as the inner exception.
        /// </exception>
        public static CompactLabelToOrdinal Open(string file, float loadFactor, int numHashArrays)
        {
            // Part of the file is the labelRepository, which needs to be rehashed
            // and label offsets re-added to the object. I am unsure as to why we
            // can't just store these off in the file as well, but in keeping with
            // the spirit of the original code, I did it this way. (ssuppe)
            CompactLabelToOrdinal l2o = new CompactLabelToOrdinal();
            l2o.loadFactor = loadFactor;
            l2o.hashArrays = new HashArray[numHashArrays];

            BinaryReader dis = null;
            try
            {
                dis = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read));

                // TaxiReader needs to load the "counter" or occupancy (L2O) to know
                // the next unique facet. we used to load the delimiter too, but
                // never used it.
                l2o.counter = dis.ReadInt32();

                l2o.capacity = DetermineCapacity((int)Math.Pow(2, l2o.hashArrays.Length), l2o.counter);
                l2o.Init();

                // now read the chars
                l2o.labelRepository = CharBlockArray.Open(dis);

                l2o.collisionMap = new CollisionMap(l2o.labelRepository);

                // Calculate hash on the fly based on how CategoryPath hashes
                // itself. Maybe in the future we can call some static based methods
                // in CategoryPath so that this doesn't break again? I don't like
                // having code in two different places...
                int cid = 0;
                // Skip the initial offset, it's the CategoryPath(0,0), which isn't
                // a hashed value.
                int offset = 1;
                int lastStartOffset = offset;
                // This loop really relies on a well-formed input (assumes pretty blindly
                // that array offsets will work).  Since the initial file is machine
                // generated, I think this should be OK.
                while (offset < l2o.labelRepository.Length)
                {
                    // identical code to CategoryPath.hashFromSerialized. since we need to
                    // advance offset, we cannot call the method directly. perhaps if we
                    // could pass a mutable Integer or something...
                    // First char of a label: its number of components.
                    int length = (short)l2o.labelRepository.CharAt(offset++);
                    int hash = length;
                    if (length != 0)
                    {
                        for (int i = 0; i < length; i++)
                        {
                            // Each component is a length char followed by that many chars.
                            int len = (short)l2o.labelRepository.CharAt(offset++);
                            hash = hash * 31 + l2o.labelRepository.SubSequence(offset, offset + len).GetHashCode();
                            offset += len;
                        }
                    }
                    // Now that we've hashed the components of the label, do the
                    // final part of the hash algorithm.
                    hash = hash ^ (((int)((uint)hash >> 20)) ^ ((int)((uint)hash >> 12)));
                    hash = hash ^ ((int)((uint)hash >> 7)) ^ ((int)((uint)hash >> 4));
                    // Add the label, and let's keep going
                    l2o.AddLabelOffset(hash, cid, lastStartOffset);
                    cid++;
                    lastStartOffset = offset;
                }

            }
            catch (DllNotFoundException e)
            {
                // NOTE(review): DllNotFoundException looks like an automated translation of a
                // Java exception type and is unlikely to be what the reading code throws -
                // confirm which exception the deserialization actually raises.
                // The caught exception is chained so the root cause is not lost.
                throw new IOException("Invalid file format. Cannot deserialize.", e);
            }
            finally
            {
                if (dis != null)
                {
                    dis.Dispose();
                }
            }

            l2o.threshold = (int)(l2o.loadFactor * l2o.capacity);
            return l2o;
        }