Example #1
0
        public void Add()
        {
            // Each counted Add is expected to report how many occurrences of the
            // item were present before the call.
            IMultiSet<string> bag = new MultiSet<string>();

            var nullBefore = bag.Add(null, 3);
            Assert.AreEqual(0, nullBefore);

            var emptyBefore = bag.Add(string.Empty, 4);
            Assert.AreEqual(0, emptyBefore);

            // A single-item Add followed by a counted Add of the same item.
            bag.Add("ab");
            var abBefore = bag.Add("ab", 2);
            Assert.AreEqual(1, abBefore);

            // Every item must end up with the sum of what was added for it.
            Assert.AreEqual(3, bag.Count(entry => entry == null));
            Assert.AreEqual(4, bag.Count(entry => entry == string.Empty));
            Assert.AreEqual(3, bag.Count(entry => entry == "ab"));
        }
Example #2
0
        public void SetItemCount()
        {
            const string increased = "itemToIncrease";
            const string decreased = "itemToDecrease";
            const string deleted   = "itemToDelete";
            const string untouched = "itemNotChanged";
            const string added     = "itemToAdd";

            IMultiSet<string> bag = new MultiSet<string>();

            // Seed four items with three occurrences each.
            bag.Add(increased, 3);
            bag.Add(decreased, 3);
            bag.Add(deleted, 3);
            bag.Add(untouched, 3);

            // SetItemCount is expected to report the count held before the update.
            var increasedBefore = bag.SetItemCount(increased, 4);
            Assert.AreEqual(3, increasedBefore);

            var decreasedBefore = bag.SetItemCount(decreased, 2);
            Assert.AreEqual(3, decreasedBefore);

            var deletedBefore = bag.SetItemCount(deleted, 0);
            Assert.AreEqual(3, deletedBefore);

            var untouchedBefore = bag.SetItemCount(untouched, 3);
            Assert.AreEqual(3, untouchedBefore);

            var addedBefore = bag.SetItemCount(added, 3);
            Assert.AreEqual(0, addedBefore);

            // The enumerated contents must reflect the new counts.
            Assert.AreEqual(4, bag.Count(entry => entry == increased));
            Assert.AreEqual(2, bag.Count(entry => entry == decreased));
            Assert.IsFalse(bag.Any(entry => entry == deleted));
            Assert.AreEqual(3, bag.Count(entry => entry == added));
            Assert.AreEqual(3, bag.Count(entry => entry == untouched));
        }
Example #3
0
        public void Remove()
        {
            const string decreased   = "itemToDecrease";
            const string deleted     = "itemToDelete";
            const string overRemoved = "itemToDelete_negative";
            const string untouched   = "itemNotChanged";
            const string missing     = "itemNotExist";

            IMultiSet<string> bag = new MultiSet<string>();

            bag.Add(decreased, 3);
            bag.Add(deleted, 3);
            bag.Add(overRemoved, 3);
            bag.Add(untouched, 3);

            // Counted removal reports the previous count; single removal reports
            // whether an occurrence was removed.
            var decreasedBefore = bag.Remove(decreased, 1);
            Assert.AreEqual(3, decreasedBefore);
            var decreasedRemovedOnce = bag.Remove(decreased);
            Assert.IsTrue(decreasedRemovedOnce);

            // Removing every occurrence leaves nothing for a further single removal.
            var deletedBefore = bag.Remove(deleted, 3);
            Assert.AreEqual(3, deletedBefore);
            var deletedRemovedAgain = bag.Remove(deleted);
            Assert.IsFalse(deletedRemovedAgain);

            // Asking for more occurrences than are present.
            var overRemovedBefore = bag.Remove(overRemoved, 4);
            Assert.AreEqual(3, overRemovedBefore);

            // Removing zero occurrences.
            var untouchedBefore = bag.Remove(untouched, 0);
            Assert.AreEqual(3, untouchedBefore);

            // Removing an item that was never added.
            var missingBefore = bag.Remove(missing, 1);
            Assert.AreEqual(0, missingBefore);
            var missingRemoved = bag.Remove(missing);
            Assert.IsFalse(missingRemoved);

            Assert.AreEqual(1, bag.Count(entry => entry == decreased));
            Assert.AreEqual(3, bag.Count(entry => entry == untouched));
            Assert.IsFalse(bag.Any(entry => entry == deleted));
            Assert.IsFalse(bag.Any(entry => entry == overRemoved));
            Assert.IsFalse(bag.Any(entry => entry == missing));
        }
Example #4
0
        public void SetItemCount_ExpectedCountMisMatch_DoesNotUpdate()
        {
            const string present = "item";
            const string absent  = "itemNotExist";

            IMultiSet<string> bag = new MultiSet<string>();
            bag.Add(present, 3);

            // The item holds 3 occurrences, so neither call's second argument matches.
            var updatedFromFour = bag.SetItemCount(present, 4, 5);
            Assert.IsFalse(updatedFromFour);
            var updatedFromZero = bag.SetItemCount(present, 0, 5);
            Assert.IsFalse(updatedFromZero);

            // Same calls against an item that was never added.
            var absentFromOne = bag.SetItemCount(absent, 1, 5);
            Assert.IsFalse(absentFromOne);
            var absentFromFive = bag.SetItemCount(absent, 5, 5);
            Assert.IsFalse(absentFromFive);

            // Nothing may have changed.
            Assert.AreEqual(3, bag.Count(entry => entry == present));
            Assert.IsFalse(bag.Any(entry => entry == absent));
        }
Example #5
0
        public void Clear()
        {
            IMultiSet<string> bag = new MultiSet<string>();

            // Three distinct items (one of them null), three occurrences each.
            bag.Add("item1", 3);
            bag.Add("item2", 3);
            bag.Add(null, 3);
            Assert.AreEqual(9, bag.Count);

            bag.Clear();

            // Both the reported count and the enumerated contents must be empty.
            Assert.AreEqual(0, bag.Count);
            var leftovers = bag.ToArray();
            Assert.AreEqual(0, leftovers.Length);

            // The set must keep working after it has been emptied.
            bag.Add("item1", 3);
            Assert.AreEqual(3, bag.Count);
            Assert.AreEqual(3, bag.Count(entry => entry == "item1"));
        }
Example #6
0
        /// <summary>
        /// Compresses <paramref name="input"/> into <paramref name="output"/>. The values produced
        /// by <c>Unpacked(input)</c> are run-length encoded into nibble runs, a length-limited
        /// (at most 8 bits in the header table) prefix code is searched for that minimises the
        /// estimated output size, and finally the header, the code table and the encoded bit
        /// stream are written.
        /// </summary>
        /// <param name="input">Source stream; it is consumed through <c>Unpacked</c>.</param>
        /// <param name="output">Destination stream for the header, code table and bit stream.</param>
        /// <param name="xor">Flag stored in bit 15 of the 16-bit header word.</param>
        /// <param name="inputLength">
        /// Length used for the header word: it is cast to <c>int</c>, shifted right by 5 and
        /// OR-ed with the <paramref name="xor"/> flag.
        /// </param>
        private static void EncodeInternal(Stream input, Stream output, bool xor, long inputLength)
        {
            // rleSource keeps the runs in stream order; counts tallies how often each
            // distinct run occurs.
            var rleSource = new List <NibbleRun>();
            var counts    = new SortedList <NibbleRun, long>();

            using (IEnumerator <byte> unpacked = Unpacked(input))
            {
                // Build RLE nibble runs, RLE-encoding the nibble runs as we go along.
                // Maximum run length is 8, meaning 7 repetitions.
                // NOTE(review): the run still held in "current" when the enumerator is
                // exhausted is never added to rleSource/counts. Presumably Unpacked yields
                // a trailing sentinel value so that the last real run is flushed -- confirm.
                if (unpacked.MoveNext())
                {
                    NibbleRun current = new NibbleRun(unpacked.Current, 0);
                    while (unpacked.MoveNext())
                    {
                        NibbleRun next = new NibbleRun(unpacked.Current, 0);
                        if (next.Nibble != current.Nibble || current.Count >= 7)
                        {
                            rleSource.Add(current);
                            long count;

                            // A failed lookup leaves count at 0, so a new run starts at 1.
                            counts.TryGetValue(current, out count);
                            counts[current] = count + 1;
                            current         = next;
                        }
                        else
                        {
                            ++current.Count;
                        }
                    }
                }
            }

            // We will use the Package-merge algorithm to build the optimal length-limited
            // Huffman code for the current file. To do this, we must map the current
            // problem onto the Coin Collector's problem.
            // Build the basic coin collection.
            var qt = new List <EncodingCodeTreeNode>();

            foreach (var kvp in counts)
            {
                // No point in including anything with weight less than 2, as they
                // would actually increase compressed file size if used.
                if (kvp.Value > 1)
                {
                    qt.Add(new EncodingCodeTreeNode(kvp.Key, kvp.Value));
                }
            }

            // NOTE(review): the ordering comes from EncodingCodeTreeNode's comparison,
            // which is not visible here. The code below treats the LAST element as the
            // lowest weighted / least valuable one -- confirm against that type.
            qt.Sort();

            // The base coin collection for the length-limited Huffman coding has
            // one coin list per character in length of the limitation. Each coin list
            // has a constant "face value", and each coin in a list has its own
            // "numismatic value". The "face value" is unimportant in the way the code
            // is structured below; the "numismatic value" of each coin is the number
            // of times the underlying nibble run appears in the source file.

            // This will hold the Huffman code map.
            // NOTE: while the codes that will be written in the header will not be
            // longer than 8 bits, it is possible that a supplementary code map will
            // add "fake" codes that are longer than 8 bits.
            var codeMap = new SortedList <NibbleRun, KeyValuePair <long, byte> >();

            // Size estimate. This is used to build the optimal compressed file.
            long sizeEstimate = long.MaxValue;

            // We will solve the Coin Collector's problem several times, each time
            // ignoring more of the least frequent nibble runs. This allows us to find
            // *the* lowest file size.
            // When fewer than two candidates exist the loop never runs, codeMap stays
            // empty and every run is written inline further below.
            while (qt.Count > 1)
            {
                // Make a copy of the basic coin collection.
                var q0 = new List <EncodingCodeTreeNode>(qt);

                // Ignore the lowest weighted item. Will only affect the next iteration
                // of the loop. If it can be proven that there is a single global
                // minimum (and no local minima for file size), then this could be
                // simplified to a binary search.
                qt.RemoveAt(qt.Count - 1);

                // We now solve the Coin collector's problem using the Package-merge
                // algorithm. The solution goes here.
                var solution = new List <EncodingCodeTreeNode>();

                // This holds the packages from the last iteration.
                var q = new List <EncodingCodeTreeNode>(q0);

                int target = (q0.Count - 1) << 8, idx = 0;
                while (target != 0)
                {
                    // Gets lowest bit set in its proper place:
                    int val = (target & -target), r = 1 << idx;

                    // Is the current denomination equal to the least denomination?
                    if (r == val)
                    {
                        // If yes, take the least valuable node and put it into the solution.
                        solution.Add(q[q.Count - 1]);
                        q.RemoveAt(q.Count - 1);
                        target -= r;
                    }

                    // The coin collection has coins of values 1 to 8; copy from the
                    // original in those cases for the next step.
                    var q1 = new List <EncodingCodeTreeNode>();
                    if (idx < 7)
                    {
                        q1.AddRange(q0);
                    }

                    // Split the current list into pairs and insert the packages into
                    // the next list.
                    // Pairs are taken from the end of the list; an unpaired leftover
                    // element is dropped by the Clear() below.
                    while (q.Count > 1)
                    {
                        EncodingCodeTreeNode child1 = q[q.Count - 1];
                        q.RemoveAt(q.Count - 1);
                        EncodingCodeTreeNode child0 = q[q.Count - 1];
                        q.RemoveAt(q.Count - 1);
                        q1.Add(new EncodingCodeTreeNode(child0, child1));
                    }

                    idx++;
                    q.Clear();
                    q.AddRange(q1);
                    q.Sort();
                }

                // The Coin Collector's problem has been solved. Now it is time to
                // map the solution back into the length-limited Huffman coding problem.

                // To do that, we iterate through the solution and count how many times
                // each nibble run has been used (remember that the coin collection
                // had multiple coins associated with each nibble run) -- this number
                // is the optimal bit length for the nibble run.
                var baseSizeMap = new SortedList <NibbleRun, long>();
                foreach (var item in solution)
                {
                    item.Traverse(baseSizeMap);
                }

                // With the length-limited Huffman coding problem solved, it is now time
                // to build the code table. As input, we have a map associating a nibble
                // run to its optimal encoded bit length. We will build the codes using
                // the canonical Huffman code.

                // To do that, we must invert the size map so we can sort it by code size.
                var sizeOnlyMap = new MultiSet <long>();

                // This map contains lots more information, and is used to associate
                // the nibble run with its optimal code. It is sorted by code size,
                // then by frequency of the nibble run, then by the nibble run.
                var sizeMap = new MultiSet <SizeMapItem>();

                foreach (var item in baseSizeMap)
                {
                    long size = item.Value;
                    sizeOnlyMap.Add(size);
                    sizeMap.Add(new SizeMapItem(size, counts[item.Key], item.Key));
                }

                // We now build the canonical Huffman code table.
                // "baseCode" is the code for the first nibble run with a given bit length.
                // "carry" is how many nibble runs were demoted to a higher bit length
                // at an earlier step.
                // "cnt" is how many nibble runs have a given bit length.
                long baseCode = 0;
                long carry = 0, cnt;

                // This list contains the codes sorted by size.
                var codes = new List <KeyValuePair <long, byte> >();
                for (byte j = 1; j <= 8; j++)
                {
                    // How many nibble runs have the desired bit length.
                    cnt   = sizeOnlyMap.Count(j) + carry;
                    carry = 0;

                    for (int k = 0; k < cnt; k++)
                    {
                        // Sequential binary numbers for codes.
                        long code = baseCode + k;
                        long mask = (1L << j) - 1;

                        // We do not want any codes composed solely of 1's or which
                        // start with 111111, as that sequence is reserved.
                        if ((j <= 6 && code == mask) ||
                            (j > 6 && code == (mask & ~((1L << (j - 6)) - 1))))
                        {
                            // We must demote this many nibble runs to a longer code.
                            carry = cnt - k;
                            cnt   = k;
                            break;
                        }

                        codes.Add(new KeyValuePair <long, byte>(code, j));
                    }

                    // This is the beginning bit pattern for the next bit length.
                    baseCode = (baseCode + cnt) << 1;
                }

                // With the canonical table built, the codemap can finally be built.
                // Runs are paired with codes positionally; runs left over once the
                // codes are exhausted get no entry.
                var tempCodemap = new SortedList <NibbleRun, KeyValuePair <long, byte> >();
                using (IEnumerator <SizeMapItem> enumerator = sizeMap.GetEnumerator())
                {
                    int pos = 0;
                    while (enumerator.MoveNext() && pos < codes.Count)
                    {
                        tempCodemap[enumerator.Current.NibbleRun] = codes[pos];
                        ++pos;
                    }
                }

                // We now compute the final file size for this code table.
                // 2 bytes at the start of the file, plus 1 byte at the end of the
                // code table.
                // The estimate is kept in bits.
                long tempsize_est = 3 * 8;
                byte last         = 0xff;

                // Start with any nibble runs with their own code.
                foreach (var item in tempCodemap)
                {
                    // Each new nibble needs an extra byte.
                    if (item.Key.Nibble != last)
                    {
                        tempsize_est += 8;
                        last          = item.Key.Nibble;
                    }

                    // 2 bytes per nibble run in the table.
                    tempsize_est += 2 * 8;

                    // How many bits this nibble run uses in the file.
                    tempsize_est += counts[item.Key] * item.Value.Value;
                }

                // Supplementary code map for the nibble runs that can be broken up into
                // shorter nibble runs with a smaller bit length than inlining.
                var supCodemap = new Dictionary <NibbleRun, KeyValuePair <long, byte> >();

                // Now we will compute the size requirements for inline nibble runs.
                foreach (var item in counts)
                {
                    if (!tempCodemap.ContainsKey(item.Key))
                    {
                        // Nibble run does not have its own code. We need to find out if
                        // we can break it up into smaller nibble runs with total code
                        // size less than 13 bits or if we need to inline it (13 bits).
                        if (item.Key.Count == 0)
                        {
                            // If this is a nibble run with zero repeats, we can't break
                            // it up into smaller runs, so we inline it.
                            tempsize_est += (6 + 7) * item.Value;
                        }
                        else if (item.Key.Count == 1)
                        {
                            // We stand a chance of breaking the nibble run.

                            // This case is rather trivial, so we hard-code it.
                            // We can break this up only as 2 consecutive runs of a nibble
                            // run with count == 0.
                            KeyValuePair <long, byte> value;
                            if (!tempCodemap.TryGetValue(new NibbleRun(item.Key.Nibble, 0), out value) || value.Value > 6)
                            {
                                // The smaller nibble run either does not have its own code
                                // or it results in a longer bit code when doubled up than
                                // would result from inlining the run. In either case, we
                                // inline the nibble run.
                                tempsize_est += (6 + 7) * item.Value;
                            }
                            else
                            {
                                // The smaller nibble run has a small enough code that it is
                                // more efficient to use it twice than to inline our nibble
                                // run. So we do exactly that, by adding a (temporary) entry
                                // in the supplementary codemap, which will later be merged
                                // into the main codemap.
                                // Bit 7 of the stored length marks the entry as
                                // supplementary so it is kept out of the header table.
                                long code = value.Key;
                                byte len  = value.Value;
                                code                 = (code << len) | code;
                                len                <<= 1;
                                tempsize_est        += len * item.Value;
                                supCodemap[item.Key] = new KeyValuePair <long, byte>(code, (byte)(0x80 | len));
                            }
                        }
                        else
                        {
                            // We stand a chance of breaking the nibble run.
                            byte n = item.Key.Count;

                            // This is a linear optimization problem subjected to 2
                            // constraints. If the number of repeats of the current nibble
                            // run is N, then we have N dimensions.
                            // Reference to table of linear coefficients. This table has
                            // N columns for each line.
                            // linearCoeffs is declared outside this method; counts 0 and 1
                            // were handled above, so n - 2 is non-negative here.
                            byte[,] myLinearCoeffs = linearCoeffs[n - 2];
                            int rows = myLinearCoeffs.GetLength(0);

                            byte nibble = item.Key.Nibble;

                            // List containing the code length of each nibble run, or 13
                            // if the nibble run is not in the codemap.
                            var runlen = new List <long>();

                            // Initialize the list.
                            for (byte i = 0; i < n; i++)
                            {
                                // Is this run in the codemap?
                                KeyValuePair <long, byte> value;
                                if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                                {
                                    // It is.
                                    // Put code length in the vector.
                                    runlen.Add(value.Value);
                                }
                                else
                                {
                                    // It is not.
                                    // Put inline length in the vector.
                                    runlen.Add(6 + 7);
                                }
                            }

                            // Now go through the linear coefficient table and tally up
                            // the total code size, looking for the best case.
                            // The best size is initialized to be the inlined case.
                            long bestSize = 6 + 7;
                            int  bestLine = -1;
                            for (int i = 0; i < rows; i++)
                            {
                                // Tally up the code length for this coefficient line.
                                long len = 0;
                                for (byte j = 0; j < n; j++)
                                {
                                    byte c = myLinearCoeffs[i, j];
                                    if (c == 0)
                                    {
                                        continue;
                                    }

                                    len += c * runlen[j];
                                }

                                // Is the length better than the best yet?
                                if (len < bestSize)
                                {
                                    // If yes, store it as the best.
                                    bestSize = len;
                                    bestLine = i;
                                }
                            }

                            // Have we found a better code than inlining?
                            if (bestLine >= 0)
                            {
                                // We have; use it. To do so, we have to build the code
                                // and add it to the supplementary code table.
                                long code = 0, len = 0;
                                for (byte i = 0; i < n; i++)
                                {
                                    byte c = myLinearCoeffs[bestLine, i];
                                    if (c == 0)
                                    {
                                        continue;
                                    }

                                    // Is this run in the codemap?
                                    KeyValuePair <long, byte> value;
                                    if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                                    {
                                        // It is; it MUST be, as the other case is impossible
                                        // by construction.
                                        // Append the sub-run's code c times.
                                        for (int j = 0; j < c; j++)
                                        {
                                            len   += value.Value;
                                            code <<= value.Value;
                                            code  |= value.Key;
                                        }
                                    }
                                }

                                if (len != bestSize)
                                {
                                    // ERROR! DANGER! THIS IS IMPOSSIBLE!
                                    // But just in case...
                                    tempsize_est += (6 + 7) * item.Value;
                                }
                                else
                                {
                                    // By construction, bestSize is at most 12.
                                    byte c = (byte)bestSize;

                                    // Add it to supplementary code map.
                                    supCodemap[item.Key] = new KeyValuePair <long, byte>(code, (byte)(0x80 | c));
                                    tempsize_est        += bestSize * item.Value;
                                }
                            }
                            else
                            {
                                // No, we will have to inline it.
                                tempsize_est += (6 + 7) * item.Value;
                            }
                        }
                    }
                }

                // Merge the supplementary code map into the temporary code map.
                foreach (var item in supCodemap)
                {
                    tempCodemap[item.Key] = item.Value;
                }

                // Round up to a full byte.
                tempsize_est = (tempsize_est + 7) & ~7;

                // Is this iteration better than the best?
                if (tempsize_est < sizeEstimate)
                {
                    // If yes, save the codemap and file size.
                    codeMap      = tempCodemap;
                    sizeEstimate = tempsize_est;
                }
            }

            // We now have a prefix-free code map associating the RLE-encoded nibble
            // runs with their code. Now we write the file.
            // Write header.
            // Bit 15 carries the xor flag; the remaining bits carry inputLength >> 5.
            // NOTE(review): assumes inputLength >> 5 fits in 15 bits; a larger value would
            // overlap the flag bit before the ushort cast truncates it -- confirm callers.
            BigEndian.Write2(output, (ushort)((Convert.ToInt32(xor) << 15) | ((int)inputLength >> 5)));
            byte lastNibble = 0xff;

            foreach (var item in codeMap)
            {
                byte length = item.Value.Value;

                // length with bit 7 set is a special device for further reducing file size, and
                // should NOT be on the table.
                if ((length & 0x80) != 0)
                {
                    continue;
                }

                NibbleRun nibbleRun = item.Key;
                if (nibbleRun.Nibble != lastNibble)
                {
                    // 0x80 marks byte as setting a new nibble.
                    NeutralEndian.Write1(output, (byte)(0x80 | nibbleRun.Nibble));
                    lastNibble = nibbleRun.Nibble;
                }

                // Table entry: one byte with the repeat count in the high nibble and the
                // code length in the low nibble, followed by one byte holding the code.
                long code = item.Value.Key;
                NeutralEndian.Write1(output, (byte)((nibbleRun.Count << 4) | length));
                NeutralEndian.Write1(output, (byte)code);
            }

            // Mark end of header.
            NeutralEndian.Write1(output, 0xff);

            // Write the encoded bitstream.
            UInt8_E_L_OutputBitStream bitStream = new UInt8_E_L_OutputBitStream(output);

            // The RLE-encoded source makes for a far faster encode as we simply
            // use the nibble runs as an index into the map, meaning a quick binary
            // search gives us the code to use (if in the map) or tells us that we
            // need to use inline RLE.
            foreach (var nibbleRun in rleSource)
            {
                KeyValuePair <long, byte> value;
                if (codeMap.TryGetValue(nibbleRun, out value))
                {
                    long code = value.Key;
                    byte len  = value.Value;

                    // len with bit 7 set is a device to bypass the code table at the
                    // start of the file. We need to clear the bit here before writing
                    // the code to the file.
                    len &= 0x7f;

                    // We can have codes in the 9-12 range due to the break up of large
                    // inlined runs into smaller non-inlined runs. Deal with those high
                    // bits first, if needed.
                    if (len > 8)
                    {
                        bitStream.Write((byte)((code >> 8) & 0xff), len - 8);
                        len = 8;
                    }

                    bitStream.Write((byte)(code & 0xff), len);
                }
                else
                {
                    // Inline run: the reserved 111111 prefix, then the repeat count and
                    // the nibble (6 + 3 + 4 = 13 bits, matching the size estimate above).
                    bitStream.Write(0x3f, 6);
                    bitStream.Write(nibbleRun.Count, 3);
                    bitStream.Write(nibbleRun.Nibble, 4);
                }
            }

            // Fill remainder of last byte with zeroes and write if needed.
            bitStream.Flush(false);
        }