public void Add()
{
    // Exercises both Add overloads. The counted overload, Add(item, count),
    // reports how many occurrences were already present *before* the add.
    IMultiSet<string> bag = new MultiSet<string>();

    // null and the empty string are legal elements; both start at zero.
    Assert.AreEqual(0, bag.Add(null, 3));
    Assert.AreEqual(0, bag.Add(string.Empty, 4));

    // A single add followed by a counted add: the counted add sees the
    // one occurrence inserted just before it.
    bag.Add("ab");
    Assert.AreEqual(1, bag.Add("ab", 2));

    // Verify the resulting occurrence totals.
    Assert.AreEqual(3, bag.Count(element => element == null));
    Assert.AreEqual(4, bag.Count(element => element == string.Empty));
    Assert.AreEqual(3, bag.Count(element => element == "ab"));
}
public void SetItemCount()
{
    // SetItemCount(item, count) forces an item's occurrence count and
    // returns the count that was in effect before the call.
    IMultiSet<string> bag = new MultiSet<string>();
    bag.Add("itemToIncrease", 3);
    bag.Add("itemToDecrease", 3);
    bag.Add("itemToDelete", 3);
    bag.Add("itemNotChanged", 3);

    Assert.AreEqual(3, bag.SetItemCount("itemToIncrease", 4));
    Assert.AreEqual(3, bag.SetItemCount("itemToDecrease", 2));
    Assert.AreEqual(3, bag.SetItemCount("itemToDelete", 0));   // zero removes the item
    Assert.AreEqual(3, bag.SetItemCount("itemNotChanged", 3)); // no-op keeps the count
    Assert.AreEqual(0, bag.SetItemCount("itemToAdd", 3));      // absent item reports 0

    // Confirm every item ended up with the expected count.
    Assert.AreEqual(4, bag.Count(element => element == "itemToIncrease"));
    Assert.AreEqual(2, bag.Count(element => element == "itemToDecrease"));
    Assert.IsFalse(bag.Any(element => element == "itemToDelete"));
    Assert.AreEqual(3, bag.Count(element => element == "itemToAdd"));
    Assert.AreEqual(3, bag.Count(element => element == "itemNotChanged"));
}
public void Remove()
{
    // Remove(item, count) returns the occurrence count held *before* the
    // removal; Remove(item) returns whether one occurrence was removed.
    IMultiSet<string> bag = new MultiSet<string>();
    bag.Add("itemToDecrease", 3);
    bag.Add("itemToDelete", 3);
    bag.Add("itemToDelete_negative", 3);
    bag.Add("itemNotChanged", 3);

    Assert.AreEqual(3, bag.Remove("itemToDecrease", 1));
    Assert.IsTrue(bag.Remove("itemToDecrease"));
    Assert.AreEqual(3, bag.Remove("itemToDelete", 3));
    Assert.IsFalse(bag.Remove("itemToDelete"));
    // Removing more occurrences than exist still deletes the item outright.
    Assert.AreEqual(3, bag.Remove("itemToDelete_negative", 4));
    // Removing zero occurrences is a no-op.
    Assert.AreEqual(3, bag.Remove("itemNotChanged", 0));
    // Removing from an item that was never added reports zero / false.
    Assert.AreEqual(0, bag.Remove("itemNotExist", 1));
    Assert.IsFalse(bag.Remove("itemNotExist"));

    // Confirm final state for every item touched above.
    Assert.AreEqual(1, bag.Count(element => element == "itemToDecrease"));
    Assert.AreEqual(3, bag.Count(element => element == "itemNotChanged"));
    Assert.IsFalse(bag.Any(element => element == "itemToDelete"));
    Assert.IsFalse(bag.Any(element => element == "itemToDelete_negative"));
    Assert.IsFalse(bag.Any(element => element == "itemNotExist"));
}
public void SetItemCount_ExpectedCountMisMatch_DoesNotUpdate()
{
    // The three-argument overload SetItemCount(item, count, expectedCount)
    // is a compare-and-set: it only applies the new count when the current
    // count matches expectedCount, and returns whether it did.
    IMultiSet<string> bag = new MultiSet<string>();
    bag.Add("item", 3);

    // Current count is 3, so an expectation of 5 must always fail ...
    Assert.IsFalse(bag.SetItemCount("item", 4, 5));
    Assert.IsFalse(bag.SetItemCount("item", 0, 5));
    // ... and so must any non-zero expectation on an absent item.
    Assert.IsFalse(bag.SetItemCount("itemNotExist", 1, 5));
    Assert.IsFalse(bag.SetItemCount("itemNotExist", 5, 5));

    // Nothing may have been modified by the failed calls.
    Assert.AreEqual(3, bag.Count(element => element == "item"));
    Assert.IsFalse(bag.Any(element => element == "itemNotExist"));
}
public void Clear()
{
    // Clear must empty the set entirely (Count counts occurrences, not
    // distinct items) and leave it in a usable state afterwards.
    IMultiSet<string> bag = new MultiSet<string>();
    bag.Add("item1", 3);
    bag.Add("item2", 3);
    bag.Add(null, 3);
    Assert.AreEqual(9, bag.Count);

    bag.Clear();
    Assert.AreEqual(0, bag.Count);
    Assert.AreEqual(0, bag.ToArray().Length);

    // still usable after clear
    bag.Add("item1", 3);
    Assert.AreEqual(3, bag.Count);
    Assert.AreEqual(3, bag.Count(element => element == "item1"));
}
/// <summary>
/// Compresses the nibble stream read from <paramref name="input"/> and writes the
/// encoded result to <paramref name="output"/>. Pipeline: (1) RLE-encode the input
/// into nibble runs (max run length 8); (2) build an optimal length-limited
/// (8-bit) Huffman code for the runs via the Package-merge algorithm, retrying with
/// progressively fewer low-frequency runs to minimize total file size; (3) emit a
/// header containing the canonical code table, then the encoded bitstream, with
/// runs absent from the table written inline behind the reserved 111111 prefix.
/// </summary>
/// <param name="input">Source stream, consumed nibble-by-nibble via <c>Unpacked</c>.</param>
/// <param name="output">Destination stream for the compressed data.</param>
/// <param name="xor">Recorded in bit 15 of the header word — presumably selects an
/// XOR decoding mode; confirm against the matching decoder.</param>
/// <param name="inputLength">Original length in bytes; the header stores
/// <c>inputLength >> 5</c>, i.e. 32-byte units — presumably a tile count; verify.</param>
private static void EncodeInternal(Stream input, Stream output, bool xor, long inputLength)
{
    var rleSource = new List<NibbleRun>();
    var counts = new SortedList<NibbleRun, long>();
    using (IEnumerator<byte> unpacked = Unpacked(input))
    {
        // Build RLE nibble runs, RLE-encoding the nibble runs as we go along.
        // Maximum run length is 8, meaning 7 repetitions.
        if (unpacked.MoveNext())
        {
            NibbleRun current = new NibbleRun(unpacked.Current, 0);
            while (unpacked.MoveNext())
            {
                NibbleRun next = new NibbleRun(unpacked.Current, 0);
                if (next.Nibble != current.Nibble || current.Count >= 7)
                {
                    // Run break: record the finished run and bump its frequency.
                    rleSource.Add(current);
                    long count;
                    counts.TryGetValue(current, out count);
                    counts[current] = count + 1;
                    current = next;
                }
                else
                {
                    ++current.Count;
                }
            }

            // NOTE(review): the final `current` run is never appended to
            // rleSource/counts after the loop exits — RLE scanners normally
            // flush the trailing run here. Confirm whether this is intentional
            // or the trailing run is being dropped.
        }
    }

    // We will use the Package-merge algorithm to build the optimal length-limited
    // Huffman code for the current file. To do this, we must map the current
    // problem onto the Coin Collector's problem.

    // Build the basic coin collection.
    var qt = new List<EncodingCodeTreeNode>();
    foreach (var kvp in counts)
    {
        // No point in including anything with weight less than 2, as they
        // would actually increase compressed file size if used.
        if (kvp.Value > 1)
        {
            qt.Add(new EncodingCodeTreeNode(kvp.Key, kvp.Value));
        }
    }

    qt.Sort();

    // The base coin collection for the length-limited Huffman coding has
    // one coin list per character in length of the limitation. Each coin list
    // has a constant "face value", and each coin in a list has its own
    // "numismatic value". The "face value" is unimportant in the way the code
    // is structured below; the "numismatic value" of each coin is the number
    // of times the underlying nibble run appears in the source file.

    // This will hold the Huffman code map.
    // NOTE: while the codes that will be written in the header will not be
    // longer than 8 bits, it is possible that a supplementary code map will
    // add "fake" codes that are longer than 8 bits.
    var codeMap = new SortedList<NibbleRun, KeyValuePair<long, byte>>();

    // Size estimate. This is used to build the optimal compressed file.
    long sizeEstimate = long.MaxValue;

    // We will solve the Coin Collector's problem several times, each time
    // ignoring more of the least frequent nibble runs. This allows us to find
    // *the* lowest file size.
    while (qt.Count > 1)
    {
        // Make a copy of the basic coin collection.
        var q0 = new List<EncodingCodeTreeNode>(qt);

        // Ignore the lowest weighted item. Will only affect the next iteration
        // of the loop. If it can be proven that there is a single global
        // minimum (and no local minima for file size), then this could be
        // simplified to a binary search.
        qt.RemoveAt(qt.Count - 1);

        // We now solve the Coin collector's problem using the Package-merge
        // algorithm. The solution goes here.
        var solution = new List<EncodingCodeTreeNode>();

        // This holds the packages from the last iteration.
        var q = new List<EncodingCodeTreeNode>(q0);

        int target = (q0.Count - 1) << 8, idx = 0;
        while (target != 0)
        {
            // Gets lowest bit set in its proper place:
            int val = (target & -target), r = 1 << idx;

            // Is the current denomination equal to the least denomination?
            if (r == val)
            {
                // If yes, take the least valuable node and put it into the solution.
                solution.Add(q[q.Count - 1]);
                q.RemoveAt(q.Count - 1);
                target -= r;
            }

            // The coin collection has coins of values 1 to 8; copy from the
            // original in those cases for the next step.
            var q1 = new List<EncodingCodeTreeNode>();
            if (idx < 7)
            {
                q1.AddRange(q0);
            }

            // Split the current list into pairs and insert the packages into
            // the next list.
            while (q.Count > 1)
            {
                EncodingCodeTreeNode child1 = q[q.Count - 1];
                q.RemoveAt(q.Count - 1);
                EncodingCodeTreeNode child0 = q[q.Count - 1];
                q.RemoveAt(q.Count - 1);
                q1.Add(new EncodingCodeTreeNode(child0, child1));
            }

            idx++;
            q.Clear();
            q.AddRange(q1);
            q.Sort();
        }

        // The Coin Collector's problem has been solved. Now it is time to
        // map the solution back into the length-limited Huffman coding problem.

        // To do that, we iterate through the solution and count how many times
        // each nibble run has been used (remember that the coin collection had
        // had multiple coins associated with each nibble run) -- this number
        // is the optimal bit length for the nibble run.
        var baseSizeMap = new SortedList<NibbleRun, long>();
        foreach (var item in solution)
        {
            item.Traverse(baseSizeMap);
        }

        // With the length-limited Huffman coding problem solved, it is now time
        // to build the code table. As input, we have a map associating a nibble
        // run to its optimal encoded bit length. We will build the codes using
        // the canonical Huffman code.

        // To do that, we must invert the size map so we can sort it by code size.
        var sizeOnlyMap = new MultiSet<long>();

        // This map contains lots more information, and is used to associate
        // the nibble run with its optimal code. It is sorted by code size,
        // then by frequency of the nibble run, then by the nibble run.
        var sizeMap = new MultiSet<SizeMapItem>();

        foreach (var item in baseSizeMap)
        {
            long size = item.Value;
            sizeOnlyMap.Add(size);
            sizeMap.Add(new SizeMapItem(size, counts[item.Key], item.Key));
        }

        // We now build the canonical Huffman code table.
        // "baseCode" is the code for the first nibble run with a given bit length.
        // "carry" is how many nibble runs were demoted to a higher bit length
        // at an earlier step.
        // "cnt" is how many nibble runs have a given bit length.
        long baseCode = 0;
        long carry = 0, cnt;

        // This list contains the codes sorted by size.
        var codes = new List<KeyValuePair<long, byte>>();
        for (byte j = 1; j <= 8; j++)
        {
            // How many nibble runs have the desired bit length.
            cnt = sizeOnlyMap.Count(j) + carry;
            carry = 0;

            for (int k = 0; k < cnt; k++)
            {
                // Sequential binary numbers for codes.
                long code = baseCode + k;
                long mask = (1L << j) - 1;

                // We do not want any codes composed solely of 1's or which
                // start with 111111, as that sequence is reserved.
                if ((j <= 6 && code == mask) || (j > 6 && code == (mask & ~((1L << (j - 6)) - 1))))
                {
                    // We must demote this many nibble runs to a longer code.
                    carry = cnt - k;
                    cnt = k;
                    break;
                }

                codes.Add(new KeyValuePair<long, byte>(code, j));
            }

            // This is the beginning bit pattern for the next bit length.
            baseCode = (baseCode + cnt) << 1;
        }

        // With the canonical table built, the codemap can finally be built.
        var tempCodemap = new SortedList<NibbleRun, KeyValuePair<long, byte>>();
        using (IEnumerator<SizeMapItem> enumerator = sizeMap.GetEnumerator())
        {
            int pos = 0;
            while (enumerator.MoveNext() && pos < codes.Count)
            {
                tempCodemap[enumerator.Current.NibbleRun] = codes[pos];
                ++pos;
            }
        }

        // We now compute the final file size for this code table.
        // 2 bytes at the start of the file, plus 1 byte at the end of the
        // code table.
        long tempsize_est = 3 * 8;
        byte last = 0xff;

        // Start with any nibble runs with their own code.
        foreach (var item in tempCodemap)
        {
            // Each new nibble needs an extra byte.
            if (item.Key.Nibble != last)
            {
                tempsize_est += 8;
                last = item.Key.Nibble;
            }

            // 2 bytes per nibble run in the table.
            tempsize_est += 2 * 8;

            // How many bits this nibble run uses in the file.
            tempsize_est += counts[item.Key] * item.Value.Value;
        }

        // Supplementary code map for the nibble runs that can be broken up into
        // shorter nibble runs with a smaller bit length than inlining.
        var supCodemap = new Dictionary<NibbleRun, KeyValuePair<long, byte>>();

        // Now we will compute the size requirements for inline nibble runs.
        foreach (var item in counts)
        {
            if (!tempCodemap.ContainsKey(item.Key))
            {
                // Nibble run does not have its own code. We need to find out if
                // we can break it up into smaller nibble runs with total code
                // size less than 13 bits or if we need to inline it (13 bits).
                if (item.Key.Count == 0)
                {
                    // If this is a nibble run with zero repeats, we can't break
                    // it up into smaller runs, so we inline it.
                    tempsize_est += (6 + 7) * item.Value;
                }
                else if (item.Key.Count == 1)
                {
                    // We stand a chance of breaking the nibble run.
                    // This case is rather trivial, so we hard-code it.
                    // We can break this up only as 2 consecutive runs of a nibble
                    // run with count == 0.
                    KeyValuePair<long, byte> value;
                    if (!tempCodemap.TryGetValue(new NibbleRun(item.Key.Nibble, 0), out value) || value.Value > 6)
                    {
                        // The smaller nibble run either does not have its own code
                        // or it results in a longer bit code when doubled up than
                        // would result from inlining the run. In either case, we
                        // inline the nibble run.
                        tempsize_est += (6 + 7) * item.Value;
                    }
                    else
                    {
                        // The smaller nibble run has a small enough code that it is
                        // more efficient to use it twice than to inline our nibble
                        // run. So we do exactly that, by adding a (temporary) entry
                        // in the supplementary codemap, which will later be merged
                        // into the main codemap.
                        long code = value.Key;
                        byte len = value.Value;
                        code = (code << len) | code;
                        len <<= 1;
                        tempsize_est += len * item.Value;

                        // Bit 7 of the stored length marks a supplementary
                        // (table-bypassing) code; see the bitstream writer below.
                        supCodemap[item.Key] = new KeyValuePair<long, byte>(code, (byte)(0x80 | len));
                    }
                }
                else
                {
                    // We stand a chance of breaking the nibble run.
                    byte n = item.Key.Count;

                    // This is a linear optimization problem subjected to 2
                    // constraints. If the number of repeats of the current nibble
                    // run is N, then we have N dimensions.
                    // Reference to table of linear coefficients. This table has
                    // N columns for each line.
                    byte[,] myLinearCoeffs = linearCoeffs[n - 2];
                    int rows = myLinearCoeffs.GetLength(0);

                    byte nibble = item.Key.Nibble;

                    // List containing the code length of each nibble run, or 13
                    // if the nibble run is not in the codemap.
                    var runlen = new List<long>();

                    // Initialize the list.
                    for (byte i = 0; i < n; i++)
                    {
                        // Is this run in the codemap?
                        KeyValuePair<long, byte> value;
                        if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                        {
                            // It is. Put code length in the vector.
                            runlen.Add(value.Value);
                        }
                        else
                        {
                            // It is not. Put inline length (6-bit marker + 7-bit
                            // RLE payload = 13 bits) in the vector.
                            runlen.Add(6 + 7);
                        }
                    }

                    // Now go through the linear coefficient table and tally up
                    // the total code size, looking for the best case.
                    // The best size is initialized to be the inlined case.
                    long bestSize = 6 + 7;
                    int bestLine = -1;
                    for (int i = 0; i < rows; i++)
                    {
                        // Tally up the code length for this coefficient line.
                        long len = 0;
                        for (byte j = 0; j < n; j++)
                        {
                            byte c = myLinearCoeffs[i, j];
                            if (c == 0)
                            {
                                continue;
                            }

                            len += c * runlen[j];
                        }

                        // Is the length better than the best yet?
                        if (len < bestSize)
                        {
                            // If yes, store it as the best.
                            bestSize = len;
                            bestLine = i;
                        }
                    }

                    // Have we found a better code than inlining?
                    if (bestLine >= 0)
                    {
                        // We have; use it. To do so, we have to build the code
                        // and add it to the supplementary code table.
                        long code = 0, len = 0;
                        for (byte i = 0; i < n; i++)
                        {
                            byte c = myLinearCoeffs[bestLine, i];
                            if (c == 0)
                            {
                                continue;
                            }

                            // Is this run in the codemap?
                            KeyValuePair<long, byte> value;
                            if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                            {
                                // It is; it MUST be, as the other case is impossible
                                // by construction.
                                for (int j = 0; j < c; j++)
                                {
                                    len += value.Value;
                                    code <<= value.Value;
                                    code |= value.Key;
                                }
                            }
                        }

                        if (len != bestSize)
                        {
                            // ERROR! DANGER! THIS IS IMPOSSIBLE!
                            // But just in case... fall back to inlining.
                            tempsize_est += (6 + 7) * item.Value;
                        }
                        else
                        {
                            // By construction, best_size is at most 12.
                            byte c = (byte)bestSize;

                            // Add it to supplementary code map (bit 7 of the
                            // length marks it as a supplementary code).
                            supCodemap[item.Key] = new KeyValuePair<long, byte>(code, (byte)(0x80 | c));
                            tempsize_est += bestSize * item.Value;
                        }
                    }
                    else
                    {
                        // No, we will have to inline it.
                        tempsize_est += (6 + 7) * item.Value;
                    }
                }
            }
        }

        // Merge the supplementary code map into the temporary code map.
        foreach (var item in supCodemap)
        {
            tempCodemap[item.Key] = item.Value;
        }

        // Round up to a full byte.
        tempsize_est = (tempsize_est + 7) & ~7;

        // Is this iteration better than the best?
        if (tempsize_est < sizeEstimate)
        {
            // If yes, save the codemap and file size.
            codeMap = tempCodemap;
            sizeEstimate = tempsize_est;
        }
    }

    // We now have a prefix-free code map associating the RLE-encoded nibble
    // runs with their code. Now we write the file.

    // Write header. Bit 15 carries the xor flag; the low bits carry the input
    // length in 32-byte units (inputLength >> 5) — presumably a tile count,
    // confirm against the matching decoder.
    BigEndian.Write2(output, (ushort)((Convert.ToInt32(xor) << 15) | ((int)inputLength >> 5)));
    byte lastNibble = 0xff;
    foreach (var item in codeMap)
    {
        byte length = item.Value.Value;

        // length with bit 7 set is a special device for further reducing file
        // size, and should NOT be on the table.
        if ((length & 0x80) != 0)
        {
            continue;
        }

        NibbleRun nibbleRun = item.Key;
        if (nibbleRun.Nibble != lastNibble)
        {
            // 0x80 marks byte as setting a new nibble.
            NeutralEndian.Write1(output, (byte)(0x80 | nibbleRun.Nibble));
            lastNibble = nibbleRun.Nibble;
        }

        long code = item.Value.Key;
        NeutralEndian.Write1(output, (byte)((nibbleRun.Count << 4) | length));
        NeutralEndian.Write1(output, (byte)code);
    }

    // Mark end of header.
    NeutralEndian.Write1(output, 0xff);

    // Write the encoded bitstream.
    UInt8_E_L_OutputBitStream bitStream = new UInt8_E_L_OutputBitStream(output);

    // The RLE-encoded source makes for a far faster encode as we simply
    // use the nibble runs as an index into the map, meaning a quick binary
    // search gives us the code to use (if in the map) or tells us that we
    // need to use inline RLE.
    foreach (var nibbleRun in rleSource)
    {
        KeyValuePair<long, byte> value;
        if (codeMap.TryGetValue(nibbleRun, out value))
        {
            long code = value.Key;
            byte len = value.Value;

            // len with bit 7 set is a device to bypass the code table at the
            // start of the file. We need to clear the bit here before writing
            // the code to the file.
            len &= 0x7f;

            // We can have codes in the 9-12 range due to the break up of large
            // inlined runs into smaller non-inlined runs. Deal with those high
            // bits first, if needed.
            if (len > 8)
            {
                bitStream.Write((byte)((code >> 8) & 0xff), len - 8);
                len = 8;
            }

            bitStream.Write((byte)(code & 0xff), len);
        }
        else
        {
            // Run has no code: emit the reserved 111111 prefix, then the run
            // inline as a 3-bit repeat count and a 4-bit nibble value.
            bitStream.Write(0x3f, 6);
            bitStream.Write(nibbleRun.Count, 3);
            bitStream.Write(nibbleRun.Nibble, 4);
        }
    }

    // Fill remainder of last byte with zeroes and write if needed.
    bitStream.Flush(false);
}