Esempio n. 1
0
        // Huffman table setup routines

        // Define a Huffman table
        //   cinfo   - compressor instance; handed to jpeg_alloc_huff_table and ERREXIT
        //   htblptr - destination table slot; a table is allocated into it when it is null
        //   bits    - symbol counts per code length; entries 1..16 are summed below
        //   val     - symbol values; the first (sum of bits[1..16]) entries are copied
        // JERR_BAD_HUFF_TABLE is reported through ERREXIT when that sum is outside 1..256.
        static void add_huff_table(jpeg_compress cinfo, ref JHUFF_TBL htblptr, byte[] bits, byte[] val)
        {
            // Obtain a table the first time this slot is used.
            if (htblptr == null)
            {
                htblptr = jpeg_alloc_huff_table(cinfo);
            }

            // Take over the per-code-length counts. The number of entries copied
            // is dictated by the size of the destination array.
            int countEntries = htblptr.bits.Length;
            Array.Copy(bits, htblptr.bits, countEntries);

            // Add up the counts for code lengths 1..16. The total tells us how
            // many entries of val[] are meaningful, so the copy below cannot
            // run past the symbols that the counts actually describe.
            int total = 0;
            int codeLength = 1;
            while (codeLength <= 16)
            {
                total += bits[codeLength];
                codeLength++;
            }

            bool totalIsPlausible = total >= 1 && total <= 256;
            if (!totalIsPlausible)
            {
                // NOTE(review): presumably ERREXIT does not return - confirm.
                ERREXIT(cinfo, J_MESSAGE_CODE.JERR_BAD_HUFF_TABLE);
            }

            // Only the symbols covered by the counts are transferred.
            Array.Copy(val, htblptr.huffval, total);

            // Flag the table as not yet written to the JPEG file.
            htblptr.sent_table = false;
        }
Esempio n. 2
0
		// Generate an optimal table definition given the specified counts
		// Generate the best Huffman code table for the given counts, fill htbl.

		// The JPEG standard requires that no symbol be assigned a codeword of all
		// one bits (so that padding bits added at the end of a compressed segment
		// can't look like a valid code). Because of the canonical ordering of
		// codewords, this just means that there must be an unused slot in the
		// longest codeword length category. Section K.2 of the JPEG spec suggests
		// reserving such a slot by pretending that symbol 256 is a valid symbol
		// with count 1. In theory that's not optimal; giving it count zero but
		// including it in the symbol set anyway should give a better Huffman code.
		// But the theoretically better code actually seems to come out worse in
		// practice, because it produces more all-ones bytes (which incur stuffed
		// zero bytes in the final file). In any case the difference is tiny.

		// The JPEG standard requires Huffman codes to be no more than 16 bits long.
		// If some symbols have a very small but nonzero probability, the Huffman tree
		// must be adjusted to meet the code length restriction. We currently use
		// the adjustment method suggested in JPEG section K.2. This method is *not*
		// optimal; it may not choose the best possible limited-length code. But
		// typically only very-low-frequency symbols will be given less-than-optimal
		// lengths, so the code is almost optimal. Experimental comparisons against
		// an optimal limited-length-code algorithm indicate that the difference is
		// microscopic --- usually less than a hundredth of a percent of total size.
		// So the extra complexity of an optimal algorithm doesn't seem worthwhile.
		//   cinfo - compressor instance; only used for error reporting via ERREXIT
		//   htbl  - table that receives bits[0..16], huffval[] and sent_table=false
		//   freq  - symbol counts for indices 0..256; NOTE: this array is modified
		//           here (entry 256 is forced to 1 and counts are merged/zeroed)
		static void jpeg_gen_optimal_table(jpeg_compress cinfo, JHUFF_TBL htbl, int[] freq)
		{
			const int maxInitialLength=32;			// assumed maximum code length before limiting
			const int searchCeiling=1000000000;		// start value of each minimum search;
													// larger counts are never selected

			byte[] lengthCounts=new byte[maxInitialLength+1];	// lengthCounts[n] = # of symbols with code length n
			int[] codeLength=new int[257];						// codeLength[s] = code length of symbol s
			int[] nextInBranch=new int[257];					// next symbol in the same tree branch, -1 = none

			// Every symbol starts out as a branch of its own
			int slot=0;
			while(slot<257) nextInBranch[slot++]=-1;

			// Pseudo-symbol 256 always takes part in the procedure (see the
			// explanation in front of this function)
			freq[256]=1;

			// Huffman's procedure: keep merging the two rarest entries
			while(true)
			{
				// Rarest nonzero count; "<=" makes the larger symbol number win ties
				int least=-1;
				int best=searchCeiling;
				for(int s=0; s<=256; s++)
				{
					int f=freq[s];
					if(f==0||f>best) continue;
					best=f;
					least=s;
				}

				// Rarest nonzero count among the remaining symbols, same tie rule
				int second=-1;
				best=searchCeiling;
				for(int s=0; s<=256; s++)
				{
					if(s==least) continue;
					int f=freq[s];
					if(f==0||f>best) continue;
					best=f;
					second=s;
				}

				// A single remaining entry means the tree is complete
				if(second<0) break;

				// Fold the second entry into the first one
				freq[least]+=freq[second];
				freq[second]=0;

				// Every member of the first branch gets one bit longer;
				// remember the last member so the other branch can be appended
				int tail=least;
				for(int node=least; node>=0; node=nextInBranch[node])
				{
					codeLength[node]++;
					tail=node;
				}
				nextInBranch[tail]=second;

				// Every member of the second branch gets one bit longer as well
				for(int node=second; node>=0; node=nextInBranch[node]) codeLength[node]++;
			}

			// Histogram of code lengths (codeLength has exactly the 257 symbol entries)
			foreach(int len in codeLength)
			{
				if(len==0) continue;

				// Lengths beyond the assumed maximum are reported as an error
				if(len>maxInitialLength) ERREXIT(cinfo, J_MESSAGE_CODE.JERR_HUFF_CLEN_OVERFLOW);

				lengthCounts[len]++;
			}

			// Limit the code lengths to 16 bits with the method from section K.2:
			// take a pair out of the over-long category, give the pair's prefix
			// (one bit shorter) to one of the two, and turn a code word from the
			// nearest shorter populated category below that prefix length into
			// the prefix of two code words that are one bit longer.
			int k=maxInitialLength;
			while(k>16)
			{
				while(lengthCounts[k]>0)
				{
					int j;
					for(j=k-2; lengthCounts[j]==0; j--) { }	// length of the code word to split

					lengthCounts[k]-=2;		// the pair leaves this category
					lengthCounts[k-1]++;	// one of them takes the pair's prefix
					lengthCounts[j+1]+=2;	// two code words below the split one
					lengthCounts[j]--;		// the split code word is now a prefix
				}
				k--;
			}

			// k is 16 here: locate the longest length still in use and drop the
			// entry that belongs to pseudo-symbol 256
			for(; lengthCounts[k]==0; k--) { }
			lengthCounts[k]--;

			// Hand back the counts for lengths 0..16
			Array.Copy(lengthCounts, htbl.bits, 17);

			// Hand back the real symbols (0..255) ordered by their code length as
			// computed by the Huffman procedure, ascending symbol number within
			// each length. The length changes made by the limiting step are not
			// applied to codeLength[]; only the resulting order is used.
			int outPos=0;
			for(int len=1; len<=maxInitialLength; len++)
			{
				for(int sym=0; sym<=255; sym++)
				{
					if(codeLength[sym]==len) htbl.huffval[outPos++]=(byte)sym;
				}
			}

			// Flag the table as not yet written to the JPEG file
			htbl.sent_table=false;
		}
Esempio n. 3
0
        // Expand a Huffman table definition into the derived format
        // Compute the derived values for a Huffman table.
        // This routine also performs some validation checks on the table.
        //   cinfo - compressor whose dc_huff_tbl_ptrs / ac_huff_tbl_ptrs hold the source table
        //   isDC  - true selects the DC table array, false the AC table array
        //   tblno - table index; must satisfy 0 <= tblno < NUM_HUFF_TBLS
        //   pdtbl - derived table to fill (ehufco / ehufsi); created here when null
        // Problems are reported through ERREXIT / ERREXIT1.
        static void jpeg_make_c_derived_tbl(jpeg_compress cinfo, bool isDC, int tblno, ref c_derived_tbl pdtbl)
        {
            // The code lengths and codes computed below are laid out in
            // code-length order, i.e. in the same order as htbl.huffval[].

            // Locate the source table
            bool tblnoInRange = tblno >= 0 && tblno < NUM_HUFF_TBLS;
            if (!tblnoInRange)
            {
                ERREXIT1(cinfo, J_MESSAGE_CODE.JERR_NO_HUFF_TABLE, tblno);
            }

            JHUFF_TBL htbl;
            if (isDC)
            {
                htbl = cinfo.dc_huff_tbl_ptrs[tblno];
            }
            else
            {
                htbl = cinfo.ac_huff_tbl_ptrs[tblno];
            }

            if (htbl == null)
            {
                ERREXIT1(cinfo, J_MESSAGE_CODE.JERR_NO_HUFF_TABLE, tblno);
            }

            // Create the output object on first use; any failure while doing so
            // is reported as an out-of-memory error.
            if (pdtbl == null)
            {
                try
                {
                    pdtbl = new c_derived_tbl();
                }
                catch
                {
                    ERREXIT1(cinfo, J_MESSAGE_CODE.JERR_OUT_OF_MEMORY, 4);
                }
            }

            c_derived_tbl derived = pdtbl;

            // Figure C.1: list the code length of every symbol, shortest first
            byte[] sizes = new byte[257];
            int    count = 0;

            for (int len = 1; len <= 16; len++)
            {
                int remaining = htbl.bits[len];

                // Refuse counts that would overflow the 256 symbol slots
                if (remaining < 0 || (count + remaining) > 256)
                {
                    ERREXIT(cinfo, J_MESSAGE_CODE.JERR_BAD_HUFF_TABLE);
                }

                for (; remaining != 0; remaining--)
                {
                    sizes[count++] = (byte)len;
                }
            }
            sizes[count] = 0;                   // a zero length terminates the list
            int symbolCount = count;

            // Figure C.2: assign the codes, checking on the way that the counts
            // describe a legal Huffman code tree
            uint[] codes    = new uint[257];
            uint   nextCode = 0;
            int    curLen   = sizes[0];
            int    idx      = 0;

            while (sizes[idx] != 0)
            {
                // Consecutive codes for all symbols sharing the current length
                for (; sizes[idx] == curLen; idx++)
                {
                    codes[idx] = nextCode;
                    nextCode++;
                }

                // nextCode is one past the last code of this length; it still has
                // to fit in curLen bits because an all-ones code is not allowed
                if ((int)nextCode >= (1 << curLen))
                {
                    ERREXIT(cinfo, J_MESSAGE_CODE.JERR_BAD_HUFF_TABLE);
                }

                nextCode <<= 1;
                curLen++;
            }

            // Figure C.3: build the tables indexed by symbol value

            // Length 0 marks "no code assigned". This makes duplicate symbols
            // detectable below, and the original comment notes that emit_bits
            // later uses it to catch attempts to emit such symbols.
            int sym = 0;
            while (sym < 256)
            {
                derived.ehufsi[sym] = 0;
                sym++;
            }

            // Symbols may range over 0..255 in an AC table but only over 0..16 in
            // a DC table; out-of-range and repeated symbols are rejected.
            int maxsymbol = 255;
            if (isDC)
            {
                maxsymbol = 16;
            }

            for (int pos = 0; pos < symbolCount; pos++)
            {
                int  value      = htbl.huffval[pos];
                bool outOfRange = value < 0 || value > maxsymbol;

                // The duplicate test is only evaluated for in-range symbols
                if (outOfRange || derived.ehufsi[value] != 0)
                {
                    ERREXIT(cinfo, J_MESSAGE_CODE.JERR_BAD_HUFF_TABLE);
                }

                derived.ehufco[value] = codes[pos];
                derived.ehufsi[value] = sizes[pos];
            }
        }
Esempio n. 4
0
        // Generate an optimal table definition given the specified counts
        // Generate the best Huffman code table for the given counts, fill htbl.

        // The JPEG standard requires that no symbol be assigned a codeword of all
        // one bits (so that padding bits added at the end of a compressed segment
        // can't look like a valid code). Because of the canonical ordering of
        // codewords, this just means that there must be an unused slot in the
        // longest codeword length category. Section K.2 of the JPEG spec suggests
        // reserving such a slot by pretending that symbol 256 is a valid symbol
        // with count 1. In theory that's not optimal; giving it count zero but
        // including it in the symbol set anyway should give a better Huffman code.
        // But the theoretically better code actually seems to come out worse in
        // practice, because it produces more all-ones bytes (which incur stuffed
        // zero bytes in the final file). In any case the difference is tiny.

        // The JPEG standard requires Huffman codes to be no more than 16 bits long.
        // If some symbols have a very small but nonzero probability, the Huffman tree
        // must be adjusted to meet the code length restriction. We currently use
        // the adjustment method suggested in JPEG section K.2. This method is *not*
        // optimal; it may not choose the best possible limited-length code. But
        // typically only very-low-frequency symbols will be given less-than-optimal
        // lengths, so the code is almost optimal. Experimental comparisons against
        // an optimal limited-length-code algorithm indicate that the difference is
        // microscopic --- usually less than a hundredth of a percent of total size.
        // So the extra complexity of an optimal algorithm doesn't seem worthwhile.
        //   cinfo - compressor instance; only used for error reporting via ERREXIT
        //   htbl  - table that receives bits[0..16], huffval[] and sent_table = false
        //   freq  - symbol counts for indices 0..256; NOTE: this array is modified
        //           here (entry 256 is forced to 1 and counts are merged/zeroed)
        static void jpeg_gen_optimal_table(jpeg_compress cinfo, JHUFF_TBL htbl, int[] freq)
        {
            const int lengthLimit = 32;                 // assumed maximum code length before limiting
            const int startValue  = 1000000000;         // start value of each minimum search;
                                                        // larger counts are never selected

            byte[] perLength = new byte[lengthLimit + 1];   // perLength[n] = # of symbols with code length n
            int[]  codeLen   = new int[257];                // codeLen[s] = code length of symbol s
            int[]  link      = new int[257];                // next symbol in the same tree branch, -1 = none

            // Every symbol starts out as a branch of its own
            for (int s = 256; s >= 0; s--)
            {
                link[s] = -1;
            }

            // Pseudo-symbol 256 always takes part in the procedure (see the
            // explanation in front of this function)
            freq[256] = 1;

            // Huffman's procedure: keep merging the two rarest entries
            bool merging = true;
            while (merging)
            {
                // Rarest nonzero count; "<=" makes the larger symbol number win ties
                int first  = -1;
                int lowest = startValue;
                for (int s = 0; s <= 256; s++)
                {
                    int count = freq[s];
                    if (count != 0 && count <= lowest)
                    {
                        lowest = count;
                        first  = s;
                    }
                }

                // Rarest nonzero count among the remaining symbols, same tie rule
                int second = -1;
                lowest = startValue;
                for (int s = 0; s <= 256; s++)
                {
                    if (s == first)
                    {
                        continue;
                    }

                    int count = freq[s];
                    if (count != 0 && count <= lowest)
                    {
                        lowest = count;
                        second = s;
                    }
                }

                if (second < 0)
                {
                    // A single remaining entry means the tree is complete
                    merging = false;
                }
                else
                {
                    // Fold the second entry into the first one
                    freq[first] += freq[second];
                    freq[second] = 0;

                    // Every member of the first branch gets one bit longer;
                    // stop on its last member so the other branch can be appended
                    int tail = first;
                    for (; ;)
                    {
                        codeLen[tail]++;
                        int next = link[tail];
                        if (next < 0)
                        {
                            break;
                        }
                        tail = next;
                    }
                    link[tail] = second;

                    // Every member of the second branch gets one bit longer as well
                    int member = second;
                    do
                    {
                        codeLen[member]++;
                        member = link[member];
                    }
                    while (member >= 0);
                }
            }

            // Histogram of code lengths over all 257 symbols
            for (int s = 0; s <= 256; s++)
            {
                int len = codeLen[s];
                if (len == 0)
                {
                    continue;
                }

                // Lengths beyond the assumed maximum are reported as an error
                if (len > lengthLimit)
                {
                    ERREXIT(cinfo, J_MESSAGE_CODE.JERR_HUFF_CLEN_OVERFLOW);
                }

                perLength[len]++;
            }

            // Limit the code lengths to 16 bits with the method from section K.2:
            // take a pair out of the over-long category, give the pair's prefix
            // (one bit shorter) to one of the two, and turn a code word from the
            // nearest shorter populated category below that prefix length into
            // the prefix of two code words that are one bit longer.
            int k = lengthLimit;
            while (k > 16)
            {
                while (perLength[k] > 0)
                {
                    // Length of the code word that will be split
                    int j = k - 2;
                    while (perLength[j] == 0)
                    {
                        j--;
                    }

                    perLength[k] -= 2;                  // the pair leaves this category
                    perLength[k - 1]++;                 // one of them takes the pair's prefix
                    perLength[j + 1] += 2;              // two code words below the split one
                    perLength[j]--;                     // the split code word is now a prefix
                }
                k--;
            }

            // k is 16 here: locate the longest length still in use and drop the
            // entry that belongs to pseudo-symbol 256
            for (; perLength[k] == 0; k--)
            {
            }
            perLength[k]--;

            // Hand back the counts for lengths 0..16
            Array.Copy(perLength, htbl.bits, 17);

            // Hand back the real symbols (0..255) ordered by their code length as
            // computed by the Huffman procedure, ascending symbol number within
            // each length. The length changes made by the limiting step are not
            // applied to codeLen[]; only the resulting order is used.
            int outPos = 0;
            for (int len = 1; len <= lengthLimit; len++)
            {
                for (int s = 0; s <= 255; s++)
                {
                    if (codeLen[s] != len)
                    {
                        continue;
                    }

                    htbl.huffval[outPos] = (byte)s;
                    outPos++;
                }
            }

            // Flag the table as not yet written to the JPEG file
            htbl.sent_table = false;
        }
Esempio n. 5
0
		// Huffman table setup routines

		// Define a Huffman table
		//   cinfo   - compressor instance; handed to jpeg_alloc_huff_table and ERREXIT
		//   htblptr - destination table slot; a table is allocated into it when it is null
		//   bits    - symbol counts per code length; entries 1..16 are summed below
		//   val     - symbol values; the first (sum of bits[1..16]) entries are copied
		// JERR_BAD_HUFF_TABLE is reported through ERREXIT when that sum is outside 1..256.
		static void add_huff_table(jpeg_compress cinfo, ref JHUFF_TBL htblptr, byte[] bits, byte[] val)
		{
			// Obtain a table the first time this slot is used
			if(htblptr==null)
			{
				htblptr=jpeg_alloc_huff_table(cinfo);
			}

			// Take over the per-code-length counts; the destination array
			// decides how many entries are copied
			Array.Copy(bits, htblptr.bits, htblptr.bits.Length);

			// Total number of symbols described by the counts for lengths 1..16.
			// It bounds the copy from val[] below, so that only the symbols the
			// counts actually describe are read.
			int symbolTotal=0;
			int n=1;
			do
			{
				symbolTotal+=bits[n];
				n++;
			}
			while(n<=16);

			// A table must describe between 1 and 256 symbols
			if(symbolTotal<1)
			{
				ERREXIT(cinfo, J_MESSAGE_CODE.JERR_BAD_HUFF_TABLE);
			}
			else if(symbolTotal>256)
			{
				ERREXIT(cinfo, J_MESSAGE_CODE.JERR_BAD_HUFF_TABLE);
			}

			Array.Copy(val, htblptr.huffval, symbolTotal);

			// Flag the table as not yet written to the JPEG file
			htblptr.sent_table=false;
		}