示例#1
0
        /// <summary>
        /// Console entry point. Prompts for a mode and then either encodes every
        /// .txt/.html file in the current directory, or decodes every
        /// .ofsd/.bofsd file in it and prints the recovered text to the console.
        /// </summary>
        /// <remarks>
        /// Encoding writes three kinds of files next to the inputs:
        /// "master.dcer" (the byte values that occur in the inputs, most frequent
        /// first), "master.ofsr" (the per-position average of the re-mapped bytes
        /// of all inputs) and one ".ofsd" file per input (".bofsd" for inputs of
        /// the greatest length). Each per-input file holds either bit-packed
        /// offsets from the master record or, when that would be larger, a zero
        /// flag byte followed by the re-mapped bytes.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Keep prompting until one of the two supported modes is entered.
            string encodetmp;
            do
            {
                Console.WriteLine("select mode: encode or decode");
                string line = Console.ReadLine();
                if (line == null)
                {
                    // ReadLine returns null once standard input is exhausted;
                    // stop here instead of dereferencing null.
                    return;
                }
                encodetmp = line.ToLower();
            }
            while (!(encodetmp.Equals("encode") || encodetmp.Equals("decode")));
            Console.WriteLine();
            string path = Environment.CurrentDirectory;

            if (encodetmp.Equals("encode"))
            {
                //clean up old encoded files (temporary code)
                foreach (var file in new DirectoryInfo(path).GetFiles("*.*").Where(entry =>
                             entry.FullName.EndsWith(".bofsd", StringComparison.OrdinalIgnoreCase) ||
                             entry.FullName.EndsWith(".ofsd", StringComparison.OrdinalIgnoreCase) ||
                             entry.FullName.EndsWith(".ofsr", StringComparison.OrdinalIgnoreCase) ||
                             entry.FullName.EndsWith(".dcer", StringComparison.OrdinalIgnoreCase)).ToArray())
                {
                    file.Delete();
                }

                // encode
                UInt64 bitcount1 = 0;   // total size of the inputs, in bits
                UInt64 bitcount2 = 0;   // total size of everything written, in bits

                //get rid of spaces in filenames
                foreach (var file in new DirectoryInfo(path).GetFiles("*.*").Where(entry =>
                             entry.FullName.EndsWith(".txt", StringComparison.OrdinalIgnoreCase) ||
                             entry.FullName.EndsWith(".html", StringComparison.OrdinalIgnoreCase)).ToArray())
                {
                    string cleanname = file.Name.Replace(" ", "_");
                    if (cleanname == file.Name)
                    {
                        // No spaces in the name, so there is nothing to rename.
                        continue;
                    }
                    try
                    {
                        File.Move(file.FullName, Path.Combine(path, cleanname));
                    }
                    catch (IOException ex)
                    {
                        // Best effort: e.g. the target name is already taken. The file
                        // keeps its original name and is still encoded below.
                        Console.Error.WriteLine("could not rename " + file.Name + ": " + ex.Message);
                    }
                    catch (UnauthorizedAccessException ex)
                    {
                        Console.Error.WriteLine("could not rename " + file.Name + ": " + ex.Message);
                    }
                }

                // calculate the base values for the master offset record and write it to disk
                string[] filepaths = Directory.GetFiles(path, "*.*", SearchOption.TopDirectoryOnly).Where(name =>
                                         name.EndsWith(".txt", StringComparison.OrdinalIgnoreCase) ||
                                         name.EndsWith(".html", StringComparison.OrdinalIgnoreCase)).ToArray();
                List <byte[]> filecontents = new List <byte[]>();               // contents of text files stored in this list
                foreach (string filepath in filepaths)
                {
                    filecontents.Add(File.ReadAllBytes(filepath));
                    bitcount1 += (UInt64)filecontents[filecontents.Count - 1].Length * 8;
                }

                // create dynamic character encoding record
                // Count how often each byte value occurs. The byte itself is the index:
                // routing it through a UTF-8 encode would file every value >= 0x80
                // under the lead byte 0xC2/0xC3 instead of under its own value.
                UInt64[] charcounts = new UInt64[256];
                foreach (byte[] contents in filecontents)
                {
                    foreach (byte value in contents)
                    {
                        charcounts[value]++;
                    }
                }
                UInt64[] charcountssorted = new UInt64[256];
                charcounts.CopyTo(charcountssorted, 0);
                Array.Sort(charcountssorted);
                Array.Reverse(charcountssorted);                                // descending counts
                int stopindex = Array.IndexOf(charcountssorted, (UInt64)0);    // number of byte values that actually occur
                if (stopindex < 0)
                {
                    // No zero count at all: every one of the 256 byte values occurs.
                    stopindex = charcountssorted.Length;
                }
                // NOTE(review): charorders is declared outside this method; this assumes
                // it has room for up to 256 entries - confirm.
                for (int f = 0; f < stopindex; f++)
                {
                    int tmpindex = Array.IndexOf(charcounts, charcountssorted[f]);
                    charorders[f]        = (byte)tmpindex;
                    charcounts[tmpindex] = 0;                                   // mark as consumed so equal counts pick the next value
                }
                using (var sw = new FileStream(Path.Combine(path, "master.dcer"), FileMode.Create, FileAccess.Write))
                {
                    for (int i = 0; i < stopindex; i++)
                    {
                        bitcount2 += 8;
                        sw.WriteByte(charorders[i]);
                    }
                }
                // Re-map every input byte in place; everything below works on the re-mapped bytes.
                for (int c = 0; c < filecontents.Count; c++)
                {
                    for (int bindex = 0; bindex < filecontents[c].Length; bindex++)
                    {
                        filecontents[c][bindex] = DynamicEncodeByte(filecontents[c][bindex]);
                    }
                }

                // create master offset record
                int maxl = 0;
                foreach (byte[] s in filecontents)
                {
                    maxl = maxl < s.Length ? s.Length : maxl;
                }
                UInt32[] avtemp      = new UInt32[maxl];                        // per-position sum, then per-position average
                int[]    filelengths = new int[filecontents.Count];
                for (int q = 0; q < filecontents.Count; q++)
                {
                    int c = filecontents[q].Length;
                    for (int i = 0; i < c; i++)
                    {
                        avtemp[i] += filecontents[q][i];
                    }
                    filelengths[q] = c;
                }
                for (int i = 0; i < maxl; i++)
                {
                    // Divide by the number of files that are long enough to have a byte at
                    // position i. At least one file has length maxl, so this is never zero.
                    int overlapcount = 0;
                    foreach (int g in filelengths)
                    {
                        if (g > i)
                        {
                            overlapcount++;
                        }
                    }
                    avtemp[i] /= (UInt32)overlapcount;
                }
                using (var sw = new FileStream(Path.Combine(path, "master.ofsr"), FileMode.Create, FileAccess.Write))
                {
                    foreach (byte b in avtemp)
                    {
                        bitcount2 += 8;
                        sw.WriteByte(b);
                    }
                }

                int biggestlength       = filecontents.Aggregate(new byte[] { }, (max, cur) => max.Length > cur.Length ? max : cur).Length;
                int secondbiggestlength = filecontents.Where(x => x.Length < biggestlength).Aggregate(new byte[] { }, (max, cur) => max.Length > cur.Length ? max : cur).Length;

                // now compress the text files from earlier and write the results to disk
                int index = -1;
                foreach (string filepath in filepaths)
                {
                    string ext = ".ofsd";
                    index++;
                    if (filecontents[index].Length == biggestlength)
                    {
                        ext = ".bofsd";
                    }
                    using (var sw = new FileStream(filepath + ext, FileMode.Create, FileAccess.Write))
                    {
                        /*
                         * metadata structure:
                         *  1 bit for compression state (0: not compressed / 1: compressed)
                         *  3 bits for word skip (skip X words from end of file)
                         *  3 bits for word length (length of a word: X + 1)
                         *  N bits for number of bad words (N: ceiling(log2((total file length(bits)/(word length))))
                         *  a sequence of N Z length bad word indexes (N: number of bad words; Z: ceiling(log2((total file length(bits)/(word length))))
                         */

                        // 0 length files should be left empty
                        if (filecontents[index].Length < 1)
                        {
                            continue;
                        }

                        // populate a list with offset values(calculated against the master offset record) for the current file
                        List <int> offsets = new List <int>();
                        for (int i = 0; i < filecontents[index].Length; i++)
                        {
                            offsets.Add(filecontents[index][i] - (int)avtemp[i]);
                        }

                        // now find the optimal number of bits to constitute a word
                        // (candidate word lengths are 3..7; datasizes[k] is the estimate for length k + 3)
                        int   start     = 3;
                        int[] datasizes = new int[8 - start];
                        for (; start < 8; start++)
                        {
                            int datasize = 0;
                            for (int i = 0; i < offsets.Count; i++)
                            {
                                // sign bit + magnitude bits needed for this offset
                                if ((int)Math.Ceiling(Math.Log(Math.Abs(offsets[i]) + 1, 2)) + 1 > start)
                                {
                                    datasize += (int)Math.Ceiling(Math.Log((offsets.Count * start), 2)) + 8;                                     // crude estimate of the number of bits it would take to represent a single bad word
                                }
                                else
                                {
                                    datasize += start;
                                }
                            }
                            datasizes[start - (8 - datasizes.Length)] = datasize;
                        }
                        int finalbits        = 0;
                        int smallestdatasize = -1;
                        for (int i = 0; i < datasizes.Length; i++)
                        {
                            if (smallestdatasize > datasizes[i] || smallestdatasize == -1)
                            {
                                smallestdatasize = datasizes[i];
                                finalbits        = i + (8 - datasizes.Length);
                            }
                        }                                                  // optimal number of bits now stored in finalbits

                        CustomBitArray outbitarray = new CustomBitArray(); // prepare bitarray for output
                        outbitarray.AddBitsFromInt(finalbits, 3);          // add word length metadata to outbitarray

                        // add offset data to outbitarray
                        List <int> badwordindexes = new List <int>();
                        for (int i = 0; i < offsets.Count; i++)
                        {
                            // Positions past the second-longest file are not written; the decoder
                            // fills the tail of a ".bofsd" file from the master record.
                            // NOTE(review): when two or more files share the greatest length the
                            // master record in that tail is an average, not a copy, so skipping
                            // these words looks lossy for each of them - confirm.
                            if (i > secondbiggestlength)
                            {
                                break;
                            }
                            if (offsets[i] == 0 || (int)Math.Ceiling(Math.Log(Math.Abs(offsets[i]) + 1, 2)) + 1 <= finalbits)                         //handle good words
                            {
                                // one sign bit (true = negative) followed by finalbits - 1 magnitude bits
                                if (offsets[i] < 0)
                                {
                                    outbitarray.bits.Add(true);
                                }
                                else
                                {
                                    outbitarray.bits.Add(false);
                                }
                                foreach (char c in Convert.ToString(Math.Abs(offsets[i]), 2).PadLeft(8, '0').Substring(8 - (finalbits - 1)))
                                {
                                    if (c == '1')
                                    {
                                        outbitarray.bits.Add(true);
                                    }
                                    else
                                    {
                                        outbitarray.bits.Add(false);
                                    }
                                }
                            }
                            else                             // handle bad words
                            {
                                badwordindexes.Add(i);       // badwordindexes will be populated with... bad word indexes
                                // a bad word is stored as the full 8-bit re-mapped byte
                                foreach (char c in Convert.ToString(((int)avtemp[i] + offsets[i]), 2).PadLeft(8, '0'))
                                {
                                    if (c == '1')
                                    {
                                        outbitarray.bits.Add(true);
                                    }
                                    else
                                    {
                                        outbitarray.bits.Add(false);
                                    }
                                }
                            }
                        }

                        // temporary metadata filler
                        outbitarray.bits.Insert(0, false);
                        outbitarray.bits.Insert(0, false);
                        outbitarray.bits.Insert(0, false);
                        outbitarray.bits.Insert(0, true);

                        // calculate bad word count and bad word index bit lengths
                        // (iterate until the field width is consistent with the total length it produces)
                        bool stable  = false;
                        int  tempvar = 0;
                        while (!stable)
                        {
                            int lon = (int)Math.Ceiling(Math.Log((((outbitarray.bits.Count + tempvar) + (8 - ((outbitarray.bits.Count + tempvar) % 8))) / finalbits), 2));
                            tempvar = lon * (1 + badwordindexes.Count);
                            if ((int)Math.Ceiling(Math.Log((((outbitarray.bits.Count + tempvar) + (8 - ((outbitarray.bits.Count + tempvar) % 8))) / finalbits), 2)) == lon)
                            {
                                tempvar = lon;
                                stable  = true;
                            }
                        }

                        // Insert the indexes back to front at bit 7 so they end up in ascending
                        // order, then the count in front of them.
                        for (int i = badwordindexes.Count - 1; i >= 0; i--)
                        {
                            outbitarray.InsertBitsFromStr(Convert.ToString(badwordindexes[i], 2).PadLeft(tempvar, '0'), tempvar, 7);
                        }
                        outbitarray.InsertBitsFromStr(Convert.ToString(badwordindexes.Count, 2).PadLeft(tempvar, '0'), tempvar, 7);

                        // pad to a whole number of bytes, remembering how many bits were added
                        int ce = 0;
                        while (outbitarray.bits.Count % 8 != 0)
                        {
                            ce++;
                            outbitarray.bits.Add(false);
                        }
                        // replace the three filler bits with the real word-skip value
                        outbitarray.bits.RemoveAt(1);
                        outbitarray.bits.RemoveAt(1);
                        outbitarray.bits.RemoveAt(1);
                        outbitarray.InsertBitsFromStr(Convert.ToString(ce / finalbits, 2).PadLeft(3, '0'), 3, 1);

                        // If packing did not pay off, store a 0 flag byte followed by the
                        // re-mapped bytes instead.
                        if (outbitarray.bits.Count > ((filecontents[index].Length * 8) + 8))
                        {
                            bitcount2 += (UInt64)((filecontents[index].Length * 8) + 8);
                            sw.WriteByte(0);
                            foreach (byte b in filecontents[index])
                            {
                                sw.WriteByte(b);
                            }
                            continue;
                        }
                        foreach (byte b in ConvertBoolArrayToByteArray(outbitarray.bits.ToArray()))
                        {
                            bitcount2 += 8;
                            sw.WriteByte(b);
                        }
                    }
                }

                // The source files are left in place.
                Console.WriteLine("total original bit count: " + bitcount1);
                Console.WriteLine("total compressed bit count: " + bitcount2);
                if (bitcount1 == 0)
                {
                    // No input bytes: the ratio is undefined (division by zero).
                    Console.WriteLine("compression ratio: n/a");
                }
                else
                {
                    Console.WriteLine("compression ratio: " + (int)((double)bitcount2 / bitcount1 * 100) + "%");
                }
                Console.ReadKey();
            }
            else
            {
                // decode
                string[] filepaths2 = Directory.GetFiles(path, "*.*", SearchOption.TopDirectoryOnly).Where(name =>
                                          name.EndsWith(".ofsd", StringComparison.OrdinalIgnoreCase) ||
                                          name.EndsWith(".bofsd", StringComparison.OrdinalIgnoreCase)).ToArray();
                List <byte[]> filecontents2 = new List <byte[]>();
                foreach (string filepath in filepaths2)
                {
                    filecontents2.Add(File.ReadAllBytes(filepath));
                }
                if (filecontents2.Count < 1)
                {
                    // Checked before the master files are opened so that a directory
                    // without any encoded files does not fail on the missing records.
                    Console.WriteLine("nothing to display");
                    Console.ReadKey();
                    return;
                }
                byte[] masteroffsetrecord = File.ReadAllBytes(Path.Combine(path, "master.ofsr"));
                charorders = File.ReadAllBytes(Path.Combine(path, "master.dcer"));
                int intex = -1;
                foreach (byte[] str in filecontents2)
                {
                    intex++;
                    if (str.Length < 1)
                    {
                        continue;
                    }

                    // The top bit of the first byte is the compression flag.
                    if ((str[0] & 0x80) == 0)
                    {
                        // Stored without packing: a flag byte followed by re-mapped bytes.
                        // The encoder writes them after its re-mapping pass, so they have to
                        // be mapped back just like the bytes of the packed form.
                        byte[] raw = new byte[str.Length - 1];
                        for (int k = 0; k < raw.Length; k++)
                        {
                            raw[k] = (byte)ReverseDynamicEncodeByte(str[k + 1]);
                        }
                        Console.Write("file contents(" + filepaths2[intex] + "): ");
                        Console.WriteLine(Encoding.UTF8.GetString(raw, 0, raw.Length) + "\n");
                        continue;
                    }

                    // Expand the file into a string of '0'/'1' characters, 8 per byte.
                    StringBuilder bitbuilder = new StringBuilder(str.Length * 8);
                    foreach (byte b in str)
                    {
                        bitbuilder.Append(Convert.ToString(b, 2).PadLeft(8, '0'));
                    }
                    string bitstring = bitbuilder.ToString();

                    // Header: bit 0 flag, bits 1-3 word skip, bits 4-6 word length, then the
                    // bad word count and the bad word indexes, each nlength bits wide.
                    int        wordlength       = Convert.ToInt32(bitstring.Substring(4, 3).PadLeft(32, '0'), 2);
                    int        nlength          = (int)Math.Ceiling(Math.Log((((bitstring.Length) + (8 - ((bitstring.Length) % 8))) / wordlength), 2));
                    int        badwordcount     = Convert.ToInt32(bitstring.Substring(7, nlength).PadLeft(32, '0'), 2);
                    int        ignorewordscount = Convert.ToInt32(bitstring.Substring(1, 3).PadLeft(32, '0'), 2);
                    List <int> badwordindexes   = new List <int>();
                    for (int i = 7 + nlength; i < 7 + nlength + (nlength * badwordcount); i++)
                    {
                        badwordindexes.Add(Convert.ToInt32(bitstring.Substring(i, nlength).PadLeft(32, '0'), 2));
                        i += nlength - 1;   // together with i++ this advances by one whole field
                    }
                    int nextbadwordindex = -1;
                    if (badwordindexes.Count > 0)
                    {
                        badwordindexes.Sort();
                        nextbadwordindex = badwordindexes[0];
                    }
                    int    localindex = 0;      // index of the byte currently being rebuilt
                    string bitstring2 = "";     // bits collected for the current word

                    Console.Write("file contents(" + filepaths2[intex] + "): ");
                    for (int i = 7 + nlength + (nlength * badwordcount); i < bitstring.Length - (ignorewordscount * wordlength); i++)
                    {
                        bitstring2 += bitstring[i];
                        if (bitstring2.Length == wordlength && nextbadwordindex != localindex && localindex < masteroffsetrecord.Length)
                        {
                            // good word: sign bit followed by the magnitude of the offset
                            if (bitstring2[0] == '0')
                            {
                                Console.Write((char)ReverseDynamicEncodeByte((byte)((int)masteroffsetrecord[localindex] + (int)ConvertToChar(bitstring2.Substring(1).PadLeft(8, '0')))));
                            }
                            else
                            {
                                Console.Write((char)ReverseDynamicEncodeByte((byte)((int)masteroffsetrecord[localindex] - (int)ConvertToChar(bitstring2.Substring(1).PadLeft(8, '0')))));
                            }
                            bitstring2 = "";
                            localindex++;
                        }
                        if (bitstring2.Length == 8 && nextbadwordindex == localindex)
                        {
                            // bad word: the full 8-bit re-mapped byte
                            Console.Write((char)ReverseDynamicEncodeByte((byte)ConvertToChar(bitstring2)));
                            badwordindexes.RemoveAt(0);
                            if (badwordindexes.Count > 0)
                            {
                                nextbadwordindex = badwordindexes[0];
                            }
                            bitstring2 = "";
                            localindex++;
                        }
                    }
                    if (filepaths2[intex].EndsWith(".bofsd"))
                    {
                        // The rest of a longest file is taken straight from the master record.
                        for (int z = localindex; z < masteroffsetrecord.Length; z++)
                        {
                            Console.Write((char)ReverseDynamicEncodeByte(masteroffsetrecord[z]));
                        }
                    }
                    Console.WriteLine("\n");
                }
                Console.ReadKey();
            }
        }