/// <summary>
/// Program entry point. Asks the user for a mode and then either encodes every
/// .txt/.html file in the current directory, or decodes every .ofsd/.bofsd file
/// in it and prints the recovered text to the console.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string mode = PromptForMode();
    if (mode == null)
    {
        // Standard input was closed before a valid mode was entered.
        return;
    }

    Console.WriteLine();

    // Path.Combine is used for every file below, so no trailing separator is needed
    // (a hard-coded "\\" suffix only works on Windows).
    string path = Environment.CurrentDirectory;
    if (mode == "encode")
    {
        EncodeDirectory(path);
    }
    else
    {
        DecodeDirectory(path);
    }

    Console.ReadKey();
}

/// <summary>
/// Repeats the mode prompt until the user types "encode" or "decode" (any casing).
/// </summary>
/// <returns>"encode" or "decode", or null when the input stream ends first.</returns>
private static string PromptForMode()
{
    while (true)
    {
        Console.WriteLine("select mode: encode or decode");
        string line = Console.ReadLine();
        if (line == null)
        {
            // Console.ReadLine returns null at end of input; looping again would never end.
            return null;
        }

        string mode = line.ToLowerInvariant();
        if (mode == "encode" || mode == "decode")
        {
            return mode;
        }
    }
}

/// <summary>
/// Returns true when <paramref name="name"/> ends with any of the given suffixes,
/// compared ordinally and ignoring case.
/// </summary>
private static bool EndsWithAny(string name, params string[] suffixes)
{
    foreach (string suffix in suffixes)
    {
        if (name.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }
    return false;
}

/// <summary>
/// Number of bits the encoder charges for a signed offset:
/// ceil(log2(|offset| + 1)) magnitude bits plus one sign bit.
/// </summary>
private static int SignedBitWidth(int offset)
{
    return (int)Math.Ceiling(Math.Log(Math.Abs(offset) + 1, 2)) + 1;
}

/// <summary>
/// Width in bits of the "bad word count" field and of each bad word index.
/// The encoder and the decoder must compute this identically, so both call this helper.
/// </summary>
/// <param name="totalBits">Bit length the width is derived from.</param>
/// <param name="wordBits">Length of one word in bits.</param>
private static int IndexFieldWidth(int totalBits, int wordBits)
{
    // Always adds between 1 and 8 bits (a full 8 when totalBits is already byte aligned);
    // the division is an integer division.
    int paddedBits = totalBits + (8 - (totalBits % 8));
    return (int)Math.Ceiling(Math.Log(paddedBits / wordBits, 2));
}

/// <summary>
/// Parses <paramref name="length"/> characters of a '0'/'1' string, starting at
/// <paramref name="start"/>, as an unsigned binary number. A zero-length field reads as 0.
/// </summary>
private static int ReadBits(string bits, int start, int length)
{
    return Convert.ToInt32(bits.Substring(start, length).PadLeft(32, '0'), 2);
}

/// <summary>
/// Encodes every .txt/.html file directly inside <paramref name="path"/>.
/// Writes master.dcer (byte values ordered by frequency), master.ofsr (per-position
/// average of the re-mapped file bytes) and one .ofsd/.bofsd file per input, then
/// prints size statistics.
/// </summary>
/// <param name="path">Directory to process.</param>
private static void EncodeDirectory(string path)
{
    // clean up old encoded files (temporary code)
    foreach (FileInfo stale in new DirectoryInfo(path).GetFiles("*.*"))
    {
        if (EndsWithAny(stale.FullName, ".bofsd", ".ofsd", ".ofsr", ".dcer"))
        {
            stale.Delete();
        }
    }

    UInt64 bitcount1 = 0; // total size of the input files, in bits
    UInt64 bitcount2 = 0; // total size of everything written, in bits

    // get rid of spaces in filenames
    foreach (FileInfo source in new DirectoryInfo(path).GetFiles("*.*"))
    {
        if (!EndsWithAny(source.FullName, ".txt", ".html") || source.Name.IndexOf(' ') < 0)
        {
            continue;
        }

        string target = Path.Combine(path, source.Name.Replace(" ", "_"));
        try
        {
            // Fails (leaving the original in place) when the target name already exists.
            File.Move(source.FullName, target);
        }
        catch (IOException ex)
        {
            Console.WriteLine("warning: could not rename \"" + source.Name + "\": " + ex.Message);
        }
        catch (UnauthorizedAccessException ex)
        {
            Console.WriteLine("warning: could not rename \"" + source.Name + "\": " + ex.Message);
        }
    }

    // read the text files; their contents are stored in this list
    string[] filepaths = Directory.GetFiles(path, "*.*", SearchOption.TopDirectoryOnly)
        .Where(candidate => EndsWithAny(candidate, ".txt", ".html"))
        .ToArray();
    List<byte[]> filecontents = new List<byte[]>();
    foreach (string filepath in filepaths)
    {
        byte[] data = File.ReadAllBytes(filepath);
        filecontents.Add(data);
        bitcount1 += (UInt64)data.Length * 8;
    }

    // create dynamic character encoding record
    // Each byte is counted under the first UTF-8 byte of the char with the same code,
    // so every value >= 0x80 lands in bucket 0xC2 or 0xC3. The mapping is computed once
    // per possible byte value instead of once per input byte.
    // NOTE(review): this looks unintended for non-ASCII input, but it is kept unchanged
    // because DynamicEncodeByte is not visible here - confirm before counting raw bytes.
    byte[] bucketOf = new byte[256];
    for (int value = 0; value < bucketOf.Length; value++)
    {
        bucketOf[value] = Encoding.UTF8.GetBytes("" + (char)value)[0];
    }

    UInt64[] charcounts = new UInt64[256];
    foreach (byte[] content in filecontents)
    {
        foreach (byte b in content)
        {
            charcounts[bucketOf[b]]++;
        }
    }

    // Order the byte values by descending frequency; values that never occur are left out.
    UInt64[] charcountssorted = new UInt64[256];
    charcounts.CopyTo(charcountssorted, 0);
    Array.Sort(charcountssorted);
    Array.Reverse(charcountssorted);
    int stopindex = Array.IndexOf(charcountssorted, (UInt64)0);
    for (int f = 0; f < stopindex; f++)
    {
        int tmpindex = Array.IndexOf(charcounts, charcountssorted[f]);
        charorders[f] = (byte)tmpindex;
        charcounts[tmpindex] = 0; // consumed, so equal counts resolve to the next value
    }

    using (var sw = new FileStream(Path.Combine(path, "master.dcer"), FileMode.Create, FileAccess.Write))
    {
        for (int i = 0; i < stopindex; i++)
        {
            bitcount2 += 8;
            sw.WriteByte(charorders[i]);
        }
    }

    // Re-map every byte in place; everything below works on the re-mapped values.
    foreach (byte[] content in filecontents)
    {
        for (int bindex = 0; bindex < content.Length; bindex++)
        {
            content[bindex] = DynamicEncodeByte(content[bindex]);
        }
    }

    // create master offset record: for each position, the (integer) average of the
    // bytes of all files that are long enough to have that position
    int maxl = 0;
    int[] filelengths = new int[filecontents.Count];
    for (int q = 0; q < filecontents.Count; q++)
    {
        filelengths[q] = filecontents[q].Length;
        maxl = Math.Max(maxl, filelengths[q]);
    }

    UInt32[] avtemp = new UInt32[maxl];
    foreach (byte[] content in filecontents)
    {
        for (int i = 0; i < content.Length; i++)
        {
            avtemp[i] += content[i];
        }
    }
    for (int i = 0; i < maxl; i++)
    {
        int overlapcount = 0;
        foreach (int length in filelengths)
        {
            if (length > i)
            {
                overlapcount++;
            }
        }
        avtemp[i] /= (UInt32)overlapcount; // at least one file reaches every i < maxl
    }

    using (var sw = new FileStream(Path.Combine(path, "master.ofsr"), FileMode.Create, FileAccess.Write))
    {
        foreach (UInt32 average in avtemp)
        {
            bitcount2 += 8;
            sw.WriteByte((byte)average);
        }
    }

    // A file may be stored as ".bofsd" (tail rebuilt from the master record by the decoder)
    // only when it is the single longest file: only then does the master record equal that
    // file past the second longest length. When several files tie for the longest length
    // the record holds their average there, so all of them must be stored in full.
    int biggestlength = maxl;
    int biggestcount = 0;
    int runnerup = 0;
    foreach (int length in filelengths)
    {
        if (length == biggestlength)
        {
            biggestcount++;
        }
        else if (length > runnerup)
        {
            runnerup = length;
        }
    }
    bool uniquebiggest = biggestcount == 1;
    int secondbiggestlength = uniquebiggest ? runnerup : biggestlength;

    // now compress the text files from earlier and write the results to disk
    for (int index = 0; index < filepaths.Length; index++)
    {
        byte[] content = filecontents[index];
        string ext = (uniquebiggest && content.Length == biggestlength) ? ".bofsd" : ".ofsd";
        bitcount2 += WriteEncodedFile(filepaths[index] + ext, content, avtemp, secondbiggestlength);
    }

    Console.WriteLine("total original bit count: " + bitcount1);
    Console.WriteLine("total compressed bit count: " + bitcount2);
    if (bitcount1 == 0)
    {
        // Nothing was read, so a ratio would be a division by zero.
        Console.WriteLine("compression ratio: n/a");
    }
    else
    {
        Console.WriteLine("compression ratio: " + (int)((double)bitcount2 / bitcount1 * 100) + "%");
    }
}

/// <summary>
/// Writes one encoded file and returns the number of bits written.
/// </summary>
/// <remarks>
/// Layout of a compressed file, as read back by the decoder:
///   bit 0      compression state (0: stored raw / 1: compressed)
///   bits 1-3   word skip (number of padding words to ignore at the end of the file)
///   bits 4-6   word length in bits (sign bit included)
///   N bits     number of bad words, N = IndexFieldWidth(...)
///   N bits     per bad word: its index
///   data       per position either a word (sign bit + magnitude of the offset against
///              the master offset record) or, for a bad word, the 8 bit value itself
/// A raw file is a single 0 byte followed by the (re-mapped) content bytes.
/// A zero-length input produces a zero-length output.
/// </remarks>
/// <param name="outputpath">File to create.</param>
/// <param name="content">Re-mapped bytes of the input file.</param>
/// <param name="avtemp">Master offset record.</param>
/// <param name="secondbiggestlength">Positions after this index are not encoded.</param>
private static UInt64 WriteEncodedFile(string outputpath, byte[] content, UInt32[] avtemp, int secondbiggestlength)
{
    using (var sw = new FileStream(outputpath, FileMode.Create, FileAccess.Write))
    {
        // 0 length files should be left empty
        if (content.Length < 1)
        {
            return 0;
        }

        // offset values (calculated against the master offset record) for the current file
        int[] offsets = new int[content.Length];
        int[] widths = new int[content.Length];
        for (int i = 0; i < content.Length; i++)
        {
            offsets[i] = content[i] - (int)avtemp[i];
            widths[i] = SignedBitWidth(offsets[i]);
        }

        // now find the optimal number of bits to constitute a word (3..7, first minimum wins)
        int finalbits = 0;
        int smallestdatasize = -1;
        for (int wordbits = 3; wordbits < 8; wordbits++)
        {
            // crude estimate of the number of bits it would take to represent a single bad word
            int badwordcost = (int)Math.Ceiling(Math.Log((offsets.Length * wordbits), 2)) + 8;
            int datasize = 0;
            foreach (int width in widths)
            {
                datasize += width > wordbits ? badwordcost : wordbits;
            }

            if (smallestdatasize > datasize || smallestdatasize == -1)
            {
                smallestdatasize = datasize;
                finalbits = wordbits;
            }
        }

        CustomBitArray outbitarray = new CustomBitArray(); // prepare bitarray for output
        outbitarray.AddBitsFromInt(finalbits, 3); // add word length metadata to outbitarray

        // add offset data to outbitarray
        List<int> badwordindexes = new List<int>();
        for (int i = 0; i < offsets.Length; i++)
        {
            if (i > secondbiggestlength)
            {
                break;
            }

            if (offsets[i] == 0 || widths[i] <= finalbits) // handle good words
            {
                outbitarray.bits.Add(offsets[i] < 0); // sign bit
                int magnitude = Math.Abs(offsets[i]);
                for (int shift = finalbits - 2; shift >= 0; shift--)
                {
                    outbitarray.bits.Add(((magnitude >> shift) & 1) == 1);
                }
            }
            else // handle bad words: store the value itself (master + offset) in 8 bits
            {
                badwordindexes.Add(i);
                for (int shift = 7; shift >= 0; shift--)
                {
                    outbitarray.bits.Add(((content[i] >> shift) & 1) == 1);
                }
            }
        }

        // compression flag followed by a temporary 3 bit filler for the word skip field
        outbitarray.bits.Insert(0, false);
        outbitarray.bits.Insert(0, false);
        outbitarray.bits.Insert(0, false);
        outbitarray.bits.Insert(0, true);

        // The index field width depends on the total size, which in turn depends on the
        // width; iterate until the width no longer changes.
        int fieldwidth;
        int reserved = 0;
        while (true)
        {
            int lon = IndexFieldWidth(outbitarray.bits.Count + reserved, finalbits);
            reserved = lon * (1 + badwordindexes.Count);
            if (IndexFieldWidth(outbitarray.bits.Count + reserved, finalbits) == lon)
            {
                fieldwidth = lon;
                break;
            }
        }

        // Inserting at the same position in reverse order leaves the indexes in ascending
        // order, preceded by the count.
        for (int i = badwordindexes.Count - 1; i >= 0; i--)
        {
            outbitarray.InsertBitsFromStr(Convert.ToString(badwordindexes[i], 2).PadLeft(fieldwidth, '0'), fieldwidth, 7);
        }
        outbitarray.InsertBitsFromStr(Convert.ToString(badwordindexes.Count, 2).PadLeft(fieldwidth, '0'), fieldwidth, 7);

        // pad to a whole number of bytes
        int ce = 0;
        while (outbitarray.bits.Count % 8 != 0)
        {
            ce++;
            outbitarray.bits.Add(false);
        }

        // replace the filler with the number of whole padding words the decoder must skip
        outbitarray.bits.RemoveAt(1);
        outbitarray.bits.RemoveAt(1);
        outbitarray.bits.RemoveAt(1);
        outbitarray.InsertBitsFromStr(Convert.ToString(ce / finalbits, 2).PadLeft(3, '0'), 3, 1);

        // fall back to raw storage when compression would not pay off
        if (outbitarray.bits.Count > ((content.Length * 8) + 8))
        {
            sw.WriteByte(0);
            sw.Write(content, 0, content.Length);
            return (UInt64)((content.Length * 8) + 8);
        }

        UInt64 written = 0;
        foreach (byte b in ConvertBoolArrayToByteArray(outbitarray.bits.ToArray()))
        {
            written += 8;
            sw.WriteByte(b);
        }
        return written;
    }
}

/// <summary>
/// Decodes every .ofsd/.bofsd file directly inside <paramref name="path"/> using
/// master.ofsr and master.dcer from the same directory, and prints the result.
/// </summary>
/// <param name="path">Directory to process.</param>
private static void DecodeDirectory(string path)
{
    string[] filepaths2 = Directory.GetFiles(path, "*.*", SearchOption.TopDirectoryOnly)
        .Where(candidate => EndsWithAny(candidate, ".ofsd", ".bofsd"))
        .ToArray();
    if (filepaths2.Length < 1)
    {
        // Checked before the master files are opened so an empty directory does not fail on them.
        Console.WriteLine("nothing to display");
        return;
    }

    byte[] masteroffsetrecord = File.ReadAllBytes(Path.Combine(path, "master.ofsr"));
    charorders = File.ReadAllBytes(Path.Combine(path, "master.dcer"));

    foreach (string filepath in filepaths2)
    {
        byte[] str = File.ReadAllBytes(filepath);
        if (str.Length < 1)
        {
            continue;
        }

        if ((str[0] & 0x80) == 0)
        {
            // Stored raw: a marker byte followed by the re-mapped content. The encoder
            // re-maps all contents before writing, so the mapping is undone here just
            // like on the compressed path.
            byte[] restored = new byte[str.Length - 1];
            for (int i = 0; i < restored.Length; i++)
            {
                restored[i] = (byte)ReverseDynamicEncodeByte(str[i + 1]);
            }
            Console.Write("file contents(" + filepath + "): ");
            Console.WriteLine(Encoding.UTF8.GetString(restored, 0, restored.Length) + "\n");
            continue;
        }

        // The header fields are not byte aligned, so work on a string of '0'/'1' characters.
        StringBuilder bitbuilder = new StringBuilder(str.Length * 8);
        foreach (byte b in str)
        {
            bitbuilder.Append(Convert.ToString(b, 2).PadLeft(8, '0'));
        }
        string bitstring = bitbuilder.ToString();

        int wordlength = ReadBits(bitstring, 4, 3);
        int nlength = IndexFieldWidth(bitstring.Length, wordlength);
        int badwordcount = ReadBits(bitstring, 7, nlength);
        int ignorewordscount = ReadBits(bitstring, 1, 3);

        List<int> badwordindexes = new List<int>();
        for (int k = 0; k < badwordcount; k++)
        {
            badwordindexes.Add(ReadBits(bitstring, 7 + nlength * (k + 1), nlength));
        }

        int nextbadwordindex = -1;
        if (badwordindexes.Count > 0)
        {
            badwordindexes.Sort();
            nextbadwordindex = badwordindexes[0];
        }

        int localindex = 0;     // position in the decoded output
        string bitstring2 = ""; // bits collected for the current word
        Console.Write("file contents(" + filepath + "): ");
        int datastart = 7 + nlength + (nlength * badwordcount);
        int dataend = bitstring.Length - (ignorewordscount * wordlength);
        for (int i = datastart; i < dataend; i++)
        {
            bitstring2 += bitstring[i];

            // good word: sign bit followed by the magnitude of the offset
            if (bitstring2.Length == wordlength && nextbadwordindex != localindex && localindex < masteroffsetrecord.Length)
            {
                int magnitude = (int)ConvertToChar(bitstring2.Substring(1).PadLeft(8, '0'));
                if (bitstring2[0] == '0')
                {
                    Console.Write((char)ReverseDynamicEncodeByte((byte)((int)masteroffsetrecord[localindex] + magnitude)));
                }
                else
                {
                    Console.Write((char)ReverseDynamicEncodeByte((byte)((int)masteroffsetrecord[localindex] - magnitude)));
                }
                bitstring2 = "";
                localindex++;
            }

            // bad word: the value itself in 8 bits
            if (bitstring2.Length == 8 && nextbadwordindex == localindex)
            {
                Console.Write((char)ReverseDynamicEncodeByte((byte)ConvertToChar(bitstring2)));
                badwordindexes.RemoveAt(0);
                if (badwordindexes.Count > 0)
                {
                    nextbadwordindex = badwordindexes[0];
                }
                bitstring2 = "";
                localindex++;
            }
        }

        // The longest file is not stored in full; its tail is taken from the master offset record.
        if (filepath.EndsWith(".bofsd", StringComparison.OrdinalIgnoreCase))
        {
            for (int z = localindex; z < masteroffsetrecord.Length; z++)
            {
                Console.Write((char)ReverseDynamicEncodeByte(masteroffsetrecord[z]));
            }
        }
        Console.WriteLine("\n");
    }
}