/// <summary>
/// Writes a short string through <c>WriteUTF</c>, flushes, and fails the test when the
/// length reported by the <c>DataOutputStream</c> differs from the length of the
/// underlying <c>ByteArrayOutputStream</c>.
/// </summary>
public void TestWriteUTF()
{
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream output = new DataOutputStream(buffer);

    output.WriteUTF("Hello, World!"); // 15
    output.Flush();

    // Both streams must agree on how many bytes were produced.
    bool countsAgree = buffer.Length == output.Length;
    if (!countsAgree)
    {
        fail("Miscounted bytes in DataOutputStream.");
    }
}
/// <summary>
/// Checks that the byte count tracked by a <c>DataOutputStream</c> after a
/// <c>WriteUTF</c> call matches the length of the stream it wraps.
/// </summary>
public void TestWriteUTF()
{
    var sink = new ByteArrayOutputStream();
    var writer = new DataOutputStream(sink);
    writer.WriteUTF("Hello, World!"); // 15
    writer.Flush();

    // Nothing to report when the two lengths match.
    if (sink.Length == writer.Length)
    {
        return;
    }

    fail("Miscounted bytes in DataOutputStream.");
}
/// <summary>
/// Builds a random string, encodes it with <c>WriteUTF</c>, overwrites some of the
/// encoded bytes with random values, and then attempts to decode the result with
/// <c>ReadUTF</c>. The decoded value is discarded; any exception propagates to the caller.
/// </summary>
private static void WriteAndReadAString()
{
    // Write out a string whose UTF-8 encoding is quite possibly
    // longer than 65535 bytes
    int charCount = Random().nextInt(A_NUMBER_NEAR_65535) + 1;
    StringBuilder chars = new StringBuilder();
    int appended = 0;
    while (appended < charCount)
    {
        chars.append((char)Random().Next());
        appended++;
    }
    string payload = chars.toString();

    MemoryStream encoded = new MemoryStream();
    DataOutputStream writer = new DataOutputStream(encoded);
    writer.WriteUTF(payload);

    // Corrupt the data to produce malformed characters
    byte[] raw = encoded.ToArray();
    int rawLength = raw.Length;
    int remaining = Random().nextInt(MAX_CORRUPTIONS_PER_CYCLE);
    while (remaining > 0)
    {
        int victim = Random().nextInt(rawLength);
        raw[victim] = (byte)Random().Next();
        remaining--;
    }

    // Pay special attention to mangling the end to produce
    // partial characters at end
    raw[rawLength - 1] = (byte)Random().Next();
    raw[rawLength - 2] = (byte)Random().Next();

    // Attempt to decode the bytes back into a String
    MemoryStream corrupted = new MemoryStream(raw);
    DataInputStream reader = new DataInputStream(corrupted);
    reader.ReadUTF();
}
/// <summary>
/// Round-trips a randomly generated string through <c>WriteUTF</c> and <c>ReadUTF</c>
/// after replacing a random selection of the encoded bytes (and always the last two)
/// with random values. The result of <c>ReadUTF</c> is not inspected.
/// </summary>
private static void WriteAndReadAString()
{
    // Write out a string whose UTF-8 encoding is quite possibly
    // longer than 65535 bytes
    int length = Random().nextInt(A_NUMBER_NEAR_65535) + 1;
    var sink = new MemoryStream();
    var text = new StringBuilder();
    for (int left = length; left > 0; left--)
    {
        text.append((char)Random().Next());
    }

    var dos = new DataOutputStream(sink);
    dos.WriteUTF(text.toString());

    // Corrupt the data to produce malformed characters
    byte[] bytes = sink.ToArray();
    int total = bytes.Length;
    for (int left = Random().nextInt(MAX_CORRUPTIONS_PER_CYCLE); left > 0; left--)
    {
        int position = Random().nextInt(total);
        bytes[position] = (byte)Random().Next();
    }

    // Pay special attention to mangling the end to produce
    // partial characters at end (last byte first, then the one before it)
    for (int fromEnd = 1; fromEnd <= 2; fromEnd++)
    {
        bytes[total - fromEnd] = (byte)Random().Next();
    }

    // Attempt to decode the bytes back into a String
    new DataInputStream(new MemoryStream(bytes)).ReadUTF();
}
/// <summary>
/// Entry point to the Compile application.
/// <para/>
/// This program takes any number of arguments: the first is the name of the
/// desired stemming algorithm to use (a list is available in the package
/// description), all of the rest should be the path or paths to a file or
/// files containing a stemmer table to compile.
/// <para/>
/// The first argument is upper-cased and then parsed as: an optional leading
/// <c>'-'</c> (sets <c>backward</c>), an optional <c>'0'</c> (each line's stem is
/// itself added to the trie with the command <c>"-a"</c>), an optional <c>'M'</c>
/// (sets <c>multi</c>), followed by any sequence of the reduction codes
/// <c>G</c>, <c>L</c>, <c>E</c>, <c>2</c> and <c>1</c>, applied in order. Unknown
/// codes are skipped. For every input file <c>X</c> the result is written to
/// <c>X.out</c>. Input files are always read as UTF-8.
/// </summary>
/// <param name="args">the command line arguments</param>
public static void Main(string[] args)
{
    if (args.Length < 1)
    {
        return;
    }

    // Strings are immutable: the upper-cased value must be captured. The previous
    // bare call "args[0].ToUpperInvariant();" discarded its result, so lower-case
    // option letters were silently ignored.
    string algorithm = args[0].ToUpperInvariant();

    // Every index below is bounds-checked so that inputs such as "", "-" or "-0"
    // no longer throw IndexOutOfRangeException.
    int qq = 0;
    backward = algorithm.Length > 0 && algorithm[0] == '-';
    if (backward)
    {
        qq = 1;
    }

    bool storeorig = false;
    if (qq < algorithm.Length && algorithm[qq] == '0')
    {
        storeorig = true;
        qq++;
    }

    multi = qq < algorithm.Length && algorithm[qq] == 'M';
    if (multi)
    {
        qq++;
    }

    // Whatever follows the flags is the ordered list of reduction codes.
    // NOTE: the Java original took the input charset from the
    // "egothor.stemmer.charset" system property (default "UTF-8"); this port
    // always reads UTF-8.
    string optimizer = algorithm.Substring(qq);

    for (int i = 1; i < args.Length; i++)
    {
        Diff diff = new Diff();

        AllocTrie();

        Console.WriteLine(args[i]);
        using (TextReader @in = new StreamReader(
            new FileStream(args[i], FileMode.Open, FileAccess.Read), Encoding.UTF8))
        {
            string line;
            while ((line = @in.ReadLine()) != null)
            {
                try
                {
                    line = line.ToLowerInvariant();
                    StringTokenizer st = new StringTokenizer(line);

                    // The first token on a line is the stem; the rest are its word forms.
                    string stem = st.NextToken();
                    if (storeorig)
                    {
                        trie.Add(stem, "-a");
                    }

                    while (st.HasMoreTokens())
                    {
                        string token = st.NextToken();
                        if (!token.Equals(stem))
                        {
                            trie.Add(token, diff.Exec(token, stem));
                        }
                    }
                }
                catch (InvalidOperationException)
                {
                    // no base token (stem) on a line
                }
            }
        }

        Optimizer o = new Optimizer();
        Optimizer2 o2 = new Optimizer2();
        Lift l = new Lift(true);
        Lift e = new Lift(false);
        Gener g = new Gener();

        for (int j = 0; j < optimizer.Length; j++)
        {
            string prefix;
            switch (optimizer[j])
            {
                case 'G':
                    trie = trie.Reduce(g);
                    prefix = "G: ";
                    break;
                case 'L':
                    trie = trie.Reduce(l);
                    prefix = "L: ";
                    break;
                case 'E':
                    trie = trie.Reduce(e);
                    prefix = "E: ";
                    break;
                case '2':
                    trie = trie.Reduce(o2);
                    prefix = "2: ";
                    break;
                case '1':
                    trie = trie.Reduce(o);
                    prefix = "1: ";
                    break;
                default:
                    // Unknown reduction code: skip it without printing anything.
                    continue;
            }
            trie.PrintInfo(System.Console.Out, prefix + " ");
        }

        // FileMode.Create truncates an existing file. OpenOrCreate would keep the old
        // contents and leave stale trailing bytes whenever the new table is shorter
        // than a previously written one.
        using (DataOutputStream os = new DataOutputStream(
            new FileStream(args[i] + ".out", FileMode.Create, FileAccess.Write)))
        {
            // Store the same (upper-cased) option string that was actually applied.
            os.WriteUTF(algorithm);
            trie.Store(os);
        }
    }
}