Java's DataOutputStream is similar to .NET's BinaryWriter. However, it writes in a modified UTF-8 format that cannot be read (or duplicated) using BinaryWriter. This is a port of DataOutputStream that is fully compatible with Java's DataInputStream.

Usage Note: Always favor BinaryWriter over DataOutputStream unless you specifically need the modified UTF-8 format and/or the WriteUTF(IDataOutput) method.

Inheritance: IDataOutput, IDisposable
Ejemplo n.º 1
0
 public void TestWriteUTF()
 {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
     DataOutputStream dos = new DataOutputStream(baos);
     dos.WriteUTF("Hello, World!");  // 15
     dos.Flush();
     if (baos.Length != dos.Length)
         fail("Miscounted bytes in DataOutputStream.");
 }
        private static void WriteAndReadAString()
        {
            // Write out a string whose UTF-8 encoding is quite possibly
            // longer than 65535 bytes
            int length = Random().nextInt(A_NUMBER_NEAR_65535) + 1;
            MemoryStream baos = new MemoryStream();
            StringBuilder testBuffer = new StringBuilder();
            for (int i = 0; i < length; i++)
                testBuffer.append((char)Random().Next());
            string testString = testBuffer.toString();
            DataOutputStream dos = new DataOutputStream(baos);
            dos.WriteUTF(testString);

            // Corrupt the data to produce malformed characters
            byte[] testBytes = baos.ToArray();
            int dataLength = testBytes.Length;
            int corruptions = Random().nextInt(MAX_CORRUPTIONS_PER_CYCLE);
            for (int i = 0; i < corruptions; i++)
            {
                int index = Random().nextInt(dataLength);
                testBytes[index] = (byte)Random().Next();
            }

            // Pay special attention to mangling the end to produce
            // partial characters at end
            testBytes[dataLength - 1] = (byte)Random().Next();
            testBytes[dataLength - 2] = (byte)Random().Next();

            // Attempt to decode the bytes back into a String
            MemoryStream bais = new MemoryStream(testBytes);
            DataInputStream dis = new DataInputStream(bais);
            dis.ReadUTF();
        }
Ejemplo n.º 3
0
        internal static int WriteUTF(string str, IDataOutput @out)
        {
            int strlen = str.Length;
            int utflen = 0;
            int c, count = 0;

            /* use charAt instead of copying String to char array */
            for (int i = 0; i < strlen; i++)
            {
                c = str[i];
                if ((c >= 0x0001) && (c <= 0x007F))
                {
                    utflen++;
                }
                else if (c > 0x07FF)
                {
                    utflen += 3;
                }
                else
                {
                    utflen += 2;
                }
            }

            if (utflen > 65535)
            {
                throw new FormatException(
                          "encoded string too long: " + utflen + " bytes");
            }

            byte[] bytearr = null;
            if (@out is DataOutputStream)
            {
                DataOutputStream dos = (DataOutputStream)@out;
                if (dos.bytearr == null || (dos.bytearr.Length < (utflen + 2)))
                {
                    dos.bytearr = new byte[(utflen * 2) + 2];
                }
                bytearr = dos.bytearr;
            }
            else
            {
                bytearr = new byte[utflen + 2];
            }

            bytearr[count++] = (byte)(int)(((uint)utflen >> 8) & 0xFF);
            bytearr[count++] = (byte)(int)(((uint)utflen >> 0) & 0xFF);

            int i2 = 0;

            for (i2 = 0; i2 < strlen; i2++)
            {
                c = str[i2];
                if (!((c >= 0x0001) && (c <= 0x007F)))
                {
                    break;
                }
                bytearr[count++] = (byte)c;
            }

            for (; i2 < strlen; i2++)
            {
                c = str[i2];
                if ((c >= 0x0001) && (c <= 0x007F))
                {
                    bytearr[count++] = (byte)c;
                }
                else if (c > 0x07FF)
                {
                    bytearr[count++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
                    bytearr[count++] = (byte)(0x80 | ((c >> 6) & 0x3F));
                    bytearr[count++] = (byte)(0x80 | ((c >> 0) & 0x3F));
                }
                else
                {
                    bytearr[count++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
                    bytearr[count++] = (byte)(0x80 | ((c >> 0) & 0x3F));
                }
            }
            @out.Write(bytearr, 0, utflen + 2);
            return(utflen + 2);
        }
Ejemplo n.º 4
0
        /**
         * Entry point to the Compile application.
         * <p>
         * This program takes any number of arguments: the first is the name of the
         * desired stemming algorithm to use (a list is available in the package
         * description) , all of the rest should be the path or paths to a file or
         * files containing a stemmer table to compile.
         * 
         * @param args the command line arguments
         */
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                return;
            }

            args[0].ToUpperInvariant();

            backward = args[0][0] == '-';
            int qq = (backward) ? 1 : 0;
            bool storeorig = false;

            if (args[0][qq] == '0')
            {
                storeorig = true;
                qq++;
            }

            multi = args[0][qq] == 'M';
            if (multi)
            {
                qq++;
            }

            // LUCENENET TODO: Is this any different than Encoding.UTF8?
            //String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");

            char[] optimizer = new char[args[0].Length - qq];
            for (int i = 0; i < optimizer.Length; i++)
            {
                optimizer[i] = args[0][qq + i];
            }

            for (int i = 1; i < args.Length; i++)
            {
                TextReader @in;
                // System.out.println("[" + args[i] + "]");
                Diff diff = new Diff();
                //int stems = 0; // not used
                int words = 0;


                AllocTrie();

                Console.WriteLine(args[i]);
                using (@in = new StreamReader(
                    new FileStream(args[i], FileMode.Open, FileAccess.Read), Encoding.UTF8))
                {
                    for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
                    {
                        try
                        {
                            line = line.ToLowerInvariant();
                            StringTokenizer st = new StringTokenizer(line);
                            string stem = st.NextToken();
                            if (storeorig)
                            {
                                trie.Add(stem, "-a");
                                words++;
                            }
                            while (st.HasMoreTokens())
                            {
                                string token = st.NextToken();
                                if (token.Equals(stem) == false)
                                {
                                    trie.Add(token, diff.Exec(token, stem));
                                    words++;
                                }
                            }
                        }
                        catch (InvalidOperationException /*x*/)
                        {
                            // no base token (stem) on a line
                        }
                    }
                }

                Optimizer o = new Optimizer();
                Optimizer2 o2 = new Optimizer2();
                Lift l = new Lift(true);
                Lift e = new Lift(false);
                Gener g = new Gener();

                for (int j = 0; j < optimizer.Length; j++)
                {
                    string prefix;
                    switch (optimizer[j])
                    {
                        case 'G':
                            trie = trie.Reduce(g);
                            prefix = "G: ";
                            break;
                        case 'L':
                            trie = trie.Reduce(l);
                            prefix = "L: ";
                            break;
                        case 'E':
                            trie = trie.Reduce(e);
                            prefix = "E: ";
                            break;
                        case '2':
                            trie = trie.Reduce(o2);
                            prefix = "2: ";
                            break;
                        case '1':
                            trie = trie.Reduce(o);
                            prefix = "1: ";
                            break;
                        default:
                            continue;
                    }
                    trie.PrintInfo(System.Console.Out, prefix + " ");
                }

                using (DataOutputStream os = new DataOutputStream(
                    new FileStream(args[i] + ".out", FileMode.OpenOrCreate, FileAccess.Write)))
                {
                    os.WriteUTF(args[0]);
                    trie.Store(os);
                }
            }
        }