WriteUTF() static private method

static private WriteUTF ( string str, IDataOutput @out ) : int
str string
@out IDataOutput
return int
 public void TestWriteUTF()
 {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
     DataOutputStream dos = new DataOutputStream(baos);
     dos.WriteUTF("Hello, World!");  // 15
     dos.Flush();
     if (baos.Length != dos.Length)
         fail("Miscounted bytes in DataOutputStream.");
 }
        public void TestWriteUTF()
        {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            DataOutputStream      dos  = new DataOutputStream(baos);

            dos.WriteUTF("Hello, World!");  // 15
            dos.Flush();
            if (baos.Length != dos.Length)
            {
                fail("Miscounted bytes in DataOutputStream.");
            }
        }
        private static void WriteAndReadAString()
        {
            // Write out a string whose UTF-8 encoding is quite possibly
            // longer than 65535 bytes
            int           length     = Random().nextInt(A_NUMBER_NEAR_65535) + 1;
            MemoryStream  baos       = new MemoryStream();
            StringBuilder testBuffer = new StringBuilder();

            for (int i = 0; i < length; i++)
            {
                testBuffer.append((char)Random().Next());
            }
            string           testString = testBuffer.toString();
            DataOutputStream dos        = new DataOutputStream(baos);

            dos.WriteUTF(testString);

            // Corrupt the data to produce malformed characters
            byte[] testBytes   = baos.ToArray();
            int    dataLength  = testBytes.Length;
            int    corruptions = Random().nextInt(MAX_CORRUPTIONS_PER_CYCLE);

            for (int i = 0; i < corruptions; i++)
            {
                int index = Random().nextInt(dataLength);
                testBytes[index] = (byte)Random().Next();
            }

            // Pay special attention to mangling the end to produce
            // partial characters at end
            testBytes[dataLength - 1] = (byte)Random().Next();
            testBytes[dataLength - 2] = (byte)Random().Next();

            // Attempt to decode the bytes back into a String
            MemoryStream    bais = new MemoryStream(testBytes);
            DataInputStream dis  = new DataInputStream(bais);

            dis.ReadUTF();
        }
        private static void WriteAndReadAString()
        {
            // Write out a string whose UTF-8 encoding is quite possibly
            // longer than 65535 bytes
            int length = Random().nextInt(A_NUMBER_NEAR_65535) + 1;
            MemoryStream baos = new MemoryStream();
            StringBuilder testBuffer = new StringBuilder();
            for (int i = 0; i < length; i++)
                testBuffer.append((char)Random().Next());
            string testString = testBuffer.toString();
            DataOutputStream dos = new DataOutputStream(baos);
            dos.WriteUTF(testString);

            // Corrupt the data to produce malformed characters
            byte[] testBytes = baos.ToArray();
            int dataLength = testBytes.Length;
            int corruptions = Random().nextInt(MAX_CORRUPTIONS_PER_CYCLE);
            for (int i = 0; i < corruptions; i++)
            {
                int index = Random().nextInt(dataLength);
                testBytes[index] = (byte)Random().Next();
            }

            // Pay special attention to mangling the end to produce
            // partial characters at end
            testBytes[dataLength - 1] = (byte)Random().Next();
            testBytes[dataLength - 2] = (byte)Random().Next();

            // Attempt to decode the bytes back into a String
            MemoryStream bais = new MemoryStream(testBytes);
            DataInputStream dis = new DataInputStream(bais);
            dis.ReadUTF();
        }
Beispiel #5
0
        /**
         * Entry point to the Compile application.
         * <p>
         * This program takes any number of arguments: the first is the name of the
         * desired stemming algorithm to use (a list is available in the package
         * description) , all of the rest should be the path or paths to a file or
         * files containing a stemmer table to compile.
         * 
         * @param args the command line arguments
         */
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                return;
            }

            args[0].ToUpperInvariant();

            backward = args[0][0] == '-';
            int qq = (backward) ? 1 : 0;
            bool storeorig = false;

            if (args[0][qq] == '0')
            {
                storeorig = true;
                qq++;
            }

            multi = args[0][qq] == 'M';
            if (multi)
            {
                qq++;
            }

            // LUCENENET TODO: Is this any different than Encoding.UTF8?
            //String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");

            char[] optimizer = new char[args[0].Length - qq];
            for (int i = 0; i < optimizer.Length; i++)
            {
                optimizer[i] = args[0][qq + i];
            }

            for (int i = 1; i < args.Length; i++)
            {
                TextReader @in;
                // System.out.println("[" + args[i] + "]");
                Diff diff = new Diff();
                //int stems = 0; // not used
                int words = 0;


                AllocTrie();

                Console.WriteLine(args[i]);
                using (@in = new StreamReader(
                    new FileStream(args[i], FileMode.Open, FileAccess.Read), Encoding.UTF8))
                {
                    for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
                    {
                        try
                        {
                            line = line.ToLowerInvariant();
                            StringTokenizer st = new StringTokenizer(line);
                            string stem = st.NextToken();
                            if (storeorig)
                            {
                                trie.Add(stem, "-a");
                                words++;
                            }
                            while (st.HasMoreTokens())
                            {
                                string token = st.NextToken();
                                if (token.Equals(stem) == false)
                                {
                                    trie.Add(token, diff.Exec(token, stem));
                                    words++;
                                }
                            }
                        }
                        catch (InvalidOperationException /*x*/)
                        {
                            // no base token (stem) on a line
                        }
                    }
                }

                Optimizer o = new Optimizer();
                Optimizer2 o2 = new Optimizer2();
                Lift l = new Lift(true);
                Lift e = new Lift(false);
                Gener g = new Gener();

                for (int j = 0; j < optimizer.Length; j++)
                {
                    string prefix;
                    switch (optimizer[j])
                    {
                        case 'G':
                            trie = trie.Reduce(g);
                            prefix = "G: ";
                            break;
                        case 'L':
                            trie = trie.Reduce(l);
                            prefix = "L: ";
                            break;
                        case 'E':
                            trie = trie.Reduce(e);
                            prefix = "E: ";
                            break;
                        case '2':
                            trie = trie.Reduce(o2);
                            prefix = "2: ";
                            break;
                        case '1':
                            trie = trie.Reduce(o);
                            prefix = "1: ";
                            break;
                        default:
                            continue;
                    }
                    trie.PrintInfo(System.Console.Out, prefix + " ");
                }

                using (DataOutputStream os = new DataOutputStream(
                    new FileStream(args[i] + ".out", FileMode.OpenOrCreate, FileAccess.Write)))
                {
                    os.WriteUTF(args[0]);
                    trie.Store(os);
                }
            }
        }