public virtual void TestEmails()
        {
            // Load the complete text of the e-mail fixture resource into memory.
            string randomTextWithEmails;
            using (TextReader source = new System.IO.StreamReader(this.GetType().getResourceAsStream("random.text.with.email.addresses.txt"), Encoding.UTF8))
            {
                StringBuilder text = new StringBuilder();
                char[] chunk = new char[1024];
                int count = source.read(chunk);
                while (count != -1)
                {
                    text.Append(chunk, 0, count);
                    count = source.read(chunk);
                }
                randomTextWithEmails = text.ToString();
            }
            assertTrue(!string.IsNullOrEmpty(randomTextWithEmails));

            // Gather the trimmed, non-blank lines of the companion resource; they are handed to
            // AssertAnalyzesTo below as the expected result for the text loaded above.
            string[] emails;
            using (System.IO.StreamReader expectedSource = new System.IO.StreamReader(this.GetType().getResourceAsStream("email.addresses.from.random.text.with.email.addresses.txt"), Encoding.UTF8))
            {
                IList<string> collected = new JCG.List<string>();
                for (string raw = expectedSource.ReadLine(); raw != null; raw = expectedSource.ReadLine())
                {
                    string trimmed = raw.Trim();
                    if (trimmed.Length > 0)
                    {
                        collected.Add(trimmed);
                    }
                }
                emails = collected.ToArray();
            }
            assertTrue(emails != null && emails.Length > 0);
            BaseTokenStreamTestCase.AssertAnalyzesTo(emailAnalyzer, randomTextWithEmails, emails);
        }
        public virtual void TestWikiURLs()
        {
            // Load the complete HTML fixture resource into memory.
            string luceneResourcesWikiPage;
            using (TextReader source = new System.IO.StreamReader(this.GetType().getResourceAsStream("LuceneResourcesWikiPage.html"), Encoding.UTF8))
            {
                StringBuilder text = new StringBuilder();
                char[] chunk = new char[1024];
                int count = source.read(chunk);
                while (count != -1)
                {
                    text.Append(chunk, 0, count);
                    count = source.read(chunk);
                }
                luceneResourcesWikiPage = text.ToString();
            }
            assertTrue(!string.IsNullOrEmpty(luceneResourcesWikiPage));

            // Gather the trimmed, non-blank lines of the URL list resource; they are handed to
            // AssertAnalyzesTo below as the expected result for the page loaded above.
            string[] urls;
            using (System.IO.StreamReader expectedSource = new StreamReader(this.GetType().getResourceAsStream("LuceneResourcesWikiPageURLs.txt"), Encoding.UTF8))
            {
                IList<string> collected = new JCG.List<string>();
                for (string raw = expectedSource.ReadLine(); raw != null; raw = expectedSource.ReadLine())
                {
                    string trimmed = raw.Trim();
                    if (trimmed.Length > 0)
                    {
                        collected.Add(trimmed);
                    }
                }
                urls = collected.ToArray();
            }
            assertTrue(urls != null && urls.Length > 0);
            BaseTokenStreamTestCase.AssertAnalyzesTo(urlAnalyzer, luceneResourcesWikiPage, urls);
        }
        public virtual void TestURLs()
        {
            // Load the complete text of the URL fixture resource into memory.
            string randomTextWithURLs;
            using (TextReader source = new System.IO.StreamReader(this.GetType().getResourceAsStream("random.text.with.urls.txt"), Encoding.UTF8))
            {
                StringBuilder text = new StringBuilder();
                char[] chunk = new char[1024];
                int count = source.read(chunk);
                while (count != -1)
                {
                    text.Append(chunk, 0, count);
                    count = source.read(chunk);
                }
                randomTextWithURLs = text.ToString();
            }
            assertTrue(!string.IsNullOrEmpty(randomTextWithURLs));

            // Gather the trimmed, non-blank lines of the companion resource; they are handed to
            // AssertAnalyzesTo below as the expected result for the text loaded above.
            string[] urls;
            using (System.IO.StreamReader expectedSource = new System.IO.StreamReader(this.GetType().getResourceAsStream("urls.from.random.text.with.urls.txt"), Encoding.UTF8))
            {
                IList<string> collected = new List<string>();
                for (string raw = expectedSource.ReadLine(); raw != null; raw = expectedSource.ReadLine())
                {
                    string trimmed = raw.Trim();
                    if (trimmed.Length > 0)
                    {
                        collected.Add(trimmed);
                    }
                }
                urls = collected.ToArray();
            }
            assertTrue(urls != null && urls.Length > 0);
            BaseTokenStreamTestCase.AssertAnalyzesTo(urlAnalyzer, randomTextWithURLs, urls);
        }
 public virtual void TestURLs()
 {
     // Load the complete text of the URL fixture resource into memory.
     string randomTextWithURLs;
     using (TextReader source = new System.IO.StreamReader(this.GetType().getResourceAsStream("random.text.with.urls.txt"), Encoding.UTF8))
     {
         StringBuilder text = new StringBuilder();
         char[] chunk = new char[1024];
         int count = source.read(chunk);
         while (count != -1)
         {
             text.Append(chunk, 0, count);
             count = source.read(chunk);
         }
         randomTextWithURLs = text.ToString();
     }
     assertTrue(!string.IsNullOrEmpty(randomTextWithURLs));

     // Gather the trimmed, non-blank lines of the companion resource; they are handed to
     // AssertAnalyzesTo below as the expected result for the text loaded above.
     string[] urls;
     using (System.IO.StreamReader expectedSource = new System.IO.StreamReader(this.GetType().getResourceAsStream("urls.from.random.text.with.urls.txt"), Encoding.UTF8))
     {
         IList<string> collected = new List<string>();
         for (string raw = expectedSource.ReadLine(); raw != null; raw = expectedSource.ReadLine())
         {
             string trimmed = raw.Trim();
             if (trimmed.Length > 0)
             {
                 collected.Add(trimmed);
             }
         }
         urls = collected.ToArray();
     }
     assertTrue(urls != null && urls.Length > 0);
     BaseTokenStreamTestCase.AssertAnalyzesTo(urlAnalyzer, randomTextWithURLs, urls);
 }
Esempio n. 5
0
        /// <summary>
        /// Legacy all-in-one entry point (it looks like a port of libsvm's <c>svm_scale</c> driver --
        /// TODO confirm). Parses the options in <paramref name="argv"/>, scans the data file to find the
        /// largest feature index and the per-feature minimum/maximum, optionally overrides those ranges
        /// from a restore file, and finally feeds every row to <c>output_target</c> / <c>output</c>.
        /// </summary>
        /// <param name="argv">
        /// Options followed by exactly one data file name:
        /// <c>-l lower</c>, <c>-u upper</c>, <c>-y y_lower y_upper</c>, <c>-s save_file</c>,
        /// <c>-r restore_file</c>, <c>-o output_file</c> (redirects <c>Console.Out</c> while rows are written).
        /// </param>
        /// <remarks>
        /// <para>Numbers in arguments and files are parsed with the invariant culture, so <c>0.5</c> is
        /// read the same way on every machine.</para>
        /// <para><c>-s</c> is accepted and checked against <c>-r</c>, but no save file is written: that
        /// step was disabled by the original author (a <c>%.16g</c> formatting problem) and still needs
        /// to be ported.</para>
        /// <para>Usage errors and files that cannot be opened make the method return early; the
        /// process itself is never terminated from here.</para>
        /// </remarks>
        public void run(System.String[] argv)
        {
            int            i, index;
            BufferedReader fp = null, fp_restore = null;

            System.String save_filename       = null;
            System.String restore_filename    = null;
            System.String data_filename       = null;
            System.String save_filenameoutput = null;

            // Parsing with the current culture would misread "0.5" on comma-decimal systems.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            for (i = 0; i < argv.Length; i++)
            {
                // The first argument that is not an option is the data file name.
                if (argv[i].Length == 0 || argv[i][0] != '-')
                {
                    break;
                }
                ++i;

                // A bare "-" has no option letter; '\0' routes it to the unknown-option path.
                char option = argv[i - 1].Length > 1 ? argv[i - 1][1] : '\0';
                if ("luysro".IndexOf(option) < 0)
                {
                    System.Console.Error.WriteLine("unknown option");
                    exit_with_help();
                    // exit_with_help is presumably meant to terminate; do not carry on if it returns.
                    return;
                }

                // -y consumes two values, every other option one.
                int valuesNeeded = option == 'y' ? 2 : 1;
                if (i + valuesNeeded > argv.Length)
                {
                    // Option given without its value(s): previously an IndexOutOfRangeException.
                    exit_with_help();
                    return;
                }

                switch (option)
                {
                    case 'l':
                        lower = System.Double.Parse(argv[i], invariant);
                        break;
                    case 'u':
                        upper = System.Double.Parse(argv[i], invariant);
                        break;
                    case 'y':
                        y_lower = System.Double.Parse(argv[i], invariant);
                        ++i;
                        y_upper   = System.Double.Parse(argv[i], invariant);
                        y_scaling = true;
                        break;
                    case 's':
                        save_filename = argv[i];
                        break;
                    case 'r':
                        restore_filename = argv[i];
                        break;
                    case 'o':
                        save_filenameoutput = argv[i];
                        break;
                }
            }

            // NOTE(review): the two checks below only report the problem and processing continues,
            // exactly as before (process exit was disabled here) -- confirm whether run should stop.
            if (!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
            {
                System.Console.Error.WriteLine("inconsistent lower/upper specification");
            }
            if (restore_filename != null && save_filename != null)
            {
                System.Console.Error.WriteLine("cannot use -r and -s simultaneously");
            }

            // Exactly one argument (the data file) must remain after the options.
            if (argv.Length != i + 1)
            {
                exit_with_help();
                return;
            }

            data_filename = argv[i];
            try
            {
                fp = new BufferedReader(new FileReader(data_filename));
            }
            catch (java.lang.Exception)
            {
                System.Console.Error.WriteLine("can't open file " + data_filename);
                // Without a reader every later pass would fail on a null reference.
                return;
            }

            FileStream   ostrm      = null;
            StreamWriter writer     = null;
            TextWriter   oldOut     = System.Console.Out;
            bool         redirected = false;

            try
            {
                /* assumption: min index of attributes is 1 */
                /* pass 1: find out max index of attributes */
                max_index = 0;

                if (restore_filename != null)
                {
                    try
                    {
                        fp_restore = new BufferedReader(new FileReader(restore_filename));
                    }
                    catch (java.lang.Exception)
                    {
                        System.Console.Error.WriteLine("can't open file " + restore_filename);
                        return;
                    }

                    // Skip the optional 'y' section (three lines) and the two 'x' header lines.
                    if (fp_restore.read() == 'y')
                    {
                        fp_restore.readLine();
                        fp_restore.readLine();
                        fp_restore.readLine();
                    }
                    fp_restore.readLine();
                    fp_restore.readLine();

                    System.String restore_line = null;
                    while ((restore_line = fp_restore.readLine()) != null)
                    {
                        java.util.StringTokenizer st2 = new java.util.StringTokenizer(restore_line);
                        int idx = Int32.Parse(st2.nextToken(), invariant);
                        max_index = System.Math.Max(max_index, idx);
                    }
                    fp_restore = rewind(fp_restore, restore_filename);
                }

                while (readline(fp) != null)
                {
                    java.util.StringTokenizer st = new java.util.StringTokenizer(line, " \t\n\r\f:");
                    st.nextToken();                                     // skip the target value
                    while (st.hasMoreTokens())
                    {
                        index     = Int32.Parse(st.nextToken(), invariant);
                        max_index = System.Math.Max(max_index, index);
                        st.nextToken();                                 // skip the feature value
                        num_nonzeros++;
                    }
                }

                try
                {
                    feature_max = new double[(max_index + 1)];
                    feature_min = new double[(max_index + 1)];
                }
                catch (OutOfMemoryException)
                {
                    System.Console.Error.WriteLine("can't allocate enough memory");
                    // The arrays are required by everything below.
                    return;
                }

                for (i = 0; i <= max_index; i++)
                {
                    feature_max[i] = -System.Double.MaxValue;
                    feature_min[i] = System.Double.MaxValue;
                }

                fp = rewind(fp, data_filename);

                /* pass 2: find out min/max value */
                while (readline(fp) != null)
                {
                    int    next_index = 1;
                    double target;
                    double value;

                    java.util.StringTokenizer st = new java.util.StringTokenizer(line, " \t\n\r\f:");
                    target = System.Double.Parse(st.nextToken(), invariant);
                    y_max  = System.Math.Max(y_max, target);
                    y_min  = System.Math.Min(y_min, target);

                    while (st.hasMoreTokens())
                    {
                        index = Int32.Parse(st.nextToken(), invariant);
                        value = System.Double.Parse(st.nextToken(), invariant);

                        // Indices missing from a sparse row count as zero.
                        for (i = next_index; i < index; i++)
                        {
                            feature_max[i] = System.Math.Max(feature_max[i], 0);
                            feature_min[i] = System.Math.Min(feature_min[i], 0);
                        }

                        feature_max[index] = System.Math.Max(feature_max[index], value);
                        feature_min[index] = System.Math.Min(feature_min[index], value);
                        next_index         = index + 1;
                    }

                    for (i = next_index; i <= max_index; i++)
                    {
                        feature_max[i] = System.Math.Max(feature_max[i], 0);
                        feature_min[i] = System.Math.Min(feature_min[i], 0);
                    }
                }

                fp = rewind(fp, data_filename);

                /* pass 2.5: restore feature_min/feature_max */
                if (restore_filename != null)
                {
                    // fp_restore was rewound after finding max_index
                    fp_restore.mark(2);                                 // for reset
                    if (fp_restore.read() == 'y')
                    {
                        fp_restore.readLine();                          // pass the '\n' after 'y'
                        java.util.StringTokenizer st = new java.util.StringTokenizer(fp_restore.readLine());
                        y_lower   = System.Double.Parse(st.nextToken(), invariant);
                        y_upper   = System.Double.Parse(st.nextToken(), invariant);
                        st        = new java.util.StringTokenizer(fp_restore.readLine());
                        y_min     = System.Double.Parse(st.nextToken(), invariant);
                        y_max     = System.Double.Parse(st.nextToken(), invariant);
                        y_scaling = true;
                    }
                    else
                    {
                        fp_restore.reset();
                    }

                    if (fp_restore.read() == 'x')
                    {
                        fp_restore.readLine();                          // pass the '\n' after 'x'
                        java.util.StringTokenizer st = new java.util.StringTokenizer(fp_restore.readLine());
                        lower = System.Double.Parse(st.nextToken(), invariant);
                        upper = System.Double.Parse(st.nextToken(), invariant);
                        System.String restore_line = null;
                        while ((restore_line = fp_restore.readLine()) != null)
                        {
                            java.util.StringTokenizer st2 = new java.util.StringTokenizer(restore_line);
                            int    idx  = Int32.Parse(st2.nextToken(), invariant);
                            double fmin = System.Double.Parse(st2.nextToken(), invariant);
                            double fmax = System.Double.Parse(st2.nextToken(), invariant);
                            // Ranges for indices beyond those seen in pass 1 are ignored.
                            if (idx <= max_index)
                            {
                                feature_min[idx] = fmin;
                                feature_max[idx] = fmax;
                            }
                        }
                    }
                    fp_restore.close();
                    fp_restore = null;                                  // already closed; nothing left for finally
                }

                // TODO: writing the -s save file is not implemented (see remarks on this method).

                if (save_filenameoutput != null)
                {
                    try
                    {
                        // FileMode.Create truncates an existing file, so no stale tail is left behind.
                        ostrm  = new FileStream(save_filenameoutput, FileMode.Create, FileAccess.Write);
                        writer = new StreamWriter(ostrm);
                    }
                    catch (System.Exception e)
                    {
                        System.Console.WriteLine("Cannot open " + save_filenameoutput + " for writing");
                        System.Console.WriteLine(e.Message);
                        return;
                    }
                    System.Console.SetOut(writer);
                    redirected = true;
                }

                /* pass 3: scale */
                while (readline(fp) != null)
                {
                    int    next_index = 1;
                    double target;
                    double value;

                    java.util.StringTokenizer st = new java.util.StringTokenizer(line, " \t\n\r\f:");
                    target = System.Double.Parse(st.nextToken(), invariant);
                    output_target(target);
                    while (st.hasMoreTokens())
                    {
                        index = Int32.Parse(st.nextToken(), invariant);
                        value = System.Double.Parse(st.nextToken(), invariant);
                        for (i = next_index; i < index; i++)
                        {
                            output(i, 0);
                        }
                        output(index, value);
                        next_index = index + 1;
                    }

                    for (i = next_index; i <= max_index; i++)
                    {
                        output(i, 0);
                    }
                    System.Console.Write("\n");
                }

                if (new_num_nonzeros > num_nonzeros)
                {
                    System.Console.Error.WriteLine(
                        "WARNING: original #nonzeros " + num_nonzeros + "\n"
                        + "         new      #nonzeros " + new_num_nonzeros + "\n"
                        + "Use -l 0 if many original feature values are zeros\n");
                }
            }
            finally
            {
                // Runs on every exit path: give the console back before releasing the files.
                if (redirected)
                {
                    System.Console.SetOut(oldOut);
                }
                if (writer != null)
                {
                    writer.Close();
                }
                if (ostrm != null)
                {
                    ostrm.Close();
                }
                if (fp_restore != null)
                {
                    fp_restore.close();
                }
                if (fp != null)
                {
                    fp.close();
                }
            }
        }