Ejemplo n.º 1
0
        /// <summary>
        /// Reads spatial test queries from <paramref name="in"/>, one query per line,
        /// decoding the stream as UTF-8. The stream is closed before this method returns.
        /// <para>
        /// Each line has the form <c>[optional comment] id id ... @ query-args</c>:
        /// a leading <c>[...]</c> section is dropped, the whitespace-separated tokens before
        /// the first '@' become the query ids, and the trimmed text after it is handed to
        /// <paramref name="parser"/>.
        /// </para>
        /// </summary>
        /// <param name="parser">parser used to turn the text after '@' into the query arguments</param>
        /// <param name="ctx">spatial context passed through to <paramref name="parser"/></param>
        /// <param name="name">not used by this method</param>
        /// <param name="in">stream to read the query lines from; disposed by this method</param>
        /// <returns>an enumerator over the parsed queries, in file order</returns>
        /// <exception cref="ApplicationException">
        /// a line could not be processed (for example it has no '@'); the original failure
        /// is attached as the inner exception
        /// </exception>
        public static IEnumerator<SpatialTestQuery> GetTestQueries(
            SpatialArgsParser parser,
            SpatialContext ctx,
            string name,
            Stream @in)
        {
            List<SpatialTestQuery> queries = new List<SpatialTestQuery>();

            // Disposing the reader also closes the underlying stream.
            using (TextReader reader = new StreamReader(@in, Encoding.UTF8))
            {
                int lineNumber = 0;
                string rawLine;
                while ((rawLine = reader.ReadLine()) != null)
                {
                    lineNumber++;

                    SpatialTestQuery test = new SpatialTestQuery();
                    test.line = rawLine;
                    test.lineNumber = lineNumber;

                    try
                    {
                        // Work on a copy so that test.line keeps the untouched text for error messages.
                        string text = rawLine;

                        // Drop a leading "[comment]" section, if it is properly closed.
                        if (text.StartsWith("[", StringComparison.Ordinal))
                        {
                            int close = text.IndexOf(']');
                            if (close > 0)
                            {
                                text = text.Substring(close + 1);
                            }
                        }

                        // A missing '@' yields -1 here; Substring then throws and the line is
                        // reported through the catch block below.
                        int at = text.IndexOf('@');
                        string idPart = text.Substring(0, at);
                        string argPart = text.Substring(at + 1).Trim();

                        StringTokenizer idTokens = new StringTokenizer(idPart);
                        while (idTokens.HasMoreTokens())
                        {
                            test.ids.Add(idTokens.NextToken().Trim());
                        }

                        test.args = parser.Parse(argPart, ctx);
                        queries.Add(test);
                    }
                    catch (Exception ex)
                    {
                        throw new ApplicationException("invalid query line: " + test.line, ex);
                    }
                }
            }

            return queries.GetEnumerator();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Verifies <paramref name="trie"/> against the stemmer table stored in
        /// <paramref name="file"/> (read as UTF-8).
        /// <para>
        /// Every line is lower-cased and split into tokens; the first token is the stem.
        /// For each other token that differs from the stem, the patch command stored in the
        /// trie is looked up, applied to the token with <c>Diff.Apply</c>, and the result is
        /// asserted to equal the stem. When <paramref name="storeorig"/> is set the same
        /// check is performed for the stem itself.
        /// </para>
        /// </summary>
        /// <param name="trie">the trie under test</param>
        /// <param name="file">path of the stemmer table to check against</param>
        /// <param name="usefull">
        /// when <c>true</c> patch commands are fetched with <c>GetFully</c>,
        /// otherwise with <c>GetLastOnPath</c>
        /// </param>
        /// <param name="storeorig">when <c>true</c> the stem itself is also looked up and verified</param>
        private static void AssertTrie(Trie trie, string file, bool usefull,
            bool storeorig)
        {
            // Open read-only: the FileStream(path, mode) overload requests read/write access,
            // which fails for read-only test resources even though nothing is written here.
            using (TextReader @in =
                new StreamReader(new FileStream(file, FileMode.Open, FileAccess.Read), Encoding.UTF8))
            {

                for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
                {
                    try
                    {
                        line = line.ToLowerInvariant();
                        StringTokenizer st = new StringTokenizer(line);
                        string stem = st.NextToken();
                        if (storeorig)
                        {
                            string cmd = (usefull) ? trie.GetFully(stem) : trie
                                .GetLastOnPath(stem);
                            StringBuilder stm = new StringBuilder(stem);
                            Diff.Apply(stm, cmd);
                            assertEquals(stem.ToLowerInvariant(), stm.ToString().ToLowerInvariant());
                        }
                        while (st.HasMoreTokens())
                        {
                            string token = st.NextToken();
                            if (token.Equals(stem))
                            {
                                // A variant identical to the stem needs no patch; nothing to verify.
                                continue;
                            }
                            string cmd = (usefull) ? trie.GetFully(token) : trie
                                .GetLastOnPath(token);
                            StringBuilder stm = new StringBuilder(token);
                            Diff.Apply(stm, cmd);
                            assertEquals(stem.ToLowerInvariant(), stm.ToString().ToLowerInvariant());
                        }
                    }
                    catch (InvalidOperationException /*x*/)
                    {
                        // no base token (stem) on a line
                    }
                }

            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Entry point to the Compile application.
        /// <para>
        /// This program takes any number of arguments: the first is the name of the
        /// desired stemming algorithm to use (a list is available in the package
        /// description), all of the rest should be the path or paths to a file or
        /// files containing a stemmer table to compile.
        /// </para>
        /// <para>
        /// The first argument is read left to right as: an optional leading '-'
        /// (sets <c>backward</c>), an optional '0' (the stems themselves are also stored),
        /// an optional 'M' (sets <c>multi</c>), followed by any number of reduction steps
        /// drawn from 'G', 'L', 'E', '2' and '1', applied in the order given. Unrecognised
        /// step characters are skipped. The argument is interpreted case-sensitively.
        /// </para>
        /// <para>
        /// For every input file <c>X</c> the compiled trie is written to <c>X.out</c>,
        /// preceded by the first argument. Nothing happens when no arguments are given.
        /// </para>
        /// </summary>
        /// <param name="args">the command line arguments</param>
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                return;
            }

            // NOTE: an earlier revision called args[0].ToUpperInvariant() here and discarded
            // the result, so the option string has always been matched case-sensitively.
            // The dead call was removed; the option string is still used exactly as given.
            string options = args[0];

            // Every index into the option string is bounds-checked so that short values
            // such as "", "-", "0" or "-0" no longer throw IndexOutOfRangeException.
            int qq = 0;

            backward = options.Length > 0 && options[0] == '-';
            if (backward)
            {
                qq++;
            }

            bool storeorig = qq < options.Length && options[qq] == '0';
            if (storeorig)
            {
                qq++;
            }

            multi = qq < options.Length && options[qq] == 'M';
            if (multi)
            {
                qq++;
            }

            // Whatever follows the flags is the ordered list of reduction steps.
            char[] optimizer = options.Substring(qq).ToCharArray();

            // Input files are always decoded as UTF-8; the "egothor.stemmer.charset"
            // system property of the Java original is not consulted.
            for (int i = 1; i < args.Length; i++)
            {
                Diff diff = new Diff();

                AllocTrie();

                Console.WriteLine(args[i]);
                using (TextReader @in = new StreamReader(
                    new FileStream(args[i], FileMode.Open, FileAccess.Read), Encoding.UTF8))
                {
                    for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
                    {
                        try
                        {
                            line = line.ToLowerInvariant();
                            StringTokenizer st = new StringTokenizer(line);
                            // The first token on a line is the stem; the rest are its variants.
                            string stem = st.NextToken();
                            if (storeorig)
                            {
                                trie.Add(stem, "-a");
                            }
                            while (st.HasMoreTokens())
                            {
                                string token = st.NextToken();
                                if (token.Equals(stem) == false)
                                {
                                    // Store the patch command computed by diff.Exec(token, stem).
                                    trie.Add(token, diff.Exec(token, stem));
                                }
                            }
                        }
                        catch (InvalidOperationException /*x*/)
                        {
                            // no base token (stem) on a line
                        }
                    }
                }

                Optimizer o = new Optimizer();
                Optimizer2 o2 = new Optimizer2();
                Lift l = new Lift(true);
                Lift e = new Lift(false);
                Gener g = new Gener();

                for (int j = 0; j < optimizer.Length; j++)
                {
                    string prefix;
                    switch (optimizer[j])
                    {
                        case 'G':
                            trie = trie.Reduce(g);
                            prefix = "G: ";
                            break;
                        case 'L':
                            trie = trie.Reduce(l);
                            prefix = "L: ";
                            break;
                        case 'E':
                            trie = trie.Reduce(e);
                            prefix = "E: ";
                            break;
                        case '2':
                            trie = trie.Reduce(o2);
                            prefix = "2: ";
                            break;
                        case '1':
                            trie = trie.Reduce(o);
                            prefix = "1: ";
                            break;
                        default:
                            // Unknown step character: skip it without printing anything.
                            continue;
                    }
                    trie.PrintInfo(System.Console.Out, prefix + " ");
                }

                // FileMode.Create truncates an existing file. OpenOrCreate would leave stale
                // trailing bytes behind whenever the new output is shorter than the old one.
                using (DataOutputStream os = new DataOutputStream(
                    new FileStream(args[i] + ".out", FileMode.Create, FileAccess.Write)))
                {
                    os.WriteUTF(args[0]);
                    trie.Store(os);
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Entry point to the DiffIt application.
        /// <para>
        /// The first argument is passed to <c>Get</c> four times (with indices 0 to 3) to
        /// obtain the four integer values handed to the <c>Diff</c> constructor as
        /// <c>ins</c>, <c>del</c>, <c>rep</c> and <c>nop</c>. Every further argument is the
        /// path to a file containing a stemmer table. The program reads each file (as UTF-8)
        /// and writes the patch commands for the stems to standard output.
        /// </para>
        /// <para>
        /// Nothing happens when no arguments are given.
        /// </para>
        /// </summary>
        /// <param name="args">
        /// the <c>Diff</c> settings string followed by the paths of the stemmer tables
        /// </param>
        public static void Main(string[] args)
        {
            // args[0] is required below; bail out quietly instead of failing with
            // IndexOutOfRangeException, as the Compile entry point does.
            if (args.Length < 1)
            {
                return;
            }

            int ins = Get(0, args[0]);
            int del = Get(1, args[0]);
            int rep = Get(2, args[0]);
            int nop = Get(3, args[0]);

            for (int i = 1; i < args.Length; i++)
            {
                Diff diff = new Diff(ins, del, rep, nop);

                // Input is always decoded as UTF-8; the "egothor.stemmer.charset" system
                // property of the Java original is not consulted.
                // The using block releases the file handle after each input file; previously
                // the reader was never disposed, leaking one handle per argument.
                using (TextReader @in = new StreamReader(
                    new FileStream(args[i], FileMode.Open, FileAccess.Read), Encoding.UTF8))
                {
                    for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
                    {
                        try
                        {
                            line = line.ToLowerInvariant();
                            StringTokenizer st = new StringTokenizer(line);
                            // The first token on a line is the stem; the rest are its variants.
                            string stem = st.NextToken();
                            Console.WriteLine(stem + " -a");
                            while (st.HasMoreTokens())
                            {
                                string token = st.NextToken();
                                if (token.Equals(stem) == false)
                                {
                                    Console.WriteLine(stem + " " + diff.Exec(token, stem));
                                }
                            }
                        }
                        catch (InvalidOperationException /*x*/)
                        {
                            // no base token (stem) on a line
                        }
                    }
                }
            }
        }
Ejemplo n.º 5
0
 // Test helper: builds a Lucene document from a string of name/value pairs.
 // The input is split on tab and '=' characters and the tokens are consumed two at a
 // time (name, then value); each pair becomes a stored text field. A trailing name
 // that has no value is ignored.
 Document getDocumentFromString(String nameValuePairs)
 {
     Document doc = new Document();
     StringTokenizer tokens = new StringTokenizer(nameValuePairs, "\t=");
     for (;;)
     {
         if (!tokens.HasMoreTokens())
         {
             break;
         }
         string fieldName = tokens.NextToken().Trim();

         if (!tokens.HasMoreTokens())
         {
             // Dangling name without a value: nothing left to add.
             break;
         }
         string fieldValue = tokens.NextToken().Trim();

         doc.Add(NewTextField(fieldName, fieldValue, Field.Store.YES));
     }
     return doc;
 }
Ejemplo n.º 6
0
 // Test helper: turns a string of name/value pairs into a dictionary.
 // The input is split on tab and '=' characters and the tokens are consumed two at a
 // time (name, then value). A later pair with the same name overwrites the earlier
 // one, and a trailing name that has no value is ignored.
 IDictionary<string, string> getPropsFromString(String nameValuePairs)
 {
     IDictionary<string, string> props = new Dictionary<string, string>();
     StringTokenizer tokens = new StringTokenizer(nameValuePairs, "\t=");
     for (;;)
     {
         if (!tokens.HasMoreTokens())
         {
             break;
         }
         string key = tokens.NextToken().Trim();

         if (!tokens.HasMoreTokens())
         {
             // Dangling name without a value: nothing left to store.
             break;
         }
         props[key] = tokens.NextToken().Trim();
     }
     return props;
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Splits <paramref name="body"/> on spaces, newlines and tabs and turns each token
        /// into a dictionary entry. A token of the form <c>name=value</c> maps <c>name</c>
        /// to <c>value</c> (split at the first '='); a token with no '=', or one that starts
        /// with '=', maps to itself, so <c>"a=b c=d f"</c> yields a=b, c=d and f=f.
        /// When a name occurs more than once the last occurrence wins.
        /// </summary>
        /// <param name="body">the whitespace-separated list of tokens to parse</param>
        /// <returns>the parsed name-value pairs</returns>
        protected static IDictionary<string, string> ParseMap(string body)
        {
            Dictionary<string, string> pairs = new Dictionary<string, string>();

            StringTokenizer tokens = new StringTokenizer(body, " \n\t");
            while (tokens.HasMoreTokens())
            {
                string token = tokens.NextToken();
                int eq = token.IndexOf('=');

                if (eq <= 0)
                {
                    // No '=' at all, or an empty name: the token stands for itself.
                    pairs[token] = token;
                    continue;
                }

                string key = token.Substring(0, eq);
                string value = token.Substring(eq + 1);
                pairs[key] = value;
            }

            return pairs;
        }