/// <summary>
/// Get test queries. The given input stream is closed.
/// </summary>
public static IEnumerator<SpatialTestQuery> GetTestQueries(
    SpatialArgsParser parser,
    SpatialContext ctx,
    string name,
    Stream @in)
{
    List<SpatialTestQuery> results = new List<SpatialTestQuery>();

    TextReader bufInput = new StreamReader(@in, Encoding.UTF8);
    try
    {
        String line;
        for (int lineNumber = 1; (line = bufInput.ReadLine()) != null; lineNumber++)
        {
            SpatialTestQuery test = new SpatialTestQuery();
            test.line = line;
            test.lineNumber = lineNumber;
            try
            {
                // skip a comment
                if (line.StartsWith("[", StringComparison.Ordinal))
                {
                    int idx2 = line.IndexOf(']');
                    if (idx2 > 0)
                    {
                        line = line.Substring(idx2 + 1);
                    }
                }

                int idx = line.IndexOf('@');
                StringTokenizer st = new StringTokenizer(line.Substring(0, idx));
                while (st.HasMoreTokens())
                {
                    test.ids.Add(st.NextToken().Trim());
                }
                test.args = parser.Parse(line.Substring(idx + 1).Trim(), ctx);
                results.Add(test);
            }
            catch (Exception ex)
            {
                throw new ApplicationException("invalid query line: " + test.line, ex);
            }
        }
    }
    finally
    {
        bufInput.Dispose();
    }
    return results.GetEnumerator();
}
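// A minimal usage sketch for GetTestQueries (an assumption, not from the
// original source). Each line of the query file has the shape the parser
// above expects: an optional "[...]" prefix, whitespace-separated document
// ids, an '@', then the spatial predicate, e.g.
//   [#2] G4428712 G4425295 @ Intersects(ENVELOPE(-20, 20, 30, -30))
// The file name and the ctx/parser setup here are illustrative.
static void PrintTestQueries()
{
    SpatialContext ctx = SpatialContext.GEO;
    SpatialArgsParser parser = new SpatialArgsParser();
    using (Stream file = new FileStream("simple-queries.txt", FileMode.Open, FileAccess.Read))
    {
        IEnumerator<SpatialTestQuery> queries = GetTestQueries(parser, ctx, "simple-queries", file);
        while (queries.MoveNext())
        {
            Console.WriteLine(queries.Current.line); // the raw line behind each parsed query
        }
    }
}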
private static void AssertTrie(Trie trie, string file, bool usefull, bool storeorig)
{
    using (TextReader @in = new StreamReader(
        new FileStream(file, FileMode.Open), Encoding.UTF8))
    {
        for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
        {
            try
            {
                line = line.ToLowerInvariant();
                StringTokenizer st = new StringTokenizer(line);
                string stem = st.NextToken();

                if (storeorig)
                {
                    string cmd = (usefull)
                        ? trie.GetFully(stem)
                        : trie.GetLastOnPath(stem);
                    StringBuilder stm = new StringBuilder(stem);
                    Diff.Apply(stm, cmd);
                    assertEquals(stem.ToLowerInvariant(), stm.ToString().ToLowerInvariant());
                }

                while (st.HasMoreTokens())
                {
                    string token = st.NextToken();
                    if (token.Equals(stem))
                    {
                        continue;
                    }
                    string cmd = (usefull)
                        ? trie.GetFully(token)
                        : trie.GetLastOnPath(token);
                    StringBuilder stm = new StringBuilder(token);
                    Diff.Apply(stm, cmd);
                    assertEquals(stem.ToLowerInvariant(), stm.ToString().ToLowerInvariant());
                }
            }
            catch (InvalidOperationException /*x*/)
            {
                // no base token (stem) on a line
            }
        }
    }
}
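// A hedged sketch (not from the original tests) showing how AssertTrie could
// be driven. A stemmer-table line starts with the stem followed by its
// inflected forms, e.g. "work works working". The trie is built the same way
// Compile builds it, so each form patches back to the stem. The table file
// name is hypothetical and the file must contain the matching line.
static void CheckHandBuiltTrie()
{
    Trie trie = new Trie(true); // forward trie
    Diff diff = new Diff();
    trie.Add("work", "-a");                            // storeorig entry: stem maps to itself
    trie.Add("works", diff.Exec("works", "work"));     // patch command: "works" -> "work"
    trie.Add("working", diff.Exec("working", "work"));
    AssertTrie(trie, "work-table.txt", usefull: true, storeorig: true);
}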
/// <summary>
/// Entry point to the Compile application.
/// <para>
/// This program takes any number of arguments: the first is the name of the
/// desired stemming algorithm to use (a list is available in the package
/// description), all of the rest should be the path or paths to a file or
/// files containing a stemmer table to compile.
/// </para>
/// </summary>
/// <param name="args">the command line arguments</param>
public static void Main(string[] args)
{
    if (args.Length < 1)
    {
        return;
    }

    args[0] = args[0].ToUpperInvariant(); // strings are immutable; the original discarded this result

    backward = args[0][0] == '-';
    int qq = (backward) ? 1 : 0;
    bool storeorig = false;
    if (args[0][qq] == '0')
    {
        storeorig = true;
        qq++;
    }
    multi = args[0][qq] == 'M';
    if (multi)
    {
        qq++;
    }

    // LUCENENET TODO: Is this any different than Encoding.UTF8?
    //String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");

    char[] optimizer = new char[args[0].Length - qq];
    for (int i = 0; i < optimizer.Length; i++)
    {
        optimizer[i] = args[0][qq + i];
    }

    for (int i = 1; i < args.Length; i++)
    {
        TextReader @in;
        // System.out.println("[" + args[i] + "]");
        Diff diff = new Diff();
        //int stems = 0; // not used
        int words = 0;

        AllocTrie();

        Console.WriteLine(args[i]);
        using (@in = new StreamReader(
            new FileStream(args[i], FileMode.Open, FileAccess.Read), Encoding.UTF8))
        {
            for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
            {
                try
                {
                    line = line.ToLowerInvariant();
                    StringTokenizer st = new StringTokenizer(line);
                    string stem = st.NextToken();
                    if (storeorig)
                    {
                        trie.Add(stem, "-a");
                        words++;
                    }
                    while (st.HasMoreTokens())
                    {
                        string token = st.NextToken();
                        if (token.Equals(stem) == false)
                        {
                            trie.Add(token, diff.Exec(token, stem));
                            words++;
                        }
                    }
                }
                catch (InvalidOperationException /*x*/)
                {
                    // no base token (stem) on a line
                }
            }
        }

        Optimizer o = new Optimizer();
        Optimizer2 o2 = new Optimizer2();
        Lift l = new Lift(true);
        Lift e = new Lift(false);
        Gener g = new Gener();

        for (int j = 0; j < optimizer.Length; j++)
        {
            string prefix;
            switch (optimizer[j])
            {
                case 'G':
                    trie = trie.Reduce(g);
                    prefix = "G: ";
                    break;
                case 'L':
                    trie = trie.Reduce(l);
                    prefix = "L: ";
                    break;
                case 'E':
                    trie = trie.Reduce(e);
                    prefix = "E: ";
                    break;
                case '2':
                    trie = trie.Reduce(o2);
                    prefix = "2: ";
                    break;
                case '1':
                    trie = trie.Reduce(o);
                    prefix = "1: ";
                    break;
                default:
                    continue;
            }
            trie.PrintInfo(System.Console.Out, prefix + " ");
        }

        using (DataOutputStream os = new DataOutputStream(
            new FileStream(args[i] + ".out", FileMode.OpenOrCreate, FileAccess.Write)))
        {
            os.WriteUTF(args[0]);
            trie.Store(os);
        }
    }
}
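// An illustrative driver (the file name is an assumption). The first argument
// packs the options and the optimizer pipeline: an optional leading '-'
// selects a backward trie, '0' stores the original stems (the "-a" command),
// 'M' selects a multi trie, and the remaining characters pick the reduction
// passes handled by the switch above ('G', 'L', 'E', '2', '1'). Each input
// file "rules.txt" yields a compiled trie "rules.txt.out".
static void CompileExample()
{
    // backward trie, store originals, multi trie, then Gener + Optimizer2
    Main(new string[] { "-0MG2", "rules.txt" });
}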
/// <summary>
/// Entry point to the DiffIt application.
/// <para>
/// This application takes arguments: the first encodes the Diff weights as
/// four digits (insert, delete, replace, no-op); the rest are paths to files
/// containing a stemmer table. Each file is read and the patch commands for
/// its stems are written to the console.
/// </para>
/// </summary>
/// <param name="args">the weights followed by the path(s) to stemmer table file(s)</param>
public static void Main(string[] args)
{
    int ins = Get(0, args[0]);
    int del = Get(1, args[0]);
    int rep = Get(2, args[0]);
    int nop = Get(3, args[0]);

    for (int i = 1; i < args.Length; i++)
    {
        // System.out.println("[" + args[i] + "]");
        Diff diff = new Diff(ins, del, rep, nop);

        // LUCENENET TODO: Is using Encoding.UTF8 good enough?
        //String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");

        using (TextReader @in = new StreamReader(
            new FileStream(args[i], FileMode.Open, FileAccess.Read), Encoding.UTF8))
        {
            for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
            {
                try
                {
                    line = line.ToLowerInvariant();
                    StringTokenizer st = new StringTokenizer(line);
                    string stem = st.NextToken();
                    Console.WriteLine(stem + " -a");
                    while (st.HasMoreTokens())
                    {
                        String token = st.NextToken();
                        if (token.Equals(stem) == false)
                        {
                            Console.WriteLine(stem + " " + diff.Exec(token, stem));
                        }
                    }
                }
                catch (InvalidOperationException /*x*/)
                {
                    // no base token (stem) on a line
                }
            }
        }
    }
}
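// An illustrative driver (the file name is an assumption). The first argument
// is four digits read by Get(0..3, args[0]), the Diff weights for insert,
// delete, replace, and no-op; the remaining arguments are table files.
static void DiffItExample()
{
    Main(new string[] { "1111", "rules.txt" }); // unit weights, one table file
}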
//Helper method to construct Lucene documents used in our tests
Document getDocumentFromString(String nameValuePairs)
{
    Document result = new Document();
    StringTokenizer st = new StringTokenizer(nameValuePairs, "\t=");
    while (st.HasMoreTokens())
    {
        String name = st.NextToken().Trim();
        if (st.HasMoreTokens())
        {
            String value = st.NextToken().Trim();
            result.Add(NewTextField(name, value, Field.Store.YES));
        }
    }
    return result;
}
//Helper method to construct Lucene query forms used in our tests
IDictionary<string, string> getPropsFromString(String nameValuePairs)
{
    IDictionary<string, string> result = new Dictionary<string, string>();
    StringTokenizer st = new StringTokenizer(nameValuePairs, "\t=");
    while (st.HasMoreTokens())
    {
        String name = st.NextToken().Trim();
        if (st.HasMoreTokens())
        {
            String value = st.NextToken().Trim();
            result[name] = value;
        }
    }
    return result;
}
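// A tiny illustration of the two helpers above (field names and values are
// made up). Inputs are tab-separated "name=value" pairs on a single line.
void HelperExample()
{
    Document doc = getDocumentFromString("title=Sample\tbody=Some example text");
    IDictionary<string, string> props = getPropsFromString("maxEdits=2\tprefixLength=1");
    assertEquals("2", props["maxEdits"]);
}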
/// <summary>
/// Parses "a=b c=d f" (whitespace separated) into name-value pairs. If there
/// is no '=' as in 'f' above then it's short for f=f.
/// </summary>
protected static IDictionary<string, string> ParseMap(string body)
{
    var map = new Dictionary<string, string>();
    StringTokenizer st = new StringTokenizer(body, " \n\t");
    while (st.HasMoreTokens())
    {
        string a = st.NextToken();
        int idx = a.IndexOf('=');
        if (idx > 0)
        {
            string k = a.Substring(0, idx);
            string v = a.Substring(idx + 1);
            map[k] = v;
        }
        else
        {
            map[a] = a;
        }
    }
    return map;
}
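// A quick check of ParseMap's contract (inputs are made up): a bare token is
// shorthand for key=key, as the summary above notes.
static void ParseMapExample()
{
    IDictionary<string, string> map = ParseMap("a=b c=d f");
    // map["a"] == "b", map["c"] == "d", map["f"] == "f"
}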