/// <summary>
/// Set the params (analyzerName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
/// Lucene.Net.Analysis, the name can be shortened by dropping the Lucene.Net.Analysis part of the Fully Qualified Class Name.
/// <para/>
/// Analyzer names may also refer to previously defined AnalyzerFactory's.
/// <para/>
/// Example Declaration:
/// <code>
/// {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, Standard.StandardAnalyzer) >
/// </code>
/// <para/>
/// Example AnalyzerFactory usage:
/// <code>
/// -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
/// -NewAnalyzer('whitespace tokenized')
/// </code>
/// </summary>
/// <param name="params">analyzerClassName, or empty for the StandardAnalyzer</param>
public override void SetParams(string @params)
{
    base.SetParams(@params);
    // Tokenize the comma-separated list. Quoting ('"' or '\'') allows analyzer
    // names that contain spaces (e.g. names of previously defined AnalyzerFactory's).
    StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));
    stok.QuoteChar('"');
    stok.QuoteChar('\'');
    stok.EndOfLineIsSignificant = false;
    stok.OrdinaryChar(',');
    try
    {
        while (stok.NextToken() != StreamTokenizer.TokenType_EndOfStream)
        {
            switch (stok.TokenType)
            {
                case ',':
                {
                    // Do nothing - commas merely separate names.
                    break;
                }
                case '\'':
                case '\"':
                case StreamTokenizer.TokenType_Word:
                {
                    // Bare word or quoted string: collect as an analyzer name.
                    analyzerNames.Add(stok.StringValue);
                    break;
                }
                default:
                {
                    throw RuntimeException.Create("Unexpected token: " + stok.ToString());
                }
            }
        }
    }
    catch (Exception e) when (e.IsRuntimeException())
    {
        // Messages already prefixed with "Line #" were produced below (or by a
        // nested call) and already carry location info - rethrow untouched.
        if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
        {
            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
        }
        else
        {
            // AlgLineNum offsets the tokenizer-relative line to the .alg file line.
            throw RuntimeException.Create("Line #" + (stok.LineNumber + AlgLineNum) + ": ", e);
        }
    }
    catch (Exception t) when (t.IsThrowable())
    {
        throw RuntimeException.Create("Line #" + (stok.LineNumber + AlgLineNum) + ": ", t);
    }
}
/// <summary>
/// Set the params (analyzerName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
/// Lucene.Net.Analysis, the name can be shortened by dropping the Lucene.Net.Analysis part of the Fully Qualified Class Name.
/// <para/>
/// Analyzer names may also refer to previously defined AnalyzerFactory's.
/// <para/>
/// Example Declaration:
/// <code>
/// {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, Standard.StandardAnalyzer) >
/// </code>
/// <para/>
/// Example AnalyzerFactory usage:
/// <code>
/// -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
/// -NewAnalyzer('whitespace tokenized')
/// </code>
/// </summary>
/// <param name="params">analyzerClassName, or empty for the StandardAnalyzer</param>
public override void SetParams(string @params)
{
    base.SetParams(@params);
    // Tokenize the comma-separated list. Quoting ('"' or '\'') allows analyzer
    // names that contain spaces (e.g. names of previously defined AnalyzerFactory's).
    StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));
    stok.QuoteChar('"');
    stok.QuoteChar('\'');
    stok.IsEOLSignificant = false;
    stok.OrdinaryChar(',');
    try
    {
        while (stok.NextToken() != StreamTokenizer.TT_EOF)
        {
            switch (stok.TokenType)
            {
                case ',':
                {
                    // Do nothing - commas merely separate names.
                    break;
                }
                case '\'':
                case '\"':
                case StreamTokenizer.TT_WORD:
                {
                    // Bare word or quoted string: collect as an analyzer name.
                    analyzerNames.Add(stok.StringValue);
                    break;
                }
                default:
                {
                    throw new Exception("Unexpected token: " + stok.ToString());
                }
            }
        }
    }
    catch (Exception e)
    {
        // Messages already prefixed with "Line #" already carry location info.
        if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
        {
            // CA2200: use a bare rethrow so the original stack trace is preserved;
            // 'throw e;' would reset it to this frame.
            throw;
        }
        else
        {
            // AlgLineNum offsets the tokenizer-relative line to the .alg file line.
            throw new Exception("Line #" + (stok.LineNumber + AlgLineNum) + ": ", e);
        }
    }
}
/// <summary>
/// Read algorithm from file.
/// Property examined: alt.tasks.packages == comma separated list of
/// alternate Assembly names where tasks would be searched for, when not found
/// in the default Assembly (that of <see cref="PerfTask"/>).
/// If the same task class appears in more than one Assembly, the Assembly
/// indicated first in this list will be used.
/// <para/>
/// The Lucene.Net implementation differs from Lucene in that all
/// referenced assemblies are also scanned for the type. However,
/// alt.tasks.packages may be included for assemblies that are
/// not referenced in your project.
/// </summary>
/// <param name="runData">perf-run-data used at running the tasks.</param>
/// <exception cref="Exception">if errors while parsing the algorithm.</exception>
public Algorithm(PerfRunData runData)
{
    Config config = runData.Config;
    taskPackages = InitTasksPackages(config);
    string algTxt = config.AlgorithmText;
    // Root of the parsed task tree; currSequence tracks the sequence currently
    // being filled as we descend into '{'/'[' and ascend at '}'/']'/'>'.
    sequence = new TaskSequence(runData, null, null, false);
    TaskSequence currSequence = sequence;
    // Last task parsed - targeted by ':' (repetitions/rate) and '&' (background).
    PerfTask prevTask = null;
    StreamTokenizer stok = new StreamTokenizer(new StringReader(algTxt));
    stok.CommentChar('#');
    stok.IsEOLSignificant = false;
    stok.QuoteChar('"');
    stok.QuoteChar('\'');
    stok.OrdinaryChar('/');
    stok.OrdinaryChar('(');
    stok.OrdinaryChar(')');
    // colonOk: a ':' is only legal immediately after a sequence close.
    bool colonOk = false;
    bool isDisableCountNextTask = false; // only for primitive tasks
    currSequence.Depth = 0;
    while (stok.NextToken() != StreamTokenizer.TT_EOF)
    {
        switch (stok.TokenType)
        {
            case StreamTokenizer.TT_WORD:
                // A word is a task class name; instantiate it reflectively.
                string s = stok.StringValue;
                PerfTask task = (PerfTask)Activator.CreateInstance(TaskClass(config, s), runData);
                task.AlgLineNum = stok.LineNumber;
                task.DisableCounting = isDisableCountNextTask;
                isDisableCountNextTask = false;
                currSequence.AddTask(task);
                if (task is RepSumByPrefTask)
                {
                    // This task type consumes one extra token: the report prefix.
                    stok.NextToken();
                    string prefix = stok.StringValue;
                    if (prefix == null || prefix.Length == 0)
                    {
                        throw new Exception("named report prefix problem - " + stok.ToString());
                    }
                    ((RepSumByPrefTask)task).SetPrefix(prefix);
                }
                // check for task param: '(' someParam ')'
                stok.NextToken();
                if (stok.TokenType != '(')
                {
                    stok.PushBack(); // no params; give the token back
                }
                else
                {
                    // get params, for tasks that supports them - allow recursive parenthetical expressions
                    stok.IsEOLSignificant = true; // Allow params tokenizer to keep track of line number
                    StringBuilder @params = new StringBuilder();
                    stok.NextToken();
                    if (stok.TokenType != ')')
                    {
                        // count tracks parenthesis nesting depth; we stop only when
                        // the ')' that matches the opening '(' is reached.
                        int count = 1;
                        while (true)
                        {
                            switch (stok.TokenType)
                            {
                                case StreamTokenizer.TT_NUMBER:
                                {
                                    @params.Append(stok.NumberValue);
                                    break;
                                }
                                case StreamTokenizer.TT_WORD:
                                {
                                    @params.Append(stok.StringValue);
                                    break;
                                }
                                case StreamTokenizer.TT_EOF:
                                {
                                    throw new Exception("Unexpexted EOF: - " + stok.ToString());
                                }
                                case '"':
                                case '\'':
                                {
                                    // Reconstruct the quoted literal verbatim.
                                    @params.Append((char)stok.TokenType);
                                    // re-escape delimiters, if any
                                    @params.Append(stok.StringValue.Replace("" + (char)stok.TokenType, @"\" + (char)stok.TokenType));
                                    @params.Append((char)stok.TokenType);
                                    break;
                                }
                                case '(':
                                {
                                    @params.Append((char)stok.TokenType);
                                    ++count;
                                    break;
                                }
                                case ')':
                                {
                                    if (--count >= 1)
                                    {
                                        // exclude final closing parenthesis
                                        @params.Append((char)stok.TokenType);
                                    }
                                    else
                                    {
                                        // Matched the opening '(' - params complete.
                                        goto BALANCED_PARENS_BREAK;
                                    }
                                    break;
                                }
                                default:
                                {
                                    // Any other ordinary char is part of the param text.
                                    @params.Append((char)stok.TokenType);
                                    break;
                                }
                            }
                            stok.NextToken();
                        }
                        BALANCED_PARENS_BREAK: { }
                    }
                    stok.IsEOLSignificant = false;
                    string prm = @params.ToString().Trim();
                    if (prm.Length > 0)
                    {
                        task.SetParams(prm);
                    }
                }
                // ---------------------------------------
                colonOk = false;
                prevTask = task;
                break;
            default:
                // Non-word token: structural/control character.
                char c = (char)stok.TokenType;
                switch (c)
                {
                    case ':':
                        // Repetition spec for the sequence just closed: ':N', ':*' (exhaust) or ':Ns' (run time).
                        if (!colonOk)
                        {
                            throw new Exception("colon unexpexted: - " + stok.ToString());
                        }
                        colonOk = false;
                        // get repetitions number
                        stok.NextToken();
                        if ((char)stok.TokenType == '*')
                        {
                            ((TaskSequence)prevTask).SetRepetitions(TaskSequence.REPEAT_EXHAUST);
                        }
                        else
                        {
                            if (stok.TokenType != StreamTokenizer.TT_NUMBER)
                            {
                                throw new Exception("expected repetitions number or XXXs: - " + stok.ToString());
                            }
                            else
                            {
                                double num = stok.NumberValue;
                                stok.NextToken();
                                // Trailing 's' means run for num seconds rather than num repetitions.
                                if (stok.TokenType == StreamTokenizer.TT_WORD && stok.StringValue.Equals("s", StringComparison.Ordinal))
                                {
                                    ((TaskSequence)prevTask).SetRunTime(num);
                                }
                                else
                                {
                                    stok.PushBack();
                                    ((TaskSequence)prevTask).SetRepetitions((int)num);
                                }
                            }
                        }
                        // check for rate specification (ops/min)
                        stok.NextToken();
                        if (stok.TokenType != ':')
                        {
                            stok.PushBack(); // no rate spec
                        }
                        else
                        {
                            // get rate number
                            stok.NextToken();
                            if (stok.TokenType != StreamTokenizer.TT_NUMBER)
                            {
                                throw new Exception("expected rate number: - " + stok.ToString());
                            }
                            // check for unit - min or sec, sec is default
                            stok.NextToken();
                            if (stok.TokenType != '/')
                            {
                                stok.PushBack();
                                ((TaskSequence)prevTask).SetRate((int)stok.NumberValue, false); // set rate per sec
                            }
                            else
                            {
                                stok.NextToken();
                                if (stok.TokenType != StreamTokenizer.TT_WORD)
                                {
                                    throw new Exception("expected rate unit: 'min' or 'sec' - " + stok.ToString());
                                }
                                string unit = stok.StringValue.ToLowerInvariant();
                                if ("min".Equals(unit, StringComparison.Ordinal))
                                {
                                    ((TaskSequence)prevTask).SetRate((int)stok.NumberValue, true); // set rate per min
                                }
                                else if ("sec".Equals(unit, StringComparison.Ordinal))
                                {
                                    ((TaskSequence)prevTask).SetRate((int)stok.NumberValue, false); // set rate per sec
                                }
                                else
                                {
                                    throw new Exception("expected rate unit: 'min' or 'sec' - " + stok.ToString());
                                }
                            }
                        }
                        colonOk = false;
                        break;
                    case '{':
                    case '[':
                        // a sequence: '{' = serial, '[' = parallel
                        // check for sequence name
                        string name = null;
                        stok.NextToken();
                        if (stok.TokenType != '"')
                        {
                            stok.PushBack(); // anonymous sequence
                        }
                        else
                        {
                            name = stok.StringValue;
                            if (stok.TokenType != '"' || name == null || name.Length == 0)
                            {
                                throw new Exception("sequence name problem - " + stok.ToString());
                            }
                        }
                        // start the sequence
                        TaskSequence seq2 = new TaskSequence(runData, name, currSequence, c == '[');
                        currSequence.AddTask(seq2);
                        currSequence = seq2;
                        colonOk = false;
                        break;
                    case '&':
                        // Run the previous task in the background, optionally at a
                        // priority delta given by a following number.
                        if (currSequence.IsParallel)
                        {
                            throw new Exception("Can only create background tasks within a serial task");
                        }
                        stok.NextToken();
                        int deltaPri;
                        if (stok.TokenType != StreamTokenizer.TT_NUMBER)
                        {
                            stok.PushBack();
                            deltaPri = 0;
                        }
                        else
                        {
                            // priority
                            deltaPri = (int)stok.NumberValue;
                        }
                        if (prevTask == null)
                        {
                            throw new Exception("& was unexpected");
                        }
                        else if (prevTask.RunInBackground)
                        {
                            throw new Exception("double & was unexpected");
                        }
                        else
                        {
                            prevTask.SetRunInBackground(deltaPri);
                        }
                        break;
                    case '>':
                        // '>' closes a sequence AND suppresses its per-child report.
                        // C# has no case fallthrough, so the close logic below is
                        // duplicated from the '}'/']' case (see Java original).
                        currSequence.SetNoChildReport(); /* intentional fallthrough */
                        // end sequence
                        colonOk = true;
                        prevTask = currSequence;
                        currSequence = currSequence.Parent;
                        break;
                    case '}':
                    case ']':
                        // end sequence
                        colonOk = true;
                        prevTask = currSequence;
                        currSequence = currSequence.Parent;
                        break;
                    case '-':
                        // Disable stat counting for the next primitive task.
                        isDisableCountNextTask = true;
                        break;
                } //switch(c)
                break;
        } //switch(stok.ttype)
    }
    if (sequence != currSequence)
    {
        throw new Exception("Unmatched sequences");
    }
    // remove redundant top level enclosing sequences
    while (sequence.IsCollapsable && sequence.Repetitions == 1 && sequence.GetRate() == 0)
    {
        IList <PerfTask> t = sequence.Tasks;
        if (t != null && t.Count == 1)
        {
            PerfTask p = t[0];
            if (p is TaskSequence)
            {
                sequence = (TaskSequence)p;
                continue;
            }
        }
        break;
    }
}
/// <summary>
/// Instantiates the given analysis factory class after pulling params from
/// the given stream tokenizer, then stores the result in the appropriate
/// pipeline component list.
/// </summary>
/// <param name="stok">Stream tokenizer from which to draw analysis factory params.</param>
/// <param name="clazz">Analysis factory class to instantiate.</param>
private void CreateAnalysisPipelineComponent(StreamTokenizer stok, Type clazz)
{
    // Collected name:value factory arguments, passed to the factory constructor below.
    IDictionary <string, string> argMap = new Dictionary <string, string>();
    // True while inside the optional '( ... )' argument list.
    bool parenthetical = false;
    try
    {
        while (stok.NextToken() != StreamTokenizer.TokenType_EndOfStream)
        {
            switch (stok.TokenType)
            {
                case ',':
                {
                    if (parenthetical)
                    {
                        // Do nothing - comma separates args inside the parens.
                        break;
                    }
                    else
                    {
                        // Finished reading this analysis factory configuration
                        goto WHILE_LOOP_BREAK;
                    }
                }
                case '(':
                {
                    if (parenthetical)
                    {
                        // Nested parens are not allowed in an argument list.
                        throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Unexpected opening parenthesis.");
                    }
                    parenthetical = true;
                    break;
                }
                case ')':
                {
                    if (parenthetical)
                    {
                        parenthetical = false;
                    }
                    else
                    {
                        throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Unexpected closing parenthesis.");
                    }
                    break;
                }
                case StreamTokenizer.TokenType_Word:
                {
                    // Words are only legal inside parens, as "argName : argValue" pairs.
                    if (!parenthetical)
                    {
                        throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": Unexpected token '" + stok.StringValue + "'");
                    }
                    string argName = stok.StringValue;
                    stok.NextToken();
                    if (stok.TokenType != ':')
                    {
                        throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to " + clazz.Name);
                    }
                    stok.NextToken();
                    string argValue = stok.StringValue;
                    switch (stok.TokenType)
                    {
                        case StreamTokenizer.TokenType_Number:
                        {
                            argValue = stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                            // Drop the ".0" from numbers, for integer arguments
                            argValue = TRAILING_DOT_ZERO_PATTERN.Replace(argValue, "", 1);
                            // Intentional fall-through
                            // (C# forbids case fallthrough, so the shared store below
                            // is duplicated from the quoted/word case.)
                            argMap[argName] = argValue;
                            break;
                        }
                        case '"':
                        case '\'':
                        case StreamTokenizer.TokenType_Word:
                        {
                            argMap[argName] = argValue;
                            break;
                        }
                        case StreamTokenizer.TokenType_EndOfStream:
                        {
                            throw RuntimeException.Create("Unexpected EOF: " + stok.ToString());
                        }
                        default:
                        {
                            throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                        }
                    }
                    break;
                }
            }
        }
        WHILE_LOOP_BREAK : { }
        // Default luceneMatchVersion if the user did not supply one.
        if (!argMap.ContainsKey("luceneMatchVersion"))
        {
#pragma warning disable 612, 618
            argMap["luceneMatchVersion"] = LuceneVersion.LUCENE_CURRENT.ToString();
#pragma warning restore 612, 618
        }
        AbstractAnalysisFactory instance;
        try
        {
            // Factories are expected to expose a ctor taking the arg dictionary.
            instance = (AbstractAnalysisFactory)Activator.CreateInstance(clazz, argMap);
        }
        catch (Exception e) when(e.IsException())
        {
            throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", e);
        }
        // Give resource-aware factories a filesystem loader rooted at work.dir.
        if (instance is IResourceLoaderAware resourceLoaderAware)
        {
            DirectoryInfo baseDir = new DirectoryInfo(RunData.Config.Get("work.dir", "work"));
            resourceLoaderAware.Inform(new FilesystemResourceLoader(baseDir));
        }
        // Route the instance to the matching pipeline slot; note there is only
        // one tokenizer, but char filters and token filters accumulate in lists.
        if (typeof(CharFilterFactory).IsAssignableFrom(clazz))
        {
            charFilterFactories.Add((CharFilterFactory)instance);
        }
        else if (typeof(TokenizerFactory).IsAssignableFrom(clazz))
        {
            tokenizerFactory = (TokenizerFactory)instance;
        }
        else if (typeof(TokenFilterFactory).IsAssignableFrom(clazz))
        {
            tokenFilterFactories.Add((TokenFilterFactory)instance);
        }
    }
    catch (Exception e) when(e.IsRuntimeException())
    {
        // Already-located errors carry a "Line #" prefix; rethrow untouched.
        if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
        {
            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
        }
        else
        {
            throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", e);
        }
    }
    catch (Exception t) when(t.IsThrowable())
    {
        throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", t);
    }
}
/// <summary>
/// Sets the params.
/// Analysis component factory names may optionally include the "Factory" suffix.
/// </summary>
/// <param name="params">
/// analysis pipeline specification: name, (optional) positionIncrementGap,
/// (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
/// and 0+ TokenFilterFactory's
/// </param>
public override void SetParams(string @params)
{
    base.SetParams(@params);
    // Parser state machine: ANALYZER_ARG (expect name/gaps first) ->
    // ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER -> TOKENFILTER.
    ArgType expectedArgType = ArgType.ANALYZER_ARG;
    StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));
    stok.CommentChar('#');
    stok.QuoteChar('"');
    stok.QuoteChar('\'');
    stok.EndOfLineIsSignificant = false;
    stok.OrdinaryChar('(');
    stok.OrdinaryChar(')');
    stok.OrdinaryChar(':');
    stok.OrdinaryChar(',');
    try
    {
        while (stok.NextToken() != StreamTokenizer.TokenType_EndOfStream)
        {
            switch (stok.TokenType)
            {
                case ',':
                {
                    // Do nothing - commas separate top-level arguments.
                    break;
                }
                case StreamTokenizer.TokenType_Word:
                {
                    if (expectedArgType.Equals(ArgType.ANALYZER_ARG))
                    {
                        // First phase: only name / positionIncrementGap / offsetGap allowed.
                        string argName = stok.StringValue;
                        if (!argName.Equals("name", StringComparison.OrdinalIgnoreCase) && !argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) && !argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                        {
                            throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Missing 'name' param to AnalyzerFactory: '" + @params + "'");
                        }
                        stok.NextToken();
                        if (stok.TokenType != ':')
                        {
                            throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                        }
                        stok.NextToken();
                        string argValue = stok.StringValue;
                        switch (stok.TokenType)
                        {
                            case StreamTokenizer.TokenType_Number:
                            {
                                argValue = stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                                // Drop the ".0" from numbers, for integer arguments
                                argValue = TRAILING_DOT_ZERO_PATTERN.Replace(argValue, "", 1);
                                // Intentional fallthrough
                                // (C# forbids case fallthrough, so the shared handling
                                // below is duplicated in the quoted/word case.)
                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (Exception e) when(e.IsNumberFormatException())
                                    {
                                        throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }
                            case '"':
                            case '\'':
                            case StreamTokenizer.TokenType_Word:
                            {
                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (Exception e) when(e.IsNumberFormatException())
                                    {
                                        throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }
                            case StreamTokenizer.TokenType_EndOfStream:
                            {
                                throw RuntimeException.Create("Unexpected EOF: " + stok.ToString());
                            }
                            default:
                            {
                                throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                            }
                        }
                    }
                    else if (expectedArgType.Equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER))
                    {
                        string argName = stok.StringValue;
                        // Gap params may still appear before the pipeline components.
                        if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) || argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                        {
                            stok.NextToken();
                            if (stok.TokenType != ':')
                            {
                                throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                            }
                            stok.NextToken();
                            int intArgValue = (int)stok.NumberValue;
                            switch (stok.TokenType)
                            {
                                case '"':
                                case '\'':
                                case StreamTokenizer.TokenType_Word:
                                {
                                    // Quoted/word value: re-parse as integer.
                                    intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(stok.StringValue.Trim(), CultureInfo.InvariantCulture);
                                    }
                                    catch (Exception e) when(e.IsNumberFormatException())
                                    {
                                        throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + stok.StringValue + "'", e);
                                    }
                                    // Intentional fall-through
                                    // (shared assignment duplicated in the number case below)
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }
                                case StreamTokenizer.TokenType_Number:
                                {
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }
                                case StreamTokenizer.TokenType_EndOfStream:
                                {
                                    throw RuntimeException.Create("Unexpected EOF: " + stok.ToString());
                                }
                                default:
                                {
                                    throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                                }
                            }
                            break;
                        }
                        // Otherwise the word names a component: try CharFilterFactory
                        // first, then TokenizerFactory (after which only token filters follow).
                        try
                        {
                            Type clazz;
                            clazz = LookupAnalysisClass(argName, typeof(CharFilterFactory));
                            CreateAnalysisPipelineComponent(stok, clazz);
                        }
                        catch (Exception e) when(e.IsIllegalArgumentException())
                        {
                            try
                            {
                                Type clazz;
                                clazz = LookupAnalysisClass(argName, typeof(TokenizerFactory));
                                CreateAnalysisPipelineComponent(stok, clazz);
                                expectedArgType = ArgType.TOKENFILTER;
                            }
                            catch (Exception e2) when(e2.IsIllegalArgumentException())
                            {
                                throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": Can't find class '" + argName + "' as CharFilterFactory or TokenizerFactory", e2);
                            }
                        }
                    }
                    else
                    {
                        // expectedArgType = ArgType.TOKENFILTER
                        string className = stok.StringValue;
                        Type clazz;
                        try
                        {
                            clazz = LookupAnalysisClass(className, typeof(TokenFilterFactory));
                        }
                        catch (Exception e) when(e.IsIllegalArgumentException())
                        {
                            throw RuntimeException.Create ("Line #" + GetLineNumber(stok) + ": Can't find class '" + className + "' as TokenFilterFactory", e);
                        }
                        CreateAnalysisPipelineComponent(stok, clazz);
                    }
                    break;
                }
                default:
                {
                    throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                }
            }
        }
    }
    catch (Exception e) when(e.IsRuntimeException())
    {
        // Already-located errors carry a "Line #" prefix; rethrow untouched.
        if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
        {
            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
        }
        else
        {
            throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", e);
        }
    }
    catch (Exception t) when(t.IsThrowable())
    {
        throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", t);
    }
    // Register the assembled pipeline under the parsed factory name.
    AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories)
    {
        PositionIncrementGap = positionIncrementGap,
        OffsetGap = offsetGap
    };
    RunData.AnalyzerFactories[factoryName] = analyzerFactory;
}
/// <summary>
/// Sets the params.
/// Analysis component factory names may optionally include the "Factory" suffix.
/// </summary>
/// <param name="params">
/// analysis pipeline specification: name, (optional) positionIncrementGap,
/// (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
/// and 0+ TokenFilterFactory's
/// </param>
public override void SetParams(string @params)
{
    base.SetParams(@params);
    // Parser state machine: ANALYZER_ARG (expect name/gaps first) ->
    // ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER -> TOKENFILTER.
    ArgType expectedArgType = ArgType.ANALYZER_ARG;
    StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));
    stok.CommentChar('#');
    stok.QuoteChar('"');
    stok.QuoteChar('\'');
    stok.IsEOLSignificant = false;
    stok.OrdinaryChar('(');
    stok.OrdinaryChar(')');
    stok.OrdinaryChar(':');
    stok.OrdinaryChar(',');
    try
    {
        while (stok.NextToken() != StreamTokenizer.TT_EOF)
        {
            switch (stok.TokenType)
            {
                case ',':
                {
                    // Do nothing - commas separate top-level arguments.
                    break;
                }
                case StreamTokenizer.TT_WORD:
                {
                    if (expectedArgType.Equals(ArgType.ANALYZER_ARG))
                    {
                        // First phase: only name / positionIncrementGap / offsetGap allowed.
                        string argName = stok.StringValue;
                        if (!argName.Equals("name", StringComparison.OrdinalIgnoreCase)
                            && !argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase)
                            && !argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                        {
                            throw new Exception("Line #" + GetLineNumber(stok) + ": Missing 'name' param to AnalyzerFactory: '" + @params + "'");
                        }
                        stok.NextToken();
                        if (stok.TokenType != ':')
                        {
                            throw new Exception("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                        }
                        stok.NextToken();
                        string argValue = stok.StringValue;
                        switch (stok.TokenType)
                        {
                            case StreamTokenizer.TT_NUMBER:
                            {
                                argValue = stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                                // Drop the ".0" from numbers, for integer arguments
                                argValue = TRAILING_DOT_ZERO_PATTERN.Replace(argValue, "", 1);
                                // Intentional fallthrough (C# forbids case fallthrough,
                                // so the shared handling is duplicated in the next case).
                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (FormatException e)
                                    {
                                        throw new Exception("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }
                            case '"':
                            case '\'':
                            case StreamTokenizer.TT_WORD:
                            {
                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (FormatException e)
                                    {
                                        throw new Exception("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }
                            case StreamTokenizer.TT_EOF:
                            {
                                throw new Exception("Unexpected EOF: " + stok.ToString());
                            }
                            default:
                            {
                                throw new Exception("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                            }
                        }
                    }
                    else if (expectedArgType.Equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER))
                    {
                        string argName = stok.StringValue;
                        // Gap params may still appear before the pipeline components.
                        if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase)
                            || argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                        {
                            stok.NextToken();
                            if (stok.TokenType != ':')
                            {
                                throw new Exception("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                            }
                            stok.NextToken();
                            int intArgValue = (int)stok.NumberValue;
                            switch (stok.TokenType)
                            {
                                case '"':
                                case '\'':
                                case StreamTokenizer.TT_WORD:
                                {
                                    // Quoted/word value: re-parse as integer.
                                    intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(stok.StringValue.Trim(), CultureInfo.InvariantCulture);
                                    }
                                    catch (FormatException e)
                                    {
                                        throw new Exception("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + stok.StringValue + "'", e);
                                    }
                                    // Intentional fall-through (shared assignment
                                    // duplicated in the number case below).
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }
                                case StreamTokenizer.TT_NUMBER:
                                {
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }
                                case StreamTokenizer.TT_EOF:
                                {
                                    throw new Exception("Unexpected EOF: " + stok.ToString());
                                }
                                default:
                                {
                                    throw new Exception("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                                }
                            }
                            break;
                        }
                        // Otherwise the word names a component: try CharFilterFactory
                        // first, then TokenizerFactory (after which only token filters follow).
                        try
                        {
                            Type clazz;
                            clazz = LookupAnalysisClass(argName, typeof(CharFilterFactory));
                            CreateAnalysisPipelineComponent(stok, clazz);
                        }
                        catch (ArgumentException /*e*/)
                        {
                            try
                            {
                                Type clazz;
                                clazz = LookupAnalysisClass(argName, typeof(TokenizerFactory));
                                CreateAnalysisPipelineComponent(stok, clazz);
                                expectedArgType = ArgType.TOKENFILTER;
                            }
                            catch (ArgumentException e2)
                            {
                                throw new Exception("Line #" + GetLineNumber(stok) + ": Can't find class '" + argName + "' as CharFilterFactory or TokenizerFactory", e2);
                            }
                        }
                    }
                    else
                    {
                        // expectedArgType = ArgType.TOKENFILTER
                        string className = stok.StringValue;
                        Type clazz;
                        try
                        {
                            clazz = LookupAnalysisClass(className, typeof(TokenFilterFactory));
                        }
                        catch (ArgumentException e)
                        {
                            throw new Exception("Line #" + GetLineNumber(stok) + ": Can't find class '" + className + "' as TokenFilterFactory", e);
                        }
                        CreateAnalysisPipelineComponent(stok, clazz);
                    }
                    break;
                }
                default:
                {
                    throw new Exception("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                }
            }
        }
    }
    catch (Exception e)
    {
        // Already-located errors carry a "Line #" prefix; rethrow untouched.
        if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
        {
            // CA2200: use a bare rethrow so the original stack trace is preserved;
            // 'throw e;' would reset it to this frame.
            throw;
        }
        else
        {
            throw new Exception("Line #" + GetLineNumber(stok) + ": ", e);
        }
    }
    // Register the assembled pipeline under the parsed factory name.
    AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories)
    {
        PositionIncrementGap = positionIncrementGap,
        OffsetGap = offsetGap
    };
    RunData.AnalyzerFactories[factoryName] = analyzerFactory;
}
/// <summary>
/// Lexicalizes a quote: reads characters after an opening '"' until an
/// unescaped closing '"' (or EOF), emitting either a TEXT token, an
/// embedding, or - on EOF - a lone '"' SYMBOL followed by a re-lex of the
/// collected characters.
/// </summary>
private void LexicalizeQuote()
{
    //Store current line number location for backtracking
    int tempLine = tokenizer.GetScannedLines();
    //Hold previous char for recognizing escape chars
    char previousChar = '\0';
    // True when the preceding token stream indicates this quote is comment text.
    bool IsString = false;
    //Skip " token, only text is interesting
    CurrentToken = tokenizer.NextToken();
    //Ignore comments, due urls, etc
    tokenizer.SetIgnoreComments(true);
    //Check if this text is comment text
    // (last token already emitted is the keyword "comment")
    Token[] tempArray = TokenStream.ToArray();
    if (tempArray[tempArray.Length - 1].GetType() == TokenType.KEYWORD && tempArray[tempArray.Length - 1].GetValue().ToString() == "comment")
    {
        IsString = true;
    }
    //Retrieve possible quoted text
    StringBuilder stringBuilder = new StringBuilder();
    tokenizer.SetIgnoreNumeric(true);
    while (tokenizer.GetCharacterValue() != '\"' || previousChar == '\\') //Scan until non escaped " found
    {
        if (CurrentToken == StreamTokenizer.EOF)
        {
            // End of file, so it wasn't a quoted part but just a single "
            tokenizer.SetIgnoreComments(false);
            tokenizer.SetIgnoreNumeric(false);
            //First add a single quote as token
            TokenStream.Add(new Token("\"", TokenType.SYMBOL, tempLine));
            //Second, scan remaining string
            // NOTE(review): the collected text is re-lexed with a fresh lexer and
            // its tokens appended, offset by tempLine to keep file line numbers.
            WaebricLexer tempLexer = new WaebricLexer(new StringReader(stringBuilder.ToString()));
            tempLexer.LexicalizeStream();
            List <Token> tempTokenList = tempLexer.GetTokenList();
            //Add all tokens to stream
            foreach (Token currentToken in tempTokenList)
            {
                TokenStream.Add(new Token(currentToken.GetValue(), currentToken.GetType(), (currentToken.GetLine() + tempLine)));
            }
            return; //Lexicalizing done
        }
        else if (tokenizer.GetCharacterValue() == '<' && !IsString)
        {
            //Embedding found, so lexicalize embedding
            // (comment text cannot contain embeddings, hence the !IsString guard)
            LexicalizeEmbedding(stringBuilder.ToString());
            tokenizer.SetIgnoreComments(false);
            tokenizer.SetIgnoreNumeric(false);
            return;
        }
        //Get next part and add it to stringBuilder
        stringBuilder.Append(tokenizer.ToString());
        previousChar = tokenizer.GetCharacterValue();
        CurrentToken = tokenizer.NextToken();
    }
    // Closing quote reached: restore tokenizer modes changed above.
    tokenizer.SetIgnoreComments(false);
    tokenizer.SetIgnoreNumeric(false);
    //Check if string is correct quote text
    if (IsString)
    {
        if (!IsCorrectString(stringBuilder.ToString()))
        {
            // NOTE(review): "containts" typo is in the runtime message; left as-is
            // since callers/tests may match on the exact text.
            throw new StreamTokenizerException("String Text containts non valid characters", tempLine);
        }
    }
    TokenStream.Add(new Token(stringBuilder.ToString(), TokenType.TEXT, tempLine));
    //Skip " token, only text is interesting
    CurrentToken = tokenizer.NextToken();
}