Beispiel #1
0
        /// <summary>
        /// Parses the task parameters: a comma-separated list of Analyzer class names.
        /// An Analyzer that lives in Lucene.Net.Analysis may be named without the
        /// Lucene.Net.Analysis part of its fully qualified class name.
        /// <para/>
        /// A name may also refer to a previously defined AnalyzerFactory.
        /// <para/>
        /// Example declaration:
        /// <code>
        /// {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, Standard.StandardAnalyzer) >
        /// </code>
        /// <para/>
        /// Example AnalyzerFactory usage:
        /// <code>
        /// -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
        /// -NewAnalyzer('whitespace tokenized')
        /// </code>
        /// </summary>
        /// <param name="params">analyzerClassName, or empty for the StandardAnalyzer</param>
        public override void SetParams(string @params)
        {
            base.SetParams(@params);

            StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(@params));
            tokenizer.QuoteChar('"');
            tokenizer.QuoteChar('\'');
            tokenizer.EndOfLineIsSignificant = false;
            tokenizer.OrdinaryChar(',');

            try
            {
                while (tokenizer.NextToken() != StreamTokenizer.TokenType_EndOfStream)
                {
                    int tokenType = tokenizer.TokenType;

                    // Commas only separate names; they carry no information themselves.
                    if (tokenType == ',')
                    {
                        continue;
                    }

                    // A name is either a bare word or a single-/double-quoted string.
                    bool isAnalyzerName = tokenType == '\''
                        || tokenType == '\"'
                        || tokenType == StreamTokenizer.TokenType_Word;
                    if (!isAnalyzerName)
                    {
                        throw RuntimeException.Create("Unexpected token: " + tokenizer.ToString());
                    }

                    analyzerNames.Add(tokenizer.StringValue);
                }
            }
            catch (Exception e) when (e.IsRuntimeException())
            {
                // Only add the "Line #" prefix when the message does not already start with it.
                if (!e.Message.StartsWith("Line #", StringComparison.Ordinal))
                {
                    throw RuntimeException.Create("Line #" + (tokenizer.LineNumber + AlgLineNum) + ": ", e);
                }

                throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
            }
            catch (Exception t) when (t.IsThrowable())
            {
                throw RuntimeException.Create("Line #" + (tokenizer.LineNumber + AlgLineNum) + ": ", t);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Set the params (analyzerName only): a comma-separated list of Analyzer class names.  If the Analyzer lives in
        /// Lucene.Net.Analysis, the name can be shortened by dropping the Lucene.Net.Analysis part of the Fully Qualified Class Name.
        /// <para/>
        /// Analyzer names may also refer to previously defined AnalyzerFactory's.
        /// <para/>
        /// Example Declaration:
        /// <code>
        /// {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, Standard.StandardAnalyzer) >
        /// </code>
        /// <para/>
        /// Example AnalyzerFactory usage:
        /// <code>
        /// -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
        /// -NewAnalyzer('whitespace tokenized')
        /// </code>
        /// </summary>
        /// <param name="params">analyzerClassName, or empty for the StandardAnalyzer</param>
        /// <exception cref="Exception">
        /// If an unexpected token is encountered or tokenizing fails. Unless the message already
        /// starts with "Line #", the failure is wrapped in a new exception whose message carries
        /// that prefix and whose inner exception is the original.
        /// </exception>
        public override void SetParams(string @params)
        {
            base.SetParams(@params);
            StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));

            stok.QuoteChar('"');
            stok.QuoteChar('\'');
            stok.IsEOLSignificant = false;
            stok.OrdinaryChar(',');
            try
            {
                while (stok.NextToken() != StreamTokenizer.TT_EOF)
                {
                    switch (stok.TokenType)
                    {
                        case ',':
                            {
                                // Separator between names - nothing to record.
                                break;
                            }

                        case '\'':
                        case '\"':
                        case StreamTokenizer.TT_WORD:
                            {
                                // Quoted string or bare word: both are analyzer names.
                                analyzerNames.Add(stok.StringValue);
                                break;
                            }

                        default:
                            {
                                throw new Exception("Unexpected token: " + stok.ToString());
                            }
                    }
                }
            }
            catch (Exception e)
            {
                if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
                {
                    // Already carries the line prefix. Use a bare rethrow so the original
                    // stack trace is preserved ("throw e;" would reset it - CA2200).
                    throw;
                }
                else
                {
                    throw new Exception("Line #" + (stok.LineNumber + AlgLineNum) + ": ", e);
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Parses the algorithm text exposed by the run's <c>Config.AlgorithmText</c> into a tree of
        /// tasks and task sequences.
        /// Property examined: alt.tasks.packages == comma separated list of
        /// alternate Assembly names where tasks would be searched for, when not found
        /// in the default Assembly (that of <see cref="PerfTask"/>).
        /// If the same task class appears in more than one Assembly, the Assembly
        /// indicated first in this list will be used.
        /// <para/>
        /// The Lucene.Net implementation differs from Lucene in that all
        /// referenced assemblies are also scanned for the type. However,
        /// alt.tasks.packages may be included for assemblies that are
        /// not referenced in your project.
        /// <para/>
        /// Numeric tokens found inside a task's parenthesized parameter list are written back
        /// into the parameter string using the invariant culture, so the text passed to
        /// <c>SetParams</c> does not depend on the current thread culture.
        /// </summary>
        /// <param name="runData">perf-run-data used at running the tasks.</param>
        /// <exception cref="Exception">if errors while parsing the algorithm.</exception>
        public Algorithm(PerfRunData runData)
        {
            Config config = runData.Config;

            taskPackages = InitTasksPackages(config);
            string algTxt = config.AlgorithmText;

            // Top-level sequence; every sequence opened by '{' or '[' is created with the
            // then-current sequence passed as its third constructor argument.
            sequence = new TaskSequence(runData, null, null, false);
            TaskSequence currSequence = sequence;
            PerfTask prevTask = null;
            StreamTokenizer stok = new StreamTokenizer(new StringReader(algTxt));

            stok.CommentChar('#');
            stok.IsEOLSignificant = false;
            stok.QuoteChar('"');
            stok.QuoteChar('\'');
            stok.OrdinaryChar('/');
            stok.OrdinaryChar('(');
            stok.OrdinaryChar(')');
            bool colonOk = false;                 // true only right after a sequence has been closed
            bool isDisableCountNextTask = false;  // only for primitive tasks

            currSequence.Depth = 0;

            while (stok.NextToken() != StreamTokenizer.TT_EOF)
            {
                switch (stok.TokenType)
                {
                    case StreamTokenizer.TT_WORD:
                        // A word names a task: instantiate it and append it to the current sequence.
                        string s = stok.StringValue;
                        PerfTask task = (PerfTask)Activator.CreateInstance(TaskClass(config, s), runData);
                        task.AlgLineNum = stok.LineNumber;
                        task.DisableCounting = isDisableCountNextTask;
                        isDisableCountNextTask = false;
                        currSequence.AddTask(task);
                        if (task is RepSumByPrefTask)
                        {
                            // This task type takes a mandatory prefix as its next token.
                            stok.NextToken();
                            string prefix = stok.StringValue;
                            if (prefix == null || prefix.Length == 0)
                            {
                                throw new Exception("named report prefix problem - " + stok.ToString());
                            }
                            ((RepSumByPrefTask)task).SetPrefix(prefix);
                        }
                        // check for task param: '(' someParam ')'
                        stok.NextToken();
                        if (stok.TokenType != '(')
                        {
                            stok.PushBack();
                        }
                        else
                        {
                            // get params, for tasks that supports them - allow recursive parenthetical expressions
                            stok.IsEOLSignificant = true;      // Allow params tokenizer to keep track of line number
                            StringBuilder @params = new StringBuilder();
                            stok.NextToken();
                            if (stok.TokenType != ')')
                            {
                                int count = 1; // current parenthesis nesting depth
                                while (true)
                                {
                                    switch (stok.TokenType)
                                    {
                                        case StreamTokenizer.TT_NUMBER:
                                            {
                                                // Format with the invariant culture: StringBuilder.Append(double)
                                                // uses the current culture, which can emit ',' as the decimal
                                                // separator and thereby corrupt the rebuilt parameter string.
                                                @params.Append(stok.NumberValue.ToString(System.Globalization.CultureInfo.InvariantCulture));
                                                break;
                                            }

                                        case StreamTokenizer.TT_WORD:
                                            {
                                                @params.Append(stok.StringValue);
                                                break;
                                            }

                                        case StreamTokenizer.TT_EOF:
                                            {
                                                throw new Exception("Unexpexted EOF: - " + stok.ToString());
                                            }

                                        case '"':
                                        case '\'':
                                            {
                                                // Re-emit the quoted string wrapped in its original quote character.
                                                @params.Append((char)stok.TokenType);
                                                // re-escape delimiters, if any
                                                @params.Append(stok.StringValue.Replace("" + (char)stok.TokenType, @"\" + (char)stok.TokenType));
                                                @params.Append((char)stok.TokenType);
                                                break;
                                            }

                                        case '(':
                                            {
                                                @params.Append((char)stok.TokenType);
                                                ++count;
                                                break;
                                            }

                                        case ')':
                                            {
                                                if (--count >= 1)
                                                {
                                                    // exclude final closing parenthesis
                                                    @params.Append((char)stok.TokenType);
                                                }
                                                else
                                                {
                                                    // Parentheses are balanced: the parameter list is complete.
                                                    goto BALANCED_PARENS_BREAK;
                                                }
                                                break;
                                            }

                                        default:
                                            {
                                                // Any other single-character token is copied through unchanged.
                                                @params.Append((char)stok.TokenType);
                                                break;
                                            }
                                    }
                                    stok.NextToken();
                                }
                                BALANCED_PARENS_BREAK: { }
                            }
                            stok.IsEOLSignificant = false;
                            string prm = @params.ToString().Trim();
                            if (prm.Length > 0)
                            {
                                task.SetParams(prm);
                            }
                        }

                        // ---------------------------------------
                        colonOk = false;
                        prevTask = task;
                        break;

                    default:
                        char c = (char)stok.TokenType;

                        switch (c)
                        {
                            case ':':
                                if (!colonOk)
                                {
                                    throw new Exception("colon unexpexted: - " + stok.ToString());
                                }
                                colonOk = false;
                                // get repetitions number
                                stok.NextToken();
                                if ((char)stok.TokenType == '*')
                                {
                                    ((TaskSequence)prevTask).SetRepetitions(TaskSequence.REPEAT_EXHAUST);
                                }
                                else
                                {
                                    if (stok.TokenType != StreamTokenizer.TT_NUMBER)
                                    {
                                        throw new Exception("expected repetitions number or XXXs: - " + stok.ToString());
                                    }
                                    else
                                    {
                                        double num = stok.NumberValue;
                                        stok.NextToken();
                                        // A number followed by the word "s" is passed to SetRunTime
                                        // instead of being used as a repetition count.
                                        if (stok.TokenType == StreamTokenizer.TT_WORD && stok.StringValue.Equals("s", StringComparison.Ordinal))
                                        {
                                            ((TaskSequence)prevTask).SetRunTime(num);
                                        }
                                        else
                                        {
                                            stok.PushBack();
                                            ((TaskSequence)prevTask).SetRepetitions((int)num);
                                        }
                                    }
                                }
                                // check for rate specification (ops/min)
                                stok.NextToken();
                                if (stok.TokenType != ':')
                                {
                                    stok.PushBack();
                                }
                                else
                                {
                                    // get rate number
                                    stok.NextToken();
                                    if (stok.TokenType != StreamTokenizer.TT_NUMBER)
                                    {
                                        throw new Exception("expected rate number: - " + stok.ToString());
                                    }
                                    // check for unit - min or sec, sec is default
                                    stok.NextToken();
                                    if (stok.TokenType != '/')
                                    {
                                        stok.PushBack();
                                        ((TaskSequence)prevTask).SetRate((int)stok.NumberValue, false);         // set rate per sec
                                    }
                                    else
                                    {
                                        stok.NextToken();
                                        if (stok.TokenType != StreamTokenizer.TT_WORD)
                                        {
                                            throw new Exception("expected rate unit: 'min' or 'sec' - " + stok.ToString());
                                        }
                                        string unit = stok.StringValue.ToLowerInvariant();
                                        if ("min".Equals(unit, StringComparison.Ordinal))
                                        {
                                            ((TaskSequence)prevTask).SetRate((int)stok.NumberValue, true);         // set rate per min
                                        }
                                        else if ("sec".Equals(unit, StringComparison.Ordinal))
                                        {
                                            ((TaskSequence)prevTask).SetRate((int)stok.NumberValue, false);         // set rate per sec
                                        }
                                        else
                                        {
                                            throw new Exception("expected rate unit: 'min' or 'sec' - " + stok.ToString());
                                        }
                                    }
                                }
                                colonOk = false;
                                break;

                            case '{':
                            case '[':
                                // a sequence
                                // check for sequence name
                                string name = null;
                                stok.NextToken();
                                if (stok.TokenType != '"')
                                {
                                    stok.PushBack();
                                }
                                else
                                {
                                    name = stok.StringValue;
                                    if (stok.TokenType != '"' || name == null || name.Length == 0)
                                    {
                                        throw new Exception("sequence name problem - " + stok.ToString());
                                    }
                                }
                                // start the sequence; the last constructor argument is true for '['
                                TaskSequence seq2 = new TaskSequence(runData, name, currSequence, c == '[');
                                currSequence.AddTask(seq2);
                                currSequence = seq2;
                                colonOk = false;
                                break;

                            case '&':
                                if (currSequence.IsParallel)
                                {
                                    throw new Exception("Can only create background tasks within a serial task");
                                }
                                stok.NextToken();
                                int deltaPri;
                                if (stok.TokenType != StreamTokenizer.TT_NUMBER)
                                {
                                    stok.PushBack();
                                    deltaPri = 0;
                                }
                                else
                                {
                                    // priority
                                    deltaPri = (int)stok.NumberValue;
                                }

                                if (prevTask == null)
                                {
                                    throw new Exception("& was unexpected");
                                }
                                else if (prevTask.RunInBackground)
                                {
                                    throw new Exception("double & was unexpected");
                                }
                                else
                                {
                                    prevTask.SetRunInBackground(deltaPri);
                                }
                                break;

                            case '>':
                                // Same as '}' / ']' below, but first calls SetNoChildReport().
                                // C# has no implicit fall-through, so the end-sequence steps are repeated here.
                                currSequence.SetNoChildReport();
                                // end sequence
                                colonOk = true;
                                prevTask = currSequence;
                                currSequence = currSequence.Parent;
                                break;

                            case '}':
                            case ']':
                                // end sequence
                                colonOk = true;
                                prevTask = currSequence;
                                currSequence = currSequence.Parent;
                                break;

                            case '-':
                                // The flag is copied into DisableCounting of the next task created.
                                isDisableCountNextTask = true;
                                break;
                        } // switch (c)
                        break;
                } // switch (stok.TokenType)
            }

            // Every opened sequence must have been closed again.
            if (sequence != currSequence)
            {
                throw new Exception("Unmatched sequences");
            }

            // remove redundant top level enclosing sequences
            while (sequence.IsCollapsable && sequence.Repetitions == 1 && sequence.GetRate() == 0)
            {
                IList<PerfTask> t = sequence.Tasks;
                if (t != null && t.Count == 1)
                {
                    PerfTask p = t[0];
                    if (p is TaskSequence)
                    {
                        sequence = (TaskSequence)p;
                        continue;
                    }
                }
                break;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Reads the <c>(name:value, ...)</c> argument list for one analysis factory from the
        /// tokenizer, creates an instance of the factory class with those arguments, and
        /// records the instance as a char filter, the tokenizer, or a token filter
        /// depending on the class's base type.
        /// </summary>
        /// <param name="stok">Stream tokenizer from which to draw analysis factory params.</param>
        /// <param name="clazz">Analysis factory class to instantiate.</param>
        private void CreateAnalysisPipelineComponent(StreamTokenizer stok, Type clazz)
        {
            IDictionary<string, string> factoryArgs = new Dictionary<string, string>();
            bool insideParens = false;

            try
            {
                while (stok.NextToken() != StreamTokenizer.TokenType_EndOfStream)
                {
                    int tokenType = stok.TokenType;

                    if (tokenType == ',')
                    {
                        if (!insideParens)
                        {
                            // Finished reading this analysis factory configuration
                            break;
                        }
                        // Inside the parentheses a comma merely separates arguments.
                    }
                    else if (tokenType == '(')
                    {
                        if (insideParens)
                        {
                            throw RuntimeException.Create
                                      ("Line #" + GetLineNumber(stok) + ": Unexpected opening parenthesis.");
                        }
                        insideParens = true;
                    }
                    else if (tokenType == ')')
                    {
                        if (!insideParens)
                        {
                            throw RuntimeException.Create
                                      ("Line #" + GetLineNumber(stok) + ": Unexpected closing parenthesis.");
                        }
                        insideParens = false;
                    }
                    else if (tokenType == StreamTokenizer.TokenType_Word)
                    {
                        // Argument names are only legal between '(' and ')'.
                        if (!insideParens)
                        {
                            throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": Unexpected token '" + stok.StringValue + "'");
                        }

                        string argName = stok.StringValue;
                        stok.NextToken();
                        if (stok.TokenType != ':')
                        {
                            throw RuntimeException.Create
                                      ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to " + clazz.Name);
                        }

                        stok.NextToken();
                        string argValue = stok.StringValue;
                        int valueType = stok.TokenType;

                        if (valueType == StreamTokenizer.TokenType_Number)
                        {
                            argValue = stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                            // Drop the ".0" from numbers, for integer arguments
                            argValue = TRAILING_DOT_ZERO_PATTERN.Replace(argValue, "", 1);
                            factoryArgs[argName] = argValue;
                        }
                        else if (valueType == '"' || valueType == '\'' || valueType == StreamTokenizer.TokenType_Word)
                        {
                            factoryArgs[argName] = argValue;
                        }
                        else if (valueType == StreamTokenizer.TokenType_EndOfStream)
                        {
                            throw RuntimeException.Create("Unexpected EOF: " + stok.ToString());
                        }
                        else
                        {
                            throw RuntimeException.Create
                                      ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                        }
                    }
                    // Every other token type is ignored.
                }

                // Supply a default match version when the spec did not give one.
                if (!factoryArgs.ContainsKey("luceneMatchVersion"))
                {
#pragma warning disable 612, 618
                    factoryArgs["luceneMatchVersion"] = LuceneVersion.LUCENE_CURRENT.ToString();
#pragma warning restore 612, 618
                }

                AbstractAnalysisFactory factory;
                try
                {
                    factory = (AbstractAnalysisFactory)Activator.CreateInstance(clazz, factoryArgs);
                }
                catch (Exception e) when (e.IsException())
                {
                    throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", e);
                }

                if (factory is IResourceLoaderAware loaderAware)
                {
                    // Resources are loaded relative to the configured "work.dir" (default "work").
                    DirectoryInfo workDir = new DirectoryInfo(RunData.Config.Get("work.dir", "work"));
                    loaderAware.Inform(new FilesystemResourceLoader(workDir));
                }

                // File the new component under the matching pipeline stage.
                if (typeof(CharFilterFactory).IsAssignableFrom(clazz))
                {
                    charFilterFactories.Add((CharFilterFactory)factory);
                }
                else if (typeof(TokenizerFactory).IsAssignableFrom(clazz))
                {
                    tokenizerFactory = (TokenizerFactory)factory;
                }
                else if (typeof(TokenFilterFactory).IsAssignableFrom(clazz))
                {
                    tokenFilterFactories.Add((TokenFilterFactory)factory);
                }
            }
            catch (Exception e) when (e.IsRuntimeException())
            {
                // Only add the "Line #" prefix when the message does not already start with it.
                if (!e.Message.StartsWith("Line #", StringComparison.Ordinal))
                {
                    throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", e);
                }

                throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
            }
            catch (Exception t) when (t.IsThrowable())
            {
                throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", t);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Sets the params.
        /// Analysis component factory names may optionally include the "Factory" suffix.
        /// </summary>
        /// <param name="params">
        /// analysis pipeline specification: name, (optional) positionIncrementGap,
        /// (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
        /// and 0+ TokenFilterFactory's
        /// </param>
        public override void SetParams(string @params)
        {
            base.SetParams(@params);
            ArgType expectedArgType = ArgType.ANALYZER_ARG;

            StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));

            stok.CommentChar('#');
            stok.QuoteChar('"');
            stok.QuoteChar('\'');
            stok.EndOfLineIsSignificant = false;
            stok.OrdinaryChar('(');
            stok.OrdinaryChar(')');
            stok.OrdinaryChar(':');
            stok.OrdinaryChar(',');
            try
            {
                while (stok.NextToken() != StreamTokenizer.TokenType_EndOfStream)
                {
                    switch (stok.TokenType)
                    {
                    case ',':
                    {
                        // Do nothing
                        break;
                    }

                    case StreamTokenizer.TokenType_Word:
                    {
                        if (expectedArgType.Equals(ArgType.ANALYZER_ARG))
                        {
                            string argName = stok.StringValue;
                            if (!argName.Equals("name", StringComparison.OrdinalIgnoreCase) &&
                                !argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) &&
                                !argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                            {
                                throw RuntimeException.Create
                                          ("Line #" + GetLineNumber(stok) + ": Missing 'name' param to AnalyzerFactory: '" + @params + "'");
                            }
                            stok.NextToken();
                            if (stok.TokenType != ':')
                            {
                                throw RuntimeException.Create
                                          ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                            }

                            stok.NextToken();
                            string argValue = stok.StringValue;
                            switch (stok.TokenType)
                            {
                            case StreamTokenizer.TokenType_Number:
                            {
                                argValue = stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                                // Drop the ".0" from numbers, for integer arguments
                                argValue = TRAILING_DOT_ZERO_PATTERN.Replace(argValue, "", 1);
                                // Intentional fallthrough

                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName     = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (Exception e) when(e.IsNumberFormatException())
                                    {
                                        throw RuntimeException.Create
                                                  ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }

                            case '"':
                            case '\'':
                            case StreamTokenizer.TokenType_Word:
                            {
                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName     = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (Exception e) when(e.IsNumberFormatException())
                                    {
                                        throw RuntimeException.Create
                                                  ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }

                            case StreamTokenizer.TokenType_EndOfStream:
                            {
                                throw RuntimeException.Create("Unexpected EOF: " + stok.ToString());
                            }

                            default:
                            {
                                throw RuntimeException.Create
                                          ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                            }
                            }
                        }
                        else if (expectedArgType.Equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER))
                        {
                            string argName = stok.StringValue;

                            if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) ||
                                argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                            {
                                stok.NextToken();
                                if (stok.TokenType != ':')
                                {
                                    throw RuntimeException.Create
                                              ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                                }
                                stok.NextToken();
                                int intArgValue = (int)stok.NumberValue;
                                switch (stok.TokenType)
                                {
                                case '"':
                                case '\'':
                                case StreamTokenizer.TokenType_Word:
                                {
                                    intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(stok.StringValue.Trim(), CultureInfo.InvariantCulture);
                                    }
                                    catch (Exception e) when(e.IsNumberFormatException())
                                    {
                                        throw RuntimeException.Create
                                                  ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + stok.StringValue + "'", e);
                                    }
                                    // Intentional fall-through

                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }

                                case StreamTokenizer.TokenType_Number:
                                {
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }

                                case StreamTokenizer.TokenType_EndOfStream:
                                {
                                    throw RuntimeException.Create("Unexpected EOF: " + stok.ToString());
                                }

                                default:
                                {
                                    throw RuntimeException.Create
                                              ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                                }
                                }
                                break;
                            }
                            try
                            {
                                Type clazz;
                                clazz = LookupAnalysisClass(argName, typeof(CharFilterFactory));
                                CreateAnalysisPipelineComponent(stok, clazz);
                            }
                            catch (Exception e) when(e.IsIllegalArgumentException())
                            {
                                try
                                {
                                    Type clazz;
                                    clazz = LookupAnalysisClass(argName, typeof(TokenizerFactory));
                                    CreateAnalysisPipelineComponent(stok, clazz);
                                    expectedArgType = ArgType.TOKENFILTER;
                                }
                                catch (Exception e2) when(e2.IsIllegalArgumentException())
                                {
                                    throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": Can't find class '"
                                                                  + argName + "' as CharFilterFactory or TokenizerFactory", e2);
                                }
                            }
                        }
                        else
                        {         // expectedArgType = ArgType.TOKENFILTER
                            string className = stok.StringValue;
                            Type   clazz;
                            try
                            {
                                clazz = LookupAnalysisClass(className, typeof(TokenFilterFactory));
                            }
                            catch (Exception e) when(e.IsIllegalArgumentException())
                            {
                                throw RuntimeException.Create
                                          ("Line #" + GetLineNumber(stok) + ": Can't find class '" + className + "' as TokenFilterFactory", e);
                            }
                            CreateAnalysisPipelineComponent(stok, clazz);
                        }
                        break;
                    }

                    default:
                    {
                        throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                    }
                    }
                }
            }
            catch (Exception e) when(e.IsRuntimeException())
            {
                if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
                {
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
                else
                {
                    throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", e);
                }
            }
            catch (Exception t) when(t.IsThrowable())
            {
                throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", t);
            }

            AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories)
            {
                PositionIncrementGap = positionIncrementGap,
                OffsetGap            = offsetGap
            };

            RunData.AnalyzerFactories[factoryName] = analyzerFactory;
        }
Beispiel #6
0
        /// <summary>
        /// Parses an AnalyzerFactory specification and registers the resulting
        /// <see cref="AnalyzerFactory"/> in <c>RunData.AnalyzerFactories</c> under the parsed name.
        /// Analysis component factory names may optionally include the "Factory" suffix.
        /// </summary>
        /// <remarks>
        /// Word tokens are interpreted according to a small state machine:
        /// <list type="bullet">
        /// <item><description><c>ANALYZER_ARG</c>: only <c>name</c>, <c>positionIncrementGap</c> and
        /// <c>offsetGap</c> (each followed by <c>:</c> and a value) are accepted; seeing <c>name</c>
        /// advances to the next state.</description></item>
        /// <item><description><c>ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER</c>: either one of the two gap
        /// arguments, or a class name that is looked up first as a CharFilterFactory and then as a
        /// TokenizerFactory; a tokenizer advances to the final state.</description></item>
        /// <item><description><c>TOKENFILTER</c>: every word must name a TokenFilterFactory.</description></item>
        /// </list>
        /// Commas between arguments are ignored; any other token type is an error.
        /// </remarks>
        /// <param name="params">
        /// analysis pipeline specification: name, (optional) positionIncrementGap,
        /// (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
        /// and 0+ TokenFilterFactory's
        /// </param>
        /// <exception cref="Exception">
        /// Thrown for any failure while tokenizing or parsing; the message always starts with
        /// <c>"Line #"</c> followed by the line number reported by <c>GetLineNumber</c>.
        /// </exception>
        public override void SetParams(string @params)
        {
            base.SetParams(@params);
            ArgType expectedArgType = ArgType.ANALYZER_ARG;

            StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));

            stok.CommentChar('#');
            stok.QuoteChar('"');
            stok.QuoteChar('\'');
            stok.IsEOLSignificant = false;
            stok.OrdinaryChar('(');
            stok.OrdinaryChar(')');
            stok.OrdinaryChar(':');
            stok.OrdinaryChar(',');
            try
            {
                while (stok.NextToken() != StreamTokenizer.TT_EOF)
                {
                    switch (stok.TokenType)
                    {
                    case ',':
                    {
                        // Argument separators carry no information.
                        break;
                    }

                    case StreamTokenizer.TT_WORD:
                    {
                        if (expectedArgType.Equals(ArgType.ANALYZER_ARG))
                        {
                            string argName = stok.StringValue;
                            if (!argName.Equals("name", StringComparison.OrdinalIgnoreCase) &&
                                !argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) &&
                                !argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                            {
                                throw new Exception
                                          ("Line #" + GetLineNumber(stok) + ": Missing 'name' param to AnalyzerFactory: '" + @params + "'");
                            }
                            stok.NextToken();
                            if (stok.TokenType != ':')
                            {
                                throw new Exception
                                          ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                            }

                            // First normalize the value token to text; the name/gap handling
                            // below is then shared by numeric and word/quoted values.
                            stok.NextToken();
                            string argValue = stok.StringValue;
                            switch (stok.TokenType)
                            {
                            case StreamTokenizer.TT_NUMBER:
                            {
                                argValue = stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                                // Drop the ".0" from numbers, for integer arguments
                                argValue = TRAILING_DOT_ZERO_PATTERN.Replace(argValue, "", 1);
                                break;
                            }

                            case '"':
                            case '\'':
                            case StreamTokenizer.TT_WORD:
                            {
                                // argValue already holds the token text.
                                break;
                            }

                            case StreamTokenizer.TT_EOF:
                            {
                                throw new Exception("Unexpected EOF: " + stok.ToString());
                            }

                            default:
                            {
                                throw new Exception
                                          ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                            }
                            }

                            if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                            {
                                factoryName     = argValue;
                                expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                            }
                            else
                            {
                                ApplyGapArgument(argName, ParseIntArgument(stok, argName, argValue));
                            }
                        }
                        else if (expectedArgType.Equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER))
                        {
                            string argName = stok.StringValue;

                            if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) ||
                                argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                            {
                                stok.NextToken();
                                if (stok.TokenType != ':')
                                {
                                    throw new Exception
                                              ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                                }
                                stok.NextToken();
                                int intArgValue;
                                switch (stok.TokenType)
                                {
                                case '"':
                                case '\'':
                                case StreamTokenizer.TT_WORD:
                                {
                                    intArgValue = ParseIntArgument(stok, argName, stok.StringValue);
                                    break;
                                }

                                case StreamTokenizer.TT_NUMBER:
                                {
                                    // The numeric token value is cast directly; any fractional part is truncated.
                                    intArgValue = (int)stok.NumberValue;
                                    break;
                                }

                                case StreamTokenizer.TT_EOF:
                                {
                                    throw new Exception("Unexpected EOF: " + stok.ToString());
                                }

                                default:
                                {
                                    throw new Exception
                                              ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                                }
                                }
                                ApplyGapArgument(argName, intArgValue);
                                break;
                            }

                            // Not a gap argument, so it must be a component class name:
                            // try CharFilterFactory first, then fall back to TokenizerFactory.
                            try
                            {
                                Type clazz;
                                clazz = LookupAnalysisClass(argName, typeof(CharFilterFactory));
                                CreateAnalysisPipelineComponent(stok, clazz);
                            }
                            catch (ArgumentException /*e*/)
                            {
                                try
                                {
                                    Type clazz;
                                    clazz = LookupAnalysisClass(argName, typeof(TokenizerFactory));
                                    CreateAnalysisPipelineComponent(stok, clazz);
                                    // Once the tokenizer is defined, only token filters may follow.
                                    expectedArgType = ArgType.TOKENFILTER;
                                }
                                catch (ArgumentException e2)
                                {
                                    throw new Exception("Line #" + GetLineNumber(stok) + ": Can't find class '"
                                                        + argName + "' as CharFilterFactory or TokenizerFactory", e2);
                                }
                            }
                        }
                        else
                        {         // expectedArgType = ArgType.TOKENFILTER
                            string className = stok.StringValue;
                            Type   clazz;
                            try
                            {
                                clazz = LookupAnalysisClass(className, typeof(TokenFilterFactory));
                            }
                            catch (ArgumentException e)
                            {
                                throw new Exception
                                          ("Line #" + GetLineNumber(stok) + ": Can't find class '" + className + "' as TokenFilterFactory", e);
                            }
                            CreateAnalysisPipelineComponent(stok, clazz);
                        }
                        break;
                    }

                    default:
                    {
                        throw new Exception("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                    }
                    }
                }
            }
            catch (Exception e)
            {
                if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
                {
                    // Already carries a line number. A bare "throw" preserves the original
                    // stack trace, whereas "throw e" would reset it to this frame.
                    throw;
                }
                else
                {
                    // Prefix anything else with the current line number, keeping the cause.
                    throw new Exception("Line #" + GetLineNumber(stok) + ": ", e);
                }
            }

            AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories)
            {
                PositionIncrementGap = positionIncrementGap,
                OffsetGap            = offsetGap
            };

            RunData.AnalyzerFactories[factoryName] = analyzerFactory;
        }

        /// <summary>
        /// Parses the textual value of an integer AnalyzerFactory argument using the invariant culture.
        /// Surrounding whitespace is trimmed before parsing.
        /// </summary>
        /// <param name="stok">tokenizer, used only to obtain the line number for the error message</param>
        /// <param name="argName">name of the argument being parsed, used in the error message</param>
        /// <param name="rawValue">the value text as read from the token</param>
        /// <returns>the parsed integer</returns>
        /// <exception cref="Exception">
        /// Thrown with a "Line #" prefixed message when <paramref name="rawValue"/> is not a valid
        /// integer or does not fit into an <see cref="int"/>; the parse failure is the inner exception.
        /// </exception>
        private int ParseIntArgument(StreamTokenizer stok, string argName, string rawValue)
        {
            try
            {
                return int.Parse(rawValue.Trim(), CultureInfo.InvariantCulture);
            }
            catch (Exception e) when (e is FormatException || e is OverflowException)
            {
                throw new Exception
                          ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + rawValue + "'", e);
            }
        }

        /// <summary>
        /// Stores <paramref name="value"/> into <c>positionIncrementGap</c> or <c>offsetGap</c>,
        /// selected by a case-insensitive match on <paramref name="argName"/>.
        /// Any other argument name is ignored.
        /// </summary>
        /// <param name="argName">the argument name as it appeared in the specification</param>
        /// <param name="value">the integer value to store</param>
        private void ApplyGapArgument(string argName, int value)
        {
            if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
            {
                positionIncrementGap = value;
            }
            else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
            {
                offsetGap = value;
            }
        }
Beispiel #7
0
        /// <summary>
        /// Lexicalizes a quoted section of the input, starting at an opening <c>"</c>.
        /// </summary>
        /// <remarks>
        /// Tokens are accumulated until an unescaped <c>"</c> is reached. Three outcomes are possible:
        /// <list type="bullet">
        /// <item><description>A closing quote is found: the accumulated text is added as a single
        /// <c>TEXT</c> token and the closing quote is skipped.</description></item>
        /// <item><description>End of input is reached first: a lone <c>"</c> <c>SYMBOL</c> token is added
        /// and the accumulated text is re-lexed by a fresh <c>WaebricLexer</c>, whose tokens are
        /// appended with their line numbers offset by the starting line.</description></item>
        /// <item><description>A <c>&lt;</c> is encountered and the text does not follow a
        /// <c>comment</c> keyword: the accumulated text is handed to
        /// <c>LexicalizeEmbedding</c>.</description></item>
        /// </list>
        /// Comment and numeric handling of the tokenizer is switched off while scanning and
        /// switched back on before returning.
        /// </remarks>
        /// <exception cref="StreamTokenizerException">
        /// Thrown when the text follows a <c>comment</c> keyword and <c>IsCorrectString</c> rejects it.
        /// </exception>
        private void LexicalizeQuote()
        {
            //Store current line number location for backtracking
            int tempLine = tokenizer.GetScannedLines();
            //Hold previous char for recognizing escape chars
            char previousChar = '\0';
            //True when the quoted text directly follows a "comment" keyword
            bool isString = false;

            //Skip " token, only text is interesting
            CurrentToken = tokenizer.NextToken();

            //Ignore comments, due urls, etc
            tokenizer.SetIgnoreComments(true);

            //Check if this text is comment text. The stream may still be empty when the
            //quote is the very first token, so guard before looking at the last element.
            Token[] tempArray = TokenStream.ToArray();
            if (tempArray.Length > 0)
            {
                Token lastToken = tempArray[tempArray.Length - 1];
                if (lastToken.GetType() == TokenType.KEYWORD && lastToken.GetValue().ToString() == "comment")
                {
                    isString = true;
                }
            }

            //Retrieve possible quoted text
            StringBuilder stringBuilder = new StringBuilder();

            tokenizer.SetIgnoreNumeric(true);
            while (tokenizer.GetCharacterValue() != '\"' || previousChar == '\\') //Scan until non escaped " found
            {
                if (CurrentToken == StreamTokenizer.EOF)
                {   // End of file, so it wasn't a quoted part but just a single "
                    tokenizer.SetIgnoreComments(false);
                    tokenizer.SetIgnoreNumeric(false);

                    //First add a single quote as token
                    TokenStream.Add(new Token("\"", TokenType.SYMBOL, tempLine));

                    //Second, scan remaining string
                    WaebricLexer tempLexer = new WaebricLexer(new StringReader(stringBuilder.ToString()));
                    tempLexer.LexicalizeStream();
                    List <Token> tempTokenList = tempLexer.GetTokenList();

                    //Add all tokens to stream, shifting their line numbers by the line the quote started on
                    foreach (Token currentToken in tempTokenList)
                    {
                        TokenStream.Add(new Token(currentToken.GetValue(), currentToken.GetType(), (currentToken.GetLine() + tempLine)));
                    }

                    return; //Lexicalizing done
                }
                else if (tokenizer.GetCharacterValue() == '<' && !isString)
                { //Embedding found, so lexicalize embedding
                    LexicalizeEmbedding(stringBuilder.ToString());
                    tokenizer.SetIgnoreComments(false);
                    tokenizer.SetIgnoreNumeric(false);
                    return;
                }

                //Get next part and add it to stringBuilder
                stringBuilder.Append(tokenizer.ToString());

                previousChar = tokenizer.GetCharacterValue();
                CurrentToken = tokenizer.NextToken();
            }
            tokenizer.SetIgnoreComments(false);
            tokenizer.SetIgnoreNumeric(false);
            //Check if string is correct quote text
            if (isString)
            {
                if (!IsCorrectString(stringBuilder.ToString()))
                {
                    throw new StreamTokenizerException("String Text containts non valid characters", tempLine);
                }
            }

            TokenStream.Add(new Token(stringBuilder.ToString(), TokenType.TEXT, tempLine));

            //Skip " token, only text is interesting
            CurrentToken = tokenizer.NextToken();
        }