Пример #1
0
 /// <summary>
 /// Creates a lexer that tokenizes the characters of <paramref name="s"/>.
 /// </summary>
 /// <param name="s">The text to tokenize.</param>
 internal Lexer(string s)
 {
     // Configure the tokenizer on a local first, then publish it to the field.
     var tokenizer = new StreamTokenizer(new CharArrayReader(s.ToCharArray()));

     tokenizer.QuoteChar('"');
     tokenizer.ParseNumbers();

     // ',', '(' and ')' are registered as ordinary characters, in this order.
     foreach (char punctuation in new[] { ',', '(', ')' })
     {
         tokenizer.OrdinaryChar(punctuation);
     }

     // '$' and '_' are registered as word characters, in this order.
     foreach (char wordChar in new[] { '$', '_' })
     {
         tokenizer.WordChars(wordChar, wordChar);
     }

     tok = tokenizer;
 }
Пример #2
0
        /// <summary>
        /// Reads GML data from <paramref name="inputStream"/> and feeds it into <paramref name="inputGraph"/>.
        /// </summary>
        /// <remarks>
        /// The graph is wrapped via <c>BatchGraph.Wrap</c> with <paramref name="bufferSize"/>, the stream is
        /// decoded as ISO-8859-1 and tokenized, the tokens are handed to a <c>GmlParser</c>, and the wrapped
        /// graph is committed once parsing returns. The <see cref="StreamReader"/> created over
        /// <paramref name="inputStream"/> is disposed when the method exits.
        /// </remarks>
        /// <param name="inputGraph">The graph to populate; must not be null.</param>
        /// <param name="inputStream">The stream containing the GML data; must not be null.</param>
        /// <param name="bufferSize">Buffer size passed to <c>BatchGraph.Wrap</c>; must be greater than zero.</param>
        /// <param name="defaultEdgeLabel">Passed to the parser as the default edge label; must not be null, empty or whitespace.</param>
        /// <param name="vertexIdKey">Passed through to the parser unchanged.</param>
        /// <param name="edgeIdKey">Passed through to the parser unchanged.</param>
        /// <param name="edgeLabelKey">Passed through to the parser unchanged.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="inputGraph"/> or <paramref name="inputStream"/> is null, or
        /// <paramref name="defaultEdgeLabel"/> is null, empty or whitespace.
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="bufferSize"/> is not greater than zero.</exception>
        /// <exception cref="IOException">
        /// An <see cref="IOException"/> was raised while configuring the tokenizer, parsing or committing;
        /// it is wrapped in a new <see cref="IOException"/> whose message carries the tokenizer's current line number.
        /// </exception>
        public static void InputGraph(IGraph inputGraph, Stream inputStream, int bufferSize,
                                      string defaultEdgeLabel, string vertexIdKey, string edgeIdKey,
                                      string edgeLabelKey)
        {
            if (inputGraph == null)
            {
                throw new ArgumentNullException(nameof(inputGraph));
            }
            if (inputStream == null)
            {
                throw new ArgumentNullException(nameof(inputStream));
            }
            if (bufferSize <= 0)
            {
                throw new ArgumentException("bufferSize must be greater than zero");
            }
            if (string.IsNullOrWhiteSpace(defaultEdgeLabel))
            {
                throw new ArgumentNullException(nameof(defaultEdgeLabel));
            }

            var graph = BatchGraph.Wrap(inputGraph, bufferSize);

            using (var r = new StreamReader(inputStream, Encoding.GetEncoding("ISO-8859-1")))
            {
                // Declared outside the try block so the catch handler can report the line number.
                var st = new StreamTokenizer(r);

                try
                {
                    st.CommentChar(GmlTokens.CommentChar);
                    st.OrdinaryChar('[');
                    st.OrdinaryChar(']');

                    // Each of these punctuation characters is registered as a word character.
                    // Iterate the string directly instead of indexing it through LINQ's ElementAt.
                    const string stringCharacters = "/\\(){}<>!£$%^&*-+=,.?:;@_`|~";
                    foreach (var character in stringCharacters)
                    {
                        st.WordChars(character, character);
                    }

                    new GmlParser(graph, defaultEdgeLabel, vertexIdKey, edgeIdKey, edgeLabelKey).Parse(st);

                    graph.Commit();
                }
                catch (IOException e)
                {
                    // Keep the original exception as InnerException and add the line number for context.
                    throw new IOException(string.Concat("GML malformed line number ", st.LineNumber, ": "), e);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Sets the params (analyzer names only): a comma-separated list of Analyzer class names.
        /// If the Analyzer lives in Lucene.Net.Analysis, the name can be shortened by dropping the
        /// Lucene.Net.Analysis part of the fully qualified class name.
        /// <para/>
        /// Analyzer names may also refer to previously defined AnalyzerFactory's.
        /// <para/>
        /// Example Declaration:
        /// <code>
        /// {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, Standard.StandardAnalyzer) >
        /// </code>
        /// <para/>
        /// Example AnalyzerFactory usage:
        /// <code>
        /// -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
        /// -NewAnalyzer('whitespace tokenized')
        /// </code>
        /// </summary>
        /// <param name="params">analyzerClassName, or empty for the StandardAnalyzer</param>
        public override void SetParams(string @params)
        {
            base.SetParams(@params);

            StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(@params));
            tokenizer.QuoteChar('"');
            tokenizer.QuoteChar('\'');
            tokenizer.EndOfLineIsSignificant = false;
            tokenizer.OrdinaryChar(',');

            try
            {
                while (tokenizer.NextToken() != StreamTokenizer.TokenType_EndOfStream)
                {
                    var tokenType = tokenizer.TokenType;

                    // Commas only separate names; nothing to record.
                    if (tokenType == ',')
                    {
                        continue;
                    }

                    // Single-quoted, double-quoted and bare-word tokens are all analyzer names.
                    bool isAnalyzerName = tokenType == '\''
                                          || tokenType == '\"'
                                          || tokenType == StreamTokenizer.TokenType_Word;
                    if (!isAnalyzerName)
                    {
                        throw RuntimeException.Create("Unexpected token: " + tokenizer.ToString());
                    }

                    analyzerNames.Add(tokenizer.StringValue);
                }
            }
            catch (Exception e) when(e.IsRuntimeException())
            {
                // Messages that do not yet start with "Line #" get wrapped with line information.
                if (!e.Message.StartsWith("Line #", StringComparison.Ordinal))
                {
                    throw RuntimeException.Create("Line #" + (tokenizer.LineNumber + AlgLineNum) + ": ", e);
                }

                // Already prefixed: rethrow as-is, preserving the stack trace (CA2200).
                throw;
            }
            catch (Exception t) when(t.IsThrowable())
            {
                throw RuntimeException.Create("Line #" + (tokenizer.LineNumber + AlgLineNum) + ": ", t);
            }
        }
Пример #4
0
        /// <summary>
        /// Sets the params (analyzer names only): a comma-separated list of Analyzer class names.
        /// If the Analyzer lives in Lucene.Net.Analysis, the name can be shortened by dropping the
        /// Lucene.Net.Analysis part of the fully qualified class name.
        /// <para/>
        /// Analyzer names may also refer to previously defined AnalyzerFactory's.
        /// <para/>
        /// Example Declaration:
        /// <code>
        /// {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, Standard.StandardAnalyzer) >
        /// </code>
        /// <para/>
        /// Example AnalyzerFactory usage:
        /// <code>
        /// -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
        /// -NewAnalyzer('whitespace tokenized')
        /// </code>
        /// </summary>
        /// <param name="params">analyzerClassName, or empty for the StandardAnalyzer</param>
        /// <exception cref="Exception">
        /// A token other than a comma, a quoted string or a word was encountered, or tokenizing failed.
        /// Unless the message already starts with "Line #", the failure is wrapped in a new exception
        /// whose message starts with "Line #" followed by the tokenizer line number plus <c>AlgLineNum</c>.
        /// </exception>
        public override void SetParams(string @params)
        {
            base.SetParams(@params);
            StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));

            stok.QuoteChar('"');
            stok.QuoteChar('\'');
            stok.IsEOLSignificant = false;
            stok.OrdinaryChar(',');
            try
            {
                while (stok.NextToken() != StreamTokenizer.TT_EOF)
                {
                    switch (stok.TokenType)
                    {
                    case ',':
                    {
                        // Separator between names: nothing to record.
                        break;
                    }

                    case '\'':
                    case '\"':
                    case StreamTokenizer.TT_WORD:
                    {
                        // Quoted strings and bare words are both analyzer names.
                        analyzerNames.Add(stok.StringValue);
                        break;
                    }

                    default:
                    {
                        throw new Exception("Unexpected token: " + stok.ToString());
                    }
                    }
                }
            }
            catch (Exception e)
            {
                if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
                {
                    // Already carries line information: rethrow without resetting the
                    // stack trace ("throw e;" would discard the original throw site).
                    throw;
                }

                throw new Exception("Line #" + (stok.LineNumber + AlgLineNum) + ": ", e);
            }
        }
Пример #5
0
        /// <summary>
        /// Read algorithm from file.
        /// Property examined: alt.tasks.packages == comma separated list of
        /// alternate Assembly names where tasks would be searched for, when not found
        /// in the default Assembly (that of <see cref="PerfTask"/>).
        /// If the same task class appears in more than one Assembly, the Assembly
        /// indicated first in this list will be used.
        /// <para/>
        /// The Lucene.Net implementation differs from Lucene in that all
        /// referenced assemblies are also scanned for the type. However,
        /// alt.tasks.packages may be included for assemblies that are
        /// not referenced in your project.
        /// </summary>
        /// <remarks>
        /// The algorithm text is taken from <c>runData.Config.AlgorithmText</c> and tokenized with
        /// '#' as the comment character. Tokens handled by this constructor:
        /// <list type="bullet">
        /// <item><description>a word: creates a task of that name, optionally followed by <c>( params )</c>;</description></item>
        /// <item><description><c>{</c> or <c>[</c>: opens a nested sequence, optionally named by a double-quoted string;</description></item>
        /// <item><description><c>}</c>, <c>]</c> or <c>&gt;</c>: closes the current sequence;</description></item>
        /// <item><description><c>:</c> after a closed sequence: repetitions (<c>*</c>, a number, or a number followed by <c>s</c>),
        /// optionally followed by <c>:</c> rate and <c>/min</c> or <c>/sec</c>;</description></item>
        /// <item><description><c>&amp;</c>: marks the previous task as background, with an optional numeric priority delta;</description></item>
        /// <item><description><c>-</c>: disables counting for the next task.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="runData">perf-run-data used at running the tasks.</param>
        /// <exception cref="Exception">if errors while parsing the algorithm.</exception>
        public Algorithm(PerfRunData runData)
        {
            Config config = runData.Config;

            taskPackages = InitTasksPackages(config);
            string algTxt = config.AlgorithmText;

            sequence = new TaskSequence(runData, null, null, false);
            TaskSequence    currSequence = sequence;
            PerfTask        prevTask     = null;
            StreamTokenizer stok         = new StreamTokenizer(new StringReader(algTxt));

            stok.CommentChar('#');
            stok.IsEOLSignificant = false;
            stok.QuoteChar('"');
            stok.QuoteChar('\'');
            stok.OrdinaryChar('/');
            stok.OrdinaryChar('(');
            stok.OrdinaryChar(')');
            bool colonOk = false;                // true only right after a sequence was closed
            bool isDisableCountNextTask = false; // only for primitive tasks

            currSequence.Depth = 0;

            while (stok.NextToken() != StreamTokenizer.TT_EOF)
            {
                switch (stok.TokenType)
                {
                case StreamTokenizer.TT_WORD:
                    string   s    = stok.StringValue;
                    PerfTask task = (PerfTask)Activator.CreateInstance(TaskClass(config, s), runData);
                    task.AlgLineNum        = stok.LineNumber;
                    task.DisableCounting   = isDisableCountNextTask;
                    isDisableCountNextTask = false;
                    currSequence.AddTask(task);
                    if (task is RepSumByPrefTask)
                    {
                        // this task type takes its prefix from the token that follows its name
                        stok.NextToken();
                        string prefix = stok.StringValue;
                        if (string.IsNullOrEmpty(prefix))
                        {
                            throw new Exception("named report prefix problem - " + stok.ToString());
                        }
                        ((RepSumByPrefTask)task).SetPrefix(prefix);
                    }
                    // check for task param: '(' someParam ')'
                    stok.NextToken();
                    if (stok.TokenType != '(')
                    {
                        stok.PushBack();
                    }
                    else
                    {
                        // get params, for tasks that supports them - allow recursive parenthetical expressions
                        stok.IsEOLSignificant = true;      // Allow params tokenizer to keep track of line number
                        StringBuilder @params = new StringBuilder();
                        stok.NextToken();
                        if (stok.TokenType != ')')
                        {
                            int count = 1; // number of currently open parentheses
                            while (true)
                            {
                                switch (stok.TokenType)
                                {
                                case StreamTokenizer.TT_NUMBER:
                                {
                                    // Format with the invariant culture: the rebuilt text is parsed again
                                    // by the task, and a culture-specific decimal separator such as ','
                                    // would corrupt it.
                                    @params.Append(stok.NumberValue.ToString(System.Globalization.CultureInfo.InvariantCulture));
                                    break;
                                }

                                case StreamTokenizer.TT_WORD:
                                {
                                    @params.Append(stok.StringValue);
                                    break;
                                }

                                case StreamTokenizer.TT_EOF:
                                {
                                    throw new Exception("Unexpexted EOF: - " + stok.ToString());
                                }

                                case '"':
                                case '\'':
                                {
                                    @params.Append((char)stok.TokenType);
                                    // re-escape delimiters, if any
                                    @params.Append(stok.StringValue.Replace("" + (char)stok.TokenType, @"\" + (char)stok.TokenType));
                                    @params.Append((char)stok.TokenType);
                                    break;
                                }

                                case '(':
                                {
                                    @params.Append((char)stok.TokenType);
                                    ++count;
                                    break;
                                }

                                case ')':
                                {
                                    if (--count >= 1)
                                    {              // exclude final closing parenthesis
                                        @params.Append((char)stok.TokenType);
                                    }
                                    else
                                    {
                                        goto BALANCED_PARENS_BREAK;
                                    }
                                    break;
                                }

                                default:
                                {
                                    @params.Append((char)stok.TokenType);
                                    break;
                                }
                                }
                                stok.NextToken();
                            }
                            BALANCED_PARENS_BREAK : { }
                        }
                        stok.IsEOLSignificant = false;
                        string prm = @params.ToString().Trim();
                        if (prm.Length > 0)
                        {
                            task.SetParams(prm);
                        }
                    }

                    // ---------------------------------------
                    colonOk  = false;
                    prevTask = task;
                    break;

                default:
                    char c = (char)stok.TokenType;

                    switch (c)
                    {
                    case ':':
                        if (!colonOk)
                        {
                            throw new Exception("colon unexpexted: - " + stok.ToString());
                        }
                        colonOk = false;
                        // get repetitions number
                        stok.NextToken();
                        if ((char)stok.TokenType == '*')
                        {
                            ((TaskSequence)prevTask).SetRepetitions(TaskSequence.REPEAT_EXHAUST);
                        }
                        else
                        {
                            if (stok.TokenType != StreamTokenizer.TT_NUMBER)
                            {
                                throw new Exception("expected repetitions number or XXXs: - " + stok.ToString());
                            }
                            else
                            {
                                double num = stok.NumberValue;
                                stok.NextToken();
                                // a number followed by the word "s" is a run time, otherwise a repetition count
                                if (stok.TokenType == StreamTokenizer.TT_WORD && stok.StringValue.Equals("s", StringComparison.Ordinal))
                                {
                                    ((TaskSequence)prevTask).SetRunTime(num);
                                }
                                else
                                {
                                    stok.PushBack();
                                    ((TaskSequence)prevTask).SetRepetitions((int)num);
                                }
                            }
                        }
                        // check for rate specification (ops/min)
                        stok.NextToken();
                        if (stok.TokenType != ':')
                        {
                            stok.PushBack();
                        }
                        else
                        {
                            // get rate number
                            stok.NextToken();
                            if (stok.TokenType != StreamTokenizer.TT_NUMBER)
                            {
                                throw new Exception("expected rate number: - " + stok.ToString());
                            }
                            // check for unit - min or sec, sec is default
                            // NOTE(review): the SetRate calls below read stok.NumberValue after further
                            // tokens were requested; this presumably relies on the tokenizer keeping the
                            // last numeric value - confirm against StreamTokenizer.
                            stok.NextToken();
                            if (stok.TokenType != '/')
                            {
                                stok.PushBack();
                                ((TaskSequence)prevTask).SetRate((int)stok.NumberValue, false);         // set rate per sec
                            }
                            else
                            {
                                stok.NextToken();
                                if (stok.TokenType != StreamTokenizer.TT_WORD)
                                {
                                    throw new Exception("expected rate unit: 'min' or 'sec' - " + stok.ToString());
                                }
                                string unit = stok.StringValue.ToLowerInvariant();
                                if ("min".Equals(unit, StringComparison.Ordinal))
                                {
                                    ((TaskSequence)prevTask).SetRate((int)stok.NumberValue, true);         // set rate per min
                                }
                                else if ("sec".Equals(unit, StringComparison.Ordinal))
                                {
                                    ((TaskSequence)prevTask).SetRate((int)stok.NumberValue, false);         // set rate per sec
                                }
                                else
                                {
                                    throw new Exception("expected rate unit: 'min' or 'sec' - " + stok.ToString());
                                }
                            }
                        }
                        colonOk = false;
                        break;

                    case '{':
                    case '[':
                        // a sequence
                        // check for sequence name
                        string name = null;
                        stok.NextToken();
                        if (stok.TokenType != '"')
                        {
                            stok.PushBack();
                        }
                        else
                        {
                            name = stok.StringValue;
                            if (stok.TokenType != '"' || string.IsNullOrEmpty(name))
                            {
                                throw new Exception("sequence name problem - " + stok.ToString());
                            }
                        }
                        // start the sequence; the last argument is true when it was opened with '['
                        TaskSequence seq2 = new TaskSequence(runData, name, currSequence, c == '[');
                        currSequence.AddTask(seq2);
                        currSequence = seq2;
                        colonOk      = false;
                        break;

                    case '&':
                        if (currSequence.IsParallel)
                        {
                            throw new Exception("Can only create background tasks within a serial task");
                        }
                        stok.NextToken();
                        int deltaPri;
                        if (stok.TokenType != StreamTokenizer.TT_NUMBER)
                        {
                            stok.PushBack();
                            deltaPri = 0;
                        }
                        else
                        {
                            // priority
                            deltaPri = (int)stok.NumberValue;
                        }

                        if (prevTask == null)
                        {
                            throw new Exception("& was unexpected");
                        }
                        else if (prevTask.RunInBackground)
                        {
                            throw new Exception("double & was unexpected");
                        }
                        else
                        {
                            prevTask.SetRunInBackground(deltaPri);
                        }
                        break;

                    case '>':
                    case '}':
                    case ']':
                        // '>' ends the sequence exactly like '}' / ']', after first calling SetNoChildReport()
                        if (c == '>')
                        {
                            currSequence.SetNoChildReport();
                        }
                        // end sequence: a ':' repetition spec may follow, applying to the closed sequence
                        colonOk      = true;
                        prevTask     = currSequence;
                        currSequence = currSequence.Parent;
                        break;

                    case '-':
                        isDisableCountNextTask = true;
                        break;
                    }     //switch(c)
                    break;
                } //switch(stok.ttype)
            }

            if (sequence != currSequence)
            {
                throw new Exception("Unmatched sequences");
            }

            // remove redundant top level enclosing sequences
            while (sequence.IsCollapsable && sequence.Repetitions == 1 && sequence.GetRate() == 0)
            {
                IList <PerfTask> t = sequence.Tasks;
                if (t != null && t.Count == 1)
                {
                    PerfTask p = t[0];
                    if (p is TaskSequence)
                    {
                        sequence = (TaskSequence)p;
                        continue;
                    }
                }
                break;
            }
        }
Пример #6
0
        /// <summary>
        /// Sets the params.
        /// Analysis component factory names may optionally include the "Factory" suffix.
        /// </summary>
        /// <param name="params">
        /// analysis pipeline specification: name, (optional) positionIncrementGap,
        /// (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
        /// and 0+ TokenFilterFactory's
        /// </param>
        public override void SetParams(string @params)
        {
            base.SetParams(@params);
            ArgType expectedArgType = ArgType.ANALYZER_ARG;

            StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));

            stok.CommentChar('#');
            stok.QuoteChar('"');
            stok.QuoteChar('\'');
            stok.EndOfLineIsSignificant = false;
            stok.OrdinaryChar('(');
            stok.OrdinaryChar(')');
            stok.OrdinaryChar(':');
            stok.OrdinaryChar(',');
            try
            {
                while (stok.NextToken() != StreamTokenizer.TokenType_EndOfStream)
                {
                    switch (stok.TokenType)
                    {
                    case ',':
                    {
                        // Do nothing
                        break;
                    }

                    case StreamTokenizer.TokenType_Word:
                    {
                        if (expectedArgType.Equals(ArgType.ANALYZER_ARG))
                        {
                            string argName = stok.StringValue;
                            if (!argName.Equals("name", StringComparison.OrdinalIgnoreCase) &&
                                !argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) &&
                                !argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                            {
                                throw RuntimeException.Create
                                          ("Line #" + GetLineNumber(stok) + ": Missing 'name' param to AnalyzerFactory: '" + @params + "'");
                            }
                            stok.NextToken();
                            if (stok.TokenType != ':')
                            {
                                throw RuntimeException.Create
                                          ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                            }

                            stok.NextToken();
                            string argValue = stok.StringValue;
                            switch (stok.TokenType)
                            {
                            case StreamTokenizer.TokenType_Number:
                            {
                                argValue = stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                                // Drop the ".0" from numbers, for integer arguments
                                argValue = TRAILING_DOT_ZERO_PATTERN.Replace(argValue, "", 1);
                                // Intentional fallthrough

                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName     = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (Exception e) when(e.IsNumberFormatException())
                                    {
                                        throw RuntimeException.Create
                                                  ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }

                            case '"':
                            case '\'':
                            case StreamTokenizer.TokenType_Word:
                            {
                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName     = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (Exception e) when(e.IsNumberFormatException())
                                    {
                                        throw RuntimeException.Create
                                                  ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }

                            case StreamTokenizer.TokenType_EndOfStream:
                            {
                                throw RuntimeException.Create("Unexpected EOF: " + stok.ToString());
                            }

                            default:
                            {
                                throw RuntimeException.Create
                                          ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                            }
                            }
                        }
                        else if (expectedArgType.Equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER))
                        {
                            string argName = stok.StringValue;

                            if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) ||
                                argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                            {
                                stok.NextToken();
                                if (stok.TokenType != ':')
                                {
                                    throw RuntimeException.Create
                                              ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                                }
                                stok.NextToken();
                                int intArgValue = (int)stok.NumberValue;
                                switch (stok.TokenType)
                                {
                                case '"':
                                case '\'':
                                case StreamTokenizer.TokenType_Word:
                                {
                                    intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(stok.StringValue.Trim(), CultureInfo.InvariantCulture);
                                    }
                                    catch (Exception e) when(e.IsNumberFormatException())
                                    {
                                        throw RuntimeException.Create
                                                  ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + stok.StringValue + "'", e);
                                    }
                                    // Intentional fall-through

                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }

                                case StreamTokenizer.TokenType_Number:
                                {
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }

                                case StreamTokenizer.TokenType_EndOfStream:
                                {
                                    throw RuntimeException.Create("Unexpected EOF: " + stok.ToString());
                                }

                                default:
                                {
                                    throw RuntimeException.Create
                                              ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                                }
                                }
                                break;
                            }
                            try
                            {
                                Type clazz;
                                clazz = LookupAnalysisClass(argName, typeof(CharFilterFactory));
                                CreateAnalysisPipelineComponent(stok, clazz);
                            }
                            catch (Exception e) when(e.IsIllegalArgumentException())
                            {
                                try
                                {
                                    Type clazz;
                                    clazz = LookupAnalysisClass(argName, typeof(TokenizerFactory));
                                    CreateAnalysisPipelineComponent(stok, clazz);
                                    expectedArgType = ArgType.TOKENFILTER;
                                }
                                catch (Exception e2) when(e2.IsIllegalArgumentException())
                                {
                                    throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": Can't find class '"
                                                                  + argName + "' as CharFilterFactory or TokenizerFactory", e2);
                                }
                            }
                        }
                        else
                        {         // expectedArgType = ArgType.TOKENFILTER
                            string className = stok.StringValue;
                            Type   clazz;
                            try
                            {
                                clazz = LookupAnalysisClass(className, typeof(TokenFilterFactory));
                            }
                            catch (Exception e) when(e.IsIllegalArgumentException())
                            {
                                throw RuntimeException.Create
                                          ("Line #" + GetLineNumber(stok) + ": Can't find class '" + className + "' as TokenFilterFactory", e);
                            }
                            CreateAnalysisPipelineComponent(stok, clazz);
                        }
                        break;
                    }

                    default:
                    {
                        throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                    }
                    }
                }
            }
            catch (Exception e) when(e.IsRuntimeException())
            {
                if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
                {
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
                else
                {
                    throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", e);
                }
            }
            catch (Exception t) when(t.IsThrowable())
            {
                throw RuntimeException.Create("Line #" + GetLineNumber(stok) + ": ", t);
            }

            AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories)
            {
                PositionIncrementGap = positionIncrementGap,
                OffsetGap            = offsetGap
            };

            RunData.AnalyzerFactories[factoryName] = analyzerFactory;
        }
Пример #7
0
        /// <summary>
        /// Splits <paramref name="s"/> into a flat list of lexemes using a
        /// <see cref="StreamTokenizer"/> on which '-' has been made an ordinary character.
        /// </summary>
        /// <param name="s">The text to tokenize.</param>
        /// <returns>
        /// The lexemes in input order: NUMBER and WORD lexemes for numeric and word tokens,
        /// LPAREN / RPAREN / COMMA for '(' / ')' / ',', and OPERATOR for every other token.
        /// </returns>
        /// <exception cref="System.IO.IOException"></exception>
        public static AList<Lexeme> Tokenize(string s)
        {
            StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(s));
            // Don't parse minus as part of numbers.
            tokenizer.OrdinaryChar('-');

            AList<Lexeme> tokBuf = new AList<Lexeme>();
            while (tokenizer.NextToken() != StreamTokenizer.TT_EOF)
            {
                switch (tokenizer.ttype)
                {
                    case StreamTokenizer.TT_NUMBER:
                    {
                        // A numeric token carries its value in nval. sval is presumably only
                        // populated for word / quoted tokens (java.io.StreamTokenizer contract),
                        // so reading sval here would dereference null.
                        // NOTE(review): formatted with the current culture - confirm that is
                        // what the consumers of NUMBER lexemes expect.
                        tokBuf.AddItem(new Lexeme(Lexeme.NUMBER, tokenizer.nval.ToString()));
                        break;
                    }

                    case StreamTokenizer.TT_WORD:
                    {
                        tokBuf.AddItem(new Lexeme(Lexeme.WORD, tokenizer.sval));
                        break;
                    }

                    default:
                    {
                        // Any other token type is the character code of a single ordinary
                        // character. Cast ttype first, then stringify: a cast binds tighter
                        // than the member-access chain only when parenthesized.
                        string symbol = ((char)tokenizer.ttype).ToString();
                        switch (symbol)
                        {
                            case "(":
                            {
                                tokBuf.AddItem(new Lexeme(Lexeme.LPAREN, symbol));
                                break;
                            }

                            case ")":
                            {
                                tokBuf.AddItem(new Lexeme(Lexeme.RPAREN, symbol));
                                break;
                            }

                            case ",":
                            {
                                tokBuf.AddItem(new Lexeme(Lexeme.COMMA, symbol));
                                break;
                            }

                            default:
                            {
                                // operator
                                tokBuf.AddItem(new Lexeme(Lexeme.OPERATOR, symbol));
                                break;
                            }
                        }
                        break;
                    }
                }
            }
            return tokBuf;
        }
Пример #8
0
        /// <summary>
        /// Sets the params.
        /// Analysis component factory names may optionally include the "Factory" suffix.
        /// </summary>
        /// <remarks>
        /// The specification is tokenized and consumed in three phases tracked by
        /// <c>expectedArgType</c>: first the analyzer arguments (<c>name</c>,
        /// <c>positionIncrementGap</c>, <c>offsetGap</c>), then further gap arguments or
        /// CharFilterFactory / TokenizerFactory components, and finally TokenFilterFactory
        /// components. On success the resulting <see cref="AnalyzerFactory"/> is stored in
        /// <c>RunData.AnalyzerFactories</c> under the parsed name.
        /// </remarks>
        /// <param name="params">
        /// analysis pipeline specification: name, (optional) positionIncrementGap,
        /// (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
        /// and 0+ TokenFilterFactory's
        /// </param>
        /// <exception cref="Exception">
        /// Thrown for any failure while parsing. The message always starts with
        /// "Line #"; failures whose message does not already do so are wrapped in a new
        /// exception that does.
        /// </exception>
        public override void SetParams(string @params)
        {
            base.SetParams(@params);
            ArgType expectedArgType = ArgType.ANALYZER_ARG;

            StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));

            // '#' starts a comment, both quote styles delimit strings, and the
            // punctuation used by the spec syntax is returned as single-character tokens.
            stok.CommentChar('#');
            stok.QuoteChar('"');
            stok.QuoteChar('\'');
            stok.IsEOLSignificant = false;
            stok.OrdinaryChar('(');
            stok.OrdinaryChar(')');
            stok.OrdinaryChar(':');
            stok.OrdinaryChar(',');
            try
            {
                while (stok.NextToken() != StreamTokenizer.TT_EOF)
                {
                    switch (stok.TokenType)
                    {
                    case ',':
                    {
                        // Do nothing
                        break;
                    }

                    case StreamTokenizer.TT_WORD:
                    {
                        if (expectedArgType.Equals(ArgType.ANALYZER_ARG))
                        {
                            // Until 'name' has been seen, only the three analyzer arguments are legal.
                            string argName = stok.StringValue;
                            if (!argName.Equals("name", StringComparison.OrdinalIgnoreCase) &&
                                !argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) &&
                                !argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                            {
                                throw new Exception
                                          ("Line #" + GetLineNumber(stok) + ": Missing 'name' param to AnalyzerFactory: '" + @params + "'");
                            }
                            stok.NextToken();
                            if (stok.TokenType != ':')
                            {
                                throw new Exception
                                          ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                            }

                            stok.NextToken();
                            string argValue = stok.StringValue;
                            switch (stok.TokenType)
                            {
                            case StreamTokenizer.TT_NUMBER:
                            {
                                argValue = stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                                // Drop the ".0" from numbers, for integer arguments
                                argValue = TRAILING_DOT_ZERO_PATTERN.Replace(argValue, "", 1);
                                // Intentional fallthrough
                                // (C# has no implicit fall-through, so the handling shared with
                                // the string case below is repeated here.)

                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName     = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (FormatException e)
                                    {
                                        throw new Exception
                                                  ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }

                            case '"':
                            case '\'':
                            case StreamTokenizer.TT_WORD:
                            {
                                if (argName.Equals("name", StringComparison.OrdinalIgnoreCase))
                                {
                                    factoryName     = argValue;
                                    expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                }
                                else
                                {
                                    int intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(argValue, CultureInfo.InvariantCulture);
                                    }
                                    catch (FormatException e)
                                    {
                                        throw new Exception
                                                  ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                    }
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                }
                                break;
                            }

                            case StreamTokenizer.TT_EOF:
                            {
                                throw new Exception("Unexpected EOF: " + stok.ToString());
                            }

                            default:
                            {
                                throw new Exception
                                          ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                            }
                            }
                        }
                        else if (expectedArgType.Equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER))
                        {
                            string argName = stok.StringValue;

                            if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase) ||
                                argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                            {
                                stok.NextToken();
                                if (stok.TokenType != ':')
                                {
                                    throw new Exception
                                              ("Line #" + GetLineNumber(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                                }
                                stok.NextToken();
                                // Used as-is by the TT_NUMBER case; replaced by the parsed text
                                // when the value arrives as a word or quoted string.
                                int intArgValue = (int)stok.NumberValue;
                                switch (stok.TokenType)
                                {
                                case '"':
                                case '\'':
                                case StreamTokenizer.TT_WORD:
                                {
                                    intArgValue = 0;
                                    try
                                    {
                                        intArgValue = int.Parse(stok.StringValue.Trim(), CultureInfo.InvariantCulture);
                                    }
                                    catch (FormatException e)
                                    {
                                        throw new Exception
                                                  ("Line #" + GetLineNumber(stok) + ": Exception parsing " + argName + " value '" + stok.StringValue + "'", e);
                                    }
                                    // Intentional fall-through

                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }

                                case StreamTokenizer.TT_NUMBER:
                                {
                                    if (argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        positionIncrementGap = intArgValue;
                                    }
                                    else if (argName.Equals("offsetGap", StringComparison.OrdinalIgnoreCase))
                                    {
                                        offsetGap = intArgValue;
                                    }
                                    break;
                                }

                                case StreamTokenizer.TT_EOF:
                                {
                                    throw new Exception("Unexpected EOF: " + stok.ToString());
                                }

                                default:
                                {
                                    throw new Exception
                                              ("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                                }
                                }
                                // Gap argument consumed: leave the enclosing TT_WORD case so the
                                // component lookup below is not attempted for it.
                                break;
                            }
                            // Not a gap argument: try the name as a CharFilterFactory first and,
                            // if that lookup is rejected, as a TokenizerFactory.
                            try
                            {
                                Type clazz;
                                clazz = LookupAnalysisClass(argName, typeof(CharFilterFactory));
                                CreateAnalysisPipelineComponent(stok, clazz);
                            }
                            catch (ArgumentException /*e*/)
                            {
                                try
                                {
                                    Type clazz;
                                    clazz = LookupAnalysisClass(argName, typeof(TokenizerFactory));
                                    CreateAnalysisPipelineComponent(stok, clazz);
                                    // Once the tokenizer has been created only token filters may follow.
                                    expectedArgType = ArgType.TOKENFILTER;
                                }
                                catch (ArgumentException e2)
                                {
                                    throw new Exception("Line #" + GetLineNumber(stok) + ": Can't find class '"
                                                        + argName + "' as CharFilterFactory or TokenizerFactory", e2);
                                }
                            }
                        }
                        else
                        {         // expectedArgType = ArgType.TOKENFILTER
                            string className = stok.StringValue;
                            Type   clazz;
                            try
                            {
                                clazz = LookupAnalysisClass(className, typeof(TokenFilterFactory));
                            }
                            catch (ArgumentException e)
                            {
                                throw new Exception
                                          ("Line #" + GetLineNumber(stok) + ": Can't find class '" + className + "' as TokenFilterFactory", e);
                            }
                            CreateAnalysisPipelineComponent(stok, clazz);
                        }
                        break;
                    }

                    default:
                    {
                        throw new Exception("Line #" + GetLineNumber(stok) + ": Unexpected token: " + stok.ToString());
                    }
                    }
                }
            }
            catch (Exception e)
            {
                if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
                {
                    // Already carries a line prefix: rethrow with a bare 'throw' so the
                    // original stack trace is preserved ('throw e' would reset it, CA2200).
                    throw;
                }
                else
                {
                    // Add the line prefix while keeping the original failure as InnerException.
                    throw new Exception("Line #" + GetLineNumber(stok) + ": ", e);
                }
            }

            AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories)
            {
                PositionIncrementGap = positionIncrementGap,
                OffsetGap            = offsetGap
            };

            RunData.AnalyzerFactories[factoryName] = analyzerFactory;
        }