public override sealed bool IncrementToken()
        {
            if (m_tokens.Count > 0)
            {
                Debug.Assert(current != null);
                CompoundToken token = m_tokens.Dequeue();
                RestoreState(current); // keep all other attributes untouched
                m_termAtt.SetEmpty().Append(token.Text);
                m_offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
                posIncAtt.PositionIncrement = 0;
                return true;
            }

            current = null; // not really needed, but for safety
            if (m_input.IncrementToken())
            {
                // Only words at least minWordSize characters long get decomposed
                if (m_termAtt.Length >= this.m_minWordSize)
                {
                    Decompose();
                    // only capture the state if we really need it for producing new tokens
                    if (m_tokens.Count > 0)
                    {
                        current = CaptureState();
                    }
                }
                // return original token:
                return true;
            }
            else
            {
                return false;
            }
        }
        protected override void Decompose()
        {
            // get the hyphenation points
            Hyphenation.Hyphenation hyphens = hyphenator.Hyphenate(m_termAtt.Buffer, 0, m_termAtt.Length, 1, 1);
            // No hyphen points found -> exit
            if (hyphens is null)
            {
                return;
            }

            int[] hyp = hyphens.HyphenationPoints;

            for (int i = 0; i < hyp.Length; ++i)
            {
                int           remaining         = hyp.Length - i;
                int           start             = hyp[i];
                CompoundToken longestMatchToken = null;
                for (int j = 1; j < remaining; j++)
                {
                    int partLength = hyp[i + j] - start;

                    // if the part is longer than maxSubwordSize we
                    // are done with this round
                    if (partLength > this.m_maxSubwordSize)
                    {
                        break;
                    }

                    // we only put subwords onto the token stream
                    // that are at least minSubwordSize characters long
                    if (partLength < this.m_minSubwordSize)
                    {
                        // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
                        // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
                        continue;
                    }

                    // check the dictionary
                    if (m_dictionary is null || m_dictionary.Contains(m_termAtt.Buffer, start, partLength))
                    {
                        if (this.m_onlyLongestMatch)
                        {
                            if (longestMatchToken != null)
                            {
                                if (longestMatchToken.Text.Length < partLength)
                                {
                                    longestMatchToken = new CompoundToken(this, start, partLength);
                                }
                            }
                            else
                            {
                                longestMatchToken = new CompoundToken(this, start, partLength);
                            }
                        }
                        else
                        {
                            m_tokens.Enqueue(new CompoundToken(this, start, partLength));
                        }
                    }
                    else if (m_dictionary.Contains(m_termAtt.Buffer, start, partLength - 1))
                    {
                        // check the dictionary again with a word that is one character
                        // shorter to avoid problems with genitive 's characters and
                        // other binding characters
                        if (this.m_onlyLongestMatch)
                        {
                            if (longestMatchToken != null)
                            {
                                if (longestMatchToken.Text.Length < partLength - 1)
                                {
                                    longestMatchToken = new CompoundToken(this, start, partLength - 1);
                                }
                            }
                            else
                            {
                                longestMatchToken = new CompoundToken(this, start, partLength - 1);
                            }
                        }
                        else
                        {
                            m_tokens.Enqueue(new CompoundToken(this, start, partLength - 1));
                        }
                    }
                }
                if (this.m_onlyLongestMatch && longestMatchToken != null)
                {
                    m_tokens.Enqueue(longestMatchToken);
                }
            }
        }
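For context, here is how the filter above is typically driven. A minimal consumption sketch, assuming Lucene.NET 4.8 and its DictionaryCompoundWordTokenFilter (a dictionary-based sibling of the filter above; the dictionary words and input text are placeholders):

using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Compound;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;

// "softball" should come back as the original token followed by the
// subwords "soft" and "ball", which IncrementToken above emits with
// PositionIncrement = 0 (stacked on the original token).
var dict = new CharArraySet(LuceneVersion.LUCENE_48,
                            new[] { "soft", "ball" }, true /* ignoreCase */);
TokenStream ts = new DictionaryCompoundWordTokenFilter(
    LuceneVersion.LUCENE_48,
    new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("softball")),
    dict);
var term = ts.AddAttribute<ICharTermAttribute>();
ts.Reset();
while (ts.IncrementToken()) // drains the queued subwords before advancing the input
{
    System.Console.WriteLine(term.ToString());
}
ts.End();
ts.Dispose();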
Example No. 3
        public string RenderToHtml(string markdown)
        {
            if (markdown == null)
            {
                throw new ArgumentNullException(nameof(markdown));
            }
            var reader   = new TokenReader(new MdLanguage(), markdown);
            var token    = new CompoundToken(reader.ReadTokens());
            var renderer = new HtmlTokenRenderer();

            return renderer.Render(token);
        }
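A short usage sketch for the renderer above (only RenderToHtml is shown in the source, so the containing class name Md below is hypothetical):

var md = new Md(); // hypothetical name for the class declaring RenderToHtml
string html = md.RenderToHtml("# Title\nSome _emphasized_ text");
System.Console.WriteLine(html);
// Passing null instead throws ArgumentNullException, per the guard clause above.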
Example No. 4
        protected override void Decompose()
        {
            int len = m_termAtt.Length;

            for (int i = 0; i <= len - this.m_minSubwordSize; ++i)
            {
                CompoundToken longestMatchToken = null;
                for (int j = this.m_minSubwordSize; j <= this.m_maxSubwordSize; ++j)
                {
                    if (i + j > len)
                    {
                        break;
                    }
                    if (m_dictionary.Contains(m_termAtt.Buffer, i, j))
                    {
                        if (this.m_onlyLongestMatch)
                        {
                            if (longestMatchToken != null)
                            {
                                if (longestMatchToken.Text.Length < j)
                                {
                                    longestMatchToken = new CompoundToken(this, i, j);
                                }
                            }
                            else
                            {
                                longestMatchToken = new CompoundToken(this, i, j);
                            }
                        }
                        else
                        {
                            m_tokens.Enqueue(new CompoundToken(this, i, j));
                        }
                    }
                }
                if (this.m_onlyLongestMatch && longestMatchToken != null)
                {
                    m_tokens.Enqueue(longestMatchToken);
                }
            }
        }
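Stripped of the Lucene plumbing, the sliding-window search above reduces to a few lines of plain C#. A self-contained sketch, with HashSet<string> standing in for the CharArraySet dictionary:

using System;
using System.Collections.Generic;

static IEnumerable<string> DecomposeWord(string word, ISet<string> dictionary,
                                         int minSubwordSize, int maxSubwordSize,
                                         bool onlyLongestMatch)
{
    for (int i = 0; i <= word.Length - minSubwordSize; ++i)
    {
        string longest = null;
        for (int j = minSubwordSize; j <= maxSubwordSize && i + j <= word.Length; ++j)
        {
            string candidate = word.Substring(i, j);
            if (!dictionary.Contains(candidate)) continue;
            if (onlyLongestMatch)
            {
                // keep only the longest dictionary hit starting at i
                if (longest == null || longest.Length < j) longest = candidate;
            }
            else
            {
                yield return candidate;
            }
        }
        if (onlyLongestMatch && longest != null) yield return longest;
    }
}

// DecomposeWord("softball", new HashSet<string> { "soft", "ball" }, 2, 15, false)
// yields "soft" and "ball".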
        protected internal override void Decompose()
        {
            int len = termAtt.Length;

            for (int i = 0; i <= len - this.minSubwordSize; ++i)
            {
                CompoundToken longestMatchToken = null;
                for (int j = this.minSubwordSize; j <= this.maxSubwordSize; ++j)
                {
                    if (i + j > len)
                    {
                        break;
                    }
                    if (dictionary.Contains(termAtt.Buffer(), i, j))
                    {
                        if (this.onlyLongestMatch)
                        {
                            if (longestMatchToken != null)
                            {
                                if (longestMatchToken.txt.Length < j)
                                {
                                    longestMatchToken = new CompoundToken(this, i, j);
                                }
                            }
                            else
                            {
                                longestMatchToken = new CompoundToken(this, i, j);
                            }
                        }
                        else
                        {
                            tokens.AddLast(new CompoundToken(this, i, j));
                        }
                    }
                }
                if (this.onlyLongestMatch && longestMatchToken != null)
                {
                    tokens.AddLast(longestMatchToken);
                }
            }
        }
        protected internal override void decompose()
        {
            // get the hyphenation points
            Hyphenation hyphens = hyphenator.hyphenate(termAtt.Buffer(), 0, termAtt.Length, 1, 1);

            // No hyphen points found -> exit
            if (hyphens == null)
            {
                return;
            }

            int[] hyp = hyphens.HyphenationPoints;

            for (int i = 0; i < hyp.Length; ++i)
            {
                int           remaining         = hyp.Length - i;
                int           start             = hyp[i];
                CompoundToken longestMatchToken = null;
                for (int j = 1; j < remaining; j++)
                {
                    int partLength = hyp[i + j] - start;

                    // if the part is longer than maxSubwordSize we
                    // are done with this round
                    if (partLength > this.maxSubwordSize)
                    {
                        break;
                    }

                    // we only put subwords onto the token stream
                    // that are at least minSubwordSize characters long
                    if (partLength < this.minSubwordSize)
                    {
                        // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
                        // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
                        continue;
                    }

                    // check the dictionary
                    if (dictionary == null || dictionary.Contains(termAtt.Buffer(), start, partLength))
                    {
                        if (this.onlyLongestMatch)
                        {
                            if (longestMatchToken != null)
                            {
                                if (longestMatchToken.txt.Length < partLength)
                                {
                                    longestMatchToken = new CompoundToken(this, start, partLength);
                                }
                            }
                            else
                            {
                                longestMatchToken = new CompoundToken(this, start, partLength);
                            }
                        }
                        else
                        {
                            tokens.AddLast(new CompoundToken(this, start, partLength));
                        }
                    }
                    else if (dictionary.Contains(termAtt.Buffer(), start, partLength - 1))
                    {
                        // check the dictionary again with a word that is one character
                        // shorter
                        // to avoid problems with genitive 's characters and other binding
                        // characters
                        if (this.onlyLongestMatch)
                        {
                            if (longestMatchToken != null)
                            {
                                if (longestMatchToken.txt.Length < partLength - 1)
                                {
                                    longestMatchToken = new CompoundToken(this, start, partLength - 1);
                                }
                            }
                            else
                            {
                                longestMatchToken = new CompoundToken(this, start, partLength - 1);
                            }
                        }
                        else
                        {
                            tokens.AddLast(new CompoundToken(this, start, partLength - 1));
                        }
                    }
                }
                if (this.onlyLongestMatch && longestMatchToken != null)
                {
                    tokens.AddLast(longestMatchToken);
                }
            }
        }
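The index arithmetic in the hyphenation variants is easy to misread: hyp holds character offsets into the term, so hyp[i + j] - hyp[i] is the length of a part spanning j consecutive hyphenation segments. A self-contained sketch of just that windowing (the example offsets are illustrative, not the output of a real hyphenator):

using System.Collections.Generic;

static IEnumerable<string> HyphenParts(string word, int[] hyp,
                                       int minSubwordSize, int maxSubwordSize)
{
    for (int i = 0; i < hyp.Length; ++i)
    {
        int start = hyp[i];
        for (int j = 1; j < hyp.Length - i; ++j)
        {
            int partLength = hyp[i + j] - start;
            if (partLength > maxSubwordSize) break;    // parts only grow with j
            if (partLength < minSubwordSize) continue;
            yield return word.Substring(start, partLength);
        }
    }
}

// HyphenParts("Rindfleisch", new[] { 0, 4, 11 }, 2, 15)
// yields "Rind", "Rindfleisch", and "fleisch".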
Example No. 9
            private void parseLexemes()
            {
                var children = new List<Token>();

                lexInd = 0;

                /*
                 * <module> ::= { <import> } /\n+/
                 *              <export> [ <implement> ] /\n+/
                 *              { <definition> /\n+/ }
                 */

                // Get all the imports
                while (lexInd < lexemes.Length &&
                       (string)lexemes[lexInd].Source() == "imports")
                {
                    var import = parseImport();
                    if (lexInd >= lexemes.Length)
                    {
                        throw new UnexpectedEOFException(import.Line());
                    }
                    else if (lexemes[lexInd].Type() != TokenType.NewLine)
                    {
                        throw new UnexpectedTokenException(
                                  lexemes[lexInd].Type(),
                                  new TokenType[] { TokenType.NewLine },
                                  import.Line()
                                  );
                    }
                    var newLine = lexemes[lexInd++];
                    eatNewLines();

                    children.Add(import);
                    children.Add(newLine);
                }

                // Get the exports and implements line
                var export = parseExport();

                children.Add(export);
                if (lexInd >= lexemes.Length)
                {
                    throw new UnexpectedEOFException(export.Line());
                }
                else if ((string)lexemes[lexInd].Source() == "implements")
                {
                    children.Add(parseImplement()); // Optional, hence if stmt
                }
                if (lexInd >= lexemes.Length)
                {
                    throw new UnexpectedEOFException(export.Line());
                }
                else if (lexemes[lexInd].Type() != TokenType.NewLine)
                {
                    throw new UnexpectedTokenException(
                              lexemes[lexInd].Type(),
                              new TokenType[] { TokenType.NewLine },
                              export.Line()
                              );
                }
                children.Add(lexemes[lexInd++]);
                eatNewLines();

                // Finally, parse definitions
                while (lexInd < lexemes.Length)
                {
                    var definition = parseDefinition();
                    if (lexInd >= lexemes.Length)
                    {
                        throw new UnexpectedEOFException(definition.Line());
                    }
                    else if (lexemes[lexInd].Type() != TokenType.NewLine)
                    {
                        throw new UnexpectedTokenException(
                                  lexemes[lexInd].Type(),
                                  new TokenType[] { TokenType.NewLine },
                                  definition.Line()
                                  );
                    }
                    var newLine = lexemes[lexInd++];
                    eatNewLines();

                    children.Add(definition);
                    children.Add(newLine);
                }

                ast = new CompoundToken(TokenType.Module, children, 1);
            }
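The eatNewLines() helper is referenced but never shown; a plausible implementation under the same Token API (an assumption, not the project's actual code) would be:

// Skip a run of consecutive newline tokens. (Hypothetical; the real
// helper is not part of the excerpt above.)
private void eatNewLines()
{
    while (lexInd < lexemes.Length &&
           lexemes[lexInd].Type() == TokenType.NewLine)
    {
        lexInd++;
    }
}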