Exemplo n.º 1
0
        /// <summary>
        /// Break a string of mail addresses into the individual address strings.
        /// The addresses are separated by ",".
        /// </summary>
        /// <param name="InString">Text that holds the mail addresses.</param>
        /// <returns>ArrayList holding one string for each address found.</returns>
        public static ArrayList SplitStringOfMailAddresses(string InString)
        {
            // "," and tab are dividers; blank and tab are whitespace.
            TextTraits scanTraits = new TextTraits()
                                    .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
            scanTraits.DividerPatterns.Replace(new string[] { ",", "\t" }, DelimClassification.DividerSymbol);
            scanTraits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

            ArrayList  addresses = new ArrayList();
            WordCursor cursor    = Scanner.PositionBeginWord(InString, scanTraits);

            for (;;)
            {
                // the pair holds the first ( a ) and last ( b ) word of the next address.
                ObjectPair addrRange = ScanNextAddress(InString, cursor);

                // resume the next scan from the last word of this address.
                cursor = (WordCursor)addrRange.b;

                // no first word. nothing more in the string.
                if (addrRange.a == null)
                {
                    break;
                }

                // pull the address text out of the string and collect it.
                addresses.Add(PullMailAddress(InString, addrRange));
            }

            return addresses;
        }
Exemplo n.º 2
0
        // Rebuild an AcNamedValues object from its serialized string form.
        // Every word in the string has to be braced content that ParsePair can split
        // into a key and a value. Anything else throws ApplicationException.
        public static AcNamedValues Parse(string InString)
        {
            // register "[" as the open named brace pattern and "," as a divider.
            TextTraits scanTraits = new TextTraits();
            scanTraits.OpenNamedBracedPatterns.Replace("[", Text.Enums.DelimClassification.OpenNamedBraced);
            scanTraits.DividerPatterns.AddDistinct(",", Text.Enums.DelimClassification.DividerSymbol);

            AcNamedValues parsed = new AcNamedValues();
            WordCursor    cursor = Scanner.PositionBeginWord(InString, scanTraits);

            for (;;)
            {
                cursor = Scanner.ScanNextWord(InString, cursor);
                if (cursor.IsEndOfString == true)
                {
                    return parsed;
                }

                // only a word classified as braced content is acceptable.
                bool isBracedContent =
                    (cursor.IsDelimOnly == false) &&
                    (cursor.Word.Class == WordClassification.ContentBraced);
                if (!isBracedContent)
                {
                    throw new ApplicationException(
                              "serialized AcNamedValues string in invalid format");
                }

                KeyValuePair<string, string> entry = ParsePair(cursor.Word.BracedText);
                parsed.Add(entry.Key, entry.Value);
            }
        }
Exemplo n.º 3
0
        // ------------------------- ParseContentType ----------------------------
        // Parse a content type string into its type, sub type and the charset,
        // boundary and name parameters.
        //
        // The string is scanned word by word. The delimiter that ends a word decides
        // what the word is:
        //   "/"  the word is the content type.
        //   ";"  the word is the content sub type.
        //   "="  the word is a parameter keyword; the word after it is the value.
        //
        // Type, sub type and keywords are lower cased with the invariant culture so
        // the result does not depend on the culture of the current thread.
        //
        // NOTE(review): a string without parameters ( "text/plain" ) has no ";" after
        // the sub type, so SubType looks like it is never assigned in that case.
        // Confirm against the Scanner behavior.
        public static PartProperty.ContentType ParseContentType(string InString)
        {
            TextTraits traits = new TextTraits()
                                .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

            traits.DividerPatterns.Replace(
                new string[] { "/", ":", ";", " ", "\t", "=" }, DelimClassification.DividerSymbol);
            traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);
            PartProperty.ContentType results = new PartProperty.ContentType();

            WordCursor csr = Scanner.PositionBeginWord(InString, traits);

            while (true)
            {
                csr = Scanner.ScanNextWord(InString, csr);
                if (csr.IsEndOfString)
                {
                    break;
                }

                // content type
                if (csr.DelimValue == "/")
                {
                    results.Type = csr.Word.ToString( ).ToLowerInvariant( );
                }

                // content sub type.
                else if (csr.DelimValue == ";")
                {
                    results.SubType = csr.Word.ToString( ).ToLowerInvariant( );
                }

                // a kwd
                else if (csr.DelimValue == "=")
                {
                    WordCursor nxCsr = csr.NextWord( );

                    // The value is complete when it is ended by the end of the string,
                    // by whitespace, or by the ";" in front of the next parameter.
                    // Without the ";" case a value that is followed by another
                    // parameter is not consumed here, and the next pass of the loop
                    // stores it as the sub type because its delimiter is ";".
                    if ((nxCsr.DelimClass == DelimClassification.EndOfString) ||
                        (nxCsr.DelimClass == DelimClassification.Whitespace) ||
                        (nxCsr.DelimValue == ";"))
                    {
                        string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                        // step onto the value word so the loop resumes after it.
                        csr = nxCsr;
                        if (kwd == "charset")
                        {
                            results.CharSet = csr.Word.NonQuotedSimpleValue;
                        }
                        else if (kwd == "boundary")
                        {
                            results.Boundary = csr.Word.NonQuotedSimpleValue;
                        }
                        else if (kwd == "name")
                        {
                            results.Name = csr.Word.NonQuotedSimpleValue;
                        }
                    }
                }
            }
            return(results);
        }
Exemplo n.º 4
0
        // ------------------------- ParseContentType ----------------------------
        // Parse a content type string into its type, sub type and the charset,
        // boundary and name parameters.
        //
        // The string is scanned word by word. The delimiter that ends a word decides
        // what the word is:
        //   "/"  the word is the content type.
        //   ";"  the word is the content sub type.
        //   "="  the word is a parameter keyword; the word after it is the value.
        //
        // Type, sub type and keywords are lower cased with the invariant culture so
        // the result does not depend on the culture of the current thread.
        //
        // NOTE(review): a string without parameters ( "text/plain" ) has no ";" after
        // the sub type, so SubType looks like it is never assigned in that case.
        // Confirm against the Scanner behavior.
        public static PartProperty.ContentType ParseContentType(string InString)
        {
            TextTraits traits = new TextTraits( )
                                .SetDelimChars("/:; \t=")
                                .SetWhitespaceChars(" \t")
                                .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

            PartProperty.ContentType results = new PartProperty.ContentType( );

            Scanner.WordCursor csr = Scanner.PositionBeginWord( );
            while (true)
            {
                csr = Scanner.ScanNextWord(InString, csr, traits);
                if (csr.IsEndOfString)
                {
                    break;
                }

                // content type
                if (csr.Delim == "/")
                {
                    results.Type = csr.Word.ToString( ).ToLowerInvariant( );
                }

                // content sub type.
                else if (csr.Delim == ";")
                {
                    results.SubType = csr.Word.ToString( ).ToLowerInvariant( );
                }

                // a kwd
                else if (csr.Delim == "=")
                {
                    Scanner.WordCursor nxCsr = csr.NextWord( );

                    // The value is complete when it is ended by the end of the string,
                    // by whitespace, or by the ";" in front of the next parameter.
                    // Without the ";" case a value that is followed by another
                    // parameter is not consumed here, and the next pass of the loop
                    // stores it as the sub type because its delimiter is ";".
                    if ((nxCsr.DelimClass == DelimClassification.End) ||
                        (nxCsr.DelimClass == DelimClassification.Whitespace) ||
                        (nxCsr.Delim == ";"))
                    {
                        string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                        // step onto the value word so the loop resumes after it.
                        csr = nxCsr;
                        if (kwd == "charset")
                        {
                            results.CharSet = csr.Word.NonQuotedWord;
                        }
                        else if (kwd == "boundary")
                        {
                            results.Boundary = csr.Word.NonQuotedWord;
                        }
                        else if (kwd == "name")
                        {
                            results.Name = csr.Word.NonQuotedWord;
                        }
                    }
                }
            }
            return(results);
        }
Exemplo n.º 5
0
        // ------------------------ Load ----------------------------------
        // Store the content-type line and the traits used to scan it, then run
        // Parse. Returns this object so the call can be chained.
        public MimeContentTypexLine Load(string InLine)
        {
            // delimiters, whitespace and quote handling used when stepping from word
            // to word in the line.
            var lineTraits = new TextTraits( )
                             .SetDelimChars("/:; \t=")
                             .SetWhitespaceChars(" \t")
                             .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

            mTraits = lineTraits;
            mLine   = InLine;

            Parse( );

            return this;
        }
Exemplo n.º 6
0
        IsolateNumericLiteral(
            ScanStream ScanStream,
            TextTraits Traits,
            int Bx)
        {
            // Isolate the run of digit chars that starts at Bx and the delim pattern
            // that immediately follows it. The returned tuple holds: the literal type,
            // the literal text, the delim pattern ( null when none was looked for or
            // found ) and the index returned by the delim scan ( -1 when not scanned ).

            // step over the digit chars.
            int scanIx = Bx;
            while ((scanIx < ScanStream.Stream.Length) &&
                   Char.IsDigit(ScanStream.Stream[scanIx]))
            {
                scanIx += 1;
            }

            // position of the last digit. Stays at Bx when no digit was stepped over.
            int lastDigitIx = (scanIx > Bx) ? (scanIx - 1) : Bx;

            string literalText = ScanStream.Substring(Bx, lastDigitIx - Bx + 1);

            // look for a delim pattern right after the literal, provided there is
            // text after the literal.
            ScanPattern delimPat = null;
            int         delimIx  = -1;
            int         delimBx  = lastDigitIx + 1;
            if (delimBx < ScanStream.Stream.Length)
            {
                var match = Scanner.ScanEqualAny(ScanStream.Stream, delimBx, Traits.DelimPatterns);
                delimPat = match.Item1;
                delimIx  = match.Item2;
            }

            // for now every numeric literal is reported as a simple integer. Float,
            // decimal, sign and precision are not determined.
            return new Tuple<LiteralType, string, ScanPattern, int>(
                LiteralType.Integer, literalText, delimPat, delimIx);
        }
Exemplo n.º 7
0
        // ----------------------- CalcScanNextStart ---------------------------
        // Work out the stream position from which the scan for the next word starts.
        // Returns -1 when the cursor position gives no start position or when the
        // start position is past the last char of the stream.
        private static int CalcScanNextStart(
            ScanStream ScanStream,
            TextTraits Traits, ScanWordCursor Cursor)
        {
            // -1 covers RelativePosition.None and any position not handled below.
            int startIx = -1;
            var position = Cursor.Position;

            if (position == RelativePosition.Begin)
            {
                startIx = 0;
            }
            else if (position == RelativePosition.Before)
            {
                startIx = Cursor.CursorBx.ToStreamLocation(ScanStream).Value;
            }
            else if ((position == RelativePosition.After) ||
                     (position == RelativePosition.At))
            {
                if (Traits.IsNonWordPattern(Cursor.DelimPattern))
                {
                    // start after the end of the delim.
                    startIx = Cursor.DelimEx.ToStreamLocation(ScanStream).Value + 1;
                }
                else if (Cursor.WordIsDelim == true)
                {
                    // start after the end of the word.
                    startIx = Cursor.WordEx.ToStreamLocation(ScanStream).Value + 1;
                }
                else
                {
                    // start at the begin of the delim.
                    startIx = Cursor.DelimBx.ToStreamLocation(ScanStream).Value;
                }
            }
            else if (position == RelativePosition.End)
            {
                startIx = ScanStream.Stream.Length;
            }

            // a start position past the end of the stream means nothing to scan.
            return (startIx > (ScanStream.Stream.Length - 1)) ? -1 : startIx;
        }
        IsolateQuotedWord(
            ScanStream ScanStream,
            TextTraits Traits,
            int Bx)
        {
            // Isolate the verbatim or quoted literal that starts at Bx and the delim
            // pattern that immediately follows it. The returned tuple holds: the
            // literal type, the literal text, the delim pattern and the index returned
            // by the delim scan. When no closing quote is found, or nothing follows
            // the closing quote, the pattern is null and the index is -1.
            LiteralType litType = LiteralType.none;
            string      litText = null;
            char        ch1     = ScanStream.Stream[Bx];

            ScanPattern foundPat = null;
            int         foundIx  = -1;
            int         quoteEx  = -1;

            // is start of a verbatim string literal
            if ((Traits.VerbatimLiteralPattern != null) &&
                (Traits.VerbatimLiteralPattern.Match(ScanStream.Stream, Bx)))
            {
                var rv = VerbatimLiteral.ScanCloseQuote(
                    ScanStream.Stream, Traits.VerbatimLiteralPattern, Bx);
                quoteEx = rv.Item1;
                litText = rv.Item2;
                litType = LiteralType.VerbatimString;
            }

            // is a quoted literal
            else if (Traits.IsQuoteChar(ch1) == true)
            {
                quoteEx = Scanner.ScanCloseQuote(ScanStream.Stream, Bx, Traits.QuoteEncapsulation);
                if (quoteEx != -1)
                {
                    int lx = quoteEx - Bx + 1;
                    litText = ScanStream.Substring(Bx, lx);

                    // correct the following at some point. Should be either string or
                    // char lit.
                    litType = LiteralType.String;
                }
            }

            // isolate the delim that follows that quoted word.
            // Only scan when a closing quote was found. With quoteEx still at -1 the
            // scan would start at position 0 of the stream, which has nothing to do
            // with this word. Also skip the scan when the closing quote is the last
            // char of the stream, the same guard IsolateNumericLiteral applies.
            if (quoteEx != -1)
            {
                int bx = quoteEx + 1;
                if (bx < ScanStream.Stream.Length)
                {
                    var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
                    foundPat = rv.Item1;
                    foundIx  = rv.Item2;
                }
            }

            return(new Tuple <LiteralType, string, ScanPattern, int>(
                       litType, litText, foundPat, foundIx));
        }
Exemplo n.º 9
0
        // Split a line into its words. The 4th word is the file/dir name. It is
        // scanned with its own traits because the name can contain spaces, and it is
        // trimmed before it is stored.
        string[] Parse_CrackWords(string InLine)
        {
            // traits for the ordinary words: no dividers and no brace patterns.
            TextTraits wordTraits = new TextTraits();
            wordTraits.DividerPatterns.Clear( );
            wordTraits.OpenContentBracedPatterns.Clear();
            wordTraits.OpenNamedBracedPatterns.Clear();

            // traits for the dir entry name: a copy of the word traits with the
            // whitespace patterns replaced by the tab char.
            TextTraits nameTraits = new TextTraits(wordTraits);
            nameTraits.WhitespacePatterns.Replace("\t", Text.Enums.DelimClassification.Whitespace);

            List<string> found  = new List<string>();
            WordCursor   cursor = Scanner.PositionBeginWord(InLine, wordTraits);

            for (;;)
            {
                // three words collected so far means the next word is the entry name.
                bool isEntryName = (found.Count == 3);
                cursor.TextTraits = isEntryName ? nameTraits : wordTraits;

                cursor = Scanner.ScanNextWord(InLine, cursor);
                if (cursor.IsEndOfString == true)
                {
                    break;
                }

                string wordText = cursor.Word.Value;
                found.Add(isEntryName ? wordText.Trim() : wordText);
            }

            return found.ToArray();
        }
        // ------------------------ Load ----------------------------------
        // Store the content-type line and the traits used to scan it, then run
        // Parse. Returns this object so the call can be chained.
        public MimeContentTypexLine Load(string InLine)
        {
            // traits used when stepping word to word in the content-type line.
            mTraits = new TextTraits().SetQuoteEncapsulation(QuoteEncapsulation.Escape);

            // the divider symbols of the line.
            string[] dividers = { "/", ":", ";", " ", "\t", "=" };
            var dividerPatterns = new ScanPatterns(
                dividers, Text.Enums.DelimClassification.DividerSymbol);
            mTraits.DividerPatterns.AddDistinct(dividerPatterns);

            // blank and tab are whitespace.
            mTraits.WhitespacePatterns.Replace(" ", "\t", Text.Enums.DelimClassification.Whitespace);

            mLine = InLine;
            Parse( );

            return this;
        }
Exemplo n.º 11
0
        // --------------------------- IsolateDelim_SetDelimIsWhitespace ----------
        // Record in Results that the delim is the whitespace that starts at WsIx.
        private static void IsolateDelim_SetDelimIsWhitespace(
            string Text, TextTraits Traits,
            WordCursor Results, int WsIx)
        {
            // scan from WsIx to the end of the text for something that is not a
            // whitespace pattern. The string scanned over is kept as the delim value
            // ( so the whitespace can be checked later for tabs or newlines ).
            int lastIx = Text.Length - 1;
            ScanPatternResults whitespaceRun =
                Scanner.ScanNotEqual(Text, WsIx, lastIx, Traits.WhitespacePatterns);

            Results.SetDelim(
                Text, whitespaceRun.ScannedOverString, WsIx, DelimClassification.Whitespace);
            Results.DelimIsWhitespace = true;
        }
Exemplo n.º 12
0
        // ------------------------------ ScanNextAddress ---------------------------
        // Scan from InWord over the words of the next mail address. Returns the pair
        // ( first word, last word ) of the address. Both are null when the end of the
        // string is reached before any word is found.
        private static ObjectPair ScanNextAddress(
            string InString, WordCursor InWord)
        {
            // "," and tab are dividers; blank and tab are whitespace.
            TextTraits addrTraits = new TextTraits()
                                    .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
            addrTraits.DividerPatterns.Replace(new string[] { ",", "\t" }, DelimClassification.DividerSymbol);
            addrTraits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

            // the scan continues from the cursor passed in, using the address traits.
            WordCursor cursor = InWord;
            cursor.TextTraits = addrTraits;

            WordCursor firstWord = null;
            WordCursor lastWord  = null;

            for (;;)
            {
                cursor = Scanner.ScanNextWord(InString, cursor);
                if (cursor.IsEndOfString)
                {
                    break;
                }

                // widen the range of words that make up the address.
                if (firstWord == null)
                {
                    firstWord = cursor;
                }
                lastWord = cursor;

                // "," ends this address. An empty delim is the end of the string.
                string delim = cursor.DelimValue;
                if ((delim == ",") || (delim == ""))
                {
                    break;
                }
            }

            return new ObjectPair(firstWord, lastWord);
        }
Exemplo n.º 13
0
        ParseContentDisposition(string InString)
        {
            // Parse a content disposition string into the disposition and the
            // filename parameter.
            //
            // The string is scanned word by word. A word ended by ";" is the
            // disposition. A word ended by "=" is a parameter keyword and the word
            // after it is the value. Disposition and keywords are lower cased with the
            // invariant culture so the result does not depend on the culture of the
            // current thread ( "FILENAME" has to compare equal to "filename" ).
            //
            // NOTE(review): a string without parameters ( "inline" ) has no ";", so
            // Disposition looks like it is never assigned in that case. Confirm
            // against the Scanner behavior.
            TextTraits traits = new TextTraits()
                                .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

            traits.DividerPatterns.Replace(
                new string[] { ";", " ", "\t", "=" }, DelimClassification.DividerSymbol);
            traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);
            PartProperty.ContentDisposition results = new PartProperty.ContentDisposition();

            WordCursor csr = Scanner.PositionBeginWord(InString, traits);

            while (true)
            {
                csr = Scanner.ScanNextWord(InString, csr);
                if (csr.IsEndOfString)
                {
                    break;
                }

                // content disposition
                if (csr.DelimValue == ";")
                {
                    results.Disposition = csr.Word.ToString( ).ToLowerInvariant( );
                }

                // a kwd
                else if (csr.DelimValue == "=")
                {
                    WordCursor nxCsr = csr.NextWord( );

                    // The value is complete when it is ended by the end of the string,
                    // by whitespace, or by the ";" in front of the next parameter.
                    // Without the ";" case a value that is followed by another
                    // parameter is not consumed here, and the next pass of the loop
                    // stores it as the disposition because its delimiter is ";".
                    if ((nxCsr.DelimClass == DelimClassification.EndOfString) ||
                        (nxCsr.DelimClass == DelimClassification.Whitespace) ||
                        (nxCsr.DelimValue == ";"))
                    {
                        string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                        // step onto the value word so the loop resumes after it.
                        csr = nxCsr;
                        if (kwd == "filename")
                        {
                            results.FileName = csr.Word.NonQuotedSimpleValue;
                        }
                    }
                }
            }
            return(results);
        }
Exemplo n.º 14
0
        // ------------------------------ ScanNextAddress ---------------------------
        // Scan from InWord over the words of the next mail address. Returns the pair
        // ( first word, last word ) of the address. Both are null when the end of the
        // string is reached before any word is found.
        private static ObjectPair ScanNextAddress(
            string InString, Scanner.WordCursor InWord)
        {
            // ",", blank and tab delimit words; blank and tab are whitespace.
            TextTraits addrTraits = new TextTraits( )
                                    .SetDelimChars(", \t")
                                    .SetWhitespaceChars(" \t")
                                    .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

            Scanner.WordCursor firstWord = null;
            Scanner.WordCursor lastWord  = null;
            Scanner.WordCursor cursor    = InWord;

            for (;;)
            {
                cursor = Scanner.ScanNextWord(InString, cursor, addrTraits);
                if (cursor.IsEndOfString)
                {
                    break;
                }

                // widen the range of words that make up the address.
                if (firstWord == null)
                {
                    firstWord = cursor;
                }
                lastWord = cursor;

                // "," ends this address. An empty delim is the end of the string.
                bool addressDone = (cursor.Delim == ",") || (cursor.Delim == "");
                if (addressDone)
                {
                    break;
                }
            }

            return new ObjectPair(firstWord, lastWord);
        }
Exemplo n.º 15
0
        ParseContentDisposition(string InString)
        {
            // Parse a content disposition string into the disposition and the
            // filename parameter.
            //
            // The string is scanned word by word. A word ended by ";" is the
            // disposition. A word ended by "=" is a parameter keyword and the word
            // after it is the value. Disposition and keywords are lower cased with the
            // invariant culture so the result does not depend on the culture of the
            // current thread ( "FILENAME" has to compare equal to "filename" ).
            //
            // NOTE(review): a string without parameters ( "inline" ) has no ";", so
            // Disposition looks like it is never assigned in that case. Confirm
            // against the Scanner behavior.
            TextTraits traits = new TextTraits( )
                                .SetDelimChars("; \t=")
                                .SetWhitespaceChars(" \t")
                                .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

            PartProperty.ContentDisposition results = new PartProperty.ContentDisposition( );

            Scanner.WordCursor csr = Scanner.PositionBeginWord( );
            while (true)
            {
                csr = Scanner.ScanNextWord(InString, csr, traits);
                if (csr.IsEndOfString)
                {
                    break;
                }

                // content disposition
                if (csr.Delim == ";")
                {
                    results.Disposition = csr.Word.ToString( ).ToLowerInvariant( );
                }

                // a kwd
                else if (csr.Delim == "=")
                {
                    Scanner.WordCursor nxCsr = csr.NextWord( );

                    // The value is complete when it is ended by the end of the string,
                    // by whitespace, or by the ";" in front of the next parameter.
                    // Without the ";" case a value that is followed by another
                    // parameter is not consumed here, and the next pass of the loop
                    // stores it as the disposition because its delimiter is ";".
                    if ((nxCsr.DelimClass == DelimClassification.End) ||
                        (nxCsr.DelimClass == DelimClassification.Whitespace) ||
                        (nxCsr.Delim == ";"))
                    {
                        string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                        // step onto the value word so the loop resumes after it.
                        csr = nxCsr;
                        if (kwd == "filename")
                        {
                            results.FileName = csr.Word.NonQuotedWord;
                        }
                    }
                }
            }
            return(results);
        }
Exemplo n.º 16
0
        // ----------------------- CalcScanNextStart ---------------------------
        // Work out the stream position from which the scan for the next atom starts.
        // Returns -1 when the cursor position gives no start position or when the
        // start position is past the last char of the stream.
        private static int CalcScanNextStart(
            ScanStream ScanStream,
            TextTraits Traits, ScanAtomCursor Cursor)
        {
            // -1 covers RelativePosition.None and any position not handled below.
            int startIx = -1;
            var position = Cursor.Position;

            if (position == RelativePosition.Begin)
            {
                startIx = 0;
            }
            else if (position == RelativePosition.Before)
            {
                // start at the start location of the cursor.
                startIx = Cursor.StartLoc.ToStreamLocation(ScanStream).Value;
            }
            else if ((position == RelativePosition.After) ||
                     (position == RelativePosition.At))
            {
                // start right after the end location of the cursor.
                startIx = Cursor.EndLoc.ToStreamLocation(ScanStream).Value + 1;
            }
            else if (position == RelativePosition.End)
            {
                startIx = ScanStream.Stream.Length;
            }

            // a start position past the end of the stream means nothing to scan.
            return (startIx > (ScanStream.Stream.Length - 1)) ? -1 : startIx;
        }
Exemplo n.º 17
0
        // Build the location and the TextWord of the word that starts at Bx.
        // With a literal type, the word is built from LitText and that type.
        // Without one, the word is the stream text from Bx up to, not including,
        // NonWordBx ( or to the end of the stream when NonWordBx is null ) and is
        // classified as an identifier.
        private static Tuple <TextLocation, TextWord> IsolateWordText(
            ScanStream ScanStream,
            TextTraits Traits,
            LiteralType? LiteralType,
            string LitText,
            int Bx, int? NonWordBx)
        {
            // the word starts at Bx whether it is a literal or not.
            TextLocation startLoc = new StreamLocation(Bx).ToTextLocation(ScanStream);

            TextWord word;
            if (LiteralType != null)
            {
                // a quoted or numeric literal
                word = new TextWord(LitText, LiteralType.Value, Traits);
            }
            else
            {
                // position just past the last char of the word.
                int wordEndBx = NonWordBx ?? ScanStream.Stream.Length;
                string wordText = ScanStream.Substring(Bx, wordEndBx - Bx);
                word = new TextWord(wordText, WordClassification.Identifier, Traits);
            }

            return Tuple.Create(startLoc, word);
        }
Exemplo n.º 18
0
        // -------------------- IsolateDelim ---------------------------
        // Store in Results the delim that ends the current word.
        //   Text           - the text being scanned.
        //   PatternResults - result of the scan for the non word pattern that ends
        //                    the word.
        //   Results        - cursor that receives the delim.
        //   Traits         - supplies the path part delim lookup and the virtual
        //                    whitespace setting.
        private static void IsolateDelim(
            string Text,
            ScanPatternResults PatternResults,
            ref WordCursor Results,
            TextTraits Traits)
        {
            // did not find a nonword char.  must have hit end of string.
            if (PatternResults.IsNotFound)
            {
                Results.DelimClass = DelimClassification.EndOfString;
                return;
            }

            // we have a delimiter of some kind.
            // ( the whitespace look ahead scan that used to run here was removed. Its
            //   result was never used. IsolateDelim_WhitespaceFollows does its own
            //   look ahead. )
            DelimClassification sprdc = PatternResults.FoundPat.DelimClassification;

            Results.WhitespaceFollowsWord  = false;
            Results.WhitespaceFollowsDelim = false;
            Results.DelimIsWhitespace      = false;

            // whitespace immed follows the word text
            if (sprdc == DelimClassification.Whitespace)
            {
                ScanWord.IsolateDelim_WhitespaceFollows(
                    Text, PatternResults, ref Results, Traits);
                return;
            }

            // the delim is a hard delim ( not whitespace )

            // Want the openContent brace to be processed as a standalone word. Use
            // virtual whitespace so the word that this open brace is the delim of will
            // have what appears to be a whitespace delim. Then the following word will
            // be the standalone open content brace char.
            if ((sprdc == DelimClassification.OpenContentBraced) &&
                (Traits.VirtualWhitespace == true))
            {
                Results.SetDelim(
                    Text,
                    null, PatternResults.FoundPos, DelimClassification.VirtualWhitespace);
                return;
            }

            // delim is either as classified in the collection of NonWords or is
            // a PathPart delim.
            ScanPattern pat = Traits.GetPathPartDelim(
                Text, PatternResults.FoundPos);
            if (pat != null)
            {
                Results.SetDelim(
                    Text,
                    pat.PatternValue,
                    PatternResults.FoundPos,
                    DelimClassification.PathSep);
            }
            else
            {
                Results.SetDelim(
                    Text,
                    PatternResults.FoundPat.PatternValue,
                    PatternResults.FoundPos,
                    sprdc);
            }
        }
Exemplo n.º 19
0
        /// <summary>
        /// Whitespace immediately follows the word. Decide whether that whitespace
        /// is the delim of the word, or whether the delim is the non word pattern
        /// found right after the whitespace.
        /// </summary>
        /// <param name="Text">The text being scanned.</param>
        /// <param name="PatternResults">Scan results. FoundPos is used as the
        /// position where the whitespace starts.</param>
        /// <param name="Results">Cursor that receives the delim.</param>
        /// <param name="Traits">Supplies the whitespace patterns, the non word
        /// patterns and the path part delim test.</param>
        private static void IsolateDelim_WhitespaceFollows(
            string Text,
            ScanPatternResults PatternResults,
            ref WordCursor Results,
            TextTraits Traits)
        {
            Results.WhitespaceFollowsWord = true;

            int whitespaceBx = PatternResults.FoundPos;
            int lastIx       = Text.Length - 1;

            // step over the whitespace.
            ScanPatternResults afterWhitespace =
                Scanner.ScanNotEqual(Text, whitespaceBx, lastIx, Traits.WhitespacePatterns);

            // is there a non word pattern at the position after the whitespace.
            ScanPattern hardDelim = null;
            if (afterWhitespace.FoundPos != -1)
            {
                hardDelim = Traits.NonWordPatterns.MatchPatternsAtStringLocation(
                    Text, afterWhitespace.FoundPos, lastIx).Item1;
            }

            // The whitespace stays the delim of record when:
            //   - nothing non word follows it.
            //   - what follows is a sep char in a path name.
            //   - what follows is an open brace. ( content braces are considered
            //     standalone words. )
            //   - what follows is a quote. ( the quoted string is considered a word. )
            bool whitespaceIsDelim =
                (hardDelim == null) ||
                Traits.IsPathPartDelim(Text, afterWhitespace.FoundPos) ||
                hardDelim.DelimClassification.IsOpenBraced( ) ||
                (hardDelim.DelimClassification == DelimClassification.Quote);

            if (whitespaceIsDelim)
            {
                ScanWord.IsolateDelim_SetDelimIsWhitespace(
                    Text, Traits, Results, whitespaceBx);
            }

            // what follows the whitespace is an actual delim. It replaces the
            // whitespace as the delim of the word.
            else
            {
                Results.SetDelim(
                    Text,
                    hardDelim.PatternValue,
                    afterWhitespace.FoundPos,
                    hardDelim.DelimClassification);
            }
        }
Exemplo n.º 20
0
        // ------------------------ ScanNextAtom -------------------------
        // Moves from CurrentWord to the next atom of the stream and hands back a new
        // cursor for it. The atom is one of: word text ( possibly a numeric or quoted
        // literal ) ended by one of Traits.DelimPatterns, a delim pattern standing on
        // its own, a comment-to-end span, or the whitespace run that precedes a word
        // or keyword ( only when CurrentWord reports whitespace as significant. )
        // Result cursor:
        //   - Position is End when no atom was isolated.
        //   - built from the list of matches when the delim scan returned a list of
        //     matching patterns.
        //   - otherwise holds the atom and its pattern, with Position set to At.
        // When CurrentWord.StayAtFlag is on, nothing is scanned. The result cursor is
        // assembled from the atom text and atom pattern of CurrentWord.
        public static ScanAtomCursor ScanNextAtom(
            ScanStream ScanStream,
            TextTraits Traits, ScanAtomCursor CurrentWord)
        {
            // what the scan produces: the atom, the pattern that goes with it and,
            // when a list of delim patterns matched, one atom per match.
            AtomText                atom        = null;
            ScanPattern             delimPat    = null;
            List <MatchScanPattern> atomMatches = null;
            TextLocation            atomStart   = null;

            if (CurrentWord.StayAtFlag == true)
            {
                // no scan. carry the current cursor contents over to the result.
                atom      = CurrentWord.AtomText;
                delimPat  = CurrentWord.AtomPattern;
                atomStart = CurrentWord.StartLoc;
            }
            else
            {
                int delimIx = -1;
                int delimLx = 0;
                List <MatchScanPattern> delimMatches      = null;
                AtomText                leadingWhitespace = null;

                // ----------------------------- STEP 1 ------------------------------
                // locate the first non whitespace char after the current atom.

                // whitespace significance is taken from the atom being left behind.
                bool?priorWhitespaceSignificant = null;
                if ((CurrentWord.Position == RelativePosition.At) ||
                    (CurrentWord.Position == RelativePosition.After))
                {
                    priorWhitespaceSignificant = CurrentWord.WhitespaceIsSignificant;
                }

                int scanBx = ScanAtom.CalcScanNextStart(ScanStream, Traits, CurrentWord);
                if (scanBx != -1)
                {
                    int whitespaceBx = scanBx;
                    scanBx = Scanner.ScanNotEqual(
                        ScanStream.Stream, Traits.WhitespacePatterns, whitespaceBx);

                    // whitespace was stepped over and the prior atom cares about it.
                    // hold on to it. STEP 3 decides whether it becomes the atom.
                    if ((priorWhitespaceSignificant == true) && (scanBx != whitespaceBx))
                    {
                        // scanBx of -1 means the whitespace runs to the end of the stream.
                        int whitespaceEx = (scanBx == -1)
                            ? ScanStream.Stream.Length - 1
                            : scanBx - 1;
                        int whitespaceLx = whitespaceEx - whitespaceBx + 1;

                        leadingWhitespace = new AtomText(
                            ScanAtomCode.Whitespace,
                            ScanStream.Stream.Substring(whitespaceBx, whitespaceLx), " ",
                            new StreamLocation(whitespaceBx).ToTextLocation(ScanStream),
                            new StreamLocation(whitespaceEx).ToTextLocation(ScanStream));
                    }
                }

                // ------------------------------- STEP 2 ----------------------------------
                // isolate a numeric literal, a quoted literal, or find the delim pattern
                // which ends the word.
                LiteralType?literalType = null;
                string      literalText = null;
                if (scanBx != -1)
                {
                    // starts with a decimal digit. numeric literal.
                    if (Char.IsDigit(ScanStream.Stream[scanBx]) == true)
                    {
                        var numeric = Scanner.IsolateNumericLiteral(ScanStream, Traits, scanBx);
                        literalType = numeric.Item1;
                        literalText = numeric.Item2;
                        delimPat    = numeric.Item3; // pattern right after the literal.
                        delimIx     = numeric.Item4; // where that pattern was found.
                    }

                    // anything else. look for the first delim pattern from here on.
                    else
                    {
                        var found = Scanner.ScanEqualAny(
                            ScanStream.Stream, scanBx, Traits.DelimPatterns);
                        delimPat     = found.Item1;
                        delimIx      = found.Item2;
                        delimLx      = found.Item3;
                        delimMatches = found.Item4;

                        // the scan starts on a quote. isolate the quoted string. the delim
                        // of record becomes the pattern which follows the quoted string.
                        if ((delimPat != null) &&
                            (delimPat.DelimClassification == DelimClassification.Quote) &&
                            (delimIx == scanBx))
                        {
                            var quoted = Scanner.IsolateQuotedWord(ScanStream, Traits, delimIx);
                            literalType = quoted.Item1;
                            literalText = quoted.Item2;
                            delimPat    = quoted.Item3; // pattern right after the literal.
                            delimIx     = quoted.Item4; // where that pattern was found.
                        }
                    }
                }

                // ------------------------------- STEP 3 ----------------------------------
                // build the atom from what was found. ( nothing to build when scanBx
                // is -1. )
                if (scanBx != -1)
                {
                    bool runsToEnd = (delimIx == -1);

                    // word text, either up to the end of the stream or up to the delim.
                    if (runsToEnd || (delimIx > scanBx))
                    {
                        // significant whitespace ahead of the word goes out first.
                        if (leadingWhitespace != null)
                        {
                            atom     = leadingWhitespace;
                            delimPat = null;
                        }
                        else if (runsToEnd)
                        {
                            var word = Scanner.IsolateWordText(
                                ScanStream, Traits, literalType, literalText, scanBx, null);
                            atom      = word.Item3;
                            atomStart = atom.StartLoc;
                        }
                        else
                        {
                            var word = Scanner.IsolateWordText(
                                ScanStream, Traits, literalType, literalText, scanBx, delimIx);
                            atom      = word.Item3;
                            atomStart = atom.StartLoc;
                        }
                    }

                    // delim only, and it opens a comment. the comment text is the atom.
                    else if (delimPat.DelimClassification == DelimClassification.CommentToEnd)
                    {
                        var eol = Scanner.ScanEqualAny(
                            ScanStream.Stream, scanBx, Traits.NewLinePatterns);
                        var eolPat = eol.Item1;
                        var eolIx  = eol.Item2;

                        atomStart = new StreamLocation(delimIx).ToTextLocation(ScanStream);

                        // no new line pattern ahead. comment runs to the end of the stream.
                        if (eolPat == null)
                        {
                            int          lastIx     = ScanStream.Stream.Length - 1;
                            TextLocation commentEnd =
                                new StreamLocation(lastIx).ToTextLocation(ScanStream);
                            string commentText = ScanStream.Substring(delimIx);

                            atom = new AtomText(
                                ScanAtomCode.CommentToEnd, commentText, null, atomStart, commentEnd);
                            delimPat = null;
                        }

                        // comment stops just short of the new line pattern, which becomes
                        // the pattern stored in the cursor.
                        else
                        {
                            int          commentLx  = eolIx - delimIx;
                            TextLocation commentEnd =
                                new StreamLocation(eolIx - 1).ToTextLocation(ScanStream);
                            string commentText = ScanStream.Substring(delimIx, commentLx);

                            atom = new AtomText(
                                ScanAtomCode.CommentToEnd, commentText, null, atomStart, commentEnd);

                            // value is not used further on.
                            var atomStreamLoc = atomStart.ToStreamLocation(ScanStream);

                            delimPat = eolPat;
                        }
                    }

                    // whitespace, then a keyword. the whitespace goes out first.
                    else if ((delimPat.DelimClassification == DelimClassification.Keyword) &&
                             (leadingWhitespace != null))
                    {
                        atom     = leadingWhitespace;
                        delimPat = null;
                    }

                    // a list of patterns matched. build an atom for each match.
                    else if (delimMatches != null)
                    {
                        atomMatches = new List <MatchScanPattern>();
                        foreach (var match in delimMatches)
                        {
                            atomStart = new StreamLocation(delimIx).ToTextLocation(ScanStream);
                            int          matchLx  = match.MatchLength;
                            TextLocation matchEnd =
                                new StreamLocation(delimIx + matchLx - 1).ToTextLocation(ScanStream);
                            string matchText = ScanStream.Stream.Substring(delimIx, matchLx);

                            atom = new AtomText(
                                match.MatchPattern.DelimClassification.ToScanAtomCode().Value,
                                matchText,
                                match.MatchPattern.ReplacementValue,
                                atomStart, matchEnd);

                            match.AtomText = atom;
                            atomMatches.Add(match);
                        }
                    }

                    // a single delim pattern. its text is the atom.
                    else
                    {
                        atomStart = new StreamLocation(delimIx).ToTextLocation(ScanStream);
                        TextLocation delimEnd =
                            new StreamLocation(delimIx + delimLx - 1).ToTextLocation(ScanStream);
                        string delimText = ScanStream.Stream.Substring(delimIx, delimLx);

                        atom = new AtomText(
                            delimPat.DelimClassification.ToScanAtomCode().Value,
                            delimText, delimPat.ReplacementValue,
                            atomStart, delimEnd);
                    }
                }
            }

            // package the outcome in a fresh cursor.
            if (atom == null)
            {
                ScanAtomCursor endCursor = new ScanAtomCursor( );
                endCursor.Position = RelativePosition.End;
                return(endCursor);
            }

            if (atomMatches != null)
            {
                return(new ScanAtomCursor(atomMatches));
            }

            ScanAtomCursor atCursor = new ScanAtomCursor(atom, delimPat);
            atCursor.Position = RelativePosition.At;
            return(atCursor);
        }
Exemplo n.º 21
0
        // ------------------------ ScanNextWord -------------------------
        // Scans to the next word in the stream. ( a word being the text bounded by the
        // delimiter and whitespace patterns specified in the TextTraits argument. )
        // Always returns a new cursor:
        //   - Position is End when neither a word nor a delim pattern was found.
        //   - otherwise Position is At and the cursor holds the word, the start location
        //     of the word, and the location and pattern of the delim.
        // When CurrentWord.StayAtFlag is set, nothing is scanned. The returned cursor is
        // built from the word and delim values of CurrentWord.
        public static ScanWordCursor ScanNextWord(
            ScanStream ScanStream,
            TextTraits Traits, ScanWordCursor CurrentWord)
        {
            // components of the next word.
            TextWord     wordPart   = null;
            TextLocation wordBx     = null;
            ScanPattern  nonWordPat = null;
            TextLocation nonWordLoc = null;
            int          nonWordIx  = -1;

            // stay at the current location. return copy of the cursor, but with stayatflag
            // turned off.
            if (CurrentWord.StayAtFlag == true)
            {
                nonWordPat = CurrentWord.DelimPattern;
                nonWordLoc = CurrentWord.DelimBx;
                wordPart   = CurrentWord.Word;
                wordBx     = CurrentWord.WordBx;
            }

            else
            {
                #region STEP1 setup the begin pos of the next word.
                // ----------------------------- STEP 1 ------------------------------
                // setup the begin pos of the next word. bx is -1 when there is nothing
                // left to scan.
                int bx;
                {
                    // calc scan start position
                    bx = ScanWord.CalcScanNextStart(ScanStream, Traits, CurrentWord);

                    // advance past whitespace
                    if (bx != -1)
                    {
                        bx = Scanner.ScanNotEqual(ScanStream.Stream, Traits.WhitespacePatterns, bx);
                    }
                }
                // end STEP 1.
                #endregion

                #region STEP 2. Isolate either numeric lit, quoted lit or scan to non word pattern
                // ------------------------------- STEP 2 ----------------------------------
                // Isolate either numeric literal, quoted literal or scan to the next non word
                // pattern.
                LiteralType?litType = null;
                string      litText = null;
                {
                    // got a decimal digit. isolate the numeric literal string.
                    if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
                    {
                        var rv = ScanWord.IsolateNumericLiteral(ScanStream, Traits, bx);
                        litType    = rv.Item1;
                        litText    = rv.Item2;
                        nonWordPat = rv.Item3; // the non word pattern immed after numeric literal
                        nonWordIx  = rv.Item4; // pos of foundPat
                    }

                    // got something.  now scan forward for the pattern that delimits the word.
                    else if (bx != -1)
                    {
                        {
                            var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
                            nonWordPat = rv.Item1;
                            nonWordIx  = rv.Item2;
                        }

                        // got a quote char. Isolate the quoted string, then find the delim that follows
                        // the quoted string.
                        if ((nonWordPat != null) &&
                            (nonWordPat.DelimClassification == DelimClassification.Quote) &&
                            (nonWordIx == bx))
                        {
                            var rv = IsolateQuotedWord(ScanStream, Traits, nonWordIx);
                            litType    = rv.Item1;
                            litText    = rv.Item2;
                            nonWordPat = rv.Item3; // the non word pattern immed after quoted literal
                            nonWordIx  = rv.Item4; // pos of foundPat.
                        }
                    }
                }
                // end STEP 2.
                #endregion

                #region STEP 3 - setup wordBx and wordPart with the found word.
                {
                    // got nothing.
                    if (bx == -1)
                    {
                    }

                    // no delim found. word text all the way to the end.
                    else if (nonWordIx == -1)
                    {
                        var rv = ScanWord.IsolateWordText(
                            ScanStream, Traits, litType, litText, bx, null);
                        wordBx   = rv.Item1;
                        wordPart = rv.Item2;
                    }

                    // got a word and a non word pattern.
                    else if (nonWordIx > bx)
                    {
                        var rv = ScanWord.IsolateWordText(
                            ScanStream, Traits, litType, litText, bx, nonWordIx);
                        wordBx   = rv.Item1;
                        wordPart = rv.Item2;

                        nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                    }

                    // no word. just delim.
                    else
                    {
                        nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);

                        // the delim is comment to end. store as a word.
                        if (nonWordPat.DelimClassification == DelimClassification.CommentToEnd)
                        {
                            var rv     = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.NewLinePatterns);
                            var eolPat = rv.Item1;
                            var eolIx  = rv.Item2;

                            // no new line pattern ahead. the comment is the rest of the
                            // stream and there is no delim.
                            if (eolPat == null)
                            {
                                wordBx   = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                                wordPart = new TextWord(
                                    ScanStream.Substring(nonWordIx), WordClassification.CommentToEnd, Traits);
                                nonWordLoc = null;
                                nonWordPat = null;
                            }

                            // the comment ends just before the new line pattern. the new
                            // line pattern becomes the delim of the comment word.
                            else
                            {
                                wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                                int lx   = eolIx - nonWordIx;
                                var sloc = wordBx.ToStreamLocation(ScanStream);
                                wordPart = new TextWord(
                                    ScanStream.Substring(sloc.Value, lx), WordClassification.CommentToEnd, Traits);
                                nonWordLoc = new StreamLocation(eolIx).ToTextLocation(ScanStream);
                                nonWordPat = eolPat;
                            }
                        }

                        // if the delim pattern is not non word ( a divider ), store the pattern also
                        // as the word.
                        else if (Traits.DelimPatternsThatAreNonWords.Contains(nonWordPat) == false)
                        {
                            wordBx   = nonWordLoc;
                            wordPart = new TextWord(
                                nonWordPat.PatternValue,
                                nonWordPat.DelimClassification.ToWordClassification().Value,
                                Traits);
                        }
                    }
                }
                #endregion

                // delim is whitespace. scan ahead for something more meaningful than whitespace.
                if ((nonWordPat != null) && (Traits.IsWhitespace(nonWordPat)))
                {
                    StreamLocation dx = nonWordLoc.ToStreamLocation(ScanStream);
                    int            fx = Scanner.ScanNotEqual(
                        ScanStream.Stream, Traits.WhitespacePatterns, dx.Value + nonWordPat.Length);

                    // STEP 1 treats -1 as a possible result of ScanNotEqual. Do not probe
                    // the stream at that position. When only whitespace remains, the
                    // whitespace pattern stays as the delim of record.
                    if (fx != -1)
                    {
                        var pat = Traits.DelimPatterns.MatchAt(ScanStream.Stream, fx);
                        if (pat != null)
                        {
                            nonWordLoc = new StreamLocation(fx).ToTextLocation(ScanStream);
                            nonWordPat = pat;
                        }
                    }
                }
            }

            // store the results in the return cursor.
            ScanWordCursor nx = null;
            if ((wordPart == null) && (nonWordPat == null))
            {
                nx          = new ScanWordCursor( );
                nx.Position = RelativePosition.End;
            }
            else
            {
                nx          = new ScanWordCursor(wordPart, wordBx, nonWordLoc, nonWordPat);
                nx.Position = RelativePosition.At;
            }

            return(nx);
        }
Exemplo n.º 22
0
        // ------------------------ ScanNextAtom -------------------------
        // Scans to the next atom in the string. ( a word being the text bounded by the
        // delimiter and whitespace characters as spcfd in the TextTraits argument )
        // Return null when end of string.
        public static ScanAtomCursor ScanNextAtom(
            ScanStream ScanStream,
            TextTraits Traits, ScanAtomCursor CurrentWord)
        {
            PatternScanResults nonWord = null;

            AtomText atomText = null;
            List <MatchScanPattern> atomTextList = null;
            AtomText whitespaceText = null;

            ScanAtomCode?tokenCode      = null; // ScanAtomCode of this token.
            int?         tokenLx        = null;
            ScanAtomCode?priorTokenCode = null;

            bool?priorCodeIsWhitespaceSignificant = null;

            // stay at the current location. return copy of the cursor, but with stayatflag
            // turned off.
            if (CurrentWord.StayAtFlag == true)
            {
                atomText       = CurrentWord.AtomText;
                tokenCode      = atomText.AtomCode;
                priorTokenCode = null;
                nonWord        = new PatternScanResults(
                    CurrentWord.AtomPattern,
                    CurrentWord.StartLoc.ToStreamLocation(ScanStream).Value,
                    CurrentWord.AtomPattern.Length);
            }

            else
            {
                #region STEP1 setup the begin pos of the next word.
                // ----------------------------- STEP 1 ------------------------------
                // setup the begin pos of the next word.
                int bx;
                {
                    // save the ScanAtomCode of the prior word.
                    if ((CurrentWord.Position == RelativePosition.At) ||
                        (CurrentWord.Position == RelativePosition.After))
                    {
                        priorTokenCode = CurrentWord.AtomCode;
                        priorCodeIsWhitespaceSignificant = CurrentWord.WhitespaceIsSignificant;
                    }

                    // calc scan start position
                    bx = ScanAtom.CalcScanNextStart(ScanStream, Traits, CurrentWord);

                    // advance past whitespace
                    if (bx != -1)
                    {
                        int saveBx = bx;
                        bx = Scanner.ScanNotEqual(ScanStream.Stream, Traits.WhitespacePatterns, bx);

                        // there is some whitespace. Isolate it as AtomText.
                        // This method will return the whitespace as the token. But need to look at
                        // the token before and after to classify the whitespace as significant or
                        // not. ( whitespace between identifiers or keywords is significant.
                        // Whitespace between symbols is not significant.
                        // note: even insignificant whitespace is returned as a token because the
                        //       whitespace is needed when redisplaying the statement text.
                        if (bx != saveBx)
                        {
                            int whitespaceEx = -1;
                            if (bx == -1)
                            {
                                whitespaceEx = ScanStream.Stream.Length - 1;
                            }
                            else
                            {
                                whitespaceEx = bx - 1;
                            }
                            int whitespaceLx = whitespaceEx - saveBx + 1;

                            // split the whitespace between space/tab and EOL
                            {
                                int fx1 = ScanStream.Stream.IndexOfAny(new char[] { ' ', '\t' }, saveBx);
                                int fx2 = ScanStream.Stream.IndexOfAny(new char[] { '\r', '\n' }, saveBx);
                                if (fx1 > whitespaceEx)
                                {
                                    fx1 = -1;
                                }
                                if (fx2 > whitespaceEx)
                                {
                                    fx2 = -1;
                                }
                                if ((fx1 == saveBx) && (fx2 != -1))
                                {
                                    whitespaceEx = fx2 - 1;
                                }
                                if ((fx2 == saveBx) && (fx1 != -1))
                                {
                                    whitespaceEx = fx1 - 1;
                                }
                                whitespaceLx = whitespaceEx - saveBx + 1;
                            }

                            string userCode = null;
                            whitespaceText = new AtomText(
                                ScanAtomCode.Whitespace,
                                ScanStream.Stream.Substring(saveBx, whitespaceLx), " ",
                                new StreamLocation(saveBx).ToTextLocation(ScanStream),
                                new StreamLocation(whitespaceEx).ToTextLocation(ScanStream),
                                userCode);
                        }
                    }
                }
                // end STEP 1.
                #endregion

                #region STEP 2. Isolate either numeric lit, quoted lit or identifier/keyword.
                // ------------------------------- STEP 2 ----------------------------------
                // Isolate either numeric literal, quoted literal or scan to the next non word
                // pattern.
                LiteralType?litType = null;
                string      litText = null;
                {
                    // got a decimal digit. isolate the numeric literal string.
                    if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
                    {
                        var rv = Scanner.IsolateNumericLiteral(ScanStream, Traits, bx);
                        litType   = rv.Item1;
                        litText   = rv.Item2;
                        nonWord   = rv.Item3; // the non word pattern immed after numeric literal
                        tokenCode = ScanAtomCode.Numeric;
                    }

                    // got something.  now scan forward for the pattern that delimits the word.
                    else if (bx != -1)
                    {
                        {
                            nonWord = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
                        }

                        // a special value starter. scan further for the spcval word.
                        // If an identifier follows
                        var startPat = nonWord.FindPattern(DelimClassification.SpecialValueStarter);
                        if (startPat != null)
                        {
                            var csr = new ScanAtomCursor(startPat, ScanStream);
                            var nx  = ScanAtom.ScanNextAtom(ScanStream, Traits, csr);
                            if ((nx.Position == RelativePosition.At) &&
                                (nx.AtomCode.IsIdentifier() == true))
                            {
                                atomText = AtomText.Combine(
                                    startPat.AtomText, nx.AtomText, ScanAtomCode.SpecialValue);
                            }
                        }

                        // got the AtomText of the token.
                        if (atomText != null)
                        {
                        }

                        // word chars all the way to the end.
                        else if (nonWord == null)
                        {
                            tokenCode = ScanAtomCode.Identifier;
                            tokenLx   = ScanStream.Stream.Length - bx;
                        }

                        else if (nonWord.FoundAtPosition(DelimClassification.Quote, bx))
                        {
                            var rv = Scanner.IsolateQuotedWord(ScanStream, Traits, bx);
                            litType   = rv.Item1;
                            litText   = rv.Item2;
                            nonWord   = rv.Item3; // the non word pattern immed after quoted literal
                            tokenCode = ScanAtomCode.Quoted;
                        }

                        // delim pattern found past the start of the scan. That means there are
                        // identifier chars from the start of the scan to the found delim.
                        else if (bx != nonWord.Position)
                        {
                            tokenCode = ScanAtomCode.Identifier;
                            tokenLx   = nonWord.Position - bx;
                        }

                        else if (nonWord.IsEmpty == false)
                        {
                            tokenCode =
                                nonWord.FirstFoundPattern.MatchPattern.DelimClassification.ToScanAtomCode();
                        }

                        // should never get here.
                        else
                        {
                            tokenCode = null;
                        }
                    }

                    // attempt to classify the identifier token as a keyword.
                    if (atomText == null)
                    {
                        if ((tokenCode != null) && (tokenCode.Value == ScanAtomCode.Identifier))
                        {
                            var rv = Traits.KeywordPatterns.MatchPatternToSubstring(
                                ScanStream.Stream, bx, tokenLx.Value);
                            var kwdResults = rv.Item3;
                            var kwdPat     = kwdResults.FirstFoundPattern;
                            if (kwdPat != null)
                            {
                                tokenCode = kwdPat.MatchPattern.DelimClassification.ToScanAtomCode();
                                nonWord   = kwdResults;
                            }
                        }
                    }
                }
                // end STEP 2.
                #endregion

                #region STEP 3 - setup atomText of the found token.
                {
                    // got the atomText of the token.
                    if (atomText != null)
                    {
                        nonWord = null;
                    }

                    // got whitespace.
                    else if (whitespaceText != null)
                    {
                        ScanAtomCode wstc = ScanAtomCode.Whitespace;

                        if (priorTokenCode == null)
                        {
                            wstc = ScanAtomCode.InsignificantWhitespace;
                        }
                        else if (tokenCode == null)
                        {
                            wstc = ScanAtomCode.InsignificantWhitespace;
                        }
                        else if ((priorTokenCode.Value.WhitespaceIsSignificant() == true) &&
                                 (tokenCode.Value.WhitespaceIsSignificant() == true))
                        {
                            wstc = ScanAtomCode.Whitespace;
                        }
                        else
                        {
                            wstc = ScanAtomCode.InsignificantWhitespace;
                        }

                        atomText          = whitespaceText;
                        atomText.AtomCode = wstc;
                    }

                    // got nothing.
                    else if (bx == -1)
                    {
                    }

                    // no delim found. word text all the way to the end.
                    else if (nonWord.IsEmpty == true)
                    {
                        if (whitespaceText != null)
                        {
                            atomText = whitespaceText;
                        }

                        else
                        {
                            // get the text from start of scan to end of string.
                            var rv = Scanner.IsolateWordText(
                                ScanStream, Traits, litType, litText, bx, null);
                            atomText = rv.Item3;
                        }
                    }

                    // got a word followed by non word pattern. return the word.
                    else if (nonWord.Position > bx)
                    {
                        if (whitespaceText != null)
                        {
                            atomText = whitespaceText;
                            nonWord  = new PatternScanResults();
                        }

                        else
                        {
                            var rv = Scanner.IsolateWordText(
                                ScanStream, Traits, litType, litText, bx, nonWord.Position);
                            atomText = rv.Item3;
                        }
                    }

                    // no word. just delim.
                    else
                    {
                        // the delim is comment to end. store as a word.
                        if (nonWord.FirstFoundPattern.MatchPattern.DelimClassification ==
                            DelimClassification.CommentToEnd)
                        {
                            var rv = ScanAtom.ClassifyAsComment(ScanStream, Traits, bx);
                            atomText = rv.Item2;
                            nonWord  = rv.Item4;
                        }

                        // the word found is a non word or keyword pattern.
                        else
                        {
                            // got whitespace followed by keyword. Return the whitespace.
                            if ((nonWord.FirstFoundPattern.MatchPattern.DelimClassification
                                 == DelimClassification.Keyword) &&
                                (whitespaceText != null))
                            {
                                atomText = whitespaceText;
                                nonWord  = new PatternScanResults();
                            }

                            // there are more than one scan patterns that match.
                            else if (nonWord.FoundCount > 1)
                            {
                                atomTextList = new List <MatchScanPattern>();

                                foreach (var pat in nonWord)
                                {
                                    pat.AssignAtomText(ScanStream);
                                    atomTextList.Add(pat);
                                }
                            }

                            else
                            {
                                var foundPat = nonWord.FirstFoundPattern;
                                foundPat.AssignAtomText(ScanStream);
                                atomText = foundPat.AtomText;
                            }
                        }
                    }
                }
                #endregion
            }

            // store the results in the return cursor.
            {
                ScanAtomCursor nx = null;
                if (atomText == null)
                {
                    nx          = new ScanAtomCursor();
                    nx.Position = RelativePosition.End;
                }
                else if (atomTextList != null)
                {
                    nx = new ScanAtomCursor(atomTextList);
                }
                else
                {
                    if ((nonWord == null) || (nonWord.IsEmpty == true))
                    {
                        nx = new ScanAtomCursor(atomText, null);
                    }
                    else
                    {
                        nx = new ScanAtomCursor(atomText, nonWord.FirstFoundPattern.MatchPattern);
                    }
                    nx.Position = RelativePosition.At;
                }

                return(nx);
            }
        }
Exemplo n.º 23
0
        // ------------------- CrackUnits_ScanOpenUnit --------------------
        // Scans the xml unit that opens with the "<" found at InBx of mStream.
        // Returns an XmlUnit holding the begin and end position of the unit, the
        // name word and the attributes found between the "<" and ">".
        // UnitCode is Open, or Single when the last delim of the unit is "/".
        // Malformed input is reported through ThrowIncorrectlyFormedXmlException.
        XmlUnit CrackUnits_ScanOpenUnit(int InBx)
        {
            XmlUnit openUnit = new XmlUnit();
            openUnit.UnitCode = XmlUnitCode.Open;

            BoundedString wholeStream = new BoundedString(mStream);

            // every unit opens with "<".
            if (wholeStream[InBx] != '<')
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }
            openUnit.Bx = InBx;

            // find the ">" that ends the unit. Hitting another "<" first, or finding
            // neither char, means the unit is not well formed.
            Scanner.ScanCharResults closeScan = Scanner.ScanEqualAny_BypassQuoted(
                wholeStream, InBx + 1, new char[] { '>', '<' }, QuoteEncapsulation.Double);
            bool closeFound = (closeScan.ResultPos != -1) && (closeScan.ResultChar != '<');
            if (closeFound)
            {
                openUnit.Ex = closeScan.ResultPos;
            }
            else
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }

            // the text between the "<" and the ">" is scanned word by word. "/" and "="
            // act as dividers and the new line sequence counts as whitespace.
            BoundedString unitText =
                new BoundedString(mStream, InBx + 1, closeScan.ResultPos - 1);
            TextTraits traits = new TextTraits();
            traits.OpenNamedBracedPatterns.Clear( );
            traits.DividerPatterns.Add("/", "=", DelimClassification.DividerSymbol);
            traits.WhitespacePatterns.AddDistinct(
                Environment.NewLine, DelimClassification.Whitespace);

            // step thru the words of the unit until the end of the bounded text.
            for (WordCursor csr = Scanner.ScanFirstWord(unitText, traits);
                 csr.IsEndOfString != true;
                 csr = Scanner.ScanNextWord(unitText, csr))
            {
                if (ScanOpenUnit_CursorAtUnitName(csr) == true)
                {
                    // the unit name. a second name is an error.
                    if (openUnit.NameWord == null)
                    {
                        openUnit.NameWord = csr;
                    }
                    else
                    {
                        ThrowIncorrectlyFormedXmlException(InBx);
                    }
                }
                else if (csr.Word == null)
                {
                    // a delim without a word. Only the ending "/" is allowed, and that
                    // one is processed further down.
                    if (csr.DelimValue != "/")
                    {
                        ThrowIncorrectlyFormedXmlException(InBx);
                    }
                }
                else
                {
                    // anything else is an attribute ( a named value pair ).
                    WordCursor valueWord = ScanOpenUnit_Attribute_GetValue(unitText, csr);
                    if (valueWord == null)
                    {
                        ThrowIncorrectlyFormedXmlException(InBx);
                    }
                    else
                    {
                        // note: attribute values are stored in their xml encoded state.
                        openUnit.AddAttribute(csr, valueWord);
                        csr = valueWord;
                    }
                }

                // a "/" delim must be the very last thing in the unit. It marks the
                // unit as a Single ( self closing ) unit.
                if (csr.DelimValue == "/")
                {
                    WordCursor afterSlash = Scanner.ScanNextWord(unitText, csr);
                    if (afterSlash.IsEndOfString == true)
                    {
                        openUnit.UnitCode = XmlUnitCode.Single;
                        break;
                    }

                    ThrowIncorrectlyFormedXmlException(InBx);
                }
            }

            return openUnit;
        }
Exemplo n.º 24
0
        // ------------------- CrackUnits_ScanCloseUnit --------------------
        // Scans the close unit ( "</name>" ) that opens with the "<" found at InBx of
        // mStream. Returns an XmlUnit with UnitCode Close, the begin and end position
        // of the unit and, when present, the name word.
        // Malformed input is reported through ThrowIncorrectlyFormedXmlException.
        private XmlUnit CrackUnits_ScanCloseUnit(int InBx)
        {
            XmlUnit closeUnit = new XmlUnit();
            closeUnit.UnitCode = XmlUnitCode.Close;

            BoundedString wholeStream = new BoundedString(mStream);

            // every unit opens with "<".
            if (wholeStream[InBx] != '<')
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }
            closeUnit.Bx = InBx;

            // find the ">" that ends the unit. Hitting another "<" first, or finding
            // neither char, means the unit is not well formed.
            Scanner.ScanCharResults closeScan = Scanner.ScanEqualAny_BypassQuoted(
                wholeStream, InBx + 1, new char[] { '>', '<' }, QuoteEncapsulation.Double);
            bool closeFound = (closeScan.ResultPos != -1) && (closeScan.ResultChar != '<');
            if (closeFound)
            {
                closeUnit.Ex = closeScan.ResultPos;
            }
            else
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }

            // the text between the "<" and the ">" is scanned word by word, with "/"
            // and "=" acting as dividers.
            BoundedString unitText =
                new BoundedString(mStream, InBx + 1, closeScan.ResultPos - 1);
            TextTraits traits = new TextTraits();
            traits.OpenNamedBracedPatterns.Clear();
            traits.DividerPatterns.Add("/", "=", DelimClassification.DividerSymbol);

            // the first word has to be empty, with "/" as its delim.
            WordCursor csr = Scanner.ScanFirstWord(unitText, traits);
            bool opensWithSlash = (csr.IsDelimOnly) && (csr.DelimValue == "/");
            if (opensWithSlash == false)
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }

            // what follows is the name, and the name must run to the end of the string.
            csr = Scanner.ScanNextWord(unitText, csr);
            bool runsToEnd =
                (csr.IsEndOfString) ||
                (csr.DelimClass == DelimClassification.EndOfString);
            if (runsToEnd == false)
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }

            // keep the element name when there is one.
            if (csr.Word != null)
            {
                closeUnit.NameWord = csr;
            }

            return closeUnit;
        }
Exemplo n.º 25
0
 // Construction step that sets up mTraits: a new TextTraits whose whitespace
 // patterns are replaced with blank, tab, "\r", "\n" and Environment.NewLine.
 private void ConstructCommon()
 {
     mTraits = new TextTraits();

     string[] whitespaceStrings =
     {
         " ", "\t", "\r", "\n", Environment.NewLine
     };
     mTraits.WhitespacePatterns.Replace(whitespaceStrings, DelimClassification.Whitespace);
 }
Exemplo n.º 26
0
        ClassifyAsComment(ScanStream ScanStream, TextTraits Traits, int Bx)
        {
            // Builds the AtomText of the comment that starts at Bx and runs to the
            // next new line pattern ( or to the end of the stream when there is none ).
            // Returns a tuple of:
            //   Item1 - text location of the start of the comment.
            //   Item2 - the AtomText of the comment. AtomCode is EntireLineCommentToEnd
            //           when only blanks precede the comment on its line, otherwise
            //           CommentToEnd.
            //   Item3 - always null. ( nothing in this method assigns it. )
            //   Item4 - results of the scan for the new line that ends the comment.
            ScanPattern nonWordPat = null;

            // look back from the comment: is it the first non blank on its line?
            bool startsTheLine;
            if (Bx == 0)
            {
                startsTheLine = true;
            }
            else
            {
                // step backwards over blanks ( new lines are not skipped ).
                int priorIx = Scanner.ScanReverseNotEqual(
                    ScanStream.Stream, Bx - 1, Traits.WhitespaceWithoutNewLinePatterns);
                if (priorIx == -1)
                {
                    // only blanks all the way back to the start of the string.
                    startsTheLine = true;
                }
                else
                {
                    // a new line pattern ending at the first non blank means there are
                    // only blanks between that new line and the comment.
                    var newLineMatch =
                        Traits.NewLinePatterns.MatchFirstPatternEndsAtStringLocation(
                            ScanStream.Stream, priorIx);
                    startsTheLine = (newLineMatch.Item1 != null);
                }
            }

            ScanAtomCode atomCode = ScanAtomCode.CommentToEnd;
            if (startsTheLine)
            {
                atomCode = ScanAtomCode.EntireLineCommentToEnd;
            }

            // the next new line pattern marks the end of the comment.
            PatternScanResults nonWord =
                Scanner.ScanEqualAny(ScanStream.Stream, Bx, Traits.NewLinePatterns);
            bool commentRunsToEnd = (nonWord.IsEmpty == true);

            TextLocation wordBx = new StreamLocation(Bx).ToTextLocation(ScanStream);
            TextLocation wordEx;
            string       commentText;
            if (commentRunsToEnd)
            {
                // no new line. the comment is the remainder of the text stream.
                int lastIx = ScanStream.Stream.Length - 1;
                wordEx      = new StreamLocation(lastIx).ToTextLocation(ScanStream);
                commentText = ScanStream.Substring(Bx);
            }
            else
            {
                // the comment stops right before the new line pattern.
                int newLineIx = nonWord.Position;
                wordEx      = new StreamLocation(newLineIx - 1).ToTextLocation(ScanStream);
                commentText = ScanStream.Substring(Bx, newLineIx - Bx);
            }

            string   userCode = null;
            AtomText atomText = new AtomText(
                atomCode, commentText, null, wordBx, wordEx, userCode);

            if (commentRunsToEnd == false)
            {
                // NOTE(review): the result of this conversion is never used. The call
                // is kept only so this method does exactly what it did before -
                // confirm it has no purpose and remove it.
                var sloc = wordBx.ToStreamLocation(ScanStream);
            }

            return new Tuple <TextLocation, AtomText, ScanPattern, PatternScanResults>(
                wordBx, atomText, nonWordPat, nonWord);
        }
Exemplo n.º 27
0
        // -------------------- IsolateWord ---------------------------
        // There is a word starting at Bx in Text. Scan to the end of that word.
        // The word is stored in the Results cursor.
        // The return value is the ScanPatternResults of the delim that ends the word.
        // Throws ApplicationException when a quote char is the delim found at the end
        // of an unquoted word.
        private static ScanPatternResults IsolateWord(
            string Text,
            int Bx,
            ref WordCursor Results,
            TextTraits Traits)
        {
            char leadChar = Text[Bx];

            // word starts with the verbatim literal pattern or with a quote char. The
            // quoted word scan returns both the word and the delim after it.
            bool startsLiteral =
                ((Traits.VerbatimLiteralPattern != null) &&
                 (Traits.VerbatimLiteralPattern.Match(Text, Bx))) ||
                (Traits.IsQuoteChar(leadChar) == true);
            if (startsLiteral)
            {
                var    quoted     = ScanWord.IsolateQuotedWord(Text, Bx, Traits);
                int    quotedBx   = quoted.Item1;
                string quotedText = quoted.Item3;
                var    literal    = quoted.Item4;
                ScanPatternResults quotedDelim = quoted.Item5;

                Results.SetWord(quotedText, WordClassification.Quoted, quotedBx);
                Results.Word.LiteralType = literal;
                return quotedDelim;
            }

            // not a literal. Look for the first of the non word patterns of Traits in
            // the remainder of the string.
            ScanPatternResults delim = Scanner.ScanEqualAny(
                Text, Bx, Text.Length - Bx, Traits.NonWordPatterns);
            bool delimFound = (delim.IsNotFound == false);
            DelimClassification delimClass =
                delimFound ? delim.FoundPat.DelimClassification : DelimClassification.None;

            // a quote char inside of the name is an error.
            if (delimClass == DelimClassification.Quote)
            {
                throw new ApplicationException(
                          "quote character immed follows name character at position " +
                          delim.FoundPos.ToString() + " in " + Text);
            }

            // no delim at all. The word is everything up to the end of the string.
            if (delimFound == false)
            {
                string tailText = Text.Substring(Bx);
                Results.SetWord(tailText, WordClassification.Identifier, Bx);
                return delim;
            }

            // open named brace. Such a word combines the name and the braced contents.
            // ( debateable that this feature is needed and should be retained. )
            if (delimClass == DelimClassification.OpenNamedBraced)
            {
                Scanner.ScanWord_IsolateWord_Braced(
                    Text, Bx, delim, ref Results, Traits);
                return delim;
            }

            // delim found past the start position. The word is the text in front of
            // the delim.
            if (delim.FoundPos != Bx)
            {
                string wordText = Text.Substring(Bx, delim.FoundPos - Bx);
                Results.SetWord(wordText, WordClassification.Identifier, Bx);
                return delim;
            }

            // the delim sits right at the start position. Either the delim itself is
            // the word ( an expression symbol ), or it starts a CommentToEnd comment,
            // or the word is empty ( the delim is a comma, semicolon, ... a content
            // divider ).
            if ((Traits.NonDividerIsWord == true) &&
                (Traits.IsDividerDelim(delim.FoundPat.DelimClassification) == false))
            {
                Results.SetWord(
                    delim.FoundPat.PatternValue,
                    delim.FoundPat.DelimClassification.ToWordClassification( ).Value,
                    Bx,
                    delim.FoundPat.LeadChar);
            }
            else if (delimClass == DelimClassification.CommentToEnd)
            {
                // the comment is a word, not a delim. The delim becomes whatever the
                // comment scan returns for the end of the comment.
                delim = Scanner.ScanWord_IsolateWord_CommentToEnd(
                    Text, delim.FoundPos, ref Results, Traits);
            }
            else
            {
                Results.SetNullWord();
            }

            return delim;
        }
        IsolateQuotedWord(
            string Text, int Bx, TextTraits Traits)
        {
            // Isolates the verbatim or quoted literal that starts at Bx in Text.
            // Returns a tuple of:
            //   Item1 - Bx, as passed in.
            //   Item2 - position of the closing quote.
            //   Item3 - text of the literal.
            //   Item4 - literal type ( VerbatimString or String ).
            //   Item5 - scan results of the delim immed after the closing quote.
            //           Built from -1 when the literal ends the string.
            // Throws ApplicationException when no closing quote is found, or when
            // the char after the closing quote is not a non word pattern.
            char        leadChar = Text[Bx];
            int?        closeIx  = null;
            string      wordText = null;
            LiteralType litType  = LiteralType.none;

            bool isVerbatim =
                (Traits.VerbatimLiteralPattern != null) &&
                (Traits.VerbatimLiteralPattern.Match(Text, Bx));

            if (isVerbatim)
            {
                // verbatim string literal.
                var verbatim = VerbatimLiteral.ScanCloseQuote(
                    Text, Traits.VerbatimLiteralPattern, Bx);
                closeIx  = verbatim.Item1;
                wordText = verbatim.Item2;
                litType  = LiteralType.VerbatimString;
            }
            else if (Traits.IsQuoteChar(leadChar) == true)
            {
                // ordinary quoted literal. The text includes both quote chars.
                closeIx = Scanner.ScanCloseQuote(Text, Bx, Traits.QuoteEncapsulation);
                if (closeIx.Value != -1)
                {
                    wordText = Text.Substring(Bx, closeIx.Value - Bx + 1);

                    // correct the following at some point. Should be either string
                    // or char lit.
                    litType = LiteralType.String;
                }
            }

            // neither kind of literal, or the closing quote is missing.
            if ((closeIx == null) || (closeIx.Value == -1))
            {
                throw (new ApplicationException(
                           "Closing quote not found starting at position " +
                           Bx.ToString() + " in " + Text));
            }

            // work out the non word pattern that follows the closing quote.
            ScanPatternResults delim;
            int afterIx = closeIx.Value + 1;
            if (Text.IsPastEnd(afterIx))
            {
                delim = new ScanPatternResults(-1);
            }
            else
            {
                // whatever follows the closing quote has to be a delim.
                delim = Scanner.ScanEqualAny(
                    Text, afterIx, Text.Length - afterIx, Traits.NonWordPatterns);
                if (delim.FoundPos != afterIx)
                {
                    throw new ApplicationException(
                              "invalid char follows close quote at pos " + afterIx.ToString() +
                              " in " + Stringer.Head(Text, 80));
                }
            }

            return new Tuple <int, int?, string, LiteralType, ScanPatternResults>(
                Bx, closeIx, wordText, litType, delim);
        }
Exemplo n.º 29
0
        // ------------------------- ParseAddressString ------------------------
        // Cracks a single mail address string into an EmailAddress.
        //   - a word braced with "<" supplies the Address.
        //   - a word braced with "(" supplies the Comment ( header decoded ).
        //   - all the other words, first thru last, make up the friendly name.
        // When no "<" braced address is found, the friendly name text is stored as
        // the Address.
        public static EmailAddress ParseAddressString(string InString)
        {
            TextTraits traits =
                new TextTraits( ).SetQuoteEncapsulation(QuoteEncapsulation.Escape);
            traits.DividerPatterns.AddDistinct(
                new string[] { " ", "\t" }, Text.Enums.DelimClassification.DividerSymbol);

            // first and last word of the friendly name.
            WordCursor firstFriendly = null;
            WordCursor lastFriendly  = null;

            EmailAddress address = new EmailAddress( );

            // NOTE(review): csr.Word is dereferenced without a null check. If the
            // scanner can return a cursor without a word here, this throws - confirm.
            WordCursor start = Scanner.PositionBeginWord(InString, traits);
            for (WordCursor csr = Scanner.ScanNextWord(InString, start);
                 csr.IsEndOfString == false;
                 csr = Scanner.ScanNextWord(InString, csr))
            {
                bool isBraced = (csr.Word.Class == WordClassification.ContentBraced);

                if (isBraced && (csr.Word.BraceChar == '<'))
                {
                    // <braced> text is the email address itself.
                    address.Address = csr.Word.BracedText;
                }
                else if (isBraced && (csr.Word.BraceChar == '('))
                {
                    // (braced) text is a comment within the address string.
                    address.Comment = csr.Word.BracedText;
                    address.Comment =
                        MimeCommon.DecodeHeaderString_EncodedOnly(address.Comment);
                }
                else
                {
                    // any other word extends the range of the friendly name.
                    if (firstFriendly == null)
                    {
                        firstFriendly = csr;
                    }
                    lastFriendly = csr;
                }
            }

            // pull the friendly name text out of the input, using the word range.
            string friendlyText = null;
            if (firstFriendly != null)
            {
                if (firstFriendly == lastFriendly)
                {
                    friendlyText = firstFriendly.Word.ToString( );
                }
                else
                {
                    int fromIx = firstFriendly.WordBx;
                    int toIx   = lastFriendly.WordEx;
                    friendlyText = InString.Substring(fromIx, toIx - fromIx + 1);
                }
            }

            // the name could be quoted and could contain encoded-words. decode it.
            if (friendlyText != null)
            {
                friendlyText = MimeCommon.DecodeHeaderString_QuotedEncodedEither(friendlyText);
            }

            // without a <braced> address, the friendly name is taken to be the address.
            if (address.Address != null)
            {
                address.FriendlyName = friendlyText;
            }
            else
            {
                address.Address = friendlyText;
            }

            return address;
        }
Exemplo n.º 30
0
        // ------------------------ ScanNextWord -------------------------
        // Scans to the next word in the string. ( a word being the text bounded by the
        // delimeter and whitespace characters as spcfd in the TextTraits of the
        // CurrentWord cursor. The Traits argument itself is not read by this method. )
        //
        // Returns:
        //   - when CurrentWord.StayAtFlag is set: a copy of CurrentWord with the
        //     StayAtFlag turned off. Nothing is scanned.
        //   - otherwise: the cursor of the next word, with Position set to At. At end
        //     of string, Position is End and the delim is set to EndOfString.
        public static WordCursor ScanNextWord(
            string Text, TextTraits Traits, WordCursor CurrentWord)
        {
            // stay at the current location. return copy of the cursor, but with stayatflag
            // turned off.
            // ( the copy used to be built and then dropped, so the caller got null back. )
            if (CurrentWord.StayAtFlag == true)
            {
                WordCursor stayAt = new WordCursor(CurrentWord);
                stayAt.StayAtFlag = false;
                return(stayAt);
            }

            // NOTE(review): nothing visible in this method instantiates results before
            // it is passed by ref to IsolateWord or dereferenced below. Confirm how the
            // cursor is meant to be created ( the WordCursor constructors are not
            // visible from here ) and create it at this point.
            WordCursor         results = null;
            ScanPatternResults spr     = null;

            // calc scan start position
            int Bx = ScanWord.CalcStartBx(Text, CurrentWord);

            // advance past whitespace
            if ((Bx != -1) && (Bx <= (Text.Length - 1)))
            {
                Bx = Scanner.ScanNotEqual(
                    Text, Bx, Text.Length - 1,
                    CurrentWord.TextTraits.WhitespacePatterns).FoundPos;
            }

            // got the start of something. scan for the delimeter (could be the current char)
            DelimClassification sprdc = DelimClassification.None;
            if ((Bx != -1) && (Bx <= (Text.Length - 1)))
            {
                spr =
                    ScanWord.IsolateWord(Text, Bx, ref results, CurrentWord.TextTraits);
                if (spr.IsNotFound == true)
                {
                    sprdc = DelimClassification.EndOfString;
                }
                else
                {
                    sprdc = spr.FoundPat.DelimClassification;
                }
            }

            // nothing left to scan. position the cursor at end of string.
            if (spr == null)
            {
                results.Position = RelativePosition.End;
                results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
            }

            else
            {
                // depending on the word, isolate and store the delim that follows.

                // OpenNamedBraced. delim is the open brace char.
                if (results.WordClassification == WordClassification.OpenNamedBraced)
                {
                    ScanPatternResults spr2;
                    int remLx = Text.Length - Bx;
                    spr2 = Scanner.ScanEqualAny(
                        Text, Bx, remLx, CurrentWord.TextTraits.OpenNamedBracedPatterns);
                    results.SetDelim(
                        Text,
                        spr2.FoundPat.PatternValue,
                        spr2.FoundPos, DelimClassification.OpenNamedBraced);
                }

                // OpenContentBraced. word and delim are the same.
                else if (results.WordClassification == WordClassification.OpenContentBraced)
                {
                    results.SetDelim(
                        Text,
                        results.Word.Value, results.WordBx, DelimClassification.OpenContentBraced);
                }

                // word is CommentToEnd. delim is end of line.
                else if (results.WordClassification == WordClassification.CommentToEnd)
                {
                    results.SetDelim(Text, spr, sprdc);
                }

                // process the results returned by "ScanWord.IsolateWord"
                else
                {
                    ScanWord.IsolateDelim(
                        Text, spr, ref results, CurrentWord.TextTraits);
                }
            }

            // current word position.
            if (results.ScanEx == -1)
            {
                results.Position = RelativePosition.End;
                results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
            }
            else
            {
                results.Position = RelativePosition.At;
            }

            return(results);
        }