Example #1
0
        // -------------------- IsolateDelim ---------------------------
        /// <summary>
        /// Classifies the delimiter that ends the word just isolated and stores it in
        /// <paramref name="Results"/>.
        /// </summary>
        /// <param name="Text">The text being scanned.</param>
        /// <param name="PatternResults">
        /// Results of the scan for the non word pattern that ends the word.
        /// </param>
        /// <param name="Results">Cursor of the word. Receives the delim.</param>
        /// <param name="Traits">
        /// Supplies the VirtualWhitespace setting and the path part delim lookup.
        /// </param>
        private static void IsolateDelim(
            string Text,
            ScanPatternResults PatternResults,
            ref WordCursor Results,
            TextTraits Traits)
        {
            // did not find a nonword char.  must have hit end of string.
            // ( the whitespace flags of the cursor are left as is in this case. )
            if (PatternResults.IsNotFound)
            {
                Results.DelimClass = DelimClassification.EndOfString;
                return;
            }

            // we have a delimiter of some kind.
            DelimClassification sprdc = PatternResults.FoundPat.DelimClassification;

            // The former look ahead scan past the whitespace was removed from here. Its
            // result was never used. IsolateDelim_WhitespaceFollows does its own scan.

            Results.WhitespaceFollowsWord  = false;
            Results.WhitespaceFollowsDelim = false;
            Results.DelimIsWhitespace      = false;

            // whitespace immed follows the word text
            if (sprdc == DelimClassification.Whitespace)
            {
                ScanWord.IsolateDelim_WhitespaceFollows(
                    Text, PatternResults, ref Results, Traits);
                return;
            }

            // From here on the delim is a hard delim ( not whitespace ).

            // Want the openContent brace to be processed as a standalone word. Use
            // virtual whitespace so the word that this open brace is the delim of will
            // have what appears to be a whitespace delim. Then the following word will
            // be the standalone open content brace char.
            if ((sprdc == DelimClassification.OpenContentBraced) &&
                (Traits.VirtualWhitespace == true))
            {
                Results.SetDelim(
                    Text,
                    null, PatternResults.FoundPos, DelimClassification.VirtualWhitespace);
                return;
            }

            // delim is either as classified in the collection of NonWords or is
            // a PathPart delim. The PathPart delim takes precedence.
            ScanPattern pathPat = Traits.GetPathPartDelim(
                Text, PatternResults.FoundPos);
            if (pathPat != null)
            {
                Results.SetDelim(
                    Text,
                    pathPat.PatternValue,
                    PatternResults.FoundPos,
                    DelimClassification.PathSep);
            }
            else
            {
                Results.SetDelim(
                    Text,
                    PatternResults.FoundPat.PatternValue,
                    PatternResults.FoundPos,
                    sprdc);
            }
        }
Example #2
0
        /// <summary>
        /// Resolves the delim of a word that is immediately followed by whitespace.
        /// The whitespace is skipped and the text after it is matched against the non
        /// word patterns. If a non word pattern is found there, and it is not a path
        /// part delim, an open brace or a quote, then the whitespace is disregarded and
        /// that pattern becomes the delim. In all other cases the whitespace is the
        /// delim of record.
        /// </summary>
        /// <param name="Text">The text being scanned.</param>
        /// <param name="PatternResults">
        /// Scan results that locate the whitespace which follows the word.
        /// </param>
        /// <param name="Results">Cursor of the word whose delim is being set.</param>
        /// <param name="Traits">
        /// Supplies the whitespace patterns, the non word patterns and the path part
        /// delim test.
        /// </param>
        private static void IsolateDelim_WhitespaceFollows(
            string Text,
            ScanPatternResults PatternResults,
            ref WordCursor Results,
            TextTraits Traits)
        {
            Results.WhitespaceFollowsWord = true;

            // step over the whitespace to whatever comes after it.
            ScanPatternResults afterWs = Scanner.ScanNotEqual(
                Text, PatternResults.FoundPos, Text.Length - 1,
                Traits.WhitespacePatterns);

            // is there a nonword ( delim ) pattern right after the whitespace?
            ScanPattern hardPat = null;
            if (afterWs.FoundPos != -1)
            {
                hardPat = Traits.NonWordPatterns.MatchPatternsAtStringLocation(
                    Text, afterWs.FoundPos, Text.Length - 1).Item1;
            }

            if (hardPat != null)
            {
                DelimClassification hardClass = hardPat.DelimClassification;

                // The whitespace stays as the delim when what follows it is:
                //   - a sep char in a path name.
                //   - an open brace. braces are considered standalone words.
                //   - a quote char. the quoted string is considered a word.
                bool whitespaceStaysDelim =
                    Traits.IsPathPartDelim(Text, afterWs.FoundPos) ||
                    hardClass.IsOpenBraced( ) ||
                    (hardClass == DelimClassification.Quote);

                // what follows the whitespace is an actual delim.
                if (whitespaceStaysDelim == false)
                {
                    Results.SetDelim(
                        Text,
                        hardPat.PatternValue, afterWs.FoundPos, hardClass);
                    return;
                }
            }

            // the whitespace char is the delim of record.
            ScanWord.IsolateDelim_SetDelimIsWhitespace(
                Text, Traits, Results, PatternResults.FoundPos);
        }
Example #3
0
        // ------------------------ ScanNextWord -------------------------
        // Scans to the next word in the string. ( a word being the text bounded by the
        // delimiter and whitespace characters as specified in the text traits )
        //
        //   Text        - the string being scanned.
        //   Traits      - not read by this method. The scan uses CurrentWord.TextTraits.
        //   CurrentWord - cursor of the word to advance from.
        //
        // Returns:
        //   When CurrentWord.StayAtFlag is set, a copy of CurrentWord with the flag
        //   turned off. Otherwise the cursor of the next word. Its Position is
        //   RelativePosition.End, with an EndOfString delim, when ScanEx is -1.
        //   Otherwise its Position is RelativePosition.At.
        public static WordCursor ScanNextWord(
            string Text, TextTraits Traits, WordCursor CurrentWord)
        {
            // stay at the current location. return copy of the cursor, but with stayatflag
            // turned off.
            // ( the copy used to be built and then dropped, so null was returned. )
            if (CurrentWord.StayAtFlag == true)
            {
                WordCursor nx = new WordCursor(CurrentWord);
                nx.StayAtFlag = false;
                return(nx);
            }

            WordCursor         results = null;
            ScanPatternResults spr     = null;

            // calc scan start position
            int Bx = ScanWord.CalcStartBx(Text, CurrentWord);

            // advance past whitespace
            if ((Bx != -1) && (Bx <= (Text.Length - 1)))
            {
                Bx = Scanner.ScanNotEqual(
                    Text, Bx, Text.Length - 1,
                    CurrentWord.TextTraits.WhitespacePatterns).FoundPos;
            }

            // got the start of something. scan for the delimiter (could be the current char)
            DelimClassification sprdc = DelimClassification.None;
            if ((Bx != -1) && (Bx <= (Text.Length - 1)))
            {
                spr =
                    ScanWord.IsolateWord(Text, Bx, ref results, CurrentWord.TextTraits);
                if (spr.IsNotFound == true)
                {
                    sprdc = DelimClassification.EndOfString;
                }
                else
                {
                    sprdc = spr.FoundPat.DelimClassification;
                }
            }

            // NOTE(review): nothing in this method instantiates "results". It is only
            // handed by ref to IsolateWord. When no word start was found ( spr == null )
            // it is still null here and the next statement will throw. Confirm where
            // the cursor is meant to be allocated.
            if (spr == null)
            {
                results.Position = RelativePosition.End;
                results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
            }

            else
            {
                // depending on the word, isolate and store the delim that follows.

                // OpenNamedBraced. delim is the open brace char.
                if (results.WordClassification == WordClassification.OpenNamedBraced)
                {
                    ScanPatternResults spr2;
                    int remLx = Text.Length - Bx;
                    spr2 = Scanner.ScanEqualAny(
                        Text, Bx, remLx, CurrentWord.TextTraits.OpenNamedBracedPatterns);
                    results.SetDelim(
                        Text,
                        spr2.FoundPat.PatternValue,
                        spr2.FoundPos, DelimClassification.OpenNamedBraced);
                }

                // OpenContentBraced. word and delim are the same.
                else if (results.WordClassification == WordClassification.OpenContentBraced)
                {
                    results.SetDelim(
                        Text,
                        results.Word.Value, results.WordBx, DelimClassification.OpenContentBraced);
                }

                // word is CommentToEnd. delim is end of line.
                else if (results.WordClassification == WordClassification.CommentToEnd)
                {
                    results.SetDelim(Text, spr, sprdc);
                }

                // process the NonWordResults returned by "ScanWord_IsolateWord"
                else
                {
                    ScanWord.IsolateDelim(
                        Text, spr, ref results, CurrentWord.TextTraits);
                }
            }

            // current word position.
            if (results.ScanEx == -1)
            {
                results.Position = RelativePosition.End;
                results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
            }
            else
            {
                results.Position = RelativePosition.At;
            }

            return(results);
        }
        // -------------------- IsolateWord ---------------------------
        // Isolates the word that starts at position Bx of Text.
        // The word is stored in the Results cursor.
        // The return value is the ScanPatternResults of the delim that ends the word.
        // Throws ApplicationException when a quote char is found before any other non
        // word pattern in a word that did not start as a quoted or verbatim literal.
        private static ScanPatternResults IsolateWord(
            string Text,
            int Bx,
            ref WordCursor Results,
            TextTraits Traits)
        {
            char leadChar = Text[Bx];

            // ---- the word is a verbatim string literal or a quoted literal. ----
            bool startsVerbatim =
                (Traits.VerbatimLiteralPattern != null) &&
                (Traits.VerbatimLiteralPattern.Match(Text, Bx));
            if (startsVerbatim || (Traits.IsQuoteChar(leadChar) == true))
            {
                var    quoted     = ScanWord.IsolateQuotedWord(Text, Bx, Traits);
                int    quotedBx   = quoted.Item1;
                string quotedText = quoted.Item3;
                var    quotedType = quoted.Item4;
                Results.SetWord(quotedText, WordClassification.Quoted, quotedBx);
                Results.Word.LiteralType = quotedType;
                return(quoted.Item5);
            }

            // ---- Scan the string for any of the non word patterns in Traits. ----
            ScanPatternResults delim = Scanner.ScanEqualAny(
                Text, Bx, Text.Length - Bx, Traits.NonWordPatterns);
            DelimClassification delimClass = DelimClassification.None;
            if (delim.IsNotFound == false)
            {
                delimClass = delim.FoundPat.DelimClassification;
            }

            // a quote character within the name.  this is an error.
            if (delimClass == DelimClassification.Quote)
            {
                throw new ApplicationException(
                          "quote character immed follows name character at position " +
                          delim.FoundPos.ToString() + " in " + Text);
            }

            // no delim found. the word runs to the end of the string.
            if (delim.IsNotFound)
            {
                Results.SetWord(
                    Text.Substring(Bx), WordClassification.Identifier, Bx);
                return(delim);
            }

            // found an open named brace char.
            // Open named braced words combine the word and the braced contents.
            // debatable that this feature is needed and should be retained.
            if (delimClass == DelimClassification.OpenNamedBraced)
            {
                Scanner.ScanWord_IsolateWord_Braced(
                    Text, Bx, delim, ref Results, Traits);
                return(delim);
            }

            // the delim is located after the start of the word. the word is the text
            // up to the delim.
            if (delim.FoundPos != Bx)
            {
                int wordLx = delim.FoundPos - Bx;
                Results.SetWord(
                    Text.Substring(Bx, wordLx), WordClassification.Identifier, Bx);
                return(delim);
            }

            // From here on the delim is at the same position as the word. Either the
            // word is the delim ( an expression symbol ) or the word is empty ( the
            // delim is a comma, semicolon, ... a content divider )

            // a non divider delim is stored as the word.
            if ((Traits.NonDividerIsWord == true) &&
                (Traits.IsDividerDelim(delim.FoundPat.DelimClassification) == false))
            {
                Results.SetWord(
                    delim.FoundPat.PatternValue,
                    delim.FoundPat.DelimClassification.ToWordClassification( ).Value,
                    Bx,
                    delim.FoundPat.LeadChar);
                return(delim);
            }

            // start of CommentToEnd comment. This is a word, not a delim. The delim
            // returned is the one returned by the comment scan.
            if (delimClass == DelimClassification.CommentToEnd)
            {
                return(Scanner.ScanWord_IsolateWord_CommentToEnd(
                           Text, delim.FoundPos, ref Results, Traits));
            }

            // empty word in front of a divider.
            Results.SetNullWord();
            return(delim);
        }
Example #5
0
        // ------------------------ ScanNextWord -------------------------
        // Scans to the next word in the stream. ( a word being the text bounded by the
        // delimiter and whitespace patterns as specified in the TextTraits argument )
        //
        //   ScanStream  - the stream being scanned.
        //   Traits      - supplies the whitespace, delim and new line patterns.
        //   CurrentWord - cursor of the word to advance from.
        //
        // Always returns a new ScanWordCursor. This method does not return null.
        // When neither a word nor a delim pattern was found, the returned cursor has
        // Position set to RelativePosition.End. Otherwise it holds the word and delim
        // and has Position set to RelativePosition.At.
        public static ScanWordCursor ScanNextWord(
            ScanStream ScanStream,
            TextTraits Traits, ScanWordCursor CurrentWord)
        {
            // components of the next word.
            TextWord     wordPart   = null;
            TextLocation wordBx     = null;
            ScanPattern  nonWordPat = null;
            TextLocation nonWordLoc = null;
            int          nonWordIx  = -1;

            // stay at the current location. the components of the current word are
            // carried over as is into the cursor built at the end of this method.
            // NOTE(review): presumably the cursor constructor leaves StayAtFlag turned
            // off - confirm.
            if (CurrentWord.StayAtFlag == true)
            {
                nonWordPat = CurrentWord.DelimPattern;
                nonWordLoc = CurrentWord.DelimBx;
                wordPart   = CurrentWord.Word;
                wordBx     = CurrentWord.WordBx;
            }

            else
            {
                #region STEP1 setup the begin pos of the next word.
                // ----------------------------- STEP 1 ------------------------------
                // setup the begin pos of the next word.
                // bx == -1 is treated below as "nothing left to scan".
                int bx;
                {
                    // calc scan start position
                    bx = ScanWord.CalcScanNextStart(ScanStream, Traits, CurrentWord);

                    // advance past whitespace
                    if (bx != -1)
                    {
                        bx = Scanner.ScanNotEqual(ScanStream.Stream, Traits.WhitespacePatterns, bx);
                    }
                }
                // end STEP 1.
                #endregion

                #region STEP 2. Isolate either numeric lib, quoted lit or scan to non word pattern
                // ------------------------------- STEP 2 ----------------------------------
                // Isolate either numeric literal, quoted literal or scan to the next non word
                // pattern.
                // litType and litText are only set when a literal was isolated.
                LiteralType?litType = null;
                string      litText = null;
                {
                    // got a decimal digit. isolate the numeric literal string.
                    if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
                    {
                        var rv = ScanWord.IsolateNumericLiteral(ScanStream, Traits, bx);
                        litType    = rv.Item1;
                        litText    = rv.Item2;
                        nonWordPat = rv.Item3; // the non word pattern immed after numeric literal
                        nonWordIx  = rv.Item4; // pos of foundPat
                    }

                    // got something.  now scan forward for the pattern that delimits the word.
                    else if (bx != -1)
                    {
                        {
                            var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
                            nonWordPat = rv.Item1;
                            nonWordIx  = rv.Item2;
                        }

                        // got a quote char. Isolate the quoted string, then find the delim that follows
                        // the quoted string.
                        // ( only when the quote is the first char of the word, nonWordIx == bx. )
                        if ((nonWordPat != null) &&
                            (nonWordPat.DelimClassification == DelimClassification.Quote) &&
                            (nonWordIx == bx))
                        {
                            var rv = IsolateQuotedWord(ScanStream, Traits, nonWordIx);
                            litType    = rv.Item1;
                            litText    = rv.Item2;
                            nonWordPat = rv.Item3; // the non word pattern immed after quoted literal
                            nonWordIx  = rv.Item4; // pos of foundPat.
                        }
                    }
                }
                // end STEP 2.
                #endregion

                #region STEP 3 - setup wordBx and wordPart with the found word.
                {
                    // got nothing. word and delim components stay null.
                    if (bx == -1)
                    {
                    }

                    // no delim found. word text all the way to the end.
                    else if (nonWordIx == -1)
                    {
                        var rv = ScanWord.IsolateWordText(
                            ScanStream, Traits, litType, litText, bx, null);
                        wordBx   = rv.Item1;
                        wordPart = rv.Item2;

                        // the code between #if skip and #endif is only compiled when the
                        // symbol "skip" is defined.
#if skip
                        wordBx = new StreamLocation(bx).ToTextLocation(ScanStream);
                        if (litType != null)
                        {
                            wordPart = new TextWord(litText, WordClassification.Quoted, Traits);
                        }
                        else
                        {
                            wordPart = new TextWord(
                                ScanStream.Substring(bx), WordClassification.Identifier, Traits);
                        }
#endif
                    }

                    // got a word and a non word pattern.
                    else if (nonWordIx > bx)
                    {
                        var rv = ScanWord.IsolateWordText(
                            ScanStream, Traits, litType, litText, bx, nonWordIx);
                        wordBx   = rv.Item1;
                        wordPart = rv.Item2;

                        // the code between #if skip and #endif is only compiled when the
                        // symbol "skip" is defined.
#if skip
                        wordBx = new StreamLocation(bx).ToTextLocation(ScanStream);
                        int lx = foundIx - bx;
                        wordPart = new TextWord(
                            ScanStream.Substring(bx, lx), WordClassification.Identifier, Traits);
#endif

                        nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                    }

                    // no word. just delim.
                    else
                    {
                        nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);

                        // the delim is comment to end. store as a word.
                        if (nonWordPat.DelimClassification == DelimClassification.CommentToEnd)
                        {
                            // look for the new line pattern that ends the comment.
                            var rv     = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.NewLinePatterns);
                            var eolPat = rv.Item1;
                            var eolIx  = rv.Item2;

                            // no new line. the comment runs to the end of the stream and
                            // there is no delim.
                            if (eolPat == null)
                            {
                                wordBx   = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                                wordPart = new TextWord(
                                    ScanStream.Substring(nonWordIx), WordClassification.CommentToEnd, Traits);
                                nonWordLoc = null;
                                nonWordPat = null;
                            }

                            // the comment is the text up to the new line. the new line
                            // pattern becomes the delim.
                            else
                            {
                                wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                                int lx   = eolIx - nonWordIx;
                                var sloc = wordBx.ToStreamLocation(ScanStream);
                                wordPart = new TextWord(
                                    ScanStream.Substring(sloc.Value, lx), WordClassification.CommentToEnd, Traits);
                                nonWordLoc = new StreamLocation(eolIx).ToTextLocation(ScanStream);
                                nonWordPat = eolPat;
                            }
                        }

                        // if the delim pattern is not non word ( a divider ), store the pattern also
                        // as the word.
                        else if (Traits.DelimPatternsThatAreNonWords.Contains(nonWordPat) == false)
                        {
                            wordBx   = nonWordLoc;
                            wordPart = new TextWord(
                                nonWordPat.PatternValue,
                                nonWordPat.DelimClassification.ToWordClassification().Value,
                                Traits);
                        }
                    }
                }
                #endregion

                // delim is whitespace. scan ahead for something more meaningful than whitespace.
                // if a delim pattern starts right after the whitespace, that pattern
                // replaces the whitespace as the delim.
                // NOTE(review): fx is passed to MatchAt without a check. Step 1 treats a
                // -1 from ScanNotEqual as "nothing found" - confirm MatchAt tolerates -1.
                if ((nonWordPat != null) && (Traits.IsWhitespace(nonWordPat)))
                {
                    StreamLocation dx = nonWordLoc.ToStreamLocation(ScanStream);
                    int            fx = Scanner.ScanNotEqual(
                        ScanStream.Stream, Traits.WhitespacePatterns, dx.Value + nonWordPat.Length);
                    var pat = Traits.DelimPatterns.MatchAt(ScanStream.Stream, fx);
                    if (pat != null)
                    {
                        nonWordLoc = new StreamLocation(fx).ToTextLocation(ScanStream);
                        nonWordPat = pat;
                    }
                }
            }

            // store the results in the return cursor.
            // no word and no delim means the scan is at the end.
            ScanWordCursor nx = null;
            if ((wordPart == null) && (nonWordPat == null))
            {
                nx          = new ScanWordCursor( );
                nx.Position = RelativePosition.End;
            }
            else
            {
                nx          = new ScanWordCursor(wordPart, wordBx, nonWordLoc, nonWordPat);
                nx.Position = RelativePosition.At;
            }

            return(nx);
        }