// -------------------- IsolateDelim ---------------------------
/// <summary>
/// Stores in <paramref name="Results"/> the delimiter that ends the current word,
/// based on the non-word pattern located by the preceding word scan.
/// </summary>
/// <param name="Text">The text being scanned.</param>
/// <param name="PatternResults">
/// Result of the scan for the non-word pattern that ends the word. When it reports
/// "not found" the word ran to the end of the string.
/// </param>
/// <param name="Results">Cursor that receives the delimiter information.</param>
/// <param name="Traits">
/// Scanning rules: virtual whitespace setting and path-part delimiter lookup.
/// </param>
private static void IsolateDelim(
  string Text,
  ScanPatternResults PatternResults,
  ref WordCursor Results,
  TextTraits Traits)
{
  // did not find a nonword char. must have hit end of string.
  if (PatternResults.IsNotFound)
  {
    Results.DelimClass = DelimClassification.EndOfString;
    return;
  }

  // we have a delimiter of some kind.
  DelimClassification sprdc = PatternResults.FoundPat.DelimClassification;

  // NOTE: an earlier version scanned ahead past the whitespace at this point and
  // then discarded the result. That lookahead is performed by
  // IsolateDelim_WhitespaceFollows, so the redundant scan has been removed.

  Results.WhitespaceFollowsWord = false;
  Results.WhitespaceFollowsDelim = false;
  Results.DelimIsWhitespace = false;

  // whitespace immed follows the word text.
  if (sprdc == DelimClassification.Whitespace)
  {
    ScanWord.IsolateDelim_WhitespaceFollows(
      Text, PatternResults, ref Results, Traits);
    return;
  }

  // the delim is a hard delim ( not whitespace ).

  // Want the open content brace to be processed as a standalone word. Use
  // virtual whitespace so the word that this open brace is the delim of will
  // have what appears to be a whitespace delim. Then the following word will
  // be the standalone open content brace char.
  if ((sprdc == DelimClassification.OpenContentBraced)
    && (Traits.VirtualWhitespace == true))
  {
    Results.SetDelim(
      Text, null, PatternResults.FoundPos, DelimClassification.VirtualWhitespace);
    return;
  }

  // delim is either as classified in the collection of NonWords or is
  // a PathPart delim.
  ScanPattern pat = Traits.GetPathPartDelim(Text, PatternResults.FoundPos);
  if (pat != null)
  {
    Results.SetDelim(
      Text, pat.PatternValue, PatternResults.FoundPos, DelimClassification.PathSep);
  }
  else
  {
    Results.SetDelim(
      Text, PatternResults.FoundPat.PatternValue, PatternResults.FoundPos, sprdc);
  }
}
/// <summary>
/// Resolves the delimiter of a word that is immediately followed by whitespace.
/// If the first non-whitespace text after the word is an ordinary delimiter, the
/// whitespace is disregarded and that delimiter becomes the delim of the word.
/// If nothing recognizable follows, or what follows is a path separator, an open
/// brace or a quote, the whitespace itself remains the delim of record.
/// </summary>
/// <param name="Text">The text being scanned.</param>
/// <param name="PatternResults">
/// Scan result whose FoundPos is the position of the whitespace after the word.
/// </param>
/// <param name="Results">Cursor that receives the delimiter information.</param>
/// <param name="Traits">
/// Supplies the whitespace patterns, the non-word patterns and the path-part test.
/// </param>
private static void IsolateDelim_WhitespaceFollows(
  string Text,
  ScanPatternResults PatternResults,
  ref WordCursor Results,
  TextTraits Traits)
{
  Results.WhitespaceFollowsWord = true;

  int lastIx = Text.Length - 1;

  // skip over the whitespace to whatever comes next.
  ScanPatternResults afterWhitespace = Scanner.ScanNotEqual(
    Text, PatternResults.FoundPos, lastIx, Traits.WhitespacePatterns);

  // see whether the text after the whitespace starts with a non-word pattern.
  ScanPattern followingPat = null;
  if (afterWhitespace.FoundPos != -1)
  {
    var match = Traits.NonWordPatterns.MatchPatternsAtStringLocation(
      Text, afterWhitespace.FoundPos, lastIx);
    followingPat = match.Item1;
  }

  // The whitespace stays the delim when:
  //   - nothing, or only word text, follows the whitespace
  //   - the following char is a separator within a path name
  //   - the following char opens a brace ( braces are standalone words )
  //   - the following char is a quote ( the quoted string is a word )
  bool whitespaceIsDelim =
    (followingPat == null)
    || Traits.IsPathPartDelim(Text, afterWhitespace.FoundPos)
    || followingPat.DelimClassification.IsOpenBraced()
    || (followingPat.DelimClassification == DelimClassification.Quote);

  if (whitespaceIsDelim)
  {
    ScanWord.IsolateDelim_SetDelimIsWhitespace(
      Text, Traits, Results, PatternResults.FoundPos);
  }
  else
  {
    // an actual delim follows the whitespace. it becomes the delim of the word.
    Results.SetDelim(
      Text,
      followingPat.PatternValue,
      afterWhitespace.FoundPos,
      followingPat.DelimClassification);
  }
}
// ------------------------ ScanNextWord -------------------------
/// <summary>
/// Scans to the next word in the string ( a word being the text bounded by the
/// delimiter and whitespace characters as specified in the text traits of
/// <paramref name="CurrentWord"/> ).
/// </summary>
/// <param name="Text">The text being scanned.</param>
/// <param name="Traits">
/// Not read by this overload; the traits of <paramref name="CurrentWord"/> are used.
/// </param>
/// <param name="CurrentWord">Cursor of the word to advance from.</param>
/// <returns>
/// A cursor for the next word. When <c>CurrentWord.StayAtFlag</c> is set, a copy of
/// <paramref name="CurrentWord"/> with the flag turned off. At end of string the
/// cursor has Position set to <c>RelativePosition.End</c>.
/// </returns>
public static WordCursor ScanNextWord(
  string Text,
  TextTraits Traits,
  WordCursor CurrentWord)
{
  int Bx;
  WordCursor results = null;
  ScanPatternResults spr = null;

  // stay at the current location. return copy of the cursor, but with stayatflag
  // turned off.
  if (CurrentWord.StayAtFlag == true)
  {
    // the copy must be the returned cursor. Previously it was built and then
    // dropped, so the method returned null on this path.
    results = new WordCursor(CurrentWord);
    results.StayAtFlag = false;
  }

  else
  {
    // NOTE(review): on this path `results` is still null when it is handed to
    // IsolateWord and when it is dereferenced in the `spr == null` branch below.
    // Allocating it requires a WordCursor constructor / initialization that is
    // not visible in this part of the file - confirm and fix.

    // calc scan start position
    Bx = ScanWord.CalcStartBx(Text, CurrentWord);

    // advance past whitespace
    if ((Bx != -1) && (Bx <= (Text.Length - 1)))
    {
      Bx = Scanner.ScanNotEqual(
        Text, Bx, Text.Length - 1,
        CurrentWord.TextTraits.WhitespacePatterns).FoundPos;
    }

    // got the start of something. scan for the delimiter (could be the current char)
    spr = null;
    DelimClassification sprdc = DelimClassification.None;
    if ((Bx != -1) && (Bx <= (Text.Length - 1)))
    {
      spr = ScanWord.IsolateWord(Text, Bx, ref results, CurrentWord.TextTraits);
      if (spr.IsNotFound == true)
      {
        sprdc = DelimClassification.EndOfString;
      }
      else
      {
        sprdc = spr.FoundPat.DelimClassification;
      }
    }

    // nothing left to scan.
    if (spr == null)
    {
      results.Position = RelativePosition.End;
      results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
    }

    else
    {
      // depending on the word, isolate and store the delim that follows.

      // OpenNamedBraced. delim is the open brace char.
      if (results.WordClassification == WordClassification.OpenNamedBraced)
      {
        ScanPatternResults spr2;
        int remLx = Text.Length - Bx;
        spr2 = Scanner.ScanEqualAny(
          Text, Bx, remLx, CurrentWord.TextTraits.OpenNamedBracedPatterns);
        results.SetDelim(
          Text,
          spr2.FoundPat.PatternValue,
          spr2.FoundPos, DelimClassification.OpenNamedBraced);
      }

      // OpenContentBraced. word and delim are the same.
      else if (results.WordClassification == WordClassification.OpenContentBraced)
      {
        results.SetDelim(
          Text,
          results.Word.Value, results.WordBx, DelimClassification.OpenContentBraced);
      }

      // word is CommentToEnd. delim is end of line.
      else if (results.WordClassification == WordClassification.CommentToEnd)
      {
        results.SetDelim(Text, spr, sprdc);
      }

      // process the non-word results returned by IsolateWord.
      else
      {
        ScanWord.IsolateDelim(
          Text, spr, ref results, CurrentWord.TextTraits);
      }
    }

    // current word position.
    if (results.ScanEx == -1)
    {
      results.Position = RelativePosition.End;
      results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
    }
    else
    {
      results.Position = RelativePosition.At;
    }
  }

  return (results);
}
// -------------------- IsolateWord ---------------------------
/// <summary>
/// A word starts at <paramref name="Bx"/>. Scans to the end of that word and
/// stores the word in <paramref name="Results"/>.
/// </summary>
/// <param name="Text">The text being scanned.</param>
/// <param name="Bx">Position in <paramref name="Text"/> where the word starts.</param>
/// <param name="Results">Cursor that receives the isolated word.</param>
/// <param name="Traits">
/// Scanning rules: quote chars, verbatim literal pattern and non-word patterns.
/// </param>
/// <returns>The scan results of the delim that ends the word.</returns>
/// <exception cref="ApplicationException">
/// A quote character immediately follows a name character.
/// </exception>
private static ScanPatternResults IsolateWord(
  string Text,
  int Bx,
  ref WordCursor Results,
  TextTraits Traits)
{
  char firstChar = Text[Bx];

  // is start of either verbatim string literal or quoted literal.
  bool startsLiteral =
    ((Traits.VerbatimLiteralPattern != null)
      && (Traits.VerbatimLiteralPattern.Match(Text, Bx)))
    || (Traits.IsQuoteChar(firstChar) == true);

  if (startsLiteral)
  {
    var quoted = ScanWord.IsolateQuotedWord(Text, Bx, Traits);
    int quotedBx = quoted.Item1;
    string quotedText = quoted.Item3;
    var literalType = quoted.Item4;
    ScanPatternResults quotedDelim = quoted.Item5;

    Results.SetWord(quotedText, WordClassification.Quoted, quotedBx);
    Results.Word.LiteralType = literalType;
    return quotedDelim;
  }

  // Scan the string for any of the non word patterns specified in Traits.
  ScanPatternResults delim = Scanner.ScanEqualAny(
    Text, Bx, Text.Length - Bx, Traits.NonWordPatterns);

  DelimClassification delimClass = DelimClassification.None;
  if (!delim.IsNotFound)
  {
    delimClass = delim.FoundPat.DelimClassification;
  }

  // a quote character within the name. this is an error.
  if (delimClass == DelimClassification.Quote)
  {
    throw new ApplicationException(
      "quote character immed follows name character at position " +
      delim.FoundPos.ToString() + " in " + Text);
  }

  // no delim found. all word to the end of the string.
  if (delim.IsNotFound)
  {
    Results.SetWord(Text.Substring(Bx), WordClassification.Identifier, Bx);
  }

  // found an open named brace char.
  // Open named braced words are words that combine the word and the braced
  // contents. Debatable that this feature is needed and should be retained.
  else if (delimClass == DelimClassification.OpenNamedBraced)
  {
    Scanner.ScanWord_IsolateWord_Braced(
      Text, Bx, delim, ref Results, Traits);
  }

  // we have a word that ends with a delim.
  else if (delim.FoundPos != Bx)
  {
    int wordLx = delim.FoundPos - Bx;
    Results.SetWord(
      Text.Substring(Bx, wordLx), WordClassification.Identifier, Bx);
  }

  // From here on the delim is at the same position as the word. So either the
  // word is the delim ( an expression symbol ) or the word is empty ( the delim
  // is a comma, semicolon, ... a content divider ).
  else if ((Traits.NonDividerIsWord == true)
    && (Traits.IsDividerDelim(delim.FoundPat.DelimClassification) == false))
  {
    Results.SetWord(
      delim.FoundPat.PatternValue,
      delim.FoundPat.DelimClassification.ToWordClassification().Value,
      Bx,
      delim.FoundPat.LeadChar);
  }

  // start of CommentToEnd comment. This is a word, not a delim. Find the
  // end of the comment and set the delim to that end position.
  else if (delimClass == DelimClassification.CommentToEnd)
  {
    delim = Scanner.ScanWord_IsolateWord_CommentToEnd(
      Text, delim.FoundPos, ref Results, Traits);
  }

  else
  {
    Results.SetNullWord();
  }

  // return ScanPatternResults of the delim that ends the word.
  return delim;
}
// ------------------------ ScanNextWord -------------------------
/// <summary>
/// Scans to the next word in the stream ( a word being the text bounded by the
/// delimiter and whitespace patterns specified in <paramref name="Traits"/> ).
/// </summary>
/// <param name="ScanStream">The stream being scanned.</param>
/// <param name="Traits">
/// Scanning rules: whitespace, delimiter and new line patterns.
/// </param>
/// <param name="CurrentWord">Cursor of the word to advance from.</param>
/// <returns>
/// A new cursor. Position is <c>RelativePosition.End</c> when neither a word nor a
/// delimiter was found, otherwise <c>RelativePosition.At</c>. When
/// <c>CurrentWord.StayAtFlag</c> is set, the new cursor is built from the word and
/// delimiter of <paramref name="CurrentWord"/>.
/// </returns>
public static ScanWordCursor ScanNextWord(
  ScanStream ScanStream,
  TextTraits Traits,
  ScanWordCursor CurrentWord)
{
  // components of the next word.
  TextWord wordPart = null;
  TextLocation wordBx = null;
  ScanPattern nonWordPat = null;
  TextLocation nonWordLoc = null;
  int nonWordIx = -1;

  // stay at the current location. the returned cursor is built from the
  // components of the current cursor.
  if (CurrentWord.StayAtFlag == true)
  {
    nonWordPat = CurrentWord.DelimPattern;
    nonWordLoc = CurrentWord.DelimBx;
    wordPart = CurrentWord.Word;
    wordBx = CurrentWord.WordBx;
  }

  else
  {
    #region STEP 1 - setup the begin pos of the next word.
    int bx;
    {
      // calc scan start position
      bx = ScanWord.CalcScanNextStart(ScanStream, Traits, CurrentWord);

      // advance past whitespace
      if (bx != -1)
      {
        bx = Scanner.ScanNotEqual(ScanStream.Stream, Traits.WhitespacePatterns, bx);
      }
    }
    #endregion

    #region STEP 2 - isolate numeric literal, quoted literal or scan to non word pattern
    LiteralType? litType = null;
    string litText = null;
    {
      // got a decimal digit. isolate the numeric literal string.
      if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
      {
        var rv = ScanWord.IsolateNumericLiteral(ScanStream, Traits, bx);
        litType = rv.Item1;
        litText = rv.Item2;
        nonWordPat = rv.Item3;  // the non word pattern immed after numeric literal
        nonWordIx = rv.Item4;   // pos of the found pattern
      }

      // got something. now scan forward for the pattern that delimits the word.
      else if (bx != -1)
      {
        {
          var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
          nonWordPat = rv.Item1;
          nonWordIx = rv.Item2;
        }

        // got a quote char. Isolate the quoted string, then find the delim that
        // follows the quoted string.
        if ((nonWordPat != null)
          && (nonWordPat.DelimClassification == DelimClassification.Quote)
          && (nonWordIx == bx))
        {
          var rv = IsolateQuotedWord(ScanStream, Traits, nonWordIx);
          litType = rv.Item1;
          litText = rv.Item2;
          nonWordPat = rv.Item3;  // the non word pattern immed after quoted literal
          nonWordIx = rv.Item4;   // pos of the found pattern
        }
      }
    }
    #endregion

    #region STEP 3 - setup wordBx and wordPart with the found word.
    {
      // got nothing.
      if (bx == -1)
      {
      }

      // no delim found. word text all the way to the end.
      else if (nonWordIx == -1)
      {
        var rv = ScanWord.IsolateWordText(
          ScanStream, Traits, litType, litText, bx, null);
        wordBx = rv.Item1;
        wordPart = rv.Item2;
      }

      // got a word and a non word pattern.
      else if (nonWordIx > bx)
      {
        var rv = ScanWord.IsolateWordText(
          ScanStream, Traits, litType, litText, bx, nonWordIx);
        wordBx = rv.Item1;
        wordPart = rv.Item2;

        nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
      }

      // no word. just delim.
      else
      {
        nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);

        // the delim is comment to end. store as a word.
        if (nonWordPat.DelimClassification == DelimClassification.CommentToEnd)
        {
          var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.NewLinePatterns);
          var eolPat = rv.Item1;
          var eolIx = rv.Item2;

          // no end of line. the comment runs to the end of the stream and
          // there is no delim.
          if (eolPat == null)
          {
            wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
            wordPart = new TextWord(
              ScanStream.Substring(nonWordIx), WordClassification.CommentToEnd, Traits);
            nonWordLoc = null;
            nonWordPat = null;
          }

          // the comment ends at the new line pattern, which becomes the delim.
          else
          {
            wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
            int lx = eolIx - nonWordIx;
            var sloc = wordBx.ToStreamLocation(ScanStream);
            wordPart = new TextWord(
              ScanStream.Substring(sloc.Value, lx), WordClassification.CommentToEnd, Traits);
            nonWordLoc = new StreamLocation(eolIx).ToTextLocation(ScanStream);
            nonWordPat = eolPat;
          }
        }

        // if the delim pattern is not non word ( a divider ), store the pattern also
        // as the word.
        else if (Traits.DelimPatternsThatAreNonWords.Contains(nonWordPat) == false)
        {
          wordBx = nonWordLoc;
          wordPart = new TextWord(
            nonWordPat.PatternValue,
            nonWordPat.DelimClassification.ToWordClassification().Value,
            Traits);
        }
      }
    }
    #endregion

    // delim is whitespace. scan ahead for something more meaningful than whitespace.
    if ((nonWordPat != null) && (Traits.IsWhitespace(nonWordPat)))
    {
      StreamLocation dx = nonWordLoc.ToStreamLocation(ScanStream);
      int fx = Scanner.ScanNotEqual(
        ScanStream.Stream, Traits.WhitespacePatterns,
        dx.Value + nonWordPat.Length);

      // only whitespace remains when the scan reports -1. In that case there is
      // no position to match a delim pattern at, so the whitespace stays the delim.
      if (fx != -1)
      {
        var pat = Traits.DelimPatterns.MatchAt(ScanStream.Stream, fx);
        if (pat != null)
        {
          nonWordLoc = new StreamLocation(fx).ToTextLocation(ScanStream);
          nonWordPat = pat;
        }
      }
    }
  }

  // store the results in the return cursor.
  ScanWordCursor nx = null;
  if ((wordPart == null) && (nonWordPat == null))
  {
    nx = new ScanWordCursor();
    nx.Position = RelativePosition.End;
  }
  else
  {
    nx = new ScanWordCursor(wordPart, wordBx, nonWordLoc, nonWordPat);
    nx.Position = RelativePosition.At;
  }

  return (nx);
}