// --------------------------- IsolateDelim_SetDelimIsWhitespace ----------
// Records the run of whitespace that starts at WsIx as the delim of the word
// held in Results.
//   Text    - the string being scanned.
//   Traits  - supplies the WhitespacePatterns used to scan over the run.
//   Results - cursor that receives the delim and the DelimIsWhitespace flag.
//   WsIx    - position in Text where the whitespace starts.
private static void IsolateDelim_SetDelimIsWhitespace(
  string Text, TextTraits Traits, WordCursor Results, int WsIx)
{
  // Keep the exact text that was scanned over, so that the whitespace can be
  // checked later to see if it contains tabs or newlines.
  int lastIx = Text.Length - 1;
  string whitespaceText =
    Scanner.ScanNotEqual(Text, WsIx, lastIx, Traits.WhitespacePatterns)
      .ScannedOverString;

  Results.SetDelim(Text, whitespaceText, WsIx, DelimClassification.Whitespace);
  Results.DelimIsWhitespace = true;
}
// -------------------- IsolateDelim ---------------------------
// Stores in Results the delim that follows the word which was just isolated.
//   Text           - the string being scanned.
//   PatternResults - results of the non word scan that ended the word. When
//                    it reports "not found" the word ran to the end of Text.
//   Results        - cursor that receives the delim and the whitespace flags.
//   Traits         - supplies the VirtualWhitespace setting and the path part
//                    delim lookup.
private static void IsolateDelim(
  string Text, ScanPatternResults PatternResults,
  ref WordCursor Results, TextTraits Traits)
{
  // did not find a nonword char. must have hit end of string.
  if (PatternResults.IsNotFound)
  {
    Results.DelimClass = DelimClassification.EndOfString;
    return;
  }

  // we have a delimiter of some kind.
  DelimClassification sprdc = PatternResults.FoundPat.DelimClassification;

  Results.WhitespaceFollowsWord = false;
  Results.WhitespaceFollowsDelim = false;
  Results.DelimIsWhitespace = false;

  // whitespace immed follows the word text. What comes after the whitespace
  // decides whether the whitespace itself is the delim.
  if (sprdc == DelimClassification.Whitespace)
  {
    ScanWord.IsolateDelim_WhitespaceFollows(
      Text, PatternResults, ref Results, Traits);
    return;
  }

  // the delim is a hard delim ( not whitespace ).

  // Want the openContent brace to be processed as a standalone word. Use
  // virtual whitespace so the word that this open brace is the delim of will
  // have what appears to be a whitespace delim. Then the following word will
  // be the standalone open content brace char.
  if ((sprdc == DelimClassification.OpenContentBraced)
    && (Traits.VirtualWhitespace == true))
  {
    Results.SetDelim(
      Text, null, PatternResults.FoundPos,
      DelimClassification.VirtualWhitespace);
    return;
  }

  // delim is either as classified in the collection of NonWords or is
  // a PathPart delim. The path part delim takes precedence.
  ScanPattern pat = Traits.GetPathPartDelim(Text, PatternResults.FoundPos);
  if (pat != null)
  {
    Results.SetDelim(
      Text, pat.PatternValue, PatternResults.FoundPos,
      DelimClassification.PathSep);
  }
  else
  {
    Results.SetDelim(
      Text, PatternResults.FoundPat.PatternValue,
      PatternResults.FoundPos, sprdc);
  }
}
/// <summary>
/// The delim after the word is whitespace. If what follows the whitespace
/// is a delim char, then this whitespace is disregarded as the delim, and
/// the delim is what follows the whitespace. The whitespace stays the delim
/// when what follows is a path part delim, an open brace or a quote, or when
/// no non word pattern follows it.
/// </summary>
/// <param name="Text">The string being scanned.</param>
/// <param name="PatternResults">Scan results whose FoundPos is where the
/// whitespace after the word starts.</param>
/// <param name="Results">Cursor that receives the delim. Its
/// WhitespaceFollowsWord flag is set to true.</param>
/// <param name="Traits">Supplies the whitespace patterns, the non word
/// patterns and the path part delim test.</param>
private static void IsolateDelim_WhitespaceFollows(
  string Text, ScanPatternResults PatternResults,
  ref WordCursor Results, TextTraits Traits)
{
  Results.WhitespaceFollowsWord = true;

  // Look for hard delim after the ws.
  ScanPatternResults scanResults = Scanner.ScanNotEqual(
    Text, PatternResults.FoundPos, Text.Length - 1,
    Traits.WhitespacePatterns);

  // look that the char after the ws is a nonword.
  ScanPattern nwPat = null;
  if (scanResults.FoundPos != -1)
  {
    nwPat = Traits.NonWordPatterns.MatchPatternsAtStringLocation(
      Text, scanResults.FoundPos, Text.Length - 1).Item1;
  }

  // Decide if the non word pattern after the whitespace is the delim of
  // record. It is not when it is:
  //   - a sep char in a path name.
  //   - an open brace char. Content braces are considered standalone words.
  //   - a quote char. The quoted string is considered a word.
  bool hardDelimFollows = false;
  DelimClassification nwdc = DelimClassification.None;
  if (nwPat != null)
  {
    nwdc = nwPat.DelimClassification;
    hardDelimFollows =
      !(Traits.IsPathPartDelim(Text, scanResults.FoundPos)
        || nwdc.IsOpenBraced()
        || (nwdc == DelimClassification.Quote));
  }

  if (hardDelimFollows)
  {
    // is an actual delim.
    Results.SetDelim(Text, nwPat.PatternValue, scanResults.FoundPos, nwdc);
  }
  else
  {
    // the whitespace char is the delim of record.
    ScanWord.IsolateDelim_SetDelimIsWhitespace(
      Text, Traits, Results, PatternResults.FoundPos);
  }
}
IsolateQuotedWord(
  string Text, int Bx, TextTraits Traits)
{
  // Isolates the literal ( verbatim or ordinary quoted ) that starts at Bx,
  // together with the delim that follows its closing quote.
  // Returns a tuple of:
  //   Item1 - Bx, the start position of the literal.
  //   Item2 - the value returned by the close quote scan ( presumably the
  //           position of the closing quote ).
  //   Item3 - text of the literal. For an ordinary quoted literal this is
  //           Text from Bx through Item2.
  //   Item4 - LiteralType of the literal.
  //   Item5 - scan results of the delim after the closing quote. Constructed
  //           with -1 when Text.IsPastEnd reports nothing follows the quote.
  // Throws ApplicationException when the closing quote is not found or when
  // the char after the closing quote is not a non word pattern.
  // NOTE(review): the modifiers and return type of this declaration are not
  // visible in this section of the file.
  char firstChar = Text[Bx];
  int? closeIx = null;
  string literalText = null;
  LiteralType literalType = LiteralType.none;

  bool isVerbatim =
    (Traits.VerbatimLiteralPattern != null)
    && (Traits.VerbatimLiteralPattern.Match(Text, Bx));

  if (isVerbatim)
  {
    // is start of a verbatim string literal
    var verbatim = VerbatimLiteral.ScanCloseQuote(
      Text, Traits.VerbatimLiteralPattern, Bx);
    closeIx = verbatim.Item1;
    literalText = verbatim.Item2;
    literalType = LiteralType.VerbatimString;
  }
  else if (Traits.IsQuoteChar(firstChar) == true)
  {
    // is a quoted literal
    closeIx = Scanner.ScanCloseQuote(Text, Bx, Traits.QuoteEncapsulation);
    if (closeIx.Value != -1)
    {
      literalText = Text.Substring(Bx, closeIx.Value - Bx + 1);

      // correct the following at some point. Should be either string or
      // char lit.
      literalType = LiteralType.String;
    }
  }

  // not a quoted literal, or the closing quote is missing.
  if ((closeIx == null) || (closeIx.Value == -1))
  {
    throw (new ApplicationException(
      "Closing quote not found starting at position " +
      Bx.ToString() + " in " + Text));
  }

  // setup the non word which follows the closing quote.
  ScanPatternResults delimResults;
  int afterIx = closeIx.Value + 1;
  if (Text.IsPastEnd(afterIx))
  {
    delimResults = new ScanPatternResults(-1);
  }
  else
  {
    // the char that follows the closing quote must be a delim
    delimResults = Scanner.ScanEqualAny(
      Text, afterIx, Text.Length - afterIx, Traits.NonWordPatterns);
    if (delimResults.FoundPos != afterIx)
    {
      throw new ApplicationException(
        "invalid char follows close quote at pos " + afterIx.ToString() +
        " in " + Stringer.Head(Text, 80));
    }
  }

  return Tuple.Create(Bx, closeIx, literalText, literalType, delimResults);
}
// -------------------- IsolateWord ---------------------------
// We have a word starting at Bx. Scan to the end of the word.
//   Text    - the string being scanned.
//   Bx      - position in Text where the word starts.
//   Results - cursor that receives the word.
//   Traits  - supplies the quote, verbatim literal and non word patterns.
// Returns the ScanPatternResults of the delim that ends the word.
// Throws ApplicationException when a quote char is the first non word
// pattern found after the start of a name.
private static ScanPatternResults IsolateWord(
  string Text, int Bx, ref WordCursor Results, TextTraits Traits)
{
  char firstChar = Text[Bx];

  // is start of either verbatim string literal or quoted literal.
  bool startsLiteral =
    ((Traits.VerbatimLiteralPattern != null)
      && (Traits.VerbatimLiteralPattern.Match(Text, Bx)))
    || (Traits.IsQuoteChar(firstChar) == true);

  if (startsLiteral)
  {
    var quoted = ScanWord.IsolateQuotedWord(Text, Bx, Traits);
    Results.SetWord(quoted.Item3, WordClassification.Quoted, quoted.Item1);
    Results.Word.LiteralType = quoted.Item4;
    return quoted.Item5;
  }

  // Scan the string for any of the non word patterns spcfd in Traits.
  ScanPatternResults spr = Scanner.ScanEqualAny(
    Text, Bx, Text.Length - Bx, Traits.NonWordPatterns);

  // no delim found. all word to the end of the string.
  if (spr.IsNotFound)
  {
    Results.SetWord(Text.Substring(Bx), WordClassification.Identifier, Bx);
    return spr;
  }

  DelimClassification sprdc = spr.FoundPat.DelimClassification;

  // a quote character within the name. this is an error.
  if (sprdc == DelimClassification.Quote)
  {
    throw new ApplicationException(
      "quote character immed follows name character at position " +
      spr.FoundPos.ToString() + " in " + Text);
  }

  // found an open named brace char
  // Open named braced words are words that combine the word and the braced
  // contents. debateable that this feature is needed and should be retained.
  if (sprdc == DelimClassification.OpenNamedBraced)
  {
    Scanner.ScanWord_IsolateWord_Braced(Text, Bx, spr, ref Results, Traits);
    return spr;
  }

  // we have a word that ends with a delim.
  if (spr.FoundPos != Bx)
  {
    string wordText = Text.Substring(Bx, spr.FoundPos - Bx);
    Results.SetWord(wordText, WordClassification.Identifier, Bx);
    return spr;
  }

  // delim is same position as the word. so either the word is the delim ( an
  // expression symbol ) or the word is empty ( the delim is a comma,
  // semicolon, ... a content divider )
  if ((Traits.NonDividerIsWord == true)
    && (Traits.IsDividerDelim(spr.FoundPat.DelimClassification) == false))
  {
    Results.SetWord(
      spr.FoundPat.PatternValue,
      spr.FoundPat.DelimClassification.ToWordClassification().Value,
      Bx, spr.FoundPat.LeadChar);
  }
  else if (sprdc == DelimClassification.CommentToEnd)
  {
    // start of CommentToEnd comment. This is a word, not a delim. Find the
    // end of the comment and set the delim to that end position.
    spr = Scanner.ScanWord_IsolateWord_CommentToEnd(
      Text, spr.FoundPos, ref Results, Traits);
  }
  else
  {
    Results.SetNullWord();
  }

  // return ScanPatternResults of the delim that ends the word.
  return spr;
}
// ------------------------ ScanNextWord -------------------------
// Scans to the next word in the string. ( a word being the text bounded by the
// delimiter and whitespace characters as spcfd in the TextTraits of
// CurrentWord )
//   Text        - the string being scanned.
//   Traits      - not referenced by this method. The scan uses
//                 CurrentWord.TextTraits.
//   CurrentWord - cursor of the current word. The scan starts at the position
//                 calculated from it by CalcStartBx.
// Returns the cursor of the next word. When CurrentWord.StayAtFlag is set,
// returns a copy of CurrentWord with the flag turned off. When no word start
// is found, the cursor Position is set to End with an EndOfString delim.
public static WordCursor ScanNextWord(
  string Text, TextTraits Traits, WordCursor CurrentWord)
{
  // stay at the current location. return copy of the cursor, but with
  // stayatflag turned off.
  if (CurrentWord.StayAtFlag == true)
  {
    WordCursor stayCursor = new WordCursor(CurrentWord);
    stayCursor.StayAtFlag = false;
    return stayCursor;
  }

  // NOTE(review): nothing visible in this method allocates the cursor before
  // it is handed to IsolateWord or dereferenced below. It presumably has to
  // be constructed here ( and given CurrentWord.TextTraits ) - confirm
  // against the constructors of WordCursor.
  WordCursor results = null;

  // calc scan start position
  int startIx = ScanWord.CalcStartBx(Text, CurrentWord);

  // advance past whitespace
  if ((startIx != -1) && (startIx <= (Text.Length - 1)))
  {
    startIx = Scanner.ScanNotEqual(
      Text, startIx, Text.Length - 1,
      CurrentWord.TextTraits.WhitespacePatterns).FoundPos;
  }

  // got the start of something. scan for the delimiter ( could be the
  // current char )
  ScanPatternResults spr = null;
  DelimClassification sprdc = DelimClassification.None;
  if ((startIx != -1) && (startIx <= (Text.Length - 1)))
  {
    spr = ScanWord.IsolateWord(
      Text, startIx, ref results, CurrentWord.TextTraits);
    sprdc = spr.IsNotFound
      ? DelimClassification.EndOfString
      : spr.FoundPat.DelimClassification;
  }

  if (spr == null)
  {
    // nothing left to scan.
    results.Position = RelativePosition.End;
    results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
  }

  // depending on the word, isolate and store the delim that follows.
  else if (results.WordClassification == WordClassification.OpenNamedBraced)
  {
    // OpenNamedBraced. delim is the open brace char.
    ScanPatternResults braceResults = Scanner.ScanEqualAny(
      Text, startIx, Text.Length - startIx,
      CurrentWord.TextTraits.OpenNamedBracedPatterns);
    results.SetDelim(
      Text, braceResults.FoundPat.PatternValue, braceResults.FoundPos,
      DelimClassification.OpenNamedBraced);
  }
  else if (results.WordClassification == WordClassification.OpenContentBraced)
  {
    // OpenContentBraced. word and delim are the same.
    results.SetDelim(
      Text, results.Word.Value, results.WordBx,
      DelimClassification.OpenContentBraced);
  }
  else if (results.WordClassification == WordClassification.CommentToEnd)
  {
    // word is CommentToEnd. delim is end of line.
    results.SetDelim(Text, spr, sprdc);
  }
  else
  {
    // process the scan results returned by IsolateWord.
    ScanWord.IsolateDelim(
      Text, spr, ref results, CurrentWord.TextTraits);
  }

  // current word position.
  if (results.ScanEx == -1)
  {
    results.Position = RelativePosition.End;
    results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
  }
  else
  {
    results.Position = RelativePosition.At;
  }

  return results;
}