/// <summary>
/// Scan an entire string for any of the patterns in <paramref name="InPatterns"/>.
/// </summary>
/// <param name="InString">String to scan; scanned from index 0 for its full length.</param>
/// <param name="InPatterns">Patterns to search for.</param>
/// <returns>
/// The result of the start/length overload of ScanEqualAny applied to the
/// whole string.
/// </returns>
public static ScanPatternResults ScanEqualAny(
    string InString, ScanPatterns InPatterns)
{
    // Delegate to the ranged overload, covering the full string.
    return ScanEqualAny(InString, 0, InString.Length, InPatterns);
}
/// <summary>
/// Store the word that ends at (or consists of) an open brace into
/// <paramref name="Results"/>, according to Traits.BracedTreatment.
/// </summary>
/// <param name="Text">Text being scanned.</param>
/// <param name="WordBx">Start position of the word.</param>
/// <param name="NonWordResults">Scan results locating the open brace pattern.</param>
/// <param name="Results">Word cursor that receives the word.</param>
/// <param name="Traits">Supplies BracedTreatment and QuoteEncapsulation.</param>
/// <exception cref="ApplicationException">
/// Whole treatment only: ScanCloseBrace returned -1.
/// </exception>
/// <remarks>
/// When BracedTreatment is neither Parts nor Whole, nothing is stored.
/// </remarks>
public static void ScanWord_IsolateWord_Braced(
    string Text, int WordBx, ScanPatternResults NonWordResults,
    ref WordCursor Results, TextTraits Traits)
{
    int bracePos = NonWordResults.FoundPos;
    char braceChar = NonWordResults.FoundPat.LeadChar;
    bool wordIsBrace = (WordBx == bracePos);

    if (Traits.BracedTreatment == ScannerBracedTreatment.Parts)
    {
        if (wordIsBrace)
        {
            // Standalone open brace: the brace pattern itself is the word.
            Results.SetWord(
                NonWordResults.FoundPat.PatternValue,
                WordClassification.OpenContentBraced, WordBx, braceChar);
        }
        else
        {
            // Named brace: the word is the text in front of the brace.
            string nameText = Text.Substring(WordBx, bracePos - WordBx);
            Results.SetWord(
                nameText, WordClassification.OpenNamedBraced, WordBx, braceChar);
        }
        return;
    }

    if (Traits.BracedTreatment == ScannerBracedTreatment.Whole)
    {
        // NOTE(review): this overload passes a remaining length as the third
        // argument of ScanCloseBrace, while the BoundedString overload passes
        // an end position. Confirm which one ScanCloseBrace expects.
        int remainingLx = Text.Length - bracePos;
        int closePos = ScanCloseBrace(
            Text, bracePos, remainingLx, Traits.QuoteEncapsulation);
        if (closePos == -1)
        {
            throw new ApplicationException(
                "Closing brace not found starting at position " +
                bracePos + " in " + Text);
        }

        // The word runs from its start through the closing brace.
        string wholeText = Text.Substring(WordBx, closePos - WordBx + 1);
        WordClassification wordClass = wordIsBrace
            ? WordClassification.ContentBraced
            : WordClassification.NamedBraced;
        Results.SetWord(wholeText, wordClass, WordBx, braceChar);
    }
}
/// <summary>
/// Isolate a comment-to-end word: the text from <paramref name="InWordBx"/>
/// up to the next Environment.NewLine, or to the end of the bounded string
/// when no newline is found.
/// </summary>
/// <param name="InBoundedString">Bounded string being scanned.</param>
/// <param name="InWordBx">Start position of the comment.</param>
/// <param name="InOutResults">Word cursor that receives the comment text.</param>
/// <param name="InTraits">Supplies the NonWordPatterns used to classify the newline.</param>
/// <returns>
/// Scan results describing the delimiter: the newline position and its
/// pattern, or a result built from -1 when there is no newline.
/// </returns>
private static ScanPatternResults ScanWord_IsolateWord_CommentToEnd(
    BoundedString InBoundedString, int InWordBx,
    ref WordCursor InOutResults, TextTraits InTraits)
{
    string commentText;
    ScanPatternResults delimResults;

    int newLinePos =
        Scanner.ScanEqual(InBoundedString, InWordBx, Environment.NewLine).ResultPos;

    if (newLinePos < 0)
    {
        // No newline: the comment is the remainder of the string.
        commentText = InBoundedString.Substring(InWordBx);
        delimResults = new ScanPatternResults(-1);
    }
    else
    {
        commentText = InBoundedString.Substring(InWordBx, newLinePos - InWordBx);
        ScanPattern newLinePat =
            InTraits.NonWordPatterns.FindPatternAtSubstring(InBoundedString, newLinePos);
        delimResults = new ScanPatternResults(newLinePos, newLinePat);
    }

    // The word goes to the cursor; the delimiter goes back to the caller.
    InOutResults.SetWord(commentText, WordClassification.CommentToEnd, InWordBx);
    return delimResults;
}
/// <summary>
/// Isolate a comment-to-end word: the text from <paramref name="WordBx"/>
/// up to the next Environment.NewLine, or to the end of
/// <paramref name="Text"/> when no newline is found.
/// </summary>
/// <param name="Text">Text being scanned.</param>
/// <param name="WordBx">Start position of the comment.</param>
/// <param name="Results">Word cursor that receives the comment text.</param>
/// <param name="Traits">Supplies the NonWordPatterns used to classify the newline.</param>
/// <returns>
/// Scan results describing the delimiter: the newline position and its
/// pattern, or a result built from -1 when there is no newline.
/// </returns>
public static ScanPatternResults ScanWord_IsolateWord_CommentToEnd(
    string Text, int WordBx, ref WordCursor Results, TextTraits Traits)
{
    string wordText;
    ScanPatternResults spr;

    // Ordinal search: the newline is a machine token, not linguistic text.
    // The culture-sensitive overload can fail to locate line breaks on
    // runtimes that use ICU for string comparison.
    int fx = Text.IndexOf(Environment.NewLine, WordBx, StringComparison.Ordinal);

    if (fx >= 0)
    {
        wordText = Text.Substring(WordBx, fx - WordBx);

        // NOTE(review): the literal 2 is passed as the third argument here,
        // while another caller in this file passes an end position to the
        // same-shaped overload. Confirm the intended meaning of this argument.
        ScanPattern pat = Traits.NonWordPatterns.FindPatternAtSubstring(Text, fx, 2);
        spr = new ScanPatternResults(fx, pat);
    }
    else
    {
        // No newline: the comment is the remainder of the text.
        wordText = Text.Substring(WordBx);
        spr = new ScanPatternResults(-1);
    }

    // The word goes to the cursor; the delimiter goes back to the caller.
    Results.SetWord(wordText, WordClassification.CommentToEnd, WordBx);
    return spr;
}
/// <summary>
/// Store the word that ends at (or consists of) an open brace into
/// <paramref name="InOutResults"/>, according to InTraits.BracedTreatment.
/// </summary>
/// <param name="InBoundedString">Bounded string being scanned.</param>
/// <param name="InWordBx">Start position of the word.</param>
/// <param name="InNonWordResults">Scan results locating the open brace pattern.</param>
/// <param name="InOutResults">Word cursor that receives the word.</param>
/// <param name="InTraits">Supplies BracedTreatment and QuoteEncapsulation.</param>
/// <exception cref="ApplicationException">
/// Whole treatment only: ScanCloseBrace returned -1.
/// </exception>
/// <remarks>
/// When BracedTreatment is neither Parts nor Whole, nothing is stored.
/// </remarks>
private static void ScanWord_IsolateWord_Braced(
    BoundedString InBoundedString, int InWordBx,
    ScanPatternResults InNonWordResults,
    ref WordCursor InOutResults, TextTraits InTraits)
{
    int bracePos = InNonWordResults.FoundPos;
    char braceChar = InNonWordResults.FoundPat.LeadChar;
    bool wordIsBrace = (InWordBx == bracePos);

    if (InTraits.BracedTreatment == ScannerBracedTreatment.Parts)
    {
        if (wordIsBrace)
        {
            // Standalone open brace: the brace pattern itself is the word.
            InOutResults.SetWord(
                InNonWordResults.FoundPat.PatternValue,
                WordClassification.OpenContentBraced, InWordBx, braceChar);
        }
        else
        {
            // Named brace: the word is the text in front of the brace.
            string nameText =
                InBoundedString.String.Substring(InWordBx, bracePos - InWordBx);
            InOutResults.SetWord(
                nameText, WordClassification.OpenNamedBraced, InWordBx, braceChar);
        }
        return;
    }

    if (InTraits.BracedTreatment == ScannerBracedTreatment.Whole)
    {
        // NOTE(review): this overload passes the end position (Ex) as the
        // third argument of ScanCloseBrace, while the string overload passes
        // a remaining length. Confirm which one ScanCloseBrace expects.
        int closePos = ScanCloseBrace(
            InBoundedString.String, bracePos, InBoundedString.Ex,
            InTraits.QuoteEncapsulation);
        if (closePos == -1)
        {
            throw new ApplicationException(
                "Closing brace not found starting at position " +
                bracePos + " in " + InBoundedString.String);
        }

        // The word runs from its start through the closing brace.
        string wholeText =
            InBoundedString.String.Substring(InWordBx, closePos - InWordBx + 1);
        WordClassification wordClass = wordIsBrace
            ? WordClassification.ContentBraced
            : WordClassification.NamedBraced;
        InOutResults.SetWord(wholeText, wordClass, InWordBx, braceChar);
    }
}
/// <summary>
/// Scan a bounded string, from <paramref name="InIx"/> through the bound's
/// end position, for any of the patterns in <paramref name="InPatterns"/>.
/// </summary>
/// <param name="InString">Bounded string to scan.</param>
/// <param name="InIx">Position at which the scan starts.</param>
/// <param name="InPatterns">Patterns to search for.</param>
/// <returns>The result of the start/length overload of ScanEqualAny.</returns>
public static ScanPatternResults ScanEqualAny(
    BoundedString InString, int InIx, ScanPatterns InPatterns)
{
    // Convert the inclusive end position into a length for the ranged overload.
    return ScanEqualAny(
        InString.String, InIx, InString.Ex - InIx + 1, InPatterns);
}
/// <summary>
/// Scan forward in a string for any of the pattern strings.
/// </summary>
/// <param name="InString">String to scan.</param>
/// <param name="InBx">Start position, passed through to the five-argument overload.</param>
/// <param name="InLx">Length, passed through to the five-argument overload.</param>
/// <param name="InPattern">Pattern strings to search for.</param>
/// <returns>The result of the overload that also takes the leading-char array.</returns>
public static ScanPatternResults ScanEqualAnyStrings(
    string InString, int InBx, int InLx, string[] InPattern)
{
    // Derive the leading characters of the patterns, then delegate.
    char[] leadChars = Arrayer.StringArrayToLeadingCharArray(InPattern);
    return ScanEqualAnyStrings(InString, InBx, InLx, InPattern, leadChars);
}
/// <summary>
/// Set the delimiter of this cursor from a set of scan results.
/// </summary>
/// <param name="InBoundedString">Bounded string the delimiter was found in.</param>
/// <param name="InScanResults">Scan results that located the delimiter.</param>
/// <param name="InDelimClass">
/// Classification to store when the scan found a pattern. Ignored when the
/// scan result is "not found"; EndOfString is stored instead.
/// </param>
public void SetDelim(
    BoundedString InBoundedString, ScanPatternResults InScanResults,
    DelimClassification InDelimClass)
{
    if (!InScanResults.IsNotFound)
    {
        // A pattern was found: store its value and position.
        SetDelim(
            InBoundedString, InScanResults.FoundPat.PatternValue,
            InScanResults.FoundPos, InDelimClass);
        return;
    }

    // Nothing found: clear the delimiter and mark end of string.
    mDelim = null;
    mDelimBx = -1;
    this.DelimClass = DelimClassification.EndOfString;
}
/// <summary>
/// Record a run of whitespace starting at <paramref name="InWsIx"/> as the
/// delimiter of the word in <paramref name="InOutResults"/>.
/// </summary>
/// <param name="InBoundedString">Bounded string being scanned.</param>
/// <param name="InTraits">Supplies the WhitespacePatterns.</param>
/// <param name="InOutResults">Word cursor whose delimiter is set.</param>
/// <param name="InWsIx">Position at which the whitespace starts.</param>
private static void ScanWord_IsolateDelim_SetDelimIsWhitespace(
    BoundedString InBoundedString, TextTraits InTraits,
    WordCursor InOutResults, int InWsIx)
{
    // Scan over the whitespace; the scanned-over string becomes the delimiter
    // value, so it can be inspected later (e.g. for tabs or newlines).
    ScanPatternResults wsScan = ScanNotEqual(
        InBoundedString.String, InWsIx, InBoundedString.Ex,
        InTraits.WhitespacePatterns);

    InOutResults.SetDelim(
        InBoundedString, wsScan.ScannedOverString, InWsIx,
        DelimClassification.Whitespace);
    InOutResults.DelimIsWhitespace = true;
}
/// <summary>
/// Set the delimiter of this cursor from a set of scan results.
/// </summary>
/// <param name="Text">Text the delimiter was found in.</param>
/// <param name="ScanResults">Scan results that located the delimiter.</param>
/// <param name="DelimClass">
/// Classification to store when the scan found a pattern. Ignored when the
/// scan result is "not found"; EndOfString is stored instead.
/// </param>
public void SetDelim(
    string Text, ScanPatternResults ScanResults,
    DelimClassification DelimClass)
{
    if (!ScanResults.IsNotFound)
    {
        // A pattern was found: store its value and position.
        SetDelim(
            Text, ScanResults.FoundPat.PatternValue,
            ScanResults.FoundPos, DelimClass);
        return;
    }

    // Nothing found: clear the delimiter and mark end of string.
    mDelim = null;
    mDelimBx = -1;
    this.DelimClass = DelimClassification.EndOfString;
}
/// <summary>
/// Isolate the word that starts at a cursor position within a set of text
/// lines. Only words that are not braced are handled: they are parsed on the
/// current line by ScanWord_IsolateWord.
/// </summary>
/// <param name="InLines">Text lines being scanned (not referenced by this method).</param>
/// <param name="InBxCsr">Cursor positioned at the start of the word.</param>
/// <param name="InTraits">Supplies the NonWordPatterns and brace characters.</param>
/// <returns>
/// A word cursor for the isolated word, or null when the first non-word
/// pattern on the line is an open brace (that case is not implemented).
/// </returns>
private static TextLinesWordCursor ScanWord_IsolatexWord(
    TextLines InLines, TextLinesCursor InBxCsr, TextTraits InTraits)
{
    TextLinesCursor csr = new TextLinesCursor(InBxCsr);
    char ch1 = InBxCsr.CursorChar;

    // Look ahead to see if this word is braced. A quoted word is never
    // treated as braced, so the look-ahead is skipped for it.
    bool isBraced = false;
    if (IsOpenQuoteChar(ch1) == false)
    {
        // The ranged ScanEqualAny overload takes a length, so pass the number
        // of characters remaining on the line from the cursor offset.
        int remLx = csr.LineData.Length - csr.LineOx;
        ScanPatternResults spr = ScanEqualAny(
            csr.LineData, csr.LineOx, remLx, InTraits.NonWordPatterns);

        // A null or not-found result means no delimiter on the line, so the
        // word cannot be braced. Guard before reading the found char.
        isBraced =
            (spr != null)
            && (spr.IsNotFound == false)
            && spr.FoundChar.HasValue
            && InTraits.IsOpenBraceChar(spr.FoundChar.Value);
    }

    // The rule is that only braced words can span multiple lines. The braced
    // case has no implementation here.
    if (isBraced)
    {
        return null;
    }

    // Not braced: parse the word within the current line.
    ScanBoundedString bs = new ScanBoundedString(csr.LineData);
    WordCursor wc = new WordCursor();
    wc.SetTraits(InTraits);
    ScanWord_IsolateWord(bs, csr.LineOx, ref wc, InTraits);

    TextLinesCursor endcsr =
        new TextLinesCursor(csr.LinesNode, wc.ScanEx, AcRelativePosition.At);
    return new TextLinesWordCursor(wc, csr, endcsr);
}
/// <summary>
/// Advance a word cursor to the next word in a string. A word is the text
/// bounded by the delimiter and whitespace patterns of
/// <paramref name="InTraits"/>.
/// </summary>
/// <param name="InString">String being scanned.</param>
/// <param name="InOutWordCursor">
/// Cursor holding the current word; updated in place with the next word and
/// its delimiter. Its Position is set to End when no further word exists.
/// </param>
/// <param name="InTraits">Text traits that define whitespace and delimiters.</param>
public static void AdvancexNextWord(
    string InString, WordCursor InOutWordCursor, TextTraits InTraits)
{
    BoundedString boundedString = new BoundedString(InString);

    // calc scan start position
    int Bx = ScanWord_CalcStartBx(boundedString, InOutWordCursor);

    // empty the word parts of the cursor.
    InOutWordCursor.EmptyWordParts();

    // advance past whitespace. ( -1 means there is no position to scan from. )
    if ((Bx != -1) && (Bx <= boundedString.Ex))
    {
        Bx = ScanNotEqual(
            boundedString.String, Bx, boundedString.Ex,
            InTraits.WhitespacePatterns).FoundPos;
    }

    // got the start of something. scan for the delimiter ( could be the
    // current char ). The whitespace scan reports -1 when only whitespace
    // remained, so the guard is repeated.
    ScanPatternResults spr = null;
    if ((Bx != -1) && (Bx <= boundedString.Ex))
    {
        spr = ScanWord_IsolateWord(boundedString, Bx, ref InOutWordCursor, InTraits);
    }

    // no word was isolated: the cursor is at the end of the string.
    if (spr == null)
    {
        InOutWordCursor.SetDelim(
            boundedString, null, -1, DelimClassification.EndOfString);
        InOutWordCursor.Position = RelativePosition.End;
        return;
    }

    // depending on the word, isolate and store the delim that follows.
    ScanWord_IsolateDelim(boundedString, spr, ref InOutWordCursor, InTraits);

    // current word position.
    if (InOutWordCursor.ScanEx == -1)
    {
        InOutWordCursor.Position = RelativePosition.End;
    }
    else
    {
        InOutWordCursor.Position = RelativePosition.At;
    }
}
/// <summary>
/// Scan a range of a string for any of the pattern strings in
/// <paramref name="InPatterns"/>.
/// </summary>
/// <param name="InString">String to scan.</param>
/// <param name="InIx">Position at which the scan starts.</param>
/// <param name="InLx">Number of characters to scan.</param>
/// <param name="InPatterns">Patterns to search for.</param>
/// <returns>
/// Results holding the position and pattern of the first match, or a
/// not-found result (built from -1) when the range is empty or contains no
/// match. Never null.
/// </returns>
public static ScanPatternResults ScanEqualAny(
    string InString, int InIx, int InLx, ScanPatterns InPatterns)
{
    int ex = InIx + InLx - 1;
    int ix = InIx;

    while (ix <= ex)
    {
        // Find the next char that is the lead char of some pattern.
        int remLx = ex - ix + 1;
        ScanCharResults scr = ScanEqualAny(
            InString, ix, remLx, InPatterns.LeadChars);
        if (scr.IsNotFound == true)
        {
            break;
        }

        // A lead char alone is not a match; the whole pattern must be
        // present at that position.
        ScanPattern pat =
            InPatterns.FindPatternAtSubstring(InString, scr.ResultPos, ex);
        if (pat != null)
        {
            return new ScanPatternResults(scr.ResultPos, pat);
        }

        // advance ix to resume scan after the found lead char.
        ix = scr.ResultPos + 1;
    }

    // Empty or exhausted range, or no lead char found. Report "not found"
    // rather than null so callers can read the result without a null check.
    return new ScanPatternResults(-1);
}
/// <summary>
/// Isolate and store the delimiter that follows a word, based on the scan
/// results that ended the word.
/// </summary>
/// <param name="InBoundedString">Bounded string being scanned.</param>
/// <param name="InPatternResults">
/// Scan results of the non-word pattern that ended the word. Null and
/// "not found" are both treated as end of string.
/// </param>
/// <param name="InOutResults">Word cursor whose delimiter is set.</param>
/// <param name="InTraits">Supplies path-part delimiter detection.</param>
private static void ScanWord_IsolateDelim(
    BoundedString InBoundedString, ScanPatternResults InPatternResults,
    ref WordCursor InOutResults, TextTraits InTraits)
{
    // did not find a nonword char. must have hit end of string. A null
    // result (no word was scanned) is handled the same way.
    if ((InPatternResults == null) || InPatternResults.IsNotFound)
    {
        InOutResults.DelimClass = DelimClassification.EndOfString;
        return;
    }

    // we have a delimiter of some kind.
    DelimClassification sprdc = InPatternResults.FoundPat.DelimClassification;
    InOutResults.WhitespaceFollowsWord = false;
    InOutResults.WhitespaceFollowsDelim = false;
    InOutResults.DelimIsWhitespace = false;

    // whitespace immed follows the word text
    if (sprdc == DelimClassification.Whitespace)
    {
        ScanWord_IsolateDelim_WhitespaceFollows(
            InBoundedString, InPatternResults, ref InOutResults, InTraits);
        return;
    }

    // Want the openContent brace to be processed as a standalone word. Use
    // virtual whitespace so the word that this open brace is the delim of
    // will have what appears to be a whitespace delim. Then the following
    // word will be the standalone open content brace char.
    if (sprdc == DelimClassification.OpenContentBraced)
    {
        InOutResults.SetDelim(
            InBoundedString, null, InPatternResults.FoundPos,
            DelimClassification.VirtualWhitespace);
        return;
    }

    // delim is either as classified in the collection of NonWords or is a
    // PathPart delim.
    ScanPattern pat = InTraits.GetPathPartDelim(
        InBoundedString, InPatternResults.FoundPos);
    if (pat != null)
    {
        InOutResults.SetDelim(
            InBoundedString, pat.PatternValue,
            InPatternResults.FoundPos, DelimClassification.PathSep);
    }
    else
    {
        InOutResults.SetDelim(
            InBoundedString, InPatternResults.FoundPat.PatternValue,
            InPatternResults.FoundPos, sprdc);
    }
}
/// <summary>
/// Scan forward from <paramref name="InIx"/> through <paramref name="InEx"/>
/// for the first position that does not start any of the patterns in
/// <paramref name="InScanPatterns"/>. Matching patterns are stepped over
/// whole, preferring the longest pattern that matches at each position.
/// </summary>
/// <param name="InString">String to scan.</param>
/// <param name="InIx">Position at which the scan starts.</param>
/// <param name="InEx">Last position (inclusive) of the scan range.</param>
/// <param name="InScanPatterns">Patterns to skip over.</param>
/// <returns>
/// Results holding the found position and the char at that position, or
/// results built from -1 when the scan ran past <paramref name="InEx"/>.
/// The scanned string, start position and bounds end are recorded on the
/// results in both cases.
/// </returns>
public static ScanPatternResults ScanNotEqual(
    string InString, int InIx, int InEx, ScanPatterns InScanPatterns)
{
    int foundPos = -1;
    int cursor = InIx;

    while (cursor <= InEx)
    {
        // A char that is not the lead char of any pattern ends the scan.
        int leadIx = Array.IndexOf<char>(InScanPatterns.LeadChars, InString[cursor]);
        if (leadIx == -1)
        {
            foundPos = cursor;
            break;
        }

        // Walk the patterns sharing this lead char; keep the longest match.
        ScanPattern longest = null;
        for (ScanPattern cand = InScanPatterns.ScanPatternsArray[leadIx];
             cand != null;
             cand = cand.NextSameLeadChar)
        {
            bool isMatch = Stringer.CompareSubstringEqual(
                InString, cursor, InEx, cand.PatternValue);
            if (!isMatch)
            {
                continue;
            }
            if ((longest == null)
                || (cand.PatternValue.Length > longest.PatternValue.Length))
            {
                longest = cand;
            }
        }

        // Lead char matched but no complete pattern did: scan ends here.
        if (longest == null)
        {
            foundPos = cursor;
            break;
        }

        // Step over the matched pattern.
        cursor += longest.PatternValue.Length;
    }

    // Package the scan results.
    ScanPatternResults spr;
    if (foundPos == -1)
    {
        spr = new ScanPatternResults(-1);
    }
    else
    {
        spr = new ScanPatternResults(foundPos, InString[foundPos]);
    }
    spr.ScannedString = InString;
    spr.ScanStartIx = InIx;
    spr.ScanBoundsEx = InEx;
    return spr;
}
/// <summary>
/// Scan to the next word in a bounded string. A word is the text bounded by
/// the delimiter and whitespace patterns of the current word's TextTraits.
/// </summary>
/// <param name="BoundedString">Bounded string being scanned.</param>
/// <param name="CurrentWord">
/// Cursor of the current word. When its StayAtFlag is set, a copy of it is
/// returned with the flag cleared instead of advancing.
/// </param>
/// <returns>
/// A new word cursor. Its Position is End (with an EndOfString delimiter)
/// when its ScanEx is -1, otherwise At.
/// </returns>
/// <exception cref="ApplicationException">
/// StayAtFlag is set but the cursor position is neither At nor End.
/// </exception>
public static WordCursor ScanNextWord(
    BoundedString BoundedString, WordCursor CurrentWord)
{
    // stay at the current location.
    if (CurrentWord.StayAtFlag == true)
    {
        bool isAtOrEnd =
            (CurrentWord.Position == RelativePosition.At)
            || (CurrentWord.Position == RelativePosition.End);
        if (!isAtOrEnd)
        {
            throw new ApplicationException("cursor not position at location to stay at");
        }

        WordCursor stayed = new WordCursor(CurrentWord);
        stayed.StayAtFlag = false;
        return stayed;
    }

    TextTraits traits = CurrentWord.TextTraits;

    WordCursor results = new WordCursor()
        .SetString(BoundedString.String)
        .SetTraits(traits);
    results.VirtualCursor = WordCursor.enumVirtualCursor.None;

    // calc scan start position, then advance past whitespace.
    int startBx = ScanWord_CalcStartBx(BoundedString, CurrentWord);
    if ((startBx != -1) && (startBx <= BoundedString.Ex))
    {
        startBx = ScanNotEqual(
            BoundedString.String, startBx, BoundedString.Ex,
            traits.WhitespacePatterns).FoundPos;
    }

    // got the start of something. scan for the delimiter ( could be the
    // current char ).
    ScanPatternResults spr = null;
    DelimClassification sprdc = DelimClassification.None;
    if ((startBx != -1) && (startBx <= BoundedString.Ex))
    {
        spr = ScanWord_IsolateWord(BoundedString, startBx, ref results, traits);
        sprdc = (spr.IsNotFound == true)
            ? DelimClassification.EndOfString
            : spr.FoundPat.DelimClassification;
    }

    if (spr == null)
    {
        // nothing was scanned: end of string.
        results.Position = RelativePosition.End;
        results.SetDelim(BoundedString, null, -1, DelimClassification.EndOfString);
    }
    else if (results.WordClassification == WordClassification.OpenNamedBraced)
    {
        // OpenNamedBraced. delim is the open brace char.
        ScanPatternResults braceScan = ScanEqualAny(
            BoundedString, startBx, traits.OpenNamedBracedPatterns);
        results.SetDelim(
            BoundedString, braceScan.FoundPat.PatternValue, braceScan.FoundPos,
            DelimClassification.OpenNamedBraced);
    }
    else if (results.WordClassification == WordClassification.OpenContentBraced)
    {
        // OpenContentBraced. word and delim are the same.
        results.SetDelim(
            BoundedString, results.Word.Value, results.WordBx,
            DelimClassification.OpenContentBraced);
    }
    else if (results.WordClassification == WordClassification.CommentToEnd)
    {
        // word is CommentToEnd. delim is end of line.
        results.SetDelim(BoundedString, spr, sprdc);
    }
    else
    {
        // process the non-word results returned by ScanWord_IsolateWord.
        ScanWord_IsolateDelim(BoundedString, spr, ref results, traits);
    }

    // current word position.
    if (results.ScanEx == -1)
    {
        results.Position = RelativePosition.End;
        results.SetDelim(BoundedString, null, -1, DelimClassification.EndOfString);
    }
    else
    {
        results.Position = RelativePosition.At;
    }

    return results;
}
/// <summary>
/// A word starts at <paramref name="InBx"/>. Scan to the end of the word,
/// store the word in <paramref name="InOutResults"/> and return the scan
/// results of the delimiter that ends it.
/// </summary>
/// <param name="InBoundedString">Bounded string being scanned.</param>
/// <param name="InBx">Start position of the word.</param>
/// <param name="InOutResults">Word cursor that receives the word.</param>
/// <param name="Traits">Supplies the non-word patterns and quote handling.</param>
/// <returns>
/// Scan results of the delimiter that ends the word. Null when the
/// verbatim-literal pattern matches at the start position, because that
/// case has no handling here and stores no word.
/// </returns>
/// <exception cref="ApplicationException">
/// A closing quote is missing or beyond the bounds, a non-delimiter follows a
/// closing quote, or a quote pattern is the first delimiter found after the
/// start of an unquoted word.
/// </exception>
private static ScanPatternResults ScanWord_IsolateWord(
    BoundedString InBoundedString, int InBx,
    ref WordCursor InOutResults, TextTraits Traits)
{
    char firstChar = InBoundedString.String[InBx];

    // is start of a verbatim string literal. ( no handling; nothing stored )
    if ((Traits.VerbatimLiteralPattern != null)
        && (Traits.VerbatimLiteralPattern.Match(InBoundedString, InBx)))
    {
        return null;
    }

    // is quoted. the word runs to the closing quote.
    if (IsOpenQuoteChar(firstChar) == true)
    {
        int closeIx = ScanCloseQuote(
            InBoundedString.String, InBx, Traits.QuoteEncapsulation);
        if ((closeIx == -1) || (closeIx > InBoundedString.Ex))
        {
            throw (new ApplicationException(
                "Closing quote not found starting at position " +
                InBx.ToString() + " in " + InBoundedString.String));
        }

        string quotedText =
            InBoundedString.String.Substring(InBx, closeIx - InBx + 1);
        InOutResults.SetWord(quotedText, WordClassification.Quoted, InBx);

        // setup the non word which follows the closing quote.
        int afterQuoteIx = closeIx + 1;
        if (InBoundedString.IsOutsideBounds(afterQuoteIx))
        {
            return new ScanPatternResults(-1);
        }

        // the char that follows the closing quote must be a delim
        ScanPatternResults afterQuote =
            ScanEqualAny(InBoundedString, afterQuoteIx, Traits.NonWordPatterns);
        if (afterQuote.FoundPos != afterQuoteIx)
        {
            throw new ApplicationException(
                "invalid char follows close quote at pos " + closeIx.ToString() +
                " in " + Stringer.Head(InBoundedString.String, 80));
        }
        return afterQuote;
    }

    // Scan the string for any of the non word patterns spcfd in Traits.
    ScanPatternResults spr =
        ScanEqualAny(InBoundedString, InBx, Traits.NonWordPatterns);
    DelimClassification sprdc = DelimClassification.None;
    if (spr.IsNotFound == false)
    {
        sprdc = spr.FoundPat.DelimClassification;
    }

    // a quote character within the name. this is an error.
    if (sprdc == DelimClassification.Quote)
    {
        throw new ApplicationException(
            "quote character immed follows name character at position " +
            spr.FoundPos.ToString() + " in " + InBoundedString.String);
    }

    // no delim found. all word to the end of the string.
    if (spr.IsNotFound)
    {
        string tailText = InBoundedString.Substring(InBx);
        InOutResults.SetWord(tailText, WordClassification.Identifier, InBx);
        return spr;
    }

    // found an open named brace char
    if (sprdc == DelimClassification.OpenNamedBraced)
    {
        ScanWord_IsolateWord_Braced(
            InBoundedString, InBx, spr, ref InOutResults, Traits);
        return spr;
    }

    // we have a word that ends with a delim.
    if (spr.FoundPos != InBx)
    {
        string wordText = InBoundedString.Substring(InBx, spr.FoundPos - InBx);
        InOutResults.SetWord(wordText, WordClassification.Identifier, InBx);
        return spr;
    }

    // delim is same position as the word. so there is no word, only a delim.
    if (Scanner.IsOpenBraced(sprdc))
    {
        InOutResults.SetWord(
            spr.FoundPat.PatternValue, WordClassification.OpenContentBraced,
            InBx, spr.FoundPat.LeadChar);
        return spr;
    }

    // start of CommentToEnd comment. This is a word, not a delim. Find the
    // end of the comment and return the delim at that end position.
    if (sprdc == DelimClassification.CommentToEnd)
    {
        return ScanWord_IsolateWord_CommentToEnd(
            InBoundedString, spr.FoundPos, ref InOutResults, Traits);
    }

    InOutResults.SetNullWord();
    return spr;
}
/// <summary>
/// The delim after the word is whitespace. If what follows the whitespace is
/// a delim pattern, the whitespace is disregarded and the delim is what
/// follows it. The exceptions are a path-part delim, an open brace and a
/// quote: for those the whitespace stays the delim.
/// </summary>
/// <param name="InBoundedString">Bounded string being scanned.</param>
/// <param name="InPatternResults">Scan results locating the start of the whitespace.</param>
/// <param name="InOutResults">Word cursor whose delimiter is set.</param>
/// <param name="InTraits">Supplies whitespace, non-word and path-part patterns.</param>
private static void ScanWord_IsolateDelim_WhitespaceFollows(
    BoundedString InBoundedString, ScanPatternResults InPatternResults,
    ref WordCursor InOutResults, TextTraits InTraits)
{
    InOutResults.WhitespaceFollowsWord = true;

    // Look for hard delim after the ws.
    ScanPatternResults afterWs = ScanNotEqual(
        InBoundedString.String, InPatternResults.FoundPos,
        InBoundedString.Ex, InTraits.WhitespacePatterns);

    // look that the char after the ws is a nonword.
    ScanPattern nwPat = null;
    if (afterWs.FoundPos != -1)
    {
        nwPat = InTraits.NonWordPatterns.FindPatternAtSubstring(
            InBoundedString, afterWs.FoundPos);
    }

    // Decide whether the non-word pattern after the whitespace takes over as
    // the delim. Path separators, open braces and quotes do not: they belong
    // to the next word, so the whitespace remains the delim.
    bool hardDelimFollows = false;
    DelimClassification nwdc = DelimClassification.None;
    if (nwPat != null)
    {
        nwdc = nwPat.DelimClassification;
        bool belongsToNextWord =
            InTraits.IsPathPartDelim(InBoundedString, afterWs.FoundPos)
            || nwdc.IsOpenBraced()
            || (nwdc == DelimClassification.Quote);
        hardDelimFollows = !belongsToNextWord;
    }

    if (hardDelimFollows)
    {
        // is an actual delim.
        InOutResults.SetDelim(
            InBoundedString, nwPat.PatternValue, afterWs.FoundPos, nwdc);
    }
    else
    {
        // the whitespace is the delim of record.
        ScanWord_IsolateDelim_SetDelimIsWhitespace(
            InBoundedString, InTraits, InOutResults,
            InPatternResults.FoundPos);
    }
}
/// <summary>
/// Crack the next unit out of the stream, starting after
/// <paramref name="InCurUnit"/>.
/// </summary>
/// <param name="InCurUnit">
/// The current unit. When null, or when its UnitCode is None, the scan
/// starts at position 0; otherwise it starts at Bx + Lx of the unit.
/// </param>
/// <returns>
/// The next unit, or null when only whitespace remains in the stream.
/// </returns>
XmlUnit CrackUnits_CrackNext(XmlUnit InCurUnit)
{
    // calc scan start point
    int scanBx = 0;
    if ((InCurUnit != null) && (InCurUnit.UnitCode != XmlUnitCode.None))
    {
        scanBx = InCurUnit.Bx + InCurUnit.Lx;
    }

    // advance past whitespace
    int streamEx = mStream.Length - 1;
    ScanPatternResults res = Scanner.ScanNotEqual(
        mStream, scanBx, streamEx, mTraits.WhitespacePatterns);

    // end of stream.
    if (res.FoundPos == -1)
    {
        return null;
    }

    // a value unit is the xml text between the open and close unit. It is
    // scanned from the start point, before the leading whitespace.
    if (res.FoundChar.Value != '<')
    {
        return ScanValueUnit(scanBx);
    }

    // some sort of open or close unit. The first non-whitespace char after
    // the '<' selects which kind.
    Nullable<char> nxChar = Scanner.ScanNotEqual(
        mStream, res.FoundPos + 1, streamEx, mTraits.WhitespacePatterns).FoundChar;

    // starting an xml close unit </name>
    if ((nxChar != null) && (nxChar.Value == '/'))
    {
        return CrackUnits_ScanCloseUnit(res.FoundPos);
    }

    // a document infrastructure unit <? ...... ?>
    if ((nxChar != null) && (nxChar.Value == '?'))
    {
        return CrackUnits_ScanInfrastructureUnit(res.FoundPos);
    }

    // starting an xml open unit <name xxxxxx>
    return CrackUnits_ScanOpenUnit(res.FoundPos);
}