/// <summary>
/// Creates an empty pattern: no pattern value, no next-same-lead-char link,
/// a NUL lead character and a delimiter classification of None.
/// </summary>
public ScanPattern()
{
    mLeadChar = '\0';
    mPatternValue = null;
    mNextSameLeadChar = null;

    // Assigned through the property, after the backing fields are cleared.
    this.DelimClassification = Text.Enums.DelimClassification.None;
}
/// <summary>
/// Copy constructor. Runs the default constructor first, then copies the
/// replacement value, pattern value, delimiter classification and overlap
/// classification from <paramref name="Pattern"/>.
/// </summary>
/// <param name="Pattern">Pattern to copy from. It is dereferenced without a null check.</param>
public ScanPattern(ScanPattern Pattern)
    : this()
{
    ReplacementValue = Pattern.ReplacementValue;
    PatternValue = Pattern.PatternValue;
    DelimClassification = Pattern.DelimClassification;
    OverlapClassification = Pattern.OverlapClassification;
}
/// <summary>
/// Passes the pattern's value to the base AddDistinct. When the base call
/// returns anything other than -1, the delimiter class is appended to mWip.
/// </summary>
/// <param name="Pattern">Pattern whose PatternValue is added.</param>
/// <param name="DelimClass">Delimiter class recorded when the base call does not return -1.</param>
public void AddDistinct(ScanPattern Pattern, DelimClassification DelimClass)
{
    if (base.AddDistinct(Pattern.PatternValue) == -1)
    {
        return;
    }

    mWip.Add(DelimClass);
}
/// <summary>
/// Runs the default constructor, then stores each argument in the member of
/// the same name.
/// </summary>
/// <param name="Word">Value stored in Word.</param>
/// <param name="WordBx">Value stored in WordBx.</param>
/// <param name="DelimBx">Value stored in DelimBx.</param>
/// <param name="DelimPattern">Value stored in DelimPattern.</param>
public ScanWordCursor(
    TextWord Word,
    TextLocation WordBx,
    TextLocation DelimBx,
    ScanPattern DelimPattern)
    : this()
{
    this.Word = Word;
    this.WordBx = WordBx;
    this.DelimBx = DelimBx;
    this.DelimPattern = DelimPattern;
}
/// <summary>
/// Runs the default constructor, then stores the atom text, the atom pattern
/// and the relative position in the members of the same name.
/// </summary>
/// <param name="AtomText">Value stored in AtomText.</param>
/// <param name="AtomPattern">Value stored in AtomPattern.</param>
/// <param name="Position">Value stored in Position; defaults to RelativePosition.At.</param>
public ScanAtomCursor(
    AtomText AtomText,
    ScanPattern AtomPattern,
    RelativePosition Position = RelativePosition.At)
    : this()
{
    this.AtomText = AtomText;
    this.AtomPattern = AtomPattern;
    this.Position = Position;
}
/// <summary>
/// Builds a result for a pattern found as a plain string.
/// The scan bounds are left unset (-1 / null) and no ScanPattern object is attached.
/// </summary>
/// <param name="InFoundPos">Position at which the pattern was found.</param>
/// <param name="InFoundPattern">Text of the found pattern.</param>
/// <param name="InAnyPatternIx">Index of the found pattern in the array of patterns scanned for.</param>
public ScanPatternResults(int InFoundPos, string InFoundPattern, int InAnyPatternIx)
{
    mScanStartIx = -1;
    mScannedString = null;
    mScanBoundsEx = -1;

    mFoundPos = InFoundPos;

    // A null or empty pattern has no lead character. Indexing [0] would throw,
    // so store null, the same value the not-found constructor uses.
    if (string.IsNullOrEmpty(InFoundPattern))
    {
        mFoundChar = null;
    }
    else
    {
        mFoundChar = InFoundPattern[0];
    }

    mFoundPattern = InFoundPattern;
    mAnyPatternIx = InAnyPatternIx;
    mFoundPat = null;
}
/// <summary>
/// Builds a result for a single found character. The pattern text and the
/// ScanPattern object are null, the any-pattern index is -1 and the scan
/// bounds are left unset (-1 / null).
/// </summary>
/// <param name="InFoundPos">Position at which the character was found.</param>
/// <param name="InFoundChar">The character that was found.</param>
public ScanPatternResults(int InFoundPos, char InFoundChar)
{
    // scan bounds: not known to this constructor.
    mScanStartIx = -1;
    mScanBoundsEx = -1;
    mScannedString = null;

    // what was found.
    mFoundPos = InFoundPos;
    mFoundChar = InFoundChar;

    // no pattern information for a single-char hit.
    mFoundPattern = null;
    mFoundPat = null;
    mAnyPatternIx = -1;
}
// Location of the found pattern in the array of any pattern to scan for.
int mAnyPatternIx;

/// <summary>
/// Builds a "not found" result: no found char, no pattern text, no
/// ScanPattern object, any-pattern index -1 and unset scan bounds.
/// </summary>
/// <param name="InNotFoundIx">Value stored as the found position.</param>
public ScanPatternResults(int InNotFoundIx)
{
    // scan bounds: not known to this constructor.
    mScanStartIx = -1;
    mScanBoundsEx = -1;
    mScannedString = null;

    mFoundPos = InNotFoundIx;

    // nothing was matched.
    mFoundChar = null;
    mFoundPattern = null;
    mFoundPat = null;
    mAnyPatternIx = -1;
}
/// <summary>
/// Builds a result from a found ScanPattern. The lead char, pattern text and
/// array position are taken from the pattern, and the pattern itself is kept.
/// Scan bounds are left unset (-1 / null).
/// </summary>
/// <param name="InFoundPos">Position at which the pattern was found.</param>
/// <param name="InFoundPattern">The found pattern. It is dereferenced without a null check.</param>
public ScanPatternResults(int InFoundPos, ScanPattern InFoundPattern)
{
    // scan bounds: not known to this constructor.
    mScanStartIx = -1;
    mScannedString = null;
    mScanBoundsEx = -1;

    mFoundPos = InFoundPos;

    // details copied out of the pattern, read in the same order as before.
    mFoundChar = InFoundPattern.LeadChar;
    mFoundPattern = InFoundPattern.PatternValue;
    mAnyPatternIx = InFoundPattern.ArrayPosition;
    mFoundPat = InFoundPattern;
}
IsolateNumericLiteral(
    ScanStream ScanStream, TextTraits Traits, int Bx)
{
    // Isolates the run of decimal digits that starts at Bx, then looks for the
    // delimiter pattern that follows it.
    // Returns (literal type, literal text, delimiter pattern or null,
    // delimiter position or -1).
    //
    // For now every numeric literal is a simple integer. This still has to be
    // expanded to recognise float and decimal literals, sign and precision.
    LiteralType litType = LiteralType.Integer;
    var stream = ScanStream.Stream;

    // Walk forward while the chars are digits, remembering the last digit seen.
    // lastDigitIx stays at Bx when the char at Bx is not a digit.
    int lastDigitIx = Bx;
    for (int ix = Bx; (ix < stream.Length) && Char.IsDigit(stream[ix]); ix++)
    {
        lastDigitIx = ix;
    }

    // the literal text.
    int litLength = lastDigitIx - Bx + 1;
    string litText = ScanStream.Substring(Bx, litLength);

    // the delim that follows the literal, if the stream continues past it.
    ScanPattern foundPat = null;
    int foundIx = -1;
    int delimBx = lastDigitIx + 1;
    if (delimBx < stream.Length)
    {
        var delim = Scanner.ScanEqualAny(stream, delimBx, Traits.DelimPatterns);
        foundPat = delim.Item1;
        foundIx = delim.Item2;
    }

    return new Tuple<LiteralType, string, ScanPattern, int>(
        litType, litText, foundPat, foundIx);
}
/// <summary>
/// Tells whether the delimiter class of the pattern, as reported by
/// GetDelimClass, is OpenContentBraced or OpenNamedBraced.
/// </summary>
/// <param name="InPattern">Pattern to classify.</param>
/// <returns>true for either open-brace class, otherwise false.</returns>
public bool IsOpenBraced(ScanPattern InPattern)
{
    DelimClassification delimClass = GetDelimClass(InPattern);
    return (delimClass == DelimClassification.OpenContentBraced)
        || (delimClass == DelimClassification.OpenNamedBraced);
}
IsolateQuotedWord(
    ScanStream ScanStream, TextTraits Traits, int Bx)
{
    // Isolates the verbatim or quoted literal that starts at Bx, then looks
    // for the delimiter pattern that follows it.
    // Returns (literal type, literal text, delimiter pattern or null,
    // delimiter position or -1). When no closing quote is found the literal
    // type stays LiteralType.none for a plain quote and the text stays null.
    LiteralType litType = LiteralType.none;
    string litText = null;
    ScanPattern foundPat = null;
    int foundIx = -1;
    int quoteEx = -1;
    char ch1 = ScanStream.Stream[Bx];

    // is start of a verbatim string literal
    if ((Traits.VerbatimLiteralPattern != null)
        && (Traits.VerbatimLiteralPattern.Match(ScanStream.Stream, Bx)))
    {
        var rv = VerbatimLiteral.ScanCloseQuote(
            ScanStream.Stream, Traits.VerbatimLiteralPattern, Bx);
        quoteEx = rv.Item1;
        litText = rv.Item2;
        litType = LiteralType.VerbatimString;
    }

    // is a quoted literal
    else if (Traits.IsQuoteChar(ch1) == true)
    {
        quoteEx = Scanner.ScanCloseQuote(ScanStream.Stream, Bx, Traits.QuoteEncapsulation);
        if (quoteEx != -1)
        {
            int lx = quoteEx - Bx + 1;
            litText = ScanStream.Substring(Bx, lx);

            // correct the following at some point. Should be either string or
            // char lit.
            litType = LiteralType.String;
        }
    }

    // Isolate the delim that follows the quoted word.
    // When no closing quote was found, quoteEx is still -1. The previous code
    // then scanned from index 0, which could report a delimiter located before
    // the literal. Scan from Bx instead so the reported position is never
    // before the start of the literal.
    int bx = (quoteEx == -1) ? Bx : quoteEx + 1;

    // Nothing follows a literal that ends the stream; same guard as used when
    // isolating a numeric literal.
    if (bx < ScanStream.Stream.Length)
    {
        var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
        foundPat = rv.Item1;
        foundIx = rv.Item2;
    }

    return(new Tuple<LiteralType, string, ScanPattern, int>(
        litType, litText, foundPat, foundIx));
}
/// <summary>
/// Two patterns are equal when their delimiter classification, pattern value
/// and user code are all equal.
/// </summary>
/// <param name="obj">Object to compare with.</param>
/// <returns>
/// false when <paramref name="obj"/> is null or is not a ScanPattern;
/// otherwise the result of the member-wise comparison.
/// </returns>
public override bool Equals(object obj)
{
    ScanPattern compareTo = obj as ScanPattern;

    // Equals must not throw: a null or foreign object is simply "not equal".
    // ReferenceEquals is used so a user-defined == operator cannot interfere.
    if (object.ReferenceEquals(compareTo, null))
    {
        return false;
    }

    return (compareTo.DelimClassification == this.DelimClassification)
        && (compareTo.PatternValue == this.PatternValue)
        && (compareTo.UserCode == this.UserCode);
}
/// <summary>
/// Tells whether <paramref name="Char"/> is the lead character of the pattern.
/// A null pattern never matches.
/// </summary>
/// <param name="Pattern">Pattern to test; may be null.</param>
/// <param name="Char">Character compared against the pattern's LeadChar.</param>
/// <returns>true when the pattern is not null and its LeadChar equals the character.</returns>
public static bool IsPatternStartChar(this ScanPattern Pattern, char Char)
{
    if (Pattern == null)
    {
        return false;
    }

    return Char == Pattern.LeadChar;
}
/// <summary>
/// Records a found pattern. Position is always overwritten with
/// <paramref name="Pos"/>. The first match is stored in FoundPattern. From the
/// second match on, FoundPatterns is created on demand (seeded with the first
/// match) and the new match is appended to it.
/// </summary>
/// <param name="Pattern">The matched pattern.</param>
/// <param name="Pos">Position of the match.</param>
/// <param name="Lgth">Length of the match.</param>
public void AddFound(ScanPattern Pattern, int Pos, int Lgth)
{
    this.Position = Pos;

    // first match: keep it as the single found pattern.
    if (FoundPattern == null)
    {
        this.FoundPattern = new MatchScanPattern(Pattern, Pos, Lgth);
        return;
    }

    // second match: start the list with the match recorded earlier.
    if (FoundPatterns == null)
    {
        FoundPatterns = new MatchScanPatternList();
        FoundPatterns.Add(this.FoundPattern);
    }

    FoundPatterns.Add(new MatchScanPattern(Pattern, Pos, Lgth));
}
/// <summary>
/// Records a found pattern. The first match is stored in FoundPattern,
/// FoundPos and MatchLgth. From the second match on, FoundPatterns is created
/// on demand (seeded with the first match) and the new match is appended.
/// <paramref name="Pos"/> is only stored for the first match.
/// </summary>
/// <param name="Pattern">The matched pattern.</param>
/// <param name="Pos">Position of the match.</param>
/// <param name="Lgth">Length of the match.</param>
public void AddFound(ScanPattern Pattern, int Pos, int Lgth)
{
    // first match: keep it in the single-match members.
    if (FoundPattern == null)
    {
        this.FoundPattern = Pattern;
        this.FoundPos = Pos;
        this.MatchLgth = Lgth;
        return;
    }

    // second match: start the list with the match recorded earlier.
    if (FoundPatterns == null)
    {
        FoundPatterns = new List<MatchScanPattern>();
        FoundPatterns.Add(new MatchScanPattern(FoundPattern, MatchLgth));
    }

    FoundPatterns.Add(new MatchScanPattern(Pattern, Lgth));
}
/// <summary>
/// Creates a pattern from its text and delimiter classification. The
/// next-same-lead-char link is cleared.
/// </summary>
/// <param name="PatternValue">Value assigned to the PatternValue member.</param>
/// <param name="DelimClassification">Value assigned to the DelimClassification member.</param>
public ScanPattern(
    string PatternValue,
    DelimClassification DelimClassification)
{
    // Same assignment order as before: value, link, classification.
    this.PatternValue = PatternValue;
    mNextSameLeadChar = null;
    this.DelimClassification = DelimClassification;
}
// ------------------------ ScanNextWord -------------------------
// Scans to the next word in the stream, a word being the text bounded by the
// delimiter and whitespace patterns specified in the TextTraits argument.
//
// ScanStream  - the stream being scanned.
// Traits      - supplies the whitespace, delimiter and new line patterns.
// CurrentWord - cursor of the word the scan continues from.
//
// Always returns a new cursor. When neither a word nor a delimiter is found,
// the returned cursor has Position == RelativePosition.End (null is never
// returned). Otherwise the cursor holds the word, the word location, the
// delimiter location and the delimiter pattern, with Position == At.
public static ScanWordCursor ScanNextWord(
    ScanStream ScanStream, TextTraits Traits, ScanWordCursor CurrentWord)
{
    // components of the next word.
    TextWord wordPart = null;
    TextLocation wordBx = null;
    ScanPattern nonWordPat = null;
    TextLocation nonWordLoc = null;
    int nonWordIx = -1;

    // stay at the current location. The word and delimiter of the current
    // cursor are copied into a new cursor built at the end of this method.
    if (CurrentWord.StayAtFlag == true)
    {
        nonWordPat = CurrentWord.DelimPattern;
        nonWordLoc = CurrentWord.DelimBx;
        wordPart = CurrentWord.Word;
        wordBx = CurrentWord.WordBx;
    }

    else
    {
        #region STEP1 setup the begin pos of the next word.
        // ----------------------------- STEP 1 ------------------------------
        // setup the begin pos of the next word. bx == -1 means there is nothing
        // left to scan.
        int bx;
        {
            // calc scan start position
            bx = ScanWord.CalcScanNextStart(ScanStream, Traits, CurrentWord);

            // advance past whitespace
            if (bx != -1)
            {
                bx = Scanner.ScanNotEqual(ScanStream.Stream, Traits.WhitespacePatterns, bx);
            }
        }
        // end STEP 1.
        #endregion

        #region STEP 2. Isolate either numeric lib, quoted lit or scan to non word pattern
        // ------------------------------- STEP 2 ----------------------------------
        // Isolate either numeric literal, quoted literal or scan to the next non word
        // pattern. litType stays null when the word is not a literal.
        LiteralType? litType = null;
        string litText = null;
        {
            // got a decimal digit. isolate the numeric literal string.
            if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
            {
                var rv = ScanWord.IsolateNumericLiteral(ScanStream, Traits, bx);
                litType = rv.Item1;
                litText = rv.Item2;
                nonWordPat = rv.Item3;  // the non word pattern immed after numeric literal
                nonWordIx = rv.Item4;   // pos of that pattern
            }

            // got something. now scan forward for the pattern that delimits the word.
            else if (bx != -1)
            {
                {
                    var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
                    nonWordPat = rv.Item1;
                    nonWordIx = rv.Item2;
                }

                // got a quote pattern right at the scan start. Isolate the quoted
                // string, then find the delim that follows the quoted string.
                if ((nonWordPat != null)
                    && (nonWordPat.DelimClassification == DelimClassification.Quote)
                    && (nonWordIx == bx))
                {
                    var rv = IsolateQuotedWord(ScanStream, Traits, nonWordIx);
                    litType = rv.Item1;
                    litText = rv.Item2;
                    nonWordPat = rv.Item3;  // the non word pattern immed after quoted literal
                    nonWordIx = rv.Item4;   // pos of that pattern
                }
            }
        }
        // end STEP 2.
        #endregion

        #region STEP 3 - setup wordBx and wordPart with the found word.
        {
            // got nothing.
            if (bx == -1)
            {
            }

            // no delim found. word text all the way to the end.
            else if (nonWordIx == -1)
            {
                var rv = ScanWord.IsolateWordText(
                    ScanStream, Traits, litType, litText, bx, null);
                wordBx = rv.Item1;
                wordPart = rv.Item2;
#if skip
                wordBx = new StreamLocation(bx).ToTextLocation(ScanStream);
                if (litType != null)
                {
                    wordPart = new TextWord(litText, WordClassification.Quoted, Traits);
                }
                else
                {
                    wordPart = new TextWord(
                        ScanStream.Substring(bx), WordClassification.Identifier, Traits);
                }
#endif
            }

            // got a word and a non word pattern.
            else if (nonWordIx > bx)
            {
                var rv = ScanWord.IsolateWordText(
                    ScanStream, Traits, litType, litText, bx, nonWordIx);
                wordBx = rv.Item1;
                wordPart = rv.Item2;
#if skip
                wordBx = new StreamLocation(bx).ToTextLocation(ScanStream);
                int lx = foundIx - bx;
                wordPart = new TextWord(
                    ScanStream.Substring(bx, lx), WordClassification.Identifier, Traits);
#endif
                nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
            }

            // no word. just delim.
            // NOTE(review): nonWordPat is dereferenced below without a null check;
            // this relies on a pattern always accompanying a position other than -1.
            else
            {
                nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);

                // the delim is comment to end. store the comment text as a word.
                if (nonWordPat.DelimClassification == DelimClassification.CommentToEnd)
                {
                    // the comment runs up to the next new line pattern.
                    var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.NewLinePatterns);
                    var eolPat = rv.Item1;
                    var eolIx = rv.Item2;

                    // no new line. the comment runs to the end of the stream and
                    // there is no delim after it.
                    if (eolPat == null)
                    {
                        wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                        wordPart = new TextWord(
                            ScanStream.Substring(nonWordIx), WordClassification.CommentToEnd, Traits);
                        nonWordLoc = null;
                        nonWordPat = null;
                    }

                    // the comment ends just before the new line. the new line
                    // pattern becomes the delim of the comment word.
                    else
                    {
                        wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                        int lx = eolIx - nonWordIx;
                        var sloc = wordBx.ToStreamLocation(ScanStream);
                        wordPart = new TextWord(
                            ScanStream.Substring(sloc.Value, lx),
                            WordClassification.CommentToEnd, Traits);
                        nonWordLoc = new StreamLocation(eolIx).ToTextLocation(ScanStream);
                        nonWordPat = eolPat;
                    }
                }

                // if the delim pattern is not a non word ( a divider ), store the
                // pattern also as the word.
                else if (Traits.DelimPatternsThatAreNonWords.Contains(nonWordPat) == false)
                {
                    wordBx = nonWordLoc;
                    wordPart = new TextWord(
                        nonWordPat.PatternValue,
                        nonWordPat.DelimClassification.ToWordClassification().Value,
                        Traits);
                }
            }
        }
        #endregion

        // delim is whitespace. scan ahead for something more meaningful than
        // whitespace. When a delim pattern sits right after the whitespace, that
        // pattern replaces the whitespace as the delim of the word.
        // NOTE(review): fx is passed to MatchAt without a check for -1 - confirm
        // MatchAt tolerates that value.
        if ((nonWordPat != null) && (Traits.IsWhitespace(nonWordPat)))
        {
            StreamLocation dx = nonWordLoc.ToStreamLocation(ScanStream);
            int fx = Scanner.ScanNotEqual(
                ScanStream.Stream, Traits.WhitespacePatterns,
                dx.Value + nonWordPat.Length);
            var pat = Traits.DelimPatterns.MatchAt(ScanStream.Stream, fx);
            if (pat != null)
            {
                nonWordLoc = new StreamLocation(fx).ToTextLocation(ScanStream);
                nonWordPat = pat;
            }
        }
    }

    // store the results in the return cursor. No word and no delim means the
    // end of the stream was reached.
    ScanWordCursor nx = null;
    if ((wordPart == null) && (nonWordPat == null))
    {
        nx = new ScanWordCursor( );
        nx.Position = RelativePosition.End;
    }
    else
    {
        nx = new ScanWordCursor(wordPart, wordBx, nonWordLoc, nonWordPat);
        nx.Position = RelativePosition.At;
    }

    return(nx);
}
/// <summary>
/// Stores the matched pattern together with the position and length of the match.
/// </summary>
/// <param name="MatchPattern">Value stored in MatchPattern.</param>
/// <param name="Pos">Value stored in Pos.</param>
/// <param name="MatchLength">Value stored in MatchLength.</param>
public MatchScanPattern(
    ScanPattern MatchPattern,
    int Pos,
    int MatchLength)
{
    this.MatchPattern = MatchPattern;
    this.Pos = Pos;
    this.MatchLength = MatchLength;
}
/// <summary>
/// Wraps the pattern, position and match length in a MatchScanPattern and
/// passes it to the single-argument Add.
/// </summary>
/// <remarks>
/// Intended list policy, as described by the earlier notes on this method:
/// the pattern is added when no other ScanPattern with the same delim class is
/// in the list. When one with the same delim class does exist, it is replaced
/// if the new match length exceeds that of the entry in the list.
/// Example: ** is found. It could be the start of a comment or the dereference
/// of a pointer to a pointer, while * could be a mult symbol, a dereference of
/// a pointer or the start of a special value. The ** unary operator would
/// replace the * unary operator in the list; the entries for the comment begin
/// and special value starter delim classes would stay.
/// NOTE(review): none of that policy is implemented in this overload; it is
/// presumably in the Add it forwards to - confirm.
/// </remarks>
/// <param name="Pat">The matched pattern.</param>
/// <param name="Pos">Position of the match.</param>
/// <param name="MatchLx">Length of the match.</param>
public void Add(ScanPattern Pat, int Pos, int MatchLx)
{
    Add(new MatchScanPattern(Pat, Pos, MatchLx));
}
// -------------------- IsolateDelim ---------------------------
// Sets the delimiter information of Results from the outcome of the non word
// pattern scan.
//
// Text           - the text that was scanned.
// PatternResults - result of the scan for the non word pattern after the word.
// Results        - word cursor that receives the delimiter information.
// Traits         - supplies virtual whitespace and path part delim rules.
//
// Outcomes:
//  - nothing found: the delim class is EndOfString and nothing else is changed.
//  - whitespace delim: handed to IsolateDelim_WhitespaceFollows, which looks
//    past the whitespace itself.
//  - open content brace with virtual whitespace enabled: the delim is recorded
//    as VirtualWhitespace with a null delim value.
//  - path part delim at the found position: recorded as PathSep.
//  - anything else: recorded with the classification of the found pattern.
private static void IsolateDelim(
    string Text,
    ScanPatternResults PatternResults,
    ref WordCursor Results,
    TextTraits Traits)
{
    // did not find a nonword char. must have hit end of string.
    if (PatternResults.IsNotFound)
    {
        Results.DelimClass = DelimClassification.EndOfString;
        return;
    }

    // we have a delimiter of some kind.
    DelimClassification sprdc = PatternResults.FoundPat.DelimClassification;

    // A look-ahead scan past whitespace used to run here; its result was never
    // used, so it has been removed.

    Results.WhitespaceFollowsWord = false;
    Results.WhitespaceFollowsDelim = false;
    Results.DelimIsWhitespace = false;

    // whitespace immed follows the word text
    if (sprdc == DelimClassification.Whitespace)
    {
        ScanWord.IsolateDelim_WhitespaceFollows(
            Text, PatternResults, ref Results, Traits);
        return;
    }

    // the delim is a hard delim ( not whitespace )

    // Want the openContent brace to be processed as a standalone word. Use
    // virtual whitespace so the word that this open brace is the delim of will
    // have what appears to be a whitespace delim. Then the following word will
    // be the standalone open content brace char.
    if ((sprdc == DelimClassification.OpenContentBraced)
        && (Traits.VirtualWhitespace == true))
    {
        Results.SetDelim(
            Text, null,
            PatternResults.FoundPos, DelimClassification.VirtualWhitespace);
        return;
    }

    // delim is either as classified in the collection of NonWords or is
    // a PathPart delim.
    ScanPattern pat = Traits.GetPathPartDelim(Text, PatternResults.FoundPos);
    if (pat != null)
    {
        Results.SetDelim(
            Text, pat.PatternValue,
            PatternResults.FoundPos, DelimClassification.PathSep);
    }
    else
    {
        Results.SetDelim(
            Text, PatternResults.FoundPat.PatternValue,
            PatternResults.FoundPos, sprdc);
    }
}
/// <summary>
/// The delim after the word is whitespace. If what follows the whitespace is
/// an actual delim pattern, the whitespace is disregarded and that pattern
/// becomes the delim. The whitespace stays the delim of record when nothing
/// non-word follows it, or when what follows is a path part delim, an open
/// brace or a quote.
/// </summary>
/// <param name="Text">The text that was scanned.</param>
/// <param name="PatternResults">Scan result whose FoundPos is where the whitespace starts.</param>
/// <param name="Results">Word cursor that receives the delimiter information.</param>
/// <param name="Traits">Supplies the whitespace and non word patterns and the path part test.</param>
private static void IsolateDelim_WhitespaceFollows(
    string Text,
    ScanPatternResults PatternResults,
    ref WordCursor Results,
    TextTraits Traits)
{
    Results.WhitespaceFollowsWord = true;

    // Look for the first non whitespace position after the ws.
    ScanPatternResults afterWs = Scanner.ScanNotEqual(
        Text, PatternResults.FoundPos, Text.Length - 1,
        Traits.WhitespacePatterns);

    // See whether a non word pattern sits at that position.
    ScanPattern hardDelim = null;
    if (afterWs.FoundPos != -1)
    {
        hardDelim = Traits.NonWordPatterns.MatchPatternsAtStringLocation(
            Text, afterWs.FoundPos, Text.Length - 1).Item1;
    }

    // the char after the whitespace is a non word (delim) char.
    if (hardDelim != null)
    {
        DelimClassification hardClass = hardDelim.DelimClassification;

        // The whitespace remains the delim when the pattern is:
        //  - a sep char in a path name,
        //  - an open brace ( content braces are considered standalone words ),
        //  - a quote char ( the quoted string is considered a word ).
        bool whitespaceStaysDelim =
            Traits.IsPathPartDelim(Text, afterWs.FoundPos)
            || hardClass.IsOpenBraced()
            || (hardClass == DelimClassification.Quote);

        // is an actual delim.
        if (whitespaceStaysDelim == false)
        {
            Results.SetDelim(
                Text, hardDelim.PatternValue, afterWs.FoundPos, hardClass);
            return;
        }
    }

    // the whitespace char is the delim of record.
    ScanWord.IsolateDelim_SetDelimIsWhitespace(
        Text, Traits, Results, PatternResults.FoundPos);
}
/// <summary>
/// Creates the object and records the given match through AddFound.
/// </summary>
/// <param name="Pattern">The matched pattern.</param>
/// <param name="Pos">Position of the match.</param>
/// <param name="Lgth">Length of the match.</param>
public FoundNonWord(ScanPattern Pattern, int Pos, int Lgth)
{
    AddFound(Pattern, Pos, Lgth);
}
/// <summary>
/// Returns a new ScanPattern built from this one with the copy constructor.
/// </summary>
/// <returns>The newly constructed copy.</returns>
public virtual ScanPattern Duplicate()
{
    return new ScanPattern(this);
}
// ------------------------ ScanNextAtom -------------------------
// Scans to the next atom in the stream, an atom being the text bounded by the
// delimiter and whitespace patterns specified in the TextTraits argument.
//
// ScanStream  - the stream being scanned.
// Traits      - supplies the whitespace, delimiter and new line patterns.
// CurrentWord - cursor of the atom the scan continues from.
//
// Always returns a new cursor (null is never returned):
//  - no atom found: a cursor with Position == RelativePosition.End.
//  - more than one pattern matches at the same spot: a cursor built from the
//    list of matches. Position is not assigned here for that cursor.
//  - otherwise: a cursor with the atom text and its pattern, Position == At.
public static ScanAtomCursor ScanNextAtom(
    ScanStream ScanStream, TextTraits Traits, ScanAtomCursor CurrentWord)
{
    // components of the next atom.
    TextLocation wordBx = null;
    int nonWordIx = -1;
    int nonWordLx = 0;
    ScanPattern nonWordPat = null;
    List<MatchScanPattern> nonWordPatList = null;
    AtomText atomText = null;
    List<MatchScanPattern> atomTextList = null;
    AtomText whitespaceText = null;

    // whether whitespace after the prior atom is significant. Stays null when
    // the current cursor is not positioned at or after an atom.
    bool? priorCodeIsWhitespaceSignificant = null;

    // stay at the current location. The atom text and pattern of the current
    // cursor are copied into a new cursor built at the end of this method.
    if (CurrentWord.StayAtFlag == true)
    {
        atomText = CurrentWord.AtomText;
        nonWordPat = CurrentWord.AtomPattern;
        wordBx = CurrentWord.StartLoc;
    }

    else
    {
        #region STEP1 setup the begin pos of the next word.
        // ----------------------------- STEP 1 ------------------------------
        // setup the begin pos of the next atom. bx == -1 means there is nothing
        // left to scan.
        int bx;
        {
            // save the whitespace significance of the prior atom.
            if ((CurrentWord.Position == RelativePosition.At)
                || (CurrentWord.Position == RelativePosition.After))
            {
                priorCodeIsWhitespaceSignificant = CurrentWord.WhitespaceIsSignificant;
            }

            // calc scan start position
            bx = ScanAtom.CalcScanNextStart(ScanStream, Traits, CurrentWord);

            // advance past whitespace
            if (bx != -1)
            {
                int saveBx = bx;
                bx = Scanner.ScanNotEqual(ScanStream.Stream, Traits.WhitespacePatterns, bx);

                // there is some whitespace. depending on what precedes and follows, may
                // return this as the atom.
                if ((priorCodeIsWhitespaceSignificant != null)
                    && (priorCodeIsWhitespaceSignificant.Value == true))
                {
                    if (bx != saveBx)
                    {
                        // end pos of the whitespace: end of stream when nothing but
                        // whitespace remains, else the char before the next non blank.
                        int whitespaceEx = -1;
                        if (bx == -1)
                        {
                            whitespaceEx = ScanStream.Stream.Length - 1;
                        }
                        else
                        {
                            whitespaceEx = bx - 1;
                        }
                        int whitespaceLx = whitespaceEx - saveBx + 1;

                        // the third argument is a single blank, regardless of the
                        // actual whitespace text.
                        whitespaceText = new AtomText(
                            ScanAtomCode.Whitespace,
                            ScanStream.Stream.Substring(saveBx, whitespaceLx), " ",
                            new StreamLocation(saveBx).ToTextLocation(ScanStream),
                            new StreamLocation(whitespaceEx).ToTextLocation(ScanStream));
                    }
                }
            }
        }
        // end STEP 1.
        #endregion

        #region STEP 2. Isolate either numeric lib, quoted lit or scan to non word pattern
        // ------------------------------- STEP 2 ----------------------------------
        // Isolate either numeric literal, quoted literal or scan to the next non word
        // pattern. litType stays null when the atom is not a literal.
        LiteralType? litType = null;
        string litText = null;
        {
            // got a decimal digit. isolate the numeric literal string.
            if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
            {
                var rv = Scanner.IsolateNumericLiteral(ScanStream, Traits, bx);
                litType = rv.Item1;
                litText = rv.Item2;
                nonWordPat = rv.Item3;  // the non word pattern immed after numeric literal
                nonWordIx = rv.Item4;   // pos of that pattern
            }

            // got something. now scan forward for the pattern that delimits the word.
            else if (bx != -1)
            {
                {
                    var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
                    nonWordPat = rv.Item1;
                    nonWordIx = rv.Item2;
                    nonWordLx = rv.Item3;
                    nonWordPatList = rv.Item4;
                }

                // got a quote pattern right at the scan start. Isolate the quoted
                // string, then find the delim that follows the quoted string.
                if ((nonWordPat != null)
                    && (nonWordPat.DelimClassification == DelimClassification.Quote)
                    && (nonWordIx == bx))
                {
                    var rv = Scanner.IsolateQuotedWord(ScanStream, Traits, nonWordIx);
                    litType = rv.Item1;
                    litText = rv.Item2;
                    nonWordPat = rv.Item3;  // the non word pattern immed after quoted literal
                    nonWordIx = rv.Item4;   // pos of that pattern
                }
            }
        }
        // end STEP 2.
        #endregion

        #region STEP 3 - setup wordBx and wordPart with the found word.
        {
            // got nothing.
            if (bx == -1)
            {
            }

            // no delim found. word text all the way to the end.
            else if (nonWordIx == -1)
            {
                // significant whitespace precedes the word. return the whitespace
                // as the atom.
                if (whitespaceText != null)
                {
                    atomText = whitespaceText;
                    nonWordPat = null;
                    nonWordPatList = null;
                }
                else
                {
                    var rv = Scanner.IsolateWordText(
                        ScanStream, Traits, litType, litText, bx, null);
                    atomText = rv.Item3;
                    wordBx = atomText.StartLoc;
                }
            }

            // got a word and a non word pattern.
            else if (nonWordIx > bx)
            {
                // significant whitespace precedes the word. return the whitespace
                // as the atom.
                if (whitespaceText != null)
                {
                    atomText = whitespaceText;
                    nonWordPat = null;
                    nonWordPatList = null;
                }
                else
                {
                    var rv = Scanner.IsolateWordText(
                        ScanStream, Traits, litType, litText, bx, nonWordIx);
                    atomText = rv.Item3;
                    wordBx = atomText.StartLoc;
                }
            }

            // no word. just delim.
            // NOTE(review): nonWordPat is dereferenced below without a null check;
            // this relies on a pattern always accompanying a position other than -1.
            else
            {
                // the delim is comment to end. store the comment text as the atom.
                if (nonWordPat.DelimClassification == DelimClassification.CommentToEnd)
                {
                    // the comment runs up to the next new line pattern.
                    var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.NewLinePatterns);
                    var eolPat = rv.Item1;
                    var eolIx = rv.Item2;

                    // no new line. the comment runs to the end of the stream.
                    if (eolPat == null)
                    {
                        int ex = ScanStream.Stream.Length - 1;
                        wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                        TextLocation wordEx = new StreamLocation(ex).ToTextLocation(ScanStream);
                        string commentText = ScanStream.Substring(nonWordIx);
                        atomText = new AtomText(
                            ScanAtomCode.CommentToEnd, commentText, null, wordBx, wordEx);
                        nonWordPat = null;
                        nonWordPatList = null;
                    }

                    // the comment ends just before the new line. the new line
                    // pattern becomes the pattern of the cursor.
                    else
                    {
                        wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                        int lx = eolIx - nonWordIx;
                        TextLocation wordEx =
                            new StreamLocation(nonWordIx + lx - 1).ToTextLocation(ScanStream);
                        string commentText = ScanStream.Substring(nonWordIx, lx);
                        atomText = new AtomText(
                            ScanAtomCode.CommentToEnd, commentText, null, wordBx, wordEx);

                        // sloc is not used after this point.
                        var sloc = wordBx.ToStreamLocation(ScanStream);
                        nonWordPat = eolPat;
                        nonWordPatList = null;
                    }
                }

                // the word found is a non word or keyword pattern.
                else
                {
                    // got whitespace followed by keyword. Return the whitespace.
                    if ((nonWordPat.DelimClassification == DelimClassification.Keyword)
                        && (whitespaceText != null))
                    {
                        atomText = whitespaceText;
                        nonWordPat = null;
                        nonWordPatList = null;
                    }

                    // there is more than one scan pattern that matches. Build an
                    // AtomText for every match and attach it to that match.
                    // atomText and wordBx end up holding the values of the last match.
                    else if (nonWordPatList != null)
                    {
                        atomTextList = new List<MatchScanPattern>();
                        foreach (var pat in nonWordPatList)
                        {
                            wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                            int lx = pat.MatchLength;
                            TextLocation wordEx =
                                new StreamLocation(nonWordIx + lx - 1).ToTextLocation(ScanStream);
                            string scanText = ScanStream.Stream.Substring(nonWordIx, lx);

                            atomText = new AtomText(
                                pat.MatchPattern.DelimClassification.ToScanAtomCode().Value,
                                scanText,
                                pat.MatchPattern.ReplacementValue,
                                wordBx, wordEx);
                            pat.AtomText = atomText;
                            atomTextList.Add(pat);
                        }
                    }

                    // a single pattern matches. the atom is the matched text.
                    else
                    {
                        wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                        int lx = nonWordLx;
                        TextLocation wordEx =
                            new StreamLocation(nonWordIx + lx - 1).ToTextLocation(ScanStream);
                        string scanText = ScanStream.Stream.Substring(nonWordIx, lx);

                        atomText = new AtomText(
                            nonWordPat.DelimClassification.ToScanAtomCode().Value,
                            scanText,
                            nonWordPat.ReplacementValue,
                            wordBx, wordEx);
                    }
                }
            }
        }
        #endregion
    }

    // store the results in the return cursor. No atom text means the end of
    // the stream was reached.
    ScanAtomCursor nx = null;
    if (atomText == null)
    {
        nx = new ScanAtomCursor( );
        nx.Position = RelativePosition.End;
    }
    else if (atomTextList != null)
    {
        nx = new ScanAtomCursor(atomTextList);
    }
    else
    {
        nx = new ScanAtomCursor(atomText, nonWordPat);
        nx.Position = RelativePosition.At;
    }

    return(nx);
}
/// <summary>
/// Creates the results object and records the given match through AddFound.
/// </summary>
/// <param name="Pattern">The matched pattern.</param>
/// <param name="Pos">Position of the match.</param>
/// <param name="Lgth">Length of the match.</param>
public PatternScanResults(ScanPattern Pattern, int Pos, int Lgth)
{
    AddFound(Pattern, Pos, Lgth);
}
/// <summary>
/// Returns the entry of Delims at the pattern's ArrayPosition.
/// </summary>
/// <param name="InPattern">Pattern whose ArrayPosition indexes Delims. It is dereferenced without a null check.</param>
/// <returns>The delimiter classification stored at that index.</returns>
public DelimClassification GetDelimClass(ScanPattern InPattern)
{
    return Delims[InPattern.ArrayPosition];
}
ClassifyAsComment(ScanStream ScanStream, TextTraits Traits, int Bx)
{
    // Builds the AtomText of a comment-to-end-of-line that starts at Bx.
    // Returns (start location of the comment, the comment atom, a pattern that
    // is always null, the result of the scan for the new line that ends the
    // comment).
    //
    // The atom code is EntireLineCommentToEnd when the comment is the first
    // non blank on its line, else CommentToEnd.

    // Look back to see if this comment is the first non blank on the line.
    // That is the case when it starts the stream, when only blanks precede it,
    // or when the first non blank found going backwards ends a new line pattern.
    bool isFirstNonBlankOnLine = true;
    if (Bx != 0)
    {
        int ix = Scanner.ScanReverseNotEqual(
            ScanStream.Stream, Bx - 1, Traits.WhitespaceWithoutNewLinePatterns);
        if (ix != -1)
        {
            var rv = Traits.NewLinePatterns.MatchFirstPatternEndsAtStringLocation(
                ScanStream.Stream, ix);
            isFirstNonBlankOnLine = (rv.Item1 != null);
        }
    }

    // set the atomCode of this atom depending on if the comment starts the line.
    ScanAtomCode atomCode = ScanAtomCode.CommentToEnd;
    if (isFirstNonBlankOnLine == true)
    {
        atomCode = ScanAtomCode.EntireLineCommentToEnd;
    }

    // scan for a new line. That is the end of the comment.
    PatternScanResults nonWord =
        Scanner.ScanEqualAny(ScanStream.Stream, Bx, Traits.NewLinePatterns);

    TextLocation wordBx = new StreamLocation(Bx).ToTextLocation(ScanStream);
    TextLocation wordEx;
    string commentText;

    // no newline pattern found. Comment to the end of the text stream.
    if (nonWord.IsEmpty == true)
    {
        int ex = ScanStream.Stream.Length - 1;
        wordEx = new StreamLocation(ex).ToTextLocation(ScanStream);
        commentText = ScanStream.Substring(Bx);
    }

    // the comment ends on the char just before the new line pattern.
    else
    {
        int lx = nonWord.Position - Bx;
        wordEx = new StreamLocation(Bx + lx - 1).ToTextLocation(ScanStream);
        commentText = ScanStream.Substring(Bx, lx);
    }

    string userCode = null;
    AtomText atomText = new AtomText(
        atomCode, commentText, null, wordBx, wordEx, userCode);

    // No pattern is reported for the comment; callers get the new line
    // information through the scan results instead.
    ScanPattern nonWordPat = null;

    return(new Tuple<TextLocation, AtomText, ScanPattern, PatternScanResults>
        (wordBx, atomText, nonWordPat, nonWord));
}
// ------------------------ OrigScanNextAtom -------------------------
// Scans forward from CurrentWord to the next atom in the stream. An atom is
// one of: a run of whitespace, a numeric literal, a quoted literal, an
// identifier or keyword, a comment-to-end, or a delimiter pattern.
//
// ScanStream  - the text being scanned ( read through ScanStream.Stream ).
// Traits      - supplies the WhitespacePatterns, DelimPatterns and
//               KeywordPatterns used to split the text.
// CurrentWord - cursor of the atom most recently returned. When its
//               StayAtFlag is set the same atom is returned again in a new
//               cursor.
//
// Returns a new ScanAtomCursor. The method never returns null: when there is
// nothing left to scan the returned cursor has Position == RelativePosition.End.
// Otherwise Position is set to RelativePosition.At, except that the cursor
// built from a list of multiple matching patterns is returned exactly as its
// constructor leaves it.
public static ScanAtomCursor OrigScanNextAtom(
    ScanStream ScanStream, TextTraits Traits, ScanAtomCursor CurrentWord)
{
    // components of the next word.
    TextLocation wordBx = null;
    ScanPattern nonWordPat = null;
    PatternScanResults nonWord = null;
    AtomText atomText = null;
    List<MatchScanPattern> atomTextList = null;
    AtomText whitespaceText = null;
    ScanAtomCode? tokenCode = null;        // ScanAtomCode of this token.
    int? tokenLx = null;                   // length of an identifier token.
    ScanAtomCode? priorTokenCode = null;   // ScanAtomCode of the prior token.

    // stay at the current location. return copy of the cursor, but with
    // stayatflag turned off.
    if (CurrentWord.StayAtFlag == true)
    {
        atomText = CurrentWord.AtomText;
        tokenCode = atomText.AtomCode;
        priorTokenCode = null;
        nonWordPat = CurrentWord.AtomPattern;
        wordBx = CurrentWord.StartLoc;
    }
    else
    {
        #region STEP1 setup the begin pos of the next word.
        // ----------------------------- STEP 1 ------------------------------
        // setup the begin pos of the next word.
        int bx;
        {
            // save the ScanAtomCode of the prior word.
            if ((CurrentWord.Position == RelativePosition.At)
                || (CurrentWord.Position == RelativePosition.After))
            {
                priorTokenCode = CurrentWord.AtomCode;
            }

            // calc scan start position
            bx = ScanAtom.CalcScanNextStart(ScanStream, Traits, CurrentWord);

            // advance past whitespace
            if (bx != -1)
            {
                int saveBx = bx;
                bx = Scanner.ScanNotEqual(
                    ScanStream.Stream, Traits.WhitespacePatterns, bx);

                // there is some whitespace. Isolate it as AtomText.
                // This method will return the whitespace as the token. But need
                // to look at the token before and after to classify the
                // whitespace as significant or not. ( whitespace between
                // identifiers or keywords is significant. Whitespace between
                // symbols is not significant. )
                // note: even insignificant whitespace is returned as a token
                // because the whitespace is needed when redisplaying the
                // statement text.
                if (bx != saveBx)
                {
                    // bx == -1 here means the whitespace ran to the end of
                    // the stream.
                    int whitespaceEx = -1;
                    if (bx == -1)
                    {
                        whitespaceEx = ScanStream.Stream.Length - 1;
                    }
                    else
                    {
                        whitespaceEx = bx - 1;
                    }
                    int whitespaceLx = whitespaceEx - saveBx + 1;
                    string userCode = null;
                    whitespaceText = new AtomText(
                        ScanAtomCode.Whitespace,
                        ScanStream.Stream.Substring(saveBx, whitespaceLx),
                        " ",
                        new StreamLocation(saveBx).ToTextLocation(ScanStream),
                        new StreamLocation(whitespaceEx).ToTextLocation(ScanStream),
                        userCode);
                }
            }
        }
        // end STEP 1.
        #endregion

        #region STEP 2. Isolate either numeric lit, quoted lit or identifier/keyword.
        // ------------------------------- STEP 2 ----------------------------------
        // Isolate either numeric literal, quoted literal or scan to the next non
        // word pattern.
        LiteralType? litType = null;
        string litText = null;
        {
            // got a decimal digit. isolate the numeric literal string.
            if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
            {
                var rv = Scanner.IsolateNumericLiteral(ScanStream, Traits, bx);
                litType = rv.Item1;
                litText = rv.Item2;
                nonWord = rv.Item3;
                tokenCode = ScanAtomCode.Numeric;
            }

            // got something. now scan forward for the pattern that delimits the
            // word.
            else if (bx != -1)
            {
                nonWord = Scanner.ScanEqualAny(
                    ScanStream.Stream, bx, Traits.DelimPatterns);

                // NOTE(review): the original looked up a
                // DelimClassification.SpecialValueStarter pattern here but did
                // nothing with the result ( empty if body ). The stub was
                // removed because it dereferenced nonWord before the null check
                // below. Special value handling is still to be implemented.

                // word chars all the way to the end. An empty result is treated
                // the same as a null result so that tokenLx is never computed
                // from the Position of a scan that found nothing.
                if ((nonWord == null) || (nonWord.IsEmpty == true))
                {
                    tokenCode = ScanAtomCode.Identifier;
                    tokenLx = ScanStream.Stream.Length - bx;
                }

                // got a quote char. Isolate the quoted string, then find the
                // delim that follows the quoted string.
                else if (nonWord.FoundAtPosition(DelimClassification.Quote, bx))
                {
                    var rv = Scanner.IsolateQuotedWord(ScanStream, Traits, bx);
                    litType = rv.Item1;
                    litText = rv.Item2;
                    nonWord = rv.Item3;
                    tokenCode = ScanAtomCode.Quoted;
                }

                // delim pattern found past the start of the scan. That means
                // there are identifier chars from the start of the scan to the
                // found delim.
                else if (bx != nonWord.Position)
                {
                    tokenCode = ScanAtomCode.Identifier;
                    tokenLx = nonWord.Position - bx;
                }

                // the delim starts right at the scan position. Take the pattern
                // from the scan results. ( the original tested nonWordPat, which
                // is always null at this point, so tokenCode was never set and
                // STEP 3 later dereferenced the null pattern. )
                else
                {
                    var delimMatch = nonWord.FirstFoundPattern;
                    if (delimMatch != null)
                    {
                        nonWordPat = delimMatch.MatchPattern;
                    }

                    if (nonWordPat != null)
                    {
                        tokenCode = nonWordPat.DelimClassification.ToScanAtomCode();
                    }
                    else
                    {
                        tokenCode = null;
                    }
                }
            }

            // attempt to classify the identifier token as a keyword.
            if ((tokenCode != null) && (tokenCode.Value == ScanAtomCode.Identifier))
            {
                var rv = Traits.KeywordPatterns.MatchPatternToSubstring(
                    ScanStream.Stream, bx, tokenLx.Value);
                var kwdResults = rv.Item3;
                var kwdPat = kwdResults.FirstFoundPattern;
                if (kwdPat != null)
                {
                    // from here on the keyword match stands in for the delim
                    // scan results.
                    tokenCode = kwdPat.MatchPattern.DelimClassification.ToScanAtomCode();
                    nonWordPat = kwdPat.MatchPattern;
                    nonWord = kwdResults;
                }
            }
        }
        // end STEP 2.
        #endregion

        #region STEP 3 - setup wordBx and wordPart with the found word.
        {
            // got whitespace. The whitespace is the atom returned by this call.
            // It is significant only when the tokens on both sides report that
            // whitespace is significant.
            if (whitespaceText != null)
            {
                bool isSignificant =
                    (priorTokenCode != null)
                    && (tokenCode != null)
                    && (priorTokenCode.Value.WhitespaceIsSignificant() == true)
                    && (tokenCode.Value.WhitespaceIsSignificant() == true);

                ScanAtomCode wstc = ScanAtomCode.InsignificantWhitespace;
                if (isSignificant == true)
                {
                    wstc = ScanAtomCode.Whitespace;
                }

                atomText = whitespaceText;
                atomText.AtomCode = wstc;
            }

            // got nothing. atomText stays null and the End cursor is returned.
            else if (bx == -1)
            {
            }

            // no delim found. word text all the way to the end.
            else if ((nonWord == null) || (nonWord.IsEmpty == true))
            {
                // get the text from start of scan to end of string.
                var rv = Scanner.IsolateWordText(
                    ScanStream, Traits, litType, litText, bx, null);
                atomText = rv.Item3;
                wordBx = atomText.StartLoc;
            }

            // got a word and a non word pattern.
            else if (nonWord.Position > bx)
            {
                var rv = Scanner.IsolateWordText(
                    ScanStream, Traits, litType, litText, bx, nonWord.Position);
                atomText = rv.Item3;
                wordBx = atomText.StartLoc;
            }

            // no word. just delim ( or keyword ) at the scan position.
            else
            {
                // defensive: make sure the pattern is resolved before it is
                // examined.
                if (nonWordPat == null)
                {
                    var firstMatch = nonWord.FirstFoundPattern;
                    if (firstMatch != null)
                    {
                        nonWordPat = firstMatch.MatchPattern;
                    }
                }

                // the delim is comment to end. store as a word.
                if ((nonWordPat != null)
                    && (nonWordPat.DelimClassification == DelimClassification.CommentToEnd))
                {
                    var rv = ScanAtom.ClassifyAsComment(ScanStream, Traits, bx);
                    wordBx = rv.Item1;
                    atomText = rv.Item2;
                    nonWordPat = rv.Item3;
                    nonWord = rv.Item4;
                }

                // there are more than one scan patterns that match. Build an
                // AtomText for each match and return them as a list.
                else if (nonWord.FoundCount > 1)
                {
                    atomTextList = new List<MatchScanPattern>();
                    foreach (var pat in nonWord)
                    {
                        wordBx = new StreamLocation(nonWord.Position)
                            .ToTextLocation(ScanStream);
                        int lx = pat.MatchLength;
                        TextLocation wordEx =
                            new StreamLocation(nonWord.Position + lx - 1)
                            .ToTextLocation(ScanStream);
                        string scanText =
                            ScanStream.Stream.Substring(nonWord.Position, lx);

                        atomText = new AtomText(
                            pat.MatchPattern.DelimClassification.ToScanAtomCode().Value,
                            scanText,
                            pat.MatchPattern.ReplacementValue,
                            wordBx, wordEx,
                            pat.MatchPattern.UserCode);

                        pat.AtomText = atomText;
                        atomTextList.Add(pat);
                    }
                }

                // a single non word or keyword pattern.
                else
                {
                    var foundPat = nonWord.FirstFoundPattern;
                    wordBx = new StreamLocation(nonWord.Position)
                        .ToTextLocation(ScanStream);
                    int lx = foundPat.MatchLength;
                    TextLocation wordEx =
                        new StreamLocation(nonWord.Position + lx - 1)
                        .ToTextLocation(ScanStream);
                    string scanText =
                        ScanStream.Stream.Substring(nonWord.Position, lx);

                    atomText = new AtomText(
                        foundPat.MatchPattern.DelimClassification.ToScanAtomCode().Value,
                        scanText,
                        foundPat.MatchPattern.ReplacementValue,
                        wordBx, wordEx,
                        foundPat.MatchPattern.UserCode);
                }
            }
        }
        #endregion
    }

    // store the results in the return cursor.
    ScanAtomCursor nx = null;
    if (atomText == null)
    {
        // nothing found. signal end of stream.
        nx = new ScanAtomCursor();
        nx.Position = RelativePosition.End;
    }
    else if (atomTextList != null)
    {
        nx = new ScanAtomCursor(atomTextList);
    }
    else
    {
        // prefer the pattern held by the scan results. fall back to nonWordPat
        // when there are no scan results ( the stay-at path, or word text that
        // ran to the end of the stream ).
        if ((nonWord == null) || (nonWord.IsEmpty == true))
        {
            nx = new ScanAtomCursor(atomText, nonWordPat);
        }
        else
        {
            nx = new ScanAtomCursor(atomText, nonWord.FirstFoundPattern.MatchPattern);
        }
        nx.Position = RelativePosition.At;
    }

    return (nx);
}