/// <summary>
/// Breaks a string holding several mail addresses into the individual
/// address strings. Addresses are separated by "," in the input.
/// </summary>
/// <param name="InString">Text containing the comma separated addresses.</param>
/// <returns>An ArrayList holding one string per address found.</returns>
public static ArrayList SplitStringOfMailAddresses(string InString)
{
    ArrayList addresses = new ArrayList( );

    // comma and tab divide words; space and tab count as whitespace.
    TextTraits scanTraits = new TextTraits()
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
    scanTraits.DividerPatterns.Replace(
        new string[] { ",", "\t" }, DelimClassification.DividerSymbol);
    scanTraits.WhitespacePatterns.Replace(
        " ", "\t", DelimClassification.Whitespace);

    WordCursor cursor = Scanner.PositionBeginWord(InString, scanTraits);
    for (;;)
    {
        // pair.a / pair.b are the first and last word of the next address.
        ObjectPair addrRange = ScanNextAddress(InString, cursor);
        cursor = (WordCursor)addrRange.b;

        // no first word means the end of the string was reached.
        if (addrRange.a == null)
        {
            return(addresses);
        }

        // pull the address text out of the word range and collect it.
        addresses.Add(PullMailAddress(InString, addrRange));
    }
}
// Builds an AcNamedValues collection from its serialized string form.
// Every word scanned from the string must be a content braced word; the
// braced text is handed to ParsePair to obtain the key and value.
// Throws ApplicationException when any other kind of word is found.
public static AcNamedValues Parse(string InString)
{
    AcNamedValues parsed = new AcNamedValues();

    TextTraits scanTraits = new TextTraits();
    scanTraits.OpenNamedBracedPatterns.Replace(
        "[", Text.Enums.DelimClassification.OpenNamedBraced);
    scanTraits.DividerPatterns.AddDistinct(
        ",", Text.Enums.DelimClassification.DividerSymbol);

    WordCursor cursor = Scanner.PositionBeginWord(InString, scanTraits);
    for (cursor = Scanner.ScanNextWord(InString, cursor);
         cursor.IsEndOfString != true;
         cursor = Scanner.ScanNextWord(InString, cursor))
    {
        bool isBracedEntry =
            (cursor.IsDelimOnly == false)
            && (cursor.Word.Class == WordClassification.ContentBraced);

        // anything other than a braced entry means the string is malformed.
        if (!isBracedEntry)
        {
            throw new ApplicationException(
                "serialized AcNamedValues string in invalid format");
        }

        KeyValuePair <string, string> entry = ParsePair(cursor.Word.BracedText);
        parsed.Add(entry.Key, entry.Value);
    }

    return(parsed);
}
// ------------------------- ParseContentType ----------------------------
// Parses the value of a content-type header into a PartProperty.ContentType.
//   - the word delimited by "/" is stored as the Type.
//   - the word delimited by ";" is stored as the SubType.
//   - a word delimited by "=" is a keyword. When the word after it is delimited
//     by whitespace or end of string, that word is the keyword value. The
//     charset, boundary and name keywords are stored; others are ignored.
// Type, SubType and the keyword name are lower cased with the invariant culture
// so the result does not depend on the current thread culture ( the culture
// sensitive ToLower maps "I" to a dotless i under Turkish cultures. )
public static PartProperty.ContentType ParseContentType(string InString)
{
    TextTraits traits = new TextTraits()
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
    traits.DividerPatterns.Replace(
        new string[] { "/", ":", ";", " ", "\t", "=" },
        DelimClassification.DividerSymbol);
    traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

    PartProperty.ContentType results = new PartProperty.ContentType();

    WordCursor csr = Scanner.PositionBeginWord(InString, traits);
    while (true)
    {
        csr = Scanner.ScanNextWord(InString, csr);
        if (csr.IsEndOfString)
        {
            break;
        }

        // content type
        if (csr.DelimValue == "/")
        {
            results.Type = csr.Word.ToString( ).ToLowerInvariant( );
        }

        // content sub type.
        else if (csr.DelimValue == ";")
        {
            results.SubType = csr.Word.ToString( ).ToLowerInvariant( );
        }

        // a kwd
        else if (csr.DelimValue == "=")
        {
            // peek at the word after the "=". It is the keyword value only when
            // it is delimited by whitespace or the end of the string.
            WordCursor nxCsr = csr.NextWord( );
            if ((nxCsr.DelimClass == DelimClassification.EndOfString) ||
                (nxCsr.DelimClass == DelimClassification.Whitespace))
            {
                string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                // advance the cursor onto the value word so that the next scan
                // continues after it.
                csr = nxCsr;

                if (kwd == "charset")
                {
                    results.CharSet = csr.Word.NonQuotedSimpleValue;
                }
                else if (kwd == "boundary")
                {
                    results.Boundary = csr.Word.NonQuotedSimpleValue;
                }
                else if (kwd == "name")
                {
                    results.Name = csr.Word.NonQuotedSimpleValue;
                }
            }
        }
    }

    return(results);
}
// ------------------------- ParseContentType ----------------------------
// Parses the value of a content-type header into a PartProperty.ContentType.
//   - the word delimited by "/" is stored as the Type.
//   - the word delimited by ";" is stored as the SubType.
//   - a word delimited by "=" is a keyword. When the word after it is delimited
//     by whitespace or end of string, that word is the keyword value. The
//     charset, boundary and name keywords are stored; others are ignored.
// Type, SubType and the keyword name are lower cased with the invariant culture
// so the result does not depend on the current thread culture ( the culture
// sensitive ToLower maps "I" to a dotless i under Turkish cultures. )
public static PartProperty.ContentType ParseContentType(string InString)
{
    TextTraits traits = new TextTraits( )
        .SetDelimChars("/:; \t=")
        .SetWhitespaceChars(" \t")
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

    PartProperty.ContentType results = new PartProperty.ContentType( );

    Scanner.WordCursor csr = Scanner.PositionBeginWord( );
    while (true)
    {
        csr = Scanner.ScanNextWord(InString, csr, traits);
        if (csr.IsEndOfString)
        {
            break;
        }

        // content type
        if (csr.Delim == "/")
        {
            results.Type = csr.Word.ToString( ).ToLowerInvariant( );
        }

        // content sub type.
        else if (csr.Delim == ";")
        {
            results.SubType = csr.Word.ToString( ).ToLowerInvariant( );
        }

        // a kwd
        else if (csr.Delim == "=")
        {
            // peek at the word after the "=". It is the keyword value only when
            // it is delimited by whitespace or the end of the string.
            Scanner.WordCursor nxCsr = csr.NextWord( );
            if ((nxCsr.DelimClass == DelimClassification.End) ||
                (nxCsr.DelimClass == DelimClassification.Whitespace))
            {
                string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                // advance the cursor onto the value word so that the next scan
                // continues after it.
                csr = nxCsr;

                if (kwd == "charset")
                {
                    results.CharSet = csr.Word.NonQuotedWord;
                }
                else if (kwd == "boundary")
                {
                    results.Boundary = csr.Word.NonQuotedWord;
                }
                else if (kwd == "name")
                {
                    results.Name = csr.Word.NonQuotedWord;
                }
            }
        }
    }

    return(results);
}
// ------------------------ Load ----------------------------------
// Stores the content-type line and the traits used to scan it, runs Parse
// and returns this object so that calls can be chained.
public MimeContentTypexLine Load(string InLine)
{
    // the line is stepped word by word using these delimiter, whitespace and
    // quote rules.
    var lineTraits = new TextTraits( )
        .SetDelimChars("/:; \t=")
        .SetWhitespaceChars(" \t")
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

    mTraits = lineTraits;
    mLine = InLine;

    Parse( );
    return(this);
}
// Isolates the run of decimal digits that starts at Bx and locates the
// delimiter pattern that follows it.
// Returns ( literal type, literal text, delim pattern, delim position ). The
// literal type is always Integer; floats, decimals and signs are not yet
// recognized. The pattern is null and the position -1 when the digits run to
// the end of the stream.
IsolateNumericLiteral(
    ScanStream ScanStream, TextTraits Traits, int Bx)
{
    // walk forward while the chars are digits, remembering the last digit.
    int lastDigitIx = Bx;
    for (int ix = Bx; ix < ScanStream.Stream.Length; ix++)
    {
        if (Char.IsDigit(ScanStream.Stream[ix]) == false)
        {
            break;
        }
        lastDigitIx = ix;
    }

    // the text of the literal.
    string digits = ScanStream.Substring(Bx, lastDigitIx - Bx + 1);

    // look for the delim after the literal, but only when the literal does
    // not end the stream.
    ScanPattern delimPat = null;
    int delimIx = -1;
    int afterIx = lastDigitIx + 1;
    if (afterIx < ScanStream.Stream.Length)
    {
        var found =
            Scanner.ScanEqualAny(ScanStream.Stream, afterIx, Traits.DelimPatterns);
        delimPat = found.Item1;
        delimIx = found.Item2;
    }

    return(new Tuple <LiteralType, string, ScanPattern, int>(
               LiteralType.Integer, digits, delimPat, delimIx));
}
// ----------------------- CalcScanNextStart ---------------------------
// Computes the stream position at which the scan for the next word starts,
// based on where the cursor currently sits. Returns -1 when there is nothing
// left to scan ( position None, an unknown position, or a start position that
// is past the last char of the stream. )
private static int CalcScanNextStart(
    ScanStream ScanStream, TextTraits Traits, ScanWordCursor Cursor)
{
    int startIx = -1;
    var position = Cursor.Position;

    if (position == RelativePosition.Begin)
    {
        startIx = 0;
    }
    else if (position == RelativePosition.Before)
    {
        startIx = Cursor.CursorBx.ToStreamLocation(ScanStream).Value;
    }
    else if ((position == RelativePosition.After)
             || (position == RelativePosition.At))
    {
        // a non word delim is stepped over entirely.
        if (Traits.IsNonWordPattern(Cursor.DelimPattern))
        {
            startIx = Cursor.DelimEx.ToStreamLocation(ScanStream).Value + 1;
        }
        // the word is itself the delim. resume after the word.
        else if (Cursor.WordIsDelim == true)
        {
            startIx = Cursor.WordEx.ToStreamLocation(ScanStream).Value + 1;
        }
        // otherwise resume at the delim so it is scanned as the next word.
        else
        {
            startIx = Cursor.DelimBx.ToStreamLocation(ScanStream).Value;
        }
    }
    else if (position == RelativePosition.End)
    {
        startIx = ScanStream.Stream.Length;
    }

    // a start position beyond the last char means nothing more to scan.
    int lastIx = ScanStream.Stream.Length - 1;
    return((startIx > lastIx) ? -1 : startIx);
}
// Isolates the quoted or verbatim string literal that starts at Bx and locates
// the delimiter pattern that follows it.
// Returns ( literal type, literal text, delim pattern, delim position ).
// The delim pattern is null and the delim position -1 when the literal ends
// the stream or when no closing quote was found. The delim scan used to run
// from quoteEx + 1 unconditionally, so an unterminated literal ( quoteEx == -1 )
// restarted the scan at position 0 and could report a delimiter located before
// Bx.
// NOTE(review): for an unterminated quote the literal text stays null and the
// type stays LiteralType.none, as before - confirm callers cope with that.
IsolateQuotedWord(
    ScanStream ScanStream, TextTraits Traits, int Bx)
{
    LiteralType litType = LiteralType.none;
    string litText = null;
    char ch1 = ScanStream.Stream[Bx];
    ScanPattern foundPat = null;
    int foundIx = -1;
    int quoteEx = -1;

    // is start of a verbatim string literal
    if ((Traits.VerbatimLiteralPattern != null) &&
        (Traits.VerbatimLiteralPattern.Match(ScanStream.Stream, Bx)))
    {
        var rv = VerbatimLiteral.ScanCloseQuote(
            ScanStream.Stream, Traits.VerbatimLiteralPattern, Bx);
        quoteEx = rv.Item1;
        litText = rv.Item2;
        litType = LiteralType.VerbatimString;
    }

    // is a quoted literal
    else if (Traits.IsQuoteChar(ch1) == true)
    {
        quoteEx = Scanner.ScanCloseQuote(ScanStream.Stream, Bx, Traits.QuoteEncapsulation);
        if (quoteEx != -1)
        {
            int lx = quoteEx - Bx + 1;
            litText = ScanStream.Substring(Bx, lx);

            // correct the following at some point. Should be either string or
            // char lit.
            litType = LiteralType.String;
        }
    }

    // isolate the delim that follows that quoted word. Only when the close
    // quote was found and something follows it in the stream.
    if (quoteEx != -1)
    {
        int bx = quoteEx + 1;
        if (bx < ScanStream.Stream.Length)
        {
            var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
            foundPat = rv.Item1;
            foundIx = rv.Item2;
        }
    }

    return(new Tuple <LiteralType, string, ScanPattern, int>(
               litType, litText, foundPat, foundIx));
}
// Splits a directory listing line into its words.
// The 4th word is the file/dir entry name. An entry name can contain spaces,
// so that one word is scanned with traits where only the tab is whitespace,
// and the resulting word text is trimmed.
string[] Parse_CrackWords(string InLine)
{
    List <string> cracked = new List <string>();

    // traits for the ordinary words: no dividers and no brace patterns.
    TextTraits generalTraits = new TextTraits();
    generalTraits.DividerPatterns.Clear( );
    generalTraits.OpenContentBracedPatterns.Clear();
    generalTraits.OpenNamedBracedPatterns.Clear();

    // traits for the entry name: same as above, but whitespace is tab only.
    TextTraits nameTraits = new TextTraits(generalTraits);
    nameTraits.WhitespacePatterns.Replace("\t", Text.Enums.DelimClassification.Whitespace);

    WordCursor cursor = Scanner.PositionBeginWord(InLine, generalTraits);
    while (true)
    {
        // three words collected so far means the next one is the entry name.
        bool scanningEntryName = (cracked.Count == 3);
        cursor.TextTraits = scanningEntryName ? nameTraits : generalTraits;

        cursor = Scanner.ScanNextWord(InLine, cursor);
        if (cursor.IsEndOfString == true)
        {
            break;
        }

        string wordText = cursor.Word.Value;
        cracked.Add(scanningEntryName ? wordText.Trim() : wordText);
    }

    return(cracked.ToArray());
}
// ------------------------ Load ----------------------------------
// Stores the content-type line and the traits used to scan it, runs Parse
// and returns this object so that calls can be chained.
public MimeContentTypexLine Load(string InLine)
{
    // quote handling for the words of the content-type line.
    mTraits = new TextTraits()
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

    // the symbols that divide one word from the next.
    string[] dividerSymbols = new string[] { "/", ":", ";", " ", "\t", "=" };
    mTraits.DividerPatterns.AddDistinct(
        new ScanPatterns(
            dividerSymbols, Text.Enums.DelimClassification.DividerSymbol));

    // space and tab are whitespace.
    mTraits.WhitespacePatterns.Replace(
        " ", "\t", Text.Enums.DelimClassification.Whitespace);

    mLine = InLine;
    Parse( );

    return(this);
}
// --------------------------- IsolateDelim_SetDelimIsWhitespace ----------
// Records whitespace as the delimiter of the word in Results. The delim value
// is the string that ScanNotEqual reports as scanned over, starting at WsIx,
// so the whitespace can be checked later for tabs or newlines.
private static void IsolateDelim_SetDelimIsWhitespace(
    string Text, TextTraits Traits, WordCursor Results, int WsIx)
{
    int lastIx = Text.Length - 1;

    // scan from the start of the whitespace up to the first non whitespace.
    string whitespaceRun =
        Scanner.ScanNotEqual(Text, WsIx, lastIx, Traits.WhitespacePatterns)
        .ScannedOverString;

    Results.SetDelim(Text, whitespaceRun, WsIx, DelimClassification.Whitespace);
    Results.DelimIsWhitespace = true;
}
// ------------------------------ ScanNextAddress ---------------------------
// Scans forward from InWord over the words of one mail address.
// Returns an ObjectPair holding the first and the last word of the address.
// Both are null when the end of the string is reached before any word is found.
private static ObjectPair ScanNextAddress(
    string InString, WordCursor InWord)
{
    // comma and tab divide words; space and tab count as whitespace.
    TextTraits addrTraits = new TextTraits()
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
    addrTraits.DividerPatterns.Replace(
        new string[] { ",", "\t" }, DelimClassification.DividerSymbol);
    addrTraits.WhitespacePatterns.Replace(
        " ", "\t", DelimClassification.Whitespace);

    WordCursor firstWord = null;
    WordCursor lastWord = null;

    WordCursor cursor = InWord;
    cursor.TextTraits = addrTraits;

    // step word by word. Stop after the word that is delimited by the comma
    // between addresses, or by nothing ( end of the string ).
    do
    {
        cursor = Scanner.ScanNextWord(InString, cursor);
        if (cursor.IsEndOfString)
        {
            break;
        }

        // grow the range of words that make up this address.
        if (firstWord == null)
        {
            firstWord = cursor;
        }
        lastWord = cursor;
    }
    while ((cursor.DelimValue != ",") && (cursor.DelimValue != ""));

    return(new ObjectPair(firstWord, lastWord));
}
// Parses the value of a content-disposition header.
//   - the word delimited by ";" is stored as the Disposition.
//   - a word delimited by "=" is a keyword. When the word after it is delimited
//     by whitespace or end of string, that word is the keyword value. Only the
//     filename keyword is stored.
// The disposition and the keyword name are lower cased with the invariant
// culture. The culture sensitive ToLower turns "FILENAME" into a string with a
// dotless i under Turkish cultures, which then never matches "filename".
ParseContentDisposition(string InString)
{
    TextTraits traits = new TextTraits()
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
    traits.DividerPatterns.Replace(
        new string[] { ";", " ", "\t", "=" }, DelimClassification.DividerSymbol);
    traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

    PartProperty.ContentDisposition results = new PartProperty.ContentDisposition();

    WordCursor csr = Scanner.PositionBeginWord(InString, traits);
    while (true)
    {
        csr = Scanner.ScanNextWord(InString, csr);
        if (csr.IsEndOfString)
        {
            break;
        }

        // content disposition
        if (csr.DelimValue == ";")
        {
            results.Disposition = csr.Word.ToString( ).ToLowerInvariant( );
        }

        // a kwd
        else if (csr.DelimValue == "=")
        {
            // peek at the word after the "=". It is the keyword value only when
            // it is delimited by whitespace or the end of the string.
            WordCursor nxCsr = csr.NextWord( );
            if ((nxCsr.DelimClass == DelimClassification.EndOfString) ||
                (nxCsr.DelimClass == DelimClassification.Whitespace))
            {
                string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                // advance the cursor onto the value word so that the next scan
                // continues after it.
                csr = nxCsr;

                if (kwd == "filename")
                {
                    results.FileName = csr.Word.NonQuotedSimpleValue;
                }
            }
        }
    }

    return(results);
}
// ------------------------------ ScanNextAddress ---------------------------
// Scans forward from InWord over the words of one mail address.
// Returns an ObjectPair holding the first and the last word of the address.
// Both are null when the end of the string is reached before any word is found.
private static ObjectPair ScanNextAddress(
    string InString, Scanner.WordCursor InWord)
{
    // comma, space and tab delimit words; space and tab count as whitespace.
    TextTraits addrTraits = new TextTraits( )
        .SetDelimChars(", \t")
        .SetWhitespaceChars(" \t")
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

    Scanner.WordCursor firstWord = null;
    Scanner.WordCursor lastWord = null;
    Scanner.WordCursor cursor = InWord;

    // step word by word. Stop after the word that is delimited by the comma
    // between addresses, or by nothing ( end of the string ).
    do
    {
        cursor = Scanner.ScanNextWord(InString, cursor, addrTraits);
        if (cursor.IsEndOfString)
        {
            break;
        }

        // grow the range of words that make up this address.
        if (firstWord == null)
        {
            firstWord = cursor;
        }
        lastWord = cursor;
    }
    while ((cursor.Delim != ",") && (cursor.Delim != ""));

    return(new ObjectPair(firstWord, lastWord));
}
// Parses the value of a content-disposition header.
//   - the word delimited by ";" is stored as the Disposition.
//   - a word delimited by "=" is a keyword. When the word after it is delimited
//     by whitespace or end of string, that word is the keyword value. Only the
//     filename keyword is stored.
// The disposition and the keyword name are lower cased with the invariant
// culture. The culture sensitive ToLower turns "FILENAME" into a string with a
// dotless i under Turkish cultures, which then never matches "filename".
ParseContentDisposition(string InString)
{
    TextTraits traits = new TextTraits( )
        .SetDelimChars("; \t=")
        .SetWhitespaceChars(" \t")
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

    PartProperty.ContentDisposition results = new PartProperty.ContentDisposition( );

    Scanner.WordCursor csr = Scanner.PositionBeginWord( );
    while (true)
    {
        csr = Scanner.ScanNextWord(InString, csr, traits);
        if (csr.IsEndOfString)
        {
            break;
        }

        // content disposition
        if (csr.Delim == ";")
        {
            results.Disposition = csr.Word.ToString( ).ToLowerInvariant( );
        }

        // a kwd
        else if (csr.Delim == "=")
        {
            // peek at the word after the "=". It is the keyword value only when
            // it is delimited by whitespace or the end of the string.
            Scanner.WordCursor nxCsr = csr.NextWord( );
            if ((nxCsr.DelimClass == DelimClassification.End) ||
                (nxCsr.DelimClass == DelimClassification.Whitespace))
            {
                string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                // advance the cursor onto the value word so that the next scan
                // continues after it.
                csr = nxCsr;

                if (kwd == "filename")
                {
                    results.FileName = csr.Word.NonQuotedWord;
                }
            }
        }
    }

    return(results);
}
// ----------------------- CalcScanNextStart ---------------------------
// Computes the stream position at which the scan for the next atom starts,
// based on where the cursor currently sits. Returns -1 when there is nothing
// left to scan ( position None, an unknown position, or a start position that
// is past the last char of the stream. )
private static int CalcScanNextStart(
    ScanStream ScanStream, TextTraits Traits, ScanAtomCursor Cursor)
{
    int startIx = -1;
    var position = Cursor.Position;

    if (position == RelativePosition.Begin)
    {
        startIx = 0;
    }
    else if (position == RelativePosition.Before)
    {
        startIx = Cursor.StartLoc.ToStreamLocation(ScanStream).Value;
    }
    else if ((position == RelativePosition.After)
             || (position == RelativePosition.At))
    {
        // resume immed after the end of the current atom.
        startIx = Cursor.EndLoc.ToStreamLocation(ScanStream).Value + 1;
    }
    else if (position == RelativePosition.End)
    {
        startIx = ScanStream.Stream.Length;
    }

    // a start position beyond the last char means nothing more to scan.
    int lastIx = ScanStream.Stream.Length - 1;
    return((startIx > lastIx) ? -1 : startIx);
}
// Builds the TextWord found at stream position Bx and the text location of
// its first char.
//   - LiteralType is null: the word is an identifier that runs from Bx up to,
//     but not including, NonWordBx. A null NonWordBx means the word runs to
//     the end of the stream.
//   - LiteralType has a value: the word is the literal text LitText.
private static Tuple <TextLocation, TextWord> IsolateWordText(
    ScanStream ScanStream, TextTraits Traits,
    LiteralType?LiteralType, string LitText,
    int Bx, int?NonWordBx)
{
    // the word starts at Bx whether it is a literal or not.
    TextLocation startLoc = new StreamLocation(Bx).ToTextLocation(ScanStream);

    TextWord word = null;
    if (LiteralType.HasValue)
    {
        // a quoted or numeric literal. the text was isolated by the caller.
        word = new TextWord(LitText, LiteralType.Value, Traits);
    }
    else
    {
        // position immed after the last char of the word.
        int wordEndIx = NonWordBx ?? ScanStream.Stream.Length;
        word = new TextWord(
            ScanStream.Substring(Bx, wordEndIx - Bx),
            WordClassification.Identifier, Traits);
    }

    return(new Tuple <TextLocation, TextWord>(startLoc, word));
}
// -------------------- IsolateDelim ---------------------------
// Stores in Results the delimiter that follows the word just scanned.
//   Text           - the string being scanned.
//   PatternResults - result of the scan for the non word pattern after the word.
//   Results        - the word cursor that receives the delim.
//   Traits         - the scan rules in effect.
// A former look ahead past whitespace was removed from this method: its scan
// result was never used ( the if that tested it had an empty body ), so it
// only cost an extra pass over the whitespace. The whitespace look ahead that
// matters is done by IsolateDelim_WhitespaceFollows.
private static void IsolateDelim(
    string Text, ScanPatternResults PatternResults,
    ref WordCursor Results, TextTraits Traits)
{
    // did not find a nonword char. must have hit end of string.
    if (PatternResults.IsNotFound)
    {
        Results.DelimClass = DelimClassification.EndOfString;
        return;
    }

    // we have a delimiter of some kind.
    DelimClassification sprdc = PatternResults.FoundPat.DelimClassification;

    Results.WhitespaceFollowsWord = false;
    Results.WhitespaceFollowsDelim = false;
    Results.DelimIsWhitespace = false;

    // whitespace immed follows the word text
    if (sprdc == DelimClassification.Whitespace)
    {
        ScanWord.IsolateDelim_WhitespaceFollows(
            Text, PatternResults, ref Results, Traits);
        return;
    }

    // the delim is a hard delim ( not whitespace )

    // Want the openContent brace to be processed as a standalone word. Use
    // virtual whitespace so the word that this open brace is the delim of will
    // have what appears to be a whitespace delim. Then the following word will
    // be the standalone open content brace char.
    if ((sprdc == DelimClassification.OpenContentBraced) &&
        (Traits.VirtualWhitespace == true))
    {
        Results.SetDelim(
            Text, null,
            PatternResults.FoundPos, DelimClassification.VirtualWhitespace);
        return;
    }

    // delim is either as classified in the collection of NonWords or is
    // a PathPart delim.
    ScanPattern pat = Traits.GetPathPartDelim(
        Text, PatternResults.FoundPos);
    if (pat != null)
    {
        Results.SetDelim(
            Text, pat.PatternValue,
            PatternResults.FoundPos, DelimClassification.PathSep);
    }
    else
    {
        Results.SetDelim(
            Text, PatternResults.FoundPat.PatternValue,
            PatternResults.FoundPos, sprdc);
    }
}
/// <summary>
/// Sets the delim of a word that is immediately followed by whitespace.
/// When a non word pattern follows the whitespace, and that pattern is not a
/// path part delim, an open brace or a quote, the pattern becomes the delim and
/// the whitespace is disregarded. In every other case the whitespace itself is
/// the delim.
/// </summary>
/// <param name="Text">The string being scanned.</param>
/// <param name="PatternResults">Scan results locating the whitespace found
/// after the word.</param>
/// <param name="Results">The word cursor that receives the delim.</param>
/// <param name="Traits">The scan rules in effect.</param>
private static void IsolateDelim_WhitespaceFollows(
    string Text, ScanPatternResults PatternResults,
    ref WordCursor Results, TextTraits Traits)
{
    Results.WhitespaceFollowsWord = true;

    int lastIx = Text.Length - 1;

    // find the first char after the whitespace.
    ScanPatternResults afterWs = Scanner.ScanNotEqual(
        Text, PatternResults.FoundPos, lastIx,
        Traits.WhitespacePatterns);

    // see if a non word pattern starts at that char.
    ScanPattern hardPat = null;
    if (afterWs.FoundPos != -1)
    {
        hardPat = Traits.NonWordPatterns.MatchPatternsAtStringLocation(
            Text, afterWs.FoundPos, lastIx).Item1;
    }

    // nothing but word text after the whitespace. the whitespace is the
    // delim of record.
    if (hardPat == null)
    {
        ScanWord.IsolateDelim_SetDelimIsWhitespace(
            Text, Traits, Results, PatternResults.FoundPos);
        return;
    }

    DelimClassification hardClass = hardPat.DelimClassification;

    // a path separator, an open brace and a quote each begin a word of their
    // own, so the delim stays as the whitespace.
    bool startsNextWord =
        Traits.IsPathPartDelim(Text, afterWs.FoundPos)
        || hardClass.IsOpenBraced( )
        || (hardClass == DelimClassification.Quote);

    if (startsNextWord)
    {
        ScanWord.IsolateDelim_SetDelimIsWhitespace(
            Text, Traits, Results, PatternResults.FoundPos);
    }
    // is an actual delim.
    else
    {
        Results.SetDelim(
            Text, hardPat.PatternValue, afterWs.FoundPos, hardClass);
    }
}
// ------------------------ ScanNextAtom -------------------------
// Scans to the next atom in the stream. ( an atom being a word, literal,
// delimiter pattern, comment or run of significant whitespace, as bounded by
// the patterns spcfd in the TextTraits argument )
//   ScanStream  - the stream being scanned.
//   Traits      - whitespace, delim and newline patterns that drive the scan.
//   CurrentWord - cursor of the current atom. The scan resumes relative to it.
// Returns a new cursor:
//   - Position = End when no atom was found.
//   - built from the list of matches when more than one pattern matches at
//     the same location.
//   - otherwise Position = At, holding the atom text and its pattern.
public static ScanAtomCursor ScanNextAtom(
    ScanStream ScanStream, TextTraits Traits, ScanAtomCursor CurrentWord)
{
    // components of the next word.
    TextLocation wordBx = null;
    int nonWordIx = -1;
    int nonWordLx = 0;
    ScanPattern nonWordPat = null;
    List <MatchScanPattern> nonWordPatList = null;
    AtomText atomText = null;
    List <MatchScanPattern> atomTextList = null;
    AtomText whitespaceText = null;
    // ScanAtomCode? priorCode = null;
    bool?priorCodeIsWhitespaceSignificant = null;

    // stay at the current location. The returned cursor is built from the atom
    // text and pattern of the current cursor.
    if (CurrentWord.StayAtFlag == true)
    {
        atomText = CurrentWord.AtomText;
        nonWordPat = CurrentWord.AtomPattern;
        wordBx = CurrentWord.StartLoc;
    }
    else
    {
        #region STEP1 setup the begin pos of the next word.
        // ----------------------------- STEP 1 ------------------------------
        // setup the begin pos of the next word.
        int bx;
        {
            // when positioned at or after an atom, save whether whitespace that
            // follows that atom is significant.
            if ((CurrentWord.Position == RelativePosition.At) ||
                (CurrentWord.Position == RelativePosition.After))
            {
                priorCodeIsWhitespaceSignificant = CurrentWord.WhitespaceIsSignificant;
                // priorCode = CurrentWord.AtomText.AtomCode;
            }

            // calc scan start position. -1 when there is nothing left to scan.
            bx = ScanAtom.CalcScanNextStart(ScanStream, Traits, CurrentWord);

            // advance past whitespace
            if (bx != -1)
            {
                int saveBx = bx;
                bx = Scanner.ScanNotEqual(ScanStream.Stream, Traits.WhitespacePatterns, bx);

                // there is some whitespace. depending on what preceeds and follows, may
                // return this as the atom.
                if ((priorCodeIsWhitespaceSignificant != null) &&
                    (priorCodeIsWhitespaceSignificant.Value == true))
                {
                    if (bx != saveBx)
                    {
                        // bx == -1 is treated as whitespace running to the end
                        // of the stream.
                        int whitespaceEx = -1;
                        if (bx == -1)
                        {
                            whitespaceEx = ScanStream.Stream.Length - 1;
                        }
                        else
                        {
                            whitespaceEx = bx - 1;
                        }
                        int whitespaceLx = whitespaceEx - saveBx + 1;

                        // the whitespace atom. " " is passed as the argument that
                        // follows the actual whitespace text.
                        whitespaceText = new AtomText(
                            ScanAtomCode.Whitespace,
                            ScanStream.Stream.Substring(saveBx, whitespaceLx), " ",
                            new StreamLocation(saveBx).ToTextLocation(ScanStream),
                            new StreamLocation(whitespaceEx).ToTextLocation(ScanStream));
                    }
                }
            }
        }
        // end STEP 1.
        #endregion

        #region STEP 2. Isolate either numeric lib, quoted lit or scan to non word pattern
        // ------------------------------- STEP 2 ----------------------------------
        // Isolate either numeric literal, quoted literal or scan to the next non word
        // pattern.
        LiteralType?litType = null;
        string litText = null;
        {
            // got a decimal digit. isolate the numeric literal string.
            if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
            {
                var rv = Scanner.IsolateNumericLiteral(ScanStream, Traits, bx);
                litType = rv.Item1;
                litText = rv.Item2;
                nonWordPat = rv.Item3; // the non word pattern immed after numeric literal
                nonWordIx = rv.Item4;  // pos of foundPat
            }

            // got something. now scan forward for the pattern that delimits the word.
            else if (bx != -1)
            {
                {
                    var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
                    nonWordPat = rv.Item1;
                    nonWordIx = rv.Item2;
                    nonWordLx = rv.Item3;
                    nonWordPatList = rv.Item4;
                }

                // got a quote char. Isolate the quoted string, then find the delim that follows
                // the quoted string.
                if ((nonWordPat != null) &&
                    (nonWordPat.DelimClassification == DelimClassification.Quote) &&
                    (nonWordIx == bx))
                {
                    var rv = Scanner.IsolateQuotedWord(ScanStream, Traits, nonWordIx);
                    litType = rv.Item1;
                    litText = rv.Item2;
                    nonWordPat = rv.Item3; // the non word pattern immed after quoted literal
                    nonWordIx = rv.Item4;  // pos of foundPat.
                }
            }
        }
        // end STEP 2.
        #endregion

        #region STEP 3 - setup wordBx and wordPart with the found word.
        {
            // got nothing.
            if (bx == -1)
            {
            }

            // no delim found. word text all the way to the end.
            else if (nonWordIx == -1)
            {
                // pending whitespace is returned first, in place of the word.
                if (whitespaceText != null)
                {
                    atomText = whitespaceText;
                    nonWordPat = null;
                    nonWordPatList = null;
                }
                else
                {
                    var rv = Scanner.IsolateWordText(
                        ScanStream, Traits, litType, litText, bx, null);
                    atomText = rv.Item3;
                    wordBx = atomText.StartLoc;
                }
            }

            // got a word and a non word pattern.
            else if (nonWordIx > bx)
            {
                // pending whitespace is returned first, in place of the word.
                if (whitespaceText != null)
                {
                    atomText = whitespaceText;
                    nonWordPat = null;
                    nonWordPatList = null;
                }
                else
                {
                    var rv = Scanner.IsolateWordText(
                        ScanStream, Traits, litType, litText, bx, nonWordIx);
                    atomText = rv.Item3;
                    wordBx = atomText.StartLoc;
                }
            }

            // no word. just delim.
            else
            {
                // the delim is comment to end. store as a word.
                if (nonWordPat.DelimClassification == DelimClassification.CommentToEnd)
                {
                    // look for the new line pattern that ends the comment.
                    var rv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.NewLinePatterns);
                    var eolPat = rv.Item1;
                    var eolIx = rv.Item2;

                    // no new line. the comment runs to the end of the stream.
                    if (eolPat == null)
                    {
                        int ex = ScanStream.Stream.Length - 1;
                        wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                        TextLocation wordEx = new StreamLocation(ex).ToTextLocation(ScanStream);
                        string commentText = ScanStream.Substring(nonWordIx);
                        atomText = new AtomText(
                            ScanAtomCode.CommentToEnd, commentText, null, wordBx, wordEx);
                        nonWordPat = null;
                        nonWordPatList = null;
                    }

                    // the comment runs up to, but does not include, the new line.
                    // The new line pattern is stored as the pattern of the atom.
                    else
                    {
                        wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                        int lx = eolIx - nonWordIx;
                        TextLocation wordEx =
                            new StreamLocation(nonWordIx + lx - 1).ToTextLocation(ScanStream);
                        string commentText = ScanStream.Substring(nonWordIx, lx);
                        atomText = new AtomText(
                            ScanAtomCode.CommentToEnd, commentText, null, wordBx, wordEx);
                        // NOTE(review): sloc is not used after this assignment.
                        var sloc = wordBx.ToStreamLocation(ScanStream);
                        nonWordPat = eolPat;
                        nonWordPatList = null;
                    }
                }

                // the word found is a non word or keyword pattern.
                else
                {
                    // got whitespace followed by keyword. Return the whitespace.
                    if ((nonWordPat.DelimClassification == DelimClassification.Keyword) &&
                        (whitespaceText != null))
                    {
                        atomText = whitespaceText;
                        nonWordPat = null;
                        nonWordPatList = null;
                    }

                    // there are more than one scan patterns that match. Build an
                    // AtomText for each match and return the list of them.
                    else if (nonWordPatList != null)
                    {
                        atomTextList = new List <MatchScanPattern>();
                        foreach (var pat in nonWordPatList)
                        {
                            wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                            int lx = pat.MatchLength;
                            TextLocation wordEx =
                                new StreamLocation(nonWordIx + lx - 1).ToTextLocation(ScanStream);
                            string scanText = ScanStream.Stream.Substring(nonWordIx, lx);

                            atomText = new AtomText(
                                pat.MatchPattern.DelimClassification.ToScanAtomCode().Value,
                                scanText, pat.MatchPattern.ReplacementValue,
                                wordBx, wordEx);
                            pat.AtomText = atomText;
                            atomTextList.Add(pat);
                        }
                    }

                    // a single pattern matched. the pattern text is the atom.
                    else
                    {
                        wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                        int lx = nonWordLx;
                        TextLocation wordEx =
                            new StreamLocation(nonWordIx + lx - 1).ToTextLocation(ScanStream);
                        string scanText = ScanStream.Stream.Substring(nonWordIx, lx);

                        atomText = new AtomText(
                            nonWordPat.DelimClassification.ToScanAtomCode().Value,
                            scanText, nonWordPat.ReplacementValue,
                            wordBx, wordEx);
                    }
                }
            }
        }
        #endregion
    }

    // store the results in the return cursor.
    ScanAtomCursor nx = null;

    // no atom found. return a cursor positioned at the end.
    if (atomText == null)
    {
        nx = new ScanAtomCursor( );
        nx.Position = RelativePosition.End;
    }

    // multiple patterns matched. the cursor holds all of them.
    else if (atomTextList != null)
    {
        nx = new ScanAtomCursor(atomTextList);
    }

    else
    {
        nx = new ScanAtomCursor(atomText, nonWordPat);
        nx.Position = RelativePosition.At;
    }

    return(nx);
}
// ------------------------ ScanNextWord -------------------------
// Scans to the next word in the stream. ( a word being the text bounded by the
// delimiter and whitespace patterns as spcfd in the TextTraits argument )
//   ScanStream  - the stream being scanned.
//   Traits      - whitespace, delim and newline patterns that drive the scan.
//   CurrentWord - cursor of the current word. The scan resumes relative to it.
// Returns a new cursor. Its Position is End when neither a word nor a delim
// was found; otherwise Position is At and the cursor holds the word, its
// location, and the location and pattern of the delim that follows the word.
//
// Changes from the prior version of this method:
//   - the look ahead past a whitespace delim no longer passes a -1 position to
//     DelimPatterns.MatchAt when only whitespace remains, and no longer starts
//     a scan at or past the end of the stream.
//   - the "#if skip" blocks of disabled code were removed.
public static ScanWordCursor ScanNextWord(
    ScanStream ScanStream, TextTraits Traits, ScanWordCursor CurrentWord)
{
    // components of the next word.
    TextWord wordPart = null;
    TextLocation wordBx = null;
    ScanPattern nonWordPat = null;
    TextLocation nonWordLoc = null;
    int nonWordIx = -1;

    // stay at the current location. The returned cursor is built from the word
    // and delim of the current cursor.
    if (CurrentWord.StayAtFlag == true)
    {
        nonWordPat = CurrentWord.DelimPattern;
        nonWordLoc = CurrentWord.DelimBx;
        wordPart = CurrentWord.Word;
        wordBx = CurrentWord.WordBx;
    }
    else
    {
        // ----------------------------- STEP 1 ------------------------------
        // setup the begin pos of the next word. -1 when nothing is left to scan.
        int bx = ScanWord.CalcScanNextStart(ScanStream, Traits, CurrentWord);

        // advance past whitespace
        if (bx != -1)
        {
            bx = Scanner.ScanNotEqual(ScanStream.Stream, Traits.WhitespacePatterns, bx);
        }

        // ------------------------------- STEP 2 ----------------------------------
        // Isolate either numeric literal, quoted literal or scan to the next non word
        // pattern.
        LiteralType?litType = null;
        string litText = null;

        // got a decimal digit. isolate the numeric literal string.
        if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
        {
            var numRv = ScanWord.IsolateNumericLiteral(ScanStream, Traits, bx);
            litType = numRv.Item1;
            litText = numRv.Item2;
            nonWordPat = numRv.Item3; // the non word pattern immed after numeric literal
            nonWordIx = numRv.Item4;  // pos of that pattern
        }

        // got something. now scan forward for the pattern that delimits the word.
        else if (bx != -1)
        {
            var delimRv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);
            nonWordPat = delimRv.Item1;
            nonWordIx = delimRv.Item2;

            // got a quote char. Isolate the quoted string, then find the delim that
            // follows the quoted string.
            if ((nonWordPat != null) &&
                (nonWordPat.DelimClassification == DelimClassification.Quote) &&
                (nonWordIx == bx))
            {
                var quoteRv = IsolateQuotedWord(ScanStream, Traits, nonWordIx);
                litType = quoteRv.Item1;
                litText = quoteRv.Item2;
                nonWordPat = quoteRv.Item3; // the non word pattern immed after quoted literal
                nonWordIx = quoteRv.Item4;  // pos of that pattern
            }
        }

        // ------------------------------- STEP 3 ----------------------------------
        // setup wordBx and wordPart with the found word.

        // got nothing.
        if (bx == -1)
        {
        }

        // no delim found. word text all the way to the end.
        else if (nonWordIx == -1)
        {
            var wordRv = ScanWord.IsolateWordText(
                ScanStream, Traits, litType, litText, bx, null);
            wordBx = wordRv.Item1;
            wordPart = wordRv.Item2;
        }

        // got a word and a non word pattern.
        else if (nonWordIx > bx)
        {
            var wordRv = ScanWord.IsolateWordText(
                ScanStream, Traits, litType, litText, bx, nonWordIx);
            wordBx = wordRv.Item1;
            wordPart = wordRv.Item2;

            nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
        }

        // no word. just delim.
        else
        {
            nonWordLoc = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);

            // the delim is comment to end. store as a word.
            if (nonWordPat.DelimClassification == DelimClassification.CommentToEnd)
            {
                // look for the new line pattern that ends the comment.
                var eolRv = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.NewLinePatterns);
                var eolPat = eolRv.Item1;
                var eolIx = eolRv.Item2;

                // no new line. the comment runs to the end of the stream and
                // has no delim.
                if (eolPat == null)
                {
                    wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                    wordPart = new TextWord(
                        ScanStream.Substring(nonWordIx),
                        WordClassification.CommentToEnd, Traits);
                    nonWordLoc = null;
                    nonWordPat = null;
                }

                // the comment runs up to the new line. The new line is the delim.
                else
                {
                    wordBx = new StreamLocation(nonWordIx).ToTextLocation(ScanStream);
                    int lx = eolIx - nonWordIx;
                    var sloc = wordBx.ToStreamLocation(ScanStream);
                    wordPart = new TextWord(
                        ScanStream.Substring(sloc.Value, lx),
                        WordClassification.CommentToEnd, Traits);
                    nonWordLoc = new StreamLocation(eolIx).ToTextLocation(ScanStream);
                    nonWordPat = eolPat;
                }
            }

            // if the delim pattern is not non word ( a divider ), store the pattern also
            // as the word.
            else if (Traits.DelimPatternsThatAreNonWords.Contains(nonWordPat) == false)
            {
                wordBx = nonWordLoc;
                wordPart = new TextWord(
                    nonWordPat.PatternValue,
                    nonWordPat.DelimClassification.ToWordClassification().Value,
                    Traits);
            }
        }

        // delim is whitespace. scan ahead for something more meaningful than whitespace.
        if ((nonWordPat != null) && (Traits.IsWhitespace(nonWordPat)))
        {
            StreamLocation dx = nonWordLoc.ToStreamLocation(ScanStream);
            int aheadBx = dx.Value + nonWordPat.Length;

            // nothing to look at when the whitespace delim ends the stream.
            int fx = -1;
            if (aheadBx < ScanStream.Stream.Length)
            {
                fx = Scanner.ScanNotEqual(
                    ScanStream.Stream, Traits.WhitespacePatterns, aheadBx);
            }

            // fx is -1 when only whitespace remains. The whitespace then stays
            // as the delim.
            if (fx != -1)
            {
                var pat = Traits.DelimPatterns.MatchAt(ScanStream.Stream, fx);
                if (pat != null)
                {
                    nonWordLoc = new StreamLocation(fx).ToTextLocation(ScanStream);
                    nonWordPat = pat;
                }
            }
        }
    }

    // store the results in the return cursor.
    ScanWordCursor nx = null;
    if ((wordPart == null) && (nonWordPat == null))
    {
        nx = new ScanWordCursor( );
        nx.Position = RelativePosition.End;
    }
    else
    {
        nx = new ScanWordCursor(wordPart, wordBx, nonWordLoc, nonWordPat);
        nx.Position = RelativePosition.At;
    }

    return(nx);
}
// ------------------------ ScanNextAtom -------------------------
// Scans to the next atom in the stream. An atom is one of: a run of whitespace,
// a numeric literal, a quoted literal, an identifier/keyword, a special value
// ( starter pattern + identifier ), a comment to end of line, or a delimiter
// pattern found in Traits.DelimPatterns.
//
// ScanStream  - the text being scanned.
// Traits      - supplies the whitespace, delimiter, keyword patterns.
// CurrentWord - cursor of the prior atom. The scan starts after it. When its
//               StayAtFlag is set, the atom of CurrentWord itself is returned.
//
// Returns a new ScanAtomCursor. At end of stream the cursor is returned with
// Position == RelativePosition.End ( the method does not return null. )
//
// Whitespace that precedes a token is returned as its own atom. The token that
// follows is then found by the next call.
public static ScanAtomCursor ScanNextAtom(
  ScanStream ScanStream, TextTraits Traits, ScanAtomCursor CurrentWord)
{
  PatternScanResults nonWord = null;
  AtomText atomText = null;
  List<MatchScanPattern> atomTextList = null;
  AtomText whitespaceText = null;
  ScanAtomCode? tokenCode = null;   // ScanAtomCode of this token.
  int? tokenLx = null;
  ScanAtomCode? priorTokenCode = null;
  bool? priorCodeIsWhitespaceSignificant = null;

  // stay at the current location. return a new cursor built from the atom text
  // and pattern of the current cursor.
  if (CurrentWord.StayAtFlag == true)
  {
    atomText = CurrentWord.AtomText;
    tokenCode = atomText.AtomCode;
    priorTokenCode = null;
    nonWord = new PatternScanResults(
      CurrentWord.AtomPattern,
      CurrentWord.StartLoc.ToStreamLocation(ScanStream).Value,
      CurrentWord.AtomPattern.Length);
  }

  else
  {
    #region STEP1 setup the begin pos of the next word.
    // ----------------------------- STEP 1 ------------------------------
    // setup the begin pos of the next word.
    int bx;
    {
      // save the ScanAtomCode of the prior word.
      if ((CurrentWord.Position == RelativePosition.At)
        || (CurrentWord.Position == RelativePosition.After))
      {
        priorTokenCode = CurrentWord.AtomCode;
        priorCodeIsWhitespaceSignificant = CurrentWord.WhitespaceIsSignificant;
      }

      // calc scan start position
      bx = ScanAtom.CalcScanNextStart(ScanStream, Traits, CurrentWord);

      // advance past whitespace
      if (bx != -1)
      {
        int saveBx = bx;
        bx = Scanner.ScanNotEqual(ScanStream.Stream, Traits.WhitespacePatterns, bx);

        // there is some whitespace. Isolate it as AtomText.
        // This method will return the whitespace as the token. But need to look at
        // the token before and after to classify the whitespace as significant or
        // not. ( whitespace between identifiers or keywords is significant.
        // Whitespace between symbols is not significant. )
        // note: even insignificant whitespace is returned as a token because the
        //       whitespace is needed when redisplaying the statement text.
        if (bx != saveBx)
        {
          // bx == -1 means the whitespace runs to the end of the stream.
          int whitespaceEx = -1;
          if (bx == -1)
          {
            whitespaceEx = ScanStream.Stream.Length - 1;
          }
          else
          {
            whitespaceEx = bx - 1;
          }
          int whitespaceLx = whitespaceEx - saveBx + 1;

          // split the whitespace between space/tab and EOL. The atom returned
          // holds only the leading run of one kind; it stops just before the
          // first char of the other kind.
          {
            int fx1 = ScanStream.Stream.IndexOfAny(new char[] { ' ', '\t' }, saveBx);
            int fx2 = ScanStream.Stream.IndexOfAny(new char[] { '\r', '\n' }, saveBx);

            // ignore hits that are past the end of the whitespace run.
            if (fx1 > whitespaceEx)
            {
              fx1 = -1;
            }
            if (fx2 > whitespaceEx)
            {
              fx2 = -1;
            }

            if ((fx1 == saveBx) && (fx2 != -1))
            {
              whitespaceEx = fx2 - 1;
            }
            if ((fx2 == saveBx) && (fx1 != -1))
            {
              whitespaceEx = fx1 - 1;
            }
            whitespaceLx = whitespaceEx - saveBx + 1;
          }

          string userCode = null;
          whitespaceText = new AtomText(
            ScanAtomCode.Whitespace,
            ScanStream.Stream.Substring(saveBx, whitespaceLx), " ",
            new StreamLocation(saveBx).ToTextLocation(ScanStream),
            new StreamLocation(whitespaceEx).ToTextLocation(ScanStream),
            userCode);
        }
      }
    }
    // end STEP 1.
    #endregion

    #region STEP 2. Isolate either numeric lit, quoted lit or identifier/keyword.
    // ------------------------------- STEP 2 ----------------------------------
    // Isolate either numeric literal, quoted literal or scan to the next non word
    // pattern.
    LiteralType? litType = null;
    string litText = null;
    {
      // got a decimal digit. isolate the numeric literal string.
      if ((bx != -1) && (Char.IsDigit(ScanStream.Stream[bx]) == true))
      {
        var rv = Scanner.IsolateNumericLiteral(ScanStream, Traits, bx);
        litType = rv.Item1;
        litText = rv.Item2;
        nonWord = rv.Item3;  // the non word pattern immed after numeric literal
        tokenCode = ScanAtomCode.Numeric;
      }

      // got something. now scan forward for the pattern that delimits the word.
      else if (bx != -1)
      {
        nonWord = Scanner.ScanEqualAny(ScanStream.Stream, bx, Traits.DelimPatterns);

        // a special value starter. scan further for the spcval word.
        // If an identifier follows, combine starter and identifier into a single
        // SpecialValue atom.
        var startPat = nonWord.FindPattern(DelimClassification.SpecialValueStarter);
        if (startPat != null)
        {
          var csr = new ScanAtomCursor(startPat, ScanStream);
          var nx = ScanAtom.ScanNextAtom(ScanStream, Traits, csr);
          if ((nx.Position == RelativePosition.At)
            && (nx.AtomCode.IsIdentifier() == true))
          {
            atomText = AtomText.Combine(
              startPat.AtomText, nx.AtomText, ScanAtomCode.SpecialValue);
          }
        }

        // got the AtomText of the token.
        if (atomText != null)
        {
        }

        // word chars all the way to the end.
        else if (nonWord == null)
        {
          tokenCode = ScanAtomCode.Identifier;
          tokenLx = ScanStream.Stream.Length - bx;
        }

        // a quote pattern at the scan start. isolate the quoted literal.
        else if (nonWord.FoundAtPosition(DelimClassification.Quote, bx))
        {
          var rv = Scanner.IsolateQuotedWord(ScanStream, Traits, bx);
          litType = rv.Item1;
          litText = rv.Item2;
          nonWord = rv.Item3; // the non word pattern immed after quoted literal
          tokenCode = ScanAtomCode.Quoted;
        }

        // delim pattern found past the start of the scan. That means there are
        // identifier chars from the start of the scan to the found delim.
        else if (bx != nonWord.Position)
        {
          tokenCode = ScanAtomCode.Identifier;
          tokenLx = nonWord.Position - bx;
        }

        // the delim pattern itself is the token.
        else if (nonWord.IsEmpty == false)
        {
          tokenCode =
            nonWord.FirstFoundPattern.MatchPattern.DelimClassification.ToScanAtomCode();
        }

        // should never get here.
        else
        {
          tokenCode = null;
        }
      }

      // attempt to classify the identifier token as a keyword.
      if (atomText == null)
      {
        if ((tokenCode != null) && (tokenCode.Value == ScanAtomCode.Identifier))
        {
          var rv = Traits.KeywordPatterns.MatchPatternToSubstring(
            ScanStream.Stream, bx, tokenLx.Value);
          var kwdResults = rv.Item3;
          var kwdPat = kwdResults.FirstFoundPattern;
          if (kwdPat != null)
          {
            tokenCode = kwdPat.MatchPattern.DelimClassification.ToScanAtomCode();
            nonWord = kwdResults;
          }
        }
      }
    }
    // end STEP 2.
    #endregion

    #region STEP 3 - setup atomText of the found token.
    {
      // got the atomText of the token. ( the special value )
      if (atomText != null)
      {
        nonWord = null;
      }

      // got whitespace. The whitespace is the atom returned. It is significant
      // only when both the prior token and the token that follows say so.
      else if (whitespaceText != null)
      {
        ScanAtomCode wstc = ScanAtomCode.Whitespace;
        if (priorTokenCode == null)
        {
          wstc = ScanAtomCode.InsignificantWhitespace;
        }
        else if (tokenCode == null)
        {
          wstc = ScanAtomCode.InsignificantWhitespace;
        }
        else if ((priorTokenCode.Value.WhitespaceIsSignificant() == true)
          && (tokenCode.Value.WhitespaceIsSignificant() == true))
        {
          wstc = ScanAtomCode.Whitespace;
        }
        else
        {
          wstc = ScanAtomCode.InsignificantWhitespace;
        }

        atomText = whitespaceText;
        atomText.AtomCode = wstc;
      }

      // got nothing.
      else if (bx == -1)
      {
      }

      // no delim found. word text all the way to the end.
      // ( whitespaceText is known to be null from here on. The whitespace case
      //   was handled above. )
      else if (nonWord.IsEmpty == true)
      {
        // get the text from start of scan to end of string.
        var rv = Scanner.IsolateWordText(
          ScanStream, Traits, litType, litText, bx, null);
        atomText = rv.Item3;
      }

      // got a word followed by non word pattern. return the word.
      else if (nonWord.Position > bx)
      {
        var rv = Scanner.IsolateWordText(
          ScanStream, Traits, litType, litText, bx, nonWord.Position);
        atomText = rv.Item3;
      }

      // no word. just delim.
      else
      {
        // the delim is comment to end. store as a word.
        if (nonWord.FirstFoundPattern.MatchPattern.DelimClassification ==
          DelimClassification.CommentToEnd)
        {
          var rv = ScanAtom.ClassifyAsComment(ScanStream, Traits, bx);
          atomText = rv.Item2;
          nonWord = rv.Item4;
        }

        // there are more than one scan patterns that match. return all of them
        // in the list. ( atomText stays null in this case. )
        else if (nonWord.FoundCount > 1)
        {
          atomTextList = new List<MatchScanPattern>();
          foreach (var pat in nonWord)
          {
            pat.AssignAtomText(ScanStream);
            atomTextList.Add(pat);
          }
        }

        // the word found is a single non word or keyword pattern.
        else
        {
          var foundPat = nonWord.FirstFoundPattern;
          foundPat.AssignAtomText(ScanStream);
          atomText = foundPat.AtomText;
        }
      }
    }
    #endregion
  }

  // store the results in the return cursor.
  {
    ScanAtomCursor nx = null;

    // multiple matching patterns. This test has to come before the atomText
    // null test: the multi pattern case fills only atomTextList, so testing
    // atomText first reported end of stream and dropped the matches.
    if (atomTextList != null)
    {
      nx = new ScanAtomCursor(atomTextList);
    }

    // nothing found. end of stream.
    else if (atomText == null)
    {
      nx = new ScanAtomCursor();
      nx.Position = RelativePosition.End;
    }

    else
    {
      if ((nonWord == null) || (nonWord.IsEmpty == true))
      {
        nx = new ScanAtomCursor(atomText, null);
      }
      else
      {
        nx = new ScanAtomCursor(atomText, nonWord.FirstFoundPattern.MatchPattern);
      }
      nx.Position = RelativePosition.At;
    }
    return(nx);
  }
}
// Cracks the open unit ( "<name attr=value ... >" or "<name ... />" ) that
// starts at position InBx of mStream.
// InBx - position of the "<" that starts the unit.
// Returns an XmlUnit holding the begin/end positions, the name word and the
// attributes of the unit. UnitCode is Open, or Single when the unit ends
// with "/".
// Any structural problem is reported thru ThrowIncorrectlyFormedXmlException.
XmlUnit CrackUnits_ScanOpenUnit(int InBx)
{
  XmlUnit openUnit = new XmlUnit();
  openUnit.UnitCode = XmlUnitCode.Open;

  // the unit must start with "<"
  BoundedString stream = new BoundedString(mStream);
  if (stream[InBx] != '<')
  {
    ThrowIncorrectlyFormedXmlException(InBx);
  }
  openUnit.Bx = InBx;

  // find where the unit ends. A ">" has to be found before any other "<".
  // Quoted text is bypassed by the scan.
  Scanner.ScanCharResults endScan = Scanner.ScanEqualAny_BypassQuoted(
    stream, InBx + 1, new char[] { '>', '<' }, QuoteEncapsulation.Double);
  bool endFound = (endScan.ResultPos != -1) && (endScan.ResultChar != '<');
  if (endFound)
  {
    openUnit.Ex = endScan.ResultPos;
  }
  else
  {
    ThrowIncorrectlyFormedXmlException(InBx);
  }

  // narrow the stream to the text between the "<" and the ">".
  stream = new BoundedString(mStream, InBx + 1, endScan.ResultPos - 1);

  // words within the unit are divided by "/" and "=". New line is whitespace.
  TextTraits unitTraits = new TextTraits();
  unitTraits.OpenNamedBracedPatterns.Clear();
  unitTraits.DividerPatterns.Add("/", "=", DelimClassification.DividerSymbol);
  unitTraits.WhitespacePatterns.AddDistinct(
    Environment.NewLine, DelimClassification.Whitespace);

  // step thru the words of the unit.
  for (WordCursor cursor = Scanner.ScanFirstWord(stream, unitTraits);
    !cursor.IsEndOfString;
    cursor = Scanner.ScanNextWord(stream, cursor))
  {
    if (ScanOpenUnit_CursorAtUnitName(cursor) == true)
    {
      // the unit name. only one name is allowed.
      if (unit_HasName(openUnit))
      {
        ThrowIncorrectlyFormedXmlException(InBx);
      }
      else
      {
        openUnit.NameWord = cursor;
      }
    }
    else if (cursor.Word == null)
    {
      // an empty word is ok only when it is the ending "/". That delim is
      // handled below.
      if (!(cursor.DelimValue == "/"))
      {
        ThrowIncorrectlyFormedXmlException(InBx);
      }
    }
    else
    {
      // an attribute ( named value pair ). The value is stored in its xml
      // encoded state. Scanning resumes after the value word.
      WordCursor valueCursor = ScanOpenUnit_Attribute_GetValue(stream, cursor);
      if (valueCursor == null)
      {
        ThrowIncorrectlyFormedXmlException(InBx);
      }
      else
      {
        openUnit.AddAttribute(cursor, valueCursor);
        cursor = valueCursor;
      }
    }

    // a "/" delim has to be the last thing in the unit. It marks the unit as
    // a single ( self closing ) unit.
    if (cursor.DelimValue == "/")
    {
      WordCursor afterSlash = Scanner.ScanNextWord(stream, cursor);
      if (afterSlash.IsEndOfString == true)
      {
        openUnit.UnitCode = XmlUnitCode.Single;
        break;
      }
      ThrowIncorrectlyFormedXmlException(InBx);
    }
  }

  return openUnit;

  // true when the unit already holds a name word.
  bool unit_HasName(XmlUnit Unit)
  {
    return Unit.NameWord != null;
  }
}
// Cracks the close unit ( "</name>" ) that starts at position InBx of mStream.
// InBx - position of the "<" that starts the unit.
// Returns an XmlUnit with UnitCode Close, the begin/end positions and, when
// present, the name word of the unit.
// Any structural problem is reported thru ThrowIncorrectlyFormedXmlException.
private XmlUnit CrackUnits_ScanCloseUnit(int InBx)
{
  XmlUnit closeUnit = new XmlUnit();
  closeUnit.UnitCode = XmlUnitCode.Close;

  // the unit must start with "<"
  BoundedString stream = new BoundedString(mStream);
  if (stream[InBx] != '<')
  {
    ThrowIncorrectlyFormedXmlException(InBx);
  }
  closeUnit.Bx = InBx;

  // find where the unit ends. A ">" has to be found before any other "<".
  Scanner.ScanCharResults endScan = Scanner.ScanEqualAny_BypassQuoted(
    stream, InBx + 1, new char[] { '>', '<' }, QuoteEncapsulation.Double);
  bool endFound = (endScan.ResultPos != -1) && (endScan.ResultChar != '<');
  if (endFound)
  {
    closeUnit.Ex = endScan.ResultPos;
  }
  else
  {
    ThrowIncorrectlyFormedXmlException(InBx);
  }

  // narrow the stream to the text between the "<" and the ">".
  stream = new BoundedString(mStream, InBx + 1, endScan.ResultPos - 1);
  TextTraits unitTraits = new TextTraits();
  unitTraits.OpenNamedBracedPatterns.Clear();
  unitTraits.DividerPatterns.Add("/", "=", DelimClassification.DividerSymbol);

  // the first word has to be empty, delimited by the "/".
  WordCursor cursor = Scanner.ScanFirstWord(stream, unitTraits);
  bool startsWithSlash = (cursor.IsDelimOnly) && (cursor.DelimValue == "/");
  if (!startsWithSlash)
  {
    ThrowIncorrectlyFormedXmlException(InBx);
  }

  // what follows is the unit name. Nothing is allowed after the name.
  cursor = Scanner.ScanNextWord(stream, cursor);
  bool nameIsLast =
    (cursor.IsEndOfString) || (cursor.DelimClass == DelimClassification.EndOfString);
  if (!nameIsLast)
  {
    ThrowIncorrectlyFormedXmlException(InBx);
  }

  // store the element name, if there is one.
  if (cursor.Word != null)
  {
    closeUnit.NameWord = cursor;
  }

  return closeUnit;
}
// Common construction code. Sets mTraits to a new TextTraits in which the
// whitespace patterns are replaced by blank, tab, CR, LF and the environment
// new line sequence.
private void ConstructCommon()
{
  string[] whitespacePatterns =
    { " ", "\t", "\r", "\n", Environment.NewLine };

  mTraits = new TextTraits();
  mTraits.WhitespacePatterns.Replace(
    whitespacePatterns, DelimClassification.Whitespace);
}
ClassifyAsComment(ScanStream ScanStream, TextTraits Traits, int Bx)
{
  // Builds the AtomText of the comment-to-end-of-line that starts at Bx.
  // The comment runs up to ( not including ) the next new line pattern, or to
  // the end of the stream when there is no new line.
  // Returns a tuple of:
  //   Item1 - text location of the start of the comment.
  //   Item2 - the comment AtomText. AtomCode is EntireLineCommentToEnd when
  //           only blanks precede the comment on its line, else CommentToEnd.
  //   Item3 - always null.
  //   Item4 - results of the scan for the new line that ends the comment.

  // is the comment the first non blank on its line? Yes when it starts the
  // stream, when only blanks precede it, or when the first non blank found
  // scanning backward is the end of a new line pattern.
  bool startsTheLine = (Bx == 0);
  if (startsTheLine == false)
  {
    int priorIx = Scanner.ScanReverseNotEqual(
      ScanStream.Stream, Bx - 1, Traits.WhitespaceWithoutNewLinePatterns);
    if (priorIx == -1)
    {
      startsTheLine = true;
    }
    else
    {
      var newLineMatch =
        Traits.NewLinePatterns.MatchFirstPatternEndsAtStringLocation(
          ScanStream.Stream, priorIx);
      if (newLineMatch.Item1 != null)
      {
        startsTheLine = true;
      }
    }
  }

  ScanAtomCode commentCode = startsTheLine
    ? ScanAtomCode.EntireLineCommentToEnd
    : ScanAtomCode.CommentToEnd;

  // the new line is the end of the comment.
  PatternScanResults endOfLine =
    Scanner.ScanEqualAny(ScanStream.Stream, Bx, Traits.NewLinePatterns);
  bool runsToEndOfStream = (endOfLine.IsEmpty == true);

  TextLocation commentBx = new StreamLocation(Bx).ToTextLocation(ScanStream);
  TextLocation commentEx;
  string commentText;
  string userCode = null;
  AtomText comment;

  if (runsToEndOfStream)
  {
    // no new line. the comment is the rest of the stream.
    int lastIx = ScanStream.Stream.Length - 1;
    commentEx = new StreamLocation(lastIx).ToTextLocation(ScanStream);
    commentText = ScanStream.Substring(Bx);
    comment = new AtomText(
      commentCode, commentText, null, commentBx, commentEx, userCode);
  }
  else
  {
    // the comment ends on the char before the new line pattern.
    int commentLx = endOfLine.Position - Bx;
    commentEx = new StreamLocation(Bx + commentLx - 1).ToTextLocation(ScanStream);
    commentText = ScanStream.Substring(Bx, commentLx);
    comment = new AtomText(
      commentCode, commentText, null, commentBx, commentEx, userCode);

    // result is not used. the call is kept so the method does exactly what
    // it did before.
    commentBx.ToStreamLocation(ScanStream);
  }

  ScanPattern noPattern = null;
  return new Tuple<TextLocation, AtomText, ScanPattern, PatternScanResults>(
    commentBx, comment, noPattern, endOfLine);
}
// -------------------- IsolateWord ---------------------------
// A word starts at Bx in Text. Find where the word ends.
// Text    - the string being scanned.
// Bx      - start position of the word.
// Results - cursor in which the found word is stored.
// Traits  - supplies the quote, verbatim literal and non word patterns.
// Returns the ScanPatternResults of the delim that ends the word.
// Throws ApplicationException when a quote char is found within a name.
private static ScanPatternResults IsolateWord(
  string Text, int Bx, ref WordCursor Results, TextTraits Traits)
{
  char leadChar = Text[Bx];

  // the word starts a verbatim string literal or a quoted literal.
  bool startsLiteral =
    ((Traits.VerbatimLiteralPattern != null)
      && (Traits.VerbatimLiteralPattern.Match(Text, Bx)))
    || (Traits.IsQuoteChar(leadChar) == true);
  if (startsLiteral)
  {
    var quoted = ScanWord.IsolateQuotedWord(Text, Bx, Traits);
    Results.SetWord(quoted.Item3, WordClassification.Quoted, quoted.Item1);
    Results.Word.LiteralType = quoted.Item4;
    return quoted.Item5;
  }

  // scan for the first of the non word patterns spcfd in Traits.
  ScanPatternResults delim = Scanner.ScanEqualAny(
    Text, Bx, Text.Length - Bx, Traits.NonWordPatterns);
  DelimClassification delimClass = DelimClassification.None;
  if (delim.IsNotFound == false)
  {
    delimClass = delim.FoundPat.DelimClassification;
  }

  // a quote character within the name. this is an error.
  if (delimClass == DelimClassification.Quote)
  {
    throw new ApplicationException(
      "quote character immed follows name character at position " +
      delim.FoundPos.ToString() + " in " + Text);
  }

  // no delim. the word runs to the end of the string.
  if (delim.IsNotFound)
  {
    Results.SetWord(Text.Substring(Bx), WordClassification.Identifier, Bx);
    return delim;
  }

  // open named brace. the word combines the name and the braced contents.
  if (delimClass == DelimClassification.OpenNamedBraced)
  {
    Scanner.ScanWord_IsolateWord_Braced(Text, Bx, delim, ref Results, Traits);
    return delim;
  }

  // the delim is found past the start. the text before it is the word.
  if (delim.FoundPos != Bx)
  {
    string identifier = Text.Substring(Bx, delim.FoundPos - Bx);
    Results.SetWord(identifier, WordClassification.Identifier, Bx);
    return delim;
  }

  // the delim is at the start of the word. Either the delim is the word ( an
  // expression symbol ), or it starts a comment, or the word is empty ( the
  // delim is a content divider. )
  if ((Traits.NonDividerIsWord == true)
    && (Traits.IsDividerDelim(delim.FoundPat.DelimClassification) == false))
  {
    Results.SetWord(
      delim.FoundPat.PatternValue,
      delim.FoundPat.DelimClassification.ToWordClassification().Value,
      Bx,
      delim.FoundPat.LeadChar);
  }
  else if (delimClass == DelimClassification.CommentToEnd)
  {
    // the comment is a word, not a delim. The delim returned is the one
    // returned by the comment scan.
    delim = Scanner.ScanWord_IsolateWord_CommentToEnd(
      Text, delim.FoundPos, ref Results, Traits);
  }
  else
  {
    Results.SetNullWord();
  }

  return delim;
}
IsolateQuotedWord( string Text, int Bx, TextTraits Traits)
{
  // Isolates the verbatim or quoted literal that starts at Bx in Text.
  // Returns a tuple of:
  //   Item1 - Bx, the start of the literal.
  //   Item2 - position of the closing quote.
  //   Item3 - text of the literal.
  //   Item4 - LiteralType.VerbatimString or LiteralType.String.
  //   Item5 - scan results of the delim that follows the closing quote.
  //           Constructed from -1 when the literal ends the string.
  // Throws ApplicationException when there is no closing quote, or when the
  // closing quote is not immed followed by a non word pattern.
  char leadChar = Text[Bx];
  int? closeIx = null;
  string literalText = null;
  LiteralType literalKind = LiteralType.none;

  bool startsVerbatim =
    (Traits.VerbatimLiteralPattern != null)
    && (Traits.VerbatimLiteralPattern.Match(Text, Bx));
  if (startsVerbatim)
  {
    // verbatim string literal.
    var verbatim = VerbatimLiteral.ScanCloseQuote(
      Text, Traits.VerbatimLiteralPattern, Bx);
    closeIx = verbatim.Item1;
    literalText = verbatim.Item2;
    literalKind = LiteralType.VerbatimString;
  }
  else if (Traits.IsQuoteChar(leadChar) == true)
  {
    // quoted literal. the text includes both quote chars.
    closeIx = Scanner.ScanCloseQuote(Text, Bx, Traits.QuoteEncapsulation);
    if (closeIx.Value != -1)
    {
      literalText = Text.Substring(Bx, closeIx.Value - Bx + 1);

      // correct the following at some point. Should be either string or
      // char lit.
      literalKind = LiteralType.String;
    }
  }

  // not a literal, or the closing quote is missing.
  if ((closeIx.HasValue == false) || (closeIx.Value == -1))
  {
    throw (new ApplicationException(
      "Closing quote not found starting at position " +
      Bx.ToString() + " in " + Text));
  }

  // setup the non word which follows the closing quote.
  int followIx = closeIx.Value + 1;
  ScanPatternResults follow;
  if (Text.IsPastEnd(followIx))
  {
    follow = new ScanPatternResults(-1);
  }
  else
  {
    // the char that follows the closing quote must be a delim.
    follow = Scanner.ScanEqualAny(
      Text, followIx, Text.Length - followIx, Traits.NonWordPatterns);
    if (follow.FoundPos != followIx)
    {
      throw new ApplicationException(
        "invalid char follows close quote at pos " + followIx.ToString() +
        " in " + Stringer.Head(Text, 80));
    }
  }

  return new Tuple<int, int?, string, LiteralType, ScanPatternResults>(
    Bx, closeIx, literalText, literalKind, follow);
}
// ------------------------- ParseAddressString ------------------------
// Parses a single mail address string into an EmailAddress.
//   <braced> text - stored as the Address.
//   (braced) text - stored, after header decoding, as the Comment.
//   other words   - collectively the friendly name. When no <braced> address
//                   is found, this text is stored as the Address instead.
public static EmailAddress ParseAddressString(string InString)
{
  TextTraits traits =
    new TextTraits().SetQuoteEncapsulation(QuoteEncapsulation.Escape);
  traits.DividerPatterns.AddDistinct(
    new string[] { " ", "\t" }, Text.Enums.DelimClassification.DividerSymbol);

  EmailAddress address = new EmailAddress();
  WordCursor firstFriendly = null;
  WordCursor lastFriendly = null;

  // step from word to word in the address string.
  WordCursor start = Scanner.PositionBeginWord(InString, traits);
  for (WordCursor csr = Scanner.ScanNextWord(InString, start);
    !csr.IsEndOfString;
    csr = Scanner.ScanNextWord(InString, csr))
  {
    bool isBraced = (csr.Word.Class == WordClassification.ContentBraced);

    if (isBraced && (csr.Word.BraceChar == '<'))
    {
      // the email address itself.
      address.Address = csr.Word.BracedText;
    }
    else if (isBraced && (csr.Word.BraceChar == '('))
    {
      // a comment within the address string.
      address.Comment = csr.Word.BracedText;
      address.Comment = MimeCommon.DecodeHeaderString_EncodedOnly(address.Comment);
    }
    else
    {
      // part of the friendly name. extend the range of friendly name words.
      if (firstFriendly == null)
      {
        firstFriendly = csr;
      }
      lastFriendly = csr;
    }
  }

  // the friendly name is the text from the first to the last friendly word.
  string friendlyText = null;
  if (firstFriendly != null)
  {
    if (firstFriendly == lastFriendly)
    {
      friendlyText = firstFriendly.Word.ToString();
    }
    else
    {
      int fromIx = firstFriendly.WordBx;
      int toIx = lastFriendly.WordEx;
      friendlyText = InString.Substring(fromIx, toIx - fromIx + 1);
    }
  }

  // the name could be quoted, could contain encoded-words. decode it.
  if (friendlyText != null)
  {
    friendlyText = MimeCommon.DecodeHeaderString_QuotedEncodedEither(friendlyText);
  }

  // without a <braced> address, the friendly text is the address.
  if (address.Address == null)
  {
    address.Address = friendlyText;
  }
  else
  {
    address.FriendlyName = friendlyText;
  }

  return address;
}
// ------------------------ ScanNextWord -------------------------
// Scans to the next word in the string. ( a word being the text bounded by the
// delimiter and whitespace characters of the text traits )
// Text        - the string being scanned.
// Traits      - not referenced by this method. The traits used by the scan are
//               those of CurrentWord ( CurrentWord.TextTraits ).
// CurrentWord - cursor of the prior word. The scan starts after it. When its
//               StayAtFlag is set, a copy of CurrentWord is returned with the
//               flag turned off.
// Returns the cursor of the next word. At end of string the cursor has
// Position == RelativePosition.End and an EndOfString delim.
public static WordCursor ScanNextWord(
  string Text, TextTraits Traits, WordCursor CurrentWord)
{
  int Bx;
  WordCursor results = null;
  ScanPatternResults spr = null;

  // stay at the current location. return copy of the cursor, but with stayatflag
  // turned off.
  // ( fix: the copy used to be stored in a local that was then dropped, so the
  //   method returned null on this path. )
  if (CurrentWord.StayAtFlag == true)
  {
    results = new WordCursor(CurrentWord);
    results.StayAtFlag = false;
  }

  else
  {
    // NOTE(review): nothing visible on this path instantiates "results" before
    // it is passed to IsolateWord and dereferenced below. It looks like the
    // statement that creates the result cursor is missing - confirm against
    // WordCursor and restore it.

    // calc scan start position
    Bx = ScanWord.CalcStartBx(Text, CurrentWord);

    // advance past whitespace
    if ((Bx != -1) && (Bx <= (Text.Length - 1)))
    {
      Bx = Scanner.ScanNotEqual(
        Text, Bx, Text.Length - 1,
        CurrentWord.TextTraits.WhitespacePatterns).FoundPos;
    }

    // got the start of something. scan for the delimiter (could be the current char)
    spr = null;
    DelimClassification sprdc = DelimClassification.None;
    if ((Bx != -1) && (Bx <= (Text.Length - 1)))
    {
      spr = ScanWord.IsolateWord(Text, Bx, ref results, CurrentWord.TextTraits);
      if (spr.IsNotFound == true)
      {
        sprdc = DelimClassification.EndOfString;
      }
      else
      {
        sprdc = spr.FoundPat.DelimClassification;
      }
    }

    // nothing to scan. end of string.
    if (spr == null)
    {
      results.Position = RelativePosition.End;
      results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
    }

    else
    {
      // depending on the word, isolate and store the delim that follows.

      // OpenNamedBraced. delim is the open brace char.
      if (results.WordClassification == WordClassification.OpenNamedBraced)
      {
        ScanPatternResults spr2;
        int remLx = Text.Length - Bx;
        spr2 = Scanner.ScanEqualAny(
          Text, Bx, remLx, CurrentWord.TextTraits.OpenNamedBracedPatterns);
        results.SetDelim(
          Text,
          spr2.FoundPat.PatternValue,
          spr2.FoundPos, DelimClassification.OpenNamedBraced);
      }

      // OpenContentBraced. word and delim are the same.
      else if (results.WordClassification == WordClassification.OpenContentBraced)
      {
        results.SetDelim(
          Text,
          results.Word.Value, results.WordBx, DelimClassification.OpenContentBraced);
      }

      // word is CommentToEnd. delim is end of line.
      else if (results.WordClassification == WordClassification.CommentToEnd)
      {
        results.SetDelim(Text, spr, sprdc);
      }

      // process the NonWordResults returned by "ScanWord_IsolateWord"
      else
      {
        ScanWord.IsolateDelim(
          Text, spr, ref results, CurrentWord.TextTraits);
      }
    }

    // current word position. A ScanEx of -1 is treated as end of string.
    if (results.ScanEx == -1)
    {
      results.Position = RelativePosition.End;
      results.SetDelim(Text, null, -1, DelimClassification.EndOfString);
    }
    else
    {
      results.Position = RelativePosition.At;
    }
  }

  return(results);
}