/// <summary>
/// Creates a statement word from the statement text and the cursor of the word.
/// The new word starts out without sub words.
/// </summary>
/// <param name="StmtText">Statement text stored in the word.</param>
/// <param name="Parent">Value assigned to the Parent property.</param>
/// <param name="WordCursor">Cursor stored as the word cursor of this word.</param>
public StmtWord(string StmtText, StmtWord Parent, WordCursor WordCursor)
{
    mWordCursor = WordCursor;
    mStmtText = StmtText;
    mSubWords = null;

    // Parent goes through the property and is assigned last, once the fields are set.
    this.Parent = Parent;
}
// ------------------------------ Parse ----------------------------
/// <summary>
/// Steps through the words of mLine and hands the value that follows each of the
/// keywords "content-type:", "boundary=" and "charset=" to the matching
/// ParseValue_ method.
/// </summary>
/// <remarks>
/// Keywords are matched with an ordinal, case-insensitive comparison so the result
/// does not depend on the current culture. A cursor that carries no word is skipped.
/// </remarks>
private void Parse( )
{
    WordCursor csr = Scanner.PositionBeginWord(mLine, mTraits);
    while (true)
    {
        csr = Scanner.ScanNextWord(mLine, csr);
        if (csr.IsEndOfString)
        {
            break;
        }

        // a delimiter with no word in front of it cannot be a keyword.
        if (csr.Word == null)
        {
            continue;
        }

        // read the keyword text once instead of once per comparison.
        string kwd = csr.Word.ToString( );

        // content-type: type/subtype;
        if (string.Equals(kwd, "content-type", System.StringComparison.OrdinalIgnoreCase)
            && (csr.DelimValue == ":"))
        {
            csr = ParseValue_ContentType(csr);
        }

        // boundary="value"
        else if (string.Equals(kwd, "boundary", System.StringComparison.OrdinalIgnoreCase)
            && (csr.DelimValue == "="))
        {
            csr = ParseValue_Boundary(csr);
        }

        // charset=us-ascii
        else if (string.Equals(kwd, "charset", System.StringComparison.OrdinalIgnoreCase)
            && (csr.DelimValue == "="))
        {
            csr = ParseValue_CharSet(csr);
        }
    }
}
/// <summary>
/// Splits a string of mail addresses into the individual address strings.
/// Addresses are located one at a time with ScanNextAddress and cut out of
/// the input with PullMailAddress.
/// </summary>
/// <param name="InString">String holding the mail addresses.</param>
/// <returns>ArrayList of address strings, in the order they were found.</returns>
public static ArrayList SplitStringOfMailAddresses(string InString)
{
    TextTraits traits = new TextTraits()
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
    traits.DividerPatterns.Replace(
        new string[] { ",", "\t" }, DelimClassification.DividerSymbol);
    traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

    ArrayList addrList = new ArrayList( );
    WordCursor word = Scanner.PositionBeginWord(InString, traits);

    // pair.a is the first word of the address, pair.b the last one.
    // a null first word means nothing was found: end of string.
    ObjectPair pair = ScanNextAddress(InString, word);
    while (pair.a != null)
    {
        // the next scan resumes from the last word of this address.
        word = (WordCursor)pair.b;

        addrList.Add(PullMailAddress(InString, pair));

        pair = ScanNextAddress(InString, word);
    }

    return addrList;
}
/// <summary>
/// Builds an AcNamedValues collection from its serialized string form.
/// Every word of the string must be a content braced word; its braced text is
/// converted to a key/value pair by ParsePair.
/// </summary>
/// <param name="InString">Serialized named values.</param>
/// <returns>The collection holding one entry per braced word.</returns>
/// <exception cref="ApplicationException">
/// A word is delimiter only, or is not a content braced word.
/// </exception>
public static AcNamedValues Parse(string InString)
{
    TextTraits traits = new TextTraits();
    traits.OpenNamedBracedPatterns.Replace(
        "[", Text.Enums.DelimClassification.OpenNamedBraced);
    traits.DividerPatterns.AddDistinct(
        ",", Text.Enums.DelimClassification.DividerSymbol);

    AcNamedValues vlus = new AcNamedValues();

    WordCursor csr = Scanner.PositionBeginWord(InString, traits);
    while (true)
    {
        csr = Scanner.ScanNextWord(InString, csr);
        if (csr.IsEndOfString)
        {
            break;
        }

        // anything other than a content braced word is a format error.
        if (csr.IsDelimOnly || (csr.Word.Class != WordClassification.ContentBraced))
        {
            throw new ApplicationException(
                "serialized AcNamedValues string in invalid format");
        }

        KeyValuePair<string, string> pair = ParsePair(csr.Word.BracedText);
        vlus.Add(pair.Key, pair.Value);
    }

    return vlus;
}
// ------------------------- ParseValue_ContentType -----------------------
/// <summary>
/// Scans the words that follow the content-type keyword and stores the type
/// ( the word delimited by "/" ) in mContentType and the sub type in
/// mContentSubType.
/// </summary>
/// <param name="InCsr">Cursor of the content-type keyword. Its TextTraits are set to mTraits.</param>
/// <returns>
/// The cursor at the word delimited by ";", or the end of string cursor when no
/// ";" was found.
/// </returns>
/// <remarks>
/// The sub type is normally the word delimited by ";". When the value runs to the
/// end of the string without a ";" ( ex: "text/plain" ), the first word that
/// followed the "/" is stored as the sub type instead of being dropped.
/// </remarks>
private WordCursor ParseValue_ContentType(
    WordCursor InCsr)
{
    WordCursor csr = InCsr;
    csr.TextTraits = mTraits;

    // first word seen after the "/". used only when the scan ends without a ";".
    string wordAfterSlash = null;
    bool expectSubType = false;

    while (true)
    {
        csr = Scanner.ScanNextWord(mLine, csr);
        if (csr.IsEndOfString)
        {
            if (wordAfterSlash != null)
            {
                mContentSubType = wordAfterSlash;
            }
            break;
        }

        string delim = csr.DelimValue;

        if (delim == "/")
        {
            mContentType = csr.Word.ToString( );
            expectSubType = true;
            wordAfterSlash = null;
        }
        else if (delim == ";")
        {
            // a lone word before the ";" is the type, otherwise it is the sub type.
            if (mContentType == null)
            {
                mContentType = csr.Word.ToString( );
            }
            else
            {
                mContentSubType = csr.Word.ToString( );
            }
            break;
        }
        else if (expectSubType && (delim != "=") && (csr.Word != null))
        {
            // candidate sub type. a later ";" delimited word still takes precedence.
            wordAfterSlash = csr.Word.ToString( );
            expectSubType = false;
        }
    }

    return csr;
}
/// <summary>
/// Creates a text lines word cursor from a word cursor and the line cursors
/// stored as the begin and end line of the word.
/// </summary>
/// <param name="InWordCursor">Word cursor passed to the base constructor.</param>
/// <param name="InBeginLineCursor">Stored as the begin line cursor.</param>
/// <param name="InEndLineCursor">Stored as the end line cursor.</param>
public TextLinesWordCursor(
    WordCursor InWordCursor,
    TextLinesCursor InBeginLineCursor,
    TextLinesCursor InEndLineCursor)
    : base(InWordCursor)
{
    mEndLineCursor = InEndLineCursor;
    mBeginLineCursor = InBeginLineCursor;
}
// ------------------------- ParseContentType ----------------------------
/// <summary>
/// Parses a content type value such as "text/plain; charset=us-ascii" into its
/// type, sub type and the charset, boundary and name parameters.
/// </summary>
/// <param name="InString">Text to parse.</param>
/// <returns>A new PartProperty.ContentType holding the parts that were found.</returns>
/// <remarks>
/// Type and sub type are lower cased with the invariant culture, so the stored
/// value does not depend on the current culture.
/// The type is the first word delimited by "/". The sub type is the first word
/// delimited by ";", or the word after the type when it runs to the end of the
/// string or to whitespace. Later words never overwrite a type or sub type that
/// is already stored.
/// A parameter value is accepted when it is followed by the end of the string,
/// whitespace or ";".
/// </remarks>
public static PartProperty.ContentType ParseContentType(string InString)
{
    TextTraits traits = new TextTraits()
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
    traits.DividerPatterns.Replace(
        new string[] { "/", ":", ";", " ", "\t", "=" },
        DelimClassification.DividerSymbol);
    traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

    PartProperty.ContentType results = new PartProperty.ContentType();
    bool haveType = false;
    bool haveSubType = false;

    WordCursor csr = Scanner.PositionBeginWord(InString, traits);
    while (true)
    {
        csr = Scanner.ScanNextWord(InString, csr);
        if (csr.IsEndOfString)
        {
            break;
        }

        // a delimiter with no word in front of it carries nothing to store.
        if (csr.Word == null)
        {
            continue;
        }

        string delim = csr.DelimValue;

        // content type
        if (delim == "/")
        {
            if (haveType == false)
            {
                results.Type = csr.Word.ToString( ).ToLowerInvariant( );
                haveType = true;
            }
        }

        // content sub type.
        else if (delim == ";")
        {
            if (haveSubType == false)
            {
                results.SubType = csr.Word.ToString( ).ToLowerInvariant( );
                haveSubType = true;
            }
        }

        // a kwd
        else if (delim == "=")
        {
            WordCursor nxCsr = csr.NextWord( );

            // the value ends at the end of the string, at whitespace, or at the
            // ";" that separates it from the next parameter.
            bool valueIsTerminated =
                (nxCsr.DelimClass == DelimClassification.EndOfString)
                || (nxCsr.DelimClass == DelimClassification.Whitespace)
                || (nxCsr.DelimValue == ";");

            if (valueIsTerminated && (nxCsr.Word != null))
            {
                string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                // step onto the value so it is not scanned again as a type or sub type.
                csr = nxCsr;

                if (kwd == "charset")
                {
                    results.CharSet = csr.Word.NonQuotedSimpleValue;
                }
                else if (kwd == "boundary")
                {
                    results.Boundary = csr.Word.NonQuotedSimpleValue;
                }
                else if (kwd == "name")
                {
                    results.Name = csr.Word.NonQuotedSimpleValue;
                }
            }
        }

        // sub type that is not followed by ";" ( ex: "text/plain" )
        else if (haveType && (haveSubType == false)
            && ((csr.DelimClass == DelimClassification.EndOfString)
                || (csr.DelimClass == DelimClassification.Whitespace)))
        {
            results.SubType = csr.Word.ToString( ).ToLowerInvariant( );
            haveSubType = true;
        }
    }

    return results;
}
/// <summary>
/// Creates a statement word from the statement text, the cursor of the word and
/// the composite code of the word. The new word starts out without sub words.
/// </summary>
/// <param name="StmtText">Statement text stored in the word.</param>
/// <param name="Parent">Value assigned to the Parent property.</param>
/// <param name="WordCursor">Cursor stored as the word cursor of this word.</param>
/// <param name="CompositeCode">Composite code stored in the word.</param>
public StmtWord(
    string StmtText,
    StmtWord Parent,
    WordCursor WordCursor,
    WordCompositeCode CompositeCode)
{
    mCompositeCode = CompositeCode;
    mWordCursor = WordCursor;
    mStmtText = StmtText;
    mSubWords = null;

    // Parent goes through the property and is assigned last, once the fields are set.
    this.Parent = Parent;
}
/// <summary>
/// Parses the text held in the Buffer of a ParseBufferComplex into a hierarchy of
/// statement words.
/// </summary>
/// <param name="ParseBuf">
/// Complex whose Buffer holds the text to parse. Per the original note, the complex
/// contains the lines to parse concatenated together, plus a cross reference for
/// converting buffer locations to line positions.
/// </param>
/// <param name="Traits">Traits used to position the cursor and to parse the text.</param>
/// <returns>
/// The top word: a General composite with no parent and no cursor, under which
/// ParseParent places the parsed words.
/// </returns>
public static StmtWord ParseTextLines(
    ParseBufferComplex ParseBuf, StmtTraits Traits)
{
    WordCursor startCsr = Scanner.PositionBeginWord(ParseBuf.Buffer, Traits);

    StmtWord topWord =
        new StmtWord(ParseBuf.Buffer, null, null, WordCompositeCode.General);

    // the cursor returned by ParseParent is not needed here.
    ParseParent(ParseBuf.Buffer, Traits, startCsr, topWord);

    return topWord;
}
// ------------------------- Crack ------------------------------------
/// <summary>
/// Cracks a mail drop LIST line into its message number and size components.
/// The raw line is kept in mRawLine, the first word of the line is stored in
/// MessageNx and the second word in MessageSx.
/// </summary>
/// <param name="InLine">The LIST line to crack.</param>
/// <returns>This object, so calls can be chained.</returns>
public MailDropMessage Crack(string InLine)
{
    mRawLine = InLine;

    // first word: message number.
    WordCursor numberCsr = new WordCursor( ).SetString(InLine).NextWord( );
    MessageNx = numberCsr.Word.ToString( );

    // second word: message size.
    WordCursor sizeCsr = numberCsr.NextWord( );
    MessageSx = sizeCsr.Word.ToString( );

    return this;
}
/// <summary>
/// Cracks a directory listing line into its words. The 4th word is the file/dir
/// name. It is scanned with traits whose whitespace patterns were replaced with
/// the tab character, so the name can contain spaces; the name is trimmed before
/// it is stored.
/// </summary>
/// <param name="InLine">Line to crack.</param>
/// <returns>The words of the line, in order.</returns>
string[] Parse_CrackWords(string InLine)
{
    // traits for the ordinary words: no divider and no open brace patterns.
    TextTraits wordTraits = new TextTraits();
    wordTraits.DividerPatterns.Clear( );
    wordTraits.OpenContentBracedPatterns.Clear();
    wordTraits.OpenNamedBracedPatterns.Clear();

    // traits for the entry name: same as above, whitespace patterns replaced by tab.
    TextTraits nameTraits = new TextTraits(wordTraits);
    nameTraits.WhitespacePatterns.Replace("\t", Text.Enums.DelimClassification.Whitespace);

    List<string> cracked = new List<string>();

    WordCursor csr = Scanner.PositionBeginWord(InLine, wordTraits);
    while (true)
    {
        // three words collected so far means the next word is the entry name.
        bool atEntryName = (cracked.Count == 3);

        csr.TextTraits = atEntryName ? nameTraits : wordTraits;

        csr = Scanner.ScanNextWord(InLine, csr);
        if (csr.IsEndOfString == true)
        {
            break;
        }

        string value = csr.Word.Value;
        cracked.Add(atEntryName ? value.Trim() : value);
    }

    return cracked.ToArray();
}
/// <summary>
/// Given the cursor of the name part of a name=value pair, advances to the value
/// part and returns its cursor.
/// </summary>
/// <param name="InBoundedString">Bounded string being scanned.</param>
/// <param name="InCsr">Cursor located at the name part of the attribute.</param>
/// <returns>
/// The cursor of the value word, or null when the name cursor has no word, is not
/// delimited by "=", no value word follows, or the value word is not delimited by
/// whitespace, the end of the string or "/". The caller is expected to treat null
/// as malformed xml.
/// </returns>
private static WordCursor ScanOpenUnit_Attribute_GetValue(
    BoundedString InBoundedString, WordCursor InCsr)
{
    // the name must be a word that is delimited by "=".
    if ((InCsr.Word == null) || (InCsr.DelimValue != "="))
    {
        return null;
    }

    // scan to the value part of the attribute.
    WordCursor valueCsr = Scanner.ScanNextWord(InBoundedString, InCsr);

    // no value to scan to.
    if ((valueCsr.IsEndOfString == true) || (valueCsr.Word == null))
    {
        return null;
    }

    // got a word. it is an attribute value as long as its delimiter is legit.
    bool delimIsLegit =
        (valueCsr.DelimClass == DelimClassification.Whitespace)
        || (valueCsr.DelimClass == DelimClassification.EndOfString)
        || (valueCsr.DelimValue == "/");

    if (delimIsLegit)
    {
        return valueCsr;
    }

    // likely the wrong type of delimiter.
    return null;
}
// --------------------------- PullMailAddress ------------------------------
/// <summary>
/// Cuts one mail address out of the input string. The address runs from the start
/// of the first word of the pair to the end of the last word of the pair.
/// </summary>
/// <param name="InString">String the address words were scanned from.</param>
/// <param name="InPair">Pair whose a member is the first WordCursor of the address
/// and whose b member is the last one.</param>
/// <returns>The address text, both end positions included.</returns>
/// <exception cref="ApplicationException">
/// Either word position is -1.
/// </exception>
private static string PullMailAddress(string InString, ObjectPair InPair)
{
    WordCursor firstWord = (WordCursor)InPair.a;
    WordCursor lastWord = (WordCursor)InPair.b;

    int addrBx = firstWord.WordBx;
    int addrEx = lastWord.WordEx;

    if ((addrBx == -1) || (addrEx == -1))
    {
        throw new ApplicationException(
            "email address not properly formed: " + InString);
    }

    int addrLx = addrEx - addrBx + 1;
    return InString.Substring(addrBx, addrLx);
}
// --------------------------- IsolateDelim_SetDelimIsWhitespace ----------
/// <summary>
/// Records whitespace as the delimiter of the word in Results.
/// </summary>
/// <param name="Text">Text being scanned.</param>
/// <param name="Traits">Traits that supply the whitespace patterns.</param>
/// <param name="Results">Cursor that receives the delimiter.</param>
/// <param name="WsIx">Position in Text where the whitespace starts.</param>
private static void IsolateDelim_SetDelimIsWhitespace(
    string Text, TextTraits Traits, WordCursor Results, int WsIx)
{
    // scan from the whitespace position with the whitespace patterns and keep the
    // string that was scanned over as the delim value. ( it can be checked later
    // to see if it contains tabs or newlines )
    ScanPatternResults wsScan = Scanner.ScanNotEqual(
        Text, WsIx, Text.Length - 1, Traits.WhitespacePatterns);
    string whitespaceRun = wsScan.ScannedOverString;

    Results.SetDelim(Text, whitespaceRun, WsIx, DelimClassification.Whitespace);
    Results.DelimIsWhitespace = true;
}
/// <summary>
/// Adds an attribute sub unit to this unit. The sub unit list is created on first
/// use.
/// </summary>
/// <param name="InNameWord">Cursor of the attribute name. Its WordBx is the start
/// of the attribute unit.</param>
/// <param name="InEncodedAttributeValueWord">Cursor of the attribute value, stored
/// as the encoded attribute value word. Its WordEx is the end of the attribute
/// unit.</param>
/// <returns>The attribute unit that was added.</returns>
public XmlUnit AddAttribute(
    WordCursor InNameWord, WordCursor InEncodedAttributeValueWord)
{
    if (mSubUnits == null)
    {
        mSubUnits = new List<XmlUnit>();
    }

    // the unit is added to the list first, then filled in.
    XmlUnit attr = new XmlUnit();
    mSubUnits.Add(attr);

    // the unit spans from the start of the name to the end of the value.
    attr.Bx = InNameWord.WordBx;
    attr.Ex = InEncodedAttributeValueWord.WordEx;

    attr.UnitCode = XmlUnitCode.Attribute;
    attr.NameWord = InNameWord;
    attr.EncodedAttributeValueWord = InEncodedAttributeValueWord;

    return attr;
}
ParseContentDisposition(string InString)
{
    // Parses a content disposition value such as "attachment; filename=a.txt"
    // into its disposition and filename parts and returns them in a new
    // PartProperty.ContentDisposition.
    //
    // - The disposition and the keyword are lower cased with the invariant
    //   culture, so the result does not depend on the current culture.
    // - The disposition is the first word delimited by ";", or the first word that
    //   runs to the end of the string when no ";" delimited word was seen
    //   ( ex: "inline" ). Later words never overwrite a stored disposition.
    // - A parameter value is accepted when it is followed by the end of the string,
    //   whitespace or ";".
    TextTraits traits = new TextTraits()
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
    traits.DividerPatterns.Replace(
        new string[] { ";", " ", "\t", "=" }, DelimClassification.DividerSymbol);
    traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

    PartProperty.ContentDisposition results = new PartProperty.ContentDisposition();
    bool haveDisposition = false;

    WordCursor csr = Scanner.PositionBeginWord(InString, traits);
    while (true)
    {
        csr = Scanner.ScanNextWord(InString, csr);
        if (csr.IsEndOfString)
        {
            break;
        }

        // a delimiter with no word in front of it carries nothing to store.
        if (csr.Word == null)
        {
            continue;
        }

        string delim = csr.DelimValue;

        // content disposition
        if (delim == ";")
        {
            if (haveDisposition == false)
            {
                results.Disposition = csr.Word.ToString( ).ToLowerInvariant( );
                haveDisposition = true;
            }
        }

        // a kwd
        else if (delim == "=")
        {
            WordCursor nxCsr = csr.NextWord( );

            // the value ends at the end of the string, at whitespace, or at the
            // ";" that separates it from the next parameter.
            bool valueIsTerminated =
                (nxCsr.DelimClass == DelimClassification.EndOfString)
                || (nxCsr.DelimClass == DelimClassification.Whitespace)
                || (nxCsr.DelimValue == ";");

            if (valueIsTerminated && (nxCsr.Word != null))
            {
                string kwd = csr.Word.ToString( ).ToLowerInvariant( );

                // step onto the value so it is not scanned again as a disposition.
                csr = nxCsr;

                if (kwd == "filename")
                {
                    results.FileName = csr.Word.NonQuotedSimpleValue;
                }
            }
        }

        // disposition with no parameters after it ( ex: "inline" )
        else if ((haveDisposition == false)
            && (csr.DelimClass == DelimClassification.EndOfString))
        {
            results.Disposition = csr.Word.ToString( ).ToLowerInvariant( );
            haveDisposition = true;
        }
    }

    return results;
}
// ------------------------------ ScanNextAddress ---------------------------
/// <summary>
/// Scans word by word from the given cursor until the comma between addresses or
/// the end of the string, and reports the first and last word of the address.
/// </summary>
/// <param name="InString">String holding the mail addresses.</param>
/// <param name="InWord">Cursor to scan from. Its TextTraits are replaced with the
/// traits built here.</param>
/// <returns>
/// Pair holding the first word ( a ) and the last word ( b ) of the address. Both
/// are null when the end of the string was reached before any word was found.
/// </returns>
private static ObjectPair ScanNextAddress(
    string InString, WordCursor InWord)
{
    TextTraits traits = new TextTraits()
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
    traits.DividerPatterns.Replace(
        new string[] { ",", "\t" }, DelimClassification.DividerSymbol);
    traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

    WordCursor firstWord = null;
    WordCursor lastWord = null;

    WordCursor word = InWord;
    word.TextTraits = traits;

    do
    {
        word = Scanner.ScanNextWord(InString, word);
        if (word.IsEndOfString)
        {
            break;
        }

        // expand the word range of the current mail address string.
        if (firstWord == null)
        {
            firstWord = word;
        }
        lastWord = word;

        // "," ends this address. an empty delim value is the end of the string.
    } while ((word.DelimValue != ",") && (word.DelimValue != ""));

    return new ObjectPair(firstWord, lastWord);
}
// ------------------------- ParseValue_Boundary -----------------------
/// <summary>
/// Advances to the word after the boundary= keyword and stores it in mBoundary.
/// A quoted word is stored dequoted. Nothing is stored when the cursor is delim
/// only.
/// </summary>
/// <param name="InWordCsr">Cursor of the boundary keyword.</param>
/// <returns>The cursor of the word that was scanned.</returns>
private WordCursor ParseValue_Boundary(
    WordCursor InWordCsr)
{
    WordCursor valueCsr = Scanner.ScanNextWord(mLine, InWordCsr);

    // no word after the keyword. leave mBoundary as is.
    if (valueCsr.IsDelimOnly)
    {
        return valueCsr;
    }

    if (valueCsr.Word.IsQuoted == true)
    {
        mBoundary = valueCsr.Word.DequotedWord;
    }
    else
    {
        mBoundary = valueCsr.Word.ToString( );
    }

    return valueCsr;
}
// ------------------------- ParseValue_CharSet -----------------------
/// <summary>
/// Advances to the word after the charset= keyword and stores it in mCharSet.
/// A quoted word is stored dequoted. Nothing is stored when the cursor is not at
/// a word.
/// </summary>
/// <param name="InWordCsr">Cursor of the charset keyword.</param>
/// <returns>The cursor of the word that was scanned.</returns>
private WordCursor ParseValue_CharSet(
    WordCursor InWordCsr)
{
    WordCursor valueCsr = Scanner.ScanNextWord(mLine, InWordCsr);

    // not at a word. leave mCharSet as is.
    if (!valueCsr.IsAtWord)
    {
        return valueCsr;
    }

    if (valueCsr.Word.IsQuoted == true)
    {
        mCharSet = valueCsr.Word.DequotedWord;
    }
    else
    {
        mCharSet = valueCsr.Word.ToString( );
    }

    return valueCsr;
}
/// <summary>
/// Tests whether the cursor is located at the name of an open unit: a word that is
/// delimited by whitespace, the end of the string or "/".
/// </summary>
/// <param name="InCsr">Cursor to test.</param>
/// <returns>
/// True when the cursor is at a unit name. False when the cursor has no word or the
/// word is delimited by "=".
/// </returns>
/// <remarks>
/// A word with any other delimiter is reported through
/// ThrowIncorrectlyFormedXmlException, passing the ScanBx of the cursor.
/// </remarks>
private static bool ScanOpenUnit_CursorAtUnitName(WordCursor InCsr)
{
    // no word, or the name part of an attribute.
    if ((InCsr.Word == null) || (InCsr.DelimValue == "="))
    {
        return false;
    }

    bool delimEndsName =
        (InCsr.DelimClass == DelimClassification.EndOfString)
        || (InCsr.DelimClass == DelimClassification.Whitespace)
        || (InCsr.DelimValue == "/");

    if (delimEndsName)
    {
        return true;
    }

    ThrowIncorrectlyFormedXmlException(InCsr.ScanBx);
    return false;
}
/// <summary>
/// First pass processing of statement text.
/// </summary>
/// <remarks>
/// What first pass processing does:
///  - cracks the stmt stream into a sequence of delim separated words
///  - organizes the words in a bracket organized hierarchy
///  - possibly, groups the words into stmt units based on end of stmt and
///    new line delimiters specified in StmtTraits. Also on the comment
///    markers contained in StmtTraits.
/// </remarks>
/// <param name="InStmtText">Statement text to scan.</param>
/// <param name="InTraits">Traits that control sentence forming.</param>
/// <param name="InCsr">Cursor to start scanning from.</param>
/// <param name="InParentWord">Word that is the parent of the words created here.</param>
/// <returns>The cursor where this level of the scan stopped.</returns>
public static WordCursor FirstPass(
    string InStmtText, StmtTraits InTraits,
    WordCursor InCsr, StmtWord InParentWord)
{
    WordCursor csr = InCsr;
    StmtWord parentWord = InParentWord;

    // first word of the stmt currently being collected.
    StmtWord fsWord = null;

    while (true)
    {
        csr = Scanner.ScanNextWord(InStmtText, csr);
        if (csr.IsEndOfString == true)
        {
            break;
        }

        StmtWord word = new StmtWord(InStmtText, parentWord, csr);

        // this word is start of stmt.
        if (fsWord == null)
        {
            fsWord = word;
        }

        // word is start of a sentence. the word just created becomes the sentence
        // and the new parent; a second word is created under it for the same cursor.
        if ((InTraits.FormSentencesFromWhitespaceDelimWords == true)
            && (parentWord.IsComposite == false)
            && ((csr.DelimIsWhitespace == true) || (csr.DelimIsOpenBrace == true)))
        {
            word.CompositeCode = WordCompositeCode.Sentence;
            parentWord = word;
            word = new StmtWord(InStmtText, parentWord, csr);
        }

        // the EndStmt delim is considered to separate stmts within this parent.
        // fsWord is never null here ( it was set above ), so the first and last
        // words of the stmt can be marked directly.
        if (csr.DelimClass == DelimClassification.EndStmt)
        {
            fsWord.BeginStmtWord = fsWord;
            fsWord.EndStmtWord = word;
            word.BeginStmtWord = fsWord;
            word.EndStmtWord = word;
            fsWord = null;
        }

        // word is braced ( a function ). collect all the words within the braces.
        if (csr.WordIsOpenBrace == true)
        {
            csr = FirstPass(InStmtText, InTraits, csr, word);

            // cursor is located at the closing brace. We want the word after the
            // closing brace to always be a delim only word. In a parent where
            // members are delimited by comma this is no problem. But in a whitespace
            // separated list, this might not be the case without a little helpful
            // adjustment.
            csr = Scanner.ScanNextWord(InStmtText, csr);
            csr.StayAtFlag = true;

            if ((csr.IsDelimOnly == false)
                && ((csr.WordClassification == WordClassification.OpenNamedBraced)
                    || (csr.WordClassification == WordClassification.OpenContentBraced)))
            {
                csr.SetVirtualCursor_WhitespaceOnly(csr.WordBx - 1);
            }
        }

        // end of a sentence.
        else if ((parentWord.CompositeCode == WordCompositeCode.Sentence)
            && (csr.DelimIsWhitespace == false))
        {
            return csr;
        }

        // todo: have to expand this to throw an exception when the closing brace
        //       does not match the open brace.
        else if (csr.DelimClass == DelimClassification.CloseBraced)
        {
            break;
        }
    }

    return csr;
}
// -------------------- IsolateDelim ---------------------------
/// <summary>
/// Stores in Results the delimiter that ends the word, based on the pattern scan
/// results of the scan for the end of the word.
/// </summary>
/// <param name="Text">Text being scanned.</param>
/// <param name="PatternResults">Results of the scan for the pattern that ends the word.</param>
/// <param name="Results">Cursor that receives the delimiter.</param>
/// <param name="Traits">Traits used to classify the delimiter.</param>
private static void IsolateDelim(
    string Text,
    ScanPatternResults PatternResults, ref WordCursor Results, TextTraits Traits)
{
    // did not find a nonword char. must have hit end of string.
    if (PatternResults.IsNotFound)
    {
        Results.DelimClass = DelimClassification.EndOfString;
        return;
    }

    // we have a delimiter of some kind.
    DelimClassification sprdc = PatternResults.FoundPat.DelimClassification;

    Results.WhitespaceFollowsWord = false;
    Results.WhitespaceFollowsDelim = false;
    Results.DelimIsWhitespace = false;

    // whitespace immed follows the word text
    if (sprdc == DelimClassification.Whitespace)
    {
        ScanWord.IsolateDelim_WhitespaceFollows(
            Text, PatternResults, ref Results, Traits);
        return;
    }

    // the delim is a hard delim ( not whitespace )

    // Want the openContent brace to be processed as a standalone word. Use
    // virtual whitespace so the word that this open brace is the delim of will
    // have what appears to be a whitespace delim. Then the following word will
    // be the standalone open content brace char.
    if ((sprdc == DelimClassification.OpenContentBraced)
        && (Traits.VirtualWhitespace == true))
    {
        Results.SetDelim(
            Text, null, PatternResults.FoundPos, DelimClassification.VirtualWhitespace);
        return;
    }

    // delim is either as classified in the collection of NonWords or is
    // a PathPart delim.
    ScanPattern pathPat = Traits.GetPathPartDelim(Text, PatternResults.FoundPos);
    if (pathPat != null)
    {
        Results.SetDelim(
            Text, pathPat.PatternValue,
            PatternResults.FoundPos, DelimClassification.PathSep);
    }
    else
    {
        Results.SetDelim(
            Text, PatternResults.FoundPat.PatternValue,
            PatternResults.FoundPos, sprdc);
    }
}
/// <summary>
/// The delim after the word is whitespace. If what follows the whitespace is a
/// delim char, then this whitespace is disregarded as the delim, and the delim is
/// what follows the whitespace.
/// </summary>
/// <remarks>
/// The whitespace stays the delim when nothing, or no nonword pattern, follows it,
/// and also when what follows is a path part delim, an open brace or a quote.
/// </remarks>
/// <param name="Text">Text being scanned.</param>
/// <param name="PatternResults">Scan results whose FoundPos is where the whitespace starts.</param>
/// <param name="Results">Cursor that receives the delimiter.</param>
/// <param name="Traits">Traits that supply the whitespace and nonword patterns.</param>
private static void IsolateDelim_WhitespaceFollows(
    string Text,
    ScanPatternResults PatternResults, ref WordCursor Results, TextTraits Traits)
{
    Results.WhitespaceFollowsWord = true;

    int lastIx = Text.Length - 1;

    // Look for hard delim after the ws.
    ScanPatternResults afterWs = Scanner.ScanNotEqual(
        Text, PatternResults.FoundPos, lastIx, Traits.WhitespacePatterns);

    // check whether the char after the ws is a nonword.
    ScanPattern hardPat = null;
    if (afterWs.FoundPos != -1)
    {
        var match = Traits.NonWordPatterns.MatchPatternsAtStringLocation(
            Text, afterWs.FoundPos, lastIx);
        hardPat = match.Item1;
    }

    // the whitespace char is the delim of record when:
    //  - no nonword ( delim ) char follows the whitespace.
    //  - the delim is actually a sep char in a path name.
    //  - it is an open brace char. braces are considered standalone words.
    //  - it is a quote char. the quoted string is considered a word.
    bool whitespaceIsDelim =
        (hardPat == null)
        || Traits.IsPathPartDelim(Text, afterWs.FoundPos)
        || hardPat.DelimClassification.IsOpenBraced( )
        || (hardPat.DelimClassification == DelimClassification.Quote);

    if (whitespaceIsDelim)
    {
        ScanWord.IsolateDelim_SetDelimIsWhitespace(
            Text, Traits, Results, PatternResults.FoundPos);
    }

    // is an actual delim.
    else
    {
        Results.SetDelim(
            Text, hardPat.PatternValue, afterWs.FoundPos, hardPat.DelimClassification);
    }
}
/// <summary>
/// Creates a text lines word cursor from a word cursor. The begin and end line
/// cursors start out as null.
/// </summary>
/// <param name="InWordCursor">Word cursor passed to the base constructor.</param>
public TextLinesWordCursor(WordCursor InWordCursor)
    : base(InWordCursor)
{
    mEndLineCursor = null;
    mBeginLineCursor = null;
}
/// <summary>
/// Scans the open unit that starts with the "&lt;" at InBx in mStream and returns
/// it as an XmlUnit holding the unit name and its attributes. A unit that ends
/// with "/" is marked as a Single unit.
/// </summary>
/// <param name="InBx">Position in mStream of the "&lt;" that starts the unit.</param>
/// <returns>The scanned unit. Bx and Ex are the positions of the enclosing "&lt;" and "&gt;".</returns>
/// <remarks>
/// Malformed input is reported through ThrowIncorrectlyFormedXmlException.
/// </remarks>
XmlUnit CrackUnits_ScanOpenUnit(int InBx)
{
    XmlUnit unit = new XmlUnit();
    unit.UnitCode = XmlUnitCode.Open;

    BoundedString wholeStream = new BoundedString(mStream);

    // unit starts with "<"
    if (wholeStream[InBx] != '<')
    {
        ThrowIncorrectlyFormedXmlException(InBx);
    }
    unit.Bx = InBx;

    // scan for the end of the unit. ( there should be a > before an < )
    Scanner.ScanCharResults res = Scanner.ScanEqualAny_BypassQuoted(
        wholeStream, InBx + 1, new char[] { '>', '<' }, QuoteEncapsulation.Double);
    if ((res.ResultPos != -1) && (res.ResultChar != '<'))
    {
        unit.Ex = res.ResultPos;
    }
    else
    {
        ThrowIncorrectlyFormedXmlException(InBx);
    }

    // setup to step from word to word in the unit: the text between "<" and ">".
    BoundedString unitText = new BoundedString(mStream, InBx + 1, res.ResultPos - 1);
    TextTraits traits = new TextTraits();
    traits.OpenNamedBracedPatterns.Clear( );
    traits.DividerPatterns.Add("/", "=", DelimClassification.DividerSymbol);
    traits.WhitespacePatterns.AddDistinct(
        Environment.NewLine, DelimClassification.Whitespace);

    // isolate the words of the open unit.
    for (WordCursor csr = Scanner.ScanFirstWord(unitText, traits);
         csr.IsEndOfString == false;
         csr = Scanner.ScanNextWord(unitText, csr))
    {
        // the unit name
        if (ScanOpenUnit_CursorAtUnitName(csr) == true)
        {
            if (unit.NameWord == null)
            {
                unit.NameWord = csr;
            }
            else
            {
                // already have a unit name
                ThrowIncorrectlyFormedXmlException(InBx);
            }
        }

        // no word. only the ending "/" is allowed. ( handled a little later. )
        else if (csr.Word == null)
        {
            if (csr.DelimValue != "/")
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }
        }

        // handle as an element attribute ( a named value pair )
        else
        {
            WordCursor valueWord = ScanOpenUnit_Attribute_GetValue(unitText, csr);
            if (valueWord == null)
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }
            else
            {
                // note: attribute values are stored in their xml encoded state.
                unit.AddAttribute(csr, valueWord);
                csr = valueWord;
            }
        }

        // process the "/" delimiter. ( must be the end of the OpenUnit )
        if (csr.DelimValue == "/")
        {
            WordCursor afterSlash = Scanner.ScanNextWord(unitText, csr);
            if (afterSlash.IsEndOfString == true)
            {
                unit.UnitCode = XmlUnitCode.Single;
                break;
            }
            else
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }
        }
    }

    return unit;
}
/// <summary>
/// Scans the close unit that starts with the "&lt;" at InBx in mStream and returns
/// it as an XmlUnit. The text of the unit must be a "/" followed by a name that
/// runs to the end of the unit.
/// </summary>
/// <param name="InBx">Position in mStream of the "&lt;" that starts the unit.</param>
/// <returns>The scanned unit. NameWord is set when the unit has an element name.</returns>
/// <remarks>
/// Malformed input is reported through ThrowIncorrectlyFormedXmlException.
/// </remarks>
private XmlUnit CrackUnits_ScanCloseUnit(int InBx)
{
    XmlUnit unit = new XmlUnit();
    unit.UnitCode = XmlUnitCode.Close;

    BoundedString wholeStream = new BoundedString(mStream);

    // unit starts with "<"
    if (wholeStream[InBx] != '<')
    {
        ThrowIncorrectlyFormedXmlException(InBx);
    }
    unit.Bx = InBx;

    // scan for the end of the unit. ( there should be a > before an < )
    Scanner.ScanCharResults res = Scanner.ScanEqualAny_BypassQuoted(
        wholeStream, InBx + 1, new char[] { '>', '<' }, QuoteEncapsulation.Double);
    if ((res.ResultPos != -1) && (res.ResultChar != '<'))
    {
        unit.Ex = res.ResultPos;
    }
    else
    {
        ThrowIncorrectlyFormedXmlException(InBx);
    }

    // setup to step from word to word in the close unit.
    BoundedString unitText = new BoundedString(mStream, InBx + 1, res.ResultPos - 1);
    TextTraits traits = new TextTraits();
    traits.OpenNamedBracedPatterns.Clear();
    traits.DividerPatterns.Add("/", "=", DelimClassification.DividerSymbol);

    // first word must be an empty word w/ "/" delim.
    WordCursor csr = Scanner.ScanFirstWord(unitText, traits);
    bool startsWithSlash = (csr.IsDelimOnly) && (csr.DelimValue == "/");
    if (!startsWithSlash)
    {
        ThrowIncorrectlyFormedXmlException(InBx);
    }

    // next is a name with end of string delim.
    csr = Scanner.ScanNextWord(unitText, csr);
    bool runsToEnd =
        (csr.IsEndOfString) || (csr.DelimClass == DelimClassification.EndOfString);
    if (!runsToEnd)
    {
        ThrowIncorrectlyFormedXmlException(InBx);
    }

    // if there is an element name, store it.
    if (csr.Word != null)
    {
        unit.NameWord = csr;
    }

    return unit;
}
// ----------------------- CalcStartBx ---------------------------
/// <summary>
/// Calculates the position from which to start the scan to the next word.
/// </summary>
/// <param name="Text">Text being scanned.</param>
/// <param name="Word">Cursor of the current word.</param>
/// <returns>
/// The start position, or -1 when the position of the cursor is None or not
/// recognized, or when the calculated position is past the last char of the text.
/// </returns>
private static int CalcStartBx(
    string Text, WordCursor Word)
{
    int startBx;

    switch (Word.Position)
    {
        case RelativePosition.Begin:
            startBx = 0;
            break;

        case RelativePosition.Before:
            startBx = Word.ScanBx;
            break;

        // After and At are calculated the same way: start after the scanned range
        // when the delim is a divider or the word itself is a delim, otherwise
        // start at the delim.
        // NOTE(review): the original comment on At says the delim of the current
        // word is itself considered a standalone word and the next word should be
        // the delim itself - confirm the shared calculation is intended.
        case RelativePosition.After:
        case RelativePosition.At:
        {
            bool startPastScan =
                (Word.TextTraits.IsDividerDelim(Word.DelimClass) == true)
                || (Word.WordIsDelim == true);
            if (startPastScan)
            {
                startBx = Word.ScanEx + 1;
            }
            else
            {
                startBx = Word.DelimBx;
            }
            break;
        }

        case RelativePosition.End:
            startBx = Text.Length;
            break;

        case RelativePosition.None:
        default:
            startBx = -1;
            break;
    }

    // a start position past the last char of the text means nothing left to scan.
    if (startBx > (Text.Length - 1))
    {
        startBx = -1;
    }

    return startBx;
}
// -------------------- IsolateWord ---------------------------
/// <summary>
/// There is a word starting at Bx. Scans to the end of the word and stores the
/// word in Results.
/// </summary>
/// <param name="Text">Text being scanned.</param>
/// <param name="Bx">Position in Text where the word starts.</param>
/// <param name="Results">Cursor that receives the word.</param>
/// <param name="Traits">Traits that supply the quote, literal and nonword patterns.</param>
/// <returns>The ScanPatternResults of the delim that ends the word.</returns>
/// <exception cref="ApplicationException">
/// A quote character immediately follows the characters of a name.
/// </exception>
private static ScanPatternResults IsolateWord(
    string Text, int Bx, ref WordCursor Results, TextTraits Traits)
{
    char firstCh = Text[Bx];

    // is start of either verbatim string literal or quoted literal.
    bool startsLiteral =
        ((Traits.VerbatimLiteralPattern != null)
            && (Traits.VerbatimLiteralPattern.Match(Text, Bx)))
        || (Traits.IsQuoteChar(firstCh) == true);

    if (startsLiteral)
    {
        var quoted = ScanWord.IsolateQuotedWord(Text, Bx, Traits);
        int quotedBx = quoted.Item1;
        string quotedText = quoted.Item3;

        Results.SetWord(quotedText, WordClassification.Quoted, quotedBx);
        Results.Word.LiteralType = quoted.Item4;

        return quoted.Item5;
    }

    // Scan the string for any of the non word patterns specified in Traits.
    ScanPatternResults spr =
        Scanner.ScanEqualAny(Text, Bx, Text.Length - Bx, Traits.NonWordPatterns);

    DelimClassification delimClass = DelimClassification.None;
    if (spr.IsNotFound == false)
    {
        delimClass = spr.FoundPat.DelimClassification;
    }

    // a quote character within the name. this is an error.
    if (delimClass == DelimClassification.Quote)
    {
        throw new ApplicationException(
            "quote character immed follows name character at position " +
            spr.FoundPos.ToString() + " in " + Text);
    }

    // no delim found. all word to the end of the string.
    else if (spr.IsNotFound)
    {
        Results.SetWord(Text.Substring(Bx), WordClassification.Identifier, Bx);
    }

    // found an open named brace char.
    // Open named braced words are words that combine the word and the braced
    // contents. debatable that this feature is needed and should be retained.
    else if (delimClass == DelimClassification.OpenNamedBraced)
    {
        Scanner.ScanWord_IsolateWord_Braced(Text, Bx, spr, ref Results, Traits);
    }

    // delim is same position as the word. so either the word is the delim ( an
    // expression symbol ) or the word is empty ( the delim is a comma, semicolon,
    // ... a content divider )
    else if (spr.FoundPos == Bx)
    {
        bool delimIsTheWord =
            (Traits.NonDividerIsWord == true)
            && (Traits.IsDividerDelim(spr.FoundPat.DelimClassification) == false);

        if (delimIsTheWord)
        {
            Results.SetWord(
                spr.FoundPat.PatternValue,
                spr.FoundPat.DelimClassification.ToWordClassification( ).Value,
                Bx,
                spr.FoundPat.LeadChar);
        }

        // start of CommentToEnd comment. This is a word, not a delim. Find the
        // end of the comment and set the delim to that end position.
        else if (delimClass == DelimClassification.CommentToEnd)
        {
            spr = Scanner.ScanWord_IsolateWord_CommentToEnd(
                Text, spr.FoundPos, ref Results, Traits);
        }
        else
        {
            Results.SetNullWord();
        }
    }

    // we have a word that ends with a delim.
    else
    {
        int wordLx = spr.FoundPos - Bx;
        Results.SetWord(Text.Substring(Bx, wordLx), WordClassification.Identifier, Bx);
    }

    return spr;
}
// ------------------------- ParseAddressString ------------------------
/// <summary>
/// Parses an email address string into its address, comment and friendly name.
/// The word braced by '&lt;' is the address and the word braced by '(' is the
/// comment. The remaining words form the friendly name. When no braced address is
/// found, the friendly name text is used as the address.
/// </summary>
/// <param name="InString">The address string to parse.</param>
/// <returns>A new EmailAddress holding the parts that were found.</returns>
public static EmailAddress ParseAddressString(string InString)
{
    TextTraits traits = new TextTraits( )
        .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
    traits.DividerPatterns.AddDistinct(
        new string[] { " ", "\t" }, Text.Enums.DelimClassification.DividerSymbol);

    EmailAddress results = new EmailAddress( );

    // first and last word of the friendly name.
    WordCursor firstFriendly = null;
    WordCursor lastFriendly = null;

    WordCursor csr = Scanner.PositionBeginWord(InString, traits);
    while (true)
    {
        // advance to the next word in the address string.
        csr = Scanner.ScanNextWord(InString, csr);
        if (csr.IsEndOfString)
        {
            break;
        }

        // the email address itself is braced by '<'.
        if ((csr.Word.Class == WordClassification.ContentBraced)
            && (csr.Word.BraceChar == '<'))
        {
            results.Address = csr.Word.BracedText;
            continue;
        }

        // comment in the email address string.
        if ((csr.Word.Class == WordClassification.ContentBraced)
            && (csr.Word.BraceChar == '('))
        {
            results.Comment = csr.Word.BracedText;
            results.Comment =
                MimeCommon.DecodeHeaderString_EncodedOnly(results.Comment);
            continue;
        }

        // word part of the friendly name in the address. extend the word range of
        // the friendly string.
        if (firstFriendly == null)
        {
            firstFriendly = csr;
        }
        lastFriendly = csr;
    }

    // working from the word range, isolate the full friendly name string.
    string friendlyText = null;
    if (firstFriendly != null)
    {
        if (firstFriendly == lastFriendly)
        {
            friendlyText = firstFriendly.Word.ToString( );
        }
        else
        {
            int friendlyBx = firstFriendly.WordBx;
            int friendlyEx = lastFriendly.WordEx;
            friendlyText = InString.Substring(friendlyBx, friendlyEx - friendlyBx + 1);
        }
    }

    // final decode of the friendly name. name could be quoted, could contain
    // encoded-words.
    if (friendlyText != null)
    {
        friendlyText = MimeCommon.DecodeHeaderString_QuotedEncodedEither(friendlyText);
    }

    // the friendly name could actually be the email address.
    if (results.Address == null)
    {
        results.Address = friendlyText;
    }
    else
    {
        results.FriendlyName = friendlyText;
    }

    return results;
}
/// <summary>
/// This is the central method where the cracking of statement text into a hierarchy
/// of StmtWord(s) takes place. It scans the words that belong to one parent word
/// and calls itself for every sentence and every braced word it finds.
/// </summary>
/// <param name="StmtText">Statement text to scan.</param>
/// <param name="Traits">Traits that control sentence forming.</param>
/// <param name="ParentStart">Cursor to start scanning from.</param>
/// <param name="ParentWord">Word that is the parent of the words created here.</param>
/// <returns>The cursor where the scan of this parent stopped.</returns>
static WordCursor ParseParent(
    string StmtText, StmtTraits Traits,
    WordCursor ParentStart, StmtWord ParentWord)
{
    WordCursor csr = ParentStart;

    while (true)
    {
        StmtWord word = null;

        // get next word in the stmt string.
        csr = Scanner.ScanNextWord(StmtText, csr);

        // end of string. Got nothing.
        if (csr.IsEndOfString == true)
        {
            break;
        }

        // word is start of a sentence. ( a CommentToEnd word does not start one. )
        if ((Traits.FormSentencesFromWhitespaceDelimWords == true)
            && (ParentWord.IsSentence == false)
            && ((csr.DelimIsWhitespace == true) || (csr.DelimIsOpenBrace == true))
            && (csr.WordClassification != WordClassification.CommentToEnd))
        {
            word = new StmtWord(StmtText, ParentWord, csr, WordCompositeCode.Sentence);

            // stay at this word, so the recursive call scans it as the first word
            // of the sentence.
            csr.StayAtFlag = true;
            csr = ParseParent(StmtText, Traits, csr, word);
        }

        // Word is braced. Make a composite word, then recursively call this method
        // to parse the contents.
        bool bracedWordWasParsed = false;
        if ((word == null) && (csr.WordIsOpenBrace == true))
        {
            word = new StmtWord(StmtText, ParentWord, csr, WordCompositeCode.Braced);
            csr = ParseParent(StmtText, Traits, csr, word);
            bracedWordWasParsed = true;

            // this braced word may be the start of a sentence.
            if ((Traits.FormSentencesFromWhitespaceDelimWords == true)
                && (ParentWord.IsSentence == false)
                && ((csr.DelimIsWhitespace == true) || (csr.DelimIsOpenBrace == true)))
            {
                StmtWord sentence = new StmtWord(
                    StmtText, ParentWord, word.WordCursor, WordCompositeCode.Sentence);
                word.Parent = sentence;
            }
        }

        // add the standalone word to the parent word.
        // a delim only word is skipped: it might be the whitespace after an EndStmt
        // delim sentence and the end of the braced parent. ( ex: return _Name ; } )
        // todo: draw distinction between skipping the empty word before a close
        //       brace and the empty word after a comma delim sequence.
        //       ex: { a, b, c, }
        if ((word == null) && (csr.IsDelimOnly == false))
        {
            word = new StmtWord(StmtText, ParentWord, csr, WordCompositeCode.Atom);
        }

        // is the final word in a sentence.
        if (ParentWord.CompositeCode == WordCompositeCode.Sentence)
        {
            // a delim that is neither whitespace nor an assignment symbol ends the
            // sentence. note: a semicolon or comma will end a sentence.
            bool delimEndsSentence =
                (csr.DelimIsWhitespace == false)
                && (csr.DelimIsAssignmentSymbol == false);

            // sentence also ends when word is braced and this braced word is not
            // the first word in the sentence. ex: get { return _Name ; }
            if (delimEndsSentence
                || ((bracedWordWasParsed == true) && (ParentWord.SubWords.Count > 1)))
            {
                break;
            }
        }

        // final word in a Braced sequence.
        // the close brace delim is the closing brace of the parent word.
        // ex: { wd1 wd2 }  the } delim for wd2 applies to the braced word.
        if ((ParentWord.CompositeCode == WordCompositeCode.Braced)
            && (csr.DelimIsCloseBrace == true)
            && ((word == null) || (word.OwnsCloseBracedDelim == false)))
        {
            // save the location of the closing brace.
            ParentWord.CloseBracePosition = csr.DelimBx;
            ParentWord.CloseBraceCursor = csr;

            // cursor is located at the closing brace. We want the word after the
            // closing brace to always be a delim only word. In a parent where
            // members are delimited by comma this is no problem. But in a whitespace
            // separated list, this might not be the case without a little helpful
            // adjustment.
            csr = Scanner.ScanNextWord(StmtText, csr);
            csr.StayAtFlag = true;

            if (csr.IsDelimOnly == true)
            {
                csr.StayAtFlag = false;
                ParentWord.CloseBraceCursor = csr;
            }
            else if ((csr.WordClassification == WordClassification.OpenNamedBraced)
                || (csr.WordClassification == WordClassification.OpenContentBraced))
            {
                csr.SetVirtualCursor_WhitespaceOnly(csr.WordBx - 1);
                csr.StayAtFlag = false;
            }

            break;
        }

        // note: a NewLine delim does not end first pass processing here.
    }

    return csr;
}