/// <summary>
/// Create a word of a statement from the statement text, the parent word
/// and the word cursor supplied by the caller. The word starts without sub words.
/// </summary>
public StmtWord(string StmtText, StmtWord Parent, WordCursor WordCursor)
 {
     // plain field state first.
     this.mStmtText   = StmtText;
     this.mSubWords   = null;
     this.mWordCursor = WordCursor;

     // the parent is assigned through the Parent member, after the fields.
     this.Parent = Parent;
 }
        // ------------------------------ Parse ----------------------------
        /// <summary>
        /// Scan mLine word by word and hand each recognized keyword to its value
        /// parser: "content-type:" to ParseValue_ContentType, "boundary=" to
        /// ParseValue_Boundary and "charset=" to ParseValue_CharSet. Scanning
        /// resumes from the cursor the value parser returns and stops at the end
        /// of the string. Keywords are matched without regard to case.
        /// </summary>
        private void Parse( )
        {
            WordCursor csr = Scanner.PositionBeginWord(mLine, mTraits);

            while (true)
            {
                csr = Scanner.ScanNextWord(mLine, csr);
                if (csr.IsEndOfString)
                {
                    break;
                }

                // a delim only cursor has no keyword text to test. ( same guard that
                // ParseValue_Boundary applies before it touches csr.Word. )
                if (csr.IsDelimOnly == true)
                {
                    continue;
                }

                // read the keyword and its delimiter once per word. The keyword is
                // compared ordinal, ignoring case, so the match does not depend on
                // the culture of the current thread.
                string kwd   = csr.Word.ToString( );
                string delim = csr.DelimValue;

                // content-type: type/subtype;
                if ((delim == ":") &&
                    string.Equals(kwd, "content-type", System.StringComparison.OrdinalIgnoreCase))
                {
                    csr = ParseValue_ContentType(csr);
                }

                // boundary="value"
                else if ((delim == "=") &&
                         string.Equals(kwd, "boundary", System.StringComparison.OrdinalIgnoreCase))
                {
                    csr = ParseValue_Boundary(csr);
                }

                // charset=us-ascii
                else if ((delim == "=") &&
                         string.Equals(kwd, "charset", System.StringComparison.OrdinalIgnoreCase))
                {
                    csr = ParseValue_CharSet(csr);
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Break a string of mail addresses into the individual address strings,
        /// splitting on the "," between them.
        /// </summary>
        /// <param name="InString">Text holding the mail addresses.</param>
        /// <returns>ArrayList with one address string per entry, in scan order.</returns>
        public static ArrayList SplitStringOfMailAddresses(string InString)
        {
            // scan rules: "," and tab are dividers, space and tab are whitespace.
            TextTraits scanTraits =
                new TextTraits()
                .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
            scanTraits.DividerPatterns.Replace(new string[] { ",", "\t" }, DelimClassification.DividerSymbol);
            scanTraits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

            ArrayList addresses = new ArrayList( );
            WordCursor cursor = Scanner.PositionBeginWord(InString, scanTraits);

            // ScanNextAddress hands back the first and last word of the next address.
            // A null first word means there is nothing left to scan.
            for (ObjectPair range = ScanNextAddress(InString, cursor);
                 range.a != null;
                 range = ScanNextAddress(InString, cursor))
            {
                // the next scan continues from the last word of this address.
                cursor = (WordCursor)range.b;

                // cut the address text out of the input and collect it.
                addresses.Add(PullMailAddress(InString, range));
            }

            return(addresses);
        }
Esempio n. 4
0
        /// <summary>
        /// Rebuild an AcNamedValues from its serialized string form. Every word
        /// scanned from the string must be a content braced word; the braced text
        /// of each one is split by ParsePair into a key and a value.
        /// </summary>
        /// <param name="InString">The serialized named values.</param>
        /// <returns>The named values read from the string.</returns>
        /// <exception cref="ApplicationException">
        /// A scanned word is delim only or is not content braced.
        /// </exception>
        public static AcNamedValues Parse(string InString)
        {
            // scan rules: "[" as the open named braced pattern, "," as a divider.
            TextTraits scanTraits = new TextTraits();
            scanTraits.OpenNamedBracedPatterns.Replace("[", Text.Enums.DelimClassification.OpenNamedBraced);
            scanTraits.DividerPatterns.AddDistinct(",", Text.Enums.DelimClassification.DividerSymbol);

            AcNamedValues parsed = new AcNamedValues();
            WordCursor cursor = Scanner.PositionBeginWord(InString, scanTraits);

            for (cursor = Scanner.ScanNextWord(InString, cursor);
                 cursor.IsEndOfString != true;
                 cursor = Scanner.ScanNextWord(InString, cursor))
            {
                bool isBracedContent =
                    (cursor.IsDelimOnly == false) &&
                    (cursor.Word.Class == WordClassification.ContentBraced);

                // anything other than a braced pair is a format error.
                if (isBracedContent == false)
                {
                    throw new ApplicationException(
                              "serialized AcNamedValues string in invalid format");
                }

                KeyValuePair <string, string> entry = ParsePair(cursor.Word.BracedText);
                parsed.Add(entry.Key, entry.Value);
            }

            return(parsed);
        }
        // ------------------------- ParseValue_ContentType -----------------------
        /// <summary>
        /// Scan the words that follow the content-type keyword. The word delimited
        /// by "/" is stored in mContentType. The word delimited by ";" ends the
        /// value: it is stored in mContentType when no type has been stored yet,
        /// otherwise in mContentSubType.
        /// </summary>
        /// <param name="InCsr">Cursor located at the content-type keyword. Its
        /// TextTraits are set to mTraits.</param>
        /// <returns>The cursor at the ";" delimited word, or the end of string
        /// cursor when no ";" was found.</returns>
        private WordCursor ParseValue_ContentType(
            WordCursor InCsr)
        {
            WordCursor cursor = InCsr;
            cursor.TextTraits = mTraits;

            for (cursor = Scanner.ScanNextWord(mLine, cursor);
                 !cursor.IsEndOfString;
                 cursor = Scanner.ScanNextWord(mLine, cursor))
            {
                // type/ : the word in front of the slash is the content type.
                if (cursor.DelimValue == "/")
                {
                    mContentType = cursor.Word.ToString( );
                    continue;
                }

                // any delimiter other than ";" is stepped over.
                if (cursor.DelimValue != ";")
                {
                    continue;
                }

                // ";" closes the value. The word is the sub type unless the type
                // itself is still missing.
                if (mContentType == null)
                {
                    mContentType = cursor.Word.ToString( );
                }
                else
                {
                    mContentSubType = cursor.Word.ToString( );
                }
                break;
            }

            return(cursor);
        }
 /// <summary>
 /// Build the cursor from an existing WordCursor ( passed to the base
 /// constructor ) together with the begin and end line cursors.
 /// </summary>
 public TextLinesWordCursor(
     WordCursor InWordCursor,
     TextLinesCursor InBeginLineCursor,
     TextLinesCursor InEndLineCursor)
     : base(InWordCursor)
 {
     this.mBeginLineCursor = InBeginLineCursor;
     this.mEndLineCursor   = InEndLineCursor;
 }
Esempio n. 7
0
        // ------------------------- ParseContentType ----------------------------
        /// <summary>
        /// Parse a content type string of the form
        /// type/subtype; charset=value; boundary=value; name=value
        /// into a PartProperty.ContentType. The word delimited by "/" becomes
        /// Type and the word delimited by ";" becomes SubType. The keywords
        /// charset, boundary and name set the matching property.
        /// Type, SubType and the keywords are lower cased with the invariant
        /// culture, so the result does not depend on the culture of the current
        /// thread ( a Turkish culture would lower case "I" to a dotless i ).
        /// </summary>
        /// <param name="InString">The content type text to parse.</param>
        /// <returns>The content type properties found in the string.</returns>
        public static PartProperty.ContentType ParseContentType(string InString)
        {
            TextTraits traits = new TextTraits()
                                .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

            traits.DividerPatterns.Replace(
                new string[] { "/", ":", ";", " ", "\t", "=" }, DelimClassification.DividerSymbol);
            traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);
            PartProperty.ContentType results = new PartProperty.ContentType();

            WordCursor csr = Scanner.PositionBeginWord(InString, traits);

            while (true)
            {
                csr = Scanner.ScanNextWord(InString, csr);
                if (csr.IsEndOfString)
                {
                    break;
                }

                // content type
                if (csr.DelimValue == "/")
                {
                    results.Type = csr.Word.ToString( ).ToLowerInvariant( );
                }

                // content sub type.
                else if (csr.DelimValue == ";")
                {
                    results.SubType = csr.Word.ToString( ).ToLowerInvariant( );
                }

                // a kwd
                else if (csr.DelimValue == "=")
                {
                    // look at the word after the "=". It is taken as the keyword value
                    // only when it is followed by whitespace or the end of the string.
                    // Otherwise the cursor stays on the keyword and the scan goes on.
                    WordCursor nxCsr = csr.NextWord( );
                    if ((nxCsr.DelimClass == DelimClassification.EndOfString) ||
                        (nxCsr.DelimClass == DelimClassification.Whitespace))
                    {
                        string kwd = csr.Word.ToString( ).ToLowerInvariant( );
                        csr = nxCsr;
                        if (kwd == "charset")
                        {
                            results.CharSet = csr.Word.NonQuotedSimpleValue;
                        }
                        else if (kwd == "boundary")
                        {
                            results.Boundary = csr.Word.NonQuotedSimpleValue;
                        }
                        else if (kwd == "name")
                        {
                            results.Name = csr.Word.NonQuotedSimpleValue;
                        }
                    }
                }
            }
            return(results);
        }
 /// <summary>
 /// Create a word of a statement from the statement text, the parent word,
 /// the word cursor and an explicit composite code. The word starts without
 /// sub words.
 /// </summary>
 public StmtWord(
     string StmtText,
     StmtWord Parent,
     WordCursor WordCursor,
     WordCompositeCode CompositeCode)
 {
     // plain field state first.
     this.mStmtText      = StmtText;
     this.mSubWords      = null;
     this.mWordCursor    = WordCursor;
     this.mCompositeCode = CompositeCode;

     // the parent is assigned through the Parent member, after the fields.
     this.Parent = Parent;
 }
Esempio n. 9
0
        // The ParseBufferComplex holds the lines to parse concatenated together
        // in a single buffer. It is also the cross reference for converting
        // buffer locations to line positions. A caller builds it from the text
        // lines, as in: new ParseBufferComplex(InTextLines).

        /// <summary>
        /// Parse the buffer of a ParseBufferComplex into a hierarchy of StmtWords.
        /// </summary>
        /// <param name="ParseBuf">Supplies the text buffer to parse.</param>
        /// <param name="Traits">Scan rules handed to the Scanner and to ParseParent.</param>
        /// <returns>The top word. It is created with no parent, no word cursor
        /// and the General composite code, then filled by ParseParent.</returns>
        public static StmtWord ParseTextLines(
            ParseBufferComplex ParseBuf, StmtTraits Traits)
        {
            var parseText = ParseBuf.Buffer;

            // position in front of the first word of the buffer.
            WordCursor startCsr = Scanner.PositionBeginWord(parseText, Traits);

            // the root that the parsed words are attached to.
            StmtWord rootWord = new StmtWord(parseText, null, null, WordCompositeCode.General);

            // the cursor ParseParent returns is not needed here.
            ParseParent(parseText, Traits, startCsr, rootWord);

            return(rootWord);
        }
        // ------------------------- Crack ------------------------------------
        /// <summary>
        /// Crack a mail drop LIST line into its message number and size
        /// components. The raw line is kept in mRawLine, the first word of the
        /// line goes to MessageNx and the second word to MessageSx.
        /// </summary>
        /// <param name="InLine">The LIST line to crack.</param>
        /// <returns>This object, so that calls can be chained.</returns>
        public MailDropMessage Crack(string InLine)
        {
            mRawLine = InLine;

            // first word of the line.
            WordCursor numberCsr = new WordCursor( )
                                   .SetString(InLine)
                                   .NextWord( );
            MessageNx = numberCsr.Word.ToString( );

            // second word of the line.
            WordCursor sizeCsr = numberCsr.NextWord( );
            MessageSx = sizeCsr.Word.ToString( );

            return(this);
        }
Esempio n. 11
0
        /// <summary>
        /// Crack a line into its words. The first three words are scanned with
        /// the general traits. From the 4th word on ( the file/dir name, which
        /// can contain spaces ) the scan uses traits where only tab is
        /// whitespace, and the word value is trimmed.
        /// </summary>
        /// <param name="InLine">The line to crack.</param>
        /// <returns>The word values in the order they were scanned.</returns>
        string[] Parse_CrackWords(string InLine)
        {
            // number of words in front of the entry name.
            const int entryNameIx = 3;

            // general traits: no dividers and no braces.
            TextTraits wordTraits = new TextTraits();
            wordTraits.DividerPatterns.Clear( );
            wordTraits.OpenContentBracedPatterns.Clear();
            wordTraits.OpenNamedBracedPatterns.Clear();

            // entry name traits: a copy of the general traits in which the
            // whitespace patterns are replaced by tab alone.
            TextTraits nameTraits = new TextTraits(wordTraits);
            nameTraits.WhitespacePatterns.Replace("\t", Text.Enums.DelimClassification.Whitespace);

            List <string> cracked = new List <string>();
            WordCursor    cursor  = Scanner.PositionBeginWord(InLine, wordTraits);

            while (true)
            {
                // pick the traits by how many words have been collected so far.
                bool atEntryName = (cracked.Count == entryNameIx);
                cursor.TextTraits = atEntryName ? nameTraits : wordTraits;

                cursor = Scanner.ScanNextWord(InLine, cursor);
                if (cursor.IsEndOfString == true)
                {
                    break;
                }

                string text = cursor.Word.Value;
                cracked.Add(atEntryName ? text.Trim() : text);
            }

            return(cracked.ToArray());
        }
Esempio n. 12
0
        /// <summary>
        /// InCsr locates the name part of a name=value pair. Scan to the value
        /// part and return its WordCursor.
        /// </summary>
        /// <param name="InBoundedString">The text being scanned.</param>
        /// <param name="InCsr">Cursor at the attribute name.</param>
        /// <returns>
        /// The cursor of the value word, or null when there is no well formed
        /// value: InCsr has no word or is not delimited by "=", the scan hit the
        /// end of the string or found no word, or the value is delimited by
        /// something other than whitespace, end of string or "/". The caller
        /// should treat null as malformed xml.
        /// </returns>
        private static WordCursor ScanOpenUnit_Attribute_GetValue(
            BoundedString InBoundedString, WordCursor InCsr)
        {
            // must be standing on a word that is followed by "=".
            if ((InCsr.Word == null) || (InCsr.DelimValue != "="))
            {
                return(null);
            }

            // scan to the value part of the attribute.
            WordCursor valueCsr = Scanner.ScanNextWord(InBoundedString, InCsr);

            // no value to scan to.
            if ((valueCsr.IsEndOfString == true) || (valueCsr.Word == null))
            {
                return(null);
            }

            // got a word. It is the attribute value as long as its delimiter is
            // one that may follow a value.
            bool delimIsLegit =
                (valueCsr.DelimClass == DelimClassification.Whitespace) ||
                (valueCsr.DelimClass == DelimClassification.EndOfString) ||
                (valueCsr.DelimValue == "/");

            return(delimIsLegit ? valueCsr : null);
        }
Esempio n. 13
0
        // --------------------------- PullMailAddress ------------------------------
        /// <summary>
        /// Cut one mail address out of the input string. The address runs from the
        /// WordBx of the first word of the pair through the WordEx of the second.
        /// </summary>
        /// <param name="InString">The string that holds the address.</param>
        /// <param name="InPair">The first ( a ) and last ( b ) WordCursor of the address.</param>
        /// <returns>The address text, both end positions included.</returns>
        /// <exception cref="ApplicationException">Either position is -1.</exception>
        private static string PullMailAddress(string InString, ObjectPair InPair)
        {
            WordCursor firstWord = (WordCursor)InPair.a;
            WordCursor lastWord  = (WordCursor)InPair.b;

            int firstIx = firstWord.WordBx;
            int lastIx  = lastWord.WordEx;

            bool missingPos = (firstIx == -1) || (lastIx == -1);
            if (missingPos)
            {
                throw(new ApplicationException(
                          "email address not properly formed: " + InString));
            }

            // both positions are inclusive, so the length is one more than the span.
            int addrLx = lastIx - firstIx + 1;
            return(InString.Substring(firstIx, addrLx));
        }
Esempio n. 14
0
        // --------------------------- IsolateDelim_SetDelimIsWhitespace ----------
        // Record whitespace as the delim of the word in Results.
        //   Text    - the text being scanned.
        //   Traits  - supplies the whitespace patterns.
        //   Results - the word cursor that receives the delim.
        //   WsIx    - position in Text where the whitespace starts.
        private static void IsolateDelim_SetDelimIsWhitespace(
            string Text, TextTraits Traits,
            WordCursor Results, int WsIx)
        {
            // scan from the start of the whitespace to the end of the text for the
            // first thing that is not whitespace. What was scanned over is kept as the
            // delim value, so it can be checked later for tabs or newlines.
            ScanPatternResults wsScan = Scanner.ScanNotEqual(
                Text, WsIx, Text.Length - 1,
                Traits.WhitespacePatterns);
            string whitespaceText = wsScan.ScannedOverString;

            Results.SetDelim(
                Text, whitespaceText, WsIx, DelimClassification.Whitespace);
            Results.DelimIsWhitespace = true;
        }
Esempio n. 15
0
        /// <summary>
        /// Append an attribute unit to the sub units of this unit. The sub unit
        /// list is created on first use.
        /// </summary>
        /// <param name="InNameWord">Cursor of the attribute name. Its WordBx is
        /// the begin position of the new unit.</param>
        /// <param name="InEncodedAttributeValueWord">Cursor of the attribute
        /// value. Its WordEx is the end position of the new unit.</param>
        /// <returns>The attribute unit that was added.</returns>
        public XmlUnit AddAttribute(
            WordCursor InNameWord, WordCursor InEncodedAttributeValueWord)
        {
            mSubUnits = mSubUnits ?? new List <XmlUnit>();

            // the unit is linked in first and then filled in.
            XmlUnit attr = new XmlUnit();
            mSubUnits.Add(attr);

            // extent of the attribute: start of the name through end of the value.
            attr.Bx = InNameWord.WordBx;
            attr.Ex = InEncodedAttributeValueWord.WordEx;

            attr.UnitCode = XmlUnitCode.Attribute;
            attr.NameWord = InNameWord;
            attr.EncodedAttributeValueWord = InEncodedAttributeValueWord;

            return(attr);
        }
Esempio n. 16
0
        ParseContentDisposition(string InString)
        {
            // Parse a content disposition string of the form
            //     disposition; filename=value
            // into a PartProperty.ContentDisposition. The word delimited by ";" becomes
            // Disposition. The filename keyword sets FileName.
            // The disposition and the keywords are lower cased with the invariant
            // culture, so the result does not depend on the culture of the current
            // thread ( a Turkish culture would lower case "I" to a dotless i, and
            // "FILENAME" would then never match "filename" ).
            TextTraits traits = new TextTraits()
                                .SetQuoteEncapsulation(QuoteEncapsulation.Escape);

            traits.DividerPatterns.Replace(
                new string[] { ";", " ", "\t", "=" }, DelimClassification.DividerSymbol);
            traits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);
            PartProperty.ContentDisposition results = new PartProperty.ContentDisposition();

            WordCursor csr = Scanner.PositionBeginWord(InString, traits);

            while (true)
            {
                csr = Scanner.ScanNextWord(InString, csr);
                if (csr.IsEndOfString)
                {
                    break;
                }

                // content disposition
                if (csr.DelimValue == ";")
                {
                    results.Disposition = csr.Word.ToString( ).ToLowerInvariant( );
                }

                // a kwd
                else if (csr.DelimValue == "=")
                {
                    // look at the word after the "=". It is taken as the keyword value
                    // only when it is followed by whitespace or the end of the string.
                    // Otherwise the cursor stays on the keyword and the scan goes on.
                    WordCursor nxCsr = csr.NextWord( );
                    if ((nxCsr.DelimClass == DelimClassification.EndOfString) ||
                        (nxCsr.DelimClass == DelimClassification.Whitespace))
                    {
                        string kwd = csr.Word.ToString( ).ToLowerInvariant( );
                        csr = nxCsr;
                        if (kwd == "filename")
                        {
                            results.FileName = csr.Word.NonQuotedSimpleValue;
                        }
                    }
                }
            }
            return(results);
        }
Esempio n. 17
0
        // ------------------------------ ScanNextAddress ---------------------------
        /// <summary>
        /// Scan forward from InWord over the words of the next mail address. The
        /// address ends at the word delimited by "," or by an empty delim value
        /// ( end of the string ).
        /// </summary>
        /// <param name="InString">The string of mail addresses.</param>
        /// <param name="InWord">Cursor to scan from. Its TextTraits are replaced
        /// by the address scan traits built here.</param>
        /// <returns>
        /// Pair of the first ( a ) and last ( b ) WordCursor of the address. Both
        /// are null when the scan hit the end of the string before any word.
        /// </returns>
        private static ObjectPair ScanNextAddress(
            string InString, WordCursor InWord)
        {
            // scan rules: "," and tab are dividers, space and tab are whitespace.
            TextTraits addrTraits =
                new TextTraits()
                .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
            addrTraits.DividerPatterns.Replace(new string[] { ",", "\t" }, DelimClassification.DividerSymbol);
            addrTraits.WhitespacePatterns.Replace(" ", "\t", DelimClassification.Whitespace);

            WordCursor firstWord = null;
            WordCursor lastWord  = null;

            WordCursor cursor = InWord;
            cursor.TextTraits = addrTraits;

            // step from word to word until the comma between addresses or the end
            // of the string.
            do
            {
                cursor = Scanner.ScanNextWord(InString, cursor);
                if (cursor.IsEndOfString)
                {
                    break;
                }

                // widen the word range of the current address.
                firstWord = firstWord ?? cursor;
                lastWord  = cursor;
            }
            while ((cursor.DelimValue != ",") && (cursor.DelimValue != ""));

            return(new ObjectPair(firstWord, lastWord));
        }
        // ------------------------- ParseValue_Boundary -----------------------
        /// <summary>
        /// Scan to the word after the boundary= keyword and store it in mBoundary.
        /// A quoted word is stored dequoted. Nothing is stored when the cursor is
        /// delim only.
        /// </summary>
        /// <param name="InWordCsr">Cursor located at the boundary keyword.</param>
        /// <returns>The cursor after the scan to the value.</returns>
        private WordCursor ParseValue_Boundary(
            WordCursor InWordCsr)
        {
            WordCursor valueCsr = Scanner.ScanNextWord(mLine, InWordCsr);

            bool hasValue = (valueCsr.IsDelimOnly == false);
            if (hasValue && (valueCsr.Word.IsQuoted == true))
            {
                mBoundary = valueCsr.Word.DequotedWord;
            }
            else if (hasValue)
            {
                mBoundary = valueCsr.Word.ToString( );
            }

            return(valueCsr);
        }
        // ------------------------- ParseValue_CharSet -----------------------
        /// <summary>
        /// Scan to the word after the charset= keyword and store it in mCharSet.
        /// A quoted word is stored dequoted. Nothing is stored when the cursor is
        /// not at a word.
        /// </summary>
        /// <param name="InWordCsr">Cursor located at the charset keyword.</param>
        /// <returns>The cursor after the scan to the value.</returns>
        private WordCursor ParseValue_CharSet(
            WordCursor InWordCsr)
        {
            WordCursor valueCsr = Scanner.ScanNextWord(mLine, InWordCsr);

            bool hasValue = valueCsr.IsAtWord;
            if (hasValue && (valueCsr.Word.IsQuoted == true))
            {
                mCharSet = valueCsr.Word.DequotedWord;
            }
            else if (hasValue)
            {
                mCharSet = valueCsr.Word.ToString( );
            }

            return(valueCsr);
        }
Esempio n. 20
0
 /// <summary>
 /// Test whether the cursor is located at the name of an xml unit: a word
 /// that is delimited by whitespace, the end of the string or "/".
 /// </summary>
 /// <param name="InCsr">The cursor to test.</param>
 /// <returns>
 /// true when the cursor is at a unit name. false when the cursor has no
 /// word or the word is delimited by "=". Any other delimiter is reported
 /// through ThrowIncorrectlyFormedXmlException.
 /// </returns>
 private static bool ScanOpenUnit_CursorAtUnitName(WordCursor InCsr)
 {
     // no word at all, or a word in front of "=": not the unit name.
     if ((InCsr.Word == null) || (InCsr.DelimValue == "="))
     {
         return(false);
     }

     bool nameDelim =
         (InCsr.DelimClass == DelimClassification.EndOfString) ||
         (InCsr.DelimClass == DelimClassification.Whitespace) ||
         (InCsr.DelimValue == "/");
     if (nameDelim)
     {
         return(true);
     }

     // a word with any other delimiter is malformed xml.
     ThrowIncorrectlyFormedXmlException(InCsr.ScanBx);
     return(false);
 }
Esempio n. 21
0
        // what first pass processing does:
        //   - cracks the stmt stream into a sequence of delim separated words
        //   - organizes the words in a bracket organized hierarchy
        //   - possibly, groups the words into stmt units based on end of stmt and
        //     new line delimiters specified in StmtTraits. Also on the comment
        //     markers contained in StmtTraits.
        //
        /// <summary>
        /// Scan the words of InStmtText from InCsr onward and create a StmtWord for
        /// each one under InParentWord. Recurses for every word that is an open brace.
        /// </summary>
        /// <param name="InStmtText">The statement text being scanned.</param>
        /// <param name="InTraits">Scan rules. FormSentencesFromWhitespaceDelimWords
        /// controls whether sentence words are formed.</param>
        /// <param name="InCsr">Cursor to start scanning from.</param>
        /// <param name="InParentWord">Parent of the words created at this level.</param>
        /// <returns>
        /// The cursor where this level stopped: the end of the string, a word with a
        /// CloseBraced delim, or ( when the current parent is a sentence ) a word
        /// whose delim is not whitespace.
        /// </returns>
        public static WordCursor FirstPass(
            string InStmtText, StmtTraits InTraits, WordCursor InCsr, StmtWord InParentWord)
        {
            WordCursor csr        = InCsr;
            StmtWord   fsWord     = null;
            StmtWord   parentWord = InParentWord;

            while (true)
            {
                csr = Scanner.ScanNextWord(InStmtText, csr);

                if (csr.IsEndOfString == true)
                {
                    break;
                }

                StmtWord word = new StmtWord(InStmtText, parentWord, csr);

                // this word is start of stmt.
                if (fsWord == null)
                {
                    fsWord = word;
                }

                // word is start of a sentence. The word just created becomes the
                // sentence, and a second word for the same cursor is created under it.
                if ((InTraits.FormSentencesFromWhitespaceDelimWords == true) &&
                    (parentWord.IsComposite == false))
                {
                    if ((csr.DelimIsWhitespace == true) || (csr.DelimIsOpenBrace == true))
                    {
                        word.CompositeCode = WordCompositeCode.Sentence;
                        parentWord         = word;
                        word = new StmtWord(InStmtText, parentWord, csr);
                    }
                }

                // the EndStmt delim is considered to separate stmts within this parent
                // StmtElem. Since we have saved the reference to the first word of the
                // parent, the first and last words of the stmt can be marked.
                if (csr.DelimClass == DelimClassification.EndStmt)
                {
                    if (fsWord != null)
                    {
                        fsWord.BeginStmtWord = fsWord;
                        fsWord.EndStmtWord   = word;

                        word.BeginStmtWord = fsWord;
                        word.EndStmtWord   = word;
                    }
                    fsWord = null;
                }

                // word is braced ( a function ). collect all the words within the braces.
                if (csr.WordIsOpenBrace == true)
                {
                    csr = FirstPass(InStmtText, InTraits, csr, word);

                    // cursor is located at the closing brace. We want the word after the closing
                    // brace to always be a delim only word. In a parent where members are delimed
                    // by comma this is no problem. But in a whitespace sep list, this might not
                    // be the case without a little helpful adjustment.
                    csr            = Scanner.ScanNextWord(InStmtText, csr);
                    csr.StayAtFlag = true;

                    // nothing to adjust when the word is delim only. When it is another
                    // open brace, back the cursor up to a virtual whitespace in front of it.
                    if ((csr.IsDelimOnly != true) &&
                        ((csr.WordClassification == WordClassification.OpenNamedBraced) ||
                         (csr.WordClassification == WordClassification.OpenContentBraced)))
                    {
                        csr.SetVirtualCursor_WhitespaceOnly(csr.WordBx - 1);
                    }
                }

                // end of a sentence.
                else if ((parentWord.CompositeCode == WordCompositeCode.Sentence) &&
                         (csr.DelimIsWhitespace == false))
                {
                    return(csr);
                }

                // todo: have to expand this throw exception when the closing brace does
                //       not match the open brace.
                else if (csr.DelimClass == DelimClassification.CloseBraced)
                {
                    break;
                }
            }
            return(csr);
        }
Esempio n. 22
0
        // -------------------- IsolateDelim ---------------------------
        // Classify the delimiter that ends the current word and record it in Results.
        //   Text           - the text being scanned.
        //   PatternResults - result of the scan for the pattern that ends the word.
        //   Results        - the word cursor that receives the delim info.
        //   Traits         - scan rules ( virtual whitespace, path part delims ).
        //
        // The earlier version ran a whitespace look ahead scan here and tested its
        // result with an empty "if". The scan had no effect on Results and was removed.
        private static void IsolateDelim(
            string Text,
            ScanPatternResults PatternResults,
            ref WordCursor Results,
            TextTraits Traits)
        {
            // did not find a nonword char.  must have hit end of string.
            if (PatternResults.IsNotFound)
            {
                Results.DelimClass = DelimClassification.EndOfString;
                return;
            }

            // we have a delimiter of some kind.
            DelimClassification sprdc = PatternResults.FoundPat.DelimClassification;

            Results.WhitespaceFollowsWord  = false;
            Results.WhitespaceFollowsDelim = false;
            Results.DelimIsWhitespace      = false;

            // whitespace immed follows the word text. What comes after the whitespace
            // decides the delim.
            if (sprdc == DelimClassification.Whitespace)
            {
                ScanWord.IsolateDelim_WhitespaceFollows(
                    Text, PatternResults, ref Results, Traits);
                return;
            }

            // the delim is a hard delim ( not whitespace )

            // Want the openContent brace to be processed as a standalone word. Use
            // virtual whitespace so the word that this open brace is the delim of will
            // have what appears to be a whitespace delim. Then the following word will
            // be the standalone open content brace char.
            if ((sprdc == DelimClassification.OpenContentBraced) &&
                (Traits.VirtualWhitespace == true))
            {
                Results.SetDelim(
                    Text,
                    null, PatternResults.FoundPos, DelimClassification.VirtualWhitespace);
                return;
            }

            // delim is either as classified in the collection of NonWords or is
            // a PathPart delim.
            ScanPattern pat = Traits.GetPathPartDelim(
                Text, PatternResults.FoundPos);
            if (pat != null)
            {
                Results.SetDelim(
                    Text,
                    pat.PatternValue,
                    PatternResults.FoundPos,
                    DelimClassification.PathSep);
            }
            else
            {
                Results.SetDelim(
                    Text,
                    PatternResults.FoundPat.PatternValue,
                    PatternResults.FoundPos,
                    sprdc);
            }
        }
Esempio n. 23
0
        /// <summary>
        /// The delim after the word is whitespace. If what follows the whitespace
        /// is a delim char, then this whitespace is disregarded as the delim, and
        /// the delim is what follows the whitespace. The whitespace stays the
        /// delim when it is followed by a path part delim, an open brace or a
        /// quote, or by something that is not a non word pattern at all.
        /// </summary>
        /// <param name="Text">The text being scanned.</param>
        /// <param name="PatternResults">Scan result whose FoundPos is the start
        /// of the whitespace.</param>
        /// <param name="Results">The word cursor that receives the delim.</param>
        /// <param name="Traits">Supplies the whitespace and non word patterns.</param>
        private static void IsolateDelim_WhitespaceFollows(
            string Text,
            ScanPatternResults PatternResults,
            ref WordCursor Results,
            TextTraits Traits)
        {
            Results.WhitespaceFollowsWord = true;
            ScanPattern nwPat = null;

            // Look for hard delim after the ws.
            ScanPatternResults scanResults =
                Scanner.ScanNotEqual(
                    Text, PatternResults.FoundPos, Text.Length - 1,
                    Traits.WhitespacePatterns);

            // look that the char after the ws is a nonword.
            if (scanResults.FoundPos != -1)
            {
                var rv = Traits.NonWordPatterns.MatchPatternsAtStringLocation(
                    Text, scanResults.FoundPos, Text.Length - 1);
                nwPat = rv.Item1;
            }

            // the whitespace is the delim of record when:
            //   - nothing, or no non word pattern, follows the whitespace.
            //   - a sep char of a path name follows.
            //   - an open brace follows. braces are considered standalone words.
            //   - a quote follows. the quoted string is considered a word.
            bool whitespaceIsDelim =
                (nwPat == null) ||
                Traits.IsPathPartDelim(Text, scanResults.FoundPos) ||
                nwPat.DelimClassification.IsOpenBraced( ) ||
                (nwPat.DelimClassification == DelimClassification.Quote);

            if (whitespaceIsDelim)
            {
                ScanWord.IsolateDelim_SetDelimIsWhitespace(
                    Text, Traits, Results, PatternResults.FoundPos);
            }

            // is an actual delim.
            else
            {
                Results.SetDelim(
                    Text,
                    nwPat.PatternValue, scanResults.FoundPos, nwPat.DelimClassification);
            }
        }
 /// <summary>
 /// Build the cursor from an existing WordCursor ( passed to the base
 /// constructor ). No begin or end line cursor is set.
 /// </summary>
 public TextLinesWordCursor(WordCursor InWordCursor)
     : base(InWordCursor)
 {
     this.mBeginLineCursor = null;
     this.mEndLineCursor   = null;
 }
Esempio n. 25
0
        /// <summary>
        /// Crack the open unit ( "&lt;name attr=value ... &gt;" or the single form
        /// "&lt;name ... /&gt;" ) that starts at position InBx of mStream.
        /// </summary>
        /// <param name="InBx">Position in mStream of the "&lt;" that starts the unit.</param>
        /// <returns>
        /// The cracked unit: Bx and Ex are the positions of the opening "&lt;" and the
        /// closing "&gt;", NameWord is the unit name, and each name/value pair has been
        /// added with AddAttribute. UnitCode is Open, or Single when the unit ends
        /// with "/".
        /// </returns>
        XmlUnit CrackUnits_ScanOpenUnit(int InBx)
        {
            XmlUnit unit = new XmlUnit();
            unit.UnitCode = XmlUnitCode.Open;

            // the unit has to begin with "<".
            BoundedString wholeStream = new BoundedString(mStream);
            if (wholeStream[InBx] != '<')
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }
            unit.Bx = InBx;

            // find where the unit ends. a ">" must show up before the next "<".
            // ( quoted text is bypassed by the scan. )
            char[] unitEndChars = new char[] { '>', '<' };
            Scanner.ScanCharResults endScan = Scanner.ScanEqualAny_BypassQuoted(
                wholeStream, InBx + 1, unitEndChars, QuoteEncapsulation.Double);
            bool endsWithCloseAngle =
                (endScan.ResultPos != -1) && (endScan.ResultChar != '<');
            if (endsWithCloseAngle)
            {
                unit.Ex = endScan.ResultPos;
            }
            else
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }

            // word scanning is confined to the text between the "<" and the ">".
            BoundedString unitBody =
                new BoundedString(mStream, InBx + 1, endScan.ResultPos - 1);

            // "/" and "=" divide words. a line break counts as whitespace.
            TextTraits traits = new TextTraits();
            traits.OpenNamedBracedPatterns.Clear( );
            traits.DividerPatterns.Add("/", "=", DelimClassification.DividerSymbol);
            traits.WhitespacePatterns.AddDistinct(
                Environment.NewLine, DelimClassification.Whitespace);

            // step from word to word until the end of the unit text.
            for (WordCursor csr = Scanner.ScanFirstWord(unitBody, traits);
                 !csr.IsEndOfString;
                 csr = Scanner.ScanNextWord(unitBody, csr))
            {
                if (ScanOpenUnit_CursorAtUnitName(csr) == true)
                {
                    // the unit name. only one is allowed.
                    if (unit.NameWord == null)
                    {
                        unit.NameWord = csr;
                    }
                    else
                    {
                        ThrowIncorrectlyFormedXmlException(InBx);
                    }
                }
                else if (csr.Word != null)
                {
                    // an element attribute ( a named value pair ).
                    // note: attribute values are stored in their xml encoded state.
                    WordCursor valueWord =
                        ScanOpenUnit_Attribute_GetValue(unitBody, csr);
                    if (valueWord == null)
                    {
                        ThrowIncorrectlyFormedXmlException(InBx);
                    }
                    else
                    {
                        unit.AddAttribute(csr, valueWord);
                        csr = valueWord;
                    }
                }
                else if (csr.DelimValue != "/")
                {
                    // an empty word is only acceptable in front of the ending "/",
                    // which is dealt with just below.
                    ThrowIncorrectlyFormedXmlException(InBx);
                }

                // a "/" delimiter must be the very end of the open unit.
                if (csr.DelimValue == "/")
                {
                    WordCursor afterSlash = Scanner.ScanNextWord(unitBody, csr);
                    if (afterSlash.IsEndOfString == false)
                    {
                        ThrowIncorrectlyFormedXmlException(InBx);
                    }
                    else
                    {
                        unit.UnitCode = XmlUnitCode.Single;
                        break;
                    }
                }
            }

            return unit;
        }
Esempio n. 26
0
        /// <summary>
        /// Crack the close unit ( "&lt;/name&gt;" ) that starts at position InBx of
        /// mStream.
        /// </summary>
        /// <param name="InBx">Position in mStream of the "&lt;" that starts the unit.</param>
        /// <returns>
        /// A unit with UnitCode Close, Bx and Ex set to the positions of the "&lt;" and
        /// the "&gt;", and NameWord set when a name follows the "/".
        /// </returns>
        private XmlUnit CrackUnits_ScanCloseUnit(int InBx)
        {
            XmlUnit unit = new XmlUnit();
            unit.UnitCode = XmlUnitCode.Close;

            // the unit has to begin with "<".
            BoundedString wholeStream = new BoundedString(mStream);
            if (wholeStream[InBx] != '<')
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }
            unit.Bx = InBx;

            // find where the unit ends. a ">" must show up before the next "<".
            char[] unitEndChars = new char[] { '>', '<' };
            Scanner.ScanCharResults endScan = Scanner.ScanEqualAny_BypassQuoted(
                wholeStream, InBx + 1, unitEndChars, QuoteEncapsulation.Double);
            bool endsWithCloseAngle =
                (endScan.ResultPos != -1) && (endScan.ResultChar != '<');
            if (endsWithCloseAngle)
            {
                unit.Ex = endScan.ResultPos;
            }
            else
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }

            // word scanning is confined to the text between the "<" and the ">".
            BoundedString unitBody =
                new BoundedString(mStream, InBx + 1, endScan.ResultPos - 1);
            TextTraits traits = new TextTraits();
            traits.OpenNamedBracedPatterns.Clear();
            traits.DividerPatterns.Add("/", "=", DelimClassification.DividerSymbol);

            // the unit text opens with an empty word delimited by "/".
            WordCursor csr = Scanner.ScanFirstWord(unitBody, traits);
            if ((!csr.IsDelimOnly) || (csr.DelimValue != "/"))
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }

            // what follows is either nothing at all or a name that runs to the end
            // of the unit text.
            csr = Scanner.ScanNextWord(unitBody, csr);
            if ((!csr.IsEndOfString) &&
                (csr.DelimClass != DelimClassification.EndOfString))
            {
                ThrowIncorrectlyFormedXmlException(InBx);
            }

            // keep the element name when there is one.
            if (csr.Word != null)
            {
                unit.NameWord = csr;
            }

            return unit;
        }
Esempio n. 27
0
        // ----------------------- CalcStartBx ---------------------------
        // Work out the position in Text at which the scan for the next word begins,
        // based on where the cursor sits relative to its current word.
        // Returns -1 when there is no position to scan from: the cursor position is
        // None or not recognized, or the computed position is past the last
        // character of Text.
        private static int CalcStartBx(
            string Text, WordCursor Word)
        {
            // covers RelativePosition.None and any position not handled below.
            int startBx = -1;

            switch (Word.Position)
            {
                case RelativePosition.Begin:
                    startBx = 0;
                    break;

                case RelativePosition.Before:
                    startBx = Word.ScanBx;
                    break;

                // After and At share one rule: when the delim is a divider, or the
                // word is itself the delim, resume just past the scanned range.
                // Otherwise resume at the delim of the current word.
                case RelativePosition.After:
                case RelativePosition.At:
                {
                    bool resumePastScan =
                        Word.TextTraits.IsDividerDelim(Word.DelimClass) ||
                        Word.WordIsDelim;
                    startBx = resumePastScan ? Word.ScanEx + 1 : Word.DelimBx;
                    break;
                }

                case RelativePosition.End:
                    startBx = Text.Length;
                    break;
            }

            // a start position beyond the last character means nothing is left.
            int lastIx = Text.Length - 1;
            return (startBx > lastIx) ? -1 : startBx;
        }
Esempio n. 28
0
        // -------------------- IsolateWord ---------------------------
        // A word starts at position Bx of Text. Scan to the end of that word.
        // The isolated word is stored in the Results parm.
        // The return value is the ScanPatternResults of the delim that ends the word.
        // Throws ApplicationException when a quote character is found inside a name.
        private static ScanPatternResults IsolateWord(
            string Text,
            int Bx,
            ref WordCursor Results,
            TextTraits Traits)
        {
            char leadChar = Text[Bx];

            // the word starts a verbatim string literal or a quoted literal.
            bool startsLiteral =
                ((Traits.VerbatimLiteralPattern != null) &&
                 (Traits.VerbatimLiteralPattern.Match(Text, Bx))) ||
                Traits.IsQuoteChar(leadChar);
            if (startsLiteral)
            {
                var quoted = ScanWord.IsolateQuotedWord(Text, Bx, Traits);
                int    quotedBx   = quoted.Item1;
                string quotedText = quoted.Item3;
                Results.SetWord(quotedText, WordClassification.Quoted, quotedBx);
                Results.Word.LiteralType = quoted.Item4;
                return quoted.Item5;
            }

            // look for the first of the non word patterns spcfd in Traits.
            ScanPatternResults delim = Scanner.ScanEqualAny(
                Text, Bx, Text.Length - Bx, Traits.NonWordPatterns);
            DelimClassification delimClass = delim.IsNotFound
                ? DelimClassification.None
                : delim.FoundPat.DelimClassification;

            // a quote character within the name.  this is an error.
            if (delimClass == DelimClassification.Quote)
            {
                throw new ApplicationException(
                          "quote character immed follows name character at position " +
                          delim.FoundPos.ToString() + " in " + Text);
            }

            // no delim at all. the word runs to the end of the string.
            if (delim.IsNotFound)
            {
                Results.SetWord(Text.Substring(Bx), WordClassification.Identifier, Bx);
                return delim;
            }

            // an open named brace char. such a word combines the name with the
            // braced contents.
            if (delimClass == DelimClassification.OpenNamedBraced)
            {
                Scanner.ScanWord_IsolateWord_Braced(
                    Text, Bx, delim, ref Results, Traits);
                return delim;
            }

            // ordinary case: some word text, then the delim.
            if (delim.FoundPos != Bx)
            {
                string identText = Text.Substring(Bx, delim.FoundPos - Bx);
                Results.SetWord(identText, WordClassification.Identifier, Bx);
                return delim;
            }

            // from here on the delim sits right at the word position. either the
            // delim is the word ( an expression symbol ), or it starts a comment, or
            // the word is empty ( the delim is a comma, semicolon, ... a content
            // divider ).
            if (Traits.NonDividerIsWord &&
                !Traits.IsDividerDelim(delim.FoundPat.DelimClassification))
            {
                Results.SetWord(
                    delim.FoundPat.PatternValue,
                    delim.FoundPat.DelimClassification.ToWordClassification( ).Value,
                    Bx,
                    delim.FoundPat.LeadChar);
            }
            else if (delimClass == DelimClassification.CommentToEnd)
            {
                // the comment is a word, not a delim. the delim to return is whatever
                // the comment scan reports.
                delim = Scanner.ScanWord_IsolateWord_CommentToEnd(
                    Text, delim.FoundPos, ref Results, Traits);
            }
            else
            {
                Results.SetNullWord();
            }

            return delim;
        }
Esempio n. 29
0
        // ------------------------- ParseAddressString ------------------------
        // Crack a single mail address string into an EmailAddress.
        //   <braced> word  : becomes results.Address.
        //   (braced) word  : becomes results.Comment, after being run through
        //                    MimeCommon.DecodeHeaderString_EncodedOnly.
        //   any other word : extends the range of the friendly name.
        // The friendly name is decoded with
        // MimeCommon.DecodeHeaderString_QuotedEncodedEither. When no <braced> address
        // was found, the friendly text is returned as the Address and FriendlyName is
        // left unset.
        // Space and tab are scanned as divider symbols.
        public static EmailAddress ParseAddressString(string InString)
        {
            TextTraits traits;

            traits = new TextTraits( )
                     .SetQuoteEncapsulation(QuoteEncapsulation.Escape);
            traits.DividerPatterns.AddDistinct(
                new string[] { " ", "\t" }, Text.Enums.DelimClassification.DividerSymbol);
            WordCursor bgnFriendly = null;
            WordCursor endFriendly = null;

            EmailAddress results = new EmailAddress( );

            WordCursor csr = Scanner.PositionBeginWord(InString, traits);

            while (true)
            {
                // advance to the next word in the address string.
                csr = Scanner.ScanNextWord(InString, csr);
                if (csr.IsEndOfString)
                {
                    break;
                }

                // the cursor may hold a delimiter with no word in front of it ( for
                // example a run of blanks ). there is nothing to classify, and
                // reading csr.Word.Class below would fail on the missing word.
                if (csr.Word == null)
                {
                    continue;
                }

                // the email address itself is <braced>.
                if ((csr.Word.Class == WordClassification.ContentBraced) &&
                    (csr.Word.BraceChar == '<'))
                {
                    results.Address = csr.Word.BracedText;
                }

                // comment in the email address string.
                else if ((csr.Word.Class == WordClassification.ContentBraced) &&
                         (csr.Word.BraceChar == '('))
                {
                    results.Comment = csr.Word.BracedText;
                    results.Comment =
                        MimeCommon.DecodeHeaderString_EncodedOnly(results.Comment);
                }

                // word part of the friendly name in the address. extend the word range of
                // the friendly string.
                else
                {
                    if (bgnFriendly == null)
                    {
                        bgnFriendly = csr;
                    }
                    endFriendly = csr;
                }
            }

            // working from the word range, isolate the full friendly name string.
            string fullFriendly = null;

            if ((bgnFriendly != null) && (bgnFriendly == endFriendly))
            {
                // a single word. take its text as is.
                fullFriendly = bgnFriendly.Word.ToString( );
            }
            else if (bgnFriendly != null)
            {
                // several words. take the text of InString from the start of the
                // first word through the end of the last one.
                int Bx = bgnFriendly.WordBx;
                int Ex = endFriendly.WordEx;
                fullFriendly = InString.Substring(Bx, Ex - Bx + 1);
            }

            // final decode of the friendly name.  name could be quoted, could contain
            // encoded-words.
            if (fullFriendly != null)
            {
                fullFriendly = MimeCommon.DecodeHeaderString_QuotedEncodedEither(fullFriendly);
            }

            // the friendly name could actually be the email address.
            if (results.Address == null)
            {
                results.Address = fullFriendly;
            }
            else
            {
                results.FriendlyName = fullFriendly;
            }

            return(results);
        }
Esempio n. 30
0
        /// <summary>
        /// This is the central method where the cracking of statement text into a hierarchy
        /// of StmtWord(s) takes place. Words are scanned one at a time, starting from
        /// ParentStart, and a StmtWord is constructed for each with ParentWord as its
        /// parent. Sentences and braced words are composite words: the method calls
        /// itself to parse their contents.
        /// </summary>
        /// <param name="StmtText">The statement text being scanned.</param>
        /// <param name="Traits">
        /// Statement traits. FormSentencesFromWhitespaceDelimWords controls whether
        /// whitespace delimited words are gathered into sentence words.
        /// </param>
        /// <param name="ParentStart">Cursor from which the scan for the next word proceeds.</param>
        /// <param name="ParentWord">The composite word whose contents are being parsed.</param>
        /// <returns>
        /// The cursor at which parsing of this parent stopped: end of string, the word
        /// that ended the sentence, or the word scanned after the parent's close brace.
        /// </returns>
        static WordCursor ParseParent(
            string StmtText, StmtTraits Traits, WordCursor ParentStart,
            StmtWord ParentWord)
        {
            WordCursor csr        = ParentStart;
            StmtWord   parentWord = ParentWord;
            StmtWord   word       = null;

            while (true)
            {
                // word stays null until one of the steps below builds a StmtWord for
                // the cursor.
                word = null;

                // get next word in the stmt string.
                csr = Scanner.ScanNextWord(StmtText, csr);

                // end of string. Got nothing.
                if (csr.IsEndOfString == true)
                {
                    break;
                }

                // word is start of a sentence.
                // ( only when sentences are enabled and the parent is not already one. )
                if ((Traits.FormSentencesFromWhitespaceDelimWords == true) &&
                    (parentWord.IsSentence == false))
                {
                    if ((csr.DelimIsWhitespace == true) || (csr.DelimIsOpenBrace == true))
                    {
                        if (csr.WordClassification != WordClassification.CommentToEnd)
                        {
                            // StayAtFlag is set before recursing.
                            // NOTE(review): presumably so the recursive call's first scan
                            // returns this same word as the first word of the sentence.
                            // confirm against Scanner.ScanNextWord.
                            word           = new StmtWord(StmtText, parentWord, csr, WordCompositeCode.Sentence);
                            csr.StayAtFlag = true;
                            csr            = ParseParent(StmtText, Traits, csr, word);
                            // disabled: word.EndCursor.AssignDelimPart(csr);
                        }
                    }
                }

                // Word is braced. Make a composite word, then recursively call this method to
                // parse the contents.
                bool bracedWordWasParsed = false;
                if ((word == null) && (csr.WordIsOpenBrace == true))
                {
                    word = new StmtWord(StmtText, parentWord, csr, WordCompositeCode.Braced);
                    csr  = ParseParent(StmtText, Traits, csr, word);
                    bracedWordWasParsed = true;

                    // this braced word may be the start of a sentence.
                    // ( csr is now the cursor returned from parsing the braced contents. )
                    if ((Traits.FormSentencesFromWhitespaceDelimWords == true) &&
                        (parentWord.IsSentence == false))
                    {
                        if ((csr.DelimIsWhitespace == true) || (csr.DelimIsOpenBrace == true))
                        {
                            // wrap the braced word in a new sentence word by re-parenting
                            // it. the contents of the sentence are not parsed here.
                            StmtWord w2 = new StmtWord(
                                StmtText, parentWord, word.WordCursor, WordCompositeCode.Sentence);
                            word.Parent = w2;
                        }
                    }
                }

                // add the standalone word to the parent word
                if (word == null)
                {
                    // this word might be the whitespace after an EndStmt delim sentence and the
                    // end of the braced parent.  ( ex: return _Name ; } )

                    // todo: draw distinction between skipping the empty word before a close brace
                    //       and the empty word after a comma delim sequence. ex: { a, b, c, }

                    // a delim only cursor has no word, so no StmtWord is made for it.
                    if (csr.IsDelimOnly == false)
                    {
                        word = new StmtWord(StmtText, parentWord, csr, WordCompositeCode.Atom);
                    }
                }

                // is the final word in a sentence.
                // note: a semicolon or comma will end a sentence.
                if (parentWord.CompositeCode == WordCompositeCode.Sentence)
                {
                    // any delim other than whitespace or an assignment symbol ends the
                    // sentence.
                    if ((csr.DelimIsWhitespace == false) &&
                        (csr.DelimIsAssignmentSymbol == false))
                    {
                        break;
                    }

                    // sentence also ends when word is braced and this braced word is not the
                    // first word in the sentence.  ex: get { return _Name ; }
                    else if ((bracedWordWasParsed == true) && (parentWord.SubWords.Count > 1))
                    {
                        break;
                    }
                }

                // final word in a Braced sequence.
                if (parentWord.CompositeCode == WordCompositeCode.Braced)
                {
                    // the close brace delim is the closing brace of the parent word.
                    // ex: { wd1 wd2 }  the } delim for wd2 applies to the braced word.
                    if ((csr.DelimIsCloseBrace == true) &&
                        ((word == null) || (word.OwnsCloseBracedDelim == false)))
                    {
                        // save the location of the closing brace.
                        parentWord.CloseBracePosition = csr.DelimBx;
                        parentWord.CloseBraceCursor   = csr;

                        // cursor is located at the closing brace. We want the word after the closing
                        // brace to always be a delim only word. In a parent where members are delimited
                        // by comma this is no problem. But in a whitespace separated list, this might not
                        // be the case without a little helpful adjustment.
                        csr            = Scanner.ScanNextWord(StmtText, csr);
                        csr.StayAtFlag = true;

                        // the word after the close brace is delim only: it becomes the
                        // close brace cursor of the parent.
                        if (csr.IsDelimOnly == true)
                        {
                            csr.StayAtFlag = false;
                            parentWord.CloseBraceCursor = csr;
                        }

                        // an open brace word follows the close brace: set a whitespace
                        // only virtual cursor at the position just before that word.
                        else if ((csr.WordClassification == WordClassification.OpenNamedBraced) ||
                                 (csr.WordClassification == WordClassification.OpenContentBraced))
                        {
                            csr.SetVirtualCursor_WhitespaceOnly(csr.WordBx - 1);
                            csr.StayAtFlag = false;
                        }

                        // any other word: StayAtFlag remains set on the returned cursor.
                        break;
                    }
                }

                // line break. consider end of first pass processing of the stmt words
                // when the paren level is zero.
                //        if ((csr.DelimClass == DelimClassification.NewLine) && ( word.ParenLevel == 0 ))
                //          break ;
            }

            return(csr);
        }