Beispiel #1
0
        /// <summary>
        /// Advances the inner reader until a node that maps to a text operation is found
        /// and stores that operation as the current one.
        /// </summary>
        /// <remarks>
        /// Start elements produce a new line operation (br) or an opening font style operation,
        /// text nodes produce a draw operation, and end elements of font style tags produce
        /// a closing font style operation. All other nodes are skipped.
        /// </remarks>
        /// <returns>true if an operation was read; false if no further nodes could be read.</returns>
        /// <exception cref="NotSupportedException">Thrown when a span element is encountered.</exception>
        public override bool Read()
        {
            PDFTextOp op = null;
            string    style;

            while (this.MoveToNext())
            {
                if (this.InnerReader.NodeType == XmlNodeType.Element)
                {
                    string name = this.InnerReader.Name;

                    // Element names are identifiers rather than linguistic text, so they are
                    // matched ordinally - the result must not depend on the current culture.
                    if (name.Equals("br", StringComparison.OrdinalIgnoreCase))
                    {
                        op = new PDFTextNewLineOp();
                        break;
                    }
                    else if (this.IsFontStyleOp(name, out style))
                    {
                        // opening tag of a font style
                        op = new PDFTextFontOp(style, true);
                        break;
                    }
                    else if (name.Equals("span", StringComparison.OrdinalIgnoreCase))
                    {
                        throw new NotSupportedException("Span is not a currently supported Component");
                    }
                    // any other element is ignored and the loop moves on to the next node
                }
                else if (this.InnerReader.NodeType == XmlNodeType.Text)
                {
                    // NOTE(review): op is always null at this point (every assignment above and below
                    // is followed by a break), so the second argument is always false.
                    // Confirm whether the previously read operation was intended here instead.
                    string text = this.StripWhiteSpace(this.InnerReader.Value.Trim(), op != null);
                    op = new PDFTextDrawOp(text);
                    break;
                }
                else if (this.InnerReader.NodeType == XmlNodeType.EndElement)
                {
                    if (this.IsFontStyleOp(this.InnerReader.Name, out style))
                    {
                        // closing tag of a font style
                        op = new PDFTextFontOp(style, false);
                        break;
                    }
                }
            }

            // Remains null when the reader ran out of nodes without finding an operation.
            this._op = op;

            return(op != null);
        }
        //
        // public methods
        //


        /// <summary>
        /// Takes the provided text and splits it into a list of
        /// operations that can be read in sequence to generate the text.
        /// </summary>
        /// <remarks>
        /// The text is scanned for &lt;...&gt; elements. Plain text found between elements (and after the
        /// last one) is converted to draw operations, and the content of each element is handed to
        /// ParseElement; a null result from ParseElement is skipped.
        /// </remarks>
        /// <param name="text">The full text to be read. A null or empty value returns an empty list.</param>
        /// <param name="preserveWhitespace">
        /// If true, each text run is split with WhitespaceText and a new line operation is added
        /// between the resulting lines; otherwise each run is passed through NormalizeText and
        /// dropped when the result is empty.
        /// </param>
        /// <returns>The operations in the order they occur in the text.</returns>
        /// <exception cref="PDFXmlFormatException">
        /// Thrown when a '&lt;' has no matching '&gt;', or another '&lt;' appears before it is closed.
        /// </exception>
        public virtual List <PDFTextOp> Parse(string text, bool preserveWhitespace)
        {
            List <PDFTextOp> all = new List <PDFTextOp>();

            // Nothing to parse - avoids a NullReferenceException on the IndexOf below.
            if (string.IsNullOrEmpty(text))
            {
                return(all);
            }

            int  lastindex = 0;
            int  index     = text.IndexOf('<');
            // Passed on to the text helpers: true at the start of the text and
            // again directly after every new line element.
            bool first     = true;

            while (index >= 0)
            {
                if (lastindex < index)
                {
                    //We have some text between the last point and the position of the <, so we need to convert and add
                    this.AppendTextOps(all, text.Substring(lastindex, index - lastindex), first, preserveWhitespace);
                }

                //check the validity of the element
                int endindex = text.IndexOf('>', index);

                if (endindex < 0) //Make sure it is closed
                {
                    throw new PDFXmlFormatException("No closing brace found after opening brace - character #" + index.ToString());
                }

                int nextindex = text.IndexOf('<', index + 1); //and make sure a new element is not opened before
                if (nextindex >= 0 && endindex > nextindex)
                {
                    throw new PDFXmlFormatException("No closing brace found after opening brace - character #" + index.ToString());
                }

                //We have element content (everything between the braces) - parse and if valid add
                PDFTextOp ele = ParseElement(text.Substring(index + 1, (endindex - index) - 1));

                // Also false when ParseElement returned null.
                first = ele is PDFTextNewLineOp;

                if (null != ele)
                {
                    all.Add(ele);
                }
                lastindex = endindex + 1;

                //check to see if there are some more elements
                index = (lastindex < text.Length) ? text.IndexOf('<', lastindex) : -1;
            }

            //No more elements, so just make sure we capture any trailing text
            if (lastindex < text.Length)
            {
                this.AppendTextOps(all, text.Substring(lastindex), first, preserveWhitespace);
            }

            return(all);
        }

        /// <summary>
        /// Converts a single run of plain text into draw (and new line) operations and appends them to the list.
        /// </summary>
        /// <param name="all">The list the operations are appended to.</param>
        /// <param name="value">The raw text run.</param>
        /// <param name="first">Passed through unchanged to WhitespaceText / NormalizeText.</param>
        /// <param name="preserveWhitespace">Selects between line splitting (true) and normalization (false).</param>
        private void AppendTextOps(List <PDFTextOp> all, string value, bool first, bool preserveWhitespace)
        {
            if (preserveWhitespace)
            {
                string[] lines = WhitespaceText(first, value);
                for (int i = 0; i < lines.Length; i++)
                {
                    // Every line after the first is preceded by an explicit new line operation.
                    if (i > 0)
                    {
                        all.Add(new PDFTextNewLineOp());
                    }

                    // An empty line is still emitted, as a single space.
                    string txt = lines[i];
                    all.Add(new PDFTextDrawOp(string.IsNullOrEmpty(txt) ? " " : txt));
                }
            }
            else
            {
                string normalized = NormalizeText(first, value);

                if (!string.IsNullOrEmpty(normalized))
                {
                    all.Add(new PDFTextDrawOp(normalized));
                }
            }
        }