/// <summary>
/// Advances the inner reader until a node that maps onto a text operation is found,
/// and stores that operation in <c>_op</c> (null when nothing was found).
/// </summary>
/// <returns>
/// true if an operation was produced; false if MoveToNext() returned false before
/// any operation could be produced.
/// </returns>
/// <exception cref="NotSupportedException">Thrown when a span element is encountered.</exception>
public override bool Read()
{
    PDFTextOp op = null;
    string style;

    // Stop as soon as one operation has been produced - the reader is left on that node.
    while (op == null && this.MoveToNext())
    {
        switch (this.InnerReader.NodeType)
        {
            case XmlNodeType.Element:
                string name = this.InnerReader.Name;

                // Element names are identifiers, not linguistic text, so they are matched
                // ordinally rather than with the rules of the current thread culture.
                if (name.Equals("br", StringComparison.OrdinalIgnoreCase))
                {
                    op = new PDFTextNewLineOp();
                }
                else if (this.IsFontStyleOp(name, out style))
                {
                    op = new PDFTextFontOp(style, true);
                }
                else if (name.Equals("span", StringComparison.OrdinalIgnoreCase))
                {
                    throw new NotSupportedException("Span is not a currently supported Component");
                }
                // Any other element is skipped and the loop moves on to the next node.
                break;

            case XmlNodeType.Text:
                // The second argument was previously written as (op != null), which can only
                // ever be false at this point because the loop ends as soon as op is assigned.
                // NOTE(review): confirm whether the flag was meant to reflect a previously read op.
                string text = this.StripWhiteSpace(this.InnerReader.Value.Trim(), false);
                op = new PDFTextDrawOp(text);
                break;

            case XmlNodeType.EndElement:
                // Only the closing tag of a font style element produces an operation.
                if (this.IsFontStyleOp(this.InnerReader.Name, out style))
                {
                    op = new PDFTextFontOp(style, false);
                }
                break;

            default:
                // All other node types are ignored.
                break;
        }
    }

    this._op = op;
    return op != null;
}
//
// public methods
//

/// <summary>
/// Takes the provided text and splits it into a list of
/// tokens that can be read in sequence to generate the text
/// </summary>
/// <param name="text">The full text to be read. Must not be null.</param>
/// <param name="preserveWhitespace">
/// When true, each run of text is split with WhitespaceText and a new line operation is
/// emitted between the returned lines. When false, each run is passed through NormalizeText
/// and dropped if the result is null or empty.
/// </param>
/// <returns>The operations in the order they occur in the text; empty if none were produced.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="text"/> is null.</exception>
/// <exception cref="PDFXmlFormatException">
/// Thrown when a '&lt;' is not followed by a '&gt;' before the next '&lt;' or the end of the text.
/// </exception>
public virtual List<PDFTextOp> Parse(string text, bool preserveWhitespace)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    List<PDFTextOp> all = new List<PDFTextOp>();
    int lastindex = 0;   // first character that has not been consumed yet
    bool first = true;   // passed through to WhitespaceText / NormalizeText for the next text run
    int index = text.IndexOf('<');

    while (index >= 0)
    {
        if (lastindex < index)
        {
            // We have some text between the last point and the position of the '<',
            // so convert it to operations before dealing with the element itself.
            this.AppendTextRun(all, text.Substring(lastindex, index - lastindex), first, preserveWhitespace);
        }

        // Check the validity of the element - it must be closed...
        int endindex = text.IndexOf('>', index);
        if (endindex < 0)
        {
            throw new PDFXmlFormatException("No closing brace found after opening brace - character #" + index.ToString());
        }

        // ...and a new element must not be opened before it is closed.
        int nextindex = text.IndexOf('<', index + 1);
        if (nextindex > 0 && endindex > nextindex)
        {
            throw new PDFXmlFormatException("No closing brace found after opening brace - character #" + index.ToString());
        }

        // We have element content (everything between the braces) - parse and, if valid, add.
        string content = text.Substring(index + 1, (endindex - index) - 1);
        PDFTextOp ele = ParseElement(content);

        // The flag is only set again directly after a new line element.
        first = ele is PDFTextNewLineOp;

        if (null != ele)
        {
            all.Add(ele);
        }

        lastindex = endindex + 1;

        // Check to see if there are some more elements.
        index = (lastindex < text.Length) ? text.IndexOf('<', lastindex) : -1;
    }

    // No more elements, so just make sure we capture any trailing text.
    if (lastindex < text.Length)
    {
        this.AppendTextRun(all, text.Substring(lastindex), first, preserveWhitespace);
    }

    return all;
}

/// <summary>
/// Converts one run of element-free text into operations and appends them to <paramref name="all"/>.
/// </summary>
/// <param name="all">The list the operations are appended to.</param>
/// <param name="value">The raw run of text.</param>
/// <param name="first">Passed unchanged to WhitespaceText / NormalizeText.</param>
/// <param name="preserveWhitespace">Selects line splitting (true) or normalisation (false).</param>
private void AppendTextRun(List<PDFTextOp> all, string value, bool first, bool preserveWhitespace)
{
    if (preserveWhitespace)
    {
        string[] lines = WhitespaceText(first, value);
        for (int i = 0; i < lines.Length; i++)
        {
            if (i > 0)
            {
                all.Add(new PDFTextNewLineOp());
            }

            // A null or empty line is drawn as a single space rather than being dropped.
            string txt = lines[i];
            all.Add(new PDFTextDrawOp(string.IsNullOrEmpty(txt) ? " " : txt));
        }
    }
    else
    {
        string normalized = NormalizeText(first, value);
        if (!string.IsNullOrEmpty(normalized))
        {
            all.Add(new PDFTextDrawOp(normalized));
        }
    }
}