/**
 * Determines if a space character should be inserted between a previous chunk and the current chunk.
 * This method is exposed as a callback so subclasses can fine tune the algorithm for determining
 * whether a space should be inserted or not.
 * By default, this method will insert a space if there is a gap of more than half the font space
 * character width between the end of the previous chunk and the beginning of the current chunk.
 * It will also indicate that a space is needed if the starting point of the new chunk appears
 * *before* the end of the previous chunk by more than one space character width (i.e. overlapping text).
 * If the space character width of the new chunk is zero or close to zero (below 0.1), no word
 * boundary is ever reported, because the thresholds above would otherwise degenerate to zero.
 * @param chunk the new chunk being evaluated
 * @param previousChunk the chunk that appeared immediately before the current chunk
 * @return true if the two chunks represent different words (i.e. should have a space between them). False otherwise.
 */
protected virtual bool IsChunkAtWordBoundary(TextChunk chunk, TextChunk previousChunk)
{
    // With a space width of (nearly) zero both comparisons below turn into "dist != 0",
    // so practically every chunk would be flagged as a word boundary and a space would be
    // emitted between individual glyphs. This can happen, for example, when a PDF sets a
    // negative character spacing that cancels out the font's own space width.
    // Treat such a width as "no usable space metric" and never request a space.
    if (chunk.CharSpaceWidth < 0.1f)
    {
        return false;
    }

    float dist = chunk.DistanceFromEndOf(previousChunk);

    // Boundary when the new chunk starts more than one space width before the end of the
    // previous chunk, or more than half a space width after it.
    return dist < -chunk.CharSpaceWidth || dist > chunk.CharSpaceWidth / 2.0f;
}
/**
 * Decides whether the current chunk starts a new word relative to the chunk before it, i.e.
 * whether a space character should be placed between the two.
 * Subclasses may override this callback to tune the word-boundary heuristic.
 * The default heuristic works on the distance between the end of the previous chunk and the
 * start of the current chunk, measured against the current chunk's space character width:
 *   - a space width below 0.1 never produces a boundary;
 *   - otherwise a boundary is reported when the distance is more than half a space width, or
 *     when the current chunk starts more than one space width *before* the end of the
 *     previous chunk (overlapping text).
 * @param chunk the new chunk being evaluated
 * @param previousChunk the chunk that appeared immediately before the current chunk
 * @return true if the two chunks belong to different words (a space is needed), false otherwise.
 */
virtual protected bool IsChunkAtWordBoundary(TextChunk chunk, TextChunk previousChunk)
{
    float spaceWidth = chunk.CharSpaceWidth;

    /*
     * Special case seen in PDF content such as:
     *   -.232 Tc [( P)-226.2(r)-231.8(e)-230.8(f)-238(a)-238.9(c)-228.9(e)]TJ
     * The font's space width (0.232) is cancelled by a character spacing of -0.232, so the
     * chunk ends up with a space width of 0. Without this guard every chunk would count as
     * a word boundary, so a width equal (or close) to zero means "no space".
     */
    if (spaceWidth < 0.1f)
    {
        return false;
    }

    float gap = chunk.DistanceFromEndOf(previousChunk);

    bool startsBeforePreviousEnd = gap < -spaceWidth;
    bool gapWiderThanHalfSpace = gap > spaceWidth / 2.0f;

    return startsBeforePreviousEnd || gapWiderThanHalfSpace;
}
/**
 * Returns the result so far.
 * The collected chunks are sorted and concatenated: chunks on the same line are joined
 * (with a single blank inserted where the distance between them calls for one), and a
 * chunk on a different line than its predecessor is preceded by a line feed.
 * The stored chunks are not modified; any trimming is applied only to the returned text,
 * so calling this method repeatedly yields the same result.
 * @param lineposition position of this line in line-by-line mark processing:
 *   "first" - identifies first line in line-by-line mark processing;
 *   "last"  - identifies last line in line-by-line mark processing;
 *   ""      - middle lines in line-by-line mark processing.
 * @return a String with the resulting text.
 */
public virtual String GetResultantText(string lineposition)
{
    if (DUMP_STATE)
    {
        DumpState();
    }

    locationalResult.Sort();

    StringBuilder sb = new StringBuilder();
    TextChunk lastChunk = null;
    // Text actually emitted for lastChunk (may be a trimmed copy of lastChunk.text).
    string lastText = null;
    int lastIndex = locationalResult.Count - 1;

    for (int i = 0; i < locationalResult.Count; i++)
    {
        TextChunk chunk = locationalResult[i];
        // Work on a local copy: assigning back to chunk.text would permanently alter the
        // stored chunk and make every subsequent call strip more text.
        string text = chunk.text;

        if (lastChunk == null)
        {
            // If the first chunk in the first line contains alien symbols before a space - drop
            // everything up to and including the first space.
            if (lineposition == "first")
            {
                int leadingSpace = text.IndexOf(' ');
                if (leadingSpace >= 0)
                {
                    text = text.Substring(leadingSpace + 1);
                }
            }

            sb.Append(text);
        }
        else if (chunk.SameLine(lastChunk))
        {
            float dist = chunk.DistanceFromEndOf(lastChunk);

            if (dist < -chunk.charSpaceWidth)
            {
                sb.Append(' ');
            }
            // We only insert a blank space if the trailing character of the previous string
            // wasn't a space, and the leading character of the current string isn't a space.
            else if (dist > chunk.charSpaceWidth / 2.0f && !StartsWithSpace(text) && !EndsWithSpace(lastText))
            {
                sb.Append(' ');
            }

            // NOTE(review): the "last chunk" handling below only runs for a chunk that shares
            // a line with its predecessor; a single chunk, or a final chunk on a new line,
            // is appended untouched. Kept as in the original - confirm this is intended.
            bool isFinalChunk = i == lastIndex;

            // If the last chunk in the last line contains alien symbols after a space - drop
            // everything from the first space onwards.
            if (isFinalChunk && lineposition == "last")
            {
                int firstSpace = text.IndexOf(' ');
                if (firstSpace >= 0)
                {
                    text = text.Remove(firstSpace);
                }
            }

            sb.Append(text);

            // Add a space after the last chunk of a line that is not the last line, unless the
            // chunk already ends with a space.
            if (isFinalChunk && lineposition != "last" && !EndsWithSpace(text))
            {
                sb.Append(' ');
            }
        }
        else
        {
            sb.Append('\n');
            sb.Append(text);
        }

        lastChunk = chunk;
        lastText = text;
    }

    return sb.ToString();
}