/// <summary>Records a positioned text chunk for each text-render event; every other event type is ignored.</summary>
/// <remarks>
/// The chunk is placed on the baseline of the rendered text with any text rise removed. When
/// <c>useActualText</c> is set and <c>FindLastTagWithActualText</c> yields the same tag for the previously
/// handled render info and for the incoming one, no new chunk is added: the last element of
/// <c>locationalResult</c> keeps its text and is replaced by a chunk whose location spans both pieces.
/// </remarks>
/// <param name="data">the event payload; cast to <c>TextRenderInfo</c> when the event is a text render</param>
/// <param name="type">the kind of event being reported</param>
public virtual void EventOccurred(IEventData data, EventType type) {
    if (!type.Equals(EventType.RENDER_TEXT)) {
        return;
    }

    TextRenderInfo textInfo = (TextRenderInfo)data;
    LineSegment baseline = textInfo.GetBaseline();
    if (textInfo.GetRise() != 0) {
        // Undo the rise so that super/subscript text is positioned on the baseline of the text
        // it is raised or lowered relative to.
        baseline = baseline.TransformBy(new Matrix(0, -textInfo.GetRise()));
    }

    if (!useActualText) {
        String plainText = textInfo.GetText();
        locationalResult.Add(new LocationTextExtractionStrategy.TextChunk(plainText,
            tclStrat.CreateLocation(textInfo, baseline)));
        lastTextRenderInfo = textInfo;
        return;
    }

    CanvasTag previousTag = null;
    if (lastTextRenderInfo != null) {
        previousTag = FindLastTagWithActualText(lastTextRenderInfo.GetCanvasTagHierarchy());
    }

    bool continuesPreviousTag = previousTag != null
        && previousTag == FindLastTagWithActualText(textInfo.GetCanvasTagHierarchy());

    if (continuesPreviousTag) {
        // Both pieces sit under the same tag: widen the stored chunk instead of adding a new one.
        // The two pieces are assumed to lie on the same line.
        int lastIndex = locationalResult.Count - 1;
        LocationTextExtractionStrategy.TextChunk previousChunk = locationalResult[lastIndex];

        Vector previousStart = previousChunk.GetLocation().GetStartLocation();
        Vector currentStart = baseline.GetStartPoint();
        Vector combinedStart = new Vector(
            Math.Min(previousStart.Get(0), currentStart.Get(0)),
            Math.Min(previousStart.Get(1), currentStart.Get(1)),
            Math.Min(previousStart.Get(2), currentStart.Get(2)));

        Vector previousEnd = previousChunk.GetLocation().GetEndLocation();
        Vector currentEnd = baseline.GetEndPoint();
        Vector combinedEnd = new Vector(
            Math.Max(previousEnd.Get(0), currentEnd.Get(0)),
            Math.Max(previousEnd.Get(1), currentEnd.Get(1)),
            Math.Max(previousEnd.Get(2), currentEnd.Get(2)));

        String keptText = previousChunk.GetText();
        locationalResult[lastIndex] = new LocationTextExtractionStrategy.TextChunk(keptText,
            tclStrat.CreateLocation(textInfo, new LineSegment(combinedStart, combinedEnd)));
    } else {
        // Use the actual text reported by the render info when there is one, else its regular text.
        String chunkText = textInfo.GetActualText();
        if (chunkText == null) {
            chunkText = textInfo.GetText();
        }
        locationalResult.Add(new LocationTextExtractionStrategy.TextChunk(chunkText,
            tclStrat.CreateLocation(textInfo, baseline)));
    }

    lastTextRenderInfo = textInfo;
}
/// <summary>Builds the extracted text from the chunks collected so far, in sorted order.</summary>
/// <remarks>
/// Chunks are sorted via <c>JavaCollectionsUtil.Sort</c> and then concatenated. Consecutive chunks that
/// <c>SameLine</c> reports as sharing a line are joined directly, with a single space inserted only when
/// <c>IsChunkAtWordBoundary</c> reports a word boundary and neither side already supplies a space.
/// Chunks on different lines are separated by a newline character.
/// The sort is performed on a copy, so the collected chunk list itself is left in insertion order.
/// </remarks>
/// <returns>the text assembled from all collected chunks</returns>
public virtual String GetResultantText() {
    if (DUMP_STATE) {
        DumpState();
    }

    // Sort a snapshot rather than the live list: EventOccurred merges actual-text pieces into the
    // LAST element of locationalResult, so reordering that list here would make a later merge
    // modify the wrong chunk if more text is processed after this call.
    IList<LocationTextExtractionStrategy.TextChunk> textChunks =
        new List<LocationTextExtractionStrategy.TextChunk>(locationalResult);
    JavaCollectionsUtil.Sort(textChunks);

    StringBuilder sb = new StringBuilder();
    LocationTextExtractionStrategy.TextChunk lastChunk = null;
    foreach (LocationTextExtractionStrategy.TextChunk chunk in textChunks) {
        if (lastChunk != null) {
            if (!chunk.SameLine(lastChunk)) {
                sb.Append('\n');
            } else if (IsChunkAtWordBoundary(chunk, lastChunk) && !StartsWithSpace(chunk.text)
                && !EndsWithSpace(lastChunk.text)) {
                // Only insert a blank when the previous chunk does not already end with a space
                // and the current chunk does not already start with one.
                sb.Append(' ');
            }
        }
        sb.Append(chunk.text);
        lastChunk = chunk;
    }
    return sb.ToString();
}
/// <summary>Decides whether a space character belongs between the previous chunk and the current chunk.</summary>
/// <remarks>
/// This method is exposed as a callback so that subclasses can fine-tune the algorithm that determines
/// whether a space should be inserted. The decision is delegated to the <c>IsAtWordBoundary</c> check of
/// the current chunk's location, which is given the previous chunk's location.
/// By default, a space is inserted if there is a gap of more than half the font space character width
/// between the end of the previous chunk and the beginning of the current chunk. A space is also reported
/// as needed if the starting point of the new chunk appears *before* the end of the previous chunk
/// (i.e. overlapping text).
/// </remarks>
/// <param name="chunk">the new chunk being evaluated</param>
/// <param name="previousChunk">the chunk that appeared immediately before the current chunk</param>
/// <returns>
/// true if the two chunks represent different words (i.e. should have a space between them); false otherwise.
/// </returns>
protected internal virtual bool IsChunkAtWordBoundary(LocationTextExtractionStrategy.TextChunk chunk,
    LocationTextExtractionStrategy.TextChunk previousChunk) {
    var currentLocation = chunk.GetLocation();
    var precedingLocation = previousChunk.GetLocation();
    bool atWordBoundary = currentLocation.IsAtWordBoundary(precedingLocation);
    return atWordBoundary;
}