/// <summary>
/// Orders text chunk locations by orientation magnitude, then by perpendicular
/// distance, then by the parallel start distance.
/// </summary>
/// <param name="other">The location to compare this instance with; may be null.</param>
/// <returns>
/// A negative value when this location sorts before <paramref name="other"/>,
/// zero when both occupy the same sort position, and a positive value otherwise.
/// A null <paramref name="other"/> sorts before any instance.
/// </returns>
public virtual int CompareTo(ITextChunkLocation other)
{
    if (ReferenceEquals(this, other))
    {
        return 0;
    }

    // Standard IComparable convention: every instance is greater than null.
    if (other == null)
    {
        return 1;
    }

    int rslt = CompareInts(OrientationMagnitude, other.OrientationMagnitude);
    if (rslt != 0)
    {
        return rslt;
    }

    rslt = CompareInts(DistPerpendicular, other.DistPerpendicular);
    if (rslt != 0)
    {
        return rslt;
    }

    // The previous tie-break ("a < b ? -1 : 1") answered 1 in both directions for two
    // distinct chunks with the same parallel start, which violates the comparison
    // contract (sign(x.CompareTo(y)) must equal -sign(y.CompareTo(x))) and can make
    // sort routines misbehave. Delegating to the built-in comparison returns 0 for ties.
    return DistParallelStart.CompareTo(other.DistParallelStart);
}
/// <summary>
/// Overload of <c>GetResultantText</c> that only considers the text chunks whose
/// start-to-end line intersects the given rectangle.
/// </summary>
/// <remarks>
/// The chunk list is obtained from the strategy through <c>locationalResultField</c>
/// and is temporarily narrowed to the intersecting chunks while the strategy builds
/// its text. The full list is always put back afterwards, in its original order,
/// whether or not the extraction throws.
/// </remarks>
/// <param name="strategy">The extraction strategy holding the collected text chunks.</param>
/// <param name="rect">The area whose text should be returned.</param>
/// <returns>The text the strategy produces from the chunks intersecting <paramref name="rect"/>.</returns>
public static String GetResultantText(this LocationTextExtractionStrategy strategy, Rectangle rect)
{
    // Live list owned by the strategy; it is edited in place below.
    IList<TextChunk> locationalResult = (IList<TextChunk>)locationalResultField.GetValue(strategy);

    // Snapshot of the complete list so it can be restored exactly as it was.
    List<TextChunk> allChunks = new List<TextChunk>(locationalResult);

    // Single pass: collect the chunks whose baseline crosses the rectangle.
    List<TextChunk> matching = new List<TextChunk>();
    foreach (TextChunk chunk in allChunks)
    {
        ITextChunkLocation location = chunk.GetLocation();
        Vector start = location.GetStartLocation();
        Vector end = location.GetEndLocation();
        if (rect.IntersectsLine(start.Get(Vector.I1), start.Get(Vector.I2), end.Get(Vector.I1), end.Get(Vector.I2)))
        {
            matching.Add(chunk);
        }
    }

    // Swap the contents instead of removing chunk by chunk (which was quadratic).
    locationalResult.Clear();
    foreach (TextChunk chunk in matching)
    {
        locationalResult.Add(chunk);
    }

    try
    {
        return strategy.GetResultantText();
    }
    finally
    {
        // Always runs: restore every chunk, in the original order.
        locationalResult.Clear();
        foreach (TextChunk chunk in allChunks)
        {
            locationalResult.Add(chunk);
        }
    }
}
/// <summary>
/// Returns the strategy's resultant text restricted to the chunks whose
/// start-to-end line intersects <paramref name="rect"/>.
/// </summary>
/// <remarks>
/// The strategy's chunk list (read through <c>locationalResultField</c>) is replaced
/// by the intersecting subset for the duration of the call and then restored to its
/// original content and order, even when the extraction throws.
/// </remarks>
/// <param name="strategy">The extraction strategy holding the collected text chunks.</param>
/// <param name="rect">The area whose text should be returned.</param>
/// <returns>The text built from the chunks intersecting <paramref name="rect"/>.</returns>
public static String GetResultantText(this LocationTextExtractionStrategy strategy, Rectangle rect)
{
    IList<TextChunk> locationalResult = (IList<TextChunk>)locationalResultField.GetValue(strategy);

    // Keep a copy of the full list; it is the source of truth for the restore step.
    List<TextChunk> snapshot = new List<TextChunk>(locationalResult);

    List<TextChunk> insideRect = new List<TextChunk>();
    foreach (TextChunk chunk in snapshot)
    {
        ITextChunkLocation location = chunk.GetLocation();
        Vector start = location.GetStartLocation();
        Vector end = location.GetEndLocation();
        if (rect.IntersectsLine(start.Get(Vector.I1), start.Get(Vector.I2), end.Get(Vector.I1), end.Get(Vector.I2)))
        {
            insideRect.Add(chunk);
        }
    }

    // Replace the contents in one linear pass rather than calling Remove per chunk,
    // which scanned the list every time and made the filtering quadratic.
    locationalResult.Clear();
    foreach (TextChunk chunk in insideRect)
    {
        locationalResult.Add(chunk);
    }

    try
    {
        return strategy.GetResultantText();
    }
    finally
    {
        // Put the complete list back in its original order.
        locationalResult.Clear();
        foreach (TextChunk chunk in snapshot)
        {
            locationalResult.Add(chunk);
        }
    }
}
/// <summary>
/// Decides whether a word boundary lies between <paramref name="previousChunk"/>
/// and <paramref name="chunk"/>.
/// </summary>
/// <param name="chunk">The chunk being examined.</param>
/// <param name="previousChunk">The chunk that comes before it.</param>
/// <returns>
/// <c>false</c> when either chunk has identical start and end locations; otherwise
/// <c>true</c> when the measured distance exceeds the average of both chunks'
/// character space widths.
/// </returns>
protected override bool IsChunkAtWordBoundary(TextChunk chunk, TextChunk previousChunk)
{
    ITextChunkLocation current = chunk.GetLocation();
    ITextChunkLocation previous = previousChunk.GetLocation();

    // Zero-length chunks never introduce a boundary.
    if (current.GetStartLocation().Equals(current.GetEndLocation()))
    {
        return false;
    }
    if (previous.GetEndLocation().Equals(previous.GetStartLocation()))
    {
        return false;
    }

    // NOTE(review): this measures from the START of the previous chunk to the END of
    // the current one, not the gap between them - confirm that this is intended.
    float distance = current.DistParallelEnd() - previous.DistParallelStart();
    float averageSpaceWidth = (current.GetCharSpaceWidth() + previous.GetCharSpaceWidth()) / 2.0f;
    return distance > averageSpaceWidth;
}
/// <summary>
/// Tells whether a word boundary lies between <paramref name="previous"/> and this chunk.
/// </summary>
/// <param name="previous">The location of the preceding chunk.</param>
/// <returns>
/// <c>false</c> when the distance is negative in both directions (the chunks intersect);
/// otherwise <c>true</c> when the gap is larger than half of <c>CharSpaceWidth</c>.
/// </returns>
public virtual bool IsAtWordBoundary(ITextChunkLocation previous)
{
    float gap = DistanceFromEndOf(previous);
    if (gap < 0)
    {
        // Measure the other way round; if that is negative too, the chunks
        // intersect and no space is needed.
        float reverseGap = previous.DistanceFromEndOf(this);
        if (reverseGap < 0)
        {
            return false;
        }
        gap = reverseGap;
    }

    float halfSpaceWidth = CharSpaceWidth / 2.0f;
    return gap > halfSpaceWidth;
}
/// <summary>
/// Checks whether this location and <paramref name="as"/> are on the same line.
/// </summary>
/// <param name="as">The location to compare with.</param>
/// <returns>
/// <c>false</c> when the orientation magnitudes differ; <c>true</c> when the
/// perpendicular distances differ by less than 2; otherwise <c>true</c> only when the
/// difference is within <c>DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION</c> and at
/// least one of the two segments has zero length.
/// </returns>
public virtual bool SameLine(ITextChunkLocation @as)
{
    if (OrientationMagnitude() != @as.OrientationMagnitude())
    {
        return false;
    }

    int perpendicularGap = Math.Abs(DistPerpendicular() - @as.DistPerpendicular());
    if (perpendicularGap < 2)
    {
        return true;
    }

    LineSegment ownSegment = new LineSegment(startLocation, endLocation);
    LineSegment otherSegment = new LineSegment(@as.GetStartLocation(), @as.GetEndLocation());

    // A larger deviation is tolerated only for zero-length chunks
    // (presumably diacritical marks, going by the constant's name).
    if (perpendicularGap <= DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION)
    {
        if (ownSegment.GetLength() == 0)
        {
            return true;
        }
        return otherSegment.GetLength() == 0;
    }
    return false;
}
/// <summary>
/// Tells whether a word boundary lies between <paramref name="previous"/> and this chunk.
/// </summary>
/// <param name="previous">The location of the preceding chunk.</param>
/// <returns>
/// <c>false</c> when either chunk has zero length or when the chunks intersect;
/// otherwise <c>true</c> when the gap exceeds half of the character space width.
/// </returns>
public virtual bool IsAtWordBoundary(ITextChunkLocation previous)
{
    // Zero-length chunks never produce a boundary.
    if (startLocation.Equals(endLocation))
    {
        return false;
    }
    if (previous.GetEndLocation().Equals(previous.GetStartLocation()))
    {
        return false;
    }

    float gap = DistanceFromEndOf(previous);
    bool measuredBackwards = gap < 0;
    if (measuredBackwards)
    {
        gap = previous.DistanceFromEndOf(this);
    }

    // Negative in both directions: the chunks intersect, so no space is added.
    if (measuredBackwards && gap < 0)
    {
        return false;
    }
    return gap > GetCharSpaceWidth() / 2.0f;
}
/// <summary>
/// Tells whether a word boundary lies between <paramref name="previous"/> and this chunk.
/// </summary>
/// <param name="previous">The location of the preceding chunk.</param>
/// <returns>
/// <c>false</c> when <c>CharSpaceWidth</c> is below 0.1; otherwise <c>true</c> when the
/// distance from the end of <paramref name="previous"/> is below <c>-CharSpaceWidth</c>
/// or above half of <c>CharSpaceWidth</c>.
/// </returns>
public virtual bool IsAtWordBoundary(ITextChunkLocation previous)
{
    // Special case seen in PDFs such as:
    //   -.232 Tc [( P)-226.2(r)-231.8(e)-230.8(f)-238(a)-238.9(c)-228.9(e)]TJ
    // The font's space width (0.232) is cancelled by a character spacing of -0.232,
    // so the chunk arrives with a space width of 0. Without this guard every chunk
    // would count as a word boundary, so a (near-)zero width means "no space".
    float spaceWidth = CharSpaceWidth;
    if (spaceWidth < 0.1f)
    {
        return false;
    }

    float gap = DistanceFromEndOf(previous);
    if (gap < -spaceWidth)
    {
        return true;
    }
    return gap > spaceWidth / 2.0f;
}
/// <summary>
/// Tells whether a word boundary lies between <paramref name="previous"/> and this chunk.
/// </summary>
/// <param name="previous">The location of the preceding chunk.</param>
/// <returns>
/// <c>false</c> for zero-length chunks and for intersecting chunks; otherwise
/// <c>true</c> when the gap exceeds half of the character space width.
/// </returns>
public virtual bool IsAtWordBoundary(ITextChunkLocation previous)
{
    // A zero-length chunk is most likely a mark character; no space is wanted around it.
    bool thisIsZeroLength = startLocation.Equals(endLocation);
    if (thisIsZeroLength)
    {
        return false;
    }
    bool previousIsZeroLength = previous.GetEndLocation().Equals(previous.GetStartLocation());
    if (previousIsZeroLength)
    {
        return false;
    }

    float gap = DistanceFromEndOf(previous);
    if (gap < 0)
    {
        // Try the opposite direction; negative again means the chunks intersect.
        float oppositeGap = previous.DistanceFromEndOf(this);
        if (oppositeGap < 0)
        {
            return false;
        }
        gap = oppositeGap;
    }

    float halfSpaceWidth = GetCharSpaceWidth() / 2.0f;
    return gap > halfSpaceWidth;
}
/// <summary>
/// Tells whether a word boundary lies between <paramref name="previous"/> and this chunk.
/// </summary>
/// <param name="previous">The location of the preceding chunk.</param>
/// <returns>
/// <c>false</c> when the character space width is below 0.1 or when either chunk has
/// zero length; otherwise <c>true</c> when the distance from the end of
/// <paramref name="previous"/> is below the negated space width or above half of it.
/// </returns>
public virtual bool IsAtWordBoundary(ITextChunkLocation previous)
{
    // Special case seen in PDFs such as:
    //   -.232 Tc [( P)-226.2(r)-231.8(e)-230.8(f)-238(a)-238.9(c)-228.9(e)]TJ
    // The font's space width (0.232) is cancelled by a character spacing of -0.232,
    // so the chunk's space width comes out as 0 and every chunk would look like a
    // word boundary. A width at or near zero is therefore treated as "no space".
    if (GetCharSpaceWidth() < 0.1f)
    {
        return false;
    }

    // A zero-length chunk is most likely a mark character; do not insert a space.
    if (startLocation.Equals(endLocation))
    {
        return false;
    }
    if (previous.GetEndLocation().Equals(previous.GetStartLocation()))
    {
        return false;
    }

    float gap = DistanceFromEndOf(previous);
    bool overlapsTooFar = gap < -GetCharSpaceWidth();
    if (overlapsTooFar)
    {
        return true;
    }
    return gap > GetCharSpaceWidth() / 2.0f;
}
/// <summary>
/// Creates a text chunk from its text and its location.
/// </summary>
/// <param name="str">The text of the chunk.</param>
/// <param name="location">The location associated with the chunk.</param>
public TextChunk(String str, ITextChunkLocation location)
{
    text = str;
    this.location = location;
}
/// <summary>
/// Computes the distance between the parallel end of <paramref name="other"/> and
/// the parallel start of this location.
/// </summary>
/// <param name="other">The location whose end is used as the reference point.</param>
/// <returns><c>DistParallelStart</c> of this location minus <c>DistParallelEnd</c> of <paramref name="other"/>.</returns>
public virtual float DistanceFromEndOf(ITextChunkLocation other)
{
    return DistParallelStart - other.DistParallelEnd;
}
/// <summary>
/// Checks whether this location and <paramref name="other"/> are on the same line.
/// </summary>
/// <param name="other">The location to compare with.</param>
/// <returns>
/// <c>true</c> when both the orientation magnitude and the perpendicular distance
/// of the two locations are equal.
/// </returns>
public virtual bool SameLine(ITextChunkLocation other)
{
    if (OrientationMagnitude != other.OrientationMagnitude)
    {
        return false;
    }
    return DistPerpendicular == other.DistPerpendicular;
}
/// <summary>
/// Checks whether <paramref name="markLocation"/> lies within the extent of
/// <paramref name="baseLocation"/>.
/// </summary>
/// <param name="baseLocation">The location of the candidate base chunk.</param>
/// <param name="markLocation">The location of the mark chunk.</param>
/// <returns>
/// <c>true</c> when the base starts at or before the mark and ends at or after it
/// along the <c>Vector.I1</c> coordinate, and the perpendicular distances differ by
/// no more than <c>DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION</c>.
/// </returns>
internal static bool ContainsMark(ITextChunkLocation baseLocation, ITextChunkLocation markLocation)
{
    bool baseStartsFirst = baseLocation.GetStartLocation().Get(Vector.I1)
        <= markLocation.GetStartLocation().Get(Vector.I1);
    if (!baseStartsFirst)
    {
        return false;
    }

    bool baseEndsLast = baseLocation.GetEndLocation().Get(Vector.I1)
        >= markLocation.GetEndLocation().Get(Vector.I1);
    if (!baseEndsLast)
    {
        return false;
    }

    return Math.Abs(baseLocation.DistPerpendicular() - markLocation.DistPerpendicular())
        <= DIACRITICAL_MARKS_ALLOWED_VERTICAL_DEVIATION;
}
/// <summary>
/// Computes the distance between the end of <paramref name="other"/> and the
/// beginning of this chunk, measured along the parallel direction.
/// </summary>
/// <remarks>
/// No check is made that both chunks share the same line and orientation; the
/// result is not meaningful when they do not.
/// </remarks>
/// <param name="other">The location whose end is used as the reference point.</param>
/// <returns>
/// The parallel start distance of this chunk minus the parallel end distance of
/// <paramref name="other"/>.
/// </returns>
public virtual float DistanceFromEndOf(ITextChunkLocation other)
{
    float ownStart = DistParallelStart();
    float otherEnd = other.DistParallelEnd();
    return ownStart - otherEnd;
}
/// <summary>
/// Sorts <paramref name="textChunks"/> in place while keeping zero-length chunks
/// (marks) next to the chunk they were attached to.
/// </summary>
/// <remarks>
/// A zero-length chunk is attached to the first non-zero-length chunk in the list for
/// which <c>ContainsMark</c> returns true. Marks without such a base, and all regular
/// chunks, are sorted with the location comparator. The list is then rebuilt with
/// each base chunk surrounded by its marks.
/// </remarks>
/// <param name="textChunks">The chunks to reorder; the list is cleared and refilled.</param>
private void SortWithMarks(IList<TextChunk> textChunks)
{
    IDictionary<TextChunk, LocationTextExtractionStrategy.TextChunkMarks> marksByBase =
        new Dictionary<TextChunk, LocationTextExtractionStrategy.TextChunkMarks>();
    IList<TextChunk> sortable = new List<TextChunk>();

    // Pass 1: split the chunks into attached marks and chunks that take part in sorting.
    for (int i = 0; i < textChunks.Count; i++)
    {
        TextChunk candidate = textChunks[i];
        ITextChunkLocation candidateLocation = candidate.GetLocation();
        bool attached = false;

        if (candidateLocation.GetStartLocation().Equals(candidateLocation.GetEndLocation()))
        {
            // Zero-length chunk: look for the first suitable base chunk.
            for (int k = 0; k < textChunks.Count && !attached; k++)
            {
                if (k == i)
                {
                    continue;
                }

                TextChunk baseChunk = textChunks[k];
                ITextChunkLocation baseLocation = baseChunk.GetLocation();
                if (baseLocation.GetStartLocation().Equals(baseLocation.GetEndLocation()))
                {
                    // Another zero-length chunk cannot serve as a base.
                    continue;
                }
                if (!TextChunkLocationDefaultImp.ContainsMark(baseLocation, candidateLocation))
                {
                    continue;
                }

                LocationTextExtractionStrategy.TextChunkMarks baseMarks = marksByBase.Get(baseChunk);
                if (baseMarks == null)
                {
                    baseMarks = new LocationTextExtractionStrategy.TextChunkMarks();
                    marksByBase.Put(baseChunk, baseMarks);
                }

                // Remember on which side of the base the mark appeared in the input.
                if (i < k)
                {
                    baseMarks.preceding.Add(candidate);
                }
                else
                {
                    baseMarks.succeeding.Add(candidate);
                }
                attached = true;
            }
        }

        if (!attached)
        {
            sortable.Add(candidate);
        }
    }

    JavaCollectionsUtil.Sort(sortable, new TextChunkLocationBasedComparator(
        new DefaultTextChunkLocationComparator(!rightToLeftRunDirection)));

    // Pass 2: rebuild the list, emitting each chunk together with its marks.
    textChunks.Clear();
    foreach (TextChunk current in sortable)
    {
        LocationTextExtractionStrategy.TextChunkMarks currentMarks = marksByBase.Get(current);
        if (currentMarks == null)
        {
            textChunks.Add(current);
            continue;
        }

        if (!rightToLeftRunDirection)
        {
            // Left-to-right: preceding marks, the chunk, succeeding marks, all in input order.
            for (int j = 0; j < currentMarks.preceding.Count; j++)
            {
                textChunks.Add(currentMarks.preceding[j]);
            }
            textChunks.Add(current);
            for (int j = 0; j < currentMarks.succeeding.Count; j++)
            {
                textChunks.Add(currentMarks.succeeding[j]);
            }
        }
        else
        {
            // Right-to-left: succeeding marks reversed, the chunk, preceding marks reversed.
            for (int j = currentMarks.succeeding.Count - 1; j >= 0; j--)
            {
                textChunks.Add(currentMarks.succeeding[j]);
            }
            textChunks.Add(current);
            for (int j = currentMarks.preceding.Count - 1; j >= 0; j--)
            {
                textChunks.Add(currentMarks.preceding[j]);
            }
        }
    }
}
/// <summary>
/// Creates a text chunk from its text and its location.
/// </summary>
/// <param name="string">The text of the chunk.</param>
/// <param name="loc">The location associated with the chunk.</param>
public TextChunk(String @string, ITextChunkLocation loc)
{
    text = @string;
    location = loc;
}