Exemple #1
0
        /// <summary>
        /// Builds the locations of every match of <c>pattern</c> in the text assembled from
        /// <c>parseResult</c>.
        /// </summary>
        /// <remarks>
        /// <c>parseResult</c> is sorted in place before the text is assembled. For each regex match,
        /// the matched span of text is translated back to a range of chunks through the index map
        /// returned by <c>CharacterRenderInfo.MapString</c>, and one location is added per rectangle
        /// produced by <c>ToRectangles</c> for that range. Matches that do not cover any chunk
        /// (for example zero-length matches) contribute no location.
        /// </remarks>
        /// <returns>the locations of all matches, sorted with <c>_IComparer_54</c></returns>
        public virtual ICollection<IPdfTextLocation> GetResultantLocations()
        {
            // align characters in "logical" order
            JavaCollectionsUtil.Sort(parseResult, new TextChunkLocationBasedComparator(new DefaultTextChunkLocationComparator()));

            // process parse results
            IList<IPdfTextLocation> retval = new List<IPdfTextLocation>();
            CharacterRenderInfo.StringConversionInfo txt = CharacterRenderInfo.MapString(parseResult);

            for (Match mat = iText.IO.Util.StringUtil.Match(pattern, txt.text); mat.Success; mat = mat.NextMatch())
            {
                // The index map only has an entry for the text position at which a chunk starts.
                // Positions of inserted spaces, positions inside a multi-character chunk and the
                // position just past the end of the text have no entry, so the match boundaries
                // cannot be looked up directly.

                // First chunk of the match: nearest mapped position at or after the match start.
                int firstPos = mat.Index;
                while (firstPos < txt.text.Length && !txt.indexMap.ContainsKey(firstPos))
                {
                    firstPos++;
                }

                // Last chunk of the match: nearest mapped position at or before the last matched character.
                int lastPos = mat.Index + mat.Length - 1;
                while (lastPos >= 0 && !txt.indexMap.ContainsKey(lastPos))
                {
                    lastPos--;
                }

                int? startIndex;
                int? endIndex;
                // Non-short-circuit '&' so that both out variables are always assigned.
                bool resolved = txt.indexMap.TryGetValue(firstPos, out startIndex)
                                & txt.indexMap.TryGetValue(lastPos, out endIndex);
                if (!resolved || !startIndex.HasValue || !endIndex.HasValue || startIndex.Value > endIndex.Value)
                {
                    // The match covers no chunk at all (e.g. it is empty or consists only of an inserted space).
                    continue;
                }

                string matchedText = iText.IO.Util.StringUtil.Group(mat, 0);
                // endIndex is the index of the last chunk of the match; SubList takes an exclusive upper bound.
                foreach (Rectangle r in ToRectangles(parseResult.SubList(startIndex.Value, endIndex.Value + 1)))
                {
                    retval.Add(new DefaultPdfTextLocation(0, r, matchedText));
                }
            }

            /* sort
             * even though the return type is an unordered ICollection<IPdfTextLocation>, we apply a
             * sorting algorithm here. This is to ensure that tests that use this functionality
             * (for instance to generate pdf with areas of interest highlighted) will not break
             * when compared.
             */
            JavaCollectionsUtil.Sort(retval, new _IComparer_54());
            return retval;
        }
        /// <summary>
        /// Converts a <c>List&lt;CharacterRenderInfo&gt;</c> into plain text plus a mapping of indices
        /// (from positions in the text to positions in the list).
        /// </summary>
        /// <remarks>
        /// The text of each chunk is appended in list order. For every chunk, the index map gets an
        /// entry whose key is the position in the text at which the chunk's text starts and whose
        /// value is the chunk's index in <paramref name="cris"/>.
        /// These indices can differ; if two consecutive chunks are on the same line and at a word
        /// boundary, and neither the previous chunk ends with a space nor the current chunk starts
        /// with one, a single space is inserted between them. The inserted space has no entry in the
        /// index map and causes the indices to differ by at least 1.
        /// No separator is inserted between chunks that are not on the same line.
        /// </remarks>
        /// <param name="cris">the chunks to convert, in the order in which their text is concatenated</param>
        /// <returns>the concatenated text together with the text-position-to-list-index map</returns>
        internal static CharacterRenderInfo.StringConversionInfo MapString(IList<iText.Kernel.Pdf.Canvas.Parser.Listener.CharacterRenderInfo> cris)
        {
            IDictionary<int, int?> indexMap = new Dictionary<int, int?>();
            StringBuilder sb = new StringBuilder();

            iText.Kernel.Pdf.Canvas.Parser.Listener.CharacterRenderInfo lastChunk = null;
            for (int i = 0; i < cris.Count; i++)
            {
                iText.Kernel.Pdf.Canvas.Parser.Listener.CharacterRenderInfo chunk = cris[i];

                if (lastChunk != null
                    && chunk.SameLine(lastChunk)
                    && chunk.GetLocation().IsAtWordBoundary(lastChunk.GetLocation()))
                {
                    // we only insert a blank space if the trailing character of the previous string
                    // wasn't a space, and the leading character of the current string isn't a space
                    string previousText = lastChunk.GetText();
                    string currentText = chunk.GetText();
                    bool previousEndsWithSpace = previousText.Length > 0 && previousText[previousText.Length - 1] == ' ';
                    bool currentStartsWithSpace = currentText.Length > 0 && currentText[0] == ' ';
                    if (!previousEndsWithSpace && !currentStartsWithSpace)
                    {
                        sb.Append(' ');
                    }
                }

                // Record where this chunk's text starts (after any inserted space), then append it.
                indexMap[sb.Length] = i;
                sb.Append(chunk.GetText());
                lastChunk = chunk;
            }

            CharacterRenderInfo.StringConversionInfo ret = new CharacterRenderInfo.StringConversionInfo();
            ret.indexMap = indexMap;
            ret.text = sb.ToString();
            return ret;
        }