/// <summary>
/// Walks every word of the PDF document at <paramref name="path"/> and collects the text of each
/// word segment together with the oriented rectangle found at the same index.
/// </summary>
/// <remarks>
/// The collected list is local to this method and is not returned; once the document has been
/// processed an empty line is written to the console.
/// </remarks>
/// <param name="path">Path of the PDF file passed to <c>PdfDocumentProcessor.LoadDocument</c>.</param>
/// <exception cref="Exception">
/// Thrown when any step fails. The message is the message of the underlying failure, and the
/// underlying exception is available through <see cref="Exception.InnerException"/>.
/// </exception>
public void Coordenadas(string path)
{
    try
    {
        var wordCoordinates = new List<Tuple<string, PdfOrientedRectangle>>();

        using (PdfDocumentProcessor processor = new PdfDocumentProcessor())
        {
            processor.LoadDocument(path);

            // NextWord() yields null once the document has no more words.
            for (PdfWord currentWord = processor.NextWord();
                 currentWord != null;
                 currentWord = processor.NextWord())
            {
                // Pair the i-th rectangle with the i-th segment of the word.
                // NOTE(review): assumes Rectangles and Segments have the same length - confirm.
                for (int i = 0; i < currentWord.Rectangles.Count; i++)
                {
                    PdfOrientedRectangle wordRectangle = currentWord.Rectangles[i];
                    wordCoordinates.Add(
                        new Tuple<string, PdfOrientedRectangle>(currentWord.Segments[i].Text, wordRectangle));
                }
            }
        }

        Console.WriteLine();
    }
    catch (Exception ex)
    {
        // Same exception type and message as before, but keep the original exception (type and
        // stack trace) as InnerException instead of discarding it.
        throw new Exception(ex.Message, ex);
    }
}
/// <summary>
/// Folds one text position into the word index <c>pdfWordsByXByY</c>, which maps a Y coordinate
/// to a dictionary of words keyed by each word's <c>Right</c> value.
/// </summary>
/// <remarks>
/// Whitespace-only characters are ignored. A character extends an existing word on the same Y
/// when its X equals a stored key exactly, or when the nearest stored key is within 1 unit of it;
/// otherwise it starts a new word. After a word is extended it is re-keyed under its new
/// <c>Right</c> value. If another word already occupies that key, the word is not re-added.
/// Failures are reported through <see cref="System.Diagnostics.Trace"/> and are not propagated,
/// so a single bad text position does not abort processing.
/// </remarks>
/// <param name="text">The text position to add to the index.</param>
protected override void processTextPosition(TextPosition text)
{
    try
    {
        float textX = text.getXDirAdj();
        float textY = text.getYDirAdj();
        string character = text.getCharacter();

        if (String.IsNullOrWhiteSpace(character))
        {
            return;
        }

        // Fetch the row for this Y, or prepare a fresh one (registered at the end, once populated).
        Dictionary<float, PdfWord> wordsByX;
        bool isNewRow = !pdfWordsByXByY.TryGetValue(textY, out wordsByX);
        if (isNewRow)
        {
            wordsByX = new Dictionary<float, PdfWord>();
        }

        // Decide which existing word, if any, this character continues.
        PdfWord word;
        float matchedKey = textX;
        bool extendsExisting = wordsByX.TryGetValue(textX, out word);

        if (!extendsExisting)
        {
            // No exact hit: look for the stored key closest to this X in a single pass.
            float requiredX = -1;
            float minDiff = float.MaxValue;
            foreach (float key in wordsByX.Keys)
            {
                float diff = Math.Abs(key - textX);
                if (diff < minDiff)
                {
                    minDiff = diff;
                    requiredX = key;
                }
            }

            // -1 doubles as the "nothing found" marker, so keys at or below -1 are never
            // treated as a near match (behavior kept from the original implementation).
            if (requiredX > -1 && minDiff <= 1)
            {
                extendsExisting = true;
                matchedKey = requiredX;
                word = wordsByX[requiredX];
            }
        }

        if (extendsExisting)
        {
            // Drop the entry under the key it was found by; the word is re-keyed below.
            wordsByX.Remove(matchedKey);
            word.EndCharWidth = text.getWidthDirAdj();
            word.Height = text.getHeightDir();
            word.EndX = textX;
            word.Text += character;
        }
        else
        {
            word = new PdfWord();
            word.Text = character;
            word.EndX = word.StartX = textX;
            word.Y = textY;
            word.EndCharWidth = word.StartCharWidth = text.getWidthDirAdj();
            word.Height = text.getHeightDir();
        }

        // Key the word by its current Right value unless that slot is already taken.
        if (!wordsByX.ContainsKey(word.Right))
        {
            wordsByX.Add(word.Right, word);
        }

        if (isNewRow)
        {
            pdfWordsByXByY.Add(textY, wordsByX);
        }
    }
    catch (Exception ex)
    {
        // Still best-effort (never throws to the caller), but no longer silent.
        System.Diagnostics.Trace.TraceError("processTextPosition failed: " + ex);
    }
}