/// <summary>
/// Creates a normalizer for <paramref name="document"/> using the style
/// identified by <paramref name="format"/>.
/// </summary>
/// <param name="document">Raw document whose text is passed to EncodeText.</param>
/// <param name="format">Style identifier handed to the StyleReader and stored in this instance.</param>
public Normalizer (RawDocument document, string format)
{
	// A StyleReader is built to obtain the regular expressions (rules).
	StyleReader reader = new StyleReader (format);
	this.format = format;
	rules = reader.GetRules ();

	// If the style has more than one column, the document is broken up
	// and converted to a single column.
	bool hasSeveralColumns = reader.GetNumColumns () > 1;
	if (hasSeveralColumns) {
		document.BreakColumns ();
	}

	string text = document.GetText ();
	EncodeText (text);
}
/// <summary>
/// Builds a RawDocument from every entry of test_docs and asserts that its
/// text equals the entry of raw_docs at the same position.
/// </summary>
public void GetText ()
{
	int index = 0;

	foreach (PDFPoppler pdf in test_docs) {
		string extracted = new RawDocument (pdf).GetText ();

		// The failure message carries the position of the offending document.
		Assert.AreEqual (raw_docs[index], extracted, "GT" + index);
		index++;
	}
}
/// <summary>
/// Handler that runs the open-PDF dialog and, when the user accepts it,
/// loads the chosen document: non-text content is extracted, a raw document
/// is created, its text is shown in the text view, the Markup and Normalize
/// actions are made sensitive and the store is cleared.
/// </summary>
/// <param name="sender">Object that raised the event (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void OnOpenActivated (object sender, System.EventArgs e)
{
	OpenPDFDialog dialog = new OpenPDFDialog ();

	// try/finally guarantees the dialog is destroyed even when building the
	// Uri or reading the document throws; the exception still propagates.
	try {
		if (dialog.Run () != (int) ResponseType.Ok) {
			return;
		}

		Uri uri = new Uri (dialog.Document);
		PDFPoppler reader = new PDFPoppler (uri);

		// Extracting images from document
		reader.GetNonText ();

		// Extracting text from document
		rdocument = reader.CreateRawDocument ();
		textview.Buffer.Text = rdocument.GetText ();

		Markup.Sensitive = true;
		Normalize.Sensitive = true;
		store.Clear ();
		// Logger.ClearList ();
	} finally {
		dialog.Destroy ();
	}
}