/// <summary>
/// Searches every page of the PDF at <c>Config.TestPdf</c> for the text "pattern"
/// and writes the page number of each matching fragment to the console.
/// </summary>
static void Search()
{
    const string searchText = "pattern";

    // Load the document and run the absorber across all of its pages.
    var document = new Document(Config.TestPdf);
    var absorber = new TextFragmentAbsorber(searchText);
    document.Pages.Accept(absorber);

    // One console line per fragment the absorber collected.
    foreach (TextFragment match in absorber.TextFragments)
    {
        Console.WriteLine("Page Number : {0} ", match.Page.Number);
    }
}
/// <summary>
/// Replaces the first fragment matching "text" in ReplaceTextPage.pdf with "New Phrase"
/// (Verdana, size 22, blue) and saves the result as ReplaceFirstOccurrence_out.pdf.
/// When nothing matches, the document is neither saved nor reported.
/// </summary>
public static void Run()
{
    // ExStart:ReplaceFirstOccurrence
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Search all pages of the source document for the phrase.
    var document = new Document(baseDir + "ReplaceTextPage.pdf");
    var absorber = new TextFragmentAbsorber("text");
    document.Pages.Accept(absorber);

    TextFragmentCollection matches = absorber.TextFragments;
    if (matches.Count <= 0)
    {
        return;
    }

    // Index 1 is the first occurrence in this collection.
    TextFragment first = matches[1];
    first.Text = "New Phrase";

    var state = first.TextState;
    state.Font = FontRepository.FindFont("Verdana");
    state.FontSize = 22;
    state.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);

    string outputPath = baseDir + "ReplaceFirstOccurrence_out.pdf";
    document.Save(outputPath);
    Console.WriteLine("\nText replaced successfully.\nFile saved at " + outputPath);
    // ExEnd:ReplaceFirstOccurrence
}
/// <summary>
/// Replaces every fragment matching "text" on the page at index 2 of ReplaceTextPage.pdf
/// with "New Phrase" (Verdana, size 22, blue on green) and saves the document as
/// ReplaceTextPage_out.pdf.
/// </summary>
public static void Run()
{
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
    var document = new Document(baseDir + "ReplaceTextPage.pdf");

    // Only the page at index 2 is visited, not the whole document.
    var absorber = new TextFragmentAbsorber("text");
    document.Pages[2].Accept(absorber);

    foreach (TextFragment match in absorber.TextFragments)
    {
        match.Text = "New Phrase";

        var state = match.TextState;
        state.Font = FontRepository.FindFont("Verdana");
        state.FontSize = 22;
        state.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);
        state.BackgroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Green);
    }

    document.Save(baseDir + "ReplaceTextPage_out.pdf");
}
/// <summary>
/// Searches the page at index 2 of ReplaceTextPage.pdf for "text", rewrites each match as
/// "New Phrase" (Verdana, size 22, blue on green) and saves ReplaceTextPage_out.pdf.
/// </summary>
public static void Run()
{
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
    var document = new Document(baseDir + "ReplaceTextPage.pdf");
    var absorber = new TextFragmentAbsorber("text");

    // ExStart: SearchInsideText
    // Restrict the search to a single page instead of the whole page collection.
    document.Pages[2].Accept(absorber);
    // ExEnd: SearchInsideText

    // Rewrite every fragment found on that page.
    foreach (TextFragment match in absorber.TextFragments)
    {
        match.Text = "New Phrase";

        var state = match.TextState;
        state.Font = FontRepository.FindFont("Verdana");
        state.FontSize = 22;
        state.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);
        state.BackgroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Green);
    }

    document.Save(baseDir + "ReplaceTextPage_out.pdf");
}
public void LoadPdf()
{
    //ExStart
    //ExFor:Document.#ctor(String)
    //ExSummary:Shows how to load a PDF.
    // Write a one-line document out as a PDF so there is something to load.
    string pdfPath = ArtifactsDir + "PDF2Word.LoadPdf.pdf";

    Aspose.Words.Document source = new Aspose.Words.Document();
    DocumentBuilder writer = new DocumentBuilder(source);
    writer.Write("Hello world!");
    source.Save(pdfPath);

    // Below are two ways of loading PDF documents using Aspose products.
    // 1 - Load as an Aspose.Words document:
    Aspose.Words.Document loadedWithWords = new Aspose.Words.Document(pdfPath);
    string wordsText = loadedWithWords.GetText().Trim();

    Assert.AreEqual("Hello world!", wordsText);

    // 2 - Load as an Aspose.Pdf document and collect its text with an absorber:
    Aspose.Pdf.Document loadedWithPdf = new Aspose.Pdf.Document(pdfPath);
    TextFragmentAbsorber absorber = new TextFragmentAbsorber();
    loadedWithPdf.Pages.Accept(absorber);
    string pdfText = absorber.Text.Trim();

    Assert.AreEqual("Hello world!", pdfText);
    //ExEnd
}
/// <summary>
/// Finds every fragment matching "text" in SearchAndGetTextAll.pdf and prints its text,
/// position, font details, font size and foreground color to the console.
/// </summary>
public static void Run()
{
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Search all pages of the document for the phrase.
    var document = new Document(baseDir + "SearchAndGetTextAll.pdf");
    var absorber = new TextFragmentAbsorber("text");
    document.Pages.Accept(absorber);

    foreach (TextFragment match in absorber.TextFragments)
    {
        // Text and placement.
        Console.WriteLine("Text : {0} ", match.Text);
        Console.WriteLine("Position : {0} ", match.Position);
        Console.WriteLine("XIndent : {0} ", match.Position.XIndent);
        Console.WriteLine("YIndent : {0} ", match.Position.YIndent);

        // Font properties.
        var font = match.TextState.Font;
        Console.WriteLine("Font - Name : {0}", font.FontName);
        Console.WriteLine("Font - IsAccessible : {0} ", font.IsAccessible);
        Console.WriteLine("Font - IsEmbedded : {0} ", font.IsEmbedded);
        Console.WriteLine("Font - IsSubset : {0} ", font.IsSubset);

        // Remaining text-state properties.
        Console.WriteLine("Font Size : {0} ", match.TextState.FontSize);
        Console.WriteLine("Foreground Color : {0} ", match.TextState.ForegroundColor);
    }
}
/// <summary>
/// Uses a .NET <c>Regex</c> to find every run of non-whitespace characters on the page at
/// index 1 of SearchTextRegex.pdf and prints each match on its own console line.
/// </summary>
public static void Run()
{
    //ExStart: SearchTextWithDotNetRegex
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Pattern: one or more consecutive non-whitespace characters.
    var wordPattern = new System.Text.RegularExpressions.Regex(@"[\S]+");

    // Open the document and pick the page to search.
    var pdf = new Aspose.Pdf.Document(baseDir + "SearchTextRegex.pdf");
    Page targetPage = pdf.Pages[1];

    // Build the absorber from the Regex object and flag the search as a regex search.
    var absorber = new TextFragmentAbsorber(wordPattern);
    absorber.TextSearchOptions.IsRegularExpressionUsed = true;
    targetPage.Accept(absorber);

    foreach (TextFragment match in absorber.TextFragments)
    {
        Console.WriteLine(match.Text);
    }
    //ExEnd: SearchTextWithDotNetRegex
}
/// <summary>
/// Searches every page of the PDF at <c>Config.TestPdf</c> with the regular expression
/// <c>\d{4}-\d{4}</c> (for example "1999-2000") and prints the text of each match.
/// </summary>
static void SearchWithRegularExpression()
{
    var document = new Document(Config.TestPdf);

    // The absorber is switched to regex mode through its search options.
    var absorber = new TextFragmentAbsorber("\\d{4}-\\d{4}");
    absorber.TextSearchOptions = new TextSearchOptions(true);

    document.Pages.Accept(absorber);

    foreach (TextFragment match in absorber.TextFragments)
    {
        Console.WriteLine("Text : {0} ", match.Text);
    }
}
/// <summary>
/// Looks for the text "pattern" on all pages of the PDF at <c>Config.TestPdf</c> and
/// prints, for each matching fragment, the number of the page it was found on.
/// </summary>
static void Search()
{
    const string searchText = "pattern";

    var document = new Document(Config.TestPdf);

    // Collect the matching fragments from the whole page collection.
    var absorber = new TextFragmentAbsorber(searchText);
    document.Pages.Accept(absorber);
    TextFragmentCollection matches = absorber.TextFragments;

    foreach (TextFragment match in matches)
    {
        Console.WriteLine("Page Number : {0} ", match.Page.Number);
    }
}
public void WithoutUpdateFields()
{
    //ExStart
    //ExFor:SaveOptions.UpdateFields
    //ExSummary:Shows how to update fields before saving into a PDF document.
    Document doc = DocumentHelper.CreateDocumentFillWithDummyText();

    // Save with field updating switched off.
    PdfSaveOptions pdfSaveOptions = new PdfSaveOptions();
    pdfSaveOptions.UpdateFields = false;

    doc.Save(MyDir + @"\Artifacts\UpdateFields_False.pdf", pdfSaveOptions);
    //ExEnd

    // Re-open the saved PDF and look for the literal field text.
    Aspose.Pdf.Document savedPdf = new Aspose.Pdf.Document(MyDir + @"\Artifacts\UpdateFields_False.pdf");
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("Page of");
    savedPdf.Pages.Accept(absorber);

    // The fields were not updated, so the first match is exactly "Page of".
    string firstMatchText = absorber.TextFragments[1].Text;
    Assert.AreEqual("Page of", firstMatchText);
}
/// <summary>
/// Click handler that opens the PDF named in <c>txtPDF</c> and adds the text of the first
/// text fragment of each page to <c>lstTitles</c>, skipping texts already in the list.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// Both <c>txtLicense</c> and <c>txtPDF</c> must be non-empty; otherwise a message box asks
/// the user to select them. Any exception is reported in an "Error" message box.
/// </remarks>
private void btnExtractTitles_Click(object sender, EventArgs e)
{
    try
    {
        if (String.IsNullOrEmpty(txtLicense.Text) || String.IsNullOrEmpty(txtPDF.Text))
        {
            MessageBox.Show("Please select License and PDF file first.", "No File Selected");
            return;
        }

        Document doc = new Document(txtPDF.Text);
        foreach (Page page in doc.Pages)
        {
            try
            {
                TextFragmentAbsorber absorber = new TextFragmentAbsorber();
                page.Accept(absorber);

                // A page without any text has no fragment at index 1; skip it instead of
                // letting the indexer throw and abort the remaining pages.
                if (absorber.TextFragments.Count == 0)
                {
                    continue;
                }

                string title = absorber.TextFragments[1].Text;
                if (!lstTitles.Items.Contains(title))
                {
                    lstTitles.Items.Add(title);
                }
            }
            finally
            {
                // Release the page on every path, including skipped and duplicate pages.
                page.Dispose();
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "Error");
    }
}
/// <summary>
/// Opens ReplaceTextPage.pdf with an absorber configured for
/// <c>TextEditOptions.FontReplace.RemoveUnusedFonts</c>, sets the font of every text fragment
/// to "Arial, Bold" and saves the document as RemoveUnusedFonts_out_.pdf.
/// Any exception is reported on the console together with a licensing hint.
/// </summary>
public static void Run()
{
    try
    {
        // ExStart:RemoveUnusedFonts
        string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
        Document document = new Document(baseDir + "ReplaceTextPage.pdf");

        // The edit options on the absorber request removal of fonts that end up unused.
        TextEditOptions editOptions = new TextEditOptions(TextEditOptions.FontReplace.RemoveUnusedFonts);
        TextFragmentAbsorber fragmentAbsorber = new TextFragmentAbsorber(editOptions);
        document.Pages.Accept(fragmentAbsorber);

        // Switch every fragment to the replacement font.
        foreach (TextFragment fragment in fragmentAbsorber.TextFragments)
        {
            fragment.TextState.Font = FontRepository.FindFont("Arial, Bold");
        }

        string outputPath = baseDir + "RemoveUnusedFonts_out_.pdf";
        document.Save(outputPath);
        // ExEnd:RemoveUnusedFonts
        Console.WriteLine("\nUnused fonts removed successfully from pdf document.\nFile saved at " + outputPath);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message + "\nThis example will only work if you apply a valid Aspose License. You can purchase full license or get 30 day temporary license from http:// Www.aspose.com/purchase/default.aspx.");
    }
}
/// <summary>
/// Downloads the certificate template PDF, replaces every "Name" fragment with
/// <paramref name="nameParticipant"/>, saves the result and returns the path it was saved to.
/// </summary>
/// <param name="idEvent">Event identifier; used as the first part of the output file name.</param>
/// <param name="nameParticipant">Participant name written into the PDF and used in the file name.</param>
/// <returns>An OK result carrying the path string passed to <c>Save</c>.</returns>
public IHttpActionResult DownloadCertificate(string idEvent, string nameParticipant)
{
    string dataDir = "http://localhost:49661/Images/test.pdf";

    HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(dataDir);

    // Buffer the template in memory so the network response can be released immediately
    // and the document gets a seekable stream that stays open until it has been saved.
    using (MemoryStream templateBuffer = new MemoryStream())
    {
        using (WebResponse myResp = myReq.GetResponse())
        using (Stream responseStream = myResp.GetResponseStream())
        {
            responseStream.CopyTo(templateBuffer);
        }
        templateBuffer.Position = 0;

        Document pdfDocument = new Document(templateBuffer);

        // Replace every occurrence of the placeholder text on all pages.
        TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("Name");
        pdfDocument.Pages.Accept(textFragmentAbsorber);
        foreach (TextFragment textFragment in textFragmentAbsorber.TextFragments)
        {
            textFragment.Text = nameParticipant;
        }

        // NOTE(review): this appends to the template URL, so Save receives an "http://..."
        // string rather than a file-system path, and the file name contains caller-supplied
        // values (idEvent, nameParticipant) unvalidated. Left unchanged because the intended
        // storage location is not visible here - confirm and replace with a server path.
        dataDir = dataDir + "/Images/" + idEvent + "_" + nameParticipant + ".pdf";
        pdfDocument.Save(dataDir);
    }

    return(Ok(dataDir));
}
/// <summary>
/// Finds every run of non-whitespace characters in SearchAndGetTextFromAll.pdf using a regex
/// search, calls <c>DrawBox</c> in red for each segment of each match, and saves the document
/// as SearchTextAndDrawRectangle_out.pdf.
/// </summary>
public static void Run()
{
    // ExStart:SearchTextAndDrawRectangle
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
    Document pdf = new Document(baseDir + "SearchAndGetTextFromAll.pdf");

    // Regex search: the pattern is only treated as a regular expression because the
    // search options are created with the regex flag set.
    TextFragmentAbsorber absorber = new TextFragmentAbsorber(@"[\S]+");
    absorber.TextSearchOptions = new TextSearchOptions(true);
    pdf.Pages.Accept(absorber);

    PdfContentEditor contentEditor = new PdfContentEditor(pdf);

    // Box every segment of every matched fragment on the page the fragment belongs to.
    foreach (TextFragment match in absorber.TextFragments)
    {
        foreach (TextSegment segment in match.Segments)
        {
            DrawBox(contentEditor, match.Page.Number, segment, System.Drawing.Color.Red);
        }
    }

    string outputPath = baseDir + "SearchTextAndDrawRectangle_out.pdf";
    pdf.Save(outputPath);
    // ExEnd:SearchTextAndDrawRectangle
    Console.WriteLine("\nRectangle drawn successfully on searched text.\nFile saved at " + outputPath);
}
/// <summary>
/// Searches ReplaceTextPage.pdf for "text" and, if at least one fragment matches, rewrites
/// the first one as "New Phrase" (Verdana, size 22, blue) and saves the document as
/// ReplaceFirstOccurrence_out_.pdf. Without a match nothing is saved or printed.
/// </summary>
public static void Run()
{
    // ExStart:ReplaceFirstOccurrence
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
    Document source = new Document(baseDir + "ReplaceTextPage.pdf");

    // Collect matches from every page.
    TextFragmentAbsorber phraseAbsorber = new TextFragmentAbsorber("text");
    source.Pages.Accept(phraseAbsorber);
    TextFragmentCollection found = phraseAbsorber.TextFragments;

    bool hasMatch = found.Count > 0;
    if (hasMatch)
    {
        // The first occurrence sits at index 1 of the collection.
        TextFragment target = found[1];
        target.Text = "New Phrase";
        target.TextState.Font = FontRepository.FindFont("Verdana");
        target.TextState.FontSize = 22;
        target.TextState.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);

        string savedTo = baseDir + "ReplaceFirstOccurrence_out_.pdf";
        source.Save(savedTo);
        Console.WriteLine("\nText replaced successfully.\nFile saved at " + savedTo);
    }
    // ExEnd:ReplaceFirstOccurrence
}
/// <summary>
/// Shrinks every text fragment of ExtractTextPage.pdf to 70% of its font size, reloads the
/// document from memory, extracts its text and writes it to ExtractColumnsText_out.txt.
/// </summary>
public static void Run()
{
    // ExStart:ExtractColumnsText
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Open document
    Document pdfDocument = new Document(dataDir + "ExtractTextPage.pdf");

    TextFragmentAbsorber tfa = new TextFragmentAbsorber();
    pdfDocument.Pages.Accept(tfa);
    TextFragmentCollection tfc = tfa.TextFragments;
    foreach (TextFragment tf in tfc)
    {
        // Scale the font to 70% of its current size (a 30% reduction).
        // NOTE(review): presumably this widens the gaps between columns so the extractor
        // keeps them apart - confirm against the Aspose guidance for column extraction.
        tf.TextState.FontSize = tf.TextState.FontSize * 0.7f;
    }

    // Round-trip through memory so extraction runs on the resized text.
    Stream st = new MemoryStream();
    pdfDocument.Save(st);
    pdfDocument = new Document(st);

    // Extract the text once. The original ran a second Visit over the document after
    // reading Text and discarded its result, which only repeated the extraction work.
    TextAbsorber textAbsorber = new TextAbsorber();
    pdfDocument.Pages.Accept(textAbsorber);
    String extractedText = textAbsorber.Text;

    dataDir = dataDir + "ExtractColumnsText_out.txt";
    System.IO.File.WriteAllText(dataDir, extractedText);
    // ExEnd:ExtractColumnsText
    Console.WriteLine("\nColumns text extracted successfully from Pages of PDF Document.\nFile saved at " + dataDir);
}
/// <summary>
/// Replaces every fragment matching "text" in input.pdf with "TEXT" (Verdana, size 22,
/// blue on green), saves the result as output.pdf and reports success on the console.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main(string[] args)
{
    // Data directory, resolved relative to the current working directory.
    string baseDir = Path.GetFullPath("../../../Data/");

    // Search all pages of the input document for the phrase.
    var document = new Document(baseDir + "input.pdf");
    var absorber = new TextFragmentAbsorber("text");
    document.Pages.Accept(absorber);

    foreach (TextFragment match in absorber.TextFragments)
    {
        match.Text = "TEXT";

        var state = match.TextState;
        state.Font = FontRepository.FindFont("Verdana");
        state.FontSize = 22;
        state.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);
        state.BackgroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Green);
    }

    document.Save(baseDir + "output.pdf");

    System.Console.WriteLine("Text replaced successfully!");
}
/// <summary>
/// Writes a PDF containing one ordinary and one invisible text fragment to 39400_out.pdf,
/// then reopens it and prints the text, position and invisibility flag of every fragment
/// found on the page at index 1.
/// </summary>
public static void Run()
{
    //ExStart: AddAndSearchHiddenText
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
    string pdfPath = baseDir + "39400_out.pdf";

    // Build a one-page document with a visible and a hidden fragment.
    Aspose.Pdf.Document doc = new Aspose.Pdf.Document();
    Page firstPage = doc.Pages.Add();

    TextFragment visibleText = new TextFragment("This is common text.");
    TextFragment hiddenText = new TextFragment("This is invisible text.");
    hiddenText.TextState.Invisible = true;

    firstPage.Paragraphs.Add(visibleText);
    firstPage.Paragraphs.Add(hiddenText);

    doc.Save(pdfPath);
    doc.Dispose();

    // Reopen the saved file and absorb all fragments of its first page.
    doc = new Aspose.Pdf.Document(pdfPath);
    TextFragmentAbsorber fragmentAbsorber = new TextFragmentAbsorber();
    fragmentAbsorber.Visit(doc.Pages[1]);

    foreach (TextFragment found in fragmentAbsorber.TextFragments)
    {
        Console.WriteLine("Text '{0}' on pos {1} invisibility: {2} ",
            found.Text,
            found.Position.ToString(),
            found.TextState.Invisible);
    }

    doc.Dispose();
    //ExEnd: AddAndSearchHiddenText
}
/// <summary>
/// Replaces every fragment matching "text" in ReplaceTextAll.pdf with "TEXT" (Verdana,
/// size 22, blue on green) and saves the document as ReplaceTextAll_out_.pdf.
/// </summary>
public static void Run()
{
    // ExStart:ReplaceTextAll
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
    Document source = new Document(baseDir + "ReplaceTextAll.pdf");

    // Gather the matches from the entire page collection.
    TextFragmentAbsorber phraseAbsorber = new TextFragmentAbsorber("text");
    source.Pages.Accept(phraseAbsorber);

    foreach (TextFragment found in phraseAbsorber.TextFragments)
    {
        found.Text = "TEXT";

        var state = found.TextState;
        state.Font = FontRepository.FindFont("Verdana");
        state.FontSize = 22;
        state.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);
        state.BackgroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Green);
    }

    string outputPath = baseDir + "ReplaceTextAll_out_.pdf";
    source.Save(outputPath);
    // ExEnd:ReplaceTextAll
    Console.WriteLine("\nText replaced successfully.\nFile saved at " + outputPath);
}
/// <summary>
/// Searches all pages of SearchAndGetTextFromAll.pdf for "text" and prints the text,
/// position, font details, font size and foreground color of each matching fragment.
/// </summary>
public static void Run()
{
    // ExStart:SearchAndGetTextAll
    string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
    Document source = new Document(baseDir + "SearchAndGetTextFromAll.pdf");

    TextFragmentAbsorber phraseAbsorber = new TextFragmentAbsorber("text");
    source.Pages.Accept(phraseAbsorber);
    TextFragmentCollection found = phraseAbsorber.TextFragments;

    foreach (TextFragment fragment in found)
    {
        var position = fragment.Position;
        var font = fragment.TextState.Font;

        Console.WriteLine("Text : {0} ", fragment.Text);
        Console.WriteLine("Position : {0} ", position);
        Console.WriteLine("XIndent : {0} ", position.XIndent);
        Console.WriteLine("YIndent : {0} ", position.YIndent);
        Console.WriteLine("Font - Name : {0}", font.FontName);
        Console.WriteLine("Font - IsAccessible : {0} ", font.IsAccessible);
        Console.WriteLine("Font - IsEmbedded : {0} ", font.IsEmbedded);
        Console.WriteLine("Font - IsSubset : {0} ", font.IsSubset);
        Console.WriteLine("Font Size : {0} ", fragment.TextState.FontSize);
        Console.WriteLine("Foreground Color : {0} ", fragment.TextState.ForegroundColor);
    }
    // ExEnd:SearchAndGetTextAll
}
/// <summary>
/// Replaces every fragment matching "text" on the page at index 2 of input.pdf with
/// "New Phrase" (Verdana, size 22, blue on green) and saves the document as output.pdf.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main(string[] args)
{
    // Data directory, resolved relative to the current working directory.
    string baseDir = Path.GetFullPath("../../../Data/");
    Document source = new Document(baseDir + "input.pdf");

    // The absorber visits a single page only.
    TextFragmentAbsorber phraseAbsorber = new TextFragmentAbsorber("text");
    source.Pages[2].Accept(phraseAbsorber);

    foreach (TextFragment found in phraseAbsorber.TextFragments)
    {
        found.Text = "New Phrase";

        var state = found.TextState;
        state.Font = FontRepository.FindFont("Verdana");
        state.FontSize = 22;
        state.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);
        state.BackgroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Green);
    }

    source.Save(baseDir + "output.pdf");
}
/// <summary>
/// Loads SampleHtmlFile.html as a PDF document, removes every link annotation from the page
/// at index 1 and saves the result as RemoveHyperlinksFromText_out.pdf. For links whose
/// action is a <c>GoToURIAction</c>, the URI is cleared and the text inside the annotation
/// rectangle loses its underline and is recolored black.
/// </summary>
public static void Run()
{
    // ExStart:1
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_DocumentConversion();
    Document doc = new Document(dataDir + "SampleHtmlFile.html", new HtmlLoadOptions());

    // Save once to a throwaway stream before touching the annotations.
    // NOTE(review): presumably this forces the HTML conversion to complete - confirm.
    doc.Save(new MemoryStream());

    // Snapshot the annotations first: the loop below deletes from this same collection,
    // and removing items from a collection while enumerating it is unsafe.
    System.Collections.Generic.List<Annotation> annotations = new System.Collections.Generic.List<Annotation>();
    foreach (Annotation annotation in doc.Pages[1].Annotations)
    {
        annotations.Add(annotation);
    }

    foreach (Annotation a in annotations)
    {
        if (a.AnnotationType != AnnotationType.Link)
        {
            continue;
        }

        LinkAnnotation la = (LinkAnnotation)a;
        if (la.Action is GoToURIAction)
        {
            GoToURIAction gta = (GoToURIAction)la.Action;
            gta.URI = "";

            // Restyle the text that lies inside the annotation rectangle.
            TextFragmentAbsorber tfa = new TextFragmentAbsorber();
            tfa.TextSearchOptions = new TextSearchOptions(a.Rect);
            doc.Pages[a.PageIndex].Accept(tfa);
            foreach (TextFragment tf in tfa.TextFragments)
            {
                tf.TextState.Underline = false;
                tf.TextState.ForegroundColor = Color.Black;
            }
        }

        // Every link annotation is removed, whatever its action type.
        doc.Pages[a.PageIndex].Annotations.Delete(a);
    }

    doc.Save(dataDir + "RemoveHyperlinksFromText_out.pdf");
    // ExEnd:1
}
/// <summary>
/// Searches all pages of input.pdf for "text" and, for every segment of every matching
/// fragment, prints its text, position, font details, font size and foreground color.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main(string[] args)
{
    // Data directory, resolved relative to the current working directory.
    string baseDir = Path.GetFullPath("../../../Data/");
    Document source = new Document(baseDir + "input.pdf");

    TextFragmentAbsorber phraseAbsorber = new TextFragmentAbsorber("text");
    source.Pages.Accept(phraseAbsorber);

    // Details are reported per segment, not per fragment.
    foreach (TextFragment fragment in phraseAbsorber.TextFragments)
    {
        foreach (TextSegment segment in fragment.Segments)
        {
            var position = segment.Position;
            var font = segment.TextState.Font;

            Console.WriteLine("Text : {0} ", segment.Text);
            Console.WriteLine("Position : {0} ", position);
            Console.WriteLine("XIndent : {0} ", position.XIndent);
            Console.WriteLine("YIndent : {0} ", position.YIndent);
            Console.WriteLine("Font - Name : {0}", font.FontName);
            Console.WriteLine("Font - IsAccessible : {0} ", font.IsAccessible);
            Console.WriteLine("Font - IsEmbedded : {0} ", font.IsEmbedded);
            Console.WriteLine("Font - IsSubset : {0} ", font.IsSubset);
            Console.WriteLine("Font Size : {0} ", segment.TextState.FontSize);
            Console.WriteLine("Foreground Color : {0} ", segment.TextState.ForegroundColor);
        }
    }
}
/// <summary>
/// Opens the PDF at <paramref name="path"/>, adds a half-transparent yellow highlight
/// annotation over every case-insensitive match of <paramref name="phrase"/>, and returns
/// the annotated document as an in-memory stream. Written by Fredio.
/// </summary>
/// <param name="path">Path of the PDF file to open.</param>
/// <param name="phrase">
/// Text to search for. It is inserted verbatim after <c>(?i)</c> and searched as a regular
/// expression, so regex metacharacters in it are interpreted, not matched literally.
/// </param>
/// <returns>A stream containing the saved PDF, positioned at its beginning.</returns>
public System.IO.MemoryStream SearcText(string path, string phrase)
{
    InjectAsposeLicemse();

    Aspose.Pdf.Document document = new Aspose.Pdf.Document(path);

    // NOTE(review): phrase is not escaped; if callers only ever pass plain text,
    // Regex.Escape(phrase) would be safer - confirm before changing.
    string searchTextValue = string.Format("(?i){0}", phrase);
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(searchTextValue, new TextSearchOptions(true));
    document.Pages.Accept(textFragmentAbsorber);

    // Iterating an empty collection is a no-op, so no separate Count check is needed.
    foreach (TextFragment textFragment in textFragmentAbsorber.TextFragments)
    {
        // Rectangle anchored at the fragment position and sized like the fragment.
        Aspose.Pdf.Rectangle highlightArea = new Aspose.Pdf.Rectangle(
            textFragment.Position.XIndent,
            textFragment.Position.YIndent,
            textFragment.Position.XIndent + textFragment.Rectangle.Width,
            textFragment.Position.YIndent + textFragment.Rectangle.Height);

        Aspose.Pdf.Annotations.HighlightAnnotation highlight =
            new Aspose.Pdf.Annotations.HighlightAnnotation(textFragment.Page, highlightArea);
        highlight.Opacity = 0.5;
        highlight.Color = Aspose.Pdf.Color.Yellow;
        textFragment.Page.Annotations.Add(highlight);
    }

    System.IO.MemoryStream ms = new System.IO.MemoryStream();
    document.Save(ms);

    // Saving leaves the position at the end of the stream; rewind so a caller that reads
    // or copies the stream directly receives the PDF rather than zero bytes.
    ms.Position = 0;
    return(ms);
}
/// <summary>
/// Searches ExtractTextPage.pdf for the placeholder
/// "[TextFragmentAbsorber,companyname,Textbox,50]", replaces each match with a longer string
/// in Arial, size 12, navy, and saves RearrangeContentsUsingTextReplacement_out_.pdf.
/// Any exception is reported on the console together with a licensing hint.
/// </summary>
public static void Run()
{
    try
    {
        // ExStart:RearrangeContentsUsingTextReplacement
        string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
        Document source = new Document(baseDir + "ExtractTextPage.pdf");

        // The absorber is built from the placeholder string alone; no search options
        // are set on it in this method.
        TextFragmentAbsorber placeholderAbsorber = new TextFragmentAbsorber("[TextFragmentAbsorber,companyname,Textbox,50]");
        source.Pages.Accept(placeholderAbsorber);

        foreach (TextFragment placeholder in placeholderAbsorber.TextFragments)
        {
            // Formatting is applied first, then the longer replacement text.
            var state = placeholder.TextState;
            state.Font = FontRepository.FindFont("Arial");
            state.FontSize = 12;
            state.ForegroundColor = Aspose.Pdf.Color.Navy;

            placeholder.Text = "This is a Larger String for the Testing of this issue";
        }

        string outputPath = baseDir + "RearrangeContentsUsingTextReplacement_out_.pdf";
        source.Save(outputPath);
        // ExEnd:RearrangeContentsUsingTextReplacement
        Console.WriteLine("\nContents rearranged successfully using text replacement.\nFile saved at " + outputPath);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message + "\nThis example will only work if you apply a valid Aspose License. You can purchase full license or get 30 day temporary license from http:// Www.aspose.com/purchase/default.aspx.");
    }
}
/// <summary>
/// Opens ~/Images/test.pdf, replaces every "Name" fragment with "HIYA HIYA HIYA" and saves
/// the result as test3.pdf in the same directory, then returns the default view.
/// </summary>
/// <returns>The default view for this action.</returns>
public ActionResult DownloadCertificate()
{
    string dataDir = Server.MapPath("~/Images/test.pdf");

    // Open document
    Document pdfDocument = new Document(dataDir);

    // Find all instances of the placeholder text on all pages.
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("Name");
    pdfDocument.Pages.Accept(textFragmentAbsorber);

    foreach (TextFragment textFragment in textFragmentAbsorber.TextFragments)
    {
        textFragment.Text = "HIYA HIYA HIYA";
    }

    // Build the output path from the template's directory. Appending "test3.pdf" to the
    // template's full file path produced ".../test.pdftest3.pdf".
    dataDir = System.IO.Path.Combine(System.IO.Path.GetDirectoryName(dataDir), "test3.pdf");

    // Save resulting PDF document.
    pdfDocument.Save(dataDir);
    return(View());
}
/// <summary>
/// Replaces each occurrence of the placeholder
/// "[TextFragmentAbsorber,companyname,Textbox,50]" in ExtractTextPage.pdf with a longer
/// string set in Arial, size 12, navy, and saves
/// RearrangeContentsUsingTextReplacement_out.pdf. Any exception is reported on the console
/// together with a licensing hint.
/// </summary>
public static void Run()
{
    try
    {
        // ExStart:RearrangeContentsUsingTextReplacement
        string baseDir = RunExamples.GetDataDir_AsposePdf_Text();
        Document pdf = new Document(baseDir + "ExtractTextPage.pdf");

        // Search all pages for the placeholder string.
        TextFragmentAbsorber absorber = new TextFragmentAbsorber("[TextFragmentAbsorber,companyname,Textbox,50]");
        pdf.Pages.Accept(absorber);
        TextFragmentCollection placeholders = absorber.TextFragments;

        foreach (TextFragment match in placeholders)
        {
            // Set the font, size and color before swapping in the longer text.
            match.TextState.Font = FontRepository.FindFont("Arial");
            match.TextState.FontSize = 12;
            match.TextState.ForegroundColor = Aspose.Pdf.Color.Navy;
            match.Text = "This is a Larger String for the Testing of this issue";
        }

        string savedTo = baseDir + "RearrangeContentsUsingTextReplacement_out.pdf";
        pdf.Save(savedTo);
        // ExEnd:RearrangeContentsUsingTextReplacement
        Console.WriteLine("\nContents rearranged successfully using text replacement.\nFile saved at " + savedTo);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message + "\nThis example will only work if you apply a valid Aspose License. You can purchase full license or get 30 day temporary license from http:// Www.aspose.com/purchase/default.aspx.");
    }
}
/// <summary>
/// Searches all pages of input.pdf for "text", rewrites every match as "TEXT" (Verdana,
/// size 22, blue on green), saves output.pdf and prints a success message.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main(string[] args)
{
    // Data directory, resolved relative to the current working directory.
    string baseDir = Path.GetFullPath("../../../Data/");
    Document source = new Document(baseDir + "input.pdf");

    TextFragmentAbsorber phraseAbsorber = new TextFragmentAbsorber("text");
    source.Pages.Accept(phraseAbsorber);
    TextFragmentCollection found = phraseAbsorber.TextFragments;

    // Replace the text and restyle each match.
    foreach (TextFragment fragment in found)
    {
        fragment.Text = "TEXT";
        fragment.TextState.Font = FontRepository.FindFont("Verdana");
        fragment.TextState.FontSize = 22;
        fragment.TextState.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);
        fragment.TextState.BackgroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Green);
    }

    string outputPath = baseDir + "output.pdf";
    source.Save(outputPath);

    System.Console.WriteLine("Text replaced successfully!");
}
/// <summary>
/// Catalogue of search patterns that can be passed to a TextFragmentAbsorber.
/// Each absorber is only constructed; none is applied to a document here.
/// </summary>
public static void Run()
{
    // ExStart:TextSegments
    // Exact match of a whole word.
    const string wholeWord = @"\bWord\b";
    // Case-insensitive match.
    const string anyCase = "(?i)Line";
    // Every run of non-whitespace characters (parses all strings in the document).
    const string allStrings = @"[\S]+";
    // The search string plus everything after it up to the line break.
    const string restOfLine = @"(?i)the ((.)*)";
    // Only the text that follows the match.
    const string textAfterMatch = @"(?<=word).*";
    // Hyperlinks / URLs.
    const string hyperlink = @"(http|ftp|https):\/\/([\w\-_]+(?:(?:\.[\w\-_]+)+))([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?";

    TextFragmentAbsorber textFragmentAbsorber;
    textFragmentAbsorber = new TextFragmentAbsorber(wholeWord, new TextSearchOptions(true));
    textFragmentAbsorber = new TextFragmentAbsorber(anyCase, new TextSearchOptions(true));
    textFragmentAbsorber = new TextFragmentAbsorber(allStrings);
    textFragmentAbsorber = new TextFragmentAbsorber(restOfLine);
    textFragmentAbsorber = new TextFragmentAbsorber(textAfterMatch);
    textFragmentAbsorber = new TextFragmentAbsorber(hyperlink);
    // ExEnd:TextSegments
}
/// <summary>
/// Loads ReplaceTextPage.pdf, assigns the "Arial, Bold" font to every text fragment
/// using an absorber configured with FontReplace.RemoveUnusedFonts, and saves the
/// result as RemoveUnusedFonts_out.pdf in the data directory.
/// Any exception is caught and reported on the console.
/// </summary>
public static void Run()
{
    try
    {
        // ExStart:RemoveUnusedFonts
        // The path to the documents directory.
        string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

        // Load source PDF file
        Document doc = new Document(dataDir + "ReplaceTextPage.pdf");

        // The edit options ask for unused fonts to be removed during font replacement.
        TextFragmentAbsorber absorber = new TextFragmentAbsorber(new TextEditOptions(TextEditOptions.FontReplace.RemoveUnusedFonts));
        doc.Pages.Accept(absorber);

        // Iterate through all the TextFragments
        foreach (TextFragment textFragment in absorber.TextFragments)
        {
            textFragment.TextState.Font = FontRepository.FindFont("Arial, Bold");
        }

        dataDir = dataDir + "RemoveUnusedFonts_out.pdf";
        // Save updated document
        doc.Save(dataDir);
        // ExEnd:RemoveUnusedFonts
        Console.WriteLine("\nUnused fonts removed successfully from pdf document.\nFile saved at " + dataDir);
    }
    catch (Exception ex)
    {
        // The URL previously contained a stray space and capital letter, which made it unusable.
        Console.WriteLine(ex.Message + "\nThis example will only work if you apply a valid Aspose License. You can purchase full license or get 30 day temporary license from http://www.aspose.com/purchase/default.aspx.");
    }
}
/// <summary>
/// Replaces every occurrence of "text" in ReplaceTextAll.pdf with "TEXT", restyles
/// the replaced fragments, and saves the result as ReplaceTextAll_out.pdf.
/// </summary>
public static void Run()
{
    // ExStart:ReplaceTextAll
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Open the source document.
    Document pdfDocument = new Document(dataDir + "ReplaceTextAll.pdf");

    // Find all instances of the search phrase on every page.
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("text");
    pdfDocument.Pages.Accept(absorber);

    // Update the text and appearance of each match.
    foreach (TextFragment match in absorber.TextFragments)
    {
        match.Text = "TEXT";
        match.TextState.Font = FontRepository.FindFont("Verdana");
        match.TextState.FontSize = 22;
        match.TextState.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);
        match.TextState.BackgroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Green);
    }

    // Save the resulting PDF document.
    dataDir = dataDir + "ReplaceTextAll_out.pdf";
    pdfDocument.Save(dataDir);
    // ExEnd:ReplaceTextAll
    Console.WriteLine("\nText replaced successfully.\nFile saved at " + dataDir);
}
/// <summary>
/// Searches the test PDF for year ranges such as "1999-2000" using a regular
/// expression and prints the text of every match.
/// </summary>
static void SearchWithRegularExpression()
{
    // Open the document.
    Document pdfDocument = new Document(Config.TestPdf);

    // Absorber for the pattern (e.g. 1999-2000), with regular-expression search enabled.
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("\\d{4}-\\d{4}")
    {
        TextSearchOptions = new TextSearchOptions(true)
    };

    // Run the search over all pages.
    pdfDocument.Pages.Accept(absorber);

    // Print each match. Position, font and colour details are available on the
    // fragment's Position and TextState members if more output is wanted.
    foreach (TextFragment match in absorber.TextFragments)
    {
        Console.WriteLine("Text : {0} ", match.Text);
    }
}
/// <summary>
/// Loads a PDF from blob storage, replaces each "__key__" placeholder found on the
/// first page with the corresponding value (rendered bold), and returns the
/// modified document as an in-memory stream.
/// </summary>
/// <param name="fileName">Name of the blob holding the source PDF.</param>
/// <param name="valuesToSubsitute">Placeholder names (without the surrounding underscores) mapped to their replacement text.</param>
/// <returns>A stream containing the saved PDF, positioned at its beginning.</returns>
public async Task<MemoryStream> SubsituteValuesForPdf(string fileName, Dictionary<string, string> valuesToSubsitute)
{
    var pdfStream = await GetBlobDataFromAzure(ConfigurationName, fileName);
    await SetPdfLicense();
    var pdfDocument = new Document(pdfStream);

    foreach (var entry in valuesToSubsitute)
    {
        // NOTE(review): regular-expression search is enabled, so a key containing
        // regex metacharacters is interpreted as a pattern - confirm this is intended.
        var textFragmentAbsorber = new TextFragmentAbsorber($"__{entry.Key}__")
        {
            TextSearchOptions = new TextSearchOptions(true)
        };

        // Only the first page is searched.
        pdfDocument.Pages[1].Accept(textFragmentAbsorber);

        // Replace every match and make it bold.
        foreach (TextFragment textFragment in textFragmentAbsorber.TextFragments)
        {
            textFragment.Text = entry.Value;
            textFragment.TextState.FontStyle = FontStyles.Bold;
        }
    }

    var streamOutput = new MemoryStream();
    pdfDocument.Save(streamOutput);

    // Rewind so callers that read from the stream see the document rather than
    // starting at the position left behind by the write.
    streamOutput.Position = 0;
    return streamOutput;
}
/// <summary>
/// Demonstrates the search patterns a TextFragmentAbsorber can be built with.
/// The absorbers are created for illustration only and are not run against a document.
/// </summary>
public static void Run()
{
    // ExStart:TextSegments
    TextSearchOptions regexSearch = new TextSearchOptions(true);
    TextFragmentAbsorber textFragmentAbsorber;

    // Whole-word match via word boundaries.
    textFragmentAbsorber = new TextFragmentAbsorber(@"\bWord\b", regexSearch);

    // Match regardless of upper or lower case.
    textFragmentAbsorber = new TextFragmentAbsorber("(?i)Line", new TextSearchOptions(true));

    // Parse all strings inside the PDF document.
    textFragmentAbsorber = new TextFragmentAbsorber(@"[\S]+");

    // Match the search string and capture everything after it up to the line break.
    textFragmentAbsorber = new TextFragmentAbsorber(@"(?i)the ((.)*)");

    // Match only the text following the search string.
    textFragmentAbsorber = new TextFragmentAbsorber(@"(?<=word).*");

    // Match hyperlinks / URLs inside the PDF document.
    textFragmentAbsorber = new TextFragmentAbsorber(@"(http|ftp|https):\/\/([\w\-_]+(?:(?:\.[\w\-_]+)+))([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?");
    // ExEnd:TextSegments
}
/// <summary>
/// Creates a sample PDF with two lines of text, then overlays a button field on each
/// line whose AlternateName carries the tooltip text (one short, one very long), and
/// saves the document back to Tooltip_out.pdf.
/// </summary>
public static void Run()
{
    // ExStart:1
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_Text();
    string outputFile = dataDir + "Tooltip_out.pdf";

    // Create the sample document with both lines of text.
    Document doc = new Document();
    doc.Pages.Add().Paragraphs.Add(new TextFragment("Move the mouse cursor here to display a tooltip"));
    doc.Pages[1].Paragraphs.Add(new TextFragment("Move the mouse cursor here to display a very long tooltip"));
    doc.Save(outputFile);

    // Re-open the document that now contains the text.
    Document document = new Document(outputFile);

    // Sample of a very long tooltip.
    string longTooltip = "Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
        + " sed do eiusmod tempor incididunt ut labore et dolore magna"
        + " aliqua. Ut enim ad minim veniam, quis nostrud exercitation"
        + " ullamco laboris nisi ut aliquip ex ea commodo consequat."
        + " Duis aute irure dolor in reprehenderit in voluptate velit"
        + " esse cillum dolore eu fugiat nulla pariatur. Excepteur sint"
        + " occaecat cupidatat non proident, sunt in culpa qui officia"
        + " deserunt mollit anim id est laborum.";

    // Each entry pairs the phrase to search for with the tooltip to attach to it.
    string[][] tooltipsByPhrase =
    {
        new[] { "Move the mouse cursor here to display a tooltip", "Tooltip for text." },
        new[] { "Move the mouse cursor here to display a very long tooltip", longTooltip }
    };

    foreach (string[] pair in tooltipsByPhrase)
    {
        // Find the phrase on all pages.
        TextFragmentAbsorber absorber = new TextFragmentAbsorber(pair[0]);
        document.Pages.Accept(absorber);

        foreach (TextFragment fragment in absorber.TextFragments)
        {
            // Button placed over the fragment's rectangle on its page.
            ButtonField field = new ButtonField(fragment.Page, fragment.Rectangle);
            // AlternateName value will be displayed as tooltip by a viewer application.
            field.AlternateName = pair[1];
            // Add the button field to the document.
            document.Form.Add(field);
        }
    }

    // Save document
    document.Save(outputFile);
    // ExEnd:1
}
/// <summary>
/// Asserts that the given text occurs exactly once on the first page of the PDF.
/// </summary>
/// <param name="pdf">Document whose first page is searched.</param>
/// <param name="fileName">File name used only in the failure message.</param>
/// <param name="text">Text to search for.</param>
private static void AssertPage1HasText(this Document pdf, string fileName, string text)
{
    var absorber = new TextFragmentAbsorber(text);
    Page firstPage = pdf.Pages[1];
    firstPage.Accept(absorber);

    int matchCount = absorber.TextFragments.Count;
    Assert.AreEqual(1, matchCount, $"Could not find {text} for {fileName}");
}
/// <summary>
/// Runs a case-insensitive regular-expression search for <paramref name="text"/>
/// over the document and returns the absorber holding the matches.
/// </summary>
/// <param name="document">Document to search.</param>
/// <param name="text">Text appended to the "(?i)" prefix to form the search pattern.</param>
/// <returns>The absorber after it has visited the document.</returns>
private static TextFragmentAbsorber FindPageForText(Document document, string text)
{
    // NOTE(review): text is inserted into the pattern unescaped, so regex
    // metacharacters in it are interpreted as pattern syntax.
    string pattern = "(?i)" + text;
    TextSearchOptions regexSearch = new TextSearchOptions(true);

    var absorber = new TextFragmentAbsorber(pattern, regexSearch);
    absorber.Visit(document);
    return absorber;
}
/// <summary>
/// Replaces every occurrence of the requested search text in the document stored
/// under the editor working directory, saves the document in place, deletes the
/// PNG files next to it and invokes ImageConverter on the saved document.
/// </summary>
/// <param name="replaceTextModel">Request carrying the document id, the text to find and its replacement.</param>
/// <returns>Ok with the request model on success; InternalServerError wrapping the exception on failure.</returns>
public IHttpActionResult ReplaceText(ReplaceTextModel replaceTextModel)
{
    // NOTE(review): documentId comes from the request and is used as a path segment;
    // confirm it is validated upstream against directory traversal.
    var documentFileName = System.IO.Path.Combine(Config.Configuration.WorkingDirectory, "Editor", replaceTextModel.documentId, "document.pdf");

    try
    {
        Document doc = new Document(documentFileName);

        // TODO: Improve algorithm - consider a case-insensitive regex search
        // ("(?i)" + txtFind with TextSearchOptions(true)) combined with
        // TextReplaceOptions.ReplaceAdjustment.WholeWordsHyphenation.
        TextFragmentAbsorber absorber = new TextFragmentAbsorber(replaceTextModel.txtFind);
        doc.Pages.Accept(absorber);

        // Replace the text of every match.
        foreach (TextFragment match in absorber.TextFragments)
        {
            match.Text = replaceTextModel.txtReplace;
        }

        doc.Save(documentFileName);

        // Remove the PNG files stored alongside the document.
        var documentDirectory = new DirectoryInfo(Path.GetDirectoryName(documentFileName));
        foreach (FileInfo pngFile in documentDirectory.GetFiles("*.png"))
        {
            pngFile.Delete();
        }

        // NOTE(review): this model is built (and ImageConverter is run) but the
        // request model is what gets returned - confirm which one callers expect.
        var model = new DocStatusModel
        {
            d = ImageConverter(documentFileName),
            Path = replaceTextModel.documentId
        };

        return Ok(replaceTextModel);
    }
    catch (Exception ex)
    {
        return InternalServerError(ex);
    }
}
/// <summary>
/// Builds a table-of-contents page at the front of the selected PDF, with one
/// heading per extracted title linking to the page of that title's first match,
/// and saves the result as Toc_out.pdf. Titles that cannot be found in the
/// document are skipped instead of aborting the whole operation.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnCreateTOC_Click(object sender, EventArgs e)
{
    try
    {
        if (lstTitles.Items.Count > 0)
        {
            Document doc = new Document(txtPDF.Text);
            Page tocPage = doc.Pages.Insert(1);

            // Create object to represent TOC information
            TocInfo tocInfo = new TocInfo();
            TextFragment title = new TextFragment("Table Of Contents");
            title.TextState.FontSize = 20;
            title.TextState.FontStyle = FontStyles.Bold;

            // Set the title for TOC
            tocInfo.Title = title;
            tocPage.TocInfo = tocInfo;

            foreach (var item in lstTitles.Items)
            {
                string titleText = item.ToString();

                TextFragmentAbsorber absorber = new TextFragmentAbsorber(titleText);
                doc.Pages.Accept(absorber);

                // Without a match there is no destination page; indexing the empty
                // collection would throw and abandon every remaining title.
                if (absorber.TextFragments.Count == 0)
                {
                    continue;
                }

                // The collection is indexed from 1; use the page of the first match.
                Page targetPage = doc.Pages[absorber.TextFragments[1].Page.Number];

                // Create Heading object
                Aspose.Pdf.Heading heading2 = new Aspose.Pdf.Heading(1);
                TextSegment segment2 = new TextSegment();
                heading2.TocPage = tocPage;
                heading2.Segments.Add(segment2);

                // Specify the destination page and coordinate for the heading object
                heading2.DestinationPage = targetPage;
                heading2.Top = targetPage.Rect.Height;
                segment2.Text = titleText;

                // Add heading to page containing TOC
                tocPage.Paragraphs.Add(heading2);
            }

            doc.Save("Toc_out.pdf");
            MessageBox.Show("PDF with TOC has been created.", "Success");
        }
        else
        {
            MessageBox.Show("Please extract titles from PDF document first.", "Titles are not extracted");
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "Error");
    }
}
/// <summary>
/// Marks every "Employment History" fragment on the first page of the document as invisible.
/// </summary>
/// <param name="fileName">Not used by this method.</param>
/// <param name="newFileName">Not used by this method.</param>
/// <param name="pdfDocument">Document whose first page is searched and modified.</param>
public void HideHeader(string fileName, string newFileName, Document pdfDocument)
{
    TextFragmentAbsorber headerAbsorber = new TextFragmentAbsorber("Employment History");
    pdfDocument.Pages[1].Accept(headerAbsorber);

    // Iterating an empty collection is a no-op, so no separate emptiness check is needed.
    foreach (TextFragment headerFragment in headerAbsorber.TextFragments)
    {
        headerFragment.TextState.Invisible = true;
    }
}
/// <summary>
/// Searches the first page of SearchRegularExpressionPage.pdf for year ranges such
/// as "1999-2000" and prints text, position, font and colour details of each match.
/// </summary>
public static void Run()
{
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Open the document.
    Document pdfDocument = new Document(dataDir + "SearchRegularExpressionPage.pdf");

    // Absorber for the pattern (e.g. 1999-2000) with regular-expression search enabled.
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("\\d{4}-\\d{4}")
    {
        TextSearchOptions = new TextSearchOptions(true)
    };

    // Search a single page only.
    pdfDocument.Pages[1].Accept(absorber);

    // Report the details of every match.
    foreach (TextFragment match in absorber.TextFragments)
    {
        Console.WriteLine("Text : {0} ", match.Text);
        Console.WriteLine("Position : {0} ", match.Position);
        Console.WriteLine("XIndent : {0} ", match.Position.XIndent);
        Console.WriteLine("YIndent : {0} ", match.Position.YIndent);
        Console.WriteLine("Font - Name : {0}", match.TextState.Font.FontName);
        Console.WriteLine("Font - IsAccessible : {0} ", match.TextState.Font.IsAccessible);
        Console.WriteLine("Font - IsEmbedded : {0} ", match.TextState.Font.IsEmbedded);
        Console.WriteLine("Font - IsSubset : {0} ", match.TextState.Font.IsSubset);
        Console.WriteLine("Font Size : {0} ", match.TextState.FontSize);
        Console.WriteLine("Foreground Color : {0} ", match.TextState.ForegroundColor);
    }
}
/// <summary>
/// Creates a tagged header element whose text, colour and font are copied from
/// one of the text fragments found on the page.
/// </summary>
/// <param name="taggedContent">Tagged content used to create the header element.</param>
/// <param name="page">Page whose text fragments are collected.</param>
/// <param name="textIndex">Index of the source fragment within the page's fragment collection.</param>
/// <param name="headerLevel">Level passed to CreateHeaderElement; defaults to 1.</param>
/// <returns>The newly created header element.</returns>
private static HeaderElement ExtractToHeaderElement(ITaggedContent taggedContent, Page page, int textIndex, int headerLevel = 1)
{
    // Collect every text fragment on the page and pick the requested one.
    TextFragmentAbsorber absorber = new TextFragmentAbsorber();
    page.Accept(absorber);
    TextFragment source = absorber.TextFragments[textIndex];

    HeaderElement header = taggedContent.CreateHeaderElement(headerLevel);

    // Carry over the colour, then the font (looked up by name and flagged as embedded).
    header.StructureTextState.ForegroundColor = source.TextState.ForegroundColor;
    Font embeddedFont = FontRepository.FindFont(source.TextState.Font.FontName);
    embeddedFont.IsEmbedded = true;
    header.StructureTextState.Font = embeddedFont;

    header.SetText(source.Text);
    return header;
}
/// <summary>
/// Finds year ranges such as "1999-2000" on the first page of
/// SearchRegularExpressionPage.pdf and prints the details of each matching fragment.
/// </summary>
public static void Run()
{
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Open the document.
    Document pdfDocument = new Document(dataDir + "SearchRegularExpressionPage.pdf");

    // Build the absorber for the pattern (like 1999-2000) and switch on regex search.
    TextFragmentAbsorber yearRangeAbsorber = new TextFragmentAbsorber("\\d{4}-\\d{4}");
    yearRangeAbsorber.TextSearchOptions = new TextSearchOptions(true);

    // Accept the absorber for a single page.
    Page firstPage = pdfDocument.Pages[1];
    firstPage.Accept(yearRangeAbsorber);

    // Print the properties of each extracted fragment.
    foreach (TextFragment found in yearRangeAbsorber.TextFragments)
    {
        Console.WriteLine("Text : {0} ", found.Text);
        Console.WriteLine("Position : {0} ", found.Position);
        Console.WriteLine("XIndent : {0} ", found.Position.XIndent);
        Console.WriteLine("YIndent : {0} ", found.Position.YIndent);
        Console.WriteLine("Font - Name : {0}", found.TextState.Font.FontName);
        Console.WriteLine("Font - IsAccessible : {0} ", found.TextState.Font.IsAccessible);
        Console.WriteLine("Font - IsEmbedded : {0} ", found.TextState.Font.IsEmbedded);
        Console.WriteLine("Font - IsSubset : {0} ", found.TextState.Font.IsSubset);
        Console.WriteLine("Font Size : {0} ", found.TextState.FontSize);
        Console.WriteLine("Foreground Color : {0} ", found.TextState.ForegroundColor);
    }
}
/// <summary>
/// Searches the second page of input.pdf for "text" and prints the text, position,
/// font and colour details of every segment of every matching fragment.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    // Data directory, resolved relative to the current working directory.
    string dataDir = Path.GetFullPath("../../../Data/");

    // Open the document.
    Document pdfDocument = new Document(dataDir + "input.pdf");

    // Find all instances of the search phrase on page 2 only.
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("text");
    pdfDocument.Pages[2].Accept(absorber);

    // Walk each fragment and each of its segments.
    foreach (TextFragment fragment in absorber.TextFragments)
    {
        foreach (TextSegment segment in fragment.Segments)
        {
            Console.WriteLine("Text : {0} ", segment.Text);
            Console.WriteLine("Position : {0} ", segment.Position);
            Console.WriteLine("XIndent : {0} ", segment.Position.XIndent);
            Console.WriteLine("YIndent : {0} ", segment.Position.YIndent);
            Console.WriteLine("Font - Name : {0}", segment.TextState.Font.FontName);
            Console.WriteLine("Font - IsAccessible : {0} ", segment.TextState.Font.IsAccessible);
            Console.WriteLine("Font - IsEmbedded : {0} ", segment.TextState.Font.IsEmbedded);
            Console.WriteLine("Font - IsSubset : {0} ", segment.TextState.Font.IsSubset);
            Console.WriteLine("Font Size : {0} ", segment.TextState.FontSize);
            Console.WriteLine("Foreground Color : {0} ", segment.TextState.ForegroundColor);
        }
    }
}
/// <summary>
/// Colours red the text lying under (and within 10 units around) every link
/// annotation on the first page of UpdateLinks.pdf and saves the result as
/// UpdateLinkTextColor_out.pdf. Any exception message is written to the console.
/// </summary>
public static void Run()
{
    try
    {
        // ExStart:UpdateLinkTextColor
        // The path to the documents directory.
        string dataDir = RunExamples.GetDataDir_AsposePdf_LinksActions();

        // Load the PDF file
        Document doc = new Document(dataDir + "UpdateLinks.pdf");

        foreach (Annotation annotation in doc.Pages[1].Annotations)
        {
            // Only link annotations are of interest.
            if (!(annotation is LinkAnnotation))
            {
                continue;
            }

            // Widen the annotation rectangle by 10 on every side for the search area.
            TextFragmentAbsorber linkTextAbsorber = new TextFragmentAbsorber();
            Rectangle searchArea = annotation.Rect;
            searchArea.LLX -= 10;
            searchArea.LLY -= 10;
            searchArea.URX += 10;
            searchArea.URY += 10;

            // Restrict the search to that rectangle and run it on the first page.
            linkTextAbsorber.TextSearchOptions = new TextSearchOptions(searchArea);
            linkTextAbsorber.Visit(doc.Pages[1]);

            // Change color of the text.
            foreach (TextFragment linkText in linkTextAbsorber.TextFragments)
            {
                linkText.TextState.ForegroundColor = Color.Red;
            }
        }

        // Save the document with updated link
        dataDir = dataDir + "UpdateLinkTextColor_out.pdf";
        doc.Save(dataDir);
        // ExEnd:UpdateLinkTextColor
        Console.WriteLine("\nLinkAnnotation text color updated successfully.\nFile saved at " + dataDir);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
}
/// <summary>
/// Finds every run of non-whitespace text in SearchAndGetTextFromAll.pdf, calls
/// DrawBox in red for each segment of each match, and saves the result as
/// SearchTextAndDrawRectangle_out.pdf.
/// </summary>
public static void Run()
{
    // ExStart:SearchTextAndDrawRectangle
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Open document
    Document document = new Document(dataDir + "SearchAndGetTextFromAll.pdf");

    // Absorber matching the regular expression, with regex search enabled.
    TextFragmentAbsorber wordAbsorber = new TextFragmentAbsorber(@"[\S]+")
    {
        TextSearchOptions = new TextSearchOptions(true)
    };
    document.Pages.Accept(wordAbsorber);

    // Draw a box for each segment on the page its fragment belongs to.
    var editor = new PdfContentEditor(document);
    foreach (TextFragment fragment in wordAbsorber.TextFragments)
    {
        foreach (TextSegment segment in fragment.Segments)
        {
            DrawBox(editor, fragment.Page.Number, segment, System.Drawing.Color.Red);
        }
    }

    dataDir = dataDir + "SearchTextAndDrawRectangle_out.pdf";
    document.Save(dataDir);
    // ExEnd:SearchTextAndDrawRectangle
    Console.WriteLine("\nRectangle drawn successfully on searched text.\nFile saved at " + dataDir);
}
/// <summary>
/// Finds year ranges such as "1999-2000" on the first page of
/// SearchRegularExpressionPage.pdf, colours each match blue, adds a web link and an
/// underline over it, and saves the result as SearchTextAndAddHyperlink_out.pdf.
/// </summary>
public static void Run()
{
    // ExStart:SearchTextAndAddHyperlink
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Create absorber object to find all instances of the input search phrase
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("\\d{4}-\\d{4}");
    // Enable regular expression search
    absorber.TextSearchOptions = new TextSearchOptions(true);

    // Open document
    PdfContentEditor editor = new PdfContentEditor();
    // Bind source PDF file
    editor.BindPdf(dataDir + "SearchRegularExpressionPage.pdf");
    // Accept the absorber for the page
    editor.Document.Pages[1].Accept(absorber);

    int[] dashArray = { };
    String[] LEArray = { };
    System.Drawing.Color blue = System.Drawing.Color.Blue;

    // Loop through the fragments
    foreach (TextFragment textFragment in absorber.TextFragments)
    {
        textFragment.TextState.ForegroundColor = Aspose.Pdf.Color.Blue;

        // Integer rectangle covering the fragment, slightly enlarged.
        System.Drawing.Rectangle rect = new System.Drawing.Rectangle(
            (int)textFragment.Rectangle.LLX,
            (int)Math.Round(textFragment.Rectangle.LLY),
            (int)Math.Round(textFragment.Rectangle.Width + 2),
            (int)Math.Round(textFragment.Rectangle.Height + 1));

        Enum[] actionName = new Enum[2]
        {
            Aspose.Pdf.Annotations.PredefinedAction.Document_AttachFile,
            Aspose.Pdf.Annotations.PredefinedAction.Document_ExtractPages
        };

        // The link target previously contained a stray space ("http:// Www..."),
        // which is not a valid URL.
        editor.CreateWebLink(rect, "http://www.aspose.com", 1, blue, actionName);

        // Underline drawn one unit below the fragment's lower edge.
        editor.CreateLine(
            rect,
            "",
            (float)textFragment.Rectangle.LLX + 1,
            (float)textFragment.Rectangle.LLY - 1,
            (float)textFragment.Rectangle.URX,
            (float)textFragment.Rectangle.LLY - 1,
            1,
            1,
            blue,
            "S",
            dashArray,
            LEArray);
    }

    dataDir = dataDir + "SearchTextAndAddHyperlink_out.pdf";
    editor.Save(dataDir);
    editor.Close();
    // ExEnd:SearchTextAndAddHyperlink
    Console.WriteLine("\nText replaced and hyperlink added successfully based on a regular expression.\nFile saved at " + dataDir);
}
/// <summary>
/// Replaces every year range such as "1999-2000" on the first page of
/// SearchRegularExpressionPage.pdf with "New Phrase", restyles the replaced text,
/// and saves the result as ReplaceTextonRegularExpression_out.pdf.
/// </summary>
public static void Run()
{
    // ExStart:ReplaceTextonRegularExpression
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Open document
    Document pdfDocument = new Document(dataDir + "SearchRegularExpressionPage.pdf");

    // Absorber for the pattern (like 1999-2000) with regular-expression search enabled.
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("\\d{4}-\\d{4}")
    {
        TextSearchOptions = new TextSearchOptions(true)
    };

    // Accept the absorber for a single page
    pdfDocument.Pages[1].Accept(absorber);

    // Update text and other properties of each match.
    foreach (TextFragment match in absorber.TextFragments)
    {
        match.Text = "New Phrase";
        match.TextState.Font = FontRepository.FindFont("Verdana");
        match.TextState.FontSize = 22;
        match.TextState.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Blue);
        match.TextState.BackgroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Green);
    }

    dataDir = dataDir + "ReplaceTextonRegularExpression_out.pdf";
    pdfDocument.Save(dataDir);
    // ExEnd:ReplaceTextonRegularExpression
    Console.WriteLine("\nText replaced successfully based on a regular expression.\nFile saved at " + dataDir);
}
/// <summary>
/// Finds text matching "D[a-z]{7}:" on the first page of Input_new.pdf, colours each
/// match blue, adds a web link and an underline over it, and saves the result as
/// TextReplaced_with_Links.pdf.
/// </summary>
public static void Run()
{
    // The path to the documents directory.
    string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

    // Absorber for the search pattern, with regular-expression search enabled.
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("D[a-z]{7}:")
    {
        TextSearchOptions = new TextSearchOptions(true)
    };

    // Bind the source PDF to a content editor and search its first page.
    PdfContentEditor editor = new PdfContentEditor();
    editor.BindPdf(dataDir+ "Input_new.pdf");
    editor.Document.Pages[1].Accept(absorber);

    int[] dashArray = { };
    String[] LEArray = { };
    System.Drawing.Color blue = System.Drawing.Color.Blue;

    foreach (TextFragment match in absorber.TextFragments)
    {
        match.TextState.ForegroundColor = Aspose.Pdf.Color.Blue;

        // Integer rectangle covering the match, slightly enlarged.
        int left = (int)match.Rectangle.LLX;
        int bottom = (int)Math.Round(match.Rectangle.LLY);
        int width = (int)Math.Round(match.Rectangle.Width + 2);
        int height = (int)Math.Round(match.Rectangle.Height + 1);
        System.Drawing.Rectangle rect = new System.Drawing.Rectangle(left, bottom, width, height);

        Enum[] actionName = new Enum[2]
        {
            Aspose.Pdf.InteractiveFeatures.PredefinedAction.Document_AttachFile,
            Aspose.Pdf.InteractiveFeatures.PredefinedAction.Document_ExtractPages
        };
        editor.CreateWebLink(rect, "http://www.aspose.com", 1, blue, actionName);

        // Underline drawn one unit below the match's lower edge.
        editor.CreateLine(
            rect,
            "",
            (float)match.Rectangle.LLX + 1,
            (float)match.Rectangle.LLY - 1,
            (float)match.Rectangle.URX,
            (float)match.Rectangle.LLY - 1,
            1,
            1,
            blue,
            "S",
            dashArray,
            LEArray);
    }

    // Save & close the document.
    editor.Save(dataDir+ "TextReplaced_with_Links.pdf");
    editor.Close();
}
/// <summary>
/// Saves a dummy document to PDF with UpdateFields enabled and asserts that the
/// resulting PDF contains the text "Page 1 of 2".
/// </summary>
public void WithUpdateFields()
{
    Document doc = DocumentHelper.CreateDocumentFillWithDummyText();

    // Save with field updating switched on.
    PdfSaveOptions pdfSaveOptions = new PdfSaveOptions();
    pdfSaveOptions.UpdateFields = true;
    string pdfPath = MyDir + @"\Artifacts\UpdateFields_False.pdf";
    doc.Save(pdfPath, pdfSaveOptions);

    // Re-open the saved file with Aspose.Pdf and search for the expected text.
    Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(pdfPath);
    TextFragmentAbsorber pageNumberAbsorber = new TextFragmentAbsorber("Page 1 of 2");
    pdfDocument.Pages.Accept(pageNumberAbsorber);

    // Assert that fields are updated (the fragment collection is indexed from 1).
    TextFragment firstMatch = pageNumberAbsorber.TextFragments[1];
    Assert.AreEqual("Page 1 of 2", firstMatch.Text);
}
/// <summary>
/// Highlights every occurrence of <paramref name="searchText"/> on the page images
/// of Convert/output.pdf. The "search/" folder is emptied, a new sub-folder is
/// created, and for each page the matching fragments are filled in translucent
/// yellow and their segments outlined in green on the page image, which is then
/// saved and scaled into the new sub-folder.
/// </summary>
/// <param name="searchText">Phrase to search for on each page.</param>
/// <param name="pageList">Image file names, one per page, in page order.</param>
/// <returns>The name of the sub-folder under "search/" that holds the scaled images.</returns>
public static string SearchData(string searchText, string[] pageList)
{
    // NOTE(review): the folder name is just the current millisecond (0-999), so it is not unique.
    string name = DateTime.Now.Millisecond.ToString();

    // Clear out results of previous searches.
    System.IO.DirectoryInfo downloadedMessageInfo = new DirectoryInfo(HttpContext.Current.Server.MapPath("search/"));
    foreach (FileInfo file in downloadedMessageInfo.GetFiles())
    {
        file.Delete();
    }
    foreach (DirectoryInfo dir in downloadedMessageInfo.GetDirectories())
    {
        dir.Delete(true);
    }
    System.IO.Directory.CreateDirectory(HttpContext.Current.Server.MapPath("search/" + name));

    Document doc = new Document(HttpContext.Current.Server.MapPath("Convert/output.pdf"));

    for (int i = 1; i <= doc.Pages.Count; i++)
    {
        string filename = "Input/" + pageList[i - 1];
        filename = filename.Replace("image", "image-1");
        string searchImagePath = HttpContext.Current.Server.MapPath(filename.Replace("image-1", "image_search"));

        // The bitmap, graphics context and brush are GDI resources; the using blocks
        // release them even if drawing or saving throws.
        using (Bitmap bmp = (Bitmap)Bitmap.FromFile(HttpContext.Current.Server.MapPath(filename)))
        {
            using (System.Drawing.Graphics gr = System.Drawing.Graphics.FromImage(bmp))
            using (Brush brush = new SolidBrush(System.Drawing.Color.FromArgb(50, 255, 255, 0)))
            {
                // Scale by 150/72 and flip the Y axis so PDF coordinates can be drawn directly.
                float scale = 150 / 72f;
                gr.Transform = new System.Drawing.Drawing2D.Matrix(scale, 0, 0, -scale, 0, bmp.Height);

                Aspose.Pdf.Page page = doc.Pages[i];

                // Find all occurrences of the search text on this page.
                TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(searchText);
                page.Accept(textFragmentAbsorber);
                TextFragmentCollection textFragmentCollection = textFragmentAbsorber.TextFragments;

                foreach (TextFragment textFragment in textFragmentCollection)
                {
                    // Translucent fill over the whole fragment.
                    gr.FillRectangle(
                        brush,
                        (float)(textFragment.Position.XIndent),
                        (float)(textFragment.Position.YIndent),
                        (float)(textFragment.Rectangle.Width),
                        (float)(textFragment.Rectangle.Height));

                    // Outline each segment; the segment collection is indexed from 1.
                    for (int segNum = 1; segNum <= textFragment.Segments.Count; segNum++)
                    {
                        TextSegment segment = textFragment.Segments[segNum];
                        gr.DrawRectangle(
                            Pens.Green,
                            (float)segment.Rectangle.LLX,
                            (float)segment.Rectangle.LLY,
                            (float)segment.Rectangle.Width,
                            (float)segment.Rectangle.Height);
                    }
                }
            }

            bmp.Save(searchImagePath, System.Drawing.Imaging.ImageFormat.Png);
        }

        // Scale the highlighted image into the result folder.
        string height = "";
        string Aratio = "";
        using (System.Drawing.Image image = System.Drawing.Image.FromFile(searchImagePath))
        {
            ScaleImage(image, 1138, 760, HttpContext.Current.Server.MapPath("search/" + name + "/" + pageList[i - 1]), out height, out Aratio);
        }
    }

    return name;
}
/// <summary>
/// Replaces every case-insensitive match of <paramref name="txtFind"/> in
/// Convert/output.pdf with <paramref name="txtReplace"/>, saves the document in
/// place, empties the "Input/" folder and re-renders each page to a PNG plus a
/// scaled copy.
/// </summary>
/// <param name="txtFind">Search text; it is appended to "(?i)" and used as a regular expression.</param>
/// <param name="txtReplace">Replacement text.</param>
/// <param name="pageList">Image file names, one per page, in page order.</param>
/// <returns>Always "success"; failures are written to the trace log rather than propagated.</returns>
public static string ReplaceText(string txtFind, string txtReplace, string[] pageList)
{
    try
    {
        string pdfPath = HttpContext.Current.Server.MapPath("Convert/output.pdf");
        Document doc = new Document(pdfPath);

        // NOTE(review): txtFind is inserted into the pattern unescaped, so regex
        // metacharacters in it are interpreted as pattern syntax.
        TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("(?i)" + txtFind, new Aspose.Pdf.Text.TextOptions.TextSearchOptions(true));
        textFragmentAbsorber.TextReplaceOptions.ReplaceAdjustmentAction = TextReplaceOptions.ReplaceAdjustment.WholeWordsHyphenation;

        // Accept the absorber for all the pages
        doc.Pages.Accept(textFragmentAbsorber);

        // Replace the text of every match.
        foreach (TextFragment textFragment in textFragmentAbsorber.TextFragments)
        {
            textFragment.Text = txtReplace;
        }

        // Save and reload so the pages are rendered from the saved file.
        doc.Save(pdfPath);
        doc = new Document(pdfPath);

        // Remove the previously rendered page images.
        System.IO.DirectoryInfo downloadedMessageInfo = new DirectoryInfo(HttpContext.Current.Server.MapPath("Input/"));
        foreach (FileInfo file in downloadedMessageInfo.GetFiles())
        {
            file.Delete();
        }

        for (int pageCount = 1; pageCount <= doc.Pages.Count; pageCount++)
        {
            string filename = "Input/" + pageList[pageCount - 1];
            filename = filename.Replace("image", "image-1");
            string imagePath = HttpContext.Current.Server.MapPath(filename);

            // Render the page to PNG with the device's default settings. The stream is
            // closed at the end of the using block, before the file is re-opened below.
            using (FileStream imageStream = new FileStream(imagePath, FileMode.Create))
            {
                PngDevice pngDevice = new PngDevice();
                pngDevice.Process(doc.Pages[pageCount], imageStream);
            }

            // Produce the scaled copy; the using block releases the image even if scaling throws.
            string height = "";
            string Aratio = "";
            using (System.Drawing.Image image = System.Drawing.Image.FromFile(imagePath))
            {
                ScaleImage(image, 1138, 760, HttpContext.Current.Server.MapPath(filename.Replace("image-1", "image")), out height, out Aratio);
            }
        }
    }
    catch (Exception exp)
    {
        // Previously swallowed silently. The return value is kept for existing callers,
        // but the failure is now at least recorded.
        // NOTE(review): consider reporting failure to the caller instead of "success".
        System.Diagnostics.Trace.TraceError("ReplaceText failed: " + exp);
    }

    return "success";
}
/// <summary>
/// Example: renders an image of "input.pdf" via <c>PdfConverter</c> and outlines the
/// text found on page 1 — yellow for each text fragment (non-whitespace run), green
/// for each segment and black for each character — then saves the result as
/// "HighlightCharacterInPDF_out.png" in the data directory.
/// </summary>
/// <remarks>
/// Any exception is caught and its message is written to the console together with a
/// licensing hint; nothing is rethrown.
/// </remarks>
public static void Run()
{
    try
    {
        // ExStart:HighlightCharacterInPDF
        // The path to the documents directory.
        string dataDir = RunExamples.GetDataDir_AsposePdf_Text();

        int resolution = 150;

        using (Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(dataDir + "input.pdf"))
        using (MemoryStream ms = new MemoryStream())
        {
            // Fetch the first image produced by the converter (presumably page 1 — the
            // same page whose text is highlighted below) as PNG into the memory stream.
            PdfConverter conv = new PdfConverter(pdfDocument);
            conv.Resolution = new Resolution(resolution, resolution);
            conv.GetNextImage(ms, System.Drawing.Imaging.ImageFormat.Png);

            // The bitmap is created from the stream, so it is used and saved while the stream is still open.
            using (Bitmap bmp = (Bitmap)Bitmap.FromStream(ms))
            {
                using (System.Drawing.Graphics gr = System.Drawing.Graphics.FromImage(bmp))
                {
                    // Scale by resolution / 72 and flip the Y axis (negative Y scale, offset by
                    // the bitmap height) so rectangles reported by the PDF API can be drawn
                    // directly. Assumes PDF coordinates are 72 units per inch with a
                    // bottom-left origin — TODO confirm.
                    float scale = resolution / 72f;
                    gr.Transform = new System.Drawing.Drawing2D.Matrix(scale, 0, 0, -scale, 0, bmp.Height);

                    // Only page 1 is highlighted. The original looped over every page but
                    // always read Pages[1] and only drew on the first pass, so a single
                    // pass produces the same picture without re-scanning the page.
                    if (pdfDocument.Pages.Count > 0)
                    {
                        Page page = pdfDocument.Pages[1];

                        // Create TextAbsorber object to find all words
                        TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(@"[\S]+");
                        textFragmentAbsorber.TextSearchOptions.IsRegularExpressionUsed = true;
                        page.Accept(textFragmentAbsorber);

                        // Get the extracted text fragments
                        TextFragmentCollection textFragmentCollection = textFragmentAbsorber.TextFragments;

                        // The page rectangle is only used for the diagnostic output below, so fetch it once.
                        Aspose.Pdf.Rectangle rect = page.GetPageRect(true);

                        // Loop through the fragments
                        foreach (TextFragment textFragment in textFragmentCollection)
                        {
                            // Outline the whole fragment.
                            gr.DrawRectangle(
                                Pens.Yellow,
                                (float)textFragment.Position.XIndent,
                                (float)textFragment.Position.YIndent,
                                (float)textFragment.Rectangle.Width,
                                (float)textFragment.Rectangle.Height);

                            // Segment and character collections are indexed from 1 here.
                            for (int segNum = 1; segNum <= textFragment.Segments.Count; segNum++)
                            {
                                TextSegment segment = textFragment.Segments[segNum];

                                for (int charNum = 1; charNum <= segment.Characters.Count; charNum++)
                                {
                                    CharInfo characterInfo = segment.Characters[charNum];

                                    Console.WriteLine("TextFragment = " + textFragment.Text + "    Page URY = " + rect.URY +
                                                      "   TextFragment URY = " + textFragment.Rectangle.URY);

                                    // Outline the individual character.
                                    gr.DrawRectangle(
                                        Pens.Black,
                                        (float)characterInfo.Rectangle.LLX,
                                        (float)characterInfo.Rectangle.LLY,
                                        (float)characterInfo.Rectangle.Width,
                                        (float)characterInfo.Rectangle.Height);
                                }

                                // Outline the segment.
                                gr.DrawRectangle(
                                    Pens.Green,
                                    (float)segment.Rectangle.LLX,
                                    (float)segment.Rectangle.LLY,
                                    (float)segment.Rectangle.Width,
                                    (float)segment.Rectangle.Height);
                            }
                        }
                    }
                }

                dataDir = dataDir + "HighlightCharacterInPDF_out.png";
                bmp.Save(dataDir, System.Drawing.Imaging.ImageFormat.Png);
            }
        }
        // ExEnd:HighlightCharacterInPDF
        Console.WriteLine("\nCharacters highlighted successfully in pdf document.\nFile saved at " + dataDir);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message + "\nThis example will only work if you apply a valid Aspose License. You can purchase full license or get 30 day temporary license from http://www.aspose.com/purchase/default.aspx.");
    }
}