Ejemplo n.º 1
0
        /// <summary>
        /// Returns the 1-based page numbers on which the word or words can be found,
        /// separated by commas (ex: 7,8,9,10).
        /// </summary>
        /// <param name="szPdfPathConst">Path of the PDF file to open in Acrobat.</param>
        /// <param name="searchword">Word or words to search for.</param>
        /// <param name="bCaseSensitive">0 = false, 1 = true.</param>
        /// <param name="bWholeWordsOnly">0 = false, 1 = true.</param>
        /// <returns>
        /// A comma-delimited list of page numbers in ascending order; "0" when the text is
        /// not found or the document cannot be opened; "Unknown Exception" when any
        /// exception is raised while talking to Acrobat.
        /// </returns>
        public string GetPageNumforWord(string szPdfPathConst, string searchword, int bCaseSensitive, int bWholeWordsOnly)
        {
            string     PageNumConsol = "";
            List <int> PageList      = new List <int>();
            CAcroAVDoc avDoc         = null;
            bool       docOpened     = false;

            try
            {
                //Creating the application object, as the original code did, before touching the document
                CAcroApp avApp = new AcroAppClass();

                avDoc = new AcroAVDocClass();

                //open the PDF if it isn't already open
                docOpened = avDoc.Open(szPdfPathConst, "");
                if (docOpened)
                {
                    CAcroPDDoc pdDoc = (CAcroPDDoc)avDoc.GetPDDoc();

                    //Getting Total Number of Pages in the PDF
                    int iNum = pdDoc.GetNumPages();

                    CAcroAVPageView avPage = (CAcroAVPageView)avDoc.GetAVPageView();

                    //Zero-based index of the page the next search starts from
                    int ScanPage = 0;
                    while (ScanPage < iNum)
                    {
                        avPage.GoTo(ScanPage);

                        //A failed search must not record anything: the view is still on
                        //ScanPage, which is only where we navigated to, not a hit.
                        if (!avDoc.FindText(searchword, bCaseSensitive, bWholeWordsOnly, 0))
                        {
                            break;
                        }

                        int FoundPage = avPage.GetPageNum();

                        //Guard against the search landing on a page before the one we
                        //started from (e.g. if it wraps around): every page from there on
                        //has already been scanned, so continuing would loop forever.
                        if (FoundPage < ScanPage)
                        {
                            break;
                        }

                        //First Page is 0 and thus offset is being taken care of
                        PageList.Add(FoundPage + 1);

                        //Resume on the page after the hit; this also keeps the recorded
                        //pages strictly increasing, so no de-duplication is needed.
                        ScanPage = FoundPage + 1;
                    }
                }

                //"0" signals that nothing was found or the document could not be opened
                if (PageList.Count == 0)
                {
                    PageList.Add(0);
                }

                //Converting Integer List for Page List to Comma Delimited List
                List <string> PageListString = PageList.ConvertAll <string>(delegate(int page)
                {
                    return(page.ToString());
                });
                PageNumConsol = string.Join(",", PageListString.ToArray());
            }
            catch (Exception)
            {
                //Callers receive this sentinel instead of an exception
                PageNumConsol = "Unknown Exception";
            }
            finally
            {
                //Close the document we opened (without saving) so that repeated calls do
                //not pile up open documents inside Acrobat.
                if (docOpened)
                {
                    try
                    {
                        avDoc.Close(1);
                    }
                    catch (System.Runtime.InteropServices.COMException)
                    {
                        //Best-effort cleanup: a failure to close must not replace the result
                    }
                }
            }

            return(PageNumConsol);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Returns the 1-based page range, formatted as "first-last", that starts on the
        /// first page containing <paramref name="HeaderStr"/> and ends on the first page
        /// containing <paramref name="FooterStr"/>. Both searches are case sensitive and
        /// match whole words only.
        /// </summary>
        /// <param name="szPdfPathConst">Path of the PDF file to open in Acrobat.</param>
        /// <param name="HeaderStr">Text marking the start of the range.</param>
        /// <param name="FooterStr">Text marking the end of the range.</param>
        /// <param name="includeFirstPageInRange">When false, the range starts on the page after the header page.</param>
        /// <param name="includeLastPageInRange">When false, the range ends on the page before the footer page.</param>
        /// <returns>
        /// "first-last", or "No Range Found" when either text is missing or the document
        /// cannot be opened. The bounds are not validated against each other, so the first
        /// number can exceed the last (e.g. both texts on one page with both pages excluded).
        /// </returns>
        public String getPageRangeBetweenStrings(String szPdfPathConst, String HeaderStr, String FooterStr, Boolean includeFirstPageInRange, Boolean includeLastPageInRange)
        {
            CAcroApp   avApp = new AcroAppClass();
            CAcroAVDoc avDoc = new AcroAVDocClass();

            //Without an open document there is nothing to search
            if (!avDoc.Open(szPdfPathConst, ""))
            {
                return("No Range Found");
            }

            try
            {
                //set AVPage View object
                CAcroAVPageView avPage = (CAcroAVPageView)avDoc.GetAVPageView();
                avApp.Show();

                //The last argument (bReset = 1) restarts each search from the first page, so
                //the first hit is the lowest page containing the text and the footer
                //search does not depend on where the header search left the view.
                if (!avDoc.FindText(HeaderStr, 1, 1, 1))
                {
                    return("No Range Found");
                }
                int MinimumHeaderRange = avPage.GetPageNum();

                if (!avDoc.FindText(FooterStr, 1, 1, 1))
                {
                    return("No Range Found");
                }
                int MinimumFooterRange = avPage.GetPageNum();

                //First Page is 0 and thus offset is being taken care of
                int HeaderFinalPageNumber = MinimumHeaderRange + 1;
                int FooterFinalPageNumber = MinimumFooterRange + 1;

                if (!includeFirstPageInRange)
                {
                    HeaderFinalPageNumber++;
                }
                if (!includeLastPageInRange)
                {
                    FooterFinalPageNumber--;
                }

                return(HeaderFinalPageNumber + "-" + FooterFinalPageNumber);
            }
            finally
            {
                //Close the document we opened (without saving) so that repeated calls do
                //not pile up open documents inside Acrobat.
                try
                {
                    avDoc.Close(1);
                }
                catch (System.Runtime.InteropServices.COMException)
                {
                    //Best-effort cleanup: a failure to close must not mask the result
                }
            }
        }