Beispiel #1
0
        /// <summary>
        /// Loads the sample PDF from the site's <c>bin</c> folder and writes its document
        /// information to the response as HTML, one field per line.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath(@".\bin\sample1.pdf");

            // The using block releases the extractor even when loading or reading a property throws.
            using (InfoExtractor extractor = new InfoExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Display document information
                Response.Clear();
                Response.ContentType = "text/html";

                WriteField("Author", extractor.Author);
                WriteField("Creator", extractor.Creator);
                WriteField("Producer", extractor.Producer);
                WriteField("Subject", extractor.Subject);
                WriteField("Title", extractor.Title);
                WriteField("CreationDate", extractor.CreationDate);
                WriteField("Keywords", extractor.Keywords);
                WriteField("Bookmarks", extractor.Bookmarks);
                WriteField("Encrypted", extractor.Encrypted);
            }

            // End the response only after the extractor has been released.
            Response.End();
        }

        /// <summary>
        /// Writes one "label: value" line to the response, HTML-encoding the value so that
        /// characters such as '&lt;' or '&amp;' coming from the document cannot break or inject markup.
        /// </summary>
        /// <param name="label">Field caption; written as-is.</param>
        /// <param name="value">Field value; converted to text (null becomes empty) and HTML-encoded.</param>
        private void WriteField(string label, object value)
        {
            Response.Write(label + ": " + Server.HtmlEncode(Convert.ToString(value)) + "<br/>");
        }
        /// <summary>
        /// Loads <c>sample1.pdf</c> from the working directory and prints its document
        /// information to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.InfoExtractor instance.
            // The using block guarantees cleanup even if loading or reading a property throws.
            using (InfoExtractor extractor = new InfoExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@".\sample1.pdf");

                Console.WriteLine("Author:       " + extractor.Author);
                Console.WriteLine("Creator:      " + extractor.Creator);
                Console.WriteLine("Producer:     " + extractor.Producer);
                Console.WriteLine("Subject:      " + extractor.Subject);
                Console.WriteLine("Title:        " + extractor.Title);
                Console.WriteLine("CreationDate: " + extractor.CreationDate);
                Console.WriteLine("Keywords:     " + extractor.Keywords);
                Console.WriteLine("Bookmarks:    " + extractor.Bookmarks);
                Console.WriteLine("Encrypted:    " + extractor.Encrypted);
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
Beispiel #3
0
        /// <summary>
        /// Splits <c>sample.pdf</c> into 10-page pieces on thread-pool threads, merges the
        /// pieces into <c>result.pdf</c>, removes the temporary files and prints the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string inputFile     = "sample.pdf";
            const string resultFile    = "result.pdf";
            const int    pagesPerPiece = 10;

            int pageCount;

            // Get document page count
            using (var infoExtractor = new InfoExtractor("demo", "demo"))
            {
                infoExtractor.LoadDocumentFromFile(inputFile);
                pageCount = infoExtractor.GetPageCount();
            }

            // Process the document by 10-page pieces (round up so a partial last piece is included)
            int numberOfThreads = (pageCount + pagesPerPiece - 1) / pagesPerPiece;

            ManualResetEvent[] doneEvents = new ManualResetEvent[numberOfThreads];
            Stopwatch          stopwatch  = Stopwatch.StartNew();
            string[]           pieces     = new string[numberOfThreads];

            for (int i = 0; i < numberOfThreads; i++)
            {
                doneEvents[i] = new ManualResetEvent(false);

                int startPage = i * pagesPerPiece;
                int endPage   = Math.Min(pageCount - 1, (i + 1) * pagesPerPiece - 1);

                pieces[i] = string.Format("temp-{0}-{1}.pdf", startPage, endPage);

                // The state array layout is consumed by ThreadProc; keep element order unchanged.
                ThreadPool.QueueUserWorkItem(new WaitCallback(ThreadProc),
                                             new object[] { i, doneEvents[i], inputFile, pieces[i], startPage, endPage });
            }

            // Wait for all threads. Each handle is awaited in turn because WaitHandle.WaitAll
            // rejects more than 64 handles, which a document of over 640 pages would exceed.
            foreach (ManualResetEvent doneEvent in doneEvents)
            {
                doneEvent.WaitOne();
                doneEvent.Close();
            }

            try
            {
                // Merge pieces
                using (DocumentMerger merger = new DocumentMerger("demo", "demo"))
                {
                    merger.Merge(pieces, resultFile);
                }
            }
            finally
            {
                // Delete temp files, also when merging failed
                foreach (string tempFile in pieces)
                {
                    File.Delete(tempFile);
                }
            }

            Console.WriteLine("All done in {0}.", stopwatch.Elapsed);
            Console.WriteLine();

            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }
        /// <summary>Populates a store app object from the DOM of its public store web page.</summary>
        /// <param name="storeApp">App object whose fields are filled from the page.</param>
        /// <param name="dom">Query object used to explore the DOM of the store app web page.</param>
        public override void ObjectDOMMapper(IStoreApp storeApp, CQ dom)
        {
            // Fields common to every store app.
            var appName = dom["h1[itemprop='name']"].Text();
            storeApp.Name = appName;

            var developer = dom["#AppDeveloper"].Text().Trim();
            storeApp.Author = developer;

            var screenshotSource = dom["img#ScreenshotImage"].Attr("src");
            storeApp.ThumbnailUri = new Uri(screenshotSource);

            // Everything below applies to Windows Store apps only.
            var winstoreApp = storeApp as WindowsStoreApp;

            if (winstoreApp == null)
            {
                return;
            }

            // Flatten the DOM to text and take the first capture group of the
            // InfoExtractor match as the package family name.
            var pageText = dom.Text();
            var pfnMatch = InfoExtractor.Match(pageText);

            winstoreApp.PackageFamilyName = pfnMatch.Groups[1].Value;
        }
Beispiel #5
0
        /// <summary>
        /// Finds the instances that correspond to a syntax reference. When a symbol can be
        /// resolved for the reference, instances are matched by symbol identity; otherwise
        /// they are looked up by the names extracted from the reference.
        /// </summary>
        /// <param name="reference">Syntax reference to resolve.</param>
        /// <param name="compilation">Compilation used to resolve the reference to a symbol.</param>
        /// <returns>
        /// A lazily evaluated filter over <c>Instances</c> when a symbol was found, otherwise
        /// the distinct set of instances found by name (possibly empty).
        /// </returns>
        public IEnumerable <Instance> FindLocalInstance(SyntaxReference reference, Compilation compilation)
        {
            // Prefer the declaring symbol; fall back to the symbol info only when there is none.
            ISymbol symbol = reference.GetDeclaringSymbol(compilation)
                             ?? reference.GetSymbolInfo(compilation).Symbol;

            if (symbol != null)
            {
                //Should we search the whole identity stack or just the top one or search via the frame?
                return Instances.Where(s => s.Identities.Any(q => q.Symbol.Equals(symbol)));
            }

            // No symbol available: collect instances by every extracted name.
            HashSet <Instance> matches = new HashSet <Instance>();

            MultiDictionary <InfoExtractor.Info, object> extracted = InfoExtractor.ExtractInfo(reference);

            if (!extracted.ContainsKey(InfoExtractor.Info.NAME))
            {
                return matches;
            }

            foreach (Object entry in extracted[InfoExtractor.Info.NAME])
            {
                String candidate = entry as string;

                // Entries that are not strings are ignored.
                if (candidate != null)
                {
                    foreach (Instance instance in FindLocalInstance(candidate))
                    {
                        matches.Add(instance);
                    }
                }
            }

            return matches;
        }
Beispiel #6
0
        /// <summary>
        /// Lists every PDF file in the directory four levels above the working directory,
        /// printing its document information and the first two text lines of its first page.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            InfoExtractor infoExtractor = null;
            TextExtractor textExtractor = null;

            try
            {
                // Create Bytescout.PDFExtractor.InfoExtractor instance
                infoExtractor = new InfoExtractor();

                infoExtractor.RegistrationName = "demo";
                infoExtractor.RegistrationKey  = "demo";

                textExtractor = new TextExtractor();

                textExtractor.RegistrationName = "demo";
                textExtractor.RegistrationKey  = "demo";

                // List all PDF files in directory
                foreach (string file in Directory.GetFiles(@"..\..\..\..", "*.pdf"))
                {
                    infoExtractor.LoadDocumentFromFile(file);

                    Console.WriteLine("File Name:      " + Path.GetFileName(file));
                    Console.WriteLine("Page Count:     " + infoExtractor.GetPageCount());
                    Console.WriteLine("Author:         " + infoExtractor.Author);
                    Console.WriteLine("Title:          " + infoExtractor.Title);
                    Console.WriteLine("Producer:       " + infoExtractor.Producer);
                    Console.WriteLine("Subject:        " + infoExtractor.Subject);
                    Console.WriteLine("CreationDate:   " + infoExtractor.CreationDate);
                    Console.WriteLine("Text (first 2 lines): ");

                    // Load a couple of lines from each document
                    textExtractor.LoadDocumentFromFile(file);
                    using (StringReader stringReader = new StringReader(textExtractor.GetTextFromPage(0)))
                    {
                        // ReadLine returns null past the end of the text; WriteLine then prints an empty line.
                        Console.WriteLine(stringReader.ReadLine());
                        Console.WriteLine(stringReader.ReadLine());
                    }
                    Console.WriteLine();
                }
            }
            finally
            {
                // Cleanup runs even when a file fails to load or extract.
                if (infoExtractor != null)
                {
                    infoExtractor.Dispose();
                }

                if (textExtractor != null)
                {
                    textExtractor.Dispose();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
Beispiel #7
0
        /// <summary>
        /// Processes <c>sample.pdf</c> in 10-page chunks on thread-pool threads, each chunk
        /// producing a temporary text file, then concatenates the chunk files into
        /// <c>result.txt</c>, removes the temporary files and prints the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string inputFileName  = "sample.pdf";
            const string resultFileName = "result.txt";
            const int    ChunkSize      = 10;

            int pageCount;

            // Get document page count
            using (var infoExtractor = new InfoExtractor("demo", "demo"))
            {
                infoExtractor.LoadDocumentFromFile(inputFileName);
                pageCount = infoExtractor.GetPageCount();
            }

            Stopwatch stopwatch = Stopwatch.StartNew();

            // Round up so a partial last chunk is included
            int numberOfThreads = (pageCount + ChunkSize - 1) / ChunkSize;

            ManualResetEvent allFinishedEvent = new ManualResetEvent(false);

            _runningThreadsCounter = 0;
            string[] chunks = new string[numberOfThreads];

            for (int i = 0; i < numberOfThreads; i++)
            {
                // Wait for the queue
                _threadLimiter.WaitOne();

                int startPage = i * ChunkSize;
                int endPage   = Math.Min(pageCount - 1, (i + 1) * ChunkSize - 1);

                // Prepare temp file name for the chunk
                chunks[i] = string.Format("temp-{0}-{1}.txt", startPage, endPage);

                // Increase the thread counter
                // NOTE(review): presumably ThreadProc decrements this counter and signals
                // allFinishedEvent when it reaches zero. If so, a worker that finishes before the
                // next chunk is queued could signal early. Confirm against ThreadProc.
                Interlocked.Increment(ref _runningThreadsCounter);

                // The state array layout is consumed by ThreadProc; keep element order unchanged.
                ThreadPool.QueueUserWorkItem(new WaitCallback(ThreadProc),
                                             new object[] { i, allFinishedEvent, inputFileName, chunks[i], startPage, endPage });
            }

            // Wait for all threads. With no chunks queued nothing can signal the event,
            // so waiting would block forever.
            if (numberOfThreads > 0)
            {
                allFinishedEvent.WaitOne();
            }

            try
            {
                // Merge pieces into a single text file
                using (Stream resultFileStream = File.Create(resultFileName))
                {
                    foreach (string tempFile in chunks)
                    {
                        using (Stream srcStream = File.OpenRead(tempFile))
                        {
                            srcStream.CopyTo(resultFileStream);
                        }
                    }
                }
            }
            finally
            {
                // Delete temp files, also when merging failed
                foreach (string tempFile in chunks)
                {
                    File.Delete(tempFile);
                }
            }

            Console.WriteLine("All done in {0}.", stopwatch.Elapsed);
            Console.WriteLine();

            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }