/**
     * Write the text of every page of an already opened document to outfp.
     *
     * Each text chunk returned by TET is encoded with a UnicodeEncoding
     * (constructed with bigEndian=false) and followed by the separator string.
     * The page option list and the separator are taken from the names
     * "pageoptlist" and "separator", which are defined outside this method.
     * Pages that cannot be opened are reported on the console and skipped.
     *
     * @param tet
     *            The TET object
     * @param doc
     *            A valid TET document handle
     * @param outfp
     *            Binary writer that receives the encoded text
     */
    static void extract_text(TET tet, int doc, BinaryWriter outfp)
    {
        UnicodeEncoding utf16 = new UnicodeEncoding(false, true);

        /* Number of pages, queried through pCOS. */
        int pageCount = (int)tet.pcos_get_number(doc, "length:pages");

        for (int pageno = 1; pageno <= pageCount; ++pageno)
        {
            int page = tet.open_page(doc, pageno, pageoptlist);

            if (page == -1)
            {
                /* Report the problem and carry on with the next page. */
                Console.WriteLine("Error {0} in  {1}() on page {2}: {3}",
                                  tet.get_errnum(), tet.get_apiname(), pageno,
                                  tet.get_errmsg());
                continue;
            }

            /*
             * Fetch chunks until TET returns null. A single pass would be
             * enough for granularity=page, but other granularities deliver
             * several chunks per page.
             */
            for (String chunk = tet.get_text(page);
                 chunk != null;
                 chunk = tet.get_text(page))
            {
                outfp.Write(utf16.GetBytes(chunk));

                /* separator between chunks of text */
                outfp.Write(utf16.GetBytes(separator));
            }

            /* A null result from get_text() may also indicate an error. */
            if (tet.get_errnum() != 0)
            {
                Console.WriteLine("Error {0} in  {1}() on page {2}: {3}",
                                  tet.get_errnum(), tet.get_apiname(), pageno,
                                  tet.get_errmsg());
            }

            tet.close_page(page);
        }
    }
예제 #2
0
    /**
     * Entry point of the image_resources sample: opens every page of the
     * input PDF (so that TET can merge images), then walks the document's
     * image resources, reports each one and writes it to a file named
     * "<basename>_I<imageid>".
     *
     * @param args
     *            Command line; exactly one argument, the input PDF file name
     *
     * @return 0 on success; 2 on a usage error, if the document cannot be
     *         opened, or if an exception is caught
     */
    static int Main(string[] args)
    {
        /* global option list */
        string globaloptlist = "searchpath={{../data} {../../data}}";

        /* document-specific  option list */
        string docoptlist = "";

        /* page-specific option list  e.g
         * "imageanalysis={merge={gap=1} smallimages={maxwidth=20}}"
         */
        string pageoptlist = "";


        TET    tet;
        int    pageno = 0;
        string outfilebase;

        if (args.Length != 1)
        {
            Console.WriteLine("usage: image_resources <filename>");
            return(2);
        }

        /*
         * Derive the base name of the output files by stripping a trailing
         * ".pdf" or ".PDF". Both suffix tests must be guarded by the length
         * check: without the extra parentheses the ".PDF" test was evaluated
         * even for names shorter than four characters, where Substring()
         * throws ArgumentOutOfRangeException.
         */
        outfilebase = args[0];
        if (outfilebase.Length > 4 &&
            (outfilebase.EndsWith(".pdf", StringComparison.Ordinal) ||
             outfilebase.EndsWith(".PDF", StringComparison.Ordinal)))
        {
            outfilebase = outfilebase.Substring(0, outfilebase.Length - 4);
        }

        tet = new TET();

        try
        {
            int n_pages;

            tet.set_option(globaloptlist);

            int doc = tet.open_document(args[0], docoptlist);

            if (doc == -1)
            {
                Console.WriteLine("Error {0} in {1}(): {2}",
                                  tet.get_errnum(), tet.get_apiname(), tet.get_errmsg());
                return(2);
            }


            /* Images will only be merged upon opening a page.
             * In order to enumerate all merged image resources
             * we open all pages before extracting the images.
             */

            /* get number of pages in the document */
            n_pages = (int)tet.pcos_get_number(doc, "length:pages");

            /* Loop over all pages to trigger image merging */
            for (pageno = 1; pageno <= n_pages; ++pageno)
            {
                int page = tet.open_page(doc, pageno, pageoptlist);

                if (page == -1)
                {
                    Console.WriteLine("Error {0} in {1}() on page {2}: {3}",
                                      tet.get_errnum(), tet.get_apiname(), pageno,
                                      tet.get_errmsg());
                    continue;                    /* process next page */
                }

                if (tet.get_errnum() != 0)
                {
                    Console.WriteLine("Error {0} in {1}() on page {2}: {3}",
                                      tet.get_errnum(), tet.get_apiname(), pageno,
                                      tet.get_errmsg());
                }
                tet.close_page(page);
            }

            int imageid, n_images;

            /* Get the number of images in the document */
            n_images = (int)tet.pcos_get_number(doc, "length:images");

            /* Loop over image resources in the document */
            for (imageid = 0; imageid < n_images; ++imageid)
            {
                string imageoptlist;

                /* Skip images which have been consumed by merging */
                int mergetype = (int)tet.pcos_get_number(doc,
                                                         "images[" + imageid + "]/mergetype");

                if (mergetype == 2)
                {
                    continue;
                }

                /* Skip small images (see "smallimages" option) */
                if (tet.pcos_get_number(doc, "images[" + imageid + "]/small") > 0)
                {
                    continue;
                }

                /* Report image details: pixel geometry, color space etc. */
                report_image_info(tet, doc, imageid);

                /* Write image data to file */
                imageoptlist = " filename={" + outfilebase + "_I" + imageid + "}";

                if (tet.write_image_file(doc, imageid, imageoptlist) == -1)
                {
                    Console.WriteLine(
                        "Error {0} in {1}(): {2}",
                        tet.get_errnum(), tet.get_apiname(), tet.get_errmsg());
                    continue;              /* process next image */
                }
            }
            tet.close_document(doc);
        }
        catch (TETException e) {
            /* caught exception thrown by TET */
            Console.WriteLine("Error {0} in {1}(): {2}",
                              e.get_errnum(), e.get_apiname(), e.get_errmsg());
            return(2);
        }
        catch (Exception e)
        {
            Console.WriteLine("General Exception: " + e.ToString());
            return(2);
        }
        finally
        {
            /* Runs on every exit path from the try block, including the early returns. */
            if (tet != null)
            {
                tet.Dispose();
            }
        }

        return(0);
    }
    /**
     * Open a document given either by file name or as in-memory data and hand
     * it to the process_document(outfp, tet, doc) overload (defined outside
     * this block).
     *
     * When filename is null or empty, a PVF (virtual) file named
     * "/pvf/attachment" is created from data and opened instead. A fresh TET
     * object is created for every call, so an exception raised while handling
     * this document does not affect the caller's own TET object.
     *
     * @param outfp
     *            Output writer passed through to the document processing
     * @param filename
     *            Name of a physical file, or null/empty to use data
     * @param realname
     *            Name used in error messages to identify the attachment
     * @param data
     *            Document contents; only used when filename is null or empty
     *
     * @return 0 if successful, otherwise a non-zero code to be used as exit
     *         status: 5 if the document could not be opened, 1 if an
     *         exception was caught
     */
    static int process_document(BinaryWriter outfp, String filename, String realname,
                                byte[] data)
    {
        const String pvfname = "/pvf/attachment";

        int status = 0;
        TET tet    = null;

        try
        {
            tet = new TET();

            /* No file name given: expose the data as a virtual file. */
            if (String.IsNullOrEmpty(filename))
            {
                tet.create_pvf(pvfname, data, "");
                filename = pvfname;
            }

            tet.set_option(globaloptlist);

            int doc = tet.open_document(filename, docoptlist);

            if (doc != -1)
            {
                process_document(outfp, tet, doc);
            }
            else
            {
                Console.WriteLine("Error {0} in  {1}() (source: attachment '{2}'): {3}",
                                  tet.get_errnum(), tet.get_apiname(), realname,
                                  tet.get_errmsg());
                status = 5;
            }

            /*
             * Called unconditionally; the original sample notes that deleting
             * a PVF file that was never created does no harm.
             */
            tet.delete_pvf(pvfname);
        }
        catch (TETException e)
        {
            Console.WriteLine("Error {0} in  {1}() (source: attachment '{2}'): {3}",
                              e.get_errnum(), e.get_apiname(), realname,
                              e.get_errmsg());
            status = 1;
        }
        catch (Exception e)
        {
            Console.WriteLine("General Exception: " + e.ToString());
            status = 1;
        }
        finally
        {
            if (tet != null)
            {
                tet.Dispose();
            }
        }

        return(status);
    }
예제 #4
0
    /**
     * Entry point of the per-page image sample: for every page of the input
     * PDF, enumerates the placed images, reports their details and placement
     * geometry, and writes each image (and its mask, if it has one) to a file
     * whose name is derived from the input file name.
     *
     * @param args
     *            Command line; exactly one argument, the input PDF file name
     *
     * @return 0 on success; 2 on a usage error, if the document cannot be
     *         opened, or if an exception is caught
     */
    static int Main(string[] args)
    {
        /* global option list */
        string globaloptlist = "searchpath={{../data} {../../data}}";

        /* document-specific  option list */
        string docoptlist = "";

        /* page-specific option list  e.g
         * "imageanalysis={merge={gap=1} smallimages={maxwidth=20}}"
         */
        string pageoptlist = "";


        TET    tet;
        int    pageno = 0;
        string outfilebase;

        if (args.Length != 1)
        {
            Console.WriteLine("usage: image_resources <filename>");
            return(2);
        }

        /*
         * Derive the base name of the output files by stripping a trailing
         * ".pdf" or ".PDF". Both suffix tests must be guarded by the length
         * check: without the extra parentheses the ".PDF" test was evaluated
         * even for names shorter than four characters, where Substring()
         * throws ArgumentOutOfRangeException.
         */
        outfilebase = args[0];
        if (outfilebase.Length > 4 &&
            (outfilebase.EndsWith(".pdf", StringComparison.Ordinal) ||
             outfilebase.EndsWith(".PDF", StringComparison.Ordinal)))
        {
            outfilebase = outfilebase.Substring(0, outfilebase.Length - 4);
        }

        tet = new TET();

        try
        {
            int n_pages;

            tet.set_option(globaloptlist);

            int doc = tet.open_document(args[0], docoptlist);

            if (doc == -1)
            {
                Console.WriteLine("Error {0} in {1}(): {2}",
                                  tet.get_errnum(), tet.get_apiname(), tet.get_errmsg());
                return(2);
            }
            /* Get number of pages in the document */
            n_pages = (int)tet.pcos_get_number(doc, "length:pages");

            /* Loop over pages and extract images  */
            for (pageno = 1; pageno <= n_pages; ++pageno)
            {
                int page;
                int imagecount = 0;

                page = tet.open_page(doc, pageno, pageoptlist);

                if (page == -1)
                {
                    Console.WriteLine("Error {0} in {1}() on page {2}: {3}",
                                      tet.get_errnum(), tet.get_apiname(), pageno, tet.get_errmsg());
                    continue; /* try next page */
                }

                /*
                 * Retrieve all images on the page
                 */
                while ((tet.get_image_info(page)) == 1)
                {
                    String imageoptlist;
                    int    maskid;

                    imagecount++;

                    /* Report image details: pixel geometry, color space etc. */
                    report_image_info(tet, doc, tet.imageid);

                    /* Report placement geometry */
                    Console.WriteLine("  placed on page " + pageno +
                                      " at position (" + tet.x.ToString("f2") + ", " + tet.y.ToString("f2") + "): " +
                                      (int)tet.width + "x" + (int)tet.height + "pt, alpha=" + tet.alpha + ", beta=" +
                                      tet.beta);
                    /* Write image data to file */
                    imageoptlist = "filename={" + outfilebase + "_p" + pageno + "_" + imagecount + "_I" + tet.imageid + "}";

                    if (tet.write_image_file(doc, tet.imageid, imageoptlist) == -1)
                    {
                        Console.WriteLine("\nError [" + tet.get_errnum() +
                                          " in " + tet.get_apiname() + "(): " + tet.get_errmsg());
                        continue; /* try next image */
                    }

                    /* Check whether the image has a mask attached... */
                    maskid = (int)tet.pcos_get_number(doc,
                                                      "images[" + tet.imageid + "]/maskid");

                    /* and retrieve it if present */
                    if (maskid != -1)
                    {
                        Console.WriteLine("  masked with ");
                        report_image_info(tet, doc, maskid);

                        imageoptlist = "filename={" + outfilebase + "_p" + pageno + "_" + imagecount + "_I" + tet.imageid + "mask_I" + maskid + "}";

                        /*
                         * Write the mask itself (maskid). Passing tet.imageid
                         * here would store a second copy of the base image
                         * under the mask's file name.
                         */
                        if (tet.write_image_file(doc, maskid, imageoptlist) == -1)
                        {
                            Console.WriteLine("\nError [" + tet.get_errnum() +
                                              " in " + tet.get_apiname() +
                                              "() for mask image: " + tet.get_errmsg());
                            continue; /* try next image */
                        }
                    }

                    if (tet.get_errnum() != 0)
                    {
                        Console.WriteLine("Error {0} in {1}() on page {2}: {3}",
                                          tet.get_errnum(), tet.get_apiname(), pageno, tet.get_errmsg());
                    }
                }
                tet.close_page(page);
            }

            tet.close_document(doc);
        }
        catch (TETException e)
        {
            /* caught exception thrown by TET */
            Console.WriteLine("Error {0} in {1}(): {2}",
                              e.get_errnum(), e.get_apiname(), e.get_errmsg());
            return(2);
        }
        catch (Exception e)
        {
            Console.WriteLine("General Exception: " + e.ToString());
            return(2);
        }
        finally
        {
            /* Runs on every exit path from the try block, including the early returns. */
            if (tet != null)
            {
                tet.Dispose();
            }
        }

        return(0);
    }
예제 #5
0
    /**
     * Entry point of the extractor sample: extracts the text of every page of
     * the input PDF and writes it to the output file, encoded with a
     * UnicodeEncoding and preceded by that encoding's preamble.
     *
     * @param args
     *            Command line; args[0] is the input PDF file name, args[1]
     *            the output file name
     *
     * @return 0 on success; 2 on a usage error, if the document cannot be
     *         opened, or if an exception is caught
     */
    static int Main(string[] args)
    {
        /* global option list */
        string globaloptlist = "searchpath={{../data} {../../data}}";

        /* document-specific  option list */
        string docoptlist = "";

        /* page-specific option list */
        string pageoptlist = "granularity=page";

        /* separator to emit after each chunk of text. This depends on the
         * application's needs; for granularity=word a space character may be useful.
         */
        string separator = "\n";

        TET          tet;
        FileStream   outfile;
        BinaryWriter w;
        int          pageno = 0;

        UnicodeEncoding unicode = new UnicodeEncoding(false, true);

        Byte[] byteOrderMark = unicode.GetPreamble();


        if (args.Length != 2)
        {
            Console.WriteLine("usage: extractor <infilename> <outfilename>");
            return(2);
        }

        outfile = File.Create(args[1]);
        w       = new BinaryWriter(outfile);
        w.Write(byteOrderMark);

        tet = new TET();

        try
        {
            int n_pages;

            tet.set_option(globaloptlist);

            int doc = tet.open_document(args[0], docoptlist);

            if (doc == -1)
            {
                Console.WriteLine("Error {0} in {1}(): {2}",
                                  tet.get_errnum(), tet.get_apiname(), tet.get_errmsg());
                return(2);
            }

            /* get number of pages in the document */
            n_pages = (int)tet.pcos_get_number(doc, "length:pages");

            /* loop over pages in the document */
            for (pageno = 1; pageno <= n_pages; ++pageno)
            {
                string text;
                int    page;

                page = tet.open_page(doc, pageno, pageoptlist);

                if (page == -1)
                {
                    Console.WriteLine("Error {0} in {1}() on page {2}: {3}",
                                      tet.get_errnum(), tet.get_apiname(), pageno,
                                      tet.get_errmsg());
                    continue;                    /* try next page */
                }

                /* Retrieve all text fragments; This is actually not required
                 * for granularity=page, but must be used for other
                 * granularities.
                 */
                while ((text = tet.get_text(page)) != null)
                {
                    /* print the retrieved text */
                    w.Write(unicode.GetBytes(text));

                    /* print a separator between chunks of text */
                    w.Write(unicode.GetBytes(separator));
                }

                if (tet.get_errnum() != 0)
                {
                    /*
                     * The placeholders must match the argument list: the
                     * previous format string used {3} with only three
                     * arguments, so reporting an error threw FormatException.
                     */
                    Console.WriteLine("Error {0} in {1}() on page {2}: {3}",
                                      tet.get_errnum(), tet.get_apiname(), pageno,
                                      tet.get_errmsg());
                }
                tet.close_page(page);
            }
            tet.close_document(doc);
        }
        catch (TETException e) {
            /* caught exception thrown by TET */
            Console.WriteLine("Error {0} in {1}(): {2}",
                              e.get_errnum(), e.get_apiname(), e.get_errmsg());

            /* Signal failure to the caller like every other error path does. */
            return(2);
        }
        catch (Exception e)
        {
            Console.WriteLine("General Exception: " + e.ToString());
            return(2);
        }
        finally
        {
            /* Closing the stream also releases the output file handle. */
            outfile.Close();
            if (tet != null)
            {
                tet.Dispose();
            }
        }

        return(0);
    }
예제 #6
0
    /**
     * Entry point of the glyphinfo sample: extracts the text of the input PDF
     * word by word and writes, for every glyph, its Unicode value, font name,
     * size, position, color id, type and attribute flags to a UTF-8 text file.
     *
     * @param args
     *            Command line; args[0] is the input PDF file name, args[1]
     *            the output file name
     */
    public static void Main(String[] args)
    {
        /* Bit flags found in the "attributes" member */
        const int ATTR_NONE    = 0;
        const int ATTR_SUB     = 1;
        const int ATTR_SUP     = 2;
        const int ATTR_DROPCAP = 4;
        const int ATTR_SHADOW  = 8;
        const int ATTR_DH_PRE  = 16;
        const int ATTR_DH_ARTF = 32;
        const int ATTR_DH_POST = 64;

        /* Global option list. */
        string globaloptlist = "searchpath={{../data} {../../data}}";

        /* Document specific option list. */
        string docoptlist = "";

        /* Page-specific option list. */
        string pageoptlist = "granularity=word";

        FileStream   outfile;
        StreamWriter outfp;

        if (args.Length != 2)
        {
            Console.WriteLine("usage: glyphinfo <infilename> <outfilename>");
            return;
        }

        outfile = File.Create(args[1]);
        outfp   = new StreamWriter(outfile, System.Text.Encoding.UTF8);

        TET tet = null;

        try
        {
            tet = new TET();

            tet.set_option(globaloptlist);

            int doc = tet.open_document(args[0], docoptlist);

            if (doc == -1)
            {
                Console.WriteLine("Error " + tet.get_errnum() + " in "
                                  + tet.get_apiname() + "(): " + tet.get_errmsg());
                return;
            }

            /* get number of pages in the document */
            int n_pages = (int)tet.pcos_get_number(doc, "length:pages");

            /* Loop over pages in the document */
            for (int pageno = 1; pageno <= n_pages; ++pageno)
            {
                string text;
                int    page;
                int    previouscolor = -1;

                page = tet.open_page(doc, pageno, pageoptlist);

                if (page == -1)
                {
                    Console.WriteLine("Error " + tet.get_errnum() + " in "
                                      + tet.get_apiname() + "() on page "
                                      + pageno + ": " + tet.get_errmsg());
                    continue;                        /* try next page */
                }

                /* Administrative information */
                outfp.WriteLine("[ Document: '" +
                                tet.pcos_get_string(doc, "filename") + "' ]");

                outfp.WriteLine("[ Document options: '" + docoptlist + "' ]");

                outfp.WriteLine("[ Page options: '" + pageoptlist + "' ]");

                outfp.WriteLine("[ ----- Page " + pageno + " ----- ]");

                /* Retrieve all text fragments */
                while ((text = tet.get_text(page)) != null)
                {
                    /* print the retrieved text */
                    outfp.WriteLine("[" + text + "]");

                    /* Loop over all glyphs and print their details */
                    while (tet.get_char_info(page) != -1)
                    {
                        string str;
                        string fontname;

                        /* Fetch the font name with pCOS (based on its ID) */
                        fontname = tet.pcos_get_string(doc,
                                                       "fonts[" + tet.fontid + "]/name");

                        /* Print the character */
                        str = String.Format("U+{0}", tet.uv.ToString("X4"));

                        /*
                         * ...and the character itself. Plain concatenation is
                         * used on purpose: the glyph must not be passed to
                         * String.Format() as part of the format string, since
                         * a '{' or '}' glyph would raise FormatException.
                         * NOTE(review): the cast to char presumably truncates
                         * values above U+FFFF - confirm the type of tet.uv.
                         */
                        str = str + " '" + (char)(tet.uv) + "'";

                        /* Print font name, size, and position */
                        str = str + String.Format(" {0} size={1} x={2} y={3}",
                                                  fontname, tet.fontsize.ToString("f2"),
                                                  tet.x.ToString("f2"), tet.y.ToString("f2"));
                        /* Print the color id */
                        str = str + String.Format(" colorid={0}", tet.colorid);

                        /* check whether the text color changes */
                        if (tet.colorid != previouscolor)
                        {
                            str           = print_color_value(str, tet, doc, tet.colorid);
                            previouscolor = tet.colorid;
                        }
                        /* Examine the "type" member */
                        if (tet.type == 1)
                        {
                            str = str + " ligature_start";
                        }

                        else if (tet.type == 10)
                        {
                            str = str + " ligature_cont";
                        }

                        /* Separators are only inserted for granularity > word*/
                        else if (tet.type == 12)
                        {
                            str = str + " inserted";
                        }

                        /* Examine the bit flags in the "attributes" member */
                        if (tet.attributes != ATTR_NONE)
                        {
                            if ((tet.attributes & ATTR_SUB) == ATTR_SUB)
                            {
                                str = str + "/sub";
                            }
                            if ((tet.attributes & ATTR_SUP) == ATTR_SUP)
                            {
                                str = str + "/sup";
                            }
                            if ((tet.attributes & ATTR_DROPCAP) == ATTR_DROPCAP)
                            {
                                str = str + "/dropcap";
                            }
                            if ((tet.attributes & ATTR_SHADOW) == ATTR_SHADOW)
                            {
                                str = str + "/shadow";
                            }
                            if ((tet.attributes & ATTR_DH_PRE) == ATTR_DH_PRE)
                            {
                                str = str + "/dehyphenation_pre";
                            }
                            if ((tet.attributes & ATTR_DH_ARTF) == ATTR_DH_ARTF)
                            {
                                str = str + "/dehyphenation_artifact";
                            }
                            if ((tet.attributes & ATTR_DH_POST) == ATTR_DH_POST)
                            {
                                str = str + "/dehyphenation_post";
                            }
                        }
                        outfp.WriteLine(str);
                    }
                    outfp.WriteLine("");
                }

                if (tet.get_errnum() != 0)
                {
                    Console.WriteLine("Error " + tet.get_errnum() + " in "
                                      + tet.get_apiname() + "() on page "
                                      + pageno + ": " + tet.get_errmsg());
                }

                tet.close_page(page);
            }

            tet.close_document(doc);
        }
        catch (TETException e)
        {
            Console.WriteLine("Error " + e.get_errnum() + " in "
                              + e.get_apiname() + "(): " + e.get_errmsg());
        }
        catch (Exception e)
        {
            Console.WriteLine("General Exception: " + e.ToString());
        }
        finally
        {
            if (tet != null)
            {
                tet.Dispose();
            }

            /*
             * StreamWriter buffers its output; closing it flushes the buffer
             * and closes the underlying file. Without this call the tail of
             * the output (or all of it) never reaches the file.
             */
            outfp.Close();
        }
    }
    /**
     * Entry point of the fontfilter sample: prints every line of text whose
     * first character is set in a font whose name contains fontnametrigger
     * and whose size is at least fontsizetrigger.
     *
     * @param args
     *            Command line; exactly one argument, the input PDF file name
     */
    public static void Main(String[] args)
    {
        /* Global option list. */
        string globaloptlist = "searchpath={{../data} {../../data}}";

        /* Document specific option list. */
        string docoptlist = "";

        /* Page-specific option list. */
        string pageoptlist = "granularity=line";

        /* Search text with at least this size (use 0 to catch all sizes). */
        double fontsizetrigger = 10;

        /* Catch text where the font name contains this string (use empty string
         * to catch all font names).
         */
        String fontnametrigger = "Bold";

        TET tet    = null;

        /* Stays 0 until the page loop starts; selects the exception message below. */
        int pageno = 0;

        if (args.Length != 1)
        {
            Console.WriteLine("usage: fontfilter <infilename>");
            return;
        }

        try
        {
            tet = new TET();
            tet.set_option(globaloptlist);

            int doc = tet.open_document(args[0], docoptlist);
            if (doc == -1)
            {
                Console.WriteLine("Error " + tet.get_errnum() + " in "
                                  + tet.get_apiname() + "(): " + tet.get_errmsg());
                return;
            }

            /* Loop over pages in the document */
            int n_pages = (int)tet.pcos_get_number(doc, "length:pages");
            for (pageno = 1; pageno <= n_pages; ++pageno)
            {
                int page = tet.open_page(doc, pageno, pageoptlist);

                if (page == -1)
                {
                    Console.WriteLine("Error " + tet.get_errnum() + " in "
                                      + tet.get_apiname() + "(): " + tet.get_errmsg());

                    /*
                     * A page that cannot be opened must not abort the whole
                     * run; skip it and try the next page.
                     */
                    continue;
                }

                /* Retrieve all text fragments for the page */
                String text;
                while ((text = tet.get_text(page)) != null)
                {
                    /* In this sample we check only the first character of
                     * each fragment, so a single get_char_info() call is
                     * sufficient.
                     */
                    if (tet.get_char_info(page) != -1)
                    {
                        /* We need only the font name and size; the text
                         * position could be fetched from tet.x and tet.y.
                         */
                        String fontname = tet.pcos_get_string(doc,
                                                              "fonts[" + tet.fontid + "]/name");

                        /* Check whether we found a match */
                        if (tet.fontsize >= fontsizetrigger &&
                            fontname.IndexOf(fontnametrigger) != -1)
                        {
                            /* print the retrieved font name, size, and text */
                            Console.WriteLine("[{0} {1:0.00}] {2}", fontname,
                                              tet.fontsize, text);
                        }
                    }
                }

                if (tet.get_errnum() != 0)
                {
                    Console.WriteLine("Error " + tet.get_errnum() + " in "
                                      + tet.get_apiname() + "(): " + tet.get_errmsg());
                }

                tet.close_page(page);
            }

            tet.close_document(doc);
        }
        catch (TETException e)
        {
            if (pageno == 0)
            {
                Console.WriteLine("Error " + e.get_errnum() + " in "
                                  + e.get_apiname() + "(): " + e.get_errmsg() + "\n");
            }
            else
            {
                Console.WriteLine("Error " + e.get_errnum() + " in "
                                  + e.get_apiname() + "() on page " + pageno + ": "
                                  + e.get_errmsg() + "\n");
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("General Exception: " + e.ToString());
        }
        finally
        {
            /* tet is still null if the TET constructor itself threw. */
            if (tet != null)
            {
                tet.Dispose();
            }
        }
    }
예제 #8
0
    /**
     * Entry point of the tetml sample: converts the input PDF to TETML.
     *
     * With inmemory == false the TETML is written by TET to the file named by
     * args[1]. With inmemory == true the TETML is fetched as a byte array,
     * loaded into an XmlDocument, and the fonts and the number of words found
     * in it are printed to the console.
     *
     * @param args
     *            Command line; args[0] is the input PDF file name, args[1]
     *            the TETML output file name
     */
    public static void Main(String[] args)
    {
        /* Global option list. */
        string globaloptlist = "searchpath={{../data} {../../data}}";

        /* Document specific option list. */
        string basedocoptlist = "";

        /* Page-specific option list. Remove the tetml= option if you don't
         * need font and geometry information.
         */
        string pageoptlist = "granularity=word tetml={glyphdetails={all}}";

        /* set this to true to generate TETML output in memory */
        bool inmemory = false;

        if (args.Length != 2)
        {
            Console.WriteLine("usage: tetml <pdffilename> <xmlfilename>");
            return;
        }

        TET tet = null;

        try
        {
            tet = new TET();
            tet.set_option(globaloptlist);

            /*
             * In-memory mode: the TETML bytes are decoded as UTF-8 below and
             * then held in a .NET string, which is UTF-16. The original
             * sample therefore asks TET to announce encoding="UTF-16" in the
             * XML header, noting that this is cleaner but not strictly
             * necessary for this program.
             * File mode: TET writes the TETML to the file named by args[1].
             */
            String docoptlist = inmemory
                ? "tetml={encodingname=UTF-16} " + basedocoptlist
                : "tetml={filename={" + args[1] + "}} " + basedocoptlist;

            int doc = tet.open_document(args[0], docoptlist);

            if (doc == -1)
            {
                Console.WriteLine("Error {0} in {1}(): {2}",
                                  tet.get_errnum(), tet.get_apiname(), tet.get_errmsg());
                return;
            }

            int pageCount = (int)tet.pcos_get_number(doc, "length:pages");

            /* Loop over pages in the document */
            for (int pageno = 1; pageno <= pageCount; ++pageno)
            {
                tet.process_page(doc, pageno, pageoptlist);
            }

            /* This could be combined with the last page-related call. */
            tet.process_page(doc, 0, "tetml={trailer}");

            if (inmemory)
            {
                /* Get the XML document as a byte array. */
                byte[] tetml = tet.get_tetml(doc, "");

                if (tetml == null)
                {
                    Console.WriteLine("tetml: couldn't retrieve XML data");
                    return;
                }

                /* Load the in-memory TETML into a DOM and query it with XPath. */
                XmlDocument xmldoc = new XmlDocument();
                xmldoc.LoadXml(new UTF8Encoding().GetString(tetml));

                XmlElement root = xmldoc.DocumentElement;

                /* The TETML elements live in a namespace; bind it to "tet". */
                XmlNamespaceManager nsmgr =
                    new XmlNamespaceManager(xmldoc.NameTable);
                nsmgr.AddNamespace("tet",
                                   "http://www.pdflib.com/XML/TET5/TET-5.0");

                /* List name and type of every font element. */
                foreach (XmlNode font in root.SelectNodes("//tet:Font", nsmgr))
                {
                    XmlAttributeCollection attrs = font.Attributes;

                    XmlAttribute fontName =
                        (XmlAttribute)attrs.GetNamedItem("name");
                    XmlAttribute fontType =
                        (XmlAttribute)attrs.GetNamedItem("type");

                    Console.WriteLine("Font " + fontName.Value + " "
                                      + fontType.Value);
                }

                XmlNodeList words = root.SelectNodes("//tet:Word", nsmgr);
                Console.WriteLine("Found " + words.Count
                                  + " words in document");
            }

            tet.close_document(doc);
        }
        catch (TETException e)
        {
            Console.WriteLine("Error {0} in {1}(): {2}",
                              e.get_errnum(), e.get_apiname(), e.get_errmsg());
        }
        catch (Exception e)
        {
            Console.WriteLine("General Exception: " + e.ToString());
        }
        finally
        {
            if (tet != null)
            {
                tet.Dispose();
            }
        }
    }