Пример #1
0
    /// <summary>
    /// Entry point of the glyph information sample. Opens the input document
    /// with TET and writes every retrieved text fragment, followed by one line
    /// of details per glyph (code point, font, size, position, color id, type
    /// and attribute flags), to a UTF-8 encoded output file.
    /// </summary>
    /// <param name="args">
    /// Exactly two entries: the input file name and the output file name.
    /// Any other count prints a usage message and returns.
    /// </param>
    public static void Main(String[] args)
    {
        /* Global option list. */
        string globaloptlist = "searchpath={{../data} {../../data}}";

        /* Document specific option list. */
        string docoptlist = "";

        /* Page-specific option list. */
        string pageoptlist = "granularity=word";

        /* Bit flags tested against the "attributes" member of each glyph. */
        const int ATTR_NONE    = 0;
        const int ATTR_SUB     = 1;
        const int ATTR_SUP     = 2;
        const int ATTR_DROPCAP = 4;
        const int ATTR_SHADOW  = 8;
        const int ATTR_DH_PRE  = 16;
        const int ATTR_DH_ARTF = 32;
        const int ATTR_DH_POST = 64;

        if (args.Length != 2)
        {
            Console.WriteLine("usage: glyphinfo <infilename> <outfilename>");
            return;
        }

        TET tet = null;

        try
        {
            /* The using statements guarantee that buffered output is flushed
             * and the file handle released on every exit path, including the
             * early return below and any exception. Creating the file inside
             * the try block also routes I/O failures to the handlers below.
             */
            using (FileStream outfile = File.Create(args[1]))
            using (StreamWriter outfp = new StreamWriter(outfile, System.Text.Encoding.UTF8))
            {
                tet = new TET();

                tet.set_option(globaloptlist);

                int doc = tet.open_document(args[0], docoptlist);

                if (doc == -1)
                {
                    Console.WriteLine("Error " + tet.get_errnum() + " in "
                                      + tet.get_apiname() + "(): " + tet.get_errmsg());
                    return;
                }

                /* get number of pages in the document */
                int n_pages = (int)tet.pcos_get_number(doc, "length:pages");

                /* Loop over pages in the document */
                for (int pageno = 1; pageno <= n_pages; ++pageno)
                {
                    string text;
                    int    page;
                    int    previouscolor = -1;

                    page = tet.open_page(doc, pageno, pageoptlist);

                    if (page == -1)
                    {
                        Console.WriteLine("Error " + tet.get_errnum() + " in "
                                          + tet.get_apiname() + "() on page "
                                          + pageno + ": " + tet.get_errmsg());
                        continue;                        /* try next page */
                    }

                    /* Administrative information */
                    outfp.WriteLine("[ Document: '" +
                                    tet.pcos_get_string(doc, "filename") + "' ]");

                    outfp.WriteLine("[ Document options: '" + docoptlist + "' ]");

                    outfp.WriteLine("[ Page options: '" + pageoptlist + "' ]");

                    outfp.WriteLine("[ ----- Page " + pageno + " ----- ]");

                    /* Retrieve all text fragments */
                    while ((text = tet.get_text(page)) != null)
                    {
                        /* print the retrieved text */
                        outfp.WriteLine("[" + text + "]");

                        /* Loop over all glyphs and print their details */
                        while (tet.get_char_info(page) != -1)
                        {
                            string str;
                            string fontname;

                            /* Fetch the font name with pCOS (based on its ID) */
                            fontname = tet.pcos_get_string(doc,
                                                           "fonts[" + tet.fontid + "]/name");

                            /* Print the character's code point */
                            str = String.Format("U+{0}", tet.uv.ToString("X4"));

                            /* ...and the character itself. A plain (char) cast
                             * truncates code points above U+FFFF, so valid
                             * scalar values are converted to a proper string
                             * (a surrogate pair where needed); anything else
                             * keeps the original cast behavior.
                             */
                            int  uv       = (int)tet.uv;
                            bool isScalar = uv >= 0 && uv <= 0x10FFFF
                                            && (uv < 0xD800 || uv > 0xDFFF);
                            string glyph  = isScalar
                                            ? Char.ConvertFromUtf32(uv)
                                            : ((char)uv).ToString();

                            /* Plain concatenation: the glyph must never be part
                             * of a format string, otherwise '{' or '}' glyphs
                             * raise a FormatException.
                             */
                            str = str + " '" + glyph + "'";

                            /* Print font name, size, and position */
                            str = str + String.Format(" {0} size={1} x={2} y={3}",
                                                      fontname, tet.fontsize.ToString("f2"),
                                                      tet.x.ToString("f2"), tet.y.ToString("f2"));

                            /* Print the color id */
                            str = str + String.Format(" colorid={0}", tet.colorid);

                            /* check whether the text color changes */
                            if (tet.colorid != previouscolor)
                            {
                                str           = print_color_value(str, tet, doc, tet.colorid);
                                previouscolor = tet.colorid;
                            }

                            /* Examine the "type" member */
                            if (tet.type == 1)
                            {
                                str = str + " ligature_start";
                            }
                            else if (tet.type == 10)
                            {
                                str = str + " ligature_cont";
                            }
                            /* Separators are only inserted for granularity > word */
                            else if (tet.type == 12)
                            {
                                str = str + " inserted";
                            }

                            /* Examine the bit flags in the "attributes" member */
                            if (tet.attributes != ATTR_NONE)
                            {
                                if ((tet.attributes & ATTR_SUB) == ATTR_SUB)
                                {
                                    str = str + "/sub";
                                }
                                if ((tet.attributes & ATTR_SUP) == ATTR_SUP)
                                {
                                    str = str + "/sup";
                                }
                                if ((tet.attributes & ATTR_DROPCAP) == ATTR_DROPCAP)
                                {
                                    str = str + "/dropcap";
                                }
                                if ((tet.attributes & ATTR_SHADOW) == ATTR_SHADOW)
                                {
                                    str = str + "/shadow";
                                }
                                if ((tet.attributes & ATTR_DH_PRE) == ATTR_DH_PRE)
                                {
                                    str = str + "/dehyphenation_pre";
                                }
                                if ((tet.attributes & ATTR_DH_ARTF) == ATTR_DH_ARTF)
                                {
                                    str = str + "/dehyphenation_artifact";
                                }
                                if ((tet.attributes & ATTR_DH_POST) == ATTR_DH_POST)
                                {
                                    str = str + "/dehyphenation_post";
                                }
                            }
                            outfp.WriteLine(str);
                        }
                        outfp.WriteLine("");
                    }

                    /* get_text() returning null may also indicate an error */
                    if (tet.get_errnum() != 0)
                    {
                        Console.WriteLine("Error " + tet.get_errnum() + " in "
                                          + tet.get_apiname() + "() on page "
                                          + pageno + ": " + tet.get_errmsg());
                    }

                    tet.close_page(page);
                }

                tet.close_document(doc);
            }
        }
        catch (TETException e)
        {
            Console.WriteLine("Error " + e.get_errnum() + " in "
                              + e.get_apiname() + "(): " + e.get_errmsg());
        }
        catch (Exception e)
        {
            Console.WriteLine("General Exception: " + e.ToString());
        }
        finally
        {
            if (tet != null)
            {
                tet.Dispose();
            }
        }
    }
    /// <summary>
    /// Entry point of the font filter sample. Opens the input document with
    /// TET, retrieves the text line by line and prints every fragment whose
    /// first character is at least <c>fontsizetrigger</c> in size and uses a
    /// font whose name contains <c>fontnametrigger</c>.
    /// </summary>
    /// <param name="args">
    /// Exactly one entry: the input file name. Any other count prints a usage
    /// message and returns.
    /// </param>
    public static void Main(String[] args)
    {
        /* Global option list. */
        string globaloptlist = "searchpath={{../data} {../../data}}";

        /* Document specific option list. */
        string docoptlist = "";

        /* Page-specific option list. */
        string pageoptlist = "granularity=line";

        /* Search text with at least this size (use 0 to catch all sizes). */
        double fontsizetrigger = 10;

        /* Catch text where the font name contains this string (use empty string
         * to catch all font names).
         */
        String fontnametrigger = "Bold";

        TET tet    = null;
        int pageno = 0;     /* stays 0 until the page loop starts; used by the
                             * exception handler to pick the message format */

        if (args.Length != 1)
        {
            Console.WriteLine("usage: fontfilter <infilename>");
            return;
        }

        try
        {
            tet = new TET();
            tet.set_option(globaloptlist);

            int doc = tet.open_document(args[0], docoptlist);
            if (doc == -1)
            {
                Console.WriteLine("Error " + tet.get_errnum() + " in "
                                  + tet.get_apiname() + "(): " + tet.get_errmsg());
                return;
            }

            /* Loop over pages in the document */
            int n_pages = (int)tet.pcos_get_number(doc, "length:pages");
            for (pageno = 1; pageno <= n_pages; ++pageno)
            {
                int page = tet.open_page(doc, pageno, pageoptlist);

                if (page == -1)
                {
                    Console.WriteLine("Error " + tet.get_errnum() + " in "
                                      + tet.get_apiname() + "(): " + tet.get_errmsg());

                    /* A page that cannot be opened must not abort the whole
                     * run (and skip close_document); move on instead.
                     */
                    continue; /* try next page */
                }

                /* Retrieve all text fragments for the page */
                String text;
                while ((text = tet.get_text(page)) != null)
                {
                    /* In this sample we check only the first character of
                     * each fragment, so a single get_char_info() call is
                     * enough.
                     */
                    if (tet.get_char_info(page) != -1)
                    {
                        /* We need only the font name and size; the text
                         * position could be fetched from tet.x and tet.y.
                         */
                        String fontname = tet.pcos_get_string(doc,
                                                              "fonts[" + tet.fontid + "]/name");

                        /* Check whether we found a match. Font names are
                         * identifiers, so compare ordinally instead of using
                         * the current culture's linguistic rules.
                         */
                        if (tet.fontsize >= fontsizetrigger &&
                            fontname.IndexOf(fontnametrigger, StringComparison.Ordinal) != -1)
                        {
                            /* print the retrieved font name, size, and text */
                            Console.WriteLine("[{0} {1:0.00}] {2}", fontname,
                                              tet.fontsize, text);
                        }
                    }
                }

                /* get_text() returning null may also indicate an error */
                if (tet.get_errnum() != 0)
                {
                    Console.WriteLine("Error " + tet.get_errnum() + " in "
                                      + tet.get_apiname() + "(): " + tet.get_errmsg());
                }

                tet.close_page(page);
            }

            tet.close_document(doc);
        }
        catch (TETException e)
        {
            if (pageno == 0)
            {
                Console.WriteLine("Error " + e.get_errnum() + " in "
                                  + e.get_apiname() + "(): " + e.get_errmsg() + "\n");
            }
            else
            {
                Console.WriteLine("Error " + e.get_errnum() + " in "
                                  + e.get_apiname() + "() on page " + pageno + ": "
                                  + e.get_errmsg() + "\n");
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("General Exception: " + e.ToString());
        }
        finally
        {
            /* tet is still null when the TET constructor itself threw;
             * disposing unconditionally would replace the reported error
             * with a NullReferenceException.
             */
            if (tet != null)
            {
                tet.Dispose();
            }
        }
    }