// Used in code snippet 1.
/// <summary>
/// Recursively walks the structure tree rooted at <paramref name="element"/> and
/// writes a description of every node to the console: the element's type and
/// optional title, followed by each of its kids.
/// </summary>
/// <param name="element">Structure element to describe; invalid elements are ignored.</param>
/// <param name="indent">
/// Level passed to PrintIndent for this element's own line. Kids (content items
/// and nested structure elements) are processed with <c>indent + 1</c>.
/// </param>
static void ProcessStructElement(SElement element, int indent)
{
    if (!element.IsValid())
    {
        return;
    }

    // Header for this node: type, then the title when one is present.
    PrintIndent(indent);
    int childIndent = indent + 1;
    Console.Write("Type: " + element.GetType());
    if (element.HasTitle())
    {
        Console.Write(". Title: " + element.GetTitle());
    }

    int kidCount = element.GetNumKids();
    for (int kid = 0; kid < kidCount; ++kid)
    {
        // A kid that is not a content item is a nested structure element: recurse.
        if (!element.IsContentItem(kid))
        {
            ProcessStructElement(element.GetAsStructElem(kid), childIndent);
            continue;
        }

        // Leaf node: a content item.
        ContentItem item = element.GetAsContentItem(kid);
        ContentItem.Type itemType = item.GetType();
        Page owningPage = item.GetPage();

        PrintIndent(childIndent);
        Console.Write("Content Item. Part of page #" + owningPage.GetIndex());

        PrintIndent(childIndent);
        if (itemType == ContentItem.Type.e_MCID || itemType == ContentItem.Type.e_MCR)
        {
            Console.Write("MCID: " + item.GetMCID());
        }
        else if (itemType == ContentItem.Type.e_OBJR)
        {
            Console.Write("OBJR ");
            Obj referenced = item.GetRefObj();
            if (referenced != null)
            {
                Console.Write("- Referenced Object#: " + referenced.GetObjNum());
            }
        }
        // Any other content item type prints nothing further.
    }
}
// Used in code snippet 2.
/// <summary>
/// Reads elements from <paramref name="reader"/> until <c>Next()</c> returns null.
/// For every path or text element it writes a label to the console and, when the
/// element has a valid parent structure element, that parent's type, the
/// element's MCID, the parent's title (if any) and the parent's object number.
/// All other element types are skipped.
/// </summary>
/// <param name="reader">Element reader to pull the elements from.</param>
static void ProcessElements(ElementReader reader)
{
    Element element;
    while ((element = reader.Next()) != null) // Read page contents
    {
        // In this sample we process only paths & text, but the code can be
        // extended to handle any element type. The switch is the single place
        // that decides which types are handled; everything else is skipped.
        //
        // To also report form XObjects, add a case for Element.Type.e_form that
        // writes "FORM XObject: " (and, to descend into the form, calls
        // reader.FormBegin(); ProcessElements(reader); reader.End();).
        Element.Type type = element.GetType();
        switch (type)
        {
            case Element.Type.e_path: // Process path ...
                Console.WriteLine();
                Console.Write("PATH: ");
                break;

            case Element.Type.e_text: // Process text ...
                Console.WriteLine();
                Console.WriteLine("TEXT: " + element.GetTextString());
                break;

            default:
                // Not a type this sample reports: move on to the next element.
                continue;
        }

        // Check if the element is associated with any structural element.
        // Content items are leaf nodes of the structure tree.
        SElement struct_parent = element.GetParentStructElement();
        if (!struct_parent.IsValid())
        {
            continue;
        }

        // Print out the parent structural element's type, title, and object number.
        Console.Write(" Type: " + struct_parent.GetType()
            + ", MCID: " + element.GetStructMCID());
        if (struct_parent.HasTitle())
        {
            Console.Write(". Title: " + struct_parent.GetTitle());
        }
        Console.Write(", Obj#: " + struct_parent.GetSDFObj().GetObjNum());
    }
}
// Used in code snippet 3.
/// <summary>
/// Recursively writes the structure tree rooted at <paramref name="element"/> to
/// the console as tag-like markup: an opening <c>&lt;Type title="..."&gt;</c>,
/// the mapped values of its MCID content items, its nested structure elements,
/// and a closing <c>&lt;/Type&gt;</c>.
/// </summary>
/// <param name="element">Structure element to write; invalid elements are ignored.</param>
/// <param name="mcid_doc_map">
/// Table keyed by page index whose values are Hashtables keyed by MCID. The value
/// stored for a content item's (page, MCID) pair is written to the console;
/// presumably this is the text extracted for that marked content (confirm against
/// the code that builds the map). Pairs missing from the map are skipped.
/// </param>
/// <param name="indent">
/// Level passed to PrintIndent for this element's opening and closing tags;
/// nested structure elements use <c>indent + 1</c>.
/// </param>
static void ProcessStructElement2(SElement element, Hashtable mcid_doc_map, int indent)
{
    if (!element.IsValid())
    {
        return;
    }

    // Opening tag, with the title attribute only when a title exists.
    PrintIndent(indent);
    Console.Write("<" + element.GetType());
    if (element.HasTitle())
    {
        Console.Write(" title=\"" + element.GetTitle() + "\"");
    }
    Console.Write(">");

    int kidCount = element.GetNumKids();
    for (int kid = 0; kid < kidCount; ++kid)
    {
        // A kid that is not a content item is a nested structure element: recurse.
        if (!element.IsContentItem(kid))
        {
            ProcessStructElement2(element.GetAsStructElem(kid), mcid_doc_map, indent + 1);
            continue;
        }

        // Only MCID content items are looked up in the map.
        ContentItem item = element.GetAsContentItem(kid);
        if (item.GetType() != ContentItem.Type.e_MCID)
        {
            continue;
        }

        int pageNumber = item.GetPage().GetIndex();
        if (!mcid_doc_map.ContainsKey(pageNumber))
        {
            continue;
        }

        Hashtable pageEntries = (Hashtable)(mcid_doc_map[pageNumber]);
        int mcid = item.GetMCID();
        if (pageEntries.ContainsKey(mcid))
        {
            Console.Write(pageEntries[mcid]);
        }
    }

    // Closing tag.
    PrintIndent(indent);
    Console.Write("</" + element.GetType() + ">");
}