/// <summary>
/// Builds the PAGE-XML document for a page's layout and returns it as an XML string.
/// </summary>
/// <param name="page">The page to export; forwarded to <c>ToPageXmlPage</c>.</param>
/// <param name="includePaths">Draw PdfPaths present in the page (forwarded to <c>ToPageXmlPage</c>).</param>
/// <returns>The string produced by <c>Serialize</c> for the assembled document.</returns>
public string Get(Page page, bool includePaths)
{
    // Every export starts from a clean slate: counters back to zero and a fresh region list.
    lineCount = 0;
    wordCount = 0;
    glyphCount = 0;
    regionCount = 0;
    groupOrderCount = 0;
    orderedRegions = new List<PageXmlDocument.PageXmlRegionRefIndexed>();

    var document = new PageXmlDocument();

    // The comment field records which segmenter and word extractor types were used.
    var metadata = new PageXmlDocument.PageXmlMetadata();
    metadata.Created = DateTime.UtcNow;
    metadata.LastChange = DateTime.UtcNow;
    metadata.Creator = "PdfPig";
    metadata.Comments = pageSegmenter.GetType().Name + "|" + wordExtractor.GetType().Name;

    document.Metadata = metadata;
    document.PcGtsId = "pc-" + page.GetHashCode();
    document.Page = ToPageXmlPage(page, includePaths);

    return Serialize(document);
}
/// <summary>
/// Populates <c>Columns</c> from <c>PageXmlDocument.ToDataGridViewColumns()</c> the first time
/// it is called; later calls leave the existing value untouched.
/// </summary>
public static void InitColumn()
{
    if (Columns != null)
    {
        return;
    }

    Columns = PageXmlDocument.ToDataGridViewColumns();
}
/// <summary>
/// Once the design browser has finished loading, replaces the body HTML of its document
/// with the text of the page identified by <c>PageId</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
void DesignWebBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    SdsiteXmlDocument siteDocument = Service.Sdsite.CurrentDocument;
    PageXmlDocument pageDocument = siteDocument.GetPageDocumentById(PageId);

    DesignWebBrowser.Document.Body.InnerHtml = pageDocument.PageText;
}
/// <summary>
/// Loads the content (article) document that belongs to a page.
/// </summary>
/// <param name="pageID">
/// Id of the page whose document is looked up in the current site document.
/// When null or empty, an empty content skeleton is loaded into <c>_pageTextDoc</c> instead.
/// </param>
public PagePropertyHelper(string pageID)
{
    if (!string.IsNullOrEmpty(pageID))
    {
        // Exceptions from the lookup propagate unchanged (the former catch { throw; } was a no-op).
        _pageTextDoc = Service.Sdsite.CurrentDocument.GetPageDocumentById(pageID) as PageXmlDocument;
        return;
    }

    // NOTE(review): this constructor never assigns _pageTextDoc on this path, so the call below
    // relies on the field being initialised elsewhere (e.g. a field initializer) — confirm.
    //
    // Inside a verbatim string every literal quote must be doubled, so an empty attribute is
    // written as four quotes. content_id and content_title previously had only two, which
    // produced a single attribute content_id=" content_title=" and dropped content_title.
    _pageTextDoc.LoadXml(
        @"<?xml version=""1.0"" encoding=""utf-8"" ?> " +
        @"<content content_id="""" content_title="""" content_title_alias="""" " +
        @"created_time="""" design_summary="""" content_source="""" " +
        @"is_always_pub=""True"" pub_time="""" stop_time=""0001-1-1 0:00:00"" " +
        @"is_pub=""false"" is_modified=""true"" is_delete=""false"" " +
        @"pub_by="""" created_by="""" pub_by_alias="""" " +
        @"modify_by="""" modify_time="""" created_by_alias=""""> " +
        @"<article_text /> " +
        @"<article_summary></article_summary> " +
        @"<files> <file /> </files> " +
        @"<tags> </tags> " +
        @"</content>");
}
/// <summary>
/// Round-trips a sample PAGE-XML file: deserializes it, serializes it again and writes the
/// result next to the input with the extension changed to <c>new.xml</c>.
/// </summary>
public static void Run()
{
    // Alternative sample file name noted by the original author: Glyph_Sample01_General.xml
    string pageFilePath = @"D:\MachineLearning\Document Layout Analysis\hocr\PAGE samples\aletheiaexamplepage_2019.xml";

    PageXmlDocument document = PageXmlDocument.Deserialize(pageFilePath);
    string serialized = document.Serialize();

    string outputPath = Path.ChangeExtension(pageFilePath, "new.xml");
    File.WriteAllText(outputPath, serialized);
}
/// <summary>
/// After the title text box has been validated, copies its text to the page's index element
/// and to the page document, saving each one right after it is updated.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
void titleToolStripTextBox_Validated(object sender, EventArgs e)
{
    SdsiteXmlDocument siteDocument = Service.Sdsite.CurrentDocument;

    // 1) Title stored on the page's element in the site document.
    PageSimpleExXmlElement pageElement = siteDocument.GetPageElementById(_htmlDesigner.PageId);
    pageElement.Title = titleToolStripTextBox.Text;
    siteDocument.Save();

    // 2) Title stored inside the page document itself.
    PageXmlDocument pageDocument = siteDocument.GetPageDocumentById(_htmlDesigner.PageId);
    pageDocument.Title = titleToolStripTextBox.Text;
    pageDocument.Save();
}
/// <summary>
/// Serializes a <see cref="PageXmlDocument"/> to an indented UTF-8 XML string.
/// </summary>
/// <param name="pageXmlDocument">The document to serialize.</param>
/// <returns>The XML text, without a leading byte order mark character.</returns>
private string Serialize(PageXmlDocument pageXmlDocument)
{
    XmlSerializer serializer = new XmlSerializer(typeof(PageXmlDocument));

    // Encoding.UTF8 carries a preamble, so the writer would emit a BOM that then survives
    // decoding as a leading U+FEFF in the returned string. A BOM-less UTF-8 encoding still
    // declares encoding="utf-8" in the XML prolog but keeps the string clean.
    var utf8WithoutBom = new System.Text.UTF8Encoding(false);

    var settings = new XmlWriterSettings()
    {
        Encoding = utf8WithoutBom,
        Indent = true,
        IndentChars = indentChar,
    };

    using (var memoryStream = new System.IO.MemoryStream())
    {
        // Close the writer before reading the buffer so everything is guaranteed to be flushed.
        using (var xmlWriter = XmlWriter.Create(memoryStream, settings))
        {
            serializer.Serialize(xmlWriter, pageXmlDocument);
        }

        return utf8WithoutBom.GetString(memoryStream.ToArray());
    }
}
/// <summary>
/// Copies the values collected in the property form into the page's XML document and saves
/// both the page document and the current site document.
/// </summary>
/// <param name="property">The values entered in the form.</param>
/// <param name="PageId">Id of the page whose document and index element are updated.</param>
public void WritePageTextProp(PageTextPropertyItem property, string PageId)
{
    PageXmlDocument pageDocument = Service.Sdsite.CurrentDocument.GetPageDocumentById(PageId);
    PageSimpleExXmlElement pageElement = Service.Sdsite.CurrentDocument.GetPageElementById(PageId);

    // Flag the index element as modified before touching the document.
    pageElement.IsModified = true;

    // Titles and timing.
    pageDocument.Title = property.Title;
    pageDocument.PageTitleAlias = property.TitleAlias;
    pageDocument.PageCreateTime = property.DeliverTime;
    pageDocument.PageSummary = property.Summary;

    // Authoring information.
    // NOTE(review): Author is filled from AuthorAlias, same as AuthorAlias below — confirm this is intended.
    pageDocument.Author = property.AuthorAlias;
    pageDocument.AuthorAlias = property.AuthorAlias;
    pageDocument.ModifyAlias = property.ModifyBy;

    // Remaining descriptive fields.
    pageDocument.DesignSummary = property.DesignSummary;
    pageDocument.PageKeywords = property.tag.ToArray();
    pageDocument.ContentSource = property.ContentSource;

    // A legacy implementation (disabled in the original source) wrote the content_source,
    // is_always_pub and stop_time attributes, the tag nodes and the article text CDATA
    // directly on the XML nodes; of those, only ContentSource is written here.

    pageDocument.Save();
    Service.Sdsite.CurrentDocument.Save();
}
/// <summary>
/// Builds the PAGE-XML document for one dataset entry and returns it as an XML string.
/// </summary>
/// <param name="page">The entry to convert; its <c>Id</c> becomes part of the document id.</param>
/// <param name="categories">Category lookup forwarded to <c>ToPageXmlPage</c>.</param>
/// <returns>The string produced by <c>Serialize</c> for the assembled document.</returns>
private static string Get(CocoEntry page, Dictionary<int, string> categories)
{
    var document = new PageXmlDocument();

    var metadata = new PageXmlDocument.PageXmlMetadata();
    metadata.Created = DateTime.UtcNow;
    metadata.LastChange = DateTime.UtcNow;
    metadata.Creator = "PublayNetConverter";
    metadata.Comments = "PubLayNet dataset";

    document.Metadata = metadata;
    document.PcGtsId = "pc" + page.Id.ToString();
    document.Page = ToPageXmlPage(page, categories);

    return Serialize(document);
}
/// <summary>
/// Prepares the form for the page identified by <c>_pageId</c>: loads its index document,
/// sets the caption, subscribes to title changes and hosts the HTML designer control.
/// If the page element or its file is missing, an error is shown and the form closes itself.
/// </summary>
/// <param name="e">Event data passed on to the base implementation.</param>
protected override void OnLoad(EventArgs e)
{
    Debug.Assert(!string.IsNullOrEmpty(_pageId));

    PageSimpleExXmlElement pageElement = Service.Sdsite.CurrentDocument.GetPageElementById(_pageId);
    bool pageFileAvailable = pageElement != null && File.Exists(pageElement.AbsoluteFilePath);

    if (!pageFileAvailable)
    {
        MessageService.Show("文件不存在,打开失败!", MessageBoxButtons.OK, MessageBoxIcon.Error);

        // The close is deferred to a short one-shot timer instead of being issued from inside OnLoad.
        // NOTE(review): this path returns without calling base.OnLoad(e) — confirm that is intended.
        Timer closeTimer = new Timer();
        closeTimer.Interval = 10;
        closeTimer.Tick += delegate
        {
            closeTimer.Stop();
            closeTimer.Dispose();
            Close();
        };
        closeTimer.Start();
        return;
    }

    _pageDoc = pageElement.GetIndexXmlDocument();
    Text = pageElement.Title;

    Service.Sdsite.CurrentDocument.ElementTitleChanged += CurrentDocument_ElementTitleChanged;

    // Control setup: create the designer for this page and host it on the form.
    _htmldesign = new HTMLDesignerEx(PageId);
    Controls.Add(_htmldesign);

    string pageTitle = Service.Sdsite.CurrentDocument.GetPageElementById(_pageId).Title;
    _htmldesign.PageTitle = pageTitle;
    _designWebB = _htmldesign.DesignWebBrowser;

    base.OnLoad(e);
}
/// <summary>
/// Property panel for a content page.
/// </summary>
/// <param name="pageId">Id of the page whose properties and text are loaded.</param>
public PagePropertyPanel(string pageId)
{
    InitializeComponent();
    _pageID = pageId;

    PageSimpleExXmlElement pageElement =
        (PageSimpleExXmlElement)Service.Sdsite.CurrentDocument.GetElementById(pageId);

    // NOTE(review): the path is read but never used afterwards; the lookup is kept because the
    // cast and the property access still throw for an unknown or non-page id.
    string filePath = pageElement.AbsoluteFilePath;

    PageXmlDocument pageDocument = Service.Sdsite.CurrentDocument.GetPageDocumentById(pageId);

    // The helper is needed on both paths, so it is created once up front.
    mgr = new PagePropertyHelper(pageId);

    if (pageDocument != null)
    {
        _pageText = pageDocument.PageText;
    }
    else
    {
        // No page document available: read the article text through the helper instead.
        _pageText = mgr.ReadPageText();
    }
}
/// <summary>
/// Calls the SaveXhtml / DeleteXhtml pair on a template document, a page document and each
/// snip element of the template, all against one <c>ToHtmlHelper</c>.
/// </summary>
/// <remarks>
/// NOTE(review): both documents are <c>null</c> when they are used, so unless SaveXhtml and
/// DeleteXhtml are extension methods that tolerate null this throws a NullReferenceException
/// on the first call. This looks like an API usage sketch rather than working code — confirm.
/// </remarks>
public BuildSite()
{
    ToHtmlHelper htmlHelper = new ToHtmlHelper("", @"D:\_abc\myabc");

    TmpltXmlDocument templateDocument = null;
    templateDocument.SaveXhtml(htmlHelper);
    templateDocument.DeleteXhtml(htmlHelper);

    PageXmlDocument pageDocument = null;
    pageDocument.SaveXhtml(htmlHelper);
    pageDocument.DeleteXhtml(htmlHelper);

    // The explicitly typed loop variable performs the same per-item cast as before.
    foreach (SnipXmlElement snip in templateDocument.GetSnipElementList())
    {
        snip.SaveXhtml(htmlHelper);
        snip.DeleteXhtml(htmlHelper);
    }
}
/// <summary>
/// Converts the PDF at <paramref name="path"/> to PAGE-XML and writes the result next to it,
/// with the extension changed to <c>pagexml.xml</c>.
/// </summary>
/// <param name="path">Path of the PDF document to open.</param>
/// <remarks>
/// NOTE(review): every page is converted, but each conversion overwrites <c>Page</c>, so only
/// the last page of a multi-page document ends up in the output file — confirm intent.
/// </remarks>
public static void Run(string path)
{
    PageXmlDocument result = new PageXmlDocument();
    result.Metadata = new PageXmlMetadata()
    {
        Created = DateTime.UtcNow,
        LastChange = DateTime.UtcNow,
        Creator = "PdfPig",
        Comments = "", // algo used in here
    };
    result.Page = new PageXmlPage();
    result.PcGtsId = "pc-" + path.GetHashCode();

    using (PdfDocument pdf = PdfDocument.Open(path))
    {
        // Page numbers passed to GetPage start at 1.
        for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
        {
            Page pdfPage = pdf.GetPage(pageNumber);
            result.Page = FromPdfPage(pdfPage);
        }
    }

    File.WriteAllText(Path.ChangeExtension(path, "pagexml.xml"), result.Serialize());
}
/// <summary>
/// Rebuilds <c>_searchDocuments</c>, the set of documents to search, according to the
/// currently selected find scope.
/// </summary>
private void GetSearchDocuments()
{
    _searchDocuments.Clear();

    KeyValuePair<ISearch, IMarkPosition> active = new KeyValuePair<ISearch, IMarkPosition>();

    switch (FindOptions.Singler.FindScope)
    {
        // Current window only.
        case FindScope.CurrentForm:
            if (GetFormDocument((BaseViewForm)_mainFindForm.ActiveMdiChild, out active))
            {
                _currentForm = active.Value;
                _currentDocument = active.Key;
                _searchDocuments.Add(active.Key);
            }
            break;

        // All open windows: the active one goes first, then every other MDI child.
        case FindScope.AllOpenForm:
            if (GetFormDocument((BaseViewForm)_mainFindForm.ActiveMdiChild, out active))
            {
                _currentForm = active.Value;
                _currentDocument = active.Key;
                _searchDocuments.Add(active.Key);
            }

            foreach (BaseViewForm openForm in _mainFindForm.MdiChildren)
            {
                // The active child was handled above; short-circuiting skips the lookup for it.
                if (openForm != _mainFindForm.ActiveMdiChild && GetFormDocument(openForm, out active))
                {
                    _searchDocuments.Add(active.Key);
                }
            }
            break;

        // All channels: every page document except the one shown in the active window.
        // Template documents are not collected (that code is disabled in the original source).
        case FindScope.WholeChannels:
            if (GetFormDocument((BaseViewForm)_mainFindForm.ActiveMdiChild, out active))
            {
                _currentForm = active.Value;
                _currentDocument = active.Key;
            }

            string[] allPageIds = Service.Sdsite.CurrentDocument.GetAllPageId();
            foreach (string id in allPageIds)
            {
                PageXmlDocument candidate = Service.Sdsite.CurrentDocument.GetPageDocumentById(id);
                bool isActiveDocument = (candidate as ISearch) == active.Key;
                if (!isActiveDocument && candidate != null)
                {
                    _searchDocuments.Add(candidate);
                }
            }
            break;

        default:
            break;
    }
}
/// <summary>
/// Generates (or removes) the published files for a page, depending on whether the page was
/// deleted, is new, or has been modified since it was last published.
/// </summary>
/// <param name="simpleEle">The page's index element; it is cast to <c>PageSimpleExXmlElement</c>.</param>
public void BuildPageFile(SimpleExIndexXmlElement simpleEle)
{
    // Register the element in the dictionary first.
    AddElementToDictionary(simpleEle);

    PageSimpleExXmlElement page = (PageSimpleExXmlElement)simpleEle;
    string pageId = page.Id;
    string owningChannelId = page.OwnerChannelElement.Id; // Id of the channel that directly owns the page
    PageXmlDocument pageDocument = SdsiteDocument.GetPageDocumentById(pageId);

    // --- Deleted page ---
    if (page.IsDeletedRecursive)
    {
        // Per the original notes: an index page only removes its own files
        // (filename.sdpage / index.sdpage); other pages also remove the related
        // filename_head.inc, filename_content.inc and filename_list.inc files.
        pageDocument.DeleteXhtml(ToHtmlHelperObj);

        // Then look at the other files associated with this one.
        if (AsTmpltList(owningChannelId))
        {
            NewMethod(owningChannelId);
        }

        // Files that merely link to the deleted file are left alone.
        return;
    }

    // --- Newly created page (never published) ---
    if (!page.IsAlreadyPublished)
    {
        // Per the original notes: an index page creates filename.sdpage and index.shtml; other
        // page types create filename_head.inc and filename_content.inc.
        // todo: not implemented yet.

        // Check the files associated with this page.
        if (AsTmpltList(owningChannelId))
        {
            // Generates the filename_list.inc file; paths need no special handling here.
            NewMethod(owningChannelId);
        }

        return;
    }

    // --- Already published page ---
    // Has the file path changed? (Closely related to renaming.)
    if (page.IsChangedPosition)
    {
        // TODO: move the file to its new location first.
    }

    if (page.IsModified)
    {
        // Regenerate the associated pages. Regenerating the page's own files, path-related
        // pages and pages linking here after a CustomID change is not implemented yet.
        NewMethod(owningChannelId);
    }

    // Otherwise: if only the template's content layout changed, the content would still need
    // to be regenerated (not implemented).
}
/// <summary>
/// Builds a grid row for a page document: one cell per column, filled from the document
/// property whose name equals the column name, ordered by the columns' display index.
/// </summary>
/// <param name="columns">Columns whose <c>Name</c> selects the property and whose <c>DisplayIndex</c> orders the cells.</param>
/// <param name="pageDoc">The page document supplying the values; its <c>Id</c> becomes the row tag.</param>
/// <returns>The populated row.</returns>
public static RowEx GetDataGridViewRow(DataGridViewColumn[] columns, PageXmlDocument pageDoc)
{
    RowEx row = new RowEx();
    row.Tag = pageDoc.Id;

    SortedDictionary<int, DataGridViewCell> cellsByDisplayIndex = new SortedDictionary<int, DataGridViewCell>();

    foreach (DataGridViewColumn column in columns)
    {
        int displayIndex = column.DisplayIndex;
        object value = pageDoc.GetType().GetProperty(column.Name).GetValue(pageDoc, null);

        DataGridViewCell cell;
        bool flag;
        if (bool.TryParse(value.ToString(), out flag))
        {
            // Anything whose text parses as a boolean is rendered as an on/off image.
            cell = CreateFlagCell(column.Name, flag);
        }
        else if (value is PageSimpleState)
        {
            cell = CreateStateCell((PageSimpleState)value);
        }
        else
        {
            cell = CreateTextCell(column.Name, value);
        }

        cell.Tag = column.Name;
        cellsByDisplayIndex.Add(displayIndex, cell);
    }

    // The sorted dictionary yields its values in ascending display-index order.
    foreach (DataGridViewCell orderedCell in cellsByDisplayIndex.Values)
    {
        row.Cells.Add(orderedCell);
    }

    return row;
}

/// <summary>Creates the image cell for a boolean value; the image depends on column name and value.</summary>
private static DataGridViewCell CreateFlagCell(string columnName, bool flag)
{
    string resourceKey;
    switch (columnName)
    {
        case "IsOnceAd":
            resourceKey = flag ? "page.img.oldadbitmap" : "page.img.notoldbitmap";
            break;
        case "IsAd":
            resourceKey = flag ? "page.img.adbitmap" : "page.img.addbitmap";
            break;
        case "IsPublish":
            resourceKey = flag ? "page.img.publishbitmap" : "page.img.notPublishbitmap";
            break;
        default:
            resourceKey = flag ? "page.img.savebitmap" : "page.img.notsavebitmap";
            break;
    }

    DataGridViewCell cell = new DataGridViewImageCell();
    cell.ToolTipText = flag ? "TRUE" : "FALSE";
    cell.Value = ResourceService.GetResourceImage(resourceKey);
    return cell;
}

/// <summary>Creates the image cell for a page state; an unknown state asserts and leaves the cell empty.</summary>
private static DataGridViewCell CreateStateCell(PageSimpleState state)
{
    DataGridViewCell cell = new DataGridViewImageCell();

    if (state == PageSimpleState.New)
    {
        cell.ToolTipText = "NEW";
        cell.Value = ResourceService.GetResourceImage("page.img.newbitmap");
    }
    else if (state == PageSimpleState.Modified)
    {
        cell.ToolTipText = "MODIFIED";
        cell.Value = ResourceService.GetResourceImage("page.img.modifedbitmap");
    }
    else if (state == PageSimpleState.NotModified)
    {
        cell.ToolTipText = "NOTMODIFIED";
        cell.Value = ResourceService.GetResourceImage("page.img.notModifiedbitmap");
    }
    else
    {
        Debug.Assert(false);
    }

    return cell;
}

/// <summary>Creates a text cell showing the value; the AdTime sentinel timestamp is shown as empty.</summary>
private static DataGridViewCell CreateTextCell(string columnName, object value)
{
    DataGridViewCell cell = new DataGridViewTextBoxCell();
    cell.Value = value;
    cell.ToolTipText = cell.Value.ToString();

    // An AdTime with this exact text is blanked out in both value and tooltip.
    bool isBlankAdTime = columnName.Equals("AdTime") && value.ToString().Equals("9999-12-31 23:31:59");
    if (isBlankAdTime)
    {
        cell.Value = null;
        cell.ToolTipText = null;
    }

    return cell;
}