Exemple #1
0
        /// <summary>
        /// Builds the PAGE-XML document for a page and returns it serialized as an XML string.
        /// </summary>
        /// <param name="page">The page to export.</param>
        /// <param name="includePaths">Passed through to <c>ToPageXmlPage</c>; per the original
        /// documentation, whether the PdfPaths present in the page are drawn.</param>
        /// <returns>The serialized PAGE-XML document.</returns>
        public string Get(Page page, bool includePaths)
        {
            // Start every export from a clean slate: counters back to zero, region order emptied.
            lineCount = 0;
            wordCount = 0;
            glyphCount = 0;
            regionCount = 0;
            groupOrderCount = 0;
            orderedRegions = new List<PageXmlDocument.PageXmlRegionRefIndexed>();

            var metadata = new PageXmlDocument.PageXmlMetadata
            {
                Created = DateTime.UtcNow,
                LastChange = DateTime.UtcNow,
                Creator = "PdfPig",
                // Records which segmenter / word extractor types were used.
                Comments = $"{pageSegmenter.GetType().Name}|{wordExtractor.GetType().Name}",
            };

            var document = new PageXmlDocument
            {
                Metadata = metadata,
                PcGtsId = "pc-" + page.GetHashCode()
            };

            document.Page = ToPageXmlPage(page, includePaths);

            return Serialize(document);
        }
Exemple #2
0
 /// <summary>
 /// Populates <c>Columns</c> from <c>PageXmlDocument.ToDataGridViewColumns()</c> the first
 /// time it is called; later calls do nothing while <c>Columns</c> is non-null.
 /// </summary>
 static public void InitColumn()
 {
     if (Columns != null)
     {
         return;
     }

     Columns = PageXmlDocument.ToDataGridViewColumns();
 }
Exemple #3
0
        /// <summary>
        /// Once the design browser has finished loading, replaces the body HTML with the
        /// text of the page document identified by <c>PageId</c>.
        /// </summary>
        void DesignWebBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            PageXmlDocument pageDocument = Service.Sdsite.CurrentDocument.GetPageDocumentById(PageId);

            HtmlElement body = this.DesignWebBrowser.Document.Body;
            body.InnerHtml = pageDocument.PageText;
        }
Exemple #4
0
 /// <summary>
 /// Loads the content document for a page. With a non-empty id the document is looked up
 /// in the current site document; otherwise an empty content template is loaded into the
 /// existing document.
 /// </summary>
 /// <param name="pageID">Id of the page to load; null or empty selects the blank template.</param>
 public PagePropertyHelper(string pageID)
 {
     if (!string.IsNullOrEmpty(pageID))
     {
         // Lookup failures propagate unchanged to the caller.
         _pageTextDoc = Service.Sdsite.CurrentDocument.GetPageDocumentById(pageID) as PageXmlDocument;
     }
     else
     {
         // NOTE(review): _pageTextDoc is not assigned anywhere in this constructor before
         // LoadXml is called; this path relies on a field initializer that is not visible
         // here - confirm, otherwise it throws NullReferenceException.
         //
         // Inside a verbatim string an empty attribute value must be written as four quotes;
         // content_id / content_title previously used two, which collapsed them into a single
         // malformed attribute.
         _pageTextDoc.LoadXml(@"<?xml version=""1.0"" encoding=""utf-8"" ?>
         <content content_id="""" content_title="""" 
         content_title_alias="""" created_time="""" design_summary="""" content_source="""" is_always_pub=""True"" pub_time=""""
         stop_time=""0001-1-1 0:00:00"" is_pub=""false"" is_modified=""true"" is_delete=""false"" pub_by="""" created_by=""""
         pub_by_alias="""" modify_by="""" modify_time="""" created_by_alias="""">
           <article_text />
           <article_summary></article_summary>
           <files>
             <file />
           </files>
           <tags>
           </tags>
         </content>");
     }
 }
        /// <summary>
        /// Round-trips a sample PAGE-XML file: deserializes it, serializes it again and writes
        /// the result beside the input with the extension changed to "new.xml".
        /// </summary>
        public static void Run()
        {
            // Hard-coded sample input (an alternative sample was Glyph_Sample01_General.xml).
            string pageFilePath = @"D:\MachineLearning\Document Layout Analysis\hocr\PAGE samples\aletheiaexamplepage_2019.xml";

            string roundTripped = PageXmlDocument.Deserialize(pageFilePath).Serialize();
            string outputPath = Path.ChangeExtension(pageFilePath, "new.xml");

            File.WriteAllText(outputPath, roundTripped);
        }
Exemple #6
0
        /// <summary>
        /// Applies the title typed in the toolbar text box to both the page's index element
        /// and its page document, saving each after the change.
        /// </summary>
        void titleToolStripTextBox_Validated(object sender, EventArgs e)
        {
            SdsiteXmlDocument siteDocument = Service.Sdsite.CurrentDocument;

            // First the index entry held by the site document...
            PageSimpleExXmlElement pageElement = siteDocument.GetPageElementById(_htmlDesigner.PageId);
            pageElement.Title = titleToolStripTextBox.Text;
            siteDocument.Save();

            // ...then the page document itself.
            PageXmlDocument pageDocument = siteDocument.GetPageDocumentById(_htmlDesigner.PageId);
            pageDocument.Title = titleToolStripTextBox.Text;
            pageDocument.Save();
        }
Exemple #7
0
        /// <summary>
        /// Serializes a PAGE-XML document to an indented UTF-8 XML string.
        /// </summary>
        /// <param name="pageXmlDocument">The document to serialize.</param>
        /// <returns>The XML text decoded from the bytes written by the XML writer.</returns>
        private string Serialize(PageXmlDocument pageXmlDocument)
        {
            var writerSettings = new XmlWriterSettings
            {
                Indent = true,
                IndentChars = indentChar,
                Encoding = System.Text.Encoding.UTF8,
            };
            var xmlSerializer = new XmlSerializer(typeof(PageXmlDocument));

            using (var buffer = new System.IO.MemoryStream())
            {
                using (var writer = XmlWriter.Create(buffer, writerSettings))
                {
                    xmlSerializer.Serialize(writer, pageXmlDocument);

                    // The buffer is read while the writer is still open, exactly as before.
                    // NOTE(review): Encoding.UTF8 normally emits a byte-order mark, so the
                    // returned string may begin with U+FEFF - confirm callers are fine with that.
                    byte[] xmlBytes = buffer.ToArray();
                    return System.Text.Encoding.UTF8.GetString(xmlBytes);
                }
            }
        }
Exemple #8
0
        /// <summary>
        /// Writes the values entered in the property form into the page's XML document,
        /// flags the page's index element as modified, and saves both the page document
        /// and the site document.
        /// </summary>
        /// <param name="property">Values collected from the form.</param>
        /// <param name="PageId">Id of the page whose document is updated.</param>
        public void WritePageTextProp(PageTextPropertyItem property, string PageId)
        {
            PageXmlDocument pageDocument = Service.Sdsite.CurrentDocument.GetPageDocumentById(PageId) as PageXmlDocument;
            PageSimpleExXmlElement pageElement = Service.Sdsite.CurrentDocument.GetPageElementById(PageId) as PageSimpleExXmlElement;

            pageElement.IsModified = true;

            pageDocument.Title = property.Title;
            pageDocument.PageTitleAlias = property.TitleAlias;
            pageDocument.PageCreateTime = property.DeliverTime;
            pageDocument.PageSummary = property.Summary;
            // NOTE(review): Author and AuthorAlias are both fed from property.AuthorAlias -
            // confirm this is intended.
            pageDocument.Author = property.AuthorAlias;
            pageDocument.AuthorAlias = property.AuthorAlias;
            pageDocument.ModifyAlias = property.ModifyBy;
            pageDocument.DesignSummary = property.DesignSummary;
            pageDocument.PageKeywords = property.tag.ToArray();
            pageDocument.ContentSource = property.ContentSource;

            // The is_always_pub and stop_time attributes, the tag nodes and the article text
            // are not written here; the earlier code that did so was already disabled.
            pageDocument.Save();
            Service.Sdsite.CurrentDocument.Save();
        }
Exemple #9
0
        /// <summary>
        /// Builds the PAGE-XML document for one dataset entry and returns it serialized as XML.
        /// </summary>
        /// <param name="page">The entry to convert; its <c>Id</c> is used in the document id.</param>
        /// <param name="categories">Category lookup passed through to <c>ToPageXmlPage</c>.</param>
        /// <returns>The serialized PAGE-XML document.</returns>
        private static string Get(CocoEntry page, Dictionary<int, string> categories)
        {
            var metadata = new PageXmlDocument.PageXmlMetadata
            {
                Created = DateTime.UtcNow,
                LastChange = DateTime.UtcNow,
                Creator = "PublayNetConverter",
                Comments = "PubLayNet dataset"
            };

            var document = new PageXmlDocument
            {
                Metadata = metadata,
                PcGtsId = "pc" + page.Id.ToString(),
                // Assigned last, so the page is still built after metadata and id.
                Page = ToPageXmlPage(page, categories)
            };

            return Serialize(document);
        }
Exemple #10
0
        /// <summary>
        /// Loads the page identified by <c>_pageId</c> into the form: resolves its index element,
        /// sets the window title, subscribes to title changes and hosts the HTML designer.
        /// If the element or its file is missing, an error is shown and the form closes itself.
        /// </summary>
        protected override void OnLoad(EventArgs e)
        {
            Debug.Assert(!string.IsNullOrEmpty(_pageId));

            PageSimpleExXmlElement pageElement = Service.Sdsite.CurrentDocument.GetPageElementById(_pageId);
            bool fileAvailable = pageElement != null && File.Exists(pageElement.AbsoluteFilePath);

            if (!fileAvailable)
            {
                MessageService.Show("文件不存在,打开失败!", MessageBoxButtons.OK, MessageBoxIcon.Error);

                // Close is not called directly here; it is deferred to a one-shot timer tick.
                // base.OnLoad is not invoked on this path.
                Timer closeTimer = new Timer { Interval = 10 };
                closeTimer.Tick += (tickSender, tickArgs) =>
                {
                    closeTimer.Stop();
                    closeTimer.Dispose();
                    this.Close();
                };
                closeTimer.Start();
                return;
            }

            _pageDoc = pageElement.GetIndexXmlDocument();

            Text = pageElement.Title;
            Service.Sdsite.CurrentDocument.ElementTitleChanged += CurrentDocument_ElementTitleChanged;

            // Set up the designer control and host it in this form.
            _htmldesign = new HTMLDesignerEx(PageId);
            Controls.Add(_htmldesign);

            // The title is looked up again from the site document, as before.
            _htmldesign.PageTitle = Service.Sdsite.CurrentDocument.GetPageElementById(_pageId).Title;
            _designWebB = _htmldesign.DesignWebBrowser;

            base.OnLoad(e);
        }
Exemple #11
0
        /// <summary>
        /// Creates the property panel for a content page and loads the page text.
        /// </summary>
        /// <param name="pageId">Id of the page whose properties are edited.</param>
        public PagePropertyPanel(string pageId)
        {
            InitializeComponent();
            _pageID = pageId;

            PageSimpleExXmlElement pageElement = (PageSimpleExXmlElement)Service.Sdsite.CurrentDocument.GetElementById(pageId);
            // Read but not used further in this constructor.
            string filePath = pageElement.AbsoluteFilePath;

            PageXmlDocument pageDocument = Service.Sdsite.CurrentDocument.GetPageDocumentById(pageId);

            // The helper is created whether or not the page document was found.
            mgr = new PagePropertyHelper(pageId);

            // Without a page document, fall back to the text read by the helper.
            _pageText = pageDocument == null
                ? mgr.ReadPageText()
                : pageDocument.PageText;
        }
        /// <summary>
        /// Calls SaveXhtml / DeleteXhtml on a template document, a page document and each
        /// snip element of the template, using a helper bound to a fixed local directory.
        /// </summary>
        public BuildSite()
        {
            // NOTE(review): both document locals are null when their methods are invoked, so
            // unless SaveXhtml/DeleteXhtml are null-tolerant extension methods this constructor
            // fails at the first call - this looks like scratch code; confirm before relying on it.
            ToHtmlHelper helper = new ToHtmlHelper("", @"D:\_abc\myabc");

            TmpltXmlDocument template = null;
            template.SaveXhtml(helper);
            template.DeleteXhtml(helper);

            PageXmlDocument page = null;
            page.SaveXhtml(helper);
            page.DeleteXhtml(helper);

            foreach (SnipXmlElement snippet in template.GetSnipElementList())
            {
                snippet.SaveXhtml(helper);
                snippet.DeleteXhtml(helper);
            }
        }
        /// <summary>
        /// Opens the PDF at <paramref name="path"/>, converts its pages to PAGE-XML and writes the
        /// serialized document beside the input with the extension changed to "pagexml.xml".
        /// </summary>
        /// <param name="path">Path of the PDF file to convert.</param>
        public static void Run(string path)
        {
            var metadata = new PageXmlMetadata
            {
                Created = DateTime.UtcNow,
                LastChange = DateTime.UtcNow,
                Creator = "PdfPig",
                Comments = "", // placeholder for the algorithm used
            };

            var pageXml = new PageXmlDocument
            {
                Metadata = metadata,
                Page = new PageXmlPage(),
                PcGtsId = "pc-" + path.GetHashCode()
            };

            using (PdfDocument pdf = PdfDocument.Open(path))
            {
                // PDF page numbers are 1-based.
                for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
                {
                    // NOTE(review): every iteration replaces Page, so only the last page of the
                    // PDF ends up in the written file - confirm that is intended.
                    pageXml.Page = FromPdfPage(pdf.GetPage(pageNumber));
                }
            }

            string outputPath = Path.ChangeExtension(path, "pagexml.xml");
            File.WriteAllText(outputPath, pageXml.Serialize());
        }
Exemple #14
0
        /// <summary>
        /// Rebuilds the set of documents to search according to the configured find scope.
        /// </summary>
        private void GetSearchDocuments()
        {
            _searchDocuments.Clear();
            KeyValuePair<ISearch, IMarkPosition> found;

            switch (FindOptions.Singler.FindScope)
            {
                case FindScope.CurrentForm:
                    // Current window only.
                    if (TrackActiveFormDocument(out found))
                    {
                        _searchDocuments.Add(found.Key);
                    }
                    break;

                case FindScope.AllOpenForm:
                    // All open forms: the active one first, then every other MDI child.
                    if (TrackActiveFormDocument(out found))
                    {
                        _searchDocuments.Add(found.Key);
                    }
                    foreach (BaseViewForm child in _mainFindForm.MdiChildren)
                    {
                        if (child != _mainFindForm.ActiveMdiChild && GetFormDocument(child, out found))
                        {
                            _searchDocuments.Add(found.Key);
                        }
                    }
                    break;

                case FindScope.WholeChannels:
                    // All channels: the active form is tracked but not added; every page document
                    // is added except the one matching the active form's document. Template
                    // documents are not included.
                    TrackActiveFormDocument(out found);
                    foreach (string pageId in Service.Sdsite.CurrentDocument.GetAllPageId())
                    {
                        PageXmlDocument pageDocument = Service.Sdsite.CurrentDocument.GetPageDocumentById(pageId);
                        bool isActiveDocument = (pageDocument as ISearch) == found.Key;
                        if (!isActiveDocument && pageDocument != null)
                        {
                            _searchDocuments.Add(pageDocument);
                        }
                    }
                    break;

                default:
                    break;
            }
        }

        /// <summary>
        /// Resolves the document of the active MDI child and, on success, records it as the
        /// current form and current document.
        /// </summary>
        /// <param name="pair">Receives whatever <c>GetFormDocument</c> produced.</param>
        /// <returns>The result of <c>GetFormDocument</c>.</returns>
        private bool TrackActiveFormDocument(out KeyValuePair<ISearch, IMarkPosition> pair)
        {
            if (!GetFormDocument((BaseViewForm)_mainFindForm.ActiveMdiChild, out pair))
            {
                return false;
            }

            _currentForm = pair.Value;
            _currentDocument = pair.Key;
            return true;
        }
Exemple #15
0
        /// <summary>
        /// Generates (or removes) the published files for a page element.
        /// </summary>
        /// <param name="simpleEle">Index element of the page; it is cast to
        /// <c>PageSimpleExXmlElement</c>.</param>
        public void BuildPageFile(SimpleExIndexXmlElement simpleEle)
        {
            // Register the element in the dictionary first.
            AddElementToDictionary(simpleEle);

            PageSimpleExXmlElement page = (PageSimpleExXmlElement)simpleEle;
            string id = page.Id;
            string ownerChannelId = page.OwnerChannelElement.Id; // channel that directly owns the page

            PageXmlDocument pageDocument = SdsiteDocument.GetPageDocumentById(id);

            if (page.IsDeletedRecursive)
            {
                // Index pages: delete their own files (filename.sdpage, index.sdpage). Other pages
                // additionally have filename_head.inc, filename_content.inc and filename_list.inc.
                pageDocument.DeleteXhtml(ToHtmlHelperObj);

                // Then handle files associated with this one. Files that merely link to the
                // deleted file are not processed.
                if (AsTmpltList(ownerChannelId))
                {
                    NewMethod(ownerChannelId);
                }
                return;
            }

            if (!page.IsAlreadyPublished)
            {
                // Newly created page. Index pages get filename.sdpage and index.shtml; other page
                // types get filename_head.inc / filename_content.inc.
                // todo: generating the page's own files is not implemented yet.

                // Associated files: regenerate filename_list.inc (paths need no attention here).
                if (AsTmpltList(ownerChannelId))
                {
                    NewMethod(ownerChannelId);
                }
                return;
            }

            // Already published: has the file path changed (closely tied to renaming)?
            if (page.IsChangedPosition)
            {
                // Move the file to its new location first (not implemented).
            }

            if (page.IsModified)
            {
                // Regenerating the page's own files is not implemented; only associated pages
                // are rebuilt. Path-related pages, and pages linking here after a CustomID
                // change, would also need regenerating.
                NewMethod(ownerChannelId);
            }
            // Otherwise: if the template's content layout changed, the content would still
            // need regenerating (not implemented).
        }
Exemple #16
0
        /// <summary>
        /// Builds a grid row for a page document. For each column, the document property whose
        /// name equals the column name is read via reflection and rendered as a cell; cells are
        /// added to the row in ascending <c>DisplayIndex</c> order.
        /// </summary>
        /// <param name="columns">Columns to render; each column name must match a property name.</param>
        /// <param name="pageDoc">Document supplying the values; its <c>Id</c> becomes the row tag.</param>
        /// <returns>The populated row.</returns>
        /// <remarks>
        /// A column with no matching property, or a property whose value is null, yields an empty
        /// text cell instead of a <see cref="NullReferenceException"/>.
        /// </remarks>
        static public RowEx GetDataGridViewRow(DataGridViewColumn[] columns, PageXmlDocument pageDoc)
        {
            RowEx row = new RowEx();
            row.Tag = pageDoc.Id;

            // Keyed by display index so enumeration yields cells in display order.
            // Two columns sharing a DisplayIndex still raise ArgumentException, as before.
            SortedDictionary<int, DataGridViewCell> cellsByDisplayIndex = new SortedDictionary<int, DataGridViewCell>();

            foreach (DataGridViewColumn column in columns)
            {
                var property = pageDoc.GetType().GetProperty(column.Name);
                object value = property == null ? null : property.GetValue(pageDoc, null);

                DataGridViewCell cell = CreateCellForValue(column.Name, value);
                cell.Tag = column.Name;
                cellsByDisplayIndex.Add(column.DisplayIndex, cell);
            }

            foreach (DataGridViewCell orderedCell in cellsByDisplayIndex.Values)
            {
                row.Cells.Add(orderedCell);
            }

            return row;
        }

        /// <summary>
        /// Picks the cell type for a property value: image cell for boolean-like values and
        /// <c>PageSimpleState</c>, text cell for everything else.
        /// </summary>
        private static DataGridViewCell CreateCellForValue(string columnName, object value)
        {
            if (value == null)
            {
                // Nothing to show; leave both Value and ToolTipText unset.
                return new DataGridViewTextBoxCell();
            }

            // Anything whose text parses as a boolean (including real bools) is shown as an icon.
            bool flag;
            if (bool.TryParse(value.ToString(), out flag))
            {
                return CreateBooleanImageCell(columnName, flag);
            }

            if (value is PageSimpleState)
            {
                return CreateStateImageCell((PageSimpleState)value);
            }

            DataGridViewCell textCell = new DataGridViewTextBoxCell();
            textCell.Value = value;
            textCell.ToolTipText = value.ToString();

            // An AdTime equal to this exact text is displayed as blank.
            // NOTE(review): presumably a "no ad time" sentinel - confirm the 23:31:59 value.
            if (columnName.Equals("AdTime") && value.ToString().Equals("9999-12-31 23:31:59"))
            {
                textCell.Value = null;
                textCell.ToolTipText = null;
            }

            return textCell;
        }

        /// <summary>
        /// Creates the icon cell for a boolean column; the icon depends on column name and value.
        /// </summary>
        private static DataGridViewCell CreateBooleanImageCell(string columnName, bool flag)
        {
            string imageKey;
            switch (columnName)
            {
                case "IsOnceAd":
                    imageKey = flag ? "page.img.oldadbitmap" : "page.img.notoldbitmap";
                    break;

                case "IsAd":
                    imageKey = flag ? "page.img.adbitmap" : "page.img.addbitmap";
                    break;

                case "IsPublish":
                    imageKey = flag ? "page.img.publishbitmap" : "page.img.notPublishbitmap";
                    break;

                default:
                    imageKey = flag ? "page.img.savebitmap" : "page.img.notsavebitmap";
                    break;
            }

            DataGridViewCell cell = new DataGridViewImageCell();
            cell.ToolTipText = flag ? "TRUE" : "FALSE";
            cell.Value = ResourceService.GetResourceImage(imageKey);
            return cell;
        }

        /// <summary>
        /// Creates the icon cell for a <c>PageSimpleState</c> value. An unexpected state trips a
        /// debug assertion and yields an image cell with no value.
        /// </summary>
        private static DataGridViewCell CreateStateImageCell(PageSimpleState state)
        {
            DataGridViewCell cell = new DataGridViewImageCell();
            switch (state)
            {
                case PageSimpleState.New:
                    cell.ToolTipText = "NEW";
                    cell.Value = ResourceService.GetResourceImage("page.img.newbitmap");
                    break;

                case PageSimpleState.Modified:
                    cell.ToolTipText = "MODIFIED";
                    cell.Value = ResourceService.GetResourceImage("page.img.modifedbitmap");
                    break;

                case PageSimpleState.NotModified:
                    cell.ToolTipText = "NOTMODIFIED";
                    cell.Value = ResourceService.GetResourceImage("page.img.notModifiedbitmap");
                    break;

                default:
                    Debug.Assert(false);
                    break;
            }

            return cell;
        }