示例#1
0
        /// <summary>
        /// Runs the whole staticization pipeline: builds one context per page, downloads the HTML
        /// of every page, then loads each generated file and runs the registered behaviors and
        /// validations against it.
        /// </summary>
        /// <param name="pages">
        /// The pages to staticize. Each key is the absolute URL of a page and each value is the local
        /// path the page is saved to. URLs and paths must be unique. The sequence is enumerated exactly once.
        /// </param>
        /// <param name="stepTaken">
        /// The status object that is updated as the pipeline advances; pass an instance so that
        /// other threads can observe the progress of the run.
        /// </param>
        /// <returns>The staticization status; the same reference as <paramref name="stepTaken"/>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="pages"/> or <paramref name="stepTaken"/> is <c>null</c>.
        /// </exception>
        public StaticizeStepStatus Staticize(IEnumerable<KeyValuePair<Uri, String>> pages, StaticizeStepStatus stepTaken)
        {
            if (pages == null)
            {
                throw new ArgumentNullException("pages");
            }
            if (stepTaken == null)
            {
                throw new ArgumentNullException("stepTaken");
            }

            #region Initialization
            stepTaken.Step = StaticizeStep.Initialize;

            // Create one context per page. The sequence is materialized in a single pass so that a
            // lazy or changing enumerable cannot yield a different number of items than was counted.
            HtmlStaticizeContext[] entries = pages
                .Select(address => new HtmlStaticizeContext
                {
                    uri = address.Key,
                    fileName = address.Value,
                })
                .ToArray();

            stepTaken.pageCount = entries.Length;
            stepTaken.Init(entries);

            // NOTE(review): this registers the validation on every call; confirm that AddValidation
            // tolerates repeated registration when Staticize is invoked more than once.
            AddValidation(GenerationSuccessfulValidation.Instance);

            #endregion

            stepTaken.Step = StaticizeStep.GenerationHtml;

            // Generate the HTML files.
            Generate(entries, stepTaken);
            stepTaken.Step = StaticizeStep.GenerationHtmlCompleted;

            #region Validation

            stepTaken.Step = StaticizeStep.Validation;
            bool hasBehaviors = m_Behaviors != null && m_Behaviors.Count > 0;
            bool hasValidations = m_Validations != null && m_Validations.Count > 0;
            if (hasBehaviors || hasValidations)
            {
                for (int j = 0; j < entries.Length; j++)
                {
                    var entry = entries[j];

                    // A non-null generationError means the HTML of this page could not be generated.
                    if (entry.generationError != null)
                    {
                        var ex = entry.generationError;
                        var vd = new ValidationResult()
                        {
                            ValidationType = ValidationType.Tag,
                            Uri = entry.uri,
                            Name = "页面HTML是否成功生成。",
                            Message = string.Format("生成HTML期间发生错误:{0}\r\n{1}\r\n", ex.Message, ex.ToString()),
                            Exception = ex,
                        };
                        entry.validationResults = new ValidationResult[] { vd };
                        stepTaken.ValidationErrors.Add(vd);
                        // Use the same counter helper as the other branches instead of a raw increment.
                        stepTaken.AddValidatedPageCount();
                        continue;
                    }

                    // Try to load the generated file into a document tree.
                    var doc = new HtmlAgilityPack.HtmlDocument();
                    try
                    {
                        doc.Load(entry.fileName, System.Text.Encoding.UTF8);
                    }
                    catch (Exception ex)
                    {
                        // Loading failed: record the error and report it as a validation failure.
                        entry.DocumentLoadError = ex;
                        var vd = new ValidationResult()
                        {
                            ValidationType = ValidationType.Tag,
                            Uri = entry.uri,
                            Name = "页面HTML是否成功生成。",
                            Message = string.Format("加载HTML文档树期间发生错误:{0}\r\n{1}\r\n", ex.Message, ex.ToString()),
                            Exception = ex,
                        };
                        entry.validationResults = new ValidationResult[] { vd };
                        stepTaken.ValidationErrors.Add(vd);
                        stepTaken.AddValidatedPageCount();
                        continue;
                    }

                    // Behaviors run before validations and receive the loaded document.
                    if (hasBehaviors)
                    {
                        for (int k = 0; k < m_Behaviors.Count; k++)
                        {
                            m_Behaviors[k].Process(doc, entry);
                        }
                    }
                    if (hasValidations)
                    {
                        Validate(doc, entry, stepTaken);
                    }
                    stepTaken.AddValidatedPageCount();
                }
            }
            stepTaken.Step = StaticizeStep.ValidationCompleted;

            #endregion

            // Copy the errors collected on each context into the overall status.
            {
                var all = stepTaken.Errors;
                for (int i = 0; i < entries.Length; i++)
                {
                    var items = entries[i].Errors;
                    if (items != null && items.Count > 0)
                    {
                        all.AddRange(items);
                    }
                }
            }

            stepTaken.Step = StaticizeStep.Completed;
            return stepTaken;
        }
示例#2
0
 /// <summary>
 /// Downloads the page of every entry to its target file, processing the entries in parallel.
 /// </summary>
 /// <param name="entries">The contexts to download; each supplies the source URI and the target file name.</param>
 /// <param name="step">The status object whose generated-page counter is advanced after each successful download.</param>
 void Generate(HtmlStaticizeContext[] entries, StaticizeStepStatus step)
 {
     // Each entry gets its own WebClient; the delegate is run once per entry by Parallel.ForEach.
     Action<HtmlStaticizeContext> download = item =>
     {
         using (var client = new WebClient())
         {
             try
             {
                 client.DownloadFile(item.uri, item.fileName);
                 // Reached only when the download did not throw.
                 step.AddGeneratedPageCount();
             }
             catch (Exception failure)
             {
                 // A failed download is recorded on the entry instead of being rethrown,
                 // so the remaining entries are still processed.
                 item.generationError = failure;
                 item.Errors.Add(failure);
             }
         }
     };

     System.Threading.Tasks.Parallel.ForEach(entries, download);
 }
示例#3
0
 /// <summary>
 /// Runs the registered validations against a loaded document and records what they return
 /// on both the page context and the overall status.
 /// </summary>
 /// <param name="doc">The loaded HTML document to validate.</param>
 /// <param name="context">The context of the page; its validationResults receive the returned results.</param>
 /// <param name="stepTaken">The status object whose ValidationErrors receive the returned results.</param>
 void Validate(HtmlAgilityPack.HtmlDocument doc, HtmlStaticizeContext context, StaticizeStepStatus stepTaken)
 {
     // Nothing to do when no validations are registered.
     if (this.m_Validations == null)
     {
         return;
     }

     var findings = m_Validations.Validate(doc, context);
     if (findings == null || findings.Count <= 0)
     {
         return;
     }

     // Attach the results to the page: adopt the returned collection when the page has none yet,
     // otherwise append to the existing one.
     if (context.validationResults != null)
     {
         context.validationResults.AddRange(findings);
     }
     else
     {
         context.validationResults = findings;
     }

     stepTaken.ValidationErrors.AddRange(findings);
 }
示例#4
0
        /// <summary>
        /// Staticizes a fixed list of public URLs into a fresh batch directory under the application
        /// base directory, downloads their image resources, and saves the validation results
        /// to "validationResults.txt" in that directory.
        /// </summary>
        public void StaticizeTest1()
        {
            // Batch identifier, also used as the output directory name.
            String batchId = CreateBatchId();

            // Output directory.
            string outputDirectory = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, batchId);
            System.IO.Directory.CreateDirectory(outputDirectory);

            // The URLs below are generated as static HTML files; the files end up under bin.
            var urls = new[] {
                "http://www.zhihu.com/question/25519625",
                "http://www.zhihu.com/question/27232313",
                "http://www.zhihu.com/question/31291872",
                "http://www.zhihu.com/question/31293043",
                "http://www.zhihu.com/question/31318753",
                "http://cn.bing.com/",
                "http://36kr.com/"
            };

            List<KeyValuePair<Uri, String>> pages = new List<KeyValuePair<Uri, string>>(urls.Length);

            // Note: images, CSS and JS referenced through relative paths (without http://host/) are
            // downloaded automatically by Staticize and placed in the output directory. Resources
            // referenced through absolute paths, such as
            // http://img3.douban.com/misc/mixed_static/7011201580a8cbed.css, are not downloaded.
            //
            // Start at index 0 so that the first URL is staticized as well; the file name keeps
            // using the array index, so the names of the other pages are unchanged.
            for (int i = 0; i < urls.Length; i++)
            {
                string outputFile = System.IO.Path.Combine(outputDirectory, string.Concat("zihu-", i.ToString(), ".html"));
                pages.Add(new KeyValuePair<Uri, String>(new Uri(urls[i]), outputFile));
            }

            CreateDirectory(pages, outputDirectory);

            Staticizer staticize = new Staticizer();

            staticize.AddBehavior(
                new ImageResourcesDownloadBehavior(outputDirectory)
                );

            // Example of additional validations that can be registered (disabled here):
            //staticize.AddValidation(
            //    // verify that the CSS files are referenced
            //    ValidationProjection.HasCssLink("/resources/css/jquery-ui-themes.css"),
            //    ValidationProjection.HasCssLink("/resources/css/axure_rp_page.css"),
            //    // verify that the main DOM element (by id) of the page exists
            //    ValidationProjection.HasElement("main_container"),
            //    // verify that the JS files are referenced
            //    ValidationProjection.HasScriptLink("/data/sitemap.js"),
            //    ValidationProjection.HasScriptLink("/resources/scripts/jquery-1.7.1.min.js"),
            //    ValidationProjection.HasScriptLink("/resources/scripts/axutils.js"),
            //    ValidationProjection.HasScriptLink("/resources/scripts/jquery-ui-1.8.10.custom.min.js"),
            //    ValidationProjection.HasScriptLink("/resources/scripts/axurerp_beforepagescript.js"),
            //    ValidationProjection.HasScriptLink("/resources/scripts/messagecenter.js")
            //    );

            //staticize.AddValidation(
            //    // verify that the resources referenced by the HTML document exist
            //   ValidationProjection.ResourcesExisting(outputDirectory),
            //    // XPath
            //   ValidationProjection.XPathEquals("main_template.html", "main_container"),
            //   ValidationProjection.InternalALinkExisting(outputDirectory)
            //    );

            var stepTaken = new StaticizeStepStatus();

            var staticizeResults = staticize.Staticize(pages, stepTaken);

            var validationResults = staticizeResults.GetValidationResults();
            validationResults.Save(System.IO.Path.Combine(outputDirectory, "validationResults.txt"));
        }