Esempio n. 1
0
 /// <summary>
 /// Parses the given text source into <paramref name="htmldoc"/> under <paramref name="parentElement"/>,
 /// using a parser obtained from <c>GetHtmlParser</c>.
 /// </summary>
 /// <param name="snapSource">The text source handed to the parser.</param>
 /// <param name="htmldoc">The target document; it is cast to <c>LayoutFarm.WebDom.Impl.HtmlDocument</c>.</param>
 /// <param name="parentElement">The element passed to the parser as the parent for the parsed content.</param>
 public static void ParseHtmlDom(TextSource snapSource, IHtmlDocument htmldoc, WebDom.DomElement parentElement)
 {
     var parser = GetHtmlParser();
     try
     {
         parser.Parse(snapSource, (LayoutFarm.WebDom.Impl.HtmlDocument)htmldoc, parentElement);
     }
     finally
     {
         // Always hand the parser back, even when the cast or the parse throws,
         // so a failed parse does not leave the parser un-freed.
         FreeHtmlParser(parser);
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Initializes the master view: sets up the view, processes the scope with its handler,
        /// rewrites action URLs, resolves URIs against <c>VirtualPath</c> and collects the render adapters.
        /// Each stage is bracketed by Begin/End entries written to the HTTP trace.
        /// </summary>
        /// <param name="context">The view context forwarded to <c>InitializeView</c>.</param>
        void IMasterView.Initialize( ViewContext context )
        {
            InitializeView( context );

              // Direct cast: the scope is required to be an IHtmlDocument here.
              Document = (IHtmlDocument) Scope;

              // Stage 1: look up the handler for this view's virtual path.
              HttpContext.Trace.Write( "JumonyMasterView", "Begin GetViewHandler" );
              var handler = GetHandler( VirtualPath );
              HttpContext.Trace.Write( "JumonyMasterView", "End GetViewHandler" );

              // Stage 2: run the handler over the scope, between the pre/post process hooks.
              HttpContext.Trace.Write( "JumonyMasterView", "Begin Process" );
              OnPreProcess();
              ProcessScope( handler );
              OnPostProcess();
              HttpContext.Trace.Write( "JumonyMasterView", "End Process" );

              // Stage 3: process action URLs inside the scope.
              HttpContext.Trace.Write( "JumonyMasterView", "Begin ProcessActionRoutes" );
              Url.ProcessActionUrls( Scope );
              HttpContext.Trace.Write( "JumonyMasterView", "End ProcessActionRoutes" );

              // Stage 4: resolve URIs in the scope using the view's virtual path.
              HttpContext.Trace.Write( "JumonyMasterView", "Begin ResolveUri" );
              Url.ResolveUri( Scope, VirtualPath );
              HttpContext.Trace.Write( "JumonyMasterView", "End ResolveUri" );

              // The same handler also determines which render adapters are used.
              RenderAdapters = GetRenderAdapters( handler );
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a <see cref="User"/> from the account settings page and the profile settings page.
        /// </summary>
        /// <param name="accountSettingsPage">Page whose <c>#setting</c> form contains the <c>login_name</c> and <c>email</c> inputs.</param>
        /// <param name="profileSettingsPage">Page that contains the <c>fullName</c> input and the <c>#jsid-profile-avatar</c> element.</param>
        /// <returns>The user populated with the user name, email address, full name and avatar URI.</returns>
        /// <exception cref="NineGagException">Thrown when any of the values cannot be read from the pages; the underlying exception is attached as the inner exception.</exception>
        public static User FromHtml(IHtmlDocument accountSettingsPage, IHtmlDocument profileSettingsPage)
        {
            User parsedUser = new User();

            // Account settings: user name and email address come from inputs of the "#setting" form.
            try
            {
                IElement settingsForm = accountSettingsPage.QuerySelector("#setting");

                var loginInput = settingsForm.QuerySelectorAll("input").FirstOrDefault(candidate => candidate.GetAttribute("name") == "login_name");
                parsedUser.UserName = loginInput.GetAttribute("value");

                var emailInput = settingsForm.QuerySelectorAll("input").FirstOrDefault(candidate => candidate.GetAttribute("name") == "email");
                parsedUser.EmailAddress = emailInput.GetAttribute("value");
            }
            catch (Exception parseFailure)
            {
                throw new NineGagException("The user name and the email address could not be parsed. This could be an indicator, that the 9GAG website is down or its content has changed. If this problem keeps coming, then please report this problem to 9GAG or the maintainer of the library.", parseFailure);
            }

            // Profile settings: full name comes from an input, the avatar from the image's "src" attribute.
            try
            {
                var fullNameInput = profileSettingsPage.QuerySelectorAll("input").FirstOrDefault(candidate => candidate.GetAttribute("name") == "fullName");
                parsedUser.FullName = fullNameInput.GetAttribute("value");

                var avatarSource = profileSettingsPage.QuerySelector("#jsid-profile-avatar").GetAttribute("src");
                parsedUser.AvatarUri = new Uri(avatarSource, UriKind.Absolute);
            }
            catch (Exception parseFailure)
            {
                throw new NineGagException("The full name and the avatar image could not be parsed. This could be an indicator, that the 9GAG website is down or its content has changed. If this problem keeps coming, then please report this problem to 9GAG or the maintainer of the library.", parseFailure);
            }

            return parsedUser;
        }
Esempio n. 4
0
 /// <summary>
 /// Creates a <see cref="DocumentWrapper"/> by parsing the supplied bytes as an HTML document.
 /// </summary>
 /// <param name="buffer">The raw bytes of the document.</param>
 public DocumentWrapper(IEnumerable<byte> buffer)
 {
     var htmlParser = new HtmlParser();
     byte[] rawBytes = buffer.ToArray();

     // The stream is only needed while parsing, so it is released right afterwards.
     using (var input = new MemoryStream(rawBytes))
     {
         this.document = htmlParser.Parse(input);
     }
 }
Esempio n. 5
0
		/// <summary>
		/// Creates a PreMailer for the given markup; the markup is parsed immediately.
		/// </summary>
		/// <param name="html">The HTML input to parse into the working document.</param>
		/// <param name="baseUri">Optional URL that relative URLs are based on; defaults to <c>null</c>.</param>
		public PreMailer(string html, Uri baseUri = null)
		{
			_baseUri = baseUri;

			var htmlParser = new HtmlParser();
			_document = htmlParser.Parse(html);

			// Start with no warnings and fresh CSS parsers.
			_warnings = new List<string>();
			_cssParser = new CssParser();
			_cssSelectorParser = new CssSelectorParser();
		}
Esempio n. 6
0
    /// <summary>
    /// Performs data binding on a document.
    /// </summary>
    /// <param name="document">The document to data-bind.</param>
    /// <param name="dataContext">The data object used to create the <c>BindingContext</c> shared by all bindings.</param>
    protected virtual void DataBind( IHtmlDocument document, object dataContext )
    {
      // Bindings found in the document are applied in ascending priority order.
      var documentBindings = FindBindings( document );
      var orderedBindings = documentBindings.OrderBy( binding => binding.Priority );

      var bindingContext = new BindingContext( this, dataContext );
      orderedBindings.ForAll( binding => binding.DataBind( bindingContext ) );

      // Afterwards every element returned by Elements() is bound with the same context.
      var documentElements = document.Elements();
      documentElements.ForAll( element => DataBind( element, bindingContext ) );
    }
Esempio n. 7
0
    /// <summary>
    /// Creates a BindingManager for a document.
    /// </summary>
    /// <param name="document">The document to data-bind; its DOM modifier must implement <c>ISynchronizedDomModifier</c>.</param>
    /// <exception cref="NotSupportedException">Thrown when the document's DOM modifier is not an <c>ISynchronizedDomModifier</c>.</exception>
    public BindingManager( IHtmlDocument document )
    {
      Document = document;

      var synchronizedModifier = document.DomModifier as ISynchronizedDomModifier;
      if ( synchronizedModifier != null )
      {
        // Reuse the modifier's synchronization root as this manager's own.
        SyncRoot = synchronizedModifier.SyncRoot;
        return;
      }

      throw new NotSupportedException();
    }
Esempio n. 8
0
File: WebForm.cs Progetto: vknet/vk
		/// <summary>
		/// Creates a web form from the result of a web call.
		/// </summary>
		/// <param name="result">The web call result supplying the cookies, request URL, response body and response URL.</param>
		private WebForm(WebCallResult result)
		{
			Cookies = result.Cookies;
			OriginalUrl = result.RequestUrl.OriginalString;

			_html = new HtmlParser().Parse(result.Response);

			// Base URL of the response in the form scheme://host:port (the port is always included).
			var responseUri = result.ResponseUrl;
			_responseBaseUrl = string.Concat(responseUri.Scheme, "://", responseUri.Host, ":", responseUri.Port);

			_inputs = ParseInputs();
		}
Esempio n. 9
0
    /// <summary>
    /// Sets the HTML specification used by a document.
    /// </summary>
    /// <param name="document">The document.</param>
    /// <param name="specificationType">Name of the specification to use.</param>
    /// <returns>The specification that was selected and assigned to the document.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
    /// <exception cref="NotSupportedException">The document is not a <c>DomDocument</c>.</exception>
    /// <exception cref="InvalidOperationException">The document already has a specification.</exception>
    public HtmlSpecificationBase SetHtmlSpecification( IHtmlDocument document, string specificationType )
    {
      if ( document == null )
      {
        throw new ArgumentNullException( "document" );
      }

      var target = document as DomDocument;
      if ( target == null )
      {
        throw new NotSupportedException( "只能处理指定类型的文档" );
      }

      // The specification may only be assigned once per document.
      var alreadySpecified = target.HtmlSpecification != null;
      if ( alreadySpecified )
      {
        throw new InvalidOperationException();
      }

      return target.HtmlSpecification = SelectSpecification( specificationType );
    }
Esempio n. 10
0
    /// <summary>
    /// Changes the URI of a document.
    /// </summary>
    /// <param name="document">The document to modify.</param>
    /// <param name="uri">The new document URI.</param>
    /// <exception cref="InvalidOperationException">The document is not a <c>DomDocument</c>.</exception>
    public void ResolveUri( IHtmlDocument document, Uri uri )
    {
      lock ( _sync )
      {
        // The version counter is bumped first (wrapping on overflow), before the document type is checked.
        unchecked { _version++; }

        var target = document as DomDocument;
        if ( target != null )
        {
          target.DocumentUri = uri;
          return;
        }

        throw new InvalidOperationException();
      }
    }
Esempio n. 11
0
    /// <summary>
    /// Creates a mail message whose alternate view is built from the given document via <c>CreateView</c>.
    /// </summary>
    /// <param name="document">The document to send as the message's HTML view.</param>
    /// <returns>The new message; the caller owns it and is responsible for disposing it.</returns>
    public static MailMessage CreateMail( IHtmlDocument document )
    {
      var message = new MailMessage();
      try
      {
        // NOTE(review): the sender and recipient literals are kept exactly as they appear in the source.
        message.From = new MailAddress( "*****@*****.**" );
        message.To.Add( new MailAddress( "*****@*****.**" ) );

        message.AlternateViews.Add( CreateView( document ) );

        return message;
      }
      catch
      {
        // The message never reaches the caller on failure, so release it here instead of leaking it.
        message.Dispose();
        throw;
      }
    }
Esempio n. 12
0
    /// <summary>
    /// Lazily yields a linked resource for every element of the document that has a usable <c>src</c> attribute.
    /// </summary>
    /// <param name="document">The document to scan; its <c>DocumentUri</c> is the base for resolving <c>src</c> values.</param>
    /// <returns>One resource per element whose <c>src</c> value is non-blank and resolves to a URI.</returns>
    public static IEnumerable<LinkedResource> GetResources( IHtmlDocument document )
    {
      foreach ( var sourcedElement in document.Find( "[src]" ) )
      {
        var rawSource = sourcedElement.Attribute( "src" ).Value();

        // Blank values and values that cannot be combined with the document URI are skipped.
        Uri absoluteUrl;
        if ( !string.IsNullOrWhiteSpace( rawSource ) && Uri.TryCreate( document.DocumentUri, rawSource, out absoluteUrl ) )
        {
          yield return LoadResource( absoluteUrl );
        }
      }
    }
Esempio n. 13
0
    /// <summary>
    /// Returns the translation terms for a document, taken from the ".translation" file next to it
    /// when they validate against the document and created through <c>CreateTermsData</c> otherwise.
    /// </summary>
    /// <param name="document">The document; its URI must be a file URI.</param>
    /// <returns>The loaded terms if they pass <c>ValidateTerms</c>, otherwise the result of <c>CreateTermsData</c>.</returns>
    /// <exception cref="InvalidOperationException">The document URI is not a file URI.</exception>
    private static TranslationTerm[] EnsureTermsData( IHtmlDocument document )
    {
      var documentUri = document.DocumentUri;
      if ( !documentUri.IsFile )
      {
        throw new InvalidOperationException();
      }

      // The terms file path is the document's local path with ".translation" appended.
      var translationPath = documentUri.LocalPath + ".translation";
      var loadedTerms = LoadTerms( translationPath );

      if ( ValidateTerms( document, loadedTerms ) )
      {
        return loadedTerms;
      }

      return CreateTermsData( document, translationPath );
    }
Esempio n. 14
0
    /// <summary>
    /// Renders the document as UTF-8 HTML into an alternate mail view and attaches the resources
    /// returned by <c>GetResources</c> as linked resources.
    /// </summary>
    /// <param name="document">The document to render; its <c>DocumentUri</c> becomes the view's base URI.</param>
    /// <returns>A Base64-encoded "text/html" view; the caller owns it and is responsible for disposing it.</returns>
    public static AlternateView CreateView( IHtmlDocument document )
    {
      var stream = new MemoryStream();
      AlternateView view = null;

      try
      {
        document.Render( stream, Encoding.UTF8 );

        // Rewind so the view reads the rendered markup from the beginning.
        stream.Seek( 0, SeekOrigin.Begin );

        view = new AlternateView( stream, "text/html" );
        view.TransferEncoding = TransferEncoding.Base64;
        view.BaseUri = document.DocumentUri;

        foreach ( var resource in GetResources( document ) )
          view.LinkedResources.Add( resource );

        return view;
      }
      catch
      {
        // Nothing reaches the caller on failure: dispose the view if it was created
        // (it holds the stream and any resources already added), otherwise the bare stream.
        if ( view != null )
          view.Dispose();
        else
          stream.Dispose();

        throw;
      }
    }
Esempio n. 15
0
    /// <summary>
    /// Checks stored terms against the terms extracted from the document and, while they match,
    /// copies each extracted term's <c>TextNode</c> onto the stored term.
    /// </summary>
    /// <param name="document">The document the terms are extracted from.</param>
    /// <param name="terms">The stored terms; may be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when both sets have the same length and every <c>SourceTerm</c> matches in order;
    /// otherwise <c>false</c>. Terms before the first mismatch have already received their text node.
    /// </returns>
    private static bool ValidateTerms( IHtmlDocument document, TranslationTerm[] terms )
    {
      if ( terms == null )
      {
        return false;
      }

      var extracted = ExtractTerms( document );
      var termCount = terms.Length;

      if ( termCount != extracted.Length )
      {
        return false;
      }

      var index = 0;
      while ( index < termCount )
      {
        if ( terms[index].SourceTerm != extracted[index].SourceTerm )
        {
          return false;
        }

        terms[index].TextNode = extracted[index].TextNode;
        index++;
      }

      return true;
    }
Esempio n. 16
0
        /// <summary>
        /// Parses the detail information of the post and adds the image found on the details page to its content.
        /// </summary>
        /// <param name="htmlDocument">The HTML document that contains the details page of the post.</param>
        /// <exception cref="NineGagException">Thrown when the image cannot be read from the details page; the underlying exception is attached as the inner exception.</exception>
        protected override void ParseDetailInformation(IHtmlDocument htmlDocument)
        {
            base.ParseDetailInformation(htmlDocument);

            try
            {
                // The image is the first "article img" element; its "src" must be an absolute URI.
                var imageSource = htmlDocument.QuerySelector("article img").GetAttribute("src");
                var largeImage = new Content
                {
                    Uri = new Uri(imageSource, UriKind.Absolute),
                    Kind = ContentKind.Jpeg
                };

                // Union keeps the existing entries and only adds the image when it is not already present.
                var additions = new List<Content> { largeImage };
                this.Content = this.Content.Union(additions).ToList();
            }
            catch (Exception parseFailure)
            {
                throw new NineGagException("The larger version of the content of the image post could not be retrieved. Maybe there is no internet connection available.", parseFailure);
            }
        }
Esempio n. 17
0
    /// <summary>
    /// Initializes the master view: sets up the view, processes the scope with its handler,
    /// data-binds the document against the view data and collects the render adapters.
    /// The handler lookup and the processing stage are bracketed by Begin/End entries in the HTTP trace.
    /// </summary>
    /// <param name="context">The view context forwarded to <c>InitializeView</c>.</param>
    void IMasterView.Initialize( ViewContext context )
    {
      InitializeView( context );

      // Direct cast: the scope is required to be an IHtmlDocument here.
      Document = (IHtmlDocument) Scope;


      // Stage 1: look up the handler for this view's virtual path.
      HttpContext.Trace.Write( "JumonyMasterView", "Begin GetViewHandler" );
      var handler = GetHandler( VirtualPath );
      HttpContext.Trace.Write( "JumonyMasterView", "End GetViewHandler" );

      // Stage 2: run the handler over the scope, between the pre/post process hooks.
      HttpContext.Trace.Write( "JumonyMasterView", "Begin Process" );
      OnPreProcess();
      ProcessScope( handler );
      OnPostProcess();
      HttpContext.Trace.Write( "JumonyMasterView", "End Process" );


      // Stage 3: bind the view data using the element binders, the expression binders
      // and an ActionUrlBinder built from Url and the document's HTML specification.
      Document.DataBind( ViewContext.ViewData, HtmlBinding.ElementBinders, HtmlBinding.ExpressionBinders, new ActionUrlBinder( Url, Document.HtmlSpecification ) );


      // The same handler also determines which render adapters are used.
      RenderAdapters = GetRenderAdapters( handler );
    }
Esempio n. 18
0
    /// <summary>
    /// Adds all style and script references to the document's <c>head</c> element, creating that element when it is missing.
    /// </summary>
    /// <param name="document">The document to modify; nothing happens when it is <c>null</c>.</param>
    /// <param name="clearReferenceFirst">When <c>true</c> (the default), <c>ClearAllReference</c> is called on the document first.</param>
    public void AddAllReference( IHtmlDocument document, bool clearReferenceFirst = true )
    {
      if ( document == null )
      {
        return;
      }

      if ( clearReferenceFirst )
      {
        ClearAllReference( document );
      }

      var head = document.FindFirstOrDefault( "head" );
      if ( head == null )
      {
        // No head yet: insert one before the first element, or at index 0 when the document has no elements.
        var leadingElement = document.Elements().FirstOrDefault();

        if ( leadingElement == null )
        {
          head = document.AddElement( 0, "head" );
        }
        else
        {
          head = leadingElement.AddElementBeforeSelf( "head" );
        }
      }

      AddStyleReferences( head, styleFiles );
      AddScriptReferences( head, scriptFiles );
    }
Esempio n. 19
0
            /// <summary>
            /// Generates a public static CodeDOM method that rebuilds the given document through an <c>IHtmlDomProvider</c>.
            /// </summary>
            /// <param name="document">The document whose URI, HTML specification and child nodes are encoded into the method.</param>
            /// <param name="methodName">The name given to the generated method.</param>
            /// <returns>A method taking a <c>provider</c> parameter and returning an <c>IHtmlDocument</c>.</returns>
            public static CodeMemberMethod GenerateCodeMethod( IHtmlDocument document, string methodName )
            {
                var method = new CodeMemberMethod
                {
                  Name = methodName,
                  Attributes = MemberAttributes.Public | MemberAttributes.Static
                };

                method.Parameters.Add( new CodeParameterDeclarationExpression( typeof( IHtmlDomProvider ), "provider" ) );
                method.ReturnType = new CodeTypeReference( typeof( IHtmlDocument ) );

                var providerReference = new CodeVariableReferenceExpression( "provider" );

                // The document URI is emitted as "new Uri( absoluteUri )", or as a null literal when there is none.
                CodeExpression uriArgument;
                if ( document.DocumentUri == null )
                  uriArgument = new CodePrimitiveExpression( null );
                else
                  uriArgument = new CodeObjectCreateExpression( typeof( Uri ), new CodePrimitiveExpression( document.DocumentUri.AbsoluteUri ) );

                // IHtmlDocument document = provider.CreateDocument( uri );
                var createDocumentCall = new CodeMethodInvokeExpression( providerReference, "CreateDocument", uriArgument );
                method.Statements.Add( new CodeVariableDeclarationStatement( typeof( IHtmlDocument ), "document", createDocumentCall ) );

                var documentReference = new CodeVariableReferenceExpression( "document" );

                // provider.SetHtmlSpecification( document, spec );
                var specificationLiteral = new CodePrimitiveExpression( document.HtmlSpecification.ToString() );
                method.Statements.Add( new CodeMethodInvokeExpression( providerReference, "SetHtmlSpecification", documentReference, specificationLiteral ) );

                // IDictionary<string, string> attributes;
                method.Statements.Add( new CodeVariableDeclarationStatement( typeof( IDictionary<string, string> ), "attributes" ) );

                // Statements that rebuild the document's child nodes.
                BuildChildNodesStatement( document, documentReference, method.Statements );

                // return provider.CompleteDocument( document );
                var completeDocumentCall = new CodeMethodInvokeExpression( providerReference, "CompleteDocument", documentReference );
                method.Statements.Add( new CodeMethodReturnStatement( completeDocumentCall ) );

                return method;
            }
Esempio n. 20
0
 /// <summary>
 /// Reads the competitive rank from the text of the <c>div.competitive-rank div</c> element.
 /// </summary>
 /// <param name="doc">The document to read from.</param>
 /// <returns>The parsed rank, or 0 when the element is missing or its text is not a valid <see cref="ushort"/>.</returns>
 private static ushort CompetitiveRank(IHtmlDocument doc)
 {
     var rankText = doc.QuerySelector("div.competitive-rank div")?.TextContent;

     // TryParse leaves the result at 0 on failure, which is the fallback value.
     ushort rank;
     ushort.TryParse(rankText, out rank);
     return rank;
 }
Esempio n. 21
0
        /// <summary>
        /// Returns the source URL of the <c>div.competitive-rank img</c> image.
        /// </summary>
        /// <param name="doc">The document to read from.</param>
        /// <returns>The value of the image's <c>src</c> attribute, or an empty string when the image or the attribute is missing.</returns>
        private static string CompetitiveRankImage(IHtmlDocument doc)
        {
            // Read the attribute directly rather than stripping markup from OuterHtml: stripping only
            // works when "src" is the sole attribute and it leaves HTML entities in the URL undecoded.
            var source = doc.QuerySelector("div.competitive-rank img")?.GetAttribute("src");

            return string.IsNullOrEmpty(source) ? string.Empty : source;
        }
Esempio n. 22
0
            /// <summary>
            /// Extracts season and episode numbers from the link texts of the page.
            /// </summary>
            /// <param name="html">The document whose link texts (via <c>LinkTexts</c>) are examined.</param>
            /// <returns>
            /// One entry per link text: key = season, value = (first episode, optional last episode).
            /// Texts that do not match the "SxE" pattern yield (0, (0, null)).
            /// </returns>
            private List <KeyValuePair <int, KeyValuePair <int, int?> > > GetSeasonsFirstEpisodesAndLast(IHtmlDocument html)
            {
                var linkTexts = LinkTexts(html);

                // Groups: 1 = season, 2 = start episode, 3 = optional end episode.
                var episodePattern = new Regex(@"(\d{1,2})x(\d{1,2})(?:.*\d{1,2}x(\d{1,2})?)?", RegexOptions.IgnoreCase);

                var parsed = linkTexts.Select(linkText =>
                {
                    var match = episodePattern.Match(linkText);
                    if (match.Success)
                    {
                        int season       = Int32.Parse(match.Groups[1].Value);
                        int firstEpisode = Int32.Parse(match.Groups[2].Value);
                        int? lastEpisode = null;

                        var endGroup = match.Groups[3];
                        if (endGroup.Success)
                        {
                            lastEpisode = Int32.Parse(endGroup.Value);
                        }

                        return new KeyValuePair <int, KeyValuePair <int, int?> >(season, new KeyValuePair <int, int?>(firstEpisode, lastEpisode));
                    }

                    // No match: placeholder entry with zeros and no last episode.
                    return new KeyValuePair <int, KeyValuePair <int, int?> >(0, new KeyValuePair <int, int?>(0, null));
                });

                return parsed.ToList();
            }
Esempio n. 23
0
 /// <summary>
 /// Lazily yields the URI of every anchor whose <c>href</c> contains ".torrent", resolved against <c>WebUri</c>.
 /// </summary>
 /// <param name="html">The document to search.</param>
 /// <returns>One URI per matching anchor.</returns>
 public IEnumerable <Uri> Extract(IHtmlDocument html)
 {
     var torrentAnchors = html.QuerySelectorAll("a[href*=\".torrent\"]");

     return torrentAnchors.Select(anchor =>
     {
         var link = anchor.Attributes["href"].Value;
         return new Uri(WebUri, link);
     });
 }
Esempio n. 24
0
 /// <summary>
 /// Creates the page wrapper for the delete-user page and captures its <c>#delete-user</c> form.
 /// </summary>
 /// <param name="client">The HTTP client forwarded to the base constructor.</param>
 /// <param name="deleteUser">The delete-user document; forwarded to the base constructor and searched for the form.</param>
 /// <param name="context">The UI context forwarded to the base constructor.</param>
 public DeleteUser(HttpClient client, IHtmlDocument deleteUser, DefaultUIContext context)
     : base(client, deleteUser, context)
 {
     // HtmlAssert.HasForm is an assertion helper, so construction presumably fails when the form is absent — TODO confirm.
     _deleteForm = HtmlAssert.HasForm("#delete-user", deleteUser);
 }
        /// <summary>
        /// Loads <c>Resources/Workschedule.html</c> into the document used by the tests.
        /// </summary>
        public HtmlDocumentTests()
        {
            string schedulePath   = Path.Combine("Resources", "Workschedule.html");
            Stream scheduleStream = StreamHelpers.CreateFromFile(schedulePath);

            document = new HtmlDocument(scheduleStream);
        }
Esempio n. 26
0
        /// <summary>
        /// Parses a listing page and appends one <c>GameInfo</c> per offer link (anchors whose class
        /// contains "tc-item") to <c>Form1.testArr</c>.
        /// </summary>
        /// <remarks>
        /// Returns without adding anything when the page has no <c>h1</c> heading or no active category counter.
        /// A missing price column header is treated as an empty price title, and a heading with a ")" but
        /// no "(" leaves the name untouched and the server at "Any".
        /// Side effect: sets the current thread's culture to en-US.
        /// </remarks>
        /// <param name="document">The page to parse.</param>
        public void Parse(IHtmlDocument document)
        {
            // NOTE(review): presumably needed so the helpers below parse numbers with en-US rules — confirm.
            Thread.CurrentThread.CurrentCulture = new CultureInfo("en-US");

            var hItems = document.QuerySelectorAll("h1");

            if (hItems.Length == 0)
            {
                return;
            }

            string fullHead = hItems[0].TextContent;
            var    catName  = document.QuerySelectorAll("a.active div.inside div.counter-param");

            if (catName.Length == 0)
            {
                return;
            }

            // The price column header may be absent; fall back to an empty title instead of dereferencing null.
            var    priceHeader = document.QuerySelector("div[data-sort-field='tc-price']");
            string priceTitle  = priceHeader != null ? priceHeader.TextContent.Replace(" ", "") : string.Empty;
            var    inStockItem = document.QuerySelector("div[data-sort-field='tc-amount']");
            int    coef        = 1;

            // When an amount column exists, a plain "Price" header is relabelled "Price/1000" and coef becomes 1000.
            if (inStockItem != null && priceTitle == "Price")
            {
                coef       = 1000;
                priceTitle = "Price/1000";
            }

            string category = catName[0].TextContent;
            string fullName = fullHead.Replace(category + " ", "");
            string serv     = "Any";

            // Heading of the form "Name (Server)": split the name from the server.
            if (fullName.Contains(")"))
            {
                var tmpArr = fullName.Split('(');

                // Only split when there really is an opening parenthesis; otherwise keep the defaults.
                if (tmpArr.Length > 1)
                {
                    // Drop the character that precedes "(" (normally the separating space), if there is one.
                    fullName = tmpArr[0].Length > 0 ? tmpArr[0].Substring(0, tmpArr[0].Length - 1) : string.Empty;
                    serv     = tmpArr[1].Replace(")", "");
                }
            }

            var aItems = document.QuerySelectorAll("a")
                         .Where(item => item.ClassName != null && item.ClassName.Contains("tc-item"));

            foreach (var item in aItems)
            {
                long   amount      = GetCoinCount(item);
                double cost        = GetCost(item, coef);
                var    description = GetDescriptionContent(item);
                string servName    = GetContent(item, "tc-server");
                string side        = GetContent(item, "tc-side");
                string nick        = GetMediaItem(item, "media-user-name");
                int    rev         = GetReviewsCount(item);
                string regtime     = GetMediaItem(item, "media-user-info");
                bool   isOnline    = GetOnlineStatus(item);

                // The shared result list is only touched while holding Program.Locker.
                lock (Program.Locker)
                {
                    var x = new GameInfo(
                        DateTime.Now,
                        DateTime.Now,
                        ((IHtmlAnchorElement)item).Href.Replace("https://", ""),
                        fullName,
                        serv,
                        category,
                        servName,
                        side,
                        amount,
                        cost,
                        description,
                        nick,
                        rev,
                        regtime,
                        priceTitle,
                        isOnline);
                    Form1.testArr.Add(x);
                }
            }
        }
Esempio n. 27
0
        /// <inheritdoc />
        /// <remarks>
        /// Each input is parsed as HTML and, when a query selector is configured, replaced by one document per
        /// matching element. The input is passed through unchanged when it cannot be parsed, when no selector is
        /// set, when nothing matches, or when processing throws (a warning is traced in that case).
        /// </remarks>
        public IEnumerable <Common.Documents.IDocument> Execute(IReadOnlyList <Common.Documents.IDocument> inputs, IExecutionContext context)
        {
            // A single parser instance is shared by all inputs of the parallel query below.
            HtmlParser parser = new HtmlParser();

            return(inputs.AsParallel().SelectMany(context, input =>
            {
                // Parse the HTML content
                IHtmlDocument htmlDocument = input.ParseHtml(parser);
                if (htmlDocument == null)
                {
                    return new[] { input };
                }

                // Evaluate the query selector
                try
                {
                    if (!string.IsNullOrWhiteSpace(_querySelector))
                    {
                        // In "first" mode the array holds exactly one entry, which is null when nothing matched.
                        IElement[] elements = _first
                            ? new[] { htmlDocument.QuerySelector(_querySelector) }
                            : htmlDocument.QuerySelectorAll(_querySelector).ToArray();
                        if (elements.Length > 0 && elements[0] != null)
                        {
                            List <Common.Documents.IDocument> documents = new List <Common.Documents.IDocument>();
                            foreach (IElement element in elements)
                            {
                                // Get the metadata
                                Dictionary <string, object> metadata = new Dictionary <string, object>();
                                foreach (Action <IElement, Dictionary <string, object> > metadataAction in _metadataActions)
                                {
                                    metadataAction(element, metadata);
                                }

                                // Clone the document and optionally change content to the HTML element
                                if (_outerHtmlContent.HasValue)
                                {
                                    Stream contentStream = context.GetContentStream();
                                    using (StreamWriter writer = contentStream.GetWriter())
                                    {
                                        // true: write the element itself; false: write only its child nodes.
                                        if (_outerHtmlContent.Value)
                                        {
                                            element.ToHtml(writer, HtmlMarkupFormatter.Instance);
                                        }
                                        else
                                        {
                                            element.ChildNodes.ToHtml(writer, HtmlMarkupFormatter.Instance);
                                        }
                                        // Flush before the stream is handed to the new document, which is
                                        // created while the writer is still open.
                                        writer.Flush();
                                        documents.Add(context.GetDocument(input, contentStream, metadata.Count == 0 ? null : metadata));
                                    }
                                }
                                else
                                {
                                    // Content is left as-is; only the collected metadata is attached.
                                    documents.Add(context.GetDocument(input, metadata));
                                }
                            }
                            return (IEnumerable <Common.Documents.IDocument>)documents;
                        }
                    }
                    // No selector configured or nothing matched: pass the input through.
                    return new[] { input };
                }
                catch (Exception ex)
                {
                    // Best effort: report the failure and pass the input through unchanged.
                    Trace.Warning("Exception while processing HTML for {0}: {1}", input.SourceString(), ex.Message);
                    return new[] { input };
                }
            }));
        }
Esempio n. 28
0
 /// <summary>
 /// Creates a parser over the given document.
 /// </summary>
 /// <param name="root">The document stored as the parser's root; it is not inspected here.</param>
 public VideoWatchPageParser(IHtmlDocument root)
 {
     _root = root;
 }
Esempio n. 29
0
        /// <summary>
        /// Asserts that exactly one element matches <paramref name="selector"/> and that it is an anchor.
        /// </summary>
        /// <param name="selector">The CSS selector to evaluate.</param>
        /// <param name="document">The document to search.</param>
        /// <returns>The single matching element as an <c>IHtmlAnchorElement</c>.</returns>
        public static IHtmlAnchorElement HasLink(string selector, IHtmlDocument document)
        {
            var matches   = document.QuerySelectorAll(selector);
            var onlyMatch = Assert.Single(matches);

            return Assert.IsAssignableFrom <IHtmlAnchorElement>(onlyMatch);
        }
Esempio n. 30
0
        /// <summary>
        /// Asserts that the document contains exactly one <c>form</c> element.
        /// </summary>
        /// <param name="document">The document to search.</param>
        /// <returns>The single form as an <c>IHtmlFormElement</c>.</returns>
        public static IHtmlFormElement HasForm(IHtmlDocument document)
        {
            var forms    = document.QuerySelectorAll("form");
            var onlyForm = Assert.Single(forms);

            return Assert.IsAssignableFrom <IHtmlFormElement>(onlyForm);
        }
        /// <summary>
        /// Returns every <c>div.itemBox</c> element of the document.
        /// </summary>
        /// <param name="doc">The document to search.</param>
        /// <returns>The matching elements.</returns>
        public IHtmlCollection <IElement> GetAnimeElements(IHtmlDocument doc)
        {
            return doc.QuerySelectorAll("div.itemBox");
        }
Esempio n. 32
0
 /// <summary>
 /// Compiles a document into a method.
 /// </summary>
 /// <param name="document">The document to compile.</param>
 /// <returns>The compiled method; the document can be restored through it.</returns>
 public static Func<IHtmlDomProvider, IHtmlDocument> Compile( IHtmlDocument document )
 {
     // Wrap the compiled dynamic method in a handler and expose its Invoke method as the delegate.
     var handler = new DynamicMethodHandler( CompileDynamicMethod( document ) );
     return handler.Invoke;
 }
Esempio n. 33
0
            /// <summary>
            /// Emits the IL that creates the document, stores it in local 0, applies its HTML specification
            /// and then emits the creation code for each of its nodes.
            /// </summary>
            /// <param name="il">The generator that receives the instructions.</param>
            /// <param name="document">The document whose URI, HTML specification and nodes are encoded.</param>
            private static void EmitCreateDocument( ILGenerator il, IHtmlDocument document )
            {
                // Sketch of the overall instruction sequence; the right-hand column shows the evaluation stack.

                //init           provider

                //CreateDocument document
                //dup            document, document
                //st container   document

                //ld provider    document, provider
                //ld container   document, provider, document
                //ld spec        document, provider, document, specS
                //SetHtmlSpecifi document, spec
                //pop            document

                //begin create element

                //ld provider    document, provider
                //ld container   document, provider, document
                //ld ElementName document, provider, document, name
                //create element document, element

                //dup            document, element, element
                //st container   document, element

                //begin create textNode

                //ld provider    document, element, provider
                //ld container   document, element, provider, element
                //ld text        document, element, provider, element, text
                //create text    document, element, textNode
                //pop            document, element

                //end create textNode

                //end create element
                //pop            document
                //dup            document, document
                //st container   document

                //end create document
                //complete   document
                //ret

                // Argument for CreateDocument: null, or "new Uri( absoluteUri )" when the document has a URI.
                // NOTE(review): nothing here loads the provider that CreateDocument is called on; presumably
                // the caller has already pushed it (the "init provider" step above) — confirm.
                if ( document.DocumentUri == null )
                  il.Emit( OpCodes.Ldnull );
                else
                {
                  il.Emit( OpCodes.Ldstr, document.DocumentUri.AbsoluteUri );
                  il.Emit( OpCodes.Newobj, NewUri );
                }

                il.Emit( OpCodes.Callvirt, CreateDocument );

                // Keep one copy of the new document on the stack and store the other in local 0.
                il.Emit( OpCodes.Dup );
                il.Emit( OpCodes.Stloc_0 );// set container;

                // Call SetHtmlSpecification on argument 0 with ( (IHtmlDocument) local 0, spec ) and discard its result.
                il.Emit( OpCodes.Ldarg_0 );
                il.Emit( OpCodes.Ldloc_0 );
                il.Emit( OpCodes.Castclass, typeof( IHtmlDocument ) );
                il.Emit( OpCodes.Ldstr, document.HtmlSpecification.ToString() );
                il.Emit( OpCodes.Callvirt, SetHtmlSpecification );
                il.Emit( OpCodes.Pop );

                // Emit creation code for every node of the document, in order.
                foreach ( var node in document.Nodes() )
                  EmitCreateNode( il, node );
            }
Esempio n. 34
0
 /// <summary>
 /// Intentionally empty: this implementation does nothing with the supplied page.
 /// </summary>
 /// <param name="document">The page to parse (unused).</param>
 /// <param name="gameName">The game name (unused).</param>
 /// <param name="category">The category (unused).</param>
 /// <param name="serverLocation">The server location (unused).</param>
 public void Parse(IHtmlDocument document, string gameName, string category, string serverLocation)
 {
 }
Esempio n. 35
0
 /// <summary>
 /// Checks whether the document is the expected page by comparing its title.
 /// </summary>
 /// <param name="document">The document to check.</param>
 /// <returns><c>true</c> when the page title equals "WORLD'S END レコード"; otherwise <c>false</c>.</returns>
 private bool IsValidDocument(IHtmlDocument document)
 {
     var pageTitle = HtmlParseUtility.GetPageTitle(document);
     return pageTitle == "WORLD'S END レコード";
 }
Esempio n. 36
0
        /// <summary>
        /// Sanitizes everything below <paramref name="context"/>: removes disallowed tags, sanitizes style sheets,
        /// cleans up attributes (disallowed names, URL values, style, "&amp;{" values, CSS classes, empty values),
        /// removes comments and finally runs the post-processing step.
        /// </summary>
        /// <param name="dom">The document that owns the content; used for style sheet sanitization and post-processing.</param>
        /// <param name="context">The element whose descendants are sanitized.</param>
        /// <param name="baseUrl">Base URL passed to the URL and style sanitizers; defaults to an empty string.</param>
        private void DoSanitize(IHtmlDocument dom, IElement context, string baseUrl = "")
        {
            // remove non-whitelisted tags
            // (each query is copied with ToList() first, presumably so removals do not disturb the enumeration)
            foreach (var tag in context.QuerySelectorAll("*").Where(t => !IsAllowedTag(t)).ToList())
            {
                RemoveTag(tag, RemoveReason.NotAllowedTag);
            }

            SanitizeStyleSheets(dom, baseUrl);

            // cleanup attributes
            foreach (var tag in context.QuerySelectorAll("*").OfType <IElement>().ToList())
            {
                // remove non-whitelisted attributes
                foreach (var attribute in tag.Attributes.Where(a => !IsAllowedAttribute(a)).ToList())
                {
                    RemoveAttribute(tag, attribute, RemoveReason.NotAllowedAttribute);
                }

                // sanitize URLs in URL-marked attributes
                foreach (var attribute in tag.Attributes.Where(IsUriAttribute).ToList())
                {
                    // A null result means the URL was rejected, so the whole attribute is removed.
                    var url = SanitizeUrl(attribute.Value, baseUrl);
                    if (url == null)
                    {
                        RemoveAttribute(tag, attribute, RemoveReason.NotAllowedUrlValue);
                    }
                    else
                    {
                        tag.SetAttribute(attribute.Name, url);
                    }
                }

                // sanitize the style attribute
                SanitizeStyle(tag, baseUrl);

                // Class filtering only applies when AllowedCssClasses is set.
                // Despite its name, allowedTags holds the allowed CSS class names.
                var checkClasses = AllowedCssClasses != null;
                var allowedTags  = AllowedCssClasses?.ToArray() ?? new string[0];

                // sanitize the value of the attributes
                foreach (var attribute in tag.Attributes.ToList())
                {
                    // The '& Javascript include' is a possible method to execute Javascript and can lead to XSS.
                    // (see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#.26_JavaScript_includes)
                    if (attribute.Value.Contains("&{"))
                    {
                        RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue);
                    }
                    else
                    {
                        if (checkClasses && attribute.Name == "class")
                        {
                            // Remove every class that is not in the allowed list.
                            var removedClasses = tag.ClassList.Except(allowedTags).ToArray();

                            foreach (var removedClass in removedClasses)
                            {
                                RemoveCssClass(tag, removedClass, RemoveReason.NotAllowedCssClass);
                            }

                            // Drop the class attribute entirely once no classes remain.
                            if (!tag.ClassList.Any())
                            {
                                RemoveAttribute(tag, attribute, RemoveReason.ClassAttributeEmpty);
                            }
                        }
                        else if (string.IsNullOrEmpty(attribute.Value))
                        {
                            // Empty attributes are removed directly on the element rather than through
                            // RemoveAttribute, so no RemoveReason is passed for them.
                            tag.RemoveAttribute(attribute.Name);
                        }
                    }
                }
            }

            RemoveComments(context);

            DoPostProcess(dom, context);
        }
Esempio n. 37
0
        /// <summary>
        /// Parses a system-requirements HTML fragment into a <see cref="Requirement"/>.
        /// Every <c>li</c> element is inspected for a known <c>&lt;strong&gt;</c> label
        /// (OS, Processor, Memory, Graphics, DirectX, Storage / Hard Drive) and the
        /// text of that item is normalised and stored on the result.
        /// </summary>
        /// <param name="pc_requirement">HTML fragment handed to <c>HtmlParser.Parse</c>.</param>
        /// <returns>
        /// The populated requirement. Members for labels that never appear keep whatever
        /// value <c>new Requirement()</c> gave them.
        /// </returns>
        private Requirement ParseRequirement(string pc_requirement)
        {
            Requirement requirement = new Requirement();

            HtmlParser    parser          = new HtmlParser();
            IHtmlDocument HtmlRequirement = parser.Parse(pc_requirement);

            // Only recent game
            foreach (var ElementRequirement in HtmlRequirement.QuerySelectorAll("li"))
            {
#if DEBUG
                logger.Debug($"SteamRequierements - {ElementRequirement.InnerHtml}");
#endif

                //<strong>OS:</strong> Windows XP / 7 / 8 / 8.1 / 10 x32 and x64<br> </ li >
                if (ElementRequirement.InnerHtml.IndexOf("<strong>OS") > -1)
                {
                    // Strip the label, vendor names, bitness markers and filler phrases so that
                    // only the version tokens remain. Each Replace works on the output of the
                    // previous one, so the order of this chain is significant.
                    // NOTE(review): "32-bit"/"64-bit" are removed before the longer patterns
                    // "32-bit/64-bit" and "64-bit Operating System Required" are tried, so those
                    // longer patterns normally cannot match any more - confirm whether intended.
                    string os = ElementRequirement.InnerHtml
                                .Replace("\t", " ")
                                .Replace("<strong>OS:</strong>", string.Empty)
                                .Replace("with Platform Update for  7 ( versions only)", string.Empty)
                                .Replace("Win ", string.Empty)
                                .Replace("win ", string.Empty)
                                .Replace("windows", string.Empty)
                                .Replace("Windows", string.Empty)
                                .Replace("Microsoft", string.Empty)
                                .Replace("microsoft", string.Empty)
                                .Replace(", 32-bit", string.Empty)
                                .Replace(", 32bit", string.Empty)
                                .Replace(", 64-bit", string.Empty)
                                .Replace(", 64bit", string.Empty)
                                .Replace("®", string.Empty)
                                .Replace("+", string.Empty)
                                .Replace("and above", string.Empty)
                                .Replace("x32", string.Empty)
                                .Replace("and", string.Empty)
                                .Replace("x64", string.Empty)
                                .Replace("32-bit", string.Empty)
                                .Replace("32Bit", string.Empty)
                                .Replace("32 Bit", string.Empty)
                                .Replace("64-bit", string.Empty)
                                .Replace("64Bit", string.Empty)
                                .Replace("64 Bit", string.Empty)
                                .Replace("latest Service Pack", string.Empty)
                                .Replace("latest service pack", string.Empty)
                                .Replace("32-bit/64-bit", string.Empty)
                                .Replace("32bit/64bit", string.Empty)
                                .Replace("64-bit Operating System Required", string.Empty)
                                .Replace("32-bit Operating System Required", string.Empty)
                                .Replace(" Operating System Required", string.Empty)
                                .Replace("Operating System Required", string.Empty)
                                .Replace(" equivalent or better", string.Empty)
                                .Replace(" or equivalent.", string.Empty)
                                .Replace(" or equivalent", string.Empty)
                                .Replace(" or Newer", string.Empty)
                                .Replace(" or newer", string.Empty)
                                .Replace("or Newer", string.Empty)
                                .Replace("or newer", string.Empty)
                                .Replace("or later", string.Empty)
                                .Replace("or higher", string.Empty)
                                .Replace("()", string.Empty)
                                .Replace("<br>", string.Empty)
                                .Trim();

                    // "," , " or " and "/" all separate alternatives; they are first unified to
                    // the placeholder '¤' and then split. Unlike the GPU branch below, empty
                    // entries are not filtered out here.
                    foreach (string sTemp in os.Replace(",", "¤").Replace(" or ", "¤").Replace("/", "¤").Split('¤'))
                    {
                        requirement.Os.Add(sTemp.Trim());
                    }
                }

                //< li >< strong > Processor:</ strong > Intel Core2 Duo E6320 or equivalent /\t AMD Athlon 64 X2 5000 + (2 * 2.6 GHz) or equivalent<br></ li >
                if (ElementRequirement.InnerHtml.IndexOf("<strong>Processor") > -1)
                {
                    // Same approach as for the OS: remove the label and filler phrases, and
                    // normalise the spelling of "GHz". The chain is order-dependent.
                    string cpu = ElementRequirement.InnerHtml
                                 .Replace("\t", " ")
                                 .Replace("<strong>Processor:</strong>", string.Empty)
                                 .Replace("&nbsp;", string.Empty)
                                 .Replace("equivalent or higher processor", string.Empty)
                                 .Replace("- Low budget CPUs such as Celeron or Duron needs to be at about twice the CPU speed", string.Empty)
                                 .Replace(" equivalent or faster processor", string.Empty)
                                 .Replace(" equivalent or better", string.Empty)
                                 .Replace("above", string.Empty)
                                 .Replace("and up", string.Empty)
                                 .Replace("(or higher)", string.Empty)
                                 .Replace("or higher", string.Empty)
                                 .Replace(" or equivalent.", string.Empty)
                                 .Replace(" over", string.Empty)
                                 .Replace(" or faster", string.Empty)
                                 .Replace(" or better", string.Empty)
                                 .Replace(" or equivalent", string.Empty)
                                 .Replace(" or Equivalent", string.Empty)
                                 .Replace("4 CPUs", string.Empty)
                                 .Replace(", ~2.4GHz", string.Empty)
                                 .Replace(", ~3.1GHz", string.Empty)
                                 .Replace("ghz", "GHz")
                                 .Replace("Ghz", "GHz")
                                 .Replace("Processor", string.Empty)
                                 .Replace("processor", string.Empty)
                                 .Replace("(not recommended for Intel HD Graphics cards)", ", not recommended for Intel HD Graphics cards")
                                 .Replace("()", string.Empty)
                                 .Replace("<br>", string.Empty)
                                 .Trim();

                    // Drop a comma that is directly followed by " <digit>" so it is not treated
                    // as a separator by the split below.
                    cpu = Regex.Replace(cpu, ", ([0-9])", " $1");
                    // Turn a decimal comma in front of "<digit> GHz" into a decimal point.
                    cpu = Regex.Replace(cpu, "([0-9]),([0-9] GHz)", "$1.$2");
                    // Insert a space between a digit and "GHz".
                    cpu = Regex.Replace(cpu, "([0-9])GHz", "$1 GHz");
                    // Upper-case a "k" that follows a digit.
                    // NOTE(review): the character class [0-9999] is equivalent to [0-9].
                    cpu = Regex.Replace(cpu, "([0-9999])k", "$1K");
                    // Unify all separators to '¤' and split into individual CPU entries
                    // (empty entries are not filtered out).
                    cpu = cpu.Replace(",", "¤").Replace(" / ", "¤").Replace(" or ", "¤").Replace(" OR ", "¤")
                          .Replace(" and ", "¤").Replace(" AND ", "¤").Replace(" | ", "¤");
                    foreach (string sTemp in cpu.Split('¤'))
                    {
                        requirement.Cpu.Add(sTemp.Trim());
                    }
                }

                //< li >< strong > Memory:</ strong > 2048 MB RAM<br></ li >
                if (ElementRequirement.InnerHtml.IndexOf("<strong>Memory") > -1)
                {
                    // The whole item is lower-cased first, so every pattern below is lower-case.
                    string ram = ElementRequirement.InnerHtml.ToLower()
                                 .Replace("\t", " ")
                                 .Replace("<strong>memory:</strong>", string.Empty)
                                 .Replace("ram", string.Empty)
                                 .Replace("of system", string.Empty)
                                 .Replace("<br>", string.Empty)
                                 .Trim();
                    // When several values are separated by '/', only the last one is kept.
                    ram = ram.Split('/')[ram.Split('/').Length - 1];

                    // Convert to bytes.
                    // NOTE(review): long.Parse throws if the remaining text is not a plain
                    // integer (for example a decimal value or leftover words).
                    if (ram.ToLower().IndexOf("mb") > -1)
                    {
                        requirement.Ram = 1024 * 1024 * long.Parse(ram.ToLower().Replace("mb", string.Empty).Trim());
                    }
                    if (ram.ToLower().IndexOf("gb") > -1)
                    {
                        requirement.Ram = 1024 * 1024 * 1024 * long.Parse(ram.ToLower().Replace("gb", string.Empty).Trim());
                    }
                    // SizeSuffix is defined elsewhere; presumably it renders the byte count as text.
                    requirement.RamUsage = SizeSuffix(requirement.Ram, true);
                }

                //< li >< strong > Graphics:</ strong > GeForce GT 440(1024 MB) or equivalent / Radeon HD 6450(512 MB) or equivalent / Iris Pro Graphics 5200(1792 MB) < br ></ li >
                if (ElementRequirement.InnerHtml.IndexOf("<strong>Graphics") > -1)
                {
                    // Remove the label and a list of known filler sentences, and normalise
                    // memory sizes to the "(N GB)" form. The chain is order-dependent.
                    string gpu = ElementRequirement.InnerHtml
                                 .Replace("\t", " ")
                                 .Replace("<strong>Graphics:</strong>", string.Empty)
                                 .Replace("ATI or NVidia card w/ 1024 MB RAM (NVIDIA GeForce GTX 260 or ATI HD 4890)", "NVIDIA GeForce GTX 260 or ATI HD 4890")
                                 .Replace("Video card must be 128 MB or more and should be a DirectX 9-compatible with support for Pixel Shader 2.0b (", string.Empty)
                                 .Replace("- *NOT* an Express graphics card).", string.Empty)
                                 .Replace("DirectX 11 class GPU with 1GB VRAM (", string.Empty)
                                 //.Replace(")<br>", string.Empty)
                                 .Replace("/320M 512MB VRAM", string.Empty)
                                 .Replace("/Intel Extreme Graphics 82845, 82865, 82915", string.Empty)
                                 .Replace(" 512MB VRAM (Intel integrated GPUs are not supported!)", " / Intel integrated GPUs are not supported!")
                                 .Replace("(not recommended for Intel HD Graphics cards)", ", not recommended for Intel HD Graphics cards")
                                 .Replace("or similar (no support for onboard cards)", string.Empty)
                                 .Replace("level Graphics Card (requires support for SSE)", string.Empty)
                                 .Replace("- Integrated graphics and very low budget cards might not work.", string.Empty)
                                 .Replace("Shader Model 3.0", string.Empty)
                                 .Replace("shader model 3.0", string.Empty)
                                 .Replace(" compatible", string.Empty)
                                 .Replace("Any", string.Empty)
                                 .Replace("any", string.Empty)
                                 .Replace("/Nvidia", " / Nvidia")
                                 .Replace("or AMD equivalent", string.Empty)
                                 .Replace("DX9 Compliant with PS3.0 support", string.Empty)
                                 .Replace("DX9 Compliant", string.Empty)

                                 .Replace("ATI or NVidia card", "Card")
                                 .Replace("w/", "with")
                                 .Replace("Graphics: ", string.Empty)
                                 .Replace(" equivalent or better", string.Empty)
                                 .Replace(" or equivalent.", string.Empty)
                                 .Replace("or equivalent.", string.Empty)
                                 .Replace(" or equivalent", string.Empty)
                                 .Replace(" or better.", string.Empty)
                                 .Replace("or better.", string.Empty)
                                 .Replace(" or better", string.Empty)
                                 .Replace(" or newer", string.Empty)
                                 .Replace("or newer", string.Empty)
                                 .Replace("or higher", string.Empty)
                                 .Replace("or better", string.Empty)
                                 .Replace("or greater graphics card", string.Empty)
                                 .Replace("or equivalent", string.Empty)
                                 .Replace("Mid-range", string.Empty)
                                 .Replace(" Memory Minimum", string.Empty)
                                 .Replace(" memory minimum", string.Empty)
                                 .Replace(" Memory Recommended", string.Empty)
                                 .Replace(" memory recommended", string.Empty)
                                 .Replace("e.g.", string.Empty)
                                 .Replace("Laptop integrated ", string.Empty)
                                 .Replace("GPU 1GB VRAM", "GPU 1 GB VRAM")
                                 .Replace("with 3GB system ram", "(3 GB)")
                                 .Replace("with 512MB", "(512 MB)")
                                 .Replace("(1Gb)", "(1 GB)")
                                 .Replace("(1GB)", "(1 GB)")
                                 .Replace(" 1GB", " (1 GB)")
                                 .Replace(" 2GB", " (2 GB)")
                                 .Replace("(2GB)", " (2 GB)")
                                 .Replace("(3GB)", " (3 GB)")
                                 .Replace("(4GB)", " (4 GB)")
                                 .Replace(" 6GB", " (6 GB)")
                                 .Replace(" 4GB", " (4 GB)")
                                 .Replace("8GB Memory 8 GB RAM", "(8 GB)")
                                 .Replace(" or more and should be a DirectX 9-compatible with support for Pixel Shader 3.0", string.Empty)
                                 .Replace(", or ", string.Empty)
                                 .Replace("()", string.Empty)
                                 .Replace("<br>", string.Empty)
                                 .Replace("  ", " ")
                                 .Replace(". Integrated Intel HD Graphics should work but is not supported; problems are generally solved with a driver update.", string.Empty)
                                 .Trim();

                    // Rewrite " - N GB" (single digit) as " (N GB)".
                    gpu = Regex.Replace(gpu, " - ([0-9]) GB", " ($1 GB)");
                    //gpu = Regex.Replace(gpu, "([0-9])Gb", "($1 GB)");
                    // Unify separators to '¤', split, and keep only the non-empty entries.
                    gpu = gpu.Replace(",", "¤").Replace(" or ", "¤").Replace(" OR ", "¤").Replace(" / ", "¤").Replace(" | ", "¤");
                    foreach (string sTemp in gpu.Split('¤'))
                    {
                        if (sTemp.Trim() != string.Empty)
                        {
                            requirement.Gpu.Add(sTemp.Trim());
                        }
                    }
                }
                // DirectX items are reported through the GPU list. The version is detected by a
                // plain substring search for "8", "9", "10" or "11" anywhere in the item's HTML,
                // so one item can add several entries if more than one of them occurs.
                if (ElementRequirement.InnerHtml.IndexOf("<strong>DirectX") > -1 && ElementRequirement.InnerHtml.IndexOf("8") > -1)
                {
                    requirement.Gpu.Add("DirectX 8");
                }
                if (ElementRequirement.InnerHtml.IndexOf("<strong>DirectX") > -1 && ElementRequirement.InnerHtml.IndexOf("9") > -1)
                {
                    requirement.Gpu.Add("DirectX 9");
                }
                if (ElementRequirement.InnerHtml.IndexOf("<strong>DirectX") > -1 && ElementRequirement.InnerHtml.IndexOf("10") > -1)
                {
                    requirement.Gpu.Add("DirectX 10");
                }
                if (ElementRequirement.InnerHtml.IndexOf("<strong>DirectX") > -1 && ElementRequirement.InnerHtml.IndexOf("11") > -1)
                {
                    requirement.Gpu.Add("DirectX 11");
                }


                //< li >< strong > DirectX:</ strong > Version 10 < br ></ li >
                //< li >< strong > Network:</ strong > Broadband Internet connection<br></ li >

                //< li >< strong > Storage:</ strong > 350 MB available space </ li >
                if (ElementRequirement.InnerHtml.IndexOf("<strong>Storage") > -1 || ElementRequirement.InnerHtml.IndexOf("<strong>Hard Drive") > -1)
                {
                    // Lower-cased first, so every pattern below is lower-case.
                    string storage = ElementRequirement.InnerHtml.ToLower()
                                     .Replace("\t", " ")
                                     .Replace("<strong>storage:</strong>", string.Empty)
                                     .Replace("<strong>hard drive:</strong>", string.Empty)
                                     .Replace("available space", string.Empty)
                                     .Replace("equivalent or better", string.Empty)
                                     .Replace("or equivalent", string.Empty)
                                     .Replace("hd space", string.Empty)
                                     .Replace("free space", string.Empty)
                                     .Replace("free hard drive space", string.Empty)
                                     .Replace("<br>", string.Empty)
                                     .Trim();

                    // Convert to bytes.
                    // NOTE(review): double.Parse without a format provider uses the current
                    // culture and throws on text that is not a number - confirm this is acceptable.
                    if (storage.IndexOf("mb") > -1)
                    {
                        requirement.Storage = 1024 * 1024 * double.Parse(storage.Replace("mb", string.Empty).Replace("available hard disk space", string.Empty).Trim());
                    }
                    if (storage.IndexOf("gb") > -1)
                    {
                        requirement.Storage = 1024 * 1024 * 1024 * double.Parse(storage.Replace("gb", string.Empty).Replace("available hard disk space", string.Empty).Trim());
                    }
                    requirement.StorageUsage = SizeSuffix(requirement.Storage);
                }
            }

            return(requirement);
        }
Esempio n. 38
0
 /// <summary>
 /// Creates a channel page and stores the supplied document in <c>_root</c>.
 /// </summary>
 /// <param name="root">The HTML document kept by this instance.</param>
 public ChannelPage(IHtmlDocument root)
 {
     _root = root;
 }
Esempio n. 39
0
 /// <summary>
 /// Collects the text content of every element matched by <c>LinkQuerySelector</c>.
 /// </summary>
 /// <param name="html">The document to query.</param>
 /// <returns>A new list with one text entry per matched element, in match order.</returns>
 private List<string> LinkTexts(IHtmlDocument html)
 {
     var texts = new List<string>();

     foreach (var element in html.QuerySelectorAll(LinkQuerySelector))
     {
         texts.Add(element.TextContent);
     }

     return texts;
 }
Esempio n. 40
0
 /// <summary>
 /// Sets the article body to the text of <c>div.content</c> found inside
 /// <c>div.section-content</c> of the full article page.
 /// </summary>
 /// <param name="_article">The article whose <c>Body</c> is assigned.</param>
 /// <param name="fullArticle">The parsed full-article document.</param>
 protected override void GetBody(Article _article, IHtmlDocument fullArticle)
 {
     var section = fullArticle.QuerySelector("div.section-content");
     var content = section.QuerySelector("div.content");

     _article.Body = content.TextContent;
 }
Esempio n. 41
0
 /// <summary>
 /// Reads the endorsement level from the endorsement tooltip of the page.
 /// </summary>
 /// <param name="doc">The document to query.</param>
 /// <returns>
 /// The parsed level, or 0 when the element is missing or its text is not a valid
 /// <see cref="ushort"/> (TryParse leaves the out value at 0 on failure).
 /// </returns>
 private static ushort EndorsementLevel(IHtmlDocument doc)
 {
     var levelText = doc.QuerySelector("div.EndorsementIcon-tooltip div.u-center")?.TextContent;

     ushort level;
     ushort.TryParse(levelText, out level);
     return level;
 }
Esempio n. 42
0
 /// <summary>
 /// Builds a <c>TranslationTerm</c> for every descendant text node of the document
 /// that passes <c>IsTranslatable</c>.
 /// </summary>
 /// <param name="document">The document whose descendant nodes are scanned.</param>
 /// <returns>The terms, in document traversal order.</returns>
 private static TranslationTerm[] ExtractTerms( IHtmlDocument document )
 {
   var translatableNodes = document.DescendantNodes()
     .OfType<IHtmlTextNode>()
     .Where( IsTranslatable );

   var terms = translatableNodes.Select( node => new TranslationTerm( node ) );

   return terms.ToArray();
 }
Esempio n. 43
0
        /// <summary>
        /// Implements the IHtmlHandler interface: stores the context and document, then runs
        /// the pre-process hook, the document processing and the post-process hook in that order.
        /// </summary>
        /// <param name="context">The HTTP context</param>
        /// <param name="document">The document to process</param>
        void IHtmlHandler.ProcessDocument( HttpContextBase context, IHtmlDocument document )
        {
            _httpContext = context;// When this is the entry point (i.e. the object is invoked as an IHtmlHandler), the context must be set here so derived classes can use it.
              Document = document;

              OnPreProcessDocument();

              Trace.Write( "Jumony Web", "Begin Process Document." );
              ProcessDocument();
              Trace.Write( "Jumony Web", "End Process Document." );

              OnPostProcessDocument();

              AddGeneratorMetaData();// Adds the Jumony generator meta information to the processed document.
        }
Esempio n. 44
0
 /// <summary>
 /// Creates a translate task for a document together with its terms.
 /// </summary>
 /// <param name="document">The document stored in the <c>document</c> field.</param>
 /// <param name="terms">The terms assigned to <c>Terms</c>.</param>
 public TranslateTask( IHtmlDocument document, TranslationTerm[] terms )
 {
     // "this." is required: the parameter shadows the field of the same name.
     this.document = document;
     this.Terms = terms;
 }
Esempio n. 45
0
 /// <summary>
 /// Ensures <paramref name="identity"/> is unique with respect to the <c>id</c> attribute
 /// values already present in the document, by delegating to the overload that takes
 /// the existing identities.
 /// </summary>
 /// <param name="identity">The candidate identity.</param>
 /// <param name="document">The document whose descendants' <c>id</c> values are collected.</param>
 /// <returns>Whatever the delegated overload returns.</returns>
 private static string EnsureUniqueness( string identity, IHtmlDocument document )
 {
     var existingIds = document.Descendants()
         .Select( element => element.Attribute( "id" ).Value() )
         .NotNull();

     return EnsureUniqueness( identity, existingIds );
 }
Esempio n. 46
0
 /// <summary>
 /// Finds every anchor whose <c>href</c> contains "/peli-" and resolves that href
 /// against <c>WebUri</c>.
 /// </summary>
 /// <param name="html">The document to query.</param>
 /// <returns>A lazily evaluated sequence of absolute URIs, one per matched anchor.</returns>
 public IEnumerable<Uri> Extract(IHtmlDocument html)
 {
     var movieAnchors = html.QuerySelectorAll("a[href*=\"/peli-\"]");

     // Kept as a deferred LINQ projection: URIs are only built when the result is enumerated.
     return movieAnchors.Select(anchor => new Uri(WebUri, anchor.GetAttribute("href")));
 }
Esempio n. 47
0
 /// <summary>
 /// Compiles the document into a dynamic method and binds it to the given provider,
 /// producing a delegate that returns an <c>IHtmlDocument</c>.
 /// </summary>
 /// <param name="document">The document passed to <c>CompileDynamicMethod</c>.</param>
 /// <param name="provider">The object the delegate is bound to as its first argument.</param>
 /// <returns>The bound delegate.</returns>
 public static Func<IHtmlDocument> Compile( IHtmlDocument document, IHtmlDomProvider provider )
 {
     var dynamicMethod = CompileDynamicMethod( document );
     var bound = dynamicMethod.CreateDelegate( typeof( Func<IHtmlDocument> ), provider );

     return bound.CastTo<Func<IHtmlDocument>>();
 }
Esempio n. 48
0
 /// <summary>
 /// Extension point for parsing detail information out of the HTML document.
 /// The base implementation does nothing; sub-classes override it to extract their own details.
 /// </summary>
 /// <param name="htmlDocument">The HTML document that is to be parsed.</param>
 protected virtual void ParseDetailInformation(IHtmlDocument htmlDocument)
 {
     // Intentionally empty.
 }
Esempio n. 49
0
            /// <summary>
            /// Compiles the document into a dynamic method, in preparation for converting it
            /// into a delegate in the next step.
            /// </summary>
            /// <param name="document">The document to compile</param>
            /// <returns>The compiled dynamic method</returns>
            private static DynamicMethod CompileDynamicMethod( IHtmlDocument document )
            {
                // Signature of the generated method: IHtmlDocument (IHtmlDomProvider).
                var method = new DynamicMethod( "", typeof( IHtmlDocument ), new[] { typeof( IHtmlDomProvider ) } );

                var il = method.GetILGenerator();

                // Local slot of type IHtmlContainer; presumably used by the code EmitCreateDocument emits - TODO confirm.
                il.DeclareLocal( typeof( IHtmlContainer ) );

                // The trailing comments give the instruction and the expected evaluation stack after it.
                il.Emit( OpCodes.Ldarg_0 );                         //load provider      stack: provider
                il.Emit( OpCodes.Dup );                             //duplicate          stack: provider provider

                EmitCreateDocument( il, document );                 //create document    stack: provider document

                il.Emit( OpCodes.Callvirt, CompleteDocument );      //complete document  stack: document

                il.Emit( OpCodes.Ret );

                return method;
            }
Esempio n. 50
0
 /// <summary>
 /// Returns the <c>src</c> attribute of the first element matching <c>.player-portrait</c>.
 /// </summary>
 /// <param name="doc">The document to query.</param>
 private static string PortraitImage(IHtmlDocument doc)
 {
     var portrait = doc.QuerySelector(".player-portrait");
     return portrait.GetAttribute("src");
 }
Esempio n. 51
0
        /// <summary>
        /// Runs the configured item queries against every element matched by the
        /// configured selector and returns one key/value row per element.
        /// </summary>
        /// <param name="config">
        /// Supplies <c>QuerySelectorAll</c> (falls back to "body" when null or empty)
        /// and the optional <c>QueryItems</c>.
        /// </param>
        /// <param name="dom">The document to query.</param>
        /// <returns>
        /// One dictionary per matched element that produced at least one entry. Items with a
        /// null or empty <c>KeyName</c> are ignored; values are trimmed and may be null.
        /// </returns>
        public static List<Dictionary<string, string>> _queryItems(this Config config, IHtmlDocument dom)
        {
            var rows = new List<Dictionary<string, string>>();
            var selector = string.IsNullOrEmpty(config.QuerySelectorAll) ? "body" : config.QuerySelectorAll;

            foreach (var element in dom.QuerySelectorAll(selector))
            {
                var row = new Dictionary<string, string>();

                if (config.QueryItems != null)
                {
                    foreach (var query in config.QueryItems)
                    {
                        if (!string.IsNullOrEmpty(query.KeyName))
                        {
                            var value = query._queryItems(element);
                            row.Add(query.KeyName, value?.Trim());
                        }
                    }
                }

                // Elements that yielded nothing are left out of the result.
                if (row.Count > 0)
                {
                    rows.Add(row);
                }
            }

            return rows;
        }
Esempio n. 52
0
            /// <summary>
            /// Builds an <c>MTReleaseInfo</c> for a movie from its detail page.
            /// </summary>
            /// <remarks>
            /// Extraction is best-effort: every field is read in its own try block, and a failure
            /// (missing element, unparsable text) simply leaves that field at its default while
            /// the remaining fields are still processed.
            /// Numeric text is parsed with the invariant culture after the decimal comma has been
            /// normalised to a point, so the result no longer depends on the culture of the
            /// running thread.
            /// </remarks>
            /// <param name="html">The parsed detail page.</param>
            /// <returns>The release with every field that could be extracted filled in.</returns>
            public MTReleaseInfo Extract(IHtmlDocument html)
            {
                var release = new MTReleaseInfo();
                release.IsMovie = true;

                var invariant          = System.Globalization.CultureInfo.InvariantCulture;
                var selectors          = html.QuerySelectorAll("b");
                var titleSelector      = html.QuerySelector("span>b");
                var titleSelector3do4k = html.QuerySelector("span:nth-child(4) > b:nth-child(1)");

                // Trimmed text of the node that follows the first <b> whose lower-cased text
                // contains the label. Throws when there is no such <b> or no following node;
                // callers rely on their surrounding try block for that.
                Func<string, string> valueAfterLabel = label =>
                    selectors.Where(s => s.TextContent.ToLower().Contains(label))
                             .First().NextSibling.TextContent.Trim();

                // Title: everything before the first "(".
                try
                {
                    var title      = titleSelector.TextContent;
                    var parenIndex = title.IndexOf('(');
                    if (parenIndex >= 0)
                    {
                        title = title.Substring(0, parenIndex).Trim();
                    }
                    release.TitleOriginal = title;
                }
                catch { }

                // Year: also appended to the title, but only when it parses as an integer.
                try
                {
                    var year = valueAfterLabel("año");
                    release.Year           = Int32.Parse(year, invariant);
                    release.TitleOriginal += " (" + year + ")";
                }
                catch { }

                // Publish date.
                try
                {
                    release.PublishDate = Convert.ToDateTime(valueAfterLabel("fecha"));
                }
                catch { }

                // Size: keep digits and separators only, treat "," as the decimal separator,
                // then convert from MB or (by default) GB.
                try
                {
                    var sizeStr     = valueAfterLabel("tamaño");
                    var isMegabytes = sizeStr.EndsWith("mb", StringComparison.OrdinalIgnoreCase);
                    var digits      = new Regex(@"[^0-9,.]").Replace(sizeStr, "").Replace(",", ".");
                    var amount      = float.Parse(digits, invariant);

                    long size = isMegabytes
                        ? ReleaseInfo.BytesFromMB(amount)
                        : ReleaseInfo.BytesFromGB(amount);
                    release.Size = size;
                }
                catch { }

                // Category from the "formato" label ...
                try
                {
                    release.CategoryText = valueAfterLabel("formato");
                }
                catch { }

                // ... overridden by a 3D / 4K marker in a parenthesised title; 4K wins over 3D.
                try
                {
                    var title = titleSelector.TextContent;
                    if (title.Contains("(") && title.Contains(")"))
                    {
                        if (title.Contains("3D"))
                        {
                            release.CategoryText = "3D";
                        }
                        if (title.Contains("4K"))
                        {
                            release.CategoryText = "4K";
                        }
                    }
                }
                catch { }

                // ... and finally by a 3D / 4K marker in the bracketed secondary title.
                try
                {
                    var title = titleSelector3do4k.TextContent;
                    if (title.Contains("[") && title.Contains("]"))
                    {
                        if (title.Contains("3D"))
                        {
                            release.CategoryText = "3D";
                        }
                        if (title.Contains("4K"))
                        {
                            release.CategoryText = "4K";
                        }
                    }
                }
                catch { }

                // Download link, also used as the GUID.
                try
                {
                    var link = html.QuerySelector("a[href*=\"sec=descargas\"]").GetAttribute("href");
                    release.Link = new Uri(WebUri, link);
                    release.Guid = release.Link;
                }
                catch { }

                return release;
            }
Esempio n. 53
0
        /// <summary>
        /// Sets the article's main image URL from the <c>src</c> attribute of the image inside
        /// <c>figure.msnry_item</c>, within <c>div.section-content</c> of the full article.
        /// </summary>
        /// <param name="_article">The article whose <c>UrlMainImg</c> is assigned.</param>
        /// <param name="reducedArticle">Not used by this implementation.</param>
        /// <param name="fullArticle">The parsed full-article document.</param>
        protected override void GetUrlMainImg(Article _article, IElement reducedArticle, IHtmlDocument fullArticle)
        {
            var section = fullArticle.QuerySelector("div.section-content");
            var figure  = section.QuerySelector("figure.msnry_item");
            var image   = figure.QuerySelector("img");

            _article.UrlMainImg = image.Attributes["src"].Value;
        }
Esempio n. 54
0
        /// <summary>
        /// Sets the article's date from the <c>datetime</c> attribute of the <c>time</c> element
        /// inside <c>div.post-pub</c>, within <c>div.post-container</c> of the full article.
        /// </summary>
        /// <param name="_article">The article whose <c>DateTime</c> is assigned.</param>
        /// <param name="reducedArticle">Not used by this implementation.</param>
        /// <param name="fullArticle">The parsed full-article document.</param>
        protected override void GetDateTime(Article _article, IElement reducedArticle, IHtmlDocument fullArticle)
        {
            var container   = fullArticle.QuerySelector("div.post-container");
            var publication = container.QuerySelector("div.post-pub");
            var timeElement = publication.QuerySelector("time");
            var timestamp   = timeElement.Attributes["datetime"].Value;

            _article.DateTime = DateTime.Parse(timestamp);
        }
Esempio n. 55
0
    /// <summary>
    /// Extracts the translation terms of the document, saves them to the given path
    /// and returns them.
    /// </summary>
    /// <param name="document">The document passed to <c>ExtractTerms</c>.</param>
    /// <param name="path">The path passed to <c>Save</c>.</param>
    /// <returns>The extracted terms.</returns>
    private static TranslationTerm[] CreateTermsData( IHtmlDocument document, string path )
    {
      var extracted = ExtractTerms( document );
      Save( path, extracted );
      return extracted;
    }
Esempio n. 56
0
        /// <summary>
        /// Analyzes the wiki page to determine which layout was used.
        /// </summary>
        /// <remarks>
        /// The layout is normally encoded in a <c>span</c> with id "layoutsdata" whose content is
        /// "header,footer,columns" (for example "true,false,2"). When that value is present but
        /// does not spell out a known column count, the layout is deduced from the number of
        /// <c>td</c> elements that carry a <c>style</c> attribute.
        /// </remarks>
        /// <param name="doc">html object</param>
        /// <returns>Layout of the wiki page; <c>PageLayout.Wiki_Custom</c> when it cannot be determined.</returns>
        private PageLayout GetLayout(IHtmlDocument doc)
        {
            string spanValue = "";
            var    spanTags  = doc.All.Where(p => p.LocalName == "span" && p.HasAttribute("id"));

            foreach (var span in spanTags)
            {
                if (!span.GetAttribute("id").Equals("layoutsdata", StringComparison.InvariantCultureIgnoreCase))
                {
                    continue;
                }

                // Remembered across iterations: the fallback below works on the last value seen.
                spanValue = span.InnerHtml.ToLower();

                switch (spanValue)
                {
                    case "false,false,1":
                        return PageLayout.Wiki_OneColumn;

                    case "false,false,2":
                    {
                        var tdTag = doc.All.FirstOrDefault(p => p.LocalName == "td" && p.HasAttribute("style"));
                        if (tdTag != null)
                        {
                            return TwoColumnLayoutFromStyle(tdTag.GetAttribute("style"));
                        }
                        // No styled cell found: keep scanning the remaining spans.
                        break;
                    }

                    case "true,false,2":
                        return PageLayout.Wiki_TwoColumnsWithHeader;

                    case "true,true,2":
                        return PageLayout.Wiki_TwoColumnsWithHeaderAndFooter;

                    case "false,false,3":
                        return PageLayout.Wiki_ThreeColumns;

                    case "true,false,3":
                        return PageLayout.Wiki_ThreeColumnsWithHeader;

                    case "true,true,3":
                        return PageLayout.Wiki_ThreeColumnsWithHeaderAndFooter;
                }
            }

            // Oops, we're still here...let's try to deduce a layout as some pages (e.g. from community template) do not add the proper span value
            bool plain           = spanValue.StartsWith("false,false,", StringComparison.Ordinal);
            bool headerAndFooter = spanValue.StartsWith("true,true,", StringComparison.Ordinal);
            bool headerOnly      = spanValue.StartsWith("true,false,", StringComparison.Ordinal);

            if (plain || headerAndFooter || headerOnly)
            {
                // false,false,&#123;0&#125; case..let's try to count the columns via the TD tag data.
                // Materialised once so the document is not walked again for every Count/First.
                var tdTags  = doc.All.Where(p => p.LocalName == "td" && p.HasAttribute("style")).ToList();
                int columns = tdTags.Count;

                if (plain)
                {
                    switch (columns)
                    {
                        case 1: return PageLayout.Wiki_OneColumn;
                        case 2: return TwoColumnLayoutFromStyle(tdTags[0].GetAttribute("style"));
                        case 3: return PageLayout.Wiki_ThreeColumns;
                    }
                }
                else if (headerAndFooter)
                {
                    switch (columns)
                    {
                        case 2: return PageLayout.Wiki_TwoColumnsWithHeaderAndFooter;
                        case 3: return PageLayout.Wiki_ThreeColumnsWithHeaderAndFooter;
                    }
                }
                else
                {
                    switch (columns)
                    {
                        case 2: return PageLayout.Wiki_TwoColumnsWithHeader;
                        case 3: return PageLayout.Wiki_ThreeColumnsWithHeader;
                    }
                }
            }

            return PageLayout.Wiki_Custom;
        }

        /// <summary>
        /// Distinguishes the two flavours of a two-column layout by the inline style of the first cell.
        /// </summary>
        /// <param name="style">The <c>style</c> attribute value of the first styled <c>td</c>.</param>
        /// <returns>
        /// <c>Wiki_TwoColumnsWithSidebar</c> when the style contains "width:66.6%;" and not
        /// "width:49.95%;"; <c>Wiki_TwoColumns</c> in every other case.
        /// </returns>
        private static PageLayout TwoColumnLayoutFromStyle(string style)
        {
            bool evenSplit = style.IndexOf("width:49.95%;", StringComparison.InvariantCultureIgnoreCase) > -1;
            bool sidebar   = style.IndexOf("width:66.6%;", StringComparison.InvariantCultureIgnoreCase) > -1;

            return (!evenSplit && sidebar)
                ? PageLayout.Wiki_TwoColumnsWithSidebar
                : PageLayout.Wiki_TwoColumns;
        }
Esempio n. 57
0
 /// <summary>
 /// Sets the article title to the text of the link inside the <c>h2</c> of the reduced article.
 /// </summary>
 /// <param name="_article">The article whose <c>Title</c> is assigned.</param>
 /// <param name="reducedArticle">The element that contains the heading.</param>
 /// <param name="fullArticle">Not used by this implementation.</param>
 protected override void GetTitle(Article _article, IElement reducedArticle, IHtmlDocument fullArticle)
 {
     var heading = reducedArticle.QuerySelector("h2");
     var link    = heading.QuerySelector("a");

     _article.Title = link.TextContent;
 }
Esempio n. 58
0
        /// <summary>
        /// Applies <c>PlayerIdRegex</c> to the text of the last <c>script</c> element of the page.
        /// </summary>
        /// <param name="doc">The document to query.</param>
        /// <returns>The matched text, or an empty string when the regex does not match.</returns>
        private static string PlayerId(IHtmlDocument doc)
        {
            var scripts    = doc.QuerySelectorAll("script");
            var scriptText = scripts.Last().TextContent;
            var match      = PlayerIdRegex.Match(scriptText);

            return match.Value;
        }
Esempio n. 59
0
 /// <summary>
 /// Creates a translate task and stores the supplied document in <c>_document</c>.
 /// </summary>
 /// <param name="document">The document kept by this task.</param>
 public TranslateTask( IHtmlDocument document )
 {
     this._document = document;
 }
Esempio n. 60
0
 /// <summary>
 /// Sets the article summary to the trimmed text of <c>div.news_post_excerpt</c>
 /// in the reduced article.
 /// </summary>
 /// <param name="_article">The article whose <c>Summary</c> is assigned.</param>
 /// <param name="reducedArticle">The element that contains the excerpt.</param>
 /// <param name="fullArticle">Not used by this implementation.</param>
 protected override void GetSummary(Article _article, IElement reducedArticle, IHtmlDocument fullArticle)
 {
     var excerpt = reducedArticle.QuerySelector("div.news_post_excerpt");

     _article.Summary = excerpt.TextContent.Trim();
 }