コード例 #1
0
ファイル: XmlDApi.cs プロジェクト: xCMNx/BooruViewer.Net
 /// <summary>
 /// Decodes the payload as UTF-8 XML and turns every post entry that carries an md5
 /// into a <see cref="DataRecord"/>.
 /// </summary>
 /// <param name="Data">Raw response bytes; decoded as UTF-8.</param>
 /// <param name="Host">Value stored as the Server of each record's server entry.</param>
 /// <param name="Settings">Parser settings; not read by this implementation.</param>
 /// <returns>The collected records; empty when the XML parses to null.</returns>
 /// <exception cref="ParserException">Wraps any exception raised while decoding or converting.</exception>
 public IList<DataRecord> GetData(byte[] Data, string Host, IParserSettings Settings)
 {
     try
     {
         var xmlText = System.Text.Encoding.UTF8.GetString(Data);
         var records = new List<DataRecord>();
         var root = XmlToDynamic.Parse(xmlText);

         // Nothing parsed: hand back the empty list.
         if (root == null)
         {
             return records;
         }

         foreach (var post in root.post)
         {
             // Posts without an md5 are skipped.
             if (post.md5 == null)
             {
                 continue;
             }

             records.Add(new DataRecord()
             {
                 MD5 = post.md5,
                 Tags = post.tags.Split(' '),
                 // The first character of the rating is cast to the enum; a missing rating falls back to Questionable.
                 Rating = post.rating != null ? (DataRating)post.rating[0] : DataRating.Questionable,
                 Servers = new[]
                 {
                     new DataServer()
                     {
                         Post = intParseOrDefault(post.id, -1),
                         Server = Host,
                         Size = intParseOrDefault(post.file_size, -1),
                         ParentPost = intParseOrDefault(post.parent_id, -1),
                         Autor = post.author,
                         Ext = Path.GetExtension(post.file_url),
                     }
                 }
             });
         }

         return records;
     }
     catch (Exception e)
     {
         throw new ParserException(e);
     }
 }
コード例 #2
0
ファイル: HtmlLoader.cs プロジェクト: NovikovDaniil/Parsing
        /// <summary>
        /// Creates the loader: stores the request URL built from the settings and
        /// instantiates the <see cref="HttpClient"/> used by this loader.
        /// </summary>
        /// <param name="settings">Supplies the BaseUrl and Prefix the URL is composed of.</param>
        public HtmlLoader(IParserSettings settings)
        {
            // Build the link: BaseUrl immediately followed by Prefix and a trailing slash.
            url = $"{settings.BaseUrl}{settings.Prefix}/";

            client = new HttpClient();
        }
コード例 #3
0
        /// <summary>
        /// Creates the loader: stores the request URL built from the settings and
        /// instantiates the <see cref="HttpClient"/> used by this loader.
        /// </summary>
        /// <param name="settings">Supplies the BaseUrl and Prefix joined into the URL.</param>
        public HtmlLoader(IParserSettings settings)
        {
            // URL layout: BaseUrl, a slash, then Prefix.
            url = $"{settings.BaseUrl}/{settings.Prefix}";

            client = new HttpClient();
        }
コード例 #4
0
 /// <summary>
 /// Registers <typeparamref name="T"/> as the singleton <see cref="IParser"/> and adds the
 /// supplied settings instance to the service collection under its runtime type.
 /// </summary>
 /// <param name="settings">Settings instance to register.</param>
 /// <returns>A <see cref="CanSpecifyFormatter"/> wrapping the same service collection.</returns>
 ICanSpecifyFormatter ICanSpecifyParser.WithParser<T>(IParserSettings<T> settings)
 {
     _collection.AddSingleton<IParser, T>();

     // The descriptor is keyed by the settings object's concrete type, not by the interface.
     var settingsDescriptor = new ServiceDescriptor(settings.GetType(), settings);
     _collection.Add(settingsDescriptor);

     return new CanSpecifyFormatter(_collection);
 }
コード例 #5
0
        /// <summary>
        /// Crawl-completed handler: parses the crawled page into a news item, records it,
        /// feeds its text into the word-frequency dictionary and prints the page URI.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Carries the crawled page and the crawl context (parser and settings live in its CrawlBag).</param>
        private static void PageCrawlCompleted(object sender, PageCrawlCompletedArgs e)
        {
            var page = e.CrawledPage;

            // Pages whose request failed are ignored.
            if (page.HttpRequestException != null)
            {
                return;
            }

            IParserSettings parserSettings = e.CrawlContext.CrawlBag.Settings;
            if (!parserSettings.IsPageParseAllowed(page))
            {
                return;
            }

            IParser<NewsData> newsParser = e.CrawlContext.CrawlBag.Parser;
            var newsItem = newsParser.Parse(page.AngleSharpHtmlDocument, page.Uri.AbsoluteUri);

            // A null result means the parser could not extract a news item.
            if (newsItem == null)
            {
                return;
            }

            news_list.Add(newsItem);

            var tokens = TextProcessingHelper.TextSplittingAndRemovingSymbols(newsItem.Text);
            TextProcessingHelper.CountFrequentWords(ref words_dictionary, tokens);

            Console.WriteLine(page.Uri);
            Console.WriteLine("=================================");
        }
コード例 #6
0
 /// <summary>
 /// Stores the collaborators this site parser works with.
 /// </summary>
 /// <param name="mainPageParser">Parser kept in <c>_mainPageParser</c>.</param>
 /// <param name="detailsPageParser">Parser kept in <c>_detailsPageParser</c>.</param>
 /// <param name="parserSettings">Settings kept in <c>_parserSettings</c>.</param>
 /// <param name="httpGetter">HTTP getter kept in <c>_httpGetter</c>.</param>
 public EkSiteParser(IMainPageParser mainPageParser, IDetailsPageParser detailsPageParser, IParserSettings parserSettings, IHttpGetter httpGetter)
 {
     _parserSettings = parserSettings;
     _httpGetter = httpGetter;
     _mainPageParser = mainPageParser;
     _detailsPageParser = detailsPageParser;
 }
コード例 #7
0
 /// <summary>
 /// Decodes the payload as UTF-8 JSON and maps every child token to a <see cref="DataRecord"/>.
 /// </summary>
 /// <remarks>
 /// A missing or empty "rating" falls back to <see cref="DataRating.Questionable"/> and a missing
 /// "file_url" yields no sub-servers, instead of failing the whole batch.
 /// </remarks>
 /// <param name="Data">Raw response bytes; decoded as UTF-8.</param>
 /// <param name="Host">Value stored as the Server of each record's server entry.</param>
 /// <param name="Settings">Parser settings; not read by this implementation.</param>
 /// <returns>One record per child token of the parsed JSON.</returns>
 /// <exception cref="ParserException">Wraps any exception raised while decoding or converting.</exception>
 public IList<DataRecord> GetData(byte[] Data, string Host, IParserSettings Settings)
 {
     try
     {
         var text = System.Text.Encoding.UTF8.GetString(Data);
         var tkn = Newtonsoft.Json.Linq.JToken.Parse(text);
         return tkn.Select(r =>
         {
             var rating = (string)r["rating"];
             var fileUrl = (string)r["file_url"];

             return new DataRecord()
             {
                 MD5 = (string)r["md5"],
                 // The first character of the rating is cast to the enum; absent ratings default to Questionable.
                 Rating = string.IsNullOrEmpty(rating) ? DataRating.Questionable : (DataRating)rating[0],
                 Tags = ((string)r["tags"])?.Split(' '),
                 Servers = new[]{
                     new DataServer()
                     {
                         Post = IntOrDefault(r["id"]),
                         Server = Host,
                         // Without a file URL there is no authority to record.
                         subServers = string.IsNullOrEmpty(fileUrl)
                             ? new string[0]
                             : new string[] { new Uri(fileUrl).Authority },
                         Size = IntOrDefault(r["file_size"]),
                         ParentPost = IntOrDefault(r["parent_id"]),
                         Autor = (string)r["author"],
                         Ext = Path.GetExtension(fileUrl)
                     }
                 }
             };
         }).ToArray();
     }
     catch (Exception e)
     {
         throw new ParserException(e);
     }
 }
コード例 #8
0
 /// <summary>
 /// Stores the injected collaborators and obtains a logger named after the runtime type.
 /// </summary>
 /// <param name="ekSiteParser">Site parser kept in <c>_ekSiteParser</c>.</param>
 /// <param name="vkPostCreator">Post creator kept in <c>_vkPostCreator</c>.</param>
 /// <param name="parserSettings">Settings kept in <c>_parserSettings</c>.</param>
 public Worker(IEkSiteParser ekSiteParser, IVkPostCreator vkPostCreator, IParserSettings parserSettings)
 {
     _parserSettings = parserSettings;
     _vkPostCreator = vkPostCreator;
     _ekSiteParser = ekSiteParser;

     // The logger name is the short name of the concrete (possibly derived) type.
     var loggerName = GetType().Name;
     _logger = LogManager.GetLogger(loggerName);
 }
コード例 #9
0
        /// <summary>
        /// Resolves the product ids for the given settings, then parses each product page in turn
        /// and tags every parsed item with <paramref name="type"/>.
        /// </summary>
        /// <param name="settings">Supplies the BaseUrl product pages are addressed under.</param>
        /// <param name="type">Hardware type written to every parsed item.</param>
        /// <returns>The items parsed before the loop finished or was aborted.</returns>
        public async Task <IEnumerable <HardwareItemRequest> > ParseItems(IParserSettings settings, string type)
        {
            var parsedItems = new List<HardwareItemRequest>();
            var productIds = await ParseProductId(settings);

            foreach (var productId in productIds)
            {
                // Fixed 10-second pause before every request: a lazy way to avoid the captcha.
                await Task.Delay(10000);

                try
                {
                    var parsed = await ParseProductItem($"{settings.BaseUrl}/{productId}");
                    parsed.HardwareType = type;
                    parsedItems.Add(parsed);
                }
                catch (HttpRequestException ex)
                {
                    // An HTTP failure aborts the run; the count of items gathered so far is logged.
                    logger.LogError($"{ex.Message}: {parsedItems.Count}");
                    break;
                }
                catch (Exception ex)
                {
                    // Any other failure only skips this product.
                    logger.LogError($"{ex.Message} at the {settings.BaseUrl}/{productId}");
                }
            }

            return parsedItems;
        }
コード例 #10
0
 /// <summary>
 /// Wires up the worker: obtains a logger from a new <see cref="LoggerFactory"/>, stores the
 /// parser and settings, and creates an <see cref="HtmlLoader"/> for the settings' BaseUrl.
 /// </summary>
 /// <param name="parser">Parser kept in <c>_parser</c>.</param>
 /// <param name="parserSettings">Settings kept in <c>_parserSettings</c>; its BaseUrl seeds the loader.</param>
 public ParserWorker(IParser <T> parser, IParserSettings parserSettings)
 {
     var loggerFactory = new LoggerFactory();
     _logger = loggerFactory.GetLogger();

     _parserSettings = parserSettings;
     _parser = parser;

     _loader = new HtmlLoader(_parserSettings.BaseUrl);
 }
コード例 #11
0
 /// <summary>
 /// Creates the HTTP client and composes the request URL from the engine URL
 /// with the image URL appended as the <c>url</c> query parameter.
 /// </summary>
 /// <param name="settings">Supplies EngineUrl and ImageUrl.</param>
 public Loader(IParserSettings settings)
 {
     client = new HttpClient();

     // A "&start={CountPage}" suffix was previously sketched here but is not part of the URL.
     url = $"{settings.EngineUrl}&url={settings.ImageUrl}";
 }
コード例 #12
0
 /// <summary>
 /// Captures the input/output file paths from the settings and prepares an
 /// <see cref="HttpClient"/> that sends a fixed "User" header.
 /// </summary>
 /// <param name="settings">Supplies InputFilePath and OutputFilePath.</param>
 public HtmlLoader(IParserSettings settings)
 {
     _inputFilePath = settings.InputFilePath;
     _outputFilePath = settings.OutputFilePath;

     _client = new HttpClient();

     // Identifies this client to the site being parsed.
     _client.DefaultRequestHeaders.Add("User", "HtmlParser");
 }
コード例 #13
0
ファイル: HtmlLoader.cs プロジェクト: KurkumaApp/Parser
 /// <summary>
 /// Creates an <see cref="HttpClient"/> with browser-like default headers and composes the
 /// article URL from the settings.
 /// </summary>
 /// <param name="settings">Supplies BaseUrl, CategoryName, GuideName, PartNumber and ArticleNumber.</param>
 public HtmlLoader(IParserSettings settings)
 {
     client = new HttpClient();

     // Headers are added without validation, in this order.
     var defaultHeaders = client.DefaultRequestHeaders;
     defaultHeaders.TryAddWithoutValidation("Accept", "text/html,application/xhtml+xml,application/xml");
     defaultHeaders.TryAddWithoutValidation("Accept-Encoding", "gzip, deflate");
     defaultHeaders.TryAddWithoutValidation("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:19.0) Gecko/20100101 Firefox/19.0");
     defaultHeaders.TryAddWithoutValidation("Accept-Charset", "ISO-8859-1");

     // URL layout: <BaseUrl>/<CategoryName>/<GuideName>/<PartNumber>.<ArticleNumber>.php
     url = $"{settings.BaseUrl}/{settings.CategoryName}/{settings.GuideName}/{settings.PartNumber}.{settings.ArticleNumber}.php";
 }
コード例 #14
0
ファイル: VacancyService.cs プロジェクト: Zilikaks/SuperParse
 /// <summary>
 /// Replaces <c>_settings</c> with vacancy-search settings for the given company name.
 /// </summary>
 /// <param name="companyName">
 /// Company name placed into the <c>text</c> query parameter; <c>null</c> is treated as empty.
 /// </param>
 private void UpdateSettings(string companyName)
 {
     // Percent-encode the name so that '&', '#', '+', spaces and similar characters
     // cannot truncate or alter the query string.
     var encodedCompanyName = System.Uri.EscapeDataString(companyName ?? string.Empty);

     _settings = new VacancyParserSettings
     {
         BaseUrl    = $"https://api.hh.ru/vacancies?area=16&search_field=company_name&text={encodedCompanyName}&per_page=100",
         Pagination = "&page=",
         StartPage  = 0,
         EndPage    = 7
     };
 }
コード例 #15
0
 /// <summary>
 /// Adds an exchange parser together with its settings.
 /// </summary>
 /// <param name="parser">The exchange parser; used as the key in <c>parsers</c>.</param>
 /// <param name="settings">Settings of the exchange.</param>
 /// <exception cref="ArgumentNullException"><paramref name="parser"/> is <c>null</c>.</exception>
 /// <exception cref="ArgumentException">
 /// Adding to <c>parsers</c> was rejected (reported as "exchange already added"); the original
 /// exception is available as the inner exception.
 /// </exception>
 public void AddParser(IParser parser, IParserSettings settings)
 {
     // Reject a null key up front so it is not misreported as a duplicate below.
     if (parser == null)
     {
         throw new ArgumentNullException("parser");
     }

     try
     {
         parsers.Add(parser, settings);
     }
     catch (ArgumentException ex)
     {
         // Keep the original exception as the cause instead of discarding it.
         throw new ArgumentException("Такая биржа уже добавлена.", ex);
     }
 }
コード例 #16
0
 /// <summary>
 /// Creates the HTTP client and builds the URL by substituting the category into the
 /// base URL template and appending the prefix.
 /// </summary>
 /// <param name="setting">Supplies the BaseURL template (containing "{Category}") and the Prefix.</param>
 /// <param name="category">Category number substituted for "{Category}".</param>
 public HtmlLoader(IParserSettings setting, int category)
 {
     Client = new HttpClient();

     var baseUrl = setting.BaseURL.Replace("{Category}", category.ToString());

     // Category 21 only: the percent-encoded fragment is swapped for its literal Cyrillic text.
     if (category == 21)
     {
         baseUrl = baseUrl.Replace("%D0%B1%D1%83%D0%BA%D0%B2", "буквы");
     }

     URL = $"{baseUrl}/{setting.Prefix}/";
 }
コード例 #17
0
ファイル: Parser.tt.cs プロジェクト: LosManos/St4mpede
		/// <summary>
		/// Reads the configuration located next to the host template file and initialises
		/// both the core settings and the parser settings from it.
		/// </summary>
		/// <param name="hostTemplateFile">Path of the host template; its directory is used as the config path.</param>
		/// <param name="configFilename">Config file name; <c>null</c> selects <c>Core.DefaultConfigFilename</c>.</param>
		/// <exception cref="ArgumentNullException"><paramref name="hostTemplateFile"/> is <c>null</c>.</exception>
		internal void Init(string hostTemplateFile, string configFilename)
		{
			if (hostTemplateFile == null)
			{
				throw new ArgumentNullException("hostTemplateFile");
			}

			if (configFilename == null)
			{
				configFilename = Core.DefaultConfigFilename;
			}

			var configDirectory = Path.GetDirectoryName(hostTemplateFile);
			var configDocument = Core.ReadConfig(configDirectory, configFilename);

			_coreSettings = Core.Init(configDocument);
			_settings = ParserSettings.Init(configDirectory, configFilename, configDocument);
		}
コード例 #18
0
ファイル: OptionMap.cs プロジェクト: schallm/commandline
        /// <summary>
        /// Initializes a new instance of the <see cref="OptionMap"/> class.
        /// Kept internal (not private) so unit tests can construct it.
        /// </summary>
        /// <param name="capacity">Initial internal capacity.</param>
        /// <param name="settings">Parser settings instance.</param>
        internal OptionMap(int capacity, IParserSettings settings)
        {
            this.settings = settings;

            // Name lookups honour the CaseSensitive setting.
            IEqualityComparer<string> nameComparer;
            if (this.settings.CaseSensitive)
            {
                nameComparer = StringComparer.Ordinal;
            }
            else
            {
                nameComparer = StringComparer.OrdinalIgnoreCase;
            }

            this.names = new Dictionary<string, string>(capacity, nameComparer);
            this.map = new Dictionary<string, OptionInfo>(capacity * 2, nameComparer);

            // The mutually-exclusive set map exists only when the feature is enabled,
            // and is always case-insensitive.
            if (this.settings.MutuallyExclusive)
            {
                this.mutuallyExclusiveSetMap = new Dictionary<string, MutuallyExclusiveInfo>(capacity, StringComparer.OrdinalIgnoreCase);
            }
        }
コード例 #19
0
        /// <summary>
        /// Builds the parse actions of the offset grammar on top of the base grammar and publishes them
        /// through Indentation, TestLine, OffsetElement, OffsetText, OffsetTexts, OffsetExpression,
        /// OffsetStatement and OffsetNodes.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the numbered local names (action, action2, ...) look machine-generated
        /// (possibly decompiled) — confirm before renaming anything.
        /// </remarks>
        /// <param name="settings">Parser settings, forwarded to the base constructor.</param>
        public OffsetGrammar(IParserSettings settings) : base(settings)
        {
            // Line-boundary building blocks: an optional '\r' followed by
            //   action  : '\n', STX or ETX
            //   action2 : '\n' or STX
            //   cond    : '\n' or ETX
            ParseAction <Chain <char, char> > action  = Grammar.Opt <char>(CharGrammar.Ch('\r')).And <char, char>(CharGrammar.Ch('\n').Or <char>(CharGrammar.ChSTX()).Or <char>(CharGrammar.ChETX()));
            ParseAction <Chain <char, char> > action2 = Grammar.Opt <char>(CharGrammar.Ch('\r')).And <char, char>(CharGrammar.Ch('\n').Or <char>(CharGrammar.ChSTX()));
            ParseAction <Chain <char, char> > cond    = Grammar.Opt <char>(CharGrammar.Ch('\r')).And <char, char>(CharGrammar.Ch('\n').Or <char>(CharGrammar.ChETX()));
            // Indentation: action2 followed by a run of spaces/tabs, guarded by NotNext(cond);
            // the run of spaces/tabs becomes the IndentationNode.
            ParseAction <IndentationNode>     action4 = action2.And <Chain <char, char>, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch(new char[] { ' ', '\t' }))).NotNext <Chain <Chain <char, char>, IList <char> >, Chain <char, char> >(cond).Build <Chain <Chain <char, char>, IList <char> >, IndentationNode>(hit => new IndentationNode(hit.Down));

            // NOTE(review): the result of this expression is discarded — looks like a leftover; confirm before removing.
            Grammar.Rep1 <char>(CharGrammar.Ch(new char[] { ' ', '\t' }));
            // action5: a run of spaces/tabs built with Rep; used below as the argument to Skip.
            ParseAction <IList <char> >  action5  = Grammar.Rep <char>(CharGrammar.Ch(new char[] { ' ', '\t' }));
            // action6: same prefix as the indentation rule but guarded by IfNext(cond); produces an empty node array.
            ParseAction <Node[]>         action6  = action2.And <Chain <char, char>, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch(new char[] { ' ', '\t' }))).IfNext <Chain <Chain <char, char>, IList <char> >, Chain <char, char> >(cond).Build <Chain <Chain <char, char>, IList <char> >, Node[]>(hit => new Node[0]);
            // parse: name character — letter, digit, '-', '_' or ':'.
            ParseAction <char>           parse    = CharGrammar.Ch(new Func <char, bool>(char.IsLetterOrDigit)).Or <char>(CharGrammar.Ch(new char[] { '-', '_', ':' }));
            // action8: name — a letter, '_' or ':' followed by name characters, built into a string.
            ParseAction <string>         action8  = CharGrammar.Ch(new Func <char, bool>(char.IsLetter)).Or <char>(CharGrammar.Ch(new char[] { '_', ':' })).And <char, IList <char> >(Grammar.Rep <char>(parse)).Build <Chain <char, IList <char> >, string>(hit => hit.Left + new string(hit.Down.ToArray <char>()));
            // action9: '|' followed by any characters, each filtered with Unless(action); the characters become a TextNode.
            ParseAction <TextNode>       action9  = CharGrammar.Ch('|').And <char, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch((Func <char, bool>)(_ => true)).Unless <char, Chain <char, char> >(action))).Build <Chain <char, IList <char> >, TextNode>(hit => new TextNode(hit.Down));
            // action10: an entity reference or a code expression from the base grammar, as a Node.
            ParseAction <Node>           action10 = base.AsNode <EntityNode>(base.EntityRef).Or <Node>(base.AsNode <ExpressionNode>(base.Code));
            // parser: one or more characters, each filtered with Unless(cond) and Unless(action10), as a TextNode.
            ParseAction <TextNode>       parser   = Grammar.Rep1 <char>(CharGrammar.Ch((Func <char, bool>)(ch => true)).Unless <char, Chain <char, char> >(cond).Unless <char, Node>(action10)).Build <IList <char>, TextNode>(hit => new TextNode(hit));
            // action12: '|' followed by a mix of text nodes and entity/code nodes.
            ParseAction <IList <Node> >  action12 = CharGrammar.Ch('|').And <char, IList <Node> >(Grammar.Rep <Node>(base.AsNode <TextNode>(parser).Or <Node>(action10))).Build <Chain <char, IList <Node> >, IList <Node> >(hit => hit.Down);
            // action13: '=' followed by a limited expression; the node is created with AutomaticEncoding = true.
            ParseAction <ExpressionNode> action13 = CharGrammar.Ch('=').And <char, Snippets>(base.LimitedExpression(cond.Build <Chain <char, char>, string>(x => ""))).Build <Chain <char, Snippets>, ExpressionNode>(hit => new ExpressionNode(hit.Down)
            {
                AutomaticEncoding = true
            });
            // action14: '-' followed by base.Statement1; action15: "@{" limited-expression "}"; action16: either form.
            ParseAction <StatementNode> action14 = CharGrammar.Ch('-').And <char, IList <Snippet> >(base.Statement1).Build <Chain <char, IList <Snippet> >, StatementNode>(hit => new StatementNode(hit.Down));
            ParseAction <StatementNode> action15 = CharGrammar.Ch("@{").And <string, Snippets>(base.LimitedExpression(CharGrammar.Ch("}"))).And <Chain <string, Snippets>, char>(CharGrammar.Ch('}')).Build <Chain <Chain <string, Snippets>, char>, StatementNode>(hit => new StatementNode(hit.Left.Down));
            ParseAction <StatementNode> action16 = action14.Or <StatementNode>(action15);
            // action17: '#' + letters/digits/'-'/'_' (used as the "id" value below);
            // action18: '.' + letters/digits/'-'/'_' (used as a "class" value below). Both are combined with Skip(action5).
            ParseAction <string>        action17 = CharGrammar.Ch('#').And <char, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch(new Func <char, bool>(char.IsLetterOrDigit)).Or <char>(CharGrammar.Ch(new char[] { '-', '_' })))).Skip <Chain <char, IList <char> >, IList <char> >(action5).Build <Chain <char, IList <char> >, string>(hit => new string(hit.Down.ToArray <char>()));
            ParseAction <string>        action18 = CharGrammar.Ch('.').And <char, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch(new Func <char, bool>(char.IsLetterOrDigit)).Or <char>(CharGrammar.Ch(new char[] { '-', '_' })))).Skip <Chain <char, IList <char> >, IList <char> >(action5).Build <Chain <char, IList <char> >, string>(hit => new string(hit.Down.ToArray <char>()));
            // action19: classes, an optional id, then more classes -> { id, classes }.
            var action19 = Grammar.Rep <string>(action18).And <IList <string>, string>(Grammar.Opt <string>(action17)).And <Chain <IList <string>, string>, IList <string> >(Grammar.Rep <string>(action18)).Build(hit => new { id = hit.Left.Down, classes = hit.Left.Left.Concat <string>(hit.Down) });
            // action20: same shape, but each alternative makes one part mandatory (the id, Rep1 leading classes, or Rep1 trailing classes).
            var action20 = Grammar.Rep <string>(action18).And <IList <string>, string>(action17).And <Chain <IList <string>, string>, IList <string> >(Grammar.Rep <string>(action18)).Or <Chain <Chain <IList <string>, string>, IList <string> > >(Grammar.Rep1 <string>(action18).And <IList <string>, string>(Grammar.Opt <string>(action17)).And <Chain <IList <string>, string>, IList <string> >(Grammar.Rep <string>(action18))).Or <Chain <Chain <IList <string>, string>, IList <string> > >(Grammar.Rep <string>(action18).And <IList <string>, string>(Grammar.Opt <string>(action17)).And <Chain <IList <string>, string>, IList <string> >(Grammar.Rep1 <string>(action18))).Build(hit => new { id = hit.Left.Down, classes = hit.Left.Left.Concat <string>(hit.Down) });
            // action22: element — a name with action19, or an optional name with action20 (name defaults to "div");
            // the id/classes become "id"/"class" attributes, followed by any explicit attributes.
            ParseAction <ElementNode>   action22 = action8.Skip <string, IList <char> >(action5).And(action19).Or(Grammar.Opt <string>(action8).And(action20)).Build(hit => new { name = hit.Left ?? "div", attrs = ((hit.Down.id != null) ? new AttributeNode[] { new AttributeNode("id", hit.Down.id) } : new AttributeNode[0]).Concat <AttributeNode>(hit.Down.classes.Any <string>() ? new AttributeNode[] { new AttributeNode("class", string.Join(" ", hit.Down.classes.ToArray <string>())) } : new AttributeNode[0]) }).And(Grammar.Rep <AttributeNode>(base.Attribute.Skip <AttributeNode, IList <char> >(action5))).Build(hit => new ElementNode(hit.Left.name, hit.Left.attrs.Concat <AttributeNode>(hit.Down).ToList <AttributeNode>(), false));
            // action23: inline content after an element — texts, an expression, or a statement, as a node list.
            ParseAction <IList <Node> > action23 = action12.Or <IList <Node> >(action13.Build <ExpressionNode, IList <Node> >(hit => ((IList <Node>) new Node[] { hit }))).Or <IList <Node> >(action16.Build <StatementNode, IList <Node> >(hit => (IList <Node>) new Node[] { hit }));
            // action24: an element followed by optional inline content.
            ParseAction <Chain <ElementNode, IList <Node> > > action24 = action22.Skip <ElementNode, IList <char> >(action5).And <ElementNode, IList <Node> >(Grammar.Opt <IList <Node> >(action23));
            // action25..action28: an indentation followed by an element line, texts, an expression or a statement.
            ParseAction <Node[]>          action25 = action4.And <IndentationNode, Chain <ElementNode, IList <Node> > >(action24).Build <Chain <IndentationNode, Chain <ElementNode, IList <Node> > >, Node[]>(hit => new Node[] { hit.Left, hit.Down.Left }.Concat <Node>((hit.Down.Down ?? ((IList <Node>) new Node[0]))).ToArray <Node>());
            ParseAction <Node[]>          action26 = action4.And <IndentationNode, IList <Node> >(action12).Build <Chain <IndentationNode, IList <Node> >, Node[]>(hit => new Node[] { hit.Left }.Concat <Node>(hit.Down).ToArray <Node>());
            ParseAction <Node[]>          action27 = action4.And <IndentationNode, ExpressionNode>(action13).Build <Chain <IndentationNode, ExpressionNode>, Node[]>(hit => new Node[] { hit.Left, hit.Down });
            ParseAction <Node[]>          action28 = action4.And <IndentationNode, StatementNode>(action16).Build <Chain <IndentationNode, StatementNode>, Node[]>(hit => new Node[] { hit.Left, hit.Down });
            // action29: an element line without a leading indentation node.
            ParseAction <Node[]>          action29 = action24.Build <Chain <ElementNode, IList <Node> >, Node[]>(hit => new Node[] { hit.Left }.Concat <Node>((hit.Down ?? ((IList <Node>) new Node[0]))).ToArray <Node>());
            // action30: one line — the alternatives above tried in this order, combined with Skip(action5); action31 repeats it.
            ParseAction <Node[]>          action30 = action6.Or <Node[]>(action25).Or <Node[]>(action26).Or <Node[]>(action27).Or <Node[]>(action28).Or <Node[]>(action29).Skip <Node[], IList <char> >(action5);
            ParseAction <IList <Node[]> > action31 = Grammar.Rep <Node[]>(action30);

            // Publish the rules through the grammar's members.
            this.Indentation      = action4;
            this.TestLine         = action30;
            this.OffsetElement    = action22;
            this.OffsetText       = action9;
            this.OffsetTexts      = action12;
            this.OffsetExpression = action13;
            this.OffsetStatement  = action16;
            // NOTE(review): as written the query projects each node array to an inner sequence ("select from ..."),
            // which does not obviously fit ToList<Node>(); a flattening "from nodes in hit from node in nodes"
            // may have been intended — confirm against the original source.
            this.OffsetNodes      = action31.Build <IList <Node[]>, IList <Node> >(hit => (from nodes in hit select from node in nodes
                                                                                           where node != null
                                                                                           select node).ToList <Node>());
        }
コード例 #20
0
        /// <summary>
        /// Downloads the main page, parses it into a document, extracts the categories and
        /// raises <c>OnNewCategory</c> with them keyed by the settings' BurseName.
        /// </summary>
        /// <param name="parser">Parser that extracts the categories from the document.</param>
        /// <param name="settings">Settings used to create the loader and to key the result.</param>
        private async Task GetCategory(IParser parser, IParserSettings settings)
        {
            var pageLoader = new HtmlLoader(settings);
            var html = await pageLoader.GetSourceByMainPage();

            var htmlParser = new HtmlParser();
            var parsedPage = await htmlParser.ParseDocumentAsync(html);

            var categoriesByBurse = new Dictionary<string, List<Category>>();
            categoriesByBurse.Add(settings.BurseName, parser.ParseCategory(parsedPage));

            // No subscribers means nothing is raised.
            OnNewCategory?.Invoke(this, categoriesByBurse);
        }
コード例 #21
0
        /// <summary>
        /// Creates the request object with a Chrome user agent and an empty cookie dictionary,
        /// fills the cookies from the settings' JSON (when present) and stores the base URL.
        /// </summary>
        /// <param name="settings">Supplies JsonCookies (optional) and BaseUrl.</param>
        public HtmlLoader(IParserSettings settings)
        {
            req = new HttpRequest();
            req.UserAgent = Http.ChromeUserAgent();
            req.Cookies = new CookieDictionary();

            var cookiesJson = settings.JsonCookies;
            if (cookiesJson != null)
            {
                // Every top-level JSON property becomes one cookie (name -> value text).
                foreach (var cookieEntry in JObject.Parse(cookiesJson))
                {
                    req.Cookies.Add(cookieEntry.Key, cookieEntry.Value.ToString());
                }
            }

            this.url = settings.BaseUrl;
        }
コード例 #22
0
        /// <summary>
        /// Parses the single product page addressed by the settings' BaseUrl and tags the
        /// result with <paramref name="type"/>.
        /// </summary>
        /// <param name="settings">Supplies the BaseUrl of the product page.</param>
        /// <param name="type">Hardware type written to the parsed item.</param>
        /// <returns>The parsed item, or <c>null</c> when parsing failed (the failure is logged).</returns>
        public async Task <HardwareItemRequest> ParseItem(IParserSettings settings, string type)
        {
            HardwareItemRequest parsedItem = null;

            try
            {
                parsedItem = await ParseProductItem(settings.BaseUrl);
                parsedItem.HardwareType = type;
            }
            catch (Exception ex)
            {
                // Failures are logged and swallowed; the caller sees whatever was assigned so far.
                logger.LogError($"{ex.Message} at the {settings.BaseUrl}");
            }

            return parsedItem;
        }
コード例 #23
0
        /// <summary>
        /// Walks the listing pages from StartPoint to EndPoint (inclusive) and collects the
        /// product ids reported by the parser worker.
        /// </summary>
        /// <param name="settings">Supplies BaseUrl, Prefix, StartPoint and EndPoint.</param>
        /// <returns>All product ids gathered across the visited pages.</returns>
        private async Task <IEnumerable <string> > ParseProductId(IParserSettings settings)
        {
            var collectedIds = new List<string>();
            var idWorker = new ParserWorker<string[]>(new CitilinkParserId());

            // Each completed page appends its ids to the shared list.
            idWorker.OnCompleted += (sender, pageIds) => collectedIds.AddRange(pageIds);

            var page = settings.StartPoint;
            while (page <= settings.EndPoint)
            {
                idWorker.Uri = $"{settings.BaseUrl}/?{settings.Prefix}={page}";
                await idWorker.Start();
                page++;
            }

            return collectedIds;
        }
コード例 #24
0
ファイル: RegExp.cs プロジェクト: xCMNx/BooruViewer.Net
 /// <summary>
 /// Decodes the payload as UTF-8 text and extracts records with the regular expression
 /// taken from the supplied <see cref="RegExSettings"/>.
 /// </summary>
 /// <remarks>
 /// The named groups "post", "server", "md5", "ext" and "tags" are read from every match.
 /// <c>Match.Groups[name]</c> never returns <c>null</c>, so group presence is tested through
 /// <c>Group.Success</c>: matches without an md5 are skipped and the other groups fall back
 /// to their defaults when they did not participate in the match.
 /// </remarks>
 /// <param name="Data">Raw response bytes; decoded as UTF-8.</param>
 /// <param name="Host">Value stored as the Server of each record's server entry.</param>
 /// <param name="Settings">Must be a <see cref="RegExSettings"/>; its Expression is the pattern used.</param>
 /// <returns>One record per match that captured an md5.</returns>
 /// <exception cref="ParserException">Wraps any exception raised while decoding or matching.</exception>
 public IList<DataRecord> GetData(byte[] Data, string Host, IParserSettings Settings)
 {
     try
     {
         var text = System.Text.Encoding.UTF8.GetString(Data);
         var r = new List<DataRecord>();
         MatchCollection mc = Regex.Matches(text, ((RegExSettings)Settings).Expression);
         foreach (Match m in mc)
         {
             var md5 = m.Groups["md5"];
             // A match without an md5 capture cannot form a record.
             if (!md5.Success)
                 continue;

             var pos = m.Groups["post"];
             var srv = m.Groups["server"];
             var ext = m.Groups["ext"];
             var tagsM = m.Groups["tags"];

             var tags = tagsM.Success ? tagsM.Value + " " : string.Empty;
             if (UseHtmlDecode)
                 tags = WebUtility.HtmlDecode(tags);

             // Rating and user are read from the tag text before the optional filter strips anything.
             var m2 = Regex.Match(tags, "Rating:(\\w)");
             var m3 = Regex.Match(tags, @"User:([\w]*)");
             if (!string.IsNullOrWhiteSpace(TagsFilterRegexp))
                 tags = Regex.Replace(tags, TagsFilterRegexp, string.Empty);

             // Post id defaults to 0 when the group is absent or not a valid integer.
             int postId = 0;
             if (pos.Success)
                 int.TryParse(pos.Value, out postId);

             r.Add(new DataRecord()
             {
                 MD5 = md5.Value,
                 Rating = m2.Success ? (DataRating)m2.Groups[1].Value.ToLower()[0] : (DataRating)'q',
                 Tags = tags.Trim().Split(' '),
                 Servers = new[]{ new DataServer(){
                     Post = postId,
                     Server = Host,
                     subServers = (srv.Success ? new[]{srv.Value.ToLower().Trim()} : new string[0]),
                     Autor = m3.Success ? m3.Groups[1].Value : string.Empty,
                     Ext = ext.Success ? ext.Value.ToLower().Trim() : null
                 }}
             });
         }
         return r;
     }
     catch (Exception e)
     {
         throw new ParserException(e);
     }
 }
コード例 #25
0
        /// <summary>
        /// Loads every page from StartPoint to EndPoint (inclusive), parses the orders on it and
        /// hands the result to <c>OnNewOrderAsync</c>, pausing 10 seconds after each page.
        /// </summary>
        /// <param name="parser">Parser that extracts orders from a document.</param>
        /// <param name="settings">Settings used to create the loader; supplies StartPoint and EndPoint.</param>
        private async Task Worker(IParser parser, IParserSettings settings)
        {
            var loader = new HtmlLoader(settings);

            for (int i = settings.StartPoint; i <= settings.EndPoint; i++)
            {
                var source = await loader.GetSourceByPageId(i);

                var domParser = new HtmlParser();

                var document = await domParser.ParseDocumentAsync(source);

                var result = parser.ParseOrder(document);

                // "await handler?.Invoke(...)" awaits null when nobody is subscribed, which throws
                // NullReferenceException; take a snapshot and await only when a handler exists.
                var handler = OnNewOrderAsync;
                if (handler != null)
                {
                    await handler.Invoke(result);
                }

                await Task.Delay(10000);
            }
        }
コード例 #26
0
        /// <summary>
        /// Runs a polite crawl starting at the settings' BaseUrl, with the parser and settings
        /// exposed to the page-completed handler through the crawler's CrawlBag.
        /// </summary>
        /// <typeparam name="T">Type produced by the parser.</typeparam>
        /// <param name="parserSettings">Settings stored in the CrawlBag; BaseUrl is the crawl root.</param>
        /// <param name="parser">Parser stored in the CrawlBag.</param>
        private static async Task DemoSimpleCrawler <T>(IParserSettings parserSettings, IParser <T> parser) where T : class
        {
            var crawlConfig = new CrawlConfiguration();
            crawlConfig.MaxPagesToCrawl = 20;                      // Crawl at most 20 pages
            crawlConfig.MinCrawlDelayPerDomainMilliSeconds = 1000; // Wait this many milliseconds between requests

            var politeCrawler = new PoliteWebCrawler(crawlConfig);
            politeCrawler.PageCrawlCompleted += PageCrawlCompleted;

            // The handler reads these back from the crawl context.
            politeCrawler.CrawlBag.Parser = parser;
            politeCrawler.CrawlBag.Settings = parserSettings;

            var rootUri = new Uri(parserSettings.BaseUrl);
            await politeCrawler.CrawlAsync(rootUri);
        }
コード例 #27
0
        /// <summary>
        /// Downloads one page per day in the inclusive range StartDate..EndDate and returns the
        /// response bodies concatenated in date order.
        /// </summary>
        /// <param name="parserSettings">Supplies BaseUri, Category, StartDate and EndDate.</param>
        /// <returns>
        /// The concatenated bodies, or <c>null</c> (after showing "Error 5") when StartDate is later than EndDate.
        /// </returns>
        public async Task <string> HtmlLoad(IParserSettings parserSettings)
        {
            if (parserSettings.StartDate > parserSettings.EndDate)
            {
                MessageBox.Show("Error 5");
                return(null);
            }

            // A builder avoids re-copying the accumulated text on every page.
            var pages = new System.Text.StringBuilder();

            // Dispose the client (and each response) so connections are released.
            using (HttpClient httpClient = new HttpClient())
            {
                for (DateTime curDate = parserSettings.StartDate; curDate <= parserSettings.EndDate; curDate = curDate.AddDays(1))
                {
                    // Invariant culture keeps the URL stable under cultures with non-Gregorian calendars.
                    string dateSegment = curDate.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture);

                    using (HttpResponseMessage responseMessage = await httpClient.GetAsync($"{parserSettings.BaseUri}{parserSettings.Category}/{dateSegment}"))
                    {
                        pages.Append(await responseMessage.Content.ReadAsStringAsync());
                    }
                }
            }

            return(pages.ToString());
        }
コード例 #28
0
        /// <summary>
        /// Collects links from the document's anchor elements, rebased onto the settings' BaseUrl.
        /// </summary>
        /// <param name="document">Document whose <c>a</c> elements are inspected.</param>
        /// <param name="settings">Supplies the BaseUrl prepended to every accepted path.</param>
        /// <returns>A lazily evaluated sequence of BaseUrl + path strings for the accepted anchors.</returns>
        public IEnumerable <string> Parse(IDocument document, IParserSettings settings)
        {
            // An anchor is accepted when BaseUrl + path is a well-formed absolute URI, the path
            // contains '/', and the anchor either has no host or its href contains BaseUrl.
            return document.QuerySelectorAll("a")
                   .OfType<IHtmlAnchorElement>()
                   .Where(anchor =>
                       Uri.IsWellFormedUriString(settings.BaseUrl + anchor.PathName, UriKind.Absolute) &&
                       anchor.PathName.Contains("/") &&
                       (anchor.HostName.Equals(String.Empty) || anchor.Href.Contains(settings.BaseUrl)) &&
                       !anchor.PathName.Equals(String.Empty))
                   .Select(anchor => settings.BaseUrl + anchor.PathName);
        }
コード例 #29
0
 /// <summary>
 /// Stores the loader and settings and positions the enumerator one step before the start page.
 /// </summary>
 /// <param name="htmlLoader">Loader kept in <c>_loader</c>.</param>
 /// <param name="parserSettings">Settings kept in <c>_settings</c>; StartPage - 1 is the initial position.</param>
 public PagesEnumerator(ILoader <T> htmlLoader, IParserSettings parserSettings)
 {
     // Evaluated first so that nothing is assigned if the settings cannot be read.
     var positionBeforeStart = parserSettings.StartPage - 1;

     _loader = htmlLoader;
     _currentPos = positionBeforeStart;
     _settings = parserSettings;
 }
コード例 #30
0
ファイル: OptionMap.cs プロジェクト: schallm/commandline
        /// <summary>
        /// Builds an <see cref="OptionMap"/> holding one entry per verb, keyed by the verb's unique name.
        /// </summary>
        /// <param name="target">Options instance; stored as the map's RawOptions and used to read verb property values.</param>
        /// <param name="verbs">Pairs of verb property and its <see cref="VerbOptionAttribute"/>.</param>
        /// <param name="settings">Parser settings; supplies the parsing culture.</param>
        /// <returns>The populated map.</returns>
        /// <exception cref="ParserException">
        /// A verb's property type has no parameterless constructor and the property is not already initialized.
        /// </exception>
        public static OptionMap Create(
            object target,
            IList<Pair<PropertyInfo, VerbOptionAttribute>> verbs,
            IParserSettings settings)
        {
            var map = new OptionMap(verbs.Count, settings);

            foreach (var verb in verbs)
            {
                var optionInfo = new OptionInfo(verb.Right, verb.Left, settings.ParsingCulture)
                {
                    HasParameterLessCtor = verb.Left.PropertyType.GetConstructor(Type.EmptyTypes) != null
                };

                if (!optionInfo.HasParameterLessCtor && verb.Left.GetValue(target, null) == null)
                {
                    // The concatenation is parenthesised so FormatInvariant formats the whole message;
                    // without the parentheses it applies only to the second literal and "{0}" stays unreplaced.
                    throw new ParserException(
                        ("Type {0} must have a parameterless constructor or" +
                        " be already initialized to be used as a verb command.").FormatInvariant(verb.Left.PropertyType));
                }

                map[verb.Right.UniqueName] = optionInfo;
            }

            map.RawOptions = target;
            return map;
        }
コード例 #31
0
ファイル: OptionMap.cs プロジェクト: schallm/commandline
        /// <summary>
        /// Builds an <see cref="OptionMap"/> from the properties of <paramref name="target"/> that carry a
        /// <see cref="BaseOptionAttribute"/>.
        /// </summary>
        /// <param name="target">Options instance to inspect; stored as the map's RawOptions.</param>
        /// <param name="settings">Parser settings; supplies the parsing culture.</param>
        /// <returns>The populated map, or <c>null</c> when the property list could not be retrieved.</returns>
        public static OptionMap Create(object target, IParserSettings settings)
        {
            var optionProperties = ReflectionUtil.RetrievePropertyList<BaseOptionAttribute>(target);
            if (optionProperties == null)
            {
                return null;
            }

            var map = new OptionMap(optionProperties.Count, settings);

            foreach (var entry in optionProperties)
            {
                // Incomplete pairs are ignored.
                if (entry.Left == null || entry.Right == null)
                {
                    continue;
                }

                string key;
                if (entry.Right.AutoLongName)
                {
                    // Auto long name: the lower-cased property name becomes both the key and the long name.
                    key = entry.Left.Name.ToLowerInvariant();
                    entry.Right.LongName = key;
                }
                else
                {
                    key = entry.Right.UniqueName;
                }

                map[key] = new OptionInfo(entry.Right, entry.Left, settings.ParsingCulture);
            }

            map.RawOptions = target;
            return map;
        }
コード例 #32
0
        public MarkupGrammar(IParserSettings settings)
        {
            var Apos = Ch('\'');
            var Quot = Ch('\"');
            var Lt   = Ch('<');
            var Gt   = Ch('>');


            //var CombiningChar = Ch('*');
            //var Extener = Ch('*');

            //[4]       NameChar	   ::=       Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
            var NameChar = Ch(char.IsLetterOrDigit).Or(Ch('.', '-', '_', ':')) /*.Or(CombiningChar).Or(Extener)*/;

            //[5]       Name	   ::=      (Letter | '_' | ':') (NameChar)*
            var Name =
                Ch(char.IsLetter).Or(Ch('_', ':')).And(Rep(NameChar))
                .Build(hit => hit.Left + new string(hit.Down.ToArray()));

            //[7]       Nmtoken	   ::=      (NameChar)+
            var NmToken =
                Rep1(NameChar)
                .Build(hit => new string(hit.ToArray()));

            //[3]       S	   ::=      (#x20 | #x9 | #xD | #xA)+
            Whitespace = Rep1(Ch(char.IsWhiteSpace));

            //[25]      Eq	   ::=       S? '=' S?
            var Eq = Opt(Whitespace).And(Ch('=')).And(Opt(Whitespace));



            var paintedStatement1 = Statement1.Build(hit => new StatementNode(hit)).Paint <StatementNode, Node>();

            var statementMarker = string.IsNullOrEmpty(settings.StatementMarker) ? "#" : settings.StatementMarker;

            // Syntax 1: '\r'? ('\n' | '\u0002') S? '#' (statement ^('\r' | '\n' | '\u0003') )
            var StatementNode1 = Opt(Ch('\r')).And(Ch('\n').Or(ChSTX())).And(Rep(Ch(' ', '\t'))).And(TkCode(Ch(statementMarker))).And(paintedStatement1).IfNext(Ch('\r', '\n').Or(ChETX()))
                                 .Build(hit => hit.Down);


            var paintedStatement2 = Statement2.Build(hit => new StatementNode(hit)).Paint <StatementNode, Node>();

            // Syntax 2: '<%' (statement ^'%>')  '%>'
            var StatementNode2 = TkAspxCode(Ch("<%")).NotNext(Ch('=')).And(paintedStatement2).And(TkAspxCode(Ch("%>")))
                                 .Build(hit => hit.Left.Down);

            Statement = StatementNode1.Or(StatementNode2);



            // Syntax 1: ${csharp_expression}
            var Code1 = TkCode(Ch("${")).And(Expression).And(TkCode(Ch('}')))
                        .Build(hit => new ExpressionNode(hit.Left.Down)
            {
                AutomaticEncoding = settings.AutomaticEncoding
            });

            // Syntax 3: <%=csharp_expression%>;
            var Code3 = TkAspxCode(Ch("<%")).And(TkAttDelim(Ch('='))).And(Expression).And(TkAspxCode(Ch("%>")))
                        .Build(hit => new ExpressionNode(hit.Left.Down));

            // Syntax 4: $!{csharp_expression}
            var Code4 = TkCode(Ch("$!{")).And(Expression).And(TkCode(Ch('}')))
                        .Build(hit => new ExpressionNode(hit.Left.Down)
            {
                SilentNulls = true, AutomaticEncoding = settings.AutomaticEncoding
            });

            // Syntax 5: !{sharp_expression}
            var Code5 = TkCode(Ch("!{")).And(Expression).And(TkCode(Ch('}')))
                        .Build(hit => new ExpressionNode(hit.Left.Down));

            Code = Code1.Or(Code3).Or(Code4).Or(Code5);

            var Condition = TkCode(Ch("?{")).And(Expression).And(TkCode(Ch('}')))
                            .Build(hit => new ConditionNode(hit.Left.Down));

            var LessThanTextNode = Ch('<')
                                   .Build(hit => (Node) new TextNode("<"));

            //[68]      EntityRef	   ::=      '&' Name ';'
            EntityRef =
                TkEntity(Ch('&').And(Name).And(Ch(';')))
                .Build(hit => new EntityNode(hit.Left.Down));

            var EntityRefOrAmpersand = AsNode(EntityRef).Or(Ch('&').Build(hit => (Node) new TextNode("&")));

            //[10]      AttValue	   ::=      '"' ([^<&"] | Reference)* '"' |  "'" ([^<&'] | Reference)* "'"
            var AttValueSingleText = TkAttVal(Rep1(ChNot('<', '&', '\'').Unless(Code).Unless(Condition))).Build(hit => new TextNode(hit));
            var AttValueSingle     = TkAttQuo(Apos).And(Rep(AsNode(AttValueSingleText).Or(EntityRefOrAmpersand).Or(AsNode(Code)).Or(AsNode(Condition)).Or(LessThanTextNode).Paint())).And(TkAttQuo(Apos));
            var AttValueDoubleText = TkAttVal(Rep1(ChNot('<', '&', '\"').Unless(Code).Unless(Condition))).Build(hit => new TextNode(hit));
            var AttValueDouble     = TkAttQuo(Quot).And(Rep(AsNode(AttValueDoubleText).Or(EntityRefOrAmpersand).Or(AsNode(Code)).Or(AsNode(Condition)).Or(LessThanTextNode).Paint())).And(TkAttQuo(Quot));
            var AttValue           = AttValueSingle.Or(AttValueDouble).Left().Down();


            //[41]      Attribute	   ::=       Name  Eq  AttValue
            Attribute =
                TkAttNam(Name).And(TkAttDelim(Eq)).And(AttValue)
                .Build(hit => new AttributeNode(hit.Left.Left, hit.Down)).Paint <AttributeNode, Node>();


            //[40]      STag	   ::=      '<' Name (S  Attribute)* S? '>'
            //[44]      EmptyElemTag	   ::=      '<' Name (S  Attribute)* S? '/>'
            Element =
                Opt(Ch("\r\n").Or(Ch("\n")).And(StringOf(Ch(char.IsWhiteSpace).Unless(Ch('\r', '\n'))))).And(TkTagDelim(Lt)).And(TkEleNam(Name)).And(Rep(Whitespace.And(Attribute).Down())).And(Opt(Whitespace)).And(Opt(TkTagDelim(Ch('/')))).And(TkTagDelim(Gt))
                .Build(hit => new ElementNode(
                           hit.Left.Left.Left.Left.Down,
                           hit.Left.Left.Left.Down,
                           hit.Left.Down != default(char),
                           hit.Left.Left.Left.Left.Left.Left == null ? string.Empty : hit.Left.Left.Left.Left.Left.Left.Left + hit.Left.Left.Left.Left.Left.Left.Down));

            //[42]      ETag	   ::=      '</' Name  S? '>'
            EndElement =
                Opt(Ch("\r\n").Or(Ch("\n")).And(StringOf(Ch(char.IsWhiteSpace).Unless(Ch('\r', '\n'))))).And(TkTagDelim(Lt.And(Ch('/')))).And(TkEleNam(Name)).And(Opt(Whitespace)).And(TkTagDelim(Gt))
                .Build(hit => new EndElementNode(hit.Left.Left.Down, hit.Left.Left.Left.Left == null ? string.Empty : hit.Left.Left.Left.Left.Left + hit.Left.Left.Left.Left.Down));

            Text =
                Rep1(ChNot('&', '<').Unless(Statement).Unless(Code).Unless(Element).Unless(EndElement))
                .Build(hit => new TextNode(hit));

            //[15]      Comment	   ::=      '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
            Comment =
                TkComm(Ch("<!--").And(Rep(ChNot('-').Or(Ch('-').IfNext(ChNot('-'))))).And(Ch("-->")))
                .Build(hit => new CommentNode(hit.Left.Down));

            //[11]      SystemLiteral	   ::=      ('"' [^"]* '"') | ("'" [^']* "'")
            var SystemLiteral =
                Quot.And(Rep(ChNot('\"'))).And(Quot).Or(Apos.And(Rep(ChNot('\''))).And(Apos))
                .Build(hit => new string(hit.Left.Down.ToArray()));

            //[13]      PubidChar	   ::=      #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
            var PubidChar1 = Ch(char.IsLetterOrDigit).Or(Ch(" \r\n-()+,./:=?;!*#@$_%".ToArray()));
            var PubidChar2 = PubidChar1.Or(Apos);

            //[12]      PubidLiteral	   ::=      '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
            var PubidLiteral =
                Quot.And(Rep(PubidChar2)).And(Quot).Or(Apos.And(Rep(PubidChar1)).And(Apos))
                .Build(hit => new string(hit.Left.Down.ToArray()));

            //[75]      ExternalID	   ::=      'SYSTEM' S  SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
            var ExternalIDSystem =
                Ch("SYSTEM").And(Whitespace).And(SystemLiteral)
                .Build(hit => new ExternalIdInfo
            {
                ExternalIdType = hit.Left.Left,
                SystemId       = hit.Down
            });
            var ExternalIDPublic =
                Ch("PUBLIC").And(Whitespace).And(PubidLiteral).And(Whitespace).And(SystemLiteral)
                .Build(hit => new ExternalIdInfo
            {
                ExternalIdType = hit.Left.Left.Left.Left,
                PublicId       = hit.Left.Left.Down,
                SystemId       = hit.Down
            });
            var ExternalID = ExternalIDSystem.Or(ExternalIDPublic);

            //[28]      doctypedecl	   ::=      '<!DOCTYPE' S  Name (S  ExternalID)? S? ('[' intSubset ']' S?)? '>'
            DoctypeDecl = Ch("<!DOCTYPE").And(Whitespace).And(Name).And(Opt(Whitespace.And(ExternalID).Down())).And(Opt(Whitespace)).And(Ch('>'))
                          .Build(hit => new DoctypeNode {
                Name = hit.Left.Left.Left.Down, ExternalId = hit.Left.Left.Down
            });

            //[26]      VersionNum	   ::=      '1.0'
            var VersionNum = Ch("1.0");

            //[24]      VersionInfo	   ::=       S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
            var VersionInfo = Whitespace.And(Ch("version")).And(Eq).And(
                Apos.And(VersionNum).And(Apos).Or(Quot.And(VersionNum).And(Quot)));

            //[81]      EncName	   ::=      [A-Za-z] ([A-Za-z0-9._] | '-')*
            var EncName = Ch(char.IsLetter).And(Rep(Ch(char.IsLetterOrDigit).Or(Ch('.', '_', '-'))))
                          .Build(hit => hit.Left + new string(hit.Down.ToArray()));

            //[80]      EncodingDecl	   ::=       S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
            var EncodingDecl = Whitespace.And(Ch("encoding")).And(Eq).And(
                Apos.And(EncName).And(Apos).Or(Quot.And(EncName).And(Quot)))
                               .Build(hit => hit.Down.Left.Down);

            //[32]      SDDecl	   ::=       S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
            var SSDecl = Whitespace.And(Ch("standalone")).And(Eq).And(
                Apos.And(Ch("yes").Or(Ch("no"))).And(Apos).Or(Quot.And(Ch("yes").Or(Ch("no"))).And(Quot)))
                         .Build(hit => hit.Down.Left.Down);

            //[23]      XMLDecl	   ::=      '<?xml' VersionInfo  EncodingDecl? SDDecl? S? '?>'
            XMLDecl =
                Ch("<?xml").And(VersionInfo).And(Opt(EncodingDecl)).And(Opt(SSDecl)).And(Opt(Whitespace)).And(Ch("?>"))
                .Build(hit => new XMLDeclNode {
                Encoding = hit.Left.Left.Left.Down, Standalone = hit.Left.Left.Down
            });

            //[17]      PITarget	   ::=      Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
            var PITarget = Name.Unless(Ch('X', 'x').And(Ch('M', 'm')).And(Ch('L', 'l')));

            //[16]      PI	   ::=      '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
            ProcessingInstruction = Ch("<?").And(PITarget).And(Opt(Whitespace)).And(Rep(Ch(ch => true).Unless(Ch("?>")))).And(Ch("?>"))
                                    .Build(hit => new ProcessingInstructionNode {
                Name = hit.Left.Left.Left.Down, Body = new string(hit.Left.Down.ToArray())
            });


            AnyNode = AsNode(Element).Paint()
                      .Or(AsNode(EndElement).Paint())
                      .Or(AsNode(Text).Paint())
                      .Or(EntityRefOrAmpersand.Paint())
                      .Or(AsNode(Statement))
                      .Or(AsNode(Code).Paint())
                      .Or(AsNode(DoctypeDecl).Paint())
                      .Or(AsNode(Comment).Paint())
                      .Or(AsNode(XMLDecl).Paint())
                      .Or(AsNode(ProcessingInstruction).Paint())
                      .Or(AsNode(LessThanTextNode).Paint());

            Nodes = Rep(AnyNode);
        }
コード例 #33
0
ファイル: DefaultSyntaxProvider.cs プロジェクト: emiaj/spark
 /// <summary>
 /// Creates the syntax provider and its markup grammar from the given parser settings.
 /// </summary>
 /// <param name="settings">Parser settings handed to the <c>MarkupGrammar</c> constructor.</param>
 public DefaultSyntaxProvider(IParserSettings settings)
 {
     var grammar = new MarkupGrammar(settings);
     _grammar = grammar;
 }
コード例 #34
0
ファイル: ParserWorker.cs プロジェクト: Marti22/HTMLParser
 /// <summary>
 /// Runs the single-argument constructor with <paramref name="parser"/>, then stores the parser settings.
 /// </summary>
 /// <param name="parser">Parser forwarded to the single-argument constructor.</param>
 /// <param name="parserSettings">Settings stored in the <c>parserSettings</c> member.</param>
 public ParserWorker(IParser<T> parser, IParserSettings parserSettings)
     : this(parser)
 {
     // "this." is required here: the parameter has the same name as the member.
     this.parserSettings = parserSettings;
 }
コード例 #35
0
ファイル: OxygeneSyntaxProvider.cs プロジェクト: mosh/spark
 /// <summary>
 /// Creates the syntax provider and its Oxygene markup grammar from the given parser settings.
 /// </summary>
 /// <param name="settings">Parser settings handed to the <c>OxygeneMarkupGrammar</c> constructor.</param>
 public OxygeneSyntaxProvider(IParserSettings settings)
 {
     var grammar = new OxygeneMarkupGrammar(settings);
     _grammar = grammar;
 }
コード例 #36
0
        readonly string url;        // The address is passed in here.

        /// <summary>
        /// Creates the HTTP client with a User-Agent header and builds the address string from the settings.
        /// </summary>
        /// <param name="settings">Supplies the <c>BaseUrl</c> and <c>Postfix</c> values used in the address.</param>
        public HtmlLoader(IParserSettings settings)
        {
            var httpClient = new HttpClient();

            // The User-Agent header identifies this client to the target site.
            httpClient.DefaultRequestHeaders.Add("User-Agent", "C# App");
            client = httpClient;

            // The address string is assembled here as "<BaseUrl>/<Postfix>/".
            url = $"{settings.BaseUrl}/{settings.Postfix}/";
        }
コード例 #37
0
 /// <summary>
 /// Runs the single-argument constructor with <paramref name="parser"/>, then assigns <c>Settings</c>.
 /// </summary>
 /// <param name="parser">Parser forwarded to the single-argument constructor.</param>
 /// <param name="settings">Value assigned to <c>Settings</c>.</param>
 public ParserWorker(IParser<T> parser, IParserSettings settings)
     : this(parser)
 {
     Settings = settings;
 }
コード例 #38
0
 /// <summary>
 /// Stores the given settings and creates a new pages enumerator from the loader and those settings.
 /// </summary>
 /// <param name="settings">Settings stored in <c>_settings</c> and passed to the new enumerator.</param>
 public void SetSettings(IParserSettings settings)
 {
     _settings = settings;

     // The enumerator is rebuilt on every call, so it always uses the settings stored above.
     var pages = new PagesEnumerator<T>(_loader, _settings);
     _enumerator = pages;
 }
コード例 #39
0
ファイル: PocoGenerator.tt.cs プロジェクト: LosManos/St4mpede
		/// <summary>
		/// Forwards its arguments unchanged to <c>Init</c>.
		/// </summary>
		/// <param name="coreSettings">Passed as the first argument of <c>Init</c>.</param>
		/// <param name="rdbSchemaSettings">Passed as the second argument of <c>Init</c>.</param>
		/// <param name="settingsElement">Passed as the third argument of <c>Init</c>.</param>
		internal void UT_Init(
			CoreSettings coreSettings,
			IParserSettings rdbSchemaSettings,
			XElement settingsElement)
		{
			Init(
				coreSettings,
				rdbSchemaSettings,
				settingsElement);
		}
コード例 #40
0
ファイル: PocoGenerator.tt.cs プロジェクト: LosManos/St4mpede
		/// <summary>
		/// Stores the supplied settings and builds <c>_pocoSettings</c> from values read out of the settings XML.
		/// </summary>
		/// <param name="settings">Stored in <c>_coreSettings</c>.</param>
		/// <param name="rdbSchemaSettings">Stored in <c>_rdbSchemaSettings</c>.</param>
		/// <param name="doc">XML element whose descendants supply the <c>PocoSettings</c> constructor arguments.</param>
		private void Init(CoreSettings settings, IParserSettings rdbSchemaSettings, XElement doc)
		{
			_coreSettings = settings;
			_rdbSchemaSettings = rdbSchemaSettings;

			// The values are read in the same order as the PocoSettings constructor arguments.
			// Element and attribute lookups go through Single(), which throws unless exactly one match exists.
			var makePartial = bool.Parse(doc.Descendants(MakePartialElement).Single().Value);

			var nameSpaceNode = doc.Descendants(NameSpaceElement).Single();
			var nameSpaceName = nameSpaceNode.Attributes(NameSpaceNameAttribute).Single().Value;
			var nameSpaceComments = nameSpaceNode
				.Descendants(NameSpaceCommentsElement)
				.Single()
				.Descendants(NameSpaceCommentElement)
				.Select(comment => comment.Value)
				.ToList();

			var constructorsNode = doc.Descendants(ConstructorsElement).Single();
			var defaultConstructor = bool.Parse(constructorsNode.Descendants(ConstructorsDefaultElement).Single().Value);
			var allPropertiesConstructor = bool.Parse(constructorsNode.Descendants(ConstructorsAllPropertiesElement).Single().Value);
			var allPropertiesSansPrimaryKeyConstructor = bool.Parse(constructorsNode.Descendants(ConstructorsAllPropertiesSansPrimaryKeyElement).Single().Value);
			var copyConstructor = bool.Parse(constructorsNode.Descendants(ConstructorCopy).Single().Value);

			var equalsNode = doc.Descendants(MethodsElement).Single().Descendants(MethodsEqualsElement).Single();
			var equalsMethod = bool.Parse(equalsNode.Value);
			var equalsRegex = equalsNode.Attributes(MethodsEqualsRegexAttribute).Single().Value;

			var outputFolder = doc.Descendants(OutputFolderElement).Single().Value;
			var projectPath = doc.Descendants(ProjectPathElement).Single().Value;
			var xmlOutputFilename = doc.Descendants(XmlOutputFilenameElement).Single().Value;

			_pocoSettings = new PocoSettings(
				makePartial,
				nameSpaceName,
				nameSpaceComments,
				defaultConstructor,
				allPropertiesConstructor,
				allPropertiesSansPrimaryKeyConstructor,
				copyConstructor,
				equalsMethod,
				equalsRegex,
				outputFolder,
				projectPath,
				xmlOutputFilename
			);
		}
コード例 #41
0
ファイル: HtmlLoader.cs プロジェクト: Jensen-kaz/C-ParserHTML
 /// <summary>
 /// Creates the HTTP client and builds the address string from the settings.
 /// </summary>
 /// <param name="settings">Supplies the <c>BaseUrl</c> and <c>Prefix</c> values used in the address.</param>
 public HtmlLoader(IParserSettings settings)
 {
     client = new HttpClient();

     // The address string has the form "<BaseUrl>/<Prefix>/".
     var address = $"{settings.BaseUrl}/{settings.Prefix}/";
     url = address;
 }
コード例 #42
0
		/// <summary>
		/// Stores the supplied settings, reads three values from the settings XML (logging each one),
		/// and builds <c>_surfaceSettings</c> from them.
		/// </summary>
		/// <param name="settings">Stored in <c>_coreSettings</c>.</param>
		/// <param name="rdbSchemaSettings">Stored in <c>_rdbSchemaSettings</c>.</param>
		/// <param name="doc">XML element whose descendants hold the output folder, project path and XML output filename.</param>
		private void Init(CoreSettings settings, IParserSettings rdbSchemaSettings, XElement doc)
		{
			_coreSettings = settings;
			_rdbSchemaSettings = rdbSchemaSettings;

			// Each value is logged right after it is read, before the next lookup runs.
			var outputFolder = ReadSingleDescendantValue(doc, OutputFolderElement);
			_log.Add("OutputFolder={0}.", outputFolder);

			var projectPath = ReadSingleDescendantValue(doc, ProjectPathElement);
			_log.Add("ProjectPath={0}.", projectPath);

			var xmlOutputFilename = ReadSingleDescendantValue(doc, XmlOutputFilenameElement);
			_log.Add("XmlOutputFilename={0}.", xmlOutputFilename);

			_surfaceSettings = new SurfaceSettings(outputFolder, projectPath, xmlOutputFilename);
		}

		/// <summary>
		/// Returns the value of the one descendant of <paramref name="root"/> whose name equals <paramref name="name"/>.
		/// </summary>
		/// <param name="root">Element whose descendants are searched.</param>
		/// <param name="name">Element name to match.</param>
		/// <returns>The <c>Value</c> of the single matching descendant.</returns>
		private static string ReadSingleDescendantValue(XElement root, XName name)
		{
			// Single(predicate) throws unless exactly one descendant matches.
			return root.Descendants().Single(element => element.Name == name).Value;
		}