/// <summary>
/// Decodes an XML payload and turns every <c>post</c> entry that carries an MD5
/// into a <see cref="DataRecord"/>.
/// </summary>
/// <param name="Data">Raw response bytes; decoded as UTF-8.</param>
/// <param name="Host">Server name stored on every produced <see cref="DataServer"/>.</param>
/// <param name="Settings">Parser settings; not read by this implementation.</param>
/// <returns>The records built from the posts; empty when the XML helper yields <c>null</c>.</returns>
/// <exception cref="ParserException">Wraps any exception raised while decoding or converting.</exception>
public IList<DataRecord> GetData(byte[] Data, string Host, IParserSettings Settings)
{
    try
    {
        var xml = System.Text.Encoding.UTF8.GetString(Data);
        var records = new List<DataRecord>();
        var root = XmlToDynamic.Parse(xml);
        if (root == null)
        {
            return records;
        }

        foreach (var post in root.post)
        {
            // Posts without an MD5 are skipped.
            if (post.md5 == null)
            {
                continue;
            }

            records.Add(new DataRecord()
            {
                MD5 = post.md5,
                Tags = post.tags.Split(' '),
                // The first character of the rating is cast to the enum; a missing rating counts as Questionable.
                Rating = post.rating != null ? (DataRating)post.rating[0] : DataRating.Questionable,
                Servers = new[]
                {
                    new DataServer()
                    {
                        Post = intParseOrDefault(post.id, -1),
                        Server = Host,
                        Size = intParseOrDefault(post.file_size, -1),
                        ParentPost = intParseOrDefault(post.parent_id, -1),
                        Autor = post.author,
                        Ext = Path.GetExtension(post.file_url),
                    }
                }
            });
        }

        return records;
    }
    catch (Exception error)
    {
        throw new ParserException(error);
    }
}
/// <summary>
/// Creates the loader's <see cref="HttpClient"/> and builds the address it will use.
/// </summary>
/// <param name="settings">Supplies <c>BaseUrl</c> and <c>Prefix</c>.</param>
public HtmlLoader(IParserSettings settings)
{
    client = new HttpClient();

    // Build the link: BaseUrl immediately followed by Prefix, then a trailing slash.
    var baseUrl = settings.BaseUrl;
    var prefix = settings.Prefix;
    url = $"{baseUrl}{prefix}/";
}
/// <summary>
/// Creates the loader's <see cref="HttpClient"/> and builds the address it will use.
/// </summary>
/// <param name="settings">Supplies <c>BaseUrl</c> and <c>Prefix</c>.</param>
public HtmlLoader(IParserSettings settings)
{
    client = new HttpClient();

    // Build the URL string as BaseUrl + "/" + Prefix (no trailing slash).
    var baseUrl = settings.BaseUrl;
    var prefix = settings.Prefix;
    url = $"{baseUrl}/{prefix}";
}
/// <summary>
/// Registers <typeparamref name="T"/> as the singleton <c>IParser</c> and adds the supplied
/// settings instance to the service collection under its runtime type.
/// </summary>
/// <param name="settings">Settings instance to register.</param>
/// <returns>A new <c>CanSpecifyFormatter</c> wrapping the same service collection.</returns>
ICanSpecifyFormatter ICanSpecifyParser.WithParser<T>(IParserSettings<T> settings)
{
    _collection.AddSingleton<IParser, T>();

    // The descriptor is keyed by the concrete settings type, not by the interface.
    var settingsDescriptor = new ServiceDescriptor(settings.GetType(), settings);
    _collection.Add(settingsDescriptor);

    return new CanSpecifyFormatter(_collection);
}
/// <summary>
/// Crawl-completed handler: parses the crawled page into a news item, stores it,
/// feeds its text into the word-frequency dictionary and prints the page URI.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Carries the crawled page and the crawl context (parser and settings live in its CrawlBag).</param>
private static void PageCrawlCompleted(object sender, PageCrawlCompletedArgs e)
{
    var page = e.CrawledPage;

    // Pages whose request failed are ignored.
    if (page.HttpRequestException != null)
    {
        return;
    }

    // CrawlBag is read through explicitly typed locals so the values convert to the expected interfaces.
    IParserSettings settings = e.CrawlContext.CrawlBag.Settings;
    if (!settings.IsPageParseAllowed(page))
    {
        return;
    }

    IParser<NewsData> parser = e.CrawlContext.CrawlBag.Parser;
    var article = parser.Parse(page.AngleSharpHtmlDocument, page.Uri.AbsoluteUri);
    if (article == null)
    {
        // Something went wrong while parsing; nothing to record.
        return;
    }

    news_list.Add(article);

    var words = TextProcessingHelper.TextSplittingAndRemovingSymbols(article.Text);
    TextProcessingHelper.CountFrequentWords(ref words_dictionary, words);

    Console.WriteLine(page.Uri);
    Console.WriteLine("=================================");
}
/// <summary>
/// Stores the collaborators the site parser works with.
/// </summary>
/// <param name="mainPageParser">Parser for the main page.</param>
/// <param name="detailsPageParser">Parser for the details page.</param>
/// <param name="parserSettings">Parser settings.</param>
/// <param name="httpGetter">HTTP getter.</param>
public EkSiteParser(
    IMainPageParser mainPageParser,
    IDetailsPageParser detailsPageParser,
    IParserSettings parserSettings,
    IHttpGetter httpGetter)
{
    _parserSettings = parserSettings;
    _httpGetter = httpGetter;
    _mainPageParser = mainPageParser;
    _detailsPageParser = detailsPageParser;
}
/// <summary>
/// Decodes a JSON payload and converts each child token into a <see cref="DataRecord"/>.
/// </summary>
/// <param name="Data">Raw response bytes; decoded as UTF-8.</param>
/// <param name="Host">Server name stored on every produced <see cref="DataServer"/>.</param>
/// <param name="Settings">Parser settings; not read by this implementation.</param>
/// <returns>One record per child token of the parsed JSON.</returns>
/// <exception cref="ParserException">Wraps any exception raised while decoding or converting.</exception>
public IList<DataRecord> GetData(byte[] Data, string Host, IParserSettings Settings)
{
    try
    {
        var text = System.Text.Encoding.UTF8.GetString(Data);
        var tkn = Newtonsoft.Json.Linq.JToken.Parse(text);
        return tkn.Select(r =>
        {
            var rating = (string)r["rating"];
            var fileUrl = (string)r["file_url"];

            return new DataRecord()
            {
                MD5 = (string)r["md5"],
                // A missing or empty rating falls back to Questionable, matching the XML and
                // regex variants of GetData, instead of failing the whole batch.
                Rating = string.IsNullOrEmpty(rating) ? DataRating.Questionable : (DataRating)rating[0],
                Tags = ((string)r["tags"])?.Split(' '),
                Servers = new[]
                {
                    new DataServer()
                    {
                        Post = IntOrDefault(r["id"]),
                        Server = Host,
                        // Without a file URL there is no authority to record; use an empty list
                        // rather than letting new Uri(null) throw.
                        subServers = string.IsNullOrEmpty(fileUrl)
                            ? new string[0]
                            : new string[] { new Uri(fileUrl).Authority },
                        Size = IntOrDefault(r["file_size"]),
                        ParentPost = IntOrDefault(r["parent_id"]),
                        Autor = (string)r["author"],
                        Ext = Path.GetExtension(fileUrl)
                    }
                }
            };
        }).ToArray();
    }
    catch (Exception e)
    {
        throw new ParserException(e);
    }
}
/// <summary>
/// Stores the worker's collaborators and obtains a logger named after the runtime type.
/// </summary>
/// <param name="ekSiteParser">Site parser.</param>
/// <param name="vkPostCreator">Post creator.</param>
/// <param name="parserSettings">Parser settings.</param>
public Worker(
    IEkSiteParser ekSiteParser,
    IVkPostCreator vkPostCreator,
    IParserSettings parserSettings)
{
    _parserSettings = parserSettings;
    _vkPostCreator = vkPostCreator;
    _ekSiteParser = ekSiteParser;

    var loggerName = GetType().Name;
    _logger = LogManager.GetLogger(loggerName);
}
/// <summary>
/// Resolves the product ids for the given settings and parses each product page in turn.
/// </summary>
/// <param name="settings">Supplies <c>BaseUrl</c> and is forwarded to the id lookup.</param>
/// <param name="type">Value written to <c>HardwareType</c> on every parsed item.</param>
/// <returns>The items parsed before the loop finished or was aborted by an HTTP failure.</returns>
public async Task<IEnumerable<HardwareItemRequest>> ParseItems(IParserSettings settings, string type)
{
    var parsedItems = new List<HardwareItemRequest>();

    foreach (var productId in await ParseProductId(settings))
    {
        // Lazy way to avoid the captcha: wait ten seconds before every request.
        await Task.Delay(10000);

        try
        {
            var product = await ParseProductItem($"{settings.BaseUrl}/{productId}");
            product.HardwareType = type;
            parsedItems.Add(product);
        }
        catch (HttpRequestException ex)
        {
            // An HTTP failure stops the whole run; the count of items gathered so far is logged.
            logger.LogError($"{ex.Message}: {parsedItems.Count}");
            break;
        }
        catch (Exception ex)
        {
            // Any other failure only skips this product.
            logger.LogError($"{ex.Message} at the {settings.BaseUrl}/{productId}");
        }
    }

    return parsedItems;
}
/// <summary>
/// Stores the parser and settings, obtains a logger from a fresh <c>LoggerFactory</c>
/// and creates the HTML loader for the configured base URL.
/// </summary>
/// <param name="parser">Parser used by this worker.</param>
/// <param name="parserSettings">Settings; <c>BaseUrl</c> is passed to the loader.</param>
public ParserWorker(IParser<T> parser, IParserSettings parserSettings)
{
    var loggerFactory = new LoggerFactory();
    _logger = loggerFactory.GetLogger();

    _parser = parser;
    _parserSettings = parserSettings;

    var baseUrl = _parserSettings.BaseUrl;
    _loader = new HtmlLoader(baseUrl);
}
/// <summary>
/// Creates the loader's <see cref="HttpClient"/> and builds the request address
/// from the engine URL and the image URL.
/// </summary>
/// <param name="settings">Supplies <c>EngineUrl</c> and <c>ImageUrl</c>.</param>
public Loader(IParserSettings settings)
{
    client = new HttpClient();

    // EngineUrl followed by the image address as the "url" query parameter.
    // No "&start=" paging suffix is appended here.
    url = $"{settings.EngineUrl}&url={settings.ImageUrl}";
}
/// <summary>
/// Creates the loader's <see cref="HttpClient"/> and remembers the input and output file paths.
/// </summary>
/// <param name="settings">Supplies <c>InputFilePath</c> and <c>OutputFilePath</c>.</param>
public HtmlLoader(IParserSettings settings)
{
    var httpClient = new HttpClient();

    // Identifies this client to the site being parsed.
    httpClient.DefaultRequestHeaders.Add("User", "HtmlParser");
    _client = httpClient;

    _inputFilePath = settings.InputFilePath;
    _outputFilePath = settings.OutputFilePath;
}
/// <summary>
/// Creates the loader's <see cref="HttpClient"/> with browser-like default headers and builds
/// the article address from the settings.
/// </summary>
/// <param name="settings">
/// Supplies <c>BaseUrl</c>, <c>CategoryName</c>, <c>GuideName</c>, <c>PartNumber</c> and <c>ArticleNumber</c>.
/// </param>
public HtmlLoader(IParserSettings settings)
{
    // The headers below advertise gzip/deflate support, so the handler must actually
    // decompress responses; otherwise compressed bodies would be read as raw bytes.
    var handler = new HttpClientHandler
    {
        AutomaticDecompression = System.Net.DecompressionMethods.GZip | System.Net.DecompressionMethods.Deflate
    };
    client = new HttpClient(handler);

    client.DefaultRequestHeaders.TryAddWithoutValidation("Accept", "text/html,application/xhtml+xml,application/xml");
    client.DefaultRequestHeaders.TryAddWithoutValidation("Accept-Encoding", "gzip, deflate");
    client.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:19.0) Gecko/20100101 Firefox/19.0");
    client.DefaultRequestHeaders.TryAddWithoutValidation("Accept-Charset", "ISO-8859-1");

    // <BaseUrl>/<CategoryName>/<GuideName>/<PartNumber>.<ArticleNumber>.php
    url = $"{settings.BaseUrl}/{settings.CategoryName}/{settings.GuideName}/{settings.PartNumber}.{settings.ArticleNumber}.php";
}
/// <summary>
/// Replaces the current settings with a vacancy search for the given company name
/// (pages 0 through 7, 100 results per page).
/// </summary>
/// <param name="companyName">Company name placed in the <c>text</c> query parameter; <c>null</c> is treated as empty.</param>
private void UpdateSettings(string companyName)
{
    // Escape the name so characters such as '&', '#', '+' or spaces cannot break or alter the query string.
    var escapedName = System.Uri.EscapeDataString(companyName ?? string.Empty);

    _settings = new VacancyParserSettings
    {
        BaseUrl = $"https://api.hh.ru/vacancies?area=16&search_field=company_name&text={escapedName}&per_page=100",
        Pagination = "&page=",
        StartPage = 0,
        EndPage = 7
    };
}
/// <summary>
/// Registers an exchange parser together with its settings.
/// </summary>
/// <param name="parser">The exchange parser; used as the key.</param>
/// <param name="settings">Settings for that exchange.</param>
/// <exception cref="ArgumentNullException"><paramref name="parser"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The parser could not be added, e.g. because it is already registered.</exception>
public void AddParser(IParser parser, IParserSettings settings)
{
    // Checked up front so a null key is not misreported as "already added".
    // ArgumentNullException derives from ArgumentException, so existing catch blocks still apply.
    if (parser == null)
    {
        throw new ArgumentNullException(nameof(parser));
    }

    try
    {
        parsers.Add(parser, settings);
    }
    catch (ArgumentException ex)
    {
        // Keep the original failure as the inner exception for diagnostics.
        throw new ArgumentException("Такая биржа уже добавлена.", ex);
    }
}
/// <summary>
/// Creates the loader's <see cref="HttpClient"/> and builds the URL for the given category.
/// </summary>
/// <param name="setting">Supplies <c>BaseURL</c> (containing a <c>{Category}</c> placeholder) and <c>Prefix</c>.</param>
/// <param name="category">Category number substituted into the base URL.</param>
public HtmlLoader(IParserSettings setting, int category)
{
    Client = new HttpClient();

    var baseAddress = setting.BaseURL.Replace("{Category}", category.ToString());

    // Category 21 additionally swaps a percent-encoded fragment for its plain-text form.
    if (category == 21)
    {
        baseAddress = baseAddress.Replace("%D0%B1%D1%83%D0%BA%D0%B2", "буквы");
    }

    URL = $"{baseAddress}/{setting.Prefix}/";
}
/// <summary>
/// Reads the configuration located next to the host template file and initializes
/// the core and parser settings from it.
/// </summary>
/// <param name="hostTemplateFile">Path of the host template file; its directory is where the config is looked up.</param>
/// <param name="configFilename">Config file name; <c>null</c> selects <c>Core.DefaultConfigFilename</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="hostTemplateFile"/> is <c>null</c>.</exception>
internal void Init(string hostTemplateFile, string configFilename)
{
    if (hostTemplateFile == null)
    {
        throw new ArgumentNullException("hostTemplateFile");
    }

    var configDirectory = Path.GetDirectoryName(hostTemplateFile);
    if (configFilename == null)
    {
        configFilename = Core.DefaultConfigFilename;
    }

    var configDocument = Core.ReadConfig(configDirectory, configFilename);

    _coreSettings = Core.Init(configDocument);
    _settings = ParserSettings.Init(configDirectory, configFilename, configDocument);
}
/// <summary>
/// Initializes a new instance of the <see cref="OptionMap"/> class.
/// Kept internal (not private) so unit tests can construct it.
/// </summary>
/// <param name="capacity">Initial capacity of the name table; the option table is sized at twice this value.</param>
/// <param name="settings">Parser settings; decides key case sensitivity and whether mutual exclusion is tracked.</param>
internal OptionMap(int capacity, IParserSettings settings)
{
    this.settings = settings;

    // Key comparison follows the CaseSensitive setting.
    IEqualityComparer<string> keyComparer;
    if (this.settings.CaseSensitive)
    {
        keyComparer = StringComparer.Ordinal;
    }
    else
    {
        keyComparer = StringComparer.OrdinalIgnoreCase;
    }

    this.names = new Dictionary<string, string>(capacity, keyComparer);
    this.map = new Dictionary<string, OptionInfo>(capacity * 2, keyComparer);

    if (!this.settings.MutuallyExclusive)
    {
        return;
    }

    // The mutually-exclusive set table always ignores case.
    this.mutuallyExclusiveSetMap = new Dictionary<string, MutuallyExclusiveInfo>(capacity, StringComparer.OrdinalIgnoreCase);
}
/// <summary>
/// Builds the parse actions for the offset (indentation-based) syntax on top of the base grammar
/// and publishes them through Indentation, TestLine, OffsetElement, OffsetText, OffsetTexts,
/// OffsetExpression, OffsetStatement and OffsetNodes.
/// </summary>
/// <param name="settings">Parser settings, forwarded to the base grammar constructor.</param>
public OffsetGrammar(IParserSettings settings) : base(settings) {
    // Line boundary: optional '\r', then '\n', the STX marker or the ETX marker.
    ParseAction <Chain <char, char> > action = Grammar.Opt <char>(CharGrammar.Ch('\r')).And <char, char>(CharGrammar.Ch('\n').Or <char>(CharGrammar.ChSTX()).Or <char>(CharGrammar.ChETX()));
    // Line start: optional '\r', then '\n' or the STX marker.
    ParseAction <Chain <char, char> > action2 = Grammar.Opt <char>(CharGrammar.Ch('\r')).And <char, char>(CharGrammar.Ch('\n').Or <char>(CharGrammar.ChSTX()));
    // Line end: optional '\r', then '\n' or the ETX marker.
    ParseAction <Chain <char, char> > cond = Grammar.Opt <char>(CharGrammar.Ch('\r')).And <char, char>(CharGrammar.Ch('\n').Or <char>(CharGrammar.ChETX()));
    // Indentation: a line start followed by spaces/tabs that is NOT immediately followed by a line end.
    ParseAction <IndentationNode> action4 = action2.And <Chain <char, char>, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch(new char[] { ' ', '\t' }))).NotNext <Chain <Chain <char, char>, IList <char> >, Chain <char, char> >(cond).Build <Chain <Chain <char, char>, IList <char> >, IndentationNode>(hit => new IndentationNode(hit.Down));
    // The result of this expression is discarded.
    Grammar.Rep1 <char>(CharGrammar.Ch(new char[] { ' ', '\t' }));
    // Zero or more spaces/tabs; used below as skippable padding.
    ParseAction <IList <char> > action5 = Grammar.Rep <char>(CharGrammar.Ch(new char[] { ' ', '\t' }));
    // Blank line: a line start plus spaces/tabs that IS followed by a line end; produces no nodes.
    ParseAction <Node[]> action6 = action2.And <Chain <char, char>, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch(new char[] { ' ', '\t' }))).IfNext <Chain <Chain <char, char>, IList <char> >, Chain <char, char> >(cond).Build <Chain <Chain <char, char>, IList <char> >, Node[]>(hit => new Node[0]);
    // Name continuation character: letter, digit, '-', '_' or ':'.
    ParseAction <char> parse = CharGrammar.Ch(new Func <char, bool>(char.IsLetterOrDigit)).Or <char>(CharGrammar.Ch(new char[] { '-', '_', ':' }));
    // Name: a letter, '_' or ':' followed by any number of continuation characters.
    ParseAction <string> action8 = CharGrammar.Ch(new Func <char, bool>(char.IsLetter)).Or <char>(CharGrammar.Ch(new char[] { '_', ':' })).And <char, IList <char> >(Grammar.Rep <char>(parse)).Build <Chain <char, IList <char> >, string>(hit => hit.Left + new string(hit.Down.ToArray <char>()));
    // Raw text line: '|' followed by every character up to the next line boundary.
    ParseAction <TextNode> action9 = CharGrammar.Ch('|').And <char, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch((Func <char, bool>)(_ => true)).Unless <char, Chain <char, char> >(action))).Build <Chain <char, IList <char> >, TextNode>(hit => new TextNode(hit.Down));
    // Inline node taken from the base grammar: an entity reference or a code expression.
    ParseAction <Node> action10 = base.AsNode <EntityNode>(base.EntityRef).Or <Node>(base.AsNode <ExpressionNode>(base.Code));
    // Text run: one or more characters that start neither a line end nor an inline node.
    ParseAction <TextNode> parser = Grammar.Rep1 <char>(CharGrammar.Ch((Func <char, bool>)(ch => true)).Unless <char, Chain <char, char> >(cond).Unless <char, Node>(action10)).Build <IList <char>, TextNode>(hit => new TextNode(hit));
    // Mixed text line: '|' followed by any sequence of text runs and inline nodes.
    ParseAction <IList <Node> > action12 = CharGrammar.Ch('|').And <char, IList <Node> >(Grammar.Rep <Node>(base.AsNode <TextNode>(parser).Or <Node>(action10))).Build <Chain <char, IList <Node> >, IList <Node> >(hit => hit.Down);
    // Output expression: '=' followed by an expression limited by the line end; automatic encoding is forced on.
    ParseAction <ExpressionNode> action13 = CharGrammar.Ch('=').And <char, Snippets>(base.LimitedExpression(cond.Build <Chain <char, char>, string>(x => ""))).Build <Chain <char, Snippets>, ExpressionNode>(hit => new ExpressionNode(hit.Down) { AutomaticEncoding = true });
    // Statement, form 1: '-' followed by the base grammar's Statement1.
    ParseAction <StatementNode> action14 = CharGrammar.Ch('-').And <char, IList <Snippet> >(base.Statement1).Build <Chain <char, IList <Snippet> >, StatementNode>(hit => new StatementNode(hit.Down));
    // Statement, form 2: "@{" expression "}".
    ParseAction <StatementNode> action15 = CharGrammar.Ch("@{").And <string, Snippets>(base.LimitedExpression(CharGrammar.Ch("}"))).And <Chain <string, Snippets>, char>(CharGrammar.Ch('}')).Build <Chain <Chain <string, Snippets>, char>, StatementNode>(hit => new StatementNode(hit.Left.Down));
    // Either statement form.
    ParseAction <StatementNode> action16 = action14.Or <StatementNode>(action15);
    // Id shorthand: '#' followed by letters, digits, '-' or '_'; trailing padding is skipped.
    ParseAction <string> action17 = CharGrammar.Ch('#').And <char, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch(new Func <char, bool>(char.IsLetterOrDigit)).Or <char>(CharGrammar.Ch(new char[] { '-', '_' })))).Skip <Chain <char, IList <char> >, IList <char> >(action5).Build <Chain <char, IList <char> >, string>(hit => new string(hit.Down.ToArray <char>()));
    // Class shorthand: '.' followed by letters, digits, '-' or '_'; trailing padding is skipped.
    ParseAction <string> action18 = CharGrammar.Ch('.').And <char, IList <char> >(Grammar.Rep <char>(CharGrammar.Ch(new Func <char, bool>(char.IsLetterOrDigit)).Or <char>(CharGrammar.Ch(new char[] { '-', '_' })))).Skip <Chain <char, IList <char> >, IList <char> >(action5).Build <Chain <char, IList <char> >, string>(hit => new string(hit.Down.ToArray <char>()));
    // Optional shorthand block: classes, an optional id, then more classes (all parts may be empty).
    var action19 = Grammar.Rep <string>(action18).And <IList <string>, string>(Grammar.Opt <string>(action17)).And <Chain <IList <string>, string>, IList <string> >(Grammar.Rep <string>(action18)).Build(hit => new { id = hit.Left.Down, classes = hit.Left.Left.Concat <string>(hit.Down) });
    // Required shorthand block: same shape, but each alternative demands an id or at least one class.
    var action20 = Grammar.Rep <string>(action18).And <IList <string>, string>(action17).And <Chain <IList <string>, string>, IList <string> >(Grammar.Rep <string>(action18)).Or <Chain <Chain <IList <string>, string>, IList <string> > >(Grammar.Rep1 <string>(action18).And <IList <string>, string>(Grammar.Opt <string>(action17)).And <Chain <IList <string>, string>, IList <string> >(Grammar.Rep <string>(action18))).Or <Chain <Chain <IList <string>, string>, IList <string> > >(Grammar.Rep <string>(action18).And <IList <string>, string>(Grammar.Opt <string>(action17)).And <Chain <IList <string>, string>, IList <string> >(Grammar.Rep1 <string>(action18))).Build(hit => new { id = hit.Left.Down, classes = hit.Left.Left.Concat <string>(hit.Down) });
    // Element: a name with optional shorthand, or an optional name with required shorthand.
    // A missing name becomes "div"; the shorthand becomes "id"/"class" attributes placed before
    // any explicit attributes parsed by the base grammar.
    ParseAction <ElementNode> action22 = action8.Skip <string, IList <char> >(action5).And(action19).Or(Grammar.Opt <string>(action8).And(action20)).Build(hit => new { name = hit.Left ?? "div", attrs = ((hit.Down.id != null) ? new AttributeNode[] { new AttributeNode("id", hit.Down.id) } : new AttributeNode[0]).Concat <AttributeNode>(hit.Down.classes.Any <string>() ? new AttributeNode[] { new AttributeNode("class", string.Join(" ", hit.Down.classes.ToArray <string>())) } : new AttributeNode[0]) }).And(Grammar.Rep <AttributeNode>(base.Attribute.Skip <AttributeNode, IList <char> >(action5))).Build(hit => new ElementNode(hit.Left.name, hit.Left.attrs.Concat <AttributeNode>(hit.Down).ToList <AttributeNode>(), false));
    // Content allowed after an element on the same line: mixed text, an output expression or a statement.
    ParseAction <IList <Node> > action23 = action12.Or <IList <Node> >(action13.Build <ExpressionNode, IList <Node> >(hit => ((IList <Node>) new Node[] { hit }))).Or <IList <Node> >(action16.Build <StatementNode, IList <Node> >(hit => (IList <Node>) new Node[] { hit }));
    // Element followed by optional same-line content.
    ParseAction <Chain <ElementNode, IList <Node> > > action24 = action22.Skip <ElementNode, IList <char> >(action5).And <ElementNode, IList <Node> >(Grammar.Opt <IList <Node> >(action23));
    // Line alternative: indentation + element (+ content).
    ParseAction <Node[]> action25 = action4.And <IndentationNode, Chain <ElementNode, IList <Node> > >(action24).Build <Chain <IndentationNode, Chain <ElementNode, IList <Node> > >, Node[]>(hit => new Node[] { hit.Left, hit.Down.Left }.Concat <Node>((hit.Down.Down ?? ((IList <Node>) new Node[0]))).ToArray <Node>());
    // Line alternative: indentation + mixed text.
    ParseAction <Node[]> action26 = action4.And <IndentationNode, IList <Node> >(action12).Build <Chain <IndentationNode, IList <Node> >, Node[]>(hit => new Node[] { hit.Left }.Concat <Node>(hit.Down).ToArray <Node>());
    // Line alternative: indentation + output expression.
    ParseAction <Node[]> action27 = action4.And <IndentationNode, ExpressionNode>(action13).Build <Chain <IndentationNode, ExpressionNode>, Node[]>(hit => new Node[] { hit.Left, hit.Down });
    // Line alternative: indentation + statement.
    ParseAction <Node[]> action28 = action4.And <IndentationNode, StatementNode>(action16).Build <Chain <IndentationNode, StatementNode>, Node[]>(hit => new Node[] { hit.Left, hit.Down });
    // Line alternative: element (+ content) with no leading indentation node.
    ParseAction <Node[]> action29 = action24.Build <Chain <ElementNode, IList <Node> >, Node[]>(hit => new Node[] { hit.Left }.Concat <Node>((hit.Down ?? ((IList <Node>) new Node[0]))).ToArray <Node>());
    // One line: the first matching alternative (blank line is tried first), then trailing padding is skipped.
    ParseAction <Node[]> action30 = action6.Or <Node[]>(action25).Or <Node[]>(action26).Or <Node[]>(action27).Or <Node[]>(action28).Or <Node[]>(action29).Skip <Node[], IList <char> >(action5);
    // Any number of lines.
    ParseAction <IList <Node[]> > action31 = Grammar.Rep <Node[]>(action30);
    // Publish the actions.
    this.Indentation = action4;
    this.TestLine = action30;
    this.OffsetElement = action22;
    this.OffsetText = action9;
    this.OffsetTexts = action12;
    this.OffsetExpression = action13;
    this.OffsetStatement = action16;
    // NOTE(review): the nested "select from ..." yields a sequence of sequences, which does not
    // look compatible with ToList<Node>(); presumably this was meant to flatten all lines into
    // one list of non-null nodes — confirm against the upstream source.
    this.OffsetNodes = action31.Build <IList <Node[]>, IList <Node> >(hit => (from nodes in hit select from node in nodes where node != null select node).ToList <Node>());
}
/// <summary>
/// Downloads the main page, parses the categories out of it and raises <c>OnNewCategory</c>
/// with the categories keyed by the exchange name.
/// </summary>
/// <param name="parser">Parser that extracts categories from the document.</param>
/// <param name="settings">Settings for the loader; <c>BurseName</c> becomes the dictionary key.</param>
private async Task GetCategory(IParser parser, IParserSettings settings)
{
    var mainPageHtml = await new HtmlLoader(settings).GetSourceByMainPage();
    var mainPage = await new HtmlParser().ParseDocumentAsync(mainPageHtml);

    var parsedCategories = parser.ParseCategory(mainPage);

    var categoriesByBurse = new Dictionary<string, List<Category>>();
    categoriesByBurse.Add(settings.BurseName, parsedCategories);

    OnNewCategory?.Invoke(this, categoriesByBurse);
}
/// <summary>
/// Creates the request object with a Chrome user agent and an empty cookie jar, loads any
/// cookies supplied as JSON in the settings, and remembers the base URL.
/// </summary>
/// <param name="settings">Supplies <c>JsonCookies</c> (optional JSON object of name/value pairs) and <c>BaseUrl</c>.</param>
public HtmlLoader(IParserSettings settings)
{
    req = new HttpRequest();
    req.UserAgent = Http.ChromeUserAgent();
    req.Cookies = new CookieDictionary();

    var cookiesJson = settings.JsonCookies;
    if (cookiesJson != null)
    {
        // Every property of the JSON object becomes one cookie: name -> value text.
        JObject cookies = JObject.Parse(cookiesJson);
        foreach (var cookie in cookies)
        {
            req.Cookies.Add(cookie.Key, cookie.Value.ToString());
        }
    }

    this.url = settings.BaseUrl;
}
/// <summary>
/// Parses the single product page at <c>settings.BaseUrl</c> and stamps it with the hardware type.
/// </summary>
/// <param name="settings">Supplies <c>BaseUrl</c>, the address of the product page.</param>
/// <param name="type">Value written to <c>HardwareType</c>.</param>
/// <returns>The parsed item; whatever had been obtained (possibly <c>null</c>) when an error was logged.</returns>
public async Task<HardwareItemRequest> ParseItem(IParserSettings settings, string type)
{
    HardwareItemRequest parsed = null;

    try
    {
        parsed = await ParseProductItem(settings.BaseUrl);
        parsed.HardwareType = type;
    }
    catch (Exception ex)
    {
        // Failures are logged, not propagated.
        logger.LogError($"{ex.Message} at the {settings.BaseUrl}");
    }

    return parsed;
}
/// <summary>
/// Walks the listing pages from <c>StartPoint</c> to <c>EndPoint</c> (inclusive) and collects
/// every product id reported by the id parser.
/// </summary>
/// <param name="settings">Supplies <c>BaseUrl</c>, <c>Prefix</c>, <c>StartPoint</c> and <c>EndPoint</c>.</param>
/// <returns>All ids gathered across the visited pages.</returns>
private async Task<IEnumerable<string>> ParseProductId(IParserSettings settings)
{
    var collectedIds = new List<string>();

    var worker = new ParserWorker<string[]>(new CitilinkParserId());
    // Each completed page contributes its ids to the shared list.
    worker.OnCompleted += (sender, pageIds) => collectedIds.AddRange(pageIds);

    int page = settings.StartPoint;
    while (page <= settings.EndPoint)
    {
        worker.Uri = $"{settings.BaseUrl}/?{settings.Prefix}={page}";
        await worker.Start();
        page++;
    }

    return collectedIds;
}
/// <summary>
/// Decodes the payload as UTF-8 text and extracts records with the regular expression
/// configured in the settings.
/// </summary>
/// <remarks>
/// The expression is read through the named groups <c>post</c>, <c>server</c>, <c>md5</c>,
/// <c>ext</c> and <c>tags</c>. Matches whose <c>md5</c> group did not participate are skipped.
/// </remarks>
/// <param name="Data">Raw response bytes; decoded as UTF-8.</param>
/// <param name="Host">Server name stored on every produced <see cref="DataServer"/>.</param>
/// <param name="Settings">Must be a <see cref="RegExSettings"/>; its <c>Expression</c> is the pattern.</param>
/// <returns>One record per match that captured an MD5.</returns>
/// <exception cref="ParserException">Wraps any exception raised while decoding or converting.</exception>
public IList<DataRecord> GetData(byte[] Data, string Host, IParserSettings Settings)
{
    try
    {
        var text = System.Text.Encoding.UTF8.GetString(Data);
        var r = new List<DataRecord>();

        MatchCollection mc = Regex.Matches(text, ((RegExSettings)Settings).Expression);
        foreach (Match m in mc)
        {
            // Match.Groups[name] never returns null: an absent or unmatched group is a Group with
            // Success == false and an empty Value, so "was it captured" must be tested via Success.
            var pos = m.Groups["post"];
            var srv = m.Groups["server"];
            var md5 = m.Groups["md5"];
            var ext = m.Groups["ext"];
            var tagsM = m.Groups["tags"];

            if (!md5.Success)
            {
                continue;
            }

            // The trailing space lets the metadata patterns below match a token at the very end.
            var tags = tagsM.Success ? tagsM.Value + " " : string.Empty;
            if (UseHtmlDecode)
            {
                tags = WebUtility.HtmlDecode(tags);
            }

            // Rating and author are read from the tag text before the optional filter strips anything.
            var m2 = Regex.Match(tags, "Rating:(\\w)");
            var m3 = Regex.Match(tags, @"User:([\w]*)");
            if (!string.IsNullOrWhiteSpace(TagsFilterRegexp))
            {
                tags = Regex.Replace(tags, TagsFilterRegexp, string.Empty);
            }

            r.Add(new DataRecord()
            {
                MD5 = md5.Value,
                // Lower-cased first rating letter; 'q' when no rating tag is present.
                Rating = m2.Success ? (DataRating)char.ToLowerInvariant(m2.Groups[1].Value[0]) : (DataRating)'q',
                Tags = tags.Trim().Split(' '),
                Servers = new[]
                {
                    new DataServer()
                    {
                        // An uncaptured post id yields 0 instead of failing on Convert.ToInt32("").
                        Post = pos.Success ? Convert.ToInt32(pos.Value) : 0,
                        Server = Host,
                        subServers = srv.Success ? new[] { srv.Value.ToLower().Trim() } : new string[0],
                        Autor = m3.Success ? m3.Groups[1].Value : string.Empty,
                        Ext = ext.Success ? ext.Value.ToLower().Trim() : null
                    }
                }
            });
        }

        return r;
    }
    catch (Exception e)
    {
        throw new ParserException(e);
    }
}
/// <summary>
/// Loads the pages from <c>StartPoint</c> to <c>EndPoint</c> (inclusive), parses the orders on each
/// and hands them to the <c>OnNewOrderAsync</c> subscriber, pausing ten seconds after every page.
/// </summary>
/// <param name="parser">Parser that extracts orders from a document.</param>
/// <param name="settings">Settings for the loader; supplies the page range.</param>
private async Task Worker(IParser parser, IParserSettings settings)
{
    var loader = new HtmlLoader(settings);

    for (int i = settings.StartPoint; i <= settings.EndPoint; i++)
    {
        var source = await loader.GetSourceByPageId(i);
        var domParser = new HtmlParser();
        var document = await domParser.ParseDocumentAsync(source);
        var result = parser.ParseOrder(document);

        // "await handler?.Invoke(...)" awaits null when nobody is subscribed and throws
        // NullReferenceException, so the null check has to happen before the await.
        var handler = OnNewOrderAsync;
        if (handler != null)
        {
            await handler.Invoke(result);
        }

        await Task.Delay(10000);
    }
}
/// <summary>
/// Runs a polite crawl starting at <c>parserSettings.BaseUrl</c>, with the parser and settings
/// made available to <c>PageCrawlCompleted</c> through the crawler's CrawlBag.
/// </summary>
/// <typeparam name="T">Type produced by the parser.</typeparam>
/// <param name="parserSettings">Settings stored in the CrawlBag; <c>BaseUrl</c> is the start address.</param>
/// <param name="parser">Parser stored in the CrawlBag.</param>
private static async Task DemoSimpleCrawler<T>(IParserSettings parserSettings, IParser<T> parser) where T : class
{
    var config = new CrawlConfiguration();
    config.MaxPagesToCrawl = 20;                        // Crawl at most 20 pages.
    config.MinCrawlDelayPerDomainMilliSeconds = 1000;   // Wait this many milliseconds between requests.

    var crawler = new PoliteWebCrawler(config);
    crawler.PageCrawlCompleted += PageCrawlCompleted;

    // The completed-page handler reads these two values back out of the bag.
    crawler.CrawlBag.Parser = parser;
    crawler.CrawlBag.Settings = parserSettings;

    var startUri = new Uri(parserSettings.BaseUrl);
    await crawler.CrawlAsync(startUri);
}
/// <summary>
/// Downloads one page per day in the configured date range and returns all bodies concatenated.
/// </summary>
/// <param name="parserSettings">Supplies <c>StartDate</c>, <c>EndDate</c>, <c>BaseUri</c> and <c>Category</c>.</param>
/// <returns>
/// The concatenated page bodies, or <c>null</c> (after showing "Error 5") when the start date
/// is later than the end date.
/// </returns>
public async Task<string> HtmlLoad(IParserSettings parserSettings)
{
    if (parserSettings.StartDate > parserSettings.EndDate)
    {
        MessageBox.Show("Error 5");
        return null;
    }

    var result = new System.Text.StringBuilder();

    // Dispose the client and every response so connections are released deterministically.
    using (HttpClient httpClient = new HttpClient())
    {
        for (DateTime curDate = parserSettings.StartDate; curDate <= parserSettings.EndDate; curDate = curDate.AddDays(1))
        {
            // The date is part of a URL, so it must not depend on the current culture's calendar or digits.
            string day = curDate.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture);

            using (HttpResponseMessage responseMessage = await httpClient.GetAsync($"{parserSettings.BaseUri}{parserSettings.Category}/{day}"))
            {
                result.Append(await responseMessage.Content.ReadAsStringAsync());
            }
        }
    }

    return result.ToString();
}
/// <summary>
/// Collects the site-local links of a document as absolute addresses (<c>BaseUrl</c> + path).
/// </summary>
/// <remarks>
/// An anchor is kept when BaseUrl + path is a well-formed absolute URI, the path contains "/",
/// and the anchor either has an empty host name or its href contains <c>BaseUrl</c>.
/// The result is a deferred query.
/// </remarks>
/// <param name="document">Document whose anchors are inspected.</param>
/// <param name="settings">Supplies <c>BaseUrl</c>.</param>
/// <returns>BaseUrl + path for every anchor that passes the filter.</returns>
public IEnumerable<string> Parse(IDocument document, IParserSettings settings)
{
    return document.QuerySelectorAll("a")
        .OfType<IHtmlAnchorElement>()
        .Where(anchor =>
            Uri.IsWellFormedUriString(settings.BaseUrl + anchor.PathName, UriKind.Absolute)
            && anchor.PathName.Contains("/")
            && (anchor.HostName.Equals(String.Empty) || anchor.Href.Contains(settings.BaseUrl))
            && !anchor.PathName.Equals(String.Empty))
        .Select(anchor => settings.BaseUrl + anchor.PathName);
}
/// <summary>
/// Creates an enumerator positioned one step before the configured start page.
/// </summary>
/// <param name="htmlLoader">Loader stored for later use.</param>
/// <param name="parserSettings">Settings; <c>StartPage</c> determines the initial position.</param>
public PagesEnumerator(ILoader<T> htmlLoader, IParserSettings parserSettings)
{
    // Read the start page before touching any field, as the original tuple assignment did.
    var positionBeforeFirstPage = parserSettings.StartPage - 1;

    _loader = htmlLoader;
    _currentPos = positionBeforeFirstPage;
    _settings = parserSettings;
}
/// <summary>
/// Builds an <see cref="OptionMap"/> for verb commands.
/// </summary>
/// <param name="target">Options instance whose verb properties are inspected; stored as <c>RawOptions</c>.</param>
/// <param name="verbs">Pairs of verb property and its <see cref="VerbOptionAttribute"/>.</param>
/// <param name="settings">Parser settings; supplies the parsing culture.</param>
/// <returns>The populated map, keyed by each verb's unique name.</returns>
/// <exception cref="ParserException">
/// A verb's property type has no parameterless constructor and the property is not already initialized on <paramref name="target"/>.
/// </exception>
public static OptionMap Create(
    object target,
    IList<Pair<PropertyInfo, VerbOptionAttribute>> verbs,
    IParserSettings settings)
{
    var map = new OptionMap(verbs.Count, settings);

    foreach (var verb in verbs)
    {
        var optionInfo = new OptionInfo(verb.Right, verb.Left, settings.ParsingCulture)
        {
            HasParameterLessCtor = verb.Left.PropertyType.GetConstructor(Type.EmptyTypes) != null
        };

        if (!optionInfo.HasParameterLessCtor && verb.Left.GetValue(target, null) == null)
        {
            // The parentheses matter: a member call binds tighter than '+', so without them only the
            // second literal (which has no placeholder) is formatted and "{0}" is left in the message.
            throw new ParserException(
                ("Type {0} must have a parameterless constructor or" +
                 " be already initialized to be used as a verb command.").FormatInvariant(verb.Left.PropertyType));
        }

        map[verb.Right.UniqueName] = optionInfo;
    }

    map.RawOptions = target;
    return map;
}
/// <summary>
/// Builds an <see cref="OptionMap"/> from the option-attributed properties of <paramref name="target"/>.
/// </summary>
/// <param name="target">Options instance to reflect over; stored as <c>RawOptions</c>.</param>
/// <param name="settings">Parser settings; supplies the parsing culture.</param>
/// <returns>The populated map, or <c>null</c> when the property list could not be retrieved.</returns>
public static OptionMap Create(object target, IParserSettings settings)
{
    var properties = ReflectionUtil.RetrievePropertyList<BaseOptionAttribute>(target);
    if (properties == null)
    {
        return null;
    }

    var map = new OptionMap(properties.Count, settings);

    foreach (var entry in properties)
    {
        // Incomplete pairs are ignored.
        if (entry.Left == null || entry.Right == null)
        {
            continue;
        }

        string key;
        if (!entry.Right.AutoLongName)
        {
            key = entry.Right.UniqueName;
        }
        else
        {
            // The long name is derived from the lower-cased property name and written back to the attribute.
            key = entry.Left.Name.ToLowerInvariant();
            entry.Right.LongName = key;
        }

        map[key] = new OptionInfo(entry.Right, entry.Left, settings.ParsingCulture);
    }

    map.RawOptions = target;
    return map;
}
/// <summary>
/// Builds the markup grammar: an XML-like grammar (names, attributes, elements, comments,
/// doctype, XML declaration, processing instructions) extended with embedded statements,
/// code expressions and conditions. Bracketed numbers in the comments are the grammar
/// production numbers the rules are modelled on.
/// </summary>
/// <param name="settings">
/// Parser settings; <c>StatementMarker</c> selects the line-statement prefix ("#" when empty)
/// and <c>AutomaticEncoding</c> is copied onto the <c>${...}</c> and <c>$!{...}</c> expression nodes.
/// </param>
public MarkupGrammar(IParserSettings settings)
{
    var Apos = Ch('\'');
    var Quot = Ch('\"');
    var Lt = Ch('<');
    var Gt = Ch('>');

    //var CombiningChar = Ch('*');
    //var Extener = Ch('*');

    //[4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
    var NameChar = Ch(char.IsLetterOrDigit).Or(Ch('.', '-', '_', ':')) /*.Or(CombiningChar).Or(Extener)*/;

    //[5] Name ::= (Letter | '_' | ':') (NameChar)*
    var Name = Ch(char.IsLetter).Or(Ch('_', ':')).And(Rep(NameChar))
        .Build(hit => hit.Left + new string(hit.Down.ToArray()));

    //[7] Nmtoken ::= (NameChar)+
    var NmToken = Rep1(NameChar)
        .Build(hit => new string(hit.ToArray()));

    //[3] S ::= (#x20 | #x9 | #xD | #xA)+
    Whitespace = Rep1(Ch(char.IsWhiteSpace));

    //[25] Eq ::= S? '=' S?
    var Eq = Opt(Whitespace).And(Ch('=')).And(Opt(Whitespace));

    var paintedStatement1 = Statement1.Build(hit => new StatementNode(hit)).Paint <StatementNode, Node>();

    // The line-statement marker is configurable; an empty or null setting falls back to "#".
    var statementMarker = string.IsNullOrEmpty(settings.StatementMarker) ? "#" : settings.StatementMarker;

    // Syntax 1: '\r'? ('\n' | '\u0002') S? '#' (statement ^('\r' | '\n' | '\u0003') )
    var StatementNode1 = Opt(Ch('\r')).And(Ch('\n').Or(ChSTX())).And(Rep(Ch(' ', '\t'))).And(TkCode(Ch(statementMarker))).And(paintedStatement1).IfNext(Ch('\r', '\n').Or(ChETX()))
        .Build(hit => hit.Down);

    var paintedStatement2 = Statement2.Build(hit => new StatementNode(hit)).Paint <StatementNode, Node>();

    // Syntax 2: '<%' (statement ^'%>') '%>'
    // NotNext('=') keeps "<%=" for the expression form (Code3) below.
    var StatementNode2 = TkAspxCode(Ch("<%")).NotNext(Ch('=')).And(paintedStatement2).And(TkAspxCode(Ch("%>")))
        .Build(hit => hit.Left.Down);

    Statement = StatementNode1.Or(StatementNode2);

    // Syntax 1: ${csharp_expression}
    var Code1 = TkCode(Ch("${")).And(Expression).And(TkCode(Ch('}')))
        .Build(hit => new ExpressionNode(hit.Left.Down) { AutomaticEncoding = settings.AutomaticEncoding });

    // Syntax 3: <%=csharp_expression%>;
    // AutomaticEncoding is not set on this node.
    var Code3 = TkAspxCode(Ch("<%")).And(TkAttDelim(Ch('='))).And(Expression).And(TkAspxCode(Ch("%>")))
        .Build(hit => new ExpressionNode(hit.Left.Down));

    // Syntax 4: $!{csharp_expression}
    var Code4 = TkCode(Ch("$!{")).And(Expression).And(TkCode(Ch('}')))
        .Build(hit => new ExpressionNode(hit.Left.Down) { SilentNulls = true, AutomaticEncoding = settings.AutomaticEncoding });

    // Syntax 5: !{csharp_expression}
    // AutomaticEncoding is not set on this node.
    var Code5 = TkCode(Ch("!{")).And(Expression).And(TkCode(Ch('}')))
        .Build(hit => new ExpressionNode(hit.Left.Down));

    Code = Code1.Or(Code3).Or(Code4).Or(Code5);

    // Condition: ?{csharp_expression}
    var Condition = TkCode(Ch("?{")).And(Expression).And(TkCode(Ch('}')))
        .Build(hit => new ConditionNode(hit.Left.Down));

    // A lone '<' that starts no recognised construct is kept as literal text.
    var LessThanTextNode = Ch('<')
        .Build(hit => (Node) new TextNode("<"));

    //[68] EntityRef ::= '&' Name ';'
    EntityRef = TkEntity(Ch('&').And(Name).And(Ch(';')))
        .Build(hit => new EntityNode(hit.Left.Down));

    // A '&' that does not start an entity reference is kept as literal text.
    var EntityRefOrAmpersand = AsNode(EntityRef).Or(Ch('&').Build(hit => (Node) new TextNode("&")));

    //[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
    var AttValueSingleText = TkAttVal(Rep1(ChNot('<', '&', '\'').Unless(Code).Unless(Condition))).Build(hit => new TextNode(hit));
    var AttValueSingle = TkAttQuo(Apos).And(Rep(AsNode(AttValueSingleText).Or(EntityRefOrAmpersand).Or(AsNode(Code)).Or(AsNode(Condition)).Or(LessThanTextNode).Paint())).And(TkAttQuo(Apos));
    var AttValueDoubleText = TkAttVal(Rep1(ChNot('<', '&', '\"').Unless(Code).Unless(Condition))).Build(hit => new TextNode(hit));
    var AttValueDouble = TkAttQuo(Quot).And(Rep(AsNode(AttValueDoubleText).Or(EntityRefOrAmpersand).Or(AsNode(Code)).Or(AsNode(Condition)).Or(LessThanTextNode).Paint())).And(TkAttQuo(Quot));
    var AttValue = AttValueSingle.Or(AttValueDouble).Left().Down();

    //[41] Attribute ::= Name Eq AttValue
    Attribute = TkAttNam(Name).And(TkAttDelim(Eq)).And(AttValue)
        .Build(hit => new AttributeNode(hit.Left.Left, hit.Down)).Paint <AttributeNode, Node>();

    //[40] STag ::= '<' Name (S Attribute)* S? '>'
    //[44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
    // An optional line break plus same-line whitespace before '<' is captured and passed to the node
    // as its last constructor argument (empty string when absent).
    Element = Opt(Ch("\r\n").Or(Ch("\n")).And(StringOf(Ch(char.IsWhiteSpace).Unless(Ch('\r', '\n'))))).And(TkTagDelim(Lt)).And(TkEleNam(Name)).And(Rep(Whitespace.And(Attribute).Down())).And(Opt(Whitespace)).And(Opt(TkTagDelim(Ch('/')))).And(TkTagDelim(Gt))
        .Build(hit => new ElementNode(
            hit.Left.Left.Left.Left.Down,
            hit.Left.Left.Left.Down,
            hit.Left.Down != default(char),
            hit.Left.Left.Left.Left.Left.Left == null ? string.Empty : hit.Left.Left.Left.Left.Left.Left.Left + hit.Left.Left.Left.Left.Left.Left.Down));

    //[42] ETag ::= '</' Name S? '>'
    // Leading line break and whitespace are captured the same way as for Element.
    EndElement = Opt(Ch("\r\n").Or(Ch("\n")).And(StringOf(Ch(char.IsWhiteSpace).Unless(Ch('\r', '\n'))))).And(TkTagDelim(Lt.And(Ch('/')))).And(TkEleNam(Name)).And(Opt(Whitespace)).And(TkTagDelim(Gt))
        .Build(hit => new EndElementNode(hit.Left.Left.Down, hit.Left.Left.Left.Left == null ? string.Empty : hit.Left.Left.Left.Left.Left + hit.Left.Left.Left.Left.Down));

    // Text: one or more characters other than '&' and '<' that do not begin a statement, code, element or end element.
    Text = Rep1(ChNot('&', '<').Unless(Statement).Unless(Code).Unless(Element).Unless(EndElement))
        .Build(hit => new TextNode(hit));

    //[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
    Comment = TkComm(Ch("<!--").And(Rep(ChNot('-').Or(Ch('-').IfNext(ChNot('-'))))).And(Ch("-->")))
        .Build(hit => new CommentNode(hit.Left.Down));

    //[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
    var SystemLiteral = Quot.And(Rep(ChNot('\"'))).And(Quot).Or(Apos.And(Rep(ChNot('\''))).And(Apos))
        .Build(hit => new string(hit.Left.Down.ToArray()));

    //[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
    // PubidChar1 excludes the apostrophe (for apostrophe-delimited literals); PubidChar2 adds it.
    var PubidChar1 = Ch(char.IsLetterOrDigit).Or(Ch(" \r\n-()+,./:=?;!*#@$_%".ToArray()));
    var PubidChar2 = PubidChar1.Or(Apos);

    //[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
    var PubidLiteral = Quot.And(Rep(PubidChar2)).And(Quot).Or(Apos.And(Rep(PubidChar1)).And(Apos))
        .Build(hit => new string(hit.Left.Down.ToArray()));

    //[75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
    var ExternalIDSystem = Ch("SYSTEM").And(Whitespace).And(SystemLiteral)
        .Build(hit => new ExternalIdInfo { ExternalIdType = hit.Left.Left, SystemId = hit.Down });
    var ExternalIDPublic = Ch("PUBLIC").And(Whitespace).And(PubidLiteral).And(Whitespace).And(SystemLiteral)
        .Build(hit => new ExternalIdInfo { ExternalIdType = hit.Left.Left.Left.Left, PublicId = hit.Left.Left.Down, SystemId = hit.Down });
    var ExternalID = ExternalIDSystem.Or(ExternalIDPublic);

    //[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
    // The internal subset ('[' ... ']') part of the production is not implemented here.
    DoctypeDecl = Ch("<!DOCTYPE").And(Whitespace).And(Name).And(Opt(Whitespace.And(ExternalID).Down())).And(Opt(Whitespace)).And(Ch('>'))
        .Build(hit => new DoctypeNode { Name = hit.Left.Left.Left.Down, ExternalId = hit.Left.Left.Down });

    //[26] VersionNum ::= '1.0'
    var VersionNum = Ch("1.0");

    //[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
    var VersionInfo = Whitespace.And(Ch("version")).And(Eq).And(
        Apos.And(VersionNum).And(Apos).Or(Quot.And(VersionNum).And(Quot)));

    //[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
    var EncName = Ch(char.IsLetter).And(Rep(Ch(char.IsLetterOrDigit).Or(Ch('.', '_', '-'))))
        .Build(hit => hit.Left + new string(hit.Down.ToArray()));

    //[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
    var EncodingDecl = Whitespace.And(Ch("encoding")).And(Eq).And(
        Apos.And(EncName).And(Apos).Or(Quot.And(EncName).And(Quot)))
        .Build(hit => hit.Down.Left.Down);

    //[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
    var SSDecl = Whitespace.And(Ch("standalone")).And(Eq).And(
        Apos.And(Ch("yes").Or(Ch("no"))).And(Apos).Or(Quot.And(Ch("yes").Or(Ch("no"))).And(Quot)))
        .Build(hit => hit.Down.Left.Down);

    //[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
    XMLDecl = Ch("<?xml").And(VersionInfo).And(Opt(EncodingDecl)).And(Opt(SSDecl)).And(Opt(Whitespace)).And(Ch("?>"))
        .Build(hit => new XMLDeclNode { Encoding = hit.Left.Left.Left.Down, Standalone = hit.Left.Left.Down });

    //[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
    var PITarget = Name.Unless(Ch('X', 'x').And(Ch('M', 'm')).And(Ch('L', 'l')));

    //[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
    ProcessingInstruction = Ch("<?").And(PITarget).And(Opt(Whitespace)).And(Rep(Ch(ch => true).Unless(Ch("?>")))).And(Ch("?>"))
        .Build(hit => new ProcessingInstructionNode { Name = hit.Left.Left.Left.Down, Body = new string(hit.Left.Down.ToArray()) });

    // Alternatives are tried in this order; the bare '<' text node is the last resort.
    AnyNode = AsNode(Element).Paint()
        .Or(AsNode(EndElement).Paint())
        .Or(AsNode(Text).Paint())
        .Or(EntityRefOrAmpersand.Paint())
        .Or(AsNode(Statement))
        .Or(AsNode(Code).Paint())
        .Or(AsNode(DoctypeDecl).Paint())
        .Or(AsNode(Comment).Paint())
        .Or(AsNode(XMLDecl).Paint())
        .Or(AsNode(ProcessingInstruction).Paint())
        .Or(AsNode(LessThanTextNode).Paint());

    Nodes = Rep(AnyNode);
}
/// <summary>
/// Creates the provider with a <c>MarkupGrammar</c> built from the given settings.
/// </summary>
/// <param name="settings">Parser settings passed to the grammar.</param>
public DefaultSyntaxProvider(IParserSettings settings)
{
    var markupGrammar = new MarkupGrammar(settings);
    _grammar = markupGrammar;
}
/// <summary>
/// Creates a worker for the given parser (via the single-argument constructor) and stores its settings.
/// </summary>
/// <param name="parser">Parser forwarded to the other constructor.</param>
/// <param name="parserSettings">Settings stored on the worker.</param>
public ParserWorker(IParser<T> parser, IParserSettings parserSettings)
    : this(parser)
    => this.parserSettings = parserSettings;
/// <summary>
/// Creates the provider with an <c>OxygeneMarkupGrammar</c> built from the given settings.
/// </summary>
/// <param name="settings">Parser settings passed to the grammar.</param>
public OxygeneSyntaxProvider(IParserSettings settings)
{
    var oxygeneGrammar = new OxygeneMarkupGrammar(settings);
    _grammar = oxygeneGrammar;
}
// Address this loader requests; assigned once in the constructor.
readonly string url;

/// <summary>
/// Creates the loader's <see cref="HttpClient"/> and assembles the address from the settings.
/// </summary>
/// <param name="settings">Supplies <c>BaseUrl</c> and <c>Postfix</c>.</param>
public HtmlLoader(IParserSettings settings)
{
    var httpClient = new HttpClient();

    // Identifies this application to the site being scraped.
    httpClient.DefaultRequestHeaders.Add("User-Agent", "C# App");
    client = httpClient;

    // The address string is assembled here: <BaseUrl>/<Postfix>/
    var baseUrl = settings.BaseUrl;
    var postfix = settings.Postfix;
    url = $"{baseUrl}/{postfix}/";
}
/// <summary>
/// Creates a worker for the given parser (via the single-argument constructor) and assigns its settings.
/// </summary>
/// <param name="parser">Parser forwarded to the other constructor.</param>
/// <param name="settings">Value assigned to <c>Settings</c>.</param>
public ParserWorker(IParser<T> parser, IParserSettings settings)
    : this(parser)
{
    this.Settings = settings;
}
/// <summary>
/// Replaces the current settings and rebuilds the page enumerator so it uses them.
/// </summary>
/// <param name="settings">New settings.</param>
public void SetSettings(IParserSettings settings)
{
    _settings = settings;

    // The enumerator is recreated from the loader and the settings just stored.
    var pages = new PagesEnumerator<T>(_loader, _settings);
    _enumerator = pages;
}
/// <summary>
/// Internal entry point that forwards straight to the private <c>Init</c>.
/// </summary>
/// <param name="coreSettings">Core settings.</param>
/// <param name="rdbSchemaSettings">Schema parser settings.</param>
/// <param name="settingsElement">XML element holding the settings.</param>
internal void UT_Init(CoreSettings coreSettings, IParserSettings rdbSchemaSettings, XElement settingsElement)
    => Init(coreSettings, rdbSchemaSettings, settingsElement);
/// <summary>
/// Stores the core and schema settings and builds the <c>PocoSettings</c> from the XML document.
/// </summary>
/// <remarks>
/// Every value is read with <c>Single()</c>, so each referenced element or attribute must occur
/// exactly once. Values are read in the same order as the constructor arguments.
/// </remarks>
/// <param name="settings">Core settings.</param>
/// <param name="rdbSchemaSettings">Schema parser settings.</param>
/// <param name="doc">XML element containing the configuration.</param>
private void Init(CoreSettings settings, IParserSettings rdbSchemaSettings, XElement doc)
{
    _coreSettings = settings;
    _rdbSchemaSettings = rdbSchemaSettings;

    var makePartial = bool.Parse(doc.Descendants(MakePartialElement).Single().Value);

    var namespaceName = doc.Descendants(NameSpaceElement).Single().Attributes(NameSpaceNameAttribute).Single().Value;
    var namespaceComments = doc.Descendants(NameSpaceElement).Single().Descendants(NameSpaceCommentsElement).Single().Descendants(NameSpaceCommentElement).Select(e=>e.Value).ToList();

    var ctorDefault = bool.Parse(doc.Descendants(ConstructorsElement).Single().Descendants(ConstructorsDefaultElement).Single().Value);
    var ctorAllProperties = bool.Parse(doc.Descendants(ConstructorsElement).Single().Descendants(ConstructorsAllPropertiesElement).Single().Value);
    var ctorAllPropertiesSansPrimaryKey = bool.Parse(doc.Descendants(ConstructorsElement).Single().Descendants(ConstructorsAllPropertiesSansPrimaryKeyElement).Single().Value);
    var ctorCopy = bool.Parse(doc.Descendants(ConstructorsElement).Single().Descendants(ConstructorCopy).Single().Value);

    // The "equals" element carries both the on/off flag (its value) and a regex (its attribute).
    var methodEquals = bool.Parse(doc.Descendants(MethodsElement).Single().Descendants(MethodsEqualsElement).Single().Value);
    var methodEqualsRegex = doc.Descendants(MethodsElement).Single().Descendants(MethodsEqualsElement).Single().Attributes(MethodsEqualsRegexAttribute).Single().Value;

    var outputFolder = doc.Descendants(OutputFolderElement).Single().Value;
    var projectPath = doc.Descendants(ProjectPathElement).Single().Value;
    var xmlOutputFilename = doc.Descendants(XmlOutputFilenameElement).Single().Value;

    _pocoSettings = new PocoSettings(
        makePartial,
        namespaceName,
        namespaceComments,
        ctorDefault,
        ctorAllProperties,
        ctorAllPropertiesSansPrimaryKey,
        ctorCopy,
        methodEquals,
        methodEqualsRegex,
        outputFolder,
        projectPath,
        xmlOutputFilename);
}
/// <summary>
/// Creates the loader's <see cref="HttpClient"/> and builds the address it will use.
/// </summary>
/// <param name="settings">Supplies <c>BaseUrl</c> and <c>Prefix</c>.</param>
public HtmlLoader(IParserSettings settings)
{
    client = new HttpClient();

    // <BaseUrl>/<Prefix>/
    var baseUrl = settings.BaseUrl;
    var prefix = settings.Prefix;
    url = $"{baseUrl}/{prefix}/";
}
/// <summary>
/// Stores the core and schema settings, reads the output folder, project path and XML output
/// file name from the document (logging each one) and builds the <c>SurfaceSettings</c>.
/// </summary>
/// <param name="settings">Core settings.</param>
/// <param name="rdbSchemaSettings">Schema parser settings.</param>
/// <param name="doc">XML element containing the configuration; each looked-up element must occur exactly once.</param>
private void Init(CoreSettings settings, IParserSettings rdbSchemaSettings, XElement doc)
{
    _coreSettings = settings;
    _rdbSchemaSettings = rdbSchemaSettings;

    // Reads the value of the single descendant element with the given name.
    Func<XName, string> readSingleValue =
        elementName => doc.Descendants().Single(e => e.Name == elementName).Value;

    var folder = readSingleValue(OutputFolderElement);
    _log.Add("OutputFolder={0}.", folder);

    var project = readSingleValue(ProjectPathElement);
    _log.Add("ProjectPath={0}.", project);

    var xmlFile = readSingleValue(XmlOutputFilenameElement);
    _log.Add("XmlOutputFilename={0}.", xmlFile);

    _surfaceSettings = new SurfaceSettings(folder, project, xmlFile);
}