/// <summary>
/// Derives a parse rule for the given sample nodes by trying each detection
/// strategy in turn and returning the first one that yields a rule.
/// </summary>
/// <remarks>
/// Order of attempts: by link without a size filter, by XPath without a size
/// filter, by link with <paramref name="minSize"/>, by XPath with
/// <paramref name="minSize"/>. If every attempt returns <c>null</c>, a
/// default-constructed <see cref="ParseRule"/> is returned instead.
/// </remarks>
/// <param name="nodes">Sample nodes, each paired with a URL.</param>
/// <param name="label">Label forwarded to the strategy methods.</param>
/// <param name="minSize">Size used for the two size-filtered attempts.</param>
/// <param name="collectIMGTags">Forwarded to the strategy methods.</param>
/// <param name="collectLINKTags">Forwarded to the strategy methods.</param>
/// <param name="collectMETATags">Forwarded to the strategy methods.</param>
/// <returns>The first non-null rule found; never <c>null</c>.</returns>
public static ParseRule GetRule(HtmlNodeWithUrl[] nodes, string label, System.Drawing.Size minSize, bool collectIMGTags, bool collectLINKTags, bool collectMETATags)
{
    // Attempts without a size constraint come first.
    ParseRule candidate = GetRuleByLink(nodes, label, collectIMGTags, collectLINKTags, collectMETATags, null);
    if (candidate != null)
    {
        return candidate;
    }

    candidate = GetRuleByXPath(nodes, label, collectIMGTags, collectLINKTags, collectMETATags, null);
    if (candidate != null)
    {
        return candidate;
    }

    // Then the same two strategies again, this time with the size constraint.
    candidate = GetRuleByLink(nodes, label, collectIMGTags, collectLINKTags, collectMETATags, minSize);
    if (candidate != null)
    {
        return candidate;
    }

    candidate = GetRuleByXPath(nodes, label, collectIMGTags, collectLINKTags, collectMETATags, minSize);
    if (candidate != null)
    {
        return candidate;
    }

    // Nothing matched: hand back an empty rule rather than null.
    return new ParseRule();
}
/// <summary>
/// Tries to derive a parse rule that identifies the sample nodes by an XPath mask.
/// </summary>
/// <remarks>
/// Steps, in order:
/// <list type="number">
/// <item><description>
/// <b>ByXPath</b>: two masks are built from the sample nodes' XPaths
/// (<c>LongestMaskedPathBetween</c> and <c>LongestMaskedStringBetween</c>). A mask
/// qualifies when, for every sample, exactly one collected image matches it.
/// The <c>LongestMaskedStringBetween</c> mask wins when both qualify.
/// </description></item>
/// <item><description>
/// <b>ByXPathAndIndex</b>: otherwise the <c>LongestMaskedStringBetween</c> mask is
/// combined with the position of the sample URL among the matching images
/// (parameter format <c>mask;index</c>).
/// </description></item>
/// <item><description>
/// <b>Size fallback</b>: if no rule was found and <paramref name="minSize"/> is set,
/// the smallest width and height reported by <c>Helper.GetImageSizes</c> for the
/// samples are computed; when the requested minimum is below that in either
/// dimension, <c>GetRuleByLink</c> is retried with the computed size.
/// </description></item>
/// </list>
/// </remarks>
/// <param name="nodesarr">Sample nodes, each paired with a URL. May be null or empty.</param>
/// <param name="label">Stored in the <c>Label</c> property of the produced rule.</param>
/// <param name="collectIMGTags">Passed to <c>Helper.GetAllImagesUrlsFromUrl</c> and copied onto the rule.</param>
/// <param name="collectLINKTags">Passed to <c>Helper.GetAllImagesUrlsFromUrl</c> and copied onto the rule.</param>
/// <param name="collectMETATags">Passed to <c>Helper.GetAllImagesUrlsFromUrl</c> and copied onto the rule.</param>
/// <param name="minSize">
/// When set, matching images are additionally passed through
/// <c>Helper.GetAllImagesUrlsWithMinSize</c>, and the rule records the size check.
/// </param>
/// <returns>The derived rule, or <c>null</c> when none could be derived.</returns>
private static ParseRule GetRuleByXPath(HtmlNodeWithUrl[] nodesarr, string label, bool collectIMGTags, bool collectLINKTags, bool collectMETATags, System.Drawing.Size? minSize = null)
{
    // Without samples there is nothing to analyse. Returning here also keeps the
    // size fallback below from dereferencing a null array.
    if (nodesarr == null || nodesarr.Length == 0)
    {
        return null;
    }

    string[] xpaths = nodesarr.Select(n => n.Node.XPath).ToArray();
    string pathMask = LongestMaskedPathBetween(xpaths);
    string stringMask = LongestMaskedStringBetween(xpaths);

    ParseRule result = null;

    // The string mask takes precedence over the path mask when both qualify,
    // so it is checked first and the path mask is only evaluated if needed.
    string uniqueMask = null;
    if (EveryPageHasSingleXPathMatch(nodesarr, stringMask, collectIMGTags, collectLINKTags, collectMETATags, minSize))
    {
        uniqueMask = stringMask;
    }
    else if (EveryPageHasSingleXPathMatch(nodesarr, pathMask, collectIMGTags, collectLINKTags, collectMETATags, minSize))
    {
        uniqueMask = pathMask;
    }

    if (uniqueMask != null)
    {
        result = new ParseRule()
        {
            Label = label,
            Condition = ParseFindRuleCondition.ByXPath,
            Parameter = uniqueMask
        };
    }
    else
    {
        int index = FindSmallestXPathMatchIndex(nodesarr, stringMask, collectIMGTags, collectLINKTags, collectMETATags, minSize);
        if (index != -1)
        {
            result = new ParseRule()
            {
                Label = label,
                Condition = ParseFindRuleCondition.ByXPathAndIndex,
                Parameter = stringMask + ";" + index.ToString()
            };
        }
    }

    if (result != null)
    {
        result.CheckImageSize = minSize != null;
        if (minSize != null)
        {
            result.MinImageSize = minSize.Value;
        }

        result.CollectIMGTags = collectIMGTags;
        result.CollectLINKTags = collectLINKTags;
        result.CollectMETATags = collectMETATags;
        return result;
    }

    if (minSize == null)
    {
        return null;
    }

    // Size fallback: find the smallest width/height among the sample images.
    // The running minimum must start high; starting from a zero-sized value
    // would never be lowered and the retry below could never trigger.
    int minWidth = int.MaxValue;
    int minHeight = int.MaxValue;
    bool anySize = false;

    var sampleElements = nodesarr
        .Select(n => new SomeNodeElement() { Node = n.Node, Url = n.Url })
        .ToArray();

    foreach (var sz in Helper.GetImageSizes(sampleElements).Select(n => n.Value))
    {
        anySize = true;
        if (sz.Width < minWidth)
        {
            minWidth = sz.Width;
        }

        if (sz.Height < minHeight)
        {
            minHeight = sz.Height;
        }
    }

    if (!anySize)
    {
        return null;
    }

    System.Drawing.Size minCalcedSize = new System.Drawing.Size(minWidth, minHeight);

    if (minSize.Value.Height < minCalcedSize.Height || minSize.Value.Width < minCalcedSize.Width)
    {
        // NOTE(review): this retries GetRuleByLink, not GetRuleByXPath. Kept as in the
        // original; confirm whether that is intended or a copy-paste leftover.
        return GetRuleByLink(nodesarr, label, collectIMGTags, collectLINKTags, collectMETATags, minCalcedSize);
    }

    return null;
}

/// <summary>
/// Returns true when, for every sample, exactly one collected image has an XPath
/// matching <paramref name="mask"/> (after the optional minimum-size filter).
/// </summary>
private static bool EveryPageHasSingleXPathMatch(HtmlNodeWithUrl[] nodes, string mask, bool collectIMGTags, bool collectLINKTags, bool collectMETATags, System.Drawing.Size? minSize)
{
    return nodes.All(n =>
    {
        var matches = Helper
            .GetAllImagesUrlsFromUrl(n.Node.OwnerDocument, n.Url.AbsoluteUri, collectIMGTags, collectLINKTags, collectMETATags, null)
            .Where(img => Helper.StringLikes(img.Node.XPath, mask));

        int count = minSize == null
            ? matches.Count()
            : Helper.GetAllImagesUrlsWithMinSize(matches.ToArray(), minSize.Value).Count();

        return count == 1;
    });
}

/// <summary>
/// For each sample, finds the position of the sample's URL among the collected images
/// whose XPath matches <paramref name="mask"/>, and returns the smallest position seen.
/// Returns -1 if any sample's URL is not found. Expects a non-empty array.
/// </summary>
private static int FindSmallestXPathMatchIndex(HtmlNodeWithUrl[] nodes, string mask, bool collectIMGTags, bool collectLINKTags, bool collectMETATags, System.Drawing.Size? minSize)
{
    // NOTE(review): when samples yield different indices the smallest one is used,
    // as in the original; confirm whether differing indices should reject the rule.
    return nodes
        .Select(n =>
        {
            var matches = Helper
                .GetAllImagesUrlsFromUrl(n.Node.OwnerDocument, n.Url.AbsoluteUri, collectIMGTags, collectLINKTags, collectMETATags, null)
                .Where(img => Helper.StringLikes(img.Node.XPath, mask));

            string[] urls = (minSize == null
                    ? matches.ToArray()
                    : Helper.GetAllImagesUrlsWithMinSize(matches.ToArray(), minSize.Value))
                .Select(img => img.Url.AbsoluteUri)
                .ToArray();

            string target = n.Url.AbsoluteUri;

            // Case-insensitive and culture-independent URL comparison; -1 when absent.
            return System.Array.FindIndex(urls, u => string.Equals(u, target, System.StringComparison.OrdinalIgnoreCase));
        })
        .Min();
}