/// <summary>
/// Rewrites the value of every attribute in <paramref name="collection"/> with the
/// result of <c>HtmlEntity.Entitize</c> applied to its current value.
/// </summary>
/// <param name="collection">Attributes whose values are replaced in place.</param>
private static void Entitize(HtmlAttributeCollection collection)
{
    // The collection is enumerated through its generic IEnumerable view.
    var attributes = (IEnumerable<HtmlAttribute>)collection;
    foreach (HtmlAttribute attribute in attributes)
    {
        var entitized = HtmlEntity.Entitize(attribute.Value);
        attribute.Value = entitized;
    }
}
/// <summary>
/// Replaces each attribute value in <paramref name="collection"/> with the result of
/// the <c>Entitize</c> overload that accepts that value.
/// </summary>
/// <param name="collection">Attributes whose values are replaced in place.</param>
private static void Entitize(HtmlAttributeCollection collection)
{
    // Enumerate via the generic IEnumerable view of the collection.
    var items = (IEnumerable<HtmlAttribute>)collection;
    foreach (HtmlAttribute item in items)
    {
        var rewritten = Entitize(item.Value);
        item.Value = rewritten;
    }
}
/// <summary>
/// Walks <paramref name="collection"/> and overwrites every attribute value with the
/// result of calling the value-level <c>Entitize</c> overload on it.
/// </summary>
/// <param name="collection">Attributes to update in place.</param>
private static void Entitize(HtmlAttributeCollection collection)
{
    foreach (HtmlAttribute attribute in collection)
    {
        var replacement = Entitize(attribute.Value);
        attribute.Value = replacement;
    }
}
/// <summary>
/// Decides whether an element should be treated as hidden, based on its
/// <c>class</c> attribute.
/// </summary>
/// <param name="atts">Attributes of the element being inspected.</param>
/// <returns>
/// <c>true</c> when any space-separated class token equals "hidden", starts with
/// "promo", or contains "comment"; <c>false</c> when there is no class attribute,
/// its value is null, or no token matches.
/// </returns>
private bool IsHidden(HtmlAttributeCollection atts)
{
    if (!atts.Contains("class"))
    {
        return false;
    }

    string classValue = atts["class"].Value;
    if (classValue == null)
    {
        // Nothing to inspect; previously this dereferenced null.
        return false;
    }

    // Class names are identifiers, so compare ordinally rather than with the
    // current culture's linguistic rules.
    return classValue.Split(' ').Any(token =>
        token == "hidden"
        || token.StartsWith("promo", System.StringComparison.Ordinal)
        || token.Contains("comment"));
}
//private static IWebElement CreateNMock2IWebElement(this HtmlNode htmlNode)
//{
//    var mockIWebElement = new Mockery().NewMock<IWebElement>();
//    var attributes = htmlNode.Attributes;
//    if (attributes["id"] == null)
//        Stub.On(mockIWebElement).Method("GetAttribute").With("id").Will(Return.Value(""));
//    foreach (var att in attributes)
//        Stub.On(mockIWebElement).Method("GetAttribute").With(att.Name).Will(Return.Value(att.Value));
//    Stub.On(mockIWebElement).GetProperty("TagName").Will(Return.Value(htmlNode.Name));
//    var text = htmlNode.SelectSingleNode("text()");
//    if (text != null)
//        Stub.On(mockIWebElement).GetProperty("Text").Will(Return.Value(text.InnerText));
//    else
//        Stub.On(mockIWebElement).GetProperty("Text").Will(Return.Value(""));
//    return mockIWebElement;
//}
//public static IWebElement ToNMock2IWebElement(this HtmlNode htmlNode)
//{
//    var mockIWebElement = CreateNMock2IWebElement(htmlNode);
//    if (htmlNode.HasChildNodes)
//    {
//        foreach (var child in htmlNode.ChildNodes)
//        {
//            var tempElement = new Mockery().NewMock<IWebElement>();
//            tempElement = CreateNMock2IWebElement(child);
//            var byID = By.Id(child.Id);
//            Stub.On(mockIWebElement).Method("FindElement").With(byID).Will(Return.Value(tempElement));
//        }
//    }
//    return mockIWebElement;
//}

/// <summary>
/// Looks up an attribute by name and returns its value.
/// </summary>
/// <param name="attributes">Collection to search.</param>
/// <param name="attributeName">Name of the attribute to look up.</param>
/// <returns>
/// The attribute's value, or an empty string when the lookup yields null.
/// </returns>
private static string GetAttributeValue(HtmlAttributeCollection attributes, string attributeName)
{
    var match = attributes[attributeName];
    return match == null ? "" : match.Value;
}
/// <summary>
/// Overwrites every non-null attribute value in <paramref name="collection"/> with the
/// result of the value-level <c>Entitize</c> overload. Attributes whose value is
/// null are left untouched.
/// </summary>
/// <param name="collection">Attributes to update in place.</param>
private static void Entitize(HtmlAttributeCollection collection)
{
    foreach (HtmlAttribute attribute in collection)
    {
        if (attribute.Value != null)
        {
            attribute.Value = Entitize(attribute.Value);
        }
    }
}
/// <summary>
/// Determines whether two attribute collections contain the same attributes,
/// compared position by position on name and value.
/// </summary>
/// <remarks>
/// The previous implementation compared the attribute objects with <c>!=</c>, which
/// is a reference comparison, so two separate collections with identical content
/// were never reported as equal. Every input that used to return <c>true</c>
/// (shared attribute instances, or two empty collections) still does.
/// </remarks>
/// <param name="collection">First collection.</param>
/// <param name="second">Collection to compare against.</param>
/// <returns>
/// <c>true</c> when both references are the same, or when both collections have the
/// same count and every pair of attributes at the same index has an equal name and
/// value; otherwise <c>false</c> (including when exactly one side is null).
/// </returns>
public static bool EqualsAttributes(this HtmlAttributeCollection collection, HtmlAttributeCollection second)
{
    if (object.ReferenceEquals(collection, second))
    {
        return true;
    }

    if (collection == null || second == null)
    {
        return false;
    }

    if (collection.Count != second.Count)
    {
        return false;
    }

    for (int i = 0; i < collection.Count; i++)
    {
        HtmlAttribute left = collection[i];
        HtmlAttribute right = second[i];

        bool sameName = string.Equals(left.Name, right.Name, System.StringComparison.Ordinal);
        bool sameValue = string.Equals(left.Value, right.Value, System.StringComparison.Ordinal);
        if (!sameName || !sameValue)
        {
            // First mismatch settles the answer; no need to keep scanning.
            return false;
        }
    }

    return true;
}
/// <summary>
/// Builds markup labels from the element name and its <c>class</c> and <c>id</c>
/// attributes, combines them with the ancestor labels, registers the result as a
/// label action on <paramref name="instance"/>, and pushes the element's own labels
/// onto <c>labelStack</c>.
/// </summary>
/// <param name="instance">Content handler that receives the label action.</param>
/// <param name="localName">Element name, used for the first label.</param>
/// <param name="atts">Attributes of the element.</param>
/// <returns>The value of <c>isBlockLevel</c>.</returns>
/// <exception cref="Sharpen.SAXException"></exception>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    IList<string> labels = new AList<string>(5);
    labels.AddItem(DefaultLabels.MARKUP_PREFIX + localName);

    // The attribute lookup is null-checked before reading Value so an element
    // without a class attribute no longer fails here.
    var classAtt = atts["class"];
    string classVal = classAtt != null ? classAtt.Value : null;
    if (classVal != null && classVal.Length > 0)
    {
        // Digits runs are normalised via PAT_NUM so numbered class names collapse together.
        classVal = PAT_NUM.Matcher(classVal).ReplaceAll("#");
        classVal = classVal.Trim();
        string[] vals = classVal.Split("[ ]+");
        labels.AddItem(DefaultLabels.MARKUP_PREFIX + "." + classVal.Replace(' ', '.'));
        if (vals.Length > 1)
        {
            foreach (string s in vals)
            {
                labels.AddItem(DefaultLabels.MARKUP_PREFIX + "." + s);
            }
        }
    }

    // Fixed: the original tested the collection (atts) for null instead of the
    // looked-up attribute, and read the attribute's Name ("id") rather than its Value.
    var idAtt = atts["id"];
    string id = idAtt != null ? idAtt.Value : "";
    if (id != null && id.Length > 0)
    {
        id = PAT_NUM.Matcher(id).ReplaceAll("#");
        labels.AddItem(DefaultLabels.MARKUP_PREFIX + "#" + id);
    }

    ICollection<string> ancestors = GetAncestorLabels();
    IList<string> labelsWithAncestors = new AList<string>((ancestors.Count + 1) * labels.Count);
    foreach (string l in labels)
    {
        foreach (string an in ancestors)
        {
            labelsWithAncestors.AddItem(an);
            labelsWithAncestors.AddItem(an + " " + l);
        }
        labelsWithAncestors.AddItem(l);
    }

    instance.AddLabelAction(new LabelAction(
        Sharpen.Collections.ToArray(labelsWithAncestors, new string[labelsWithAncestors.Count])));
    labelStack.AddItem(labels);
    return isBlockLevel;
}
/// <summary>
/// Builds an <c>Item</c> from the <c>data-*</c> attributes of an element.
/// </summary>
/// <remarks>
/// The name starts as the <c>Types</c> entry for <c>data-quality</c> followed by
/// <c>data-name</c>; optional particle effect, custom name, custom description and
/// tint attributes each append a phrase. <c>data-quality</c>, <c>data-name</c>,
/// <c>data-tf2itemid</c> and <c>data-level</c> are read unconditionally.
/// </remarks>
/// <param name="itemAttr">Attributes describing the item.</param>
/// <returns>The populated item.</returns>
static Item GetItemFromAttr(HtmlAttributeCollection itemAttr)
{
    //Data quality 6 is normal, 1 is genuine
    Item tradeItem = new Item();
    tradeItem.Name = Types[Int32.Parse(itemAttr["data-quality"].Value)] + itemAttr["data-name"].Value;

    if (itemAttr.Contains("data-particleeffectname"))
    {
        tradeItem.Name += " with " + itemAttr["data-particleeffectname"].Value;
    }

    if (itemAttr.Contains("data-customname"))
    {
        tradeItem.Name += " called " + itemAttr["data-customname"].Value;
    }

    if (itemAttr.Contains("data-customdescription"))
    {
        tradeItem.Name += " with description " + itemAttr["data-customdescription"].Value;
    }

    if (itemAttr.Contains("data-tint"))
    {
        // Parse once and reuse the value TryGetValue hands back, instead of
        // parsing the attribute again and indexing Tints a second time.
        int tintKey = Int32.Parse(itemAttr["data-tint"].Value);
        string tintName;
        if (Tints.TryGetValue(tintKey, out tintName))
        {
            tradeItem.Name += " painted " + tintName;
        }
        else
        {
            tradeItem.Name += " painted a colour I haven't found yet. Contact me";
        }
    }

    tradeItem.ID = Int32.Parse(itemAttr["data-tf2itemid"].Value);
    tradeItem.Level = Int32.Parse(itemAttr["data-level"].Value);
    return tradeItem;
}
/// <summary>
/// Asks the handler to add whitespace if necessary, then registers this action's
/// label action on it.
/// </summary>
/// <param name="instance">Content handler being driven.</param>
/// <param name="localName">Element name (not used here).</param>
/// <param name="atts">Element attributes (not used here).</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    instance.AddWhitespaceIfNecessary();
    instance.AddLabelAction(action);

    const bool result = false;
    return result;
}
/// <summary>
/// Handles an image element: when the image looks significant (descriptive alt
/// text, large dimensions, or a Wikimedia upload URL) and is large enough, a content
/// text block carrying its source URL is added to the handler. The element is
/// always counted as ignorable afterwards.
/// </summary>
/// <remarks>
/// Processing is best-effort: any exception is written to the debug output and the
/// image is skipped. Unparseable <c>width</c>/<c>height</c> values (for example
/// "100%") are now treated as unknown instead of aborting the whole image.
/// </remarks>
/// <param name="instance">Content handler that collects text blocks.</param>
/// <param name="localName">Element name (not used here).</param>
/// <param name="atts">Attributes of the image element.</param>
/// <returns>Always <c>true</c>.</returns>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    try
    {
        // Fall back to the title when the alt text is missing or very short.
        var alt = (atts.Contains("alt") ? atts["alt"].Value : null) ?? "";
        if (alt.Length < 5 && atts.Contains("title"))
        {
            alt = atts["title"].Value ?? alt;
        }

        int width = ReadPixelDimension(atts, "width");
        int height = ReadPixelDimension(atts, "height");
        var src = atts.Contains("src") ? atts["src"].Value : FindAlternateSrc(atts);

        // NOTE(review): isWikimedia is only assigned when the earlier alternatives
        // in the || chain are all false (short-circuit); kept as in the original.
        bool isWikimedia = false;
        if (instance.inIgnorableElement <= 0
            && !string.IsNullOrWhiteSpace(src)
            && (alt.Length > 5
                || width > 400
                || height > 320
                || (isWikimedia = src.StartsWith("//upload.wikimedia.org", StringComparison.Ordinal))))
        {
            var altWidthHeight = FindAlternateWidthHieght(src);
            width = Math.Max(altWidthHeight.Item1, width);
            height = Math.Max(altWidthHeight.Item2, height);

            // Protocol-relative URLs are given an explicit scheme.
            if (src.StartsWith("//", StringComparison.Ordinal))
            {
                src = "http:" + src;
            }

            if (width > 400 || height > 320 || isWikimedia)
            {
                var tb = new Document.TextBlock("", new Sharpen.BitSet(), Math.Max((Math.Max(width, height) / 6), alt.Length), 0, 0, 0, 0, src);
                tb.SetIsContent(true);
                instance.textBlocks.Add(tb);
            }
        }
    }
    catch (Exception ex)
    {
        Debug.WriteLine("during boilerpipe parsing: " + ex.ToString());
    }

    // Runs exactly once on both the success and the failure path.
    instance.inIgnorableElement++;
    return true;
}

/// <summary>
/// Reads a numeric dimension attribute, ignoring trailing 'p', 'x' and ';'
/// characters. Returns 1 when the attribute is absent, null, not a number, or
/// smaller than 1.
/// </summary>
private static int ReadPixelDimension(HtmlAttributeCollection atts, string name)
{
    if (!atts.Contains(name))
    {
        return 1;
    }

    string raw = atts[name].Value;
    int parsed;
    if (raw == null || !int.TryParse(raw.TrimEnd('p', 'x', ';'), out parsed))
    {
        return 1;
    }

    return Math.Max(parsed, 1);
}
/// <summary>
/// Passes each attribute value in <paramref name="collection"/> through
/// <c>HtmlEntity.Entitize</c> and stores the result back on the attribute.
/// </summary>
/// <param name="collection">Attributes whose values are replaced in place.</param>
private static void Entitize(HtmlAttributeCollection collection)
{
    // Enumerate via the generic IEnumerable view of the collection.
    var attributes = (IEnumerable<HtmlAttribute>)collection;
    foreach (HtmlAttribute current in attributes)
    {
        var entitized = HtmlEntity.Entitize(current.Value);
        current.Value = entitized;
    }
}
/// <summary>
/// Scans all attributes for a value that looks like an image path and returns the
/// first one found.
/// </summary>
/// <param name="atts">Attributes to scan.</param>
/// <returns>
/// The first attribute value ending in ".jpg" or ".png" (case-sensitive), or
/// <c>null</c> when none matches.
/// </returns>
private string FindAlternateSrc(HtmlAttributeCollection atts)
{
    foreach (var att in atts)
    {
        string value = att.Value;
        if (value == null)
        {
            // A valueless attribute cannot be an image path; previously this threw.
            continue;
        }

        // File extensions are compared ordinally, not with culture-specific rules.
        if (value.EndsWith(".jpg", System.StringComparison.Ordinal)
            || value.EndsWith(".png", System.StringComparison.Ordinal))
        {
            return value;
        }
    }

    return null;
}
/// <summary>
/// Increments the handler's <c>inIgnorableElement</c> counter.
/// </summary>
/// <param name="instance">Content handler whose counter is incremented.</param>
/// <param name="localName">Element name (not used here).</param>
/// <param name="atts">Element attributes (not used here).</param>
/// <returns>Always <c>true</c>.</returns>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    instance.inIgnorableElement++;

    const bool result = true;
    return result;
}
/// <summary>
/// Runs <c>t1</c> and then <c>t2</c> with the same arguments and combines their
/// results.
/// </summary>
/// <param name="instance">Content handler passed to both actions.</param>
/// <param name="localName">Element name passed to both actions.</param>
/// <param name="atts">Element attributes passed to both actions.</param>
/// <returns><c>true</c> when either action returned <c>true</c>.</returns>
/// <exception cref="Sharpen.SAXException"></exception>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    // Both actions are always invoked, in this order; the results are only
    // combined afterwards so the second call is never skipped.
    bool firstResult = t1.Start(instance, localName, atts);
    bool secondResult = t2.Start(instance, localName, atts);
    return firstResult | secondResult;
}
/// <summary>
/// Pushes the font size implied by the element's <c>size</c> attribute onto the
/// front of <c>instance.fontSizeStack</c>.
/// </summary>
/// <remarks>
/// An absolute size is pushed as-is. A relative size ("+n" / "-n") is applied to
/// the first non-null entry already on the stack, or to 3 when there is none.
/// <c>null</c> is pushed when the attribute is missing or does not match
/// <c>CommonTagActions.PAT_FONT_SIZE</c>.
/// </remarks>
/// <param name="instance">Content handler that owns the font size stack.</param>
/// <param name="localName">Element name (not used here).</param>
/// <param name="atts">Attributes of the element.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    // Null-check the lookup before reading Value, so an element without a size
    // attribute reaches the "push null" branch instead of failing here.
    var sizeAttribute = atts["size"];
    string sizeAttr = sizeAttribute != null ? sizeAttribute.Value : null;
    if (sizeAttr == null)
    {
        instance.fontSizeStack.Insert(0, null);
        return false;
    }

    var m = CommonTagActions.PAT_FONT_SIZE.Matcher(sizeAttr);
    if (!m.Matches())
    {
        instance.fontSizeStack.Insert(0, null);
        return false;
    }

    string rel = m.Group(1);
    int val = System.Convert.ToInt32(m.Group(2));
    int size;
    if (rel.Length == 0)
    {
        // absolute
        size = val;
    }
    else
    {
        // relative: start from the nearest known size, defaulting to 3
        int prevSize = 3;
        foreach (int? s in instance.fontSizeStack)
        {
            if (s != null)
            {
                prevSize = s.Value;
                break;
            }
        }

        if (rel[0] == '+')
        {
            size = prevSize + val;
        }
        else
        {
            size = prevSize - val;
        }
    }

    instance.fontSizeStack.Insert(0, size);
    return false;
}
/// <summary>
/// Start handler that performs no work.
/// </summary>
/// <param name="instance">Content handler (not used here).</param>
/// <param name="localName">Element name (not used here).</param>
/// <param name="atts">Element attributes (not used here).</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    const bool result = false;
    return result;
}
/// <summary>
/// Flushes the handler's current block and increments its <c>inBody</c> counter.
/// </summary>
/// <param name="instance">Content handler being driven.</param>
/// <param name="localName">Element name (not used here).</param>
/// <param name="atts">Element attributes (not used here).</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    // Flush first, then count the body element, in that order.
    instance.FlushBlock();
    instance.inBody++;

    const bool result = false;
    return result;
}
/// <summary>
/// Handles the start of an anchor element: increments <c>inAnchor</c>, marks nested
/// anchors as ignorable, and otherwise appends the anchor-text start marker to the
/// handler's token builder.
/// </summary>
/// <param name="instance">Content handler being driven.</param>
/// <param name="localName">Element name (not used here).</param>
/// <param name="atts">Element attributes (not used here).</param>
/// <returns>Always <c>false</c>.</returns>
/// <exception cref="Sharpen.SAXException"></exception>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    bool alreadyInsideAnchor = instance.inAnchor > 0;
    instance.inAnchor++;

    if (alreadyInsideAnchor)
    {
        // as nested A elements are not allowed per specification, we
        // are probably reaching this branch due to a bug in the XML
        // parser
        //System.Console.Error.WriteLine ("Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow..."
        //);
        //this.End (instance, localName);
        instance.inIgnorableElement++;
    }

    if (instance.inIgnorableElement != 0)
    {
        return false;
    }

    instance.AddWhitespaceIfNecessary();
    instance.tokenBuilder.Append(NBoilerpipeContentHandler.ANCHOR_TEXT_START);
    instance.tokenBuilder.Append(' ');
    instance.sbLastWasWhitespace = true;
    return false;
}
/// <summary>
/// Registers this action's label action on the handler.
/// </summary>
/// <param name="instance">Content handler that receives the label action.</param>
/// <param name="localName">Element name (not used here).</param>
/// <param name="atts">Element attributes (not used here).</param>
/// <returns>Always <c>true</c>.</returns>
public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
{
    instance.AddLabelAction(action);

    const bool result = true;
    return result;
}
/// <summary>
/// Reads the value of a named attribute from an attribute collection without
/// failing when the attribute is absent.
/// </summary>
/// <param name="p_hacCollection">Collection to read from.</param>
/// <param name="p_strItem">Name of the attribute to read.</param>
/// <returns>
/// The attribute's value when the collection contains it; otherwise an empty string.
/// </returns>
private static string GetHtmlAttributeValue(HtmlAttributeCollection p_hacCollection, string p_strItem)
{
    if (!p_hacCollection.Contains(p_strItem))
    {
        return "";
    }

    var attribute = (HtmlAttribute)p_hacCollection[p_strItem];
    return attribute.Value;
}