/// <summary>
/// Overwrites the value of every attribute in <paramref name="collection"/> with the
/// result of <c>HtmlEntity.Entitize</c> applied to that value.
/// </summary>
/// <param name="collection">Attributes whose values are rewritten in place.</param>
private static void Entitize(HtmlAttributeCollection collection)
 {
     IEnumerable<HtmlAttribute> attributes = (IEnumerable<HtmlAttribute>)collection;
     foreach (HtmlAttribute attribute in attributes)
     {
         string entitized = HtmlEntity.Entitize(attribute.Value);
         attribute.Value = entitized;
     }
 }
Esempio n. 2
0
 /// <summary>
 /// Passes each attribute value through the string overload of <c>Entitize</c> and
 /// stores the result back on the same attribute.
 /// </summary>
 /// <param name="collection">Attributes whose values are rewritten in place.</param>
 private static void Entitize(HtmlAttributeCollection collection)
 {
     var attributes = (IEnumerable<HtmlAttribute>)collection;
     foreach (HtmlAttribute item in attributes)
     {
         string rawValue = item.Value;
         item.Value = Entitize(rawValue);
     }
 }
 /// <summary>
 /// Rewrites, in place, the value of each attribute in the collection using the
 /// string overload of <c>Entitize</c>.
 /// </summary>
 /// <param name="collection">Attributes to process.</param>
 private static void Entitize(HtmlAttributeCollection collection)
 {
     foreach (HtmlAttribute attribute in collection)
     {
         string currentValue = attribute.Value;
         string entitizedValue = Entitize(currentValue);
         attribute.Value = entitizedValue;
     }
 }
 /// <summary>
 /// Checks the space-separated tokens of the "class" attribute for one that equals
 /// "hidden", starts with "promo", or contains "comment".
 /// </summary>
 /// <param name="atts">Attributes of the element being inspected.</param>
 /// <returns>
 /// <c>true</c> when at least one class token matches; <c>false</c> when none match or
 /// when there is no "class" attribute.
 /// </returns>
 private bool IsHidden(HtmlAttributeCollection atts)
 {
     if (!atts.Contains("class"))
     {
         return false;
     }

     string[] classNames = atts["class"].Value.Split(' ');
     foreach (string className in classNames)
     {
         if (className == "hidden" || className.StartsWith("promo") || className.Contains("comment"))
         {
             return true;
         }
     }

     return false;
 }
Esempio n. 5
0
        //private static IWebElement CreateNMock2IWebElement(this HtmlNode htmlNode)
        //{
        //    var mockIWebElement = new Mockery().NewMock<IWebElement>();
        //    var attributes = htmlNode.Attributes;
        //    if (attributes["id"] == null)
        //        Stub.On(mockIWebElement).Method("GetAttribute").With("id").Will(Return.Value(""));

        //    foreach (var att in attributes)
        //        Stub.On(mockIWebElement).Method("GetAttribute").With(att.Name).Will(Return.Value(att.Value));

        //    Stub.On(mockIWebElement).GetProperty("TagName").Will(Return.Value(htmlNode.Name));
        //    var text = htmlNode.SelectSingleNode("text()");
        //    if (text != null)
        //        Stub.On(mockIWebElement).GetProperty("Text").Will(Return.Value(text.InnerText));
        //    else
        //        Stub.On(mockIWebElement).GetProperty("Text").Will(Return.Value(""));
        //    return mockIWebElement;
        //}

        //public static IWebElement ToNMock2IWebElement(this HtmlNode htmlNode)
        //{
        //    var mockIWebElement = CreateNMock2IWebElement(htmlNode);
        //    if (htmlNode.HasChildNodes)
        //    {
        //        foreach (var child in htmlNode.ChildNodes)
        //        {
        //            var tempElement = new Mockery().NewMock<IWebElement>();
        //            tempElement = CreateNMock2IWebElement(child);
        //            var byID = By.Id(child.Id);
        //            Stub.On(mockIWebElement).Method("FindElement").With(byID).Will(Return.Value(tempElement));
        //        }
        //    }
        //    return mockIWebElement;
        //}

        /// <summary>
        /// Looks up an attribute by name and returns its value.
        /// </summary>
        /// <param name="attributes">Collection to search.</param>
        /// <param name="attributeName">Name of the attribute to read.</param>
        /// <returns>The attribute's value, or an empty string when the lookup yields null.</returns>
        private static string GetAttributeValue(HtmlAttributeCollection attributes, string attributeName)
        {
            var found = attributes[attributeName];
            return found == null ? "" : found.Value;
        }
Esempio n. 6
0
 /// <summary>
 /// Rewrites each non-null attribute value using the string overload of
 /// <c>Entitize</c>; attributes whose value is null are left untouched.
 /// </summary>
 /// <param name="collection">Attributes to process in place.</param>
 private static void Entitize(HtmlAttributeCollection collection)
 {
     foreach (HtmlAttribute attribute in collection)
     {
         if (attribute.Value != null)
         {
             attribute.Value = Entitize(attribute.Value);
         }
     }
 }
Esempio n. 7
0
        /// <summary>
        /// Compares two attribute collections position by position.
        /// </summary>
        /// <remarks>
        /// Attributes are compared by <c>Name</c> and <c>Value</c>. Comparing the
        /// <c>HtmlAttribute</c> objects with <c>!=</c> would only test reference identity,
        /// so two separately parsed collections could never be reported as equal.
        /// </remarks>
        /// <param name="collection">First collection.</param>
        /// <param name="second">Collection to compare against.</param>
        /// <returns>
        /// <c>true</c> when both collections have the same count and every attribute pair
        /// at the same index has an equal name and value; otherwise <c>false</c>.
        /// </returns>
        public static bool EqualsAttributes(this HtmlAttributeCollection collection, HtmlAttributeCollection second)
        {
            if (object.ReferenceEquals(collection, second))
                return true;

            if (collection == null || second == null)
                return false;

            if (collection.Count != second.Count)
                return false;

            for (int i = 0; i < collection.Count; i++)
            {
                HtmlAttribute left = collection[i];
                HtmlAttribute right = second[i];

                // Same instance on both sides is trivially equal.
                if (object.ReferenceEquals(left, right))
                    continue;

                if (left == null || right == null)
                    return false;

                // Stop at the first mismatch instead of scanning the remainder.
                if (!string.Equals(left.Name, right.Name) || !string.Equals(left.Value, right.Value))
                    return false;
            }

            return true;
        }
Esempio n. 8
0
 /// <summary>
 /// Builds markup labels from the tag name, the "class" attribute and the "id"
 /// attribute, combines them with the ancestor labels, registers the result as a
 /// label action on <paramref name="instance"/> and pushes the labels on the label stack.
 /// </summary>
 /// <param name="instance">Content handler that receives the label action.</param>
 /// <param name="localName">Name of the element being started.</param>
 /// <param name="atts">Attributes of the element; "class" and "id" may be absent.</param>
 /// <returns>The value of <c>isBlockLevel</c>.</returns>
 /// <exception cref="Sharpen.SAXException"></exception>
 public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
 {
     IList<string> labels = new AList<string> (5);
     labels.AddItem (DefaultLabels.MARKUP_PREFIX + localName);
     // The indexer yields null for a missing attribute, so guard before reading Value.
     var classAtt = atts ["class"];
     string classVal = (classAtt != null) ? classAtt.Value : null;
     if (classVal != null && classVal.Length > 0) {
         classVal = PAT_NUM.Matcher (classVal).ReplaceAll ("#");
         classVal = classVal.Trim ();
         // NOTE(review): the argument looks like a regex pattern; confirm that this call
         // resolves to a regex-aware Split overload rather than a literal-separator one.
         string[] vals = classVal.Split ("[ ]+");
         labels.AddItem (DefaultLabels.MARKUP_PREFIX + "." + classVal.Replace (' ', '.'));
         if (vals.Length > 1) {
             foreach (string s in vals) {
                 labels.AddItem (DefaultLabels.MARKUP_PREFIX + "." + s);
             }
         }
     }
     // Use the id attribute's value (its Name is always "id") and tolerate a missing id.
     var att = atts ["id"];
     var id = (att != null) ? att.Value : "";
     if (id != null && id.Length > 0) {
         id = PAT_NUM.Matcher (id).ReplaceAll ("#");
         labels.AddItem (DefaultLabels.MARKUP_PREFIX + "#" + id);
     }
     ICollection<string> ancestors = GetAncestorLabels ();
     IList<string> labelsWithAncestors = new AList<string> ((ancestors.Count + 1) * labels
         .Count);
     // For every label emit: each ancestor, each "ancestor label" pair, then the label itself.
     foreach (string l in labels) {
         foreach (string an in ancestors) {
             labelsWithAncestors.AddItem (an);
             labelsWithAncestors.AddItem (an + " " + l);
         }
         labelsWithAncestors.AddItem (l);
     }
     instance.AddLabelAction (new LabelAction (Sharpen.Collections.ToArray (labelsWithAncestors
         , new string[labelsWithAncestors.Count])));
     labelStack.AddItem (labels);
     return isBlockLevel;
 }
Esempio n. 9
0
 /// <summary>
 /// Builds an <c>Item</c> from the <c>data-*</c> attributes of an element: the name is
 /// assembled from quality, base name and the optional particle effect, custom name,
 /// custom description and tint; the id and level are parsed as integers.
 /// </summary>
 /// <param name="itemAttr">Attributes carrying the item's <c>data-*</c> values.</param>
 /// <returns>The populated item.</returns>
 static Item GetItemFromAttr(HtmlAttributeCollection itemAttr)
 {
     Item tradeItem = new Item();

     //Data quality 6 is normal, 1 is genuine
     int quality = Int32.Parse(itemAttr["data-quality"].Value);
     tradeItem.Name = Types[quality] + itemAttr["data-name"].Value;

     if (itemAttr.Contains("data-particleeffectname"))
     {
         tradeItem.Name += " with " + itemAttr["data-particleeffectname"].Value;
     }
     if (itemAttr.Contains("data-customname"))
     {
         tradeItem.Name += " called " + itemAttr["data-customname"].Value;
     }
     if (itemAttr.Contains("data-customdescription"))
     {
         tradeItem.Name += " with description " + itemAttr["data-customdescription"].Value;
     }
     if (itemAttr.Contains("data-tint"))
     {
         string tintName = "";
         int tintId = Int32.Parse(itemAttr["data-tint"].Value);
         if (Tints.TryGetValue(tintId, out tintName))
         {
             tradeItem.Name += " painted " + tintName;
         }
         else
         {
             tradeItem.Name += " painted a colour I haven't found yet. Contact me";
         }
     }

     tradeItem.ID = Int32.Parse(itemAttr["data-tf2itemid"].Value);
     tradeItem.Level = Int32.Parse(itemAttr["data-level"].Value);
     return tradeItem;
 }
Esempio n. 10
0
			/// <summary>
			/// Asks the handler to add whitespace if necessary, registers this action's
			/// <c>action</c> label action on it, and returns <c>false</c>.
			/// </summary>
			/// <param name="instance">Content handler being driven.</param>
			/// <param name="localName">Element name; not read here.</param>
			/// <param name="atts">Element attributes; not read here.</param>
			/// <returns>Always <c>false</c>.</returns>
			public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
			{
				instance.AddWhitespaceIfNecessary();
				instance.AddLabelAction(action);
				return false;
			}
Esempio n. 11
0
            /// <summary>
            /// Handles the start of an image-like element: when the handler is not inside an
            /// ignorable element and a source URL is available, sufficiently large images
            /// (or images hosted on upload.wikimedia.org) are added as content text blocks.
            /// </summary>
            /// <remarks>
            /// Any exception raised while inspecting the element is logged via
            /// <c>Debug.WriteLine</c> and otherwise ignored. The handler's
            /// <c>inIgnorableElement</c> counter is incremented on every path.
            /// </remarks>
            /// <param name="instance">Content handler that collects text blocks.</param>
            /// <param name="localName">Element name; not read here.</param>
            /// <param name="atts">Attributes of the element (alt, title, width, height, src).</param>
            /// <returns>Always <c>true</c>.</returns>
            public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
            {
                try
                {
                    // An attribute may carry a null value; normalise it so Length is safe.
                    var alt = (atts.Contains("alt") ? atts["alt"].Value : null) ?? "";
                    if (alt.Length < 5 && atts.Contains("title"))
                    {
                        alt = atts["title"].Value ?? alt;
                    }

                    int width = Math.Max(ReadPixelDimension(atts, "width"), 1);
                    int height = Math.Max(ReadPixelDimension(atts, "height"), 1);
                    var src = atts.Contains("src") ? atts["src"].Value : FindAlternateSrc(atts);

                    if (instance.inIgnorableElement <= 0 && !string.IsNullOrWhiteSpace(src))
                    {
                        // Evaluate this unconditionally: inside a short-circuited "||" chain it
                        // would stay false whenever the alt text or size already matched.
                        bool isWikimedia = src.StartsWith("//upload.wikimedia.org", StringComparison.Ordinal);

                        if (alt.Length > 5 || width > 400 || height > 320 || isWikimedia)
                        {
                            var altWidthHeight = FindAlternateWidthHieght(src);
                            width = Math.Max(altWidthHeight.Item1, width);
                            height = Math.Max(altWidthHeight.Item2, height);

                            // Protocol-relative URLs are given an explicit scheme.
                            if (src.StartsWith("//", StringComparison.Ordinal))
                                src = "http:" + src;

                            if (width > 400 || height > 320 || isWikimedia)
                            {
                                var tb = new Document.TextBlock("", new Sharpen.BitSet(), Math.Max((Math.Max(width, height) / 6), alt.Length), 0, 0, 0, 0, src);
                                tb.SetIsContent(true);
                                instance.textBlocks.Add(tb);
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Debug.WriteLine("during boilerpipe parsing: " + ex.ToString());
                }

                instance.inIgnorableElement++;
                return true;
            }

            /// <summary>
            /// Reads a width/height style attribute as an integer, ignoring a trailing "px"
            /// or ";". Returns 0 when the attribute is missing or is not a plain integer
            /// (for example "100%"), so one malformed dimension does not discard the image.
            /// </summary>
            private static int ReadPixelDimension(HtmlAttributeCollection atts, string name)
            {
                if (!atts.Contains(name))
                {
                    return 0;
                }

                string raw = atts[name].Value;
                if (raw == null)
                {
                    return 0;
                }

                int parsed;
                return int.TryParse(raw.TrimEnd('p', 'x', ';'), out parsed) ? parsed : 0;
            }
Esempio n. 12
0
 /// <summary>
 /// Replaces the value of every attribute in the collection with the result of
 /// <c>HtmlEntity.Entitize</c> for that value.
 /// </summary>
 /// <param name="collection">Attributes whose values are rewritten in place.</param>
 private static void Entitize(HtmlAttributeCollection collection)
 {
   var attributes = (IEnumerable<HtmlAttribute>) collection;
   foreach (HtmlAttribute attribute in attributes)
   {
     string entitized = HtmlEntity.Entitize(attribute.Value);
     attribute.Value = entitized;
   }
 }
Esempio n. 13
0
 /// <summary>
 /// Scans the attributes for the first value that ends with ".jpg" or ".png".
 /// </summary>
 /// <param name="atts">Attributes of the element being inspected.</param>
 /// <returns>The first matching attribute value, or <c>null</c> when none matches.</returns>
 private string FindAlternateSrc(HtmlAttributeCollection atts)
 {
     foreach (var att in atts)
     {
         string value = att.Value;

         // Attributes without a value cannot name an image; skip them instead of throwing.
         if (value == null)
         {
             continue;
         }

         // Ordinal comparison: file extensions are not linguistic text.
         if (value.EndsWith(".jpg", System.StringComparison.Ordinal) ||
             value.EndsWith(".png", System.StringComparison.Ordinal))
         {
             return value;
         }
     }
     return null;
 }
Esempio n. 14
0
			/// <summary>
			/// Increments the handler's <c>inIgnorableElement</c> counter and returns <c>true</c>.
			/// </summary>
			/// <param name="instance">Content handler whose counter is incremented.</param>
			/// <param name="localName">Element name; not read here.</param>
			/// <param name="atts">Element attributes; not read here.</param>
			/// <returns>Always <c>true</c>.</returns>
			public bool Start(NBoilerpipeContentHandler instance, string localName,HtmlAttributeCollection atts)
			{
				instance.inIgnorableElement++;
				return true;
			}
Esempio n. 15
0
 /// <summary>
 /// Applies the string overload of <c>Entitize</c> to every attribute value and writes
 /// the result back to the attribute.
 /// </summary>
 /// <param name="collection">Attributes to process in place.</param>
 private static void Entitize(HtmlAttributeCollection collection)
 {
     foreach (HtmlAttribute attribute in collection)
     {
         string before = attribute.Value;
         attribute.Value = Entitize(before);
     }
 }
Esempio n. 16
0
			/// <summary>
			/// Invokes <c>Start</c> on both wrapped actions, <c>t1</c> first and then
			/// <c>t2</c>, and combines their results with a non-short-circuiting OR.
			/// </summary>
			/// <returns><c>true</c> when either wrapped action returned <c>true</c>.</returns>
			/// <exception cref="Sharpen.SAXException"></exception>
			public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
			{
				// Both actions always run; neither result suppresses the other call.
				bool firstResult = t1.Start(instance, localName, atts);
				bool secondResult = t2.Start(instance, localName, atts);
				return firstResult | secondResult;
			}
Esempio n. 17
0
			/// <summary>
			/// Reads the "size" attribute, resolves it to a font size, and inserts the result
			/// at the front of the handler's <c>fontSizeStack</c>.
			/// </summary>
			/// <remarks>
			/// A value without a sign is taken as-is; a signed value is applied relative to the
			/// first non-null entry of the stack, or to 3 when there is none. <c>null</c> is
			/// inserted when the attribute is missing or does not match
			/// <c>CommonTagActions.PAT_FONT_SIZE</c>.
			/// </remarks>
			/// <param name="instance">Content handler holding the font size stack.</param>
			/// <param name="localName">Element name; not read here.</param>
			/// <param name="atts">Attributes of the element; "size" may be absent.</param>
			/// <returns>Always <c>false</c>.</returns>
			public bool Start (NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
			{
				// The indexer yields null for a missing attribute; reading .Value directly
				// would throw for tags that carry no size attribute.
				var sizeAttribute = atts ["size"];
				string sizeAttr = (sizeAttribute != null) ? sizeAttribute.Value : null;

				int? size = null;
				if (sizeAttr != null)
				{
					var m = CommonTagActions.PAT_FONT_SIZE.Matcher(sizeAttr);
					if (m.Matches())
					{
						string rel = m.Group(1);
						int val = System.Convert.ToInt32(m.Group(2));
						if (rel.Length == 0)
						{
							// absolute
							size = val;
						}
						else
						{
							// relative: start from the first known size on the stack, else 3
							int prevSize = 3;
							foreach (int? s in instance.fontSizeStack)
							{
								if (s != null)
								{
									prevSize = s.Value;
									break;
								}
							}
							size = (rel[0] == '+') ? prevSize + val : prevSize - val;
						}
					}
				}

				instance.fontSizeStack.Insert(0, size);
				return false;
			}
Esempio n. 18
0
			/// <summary>
			/// Performs no work for the started element and returns <c>false</c>.
			/// </summary>
			/// <param name="instance">Content handler; not used here.</param>
			/// <param name="localName">Element name; not used here.</param>
			/// <param name="atts">Element attributes; not used here.</param>
			/// <returns>Always <c>false</c>.</returns>
			public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
			{
				return false;
			}
Esempio n. 19
0
			/// <summary>
			/// Flushes the handler's current block, increments its <c>inBody</c> counter,
			/// and returns <c>false</c>.
			/// </summary>
			/// <param name="instance">Content handler being driven.</param>
			/// <param name="localName">Element name; not read here.</param>
			/// <param name="atts">Element attributes; not read here.</param>
			/// <returns>Always <c>false</c>.</returns>
			public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
			{
				instance.FlushBlock();
				instance.inBody++;
				return false;
			}
Esempio n. 20
0
			/// <summary>
			/// Handles the start of an anchor element: bumps the handler's <c>inAnchor</c>
			/// counter and, unless the handler is inside an ignorable element, appends the
			/// anchor-text start marker followed by a space to the token builder.
			/// </summary>
			/// <param name="instance">Content handler being driven.</param>
			/// <param name="localName">Element name; not read here.</param>
			/// <param name="atts">Element attributes; not read here.</param>
			/// <returns>Always <c>false</c>.</returns>
			/// <exception cref="Sharpen.SAXException"></exception>
			public bool Start (NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
			{
				bool alreadyInsideAnchor = instance.inAnchor > 0;
				instance.inAnchor++;

				if (alreadyInsideAnchor) {
					// Nested A elements are not allowed per specification, so reaching this
					// branch probably means the parser produced a malformed tree. Recover by
					// treating the nested anchor as ignorable.
					instance.inIgnorableElement++;
				}

				if (instance.inIgnorableElement != 0) {
					return false;
				}

				instance.AddWhitespaceIfNecessary ();
				instance.tokenBuilder.Append(NBoilerpipeContentHandler.ANCHOR_TEXT_START);
				instance.tokenBuilder.Append(' ');
				instance.sbLastWasWhitespace = true;
				return false;
			}
Esempio n. 21
0
			/// <summary>
			/// Registers this action's <c>action</c> label action on the handler and
			/// returns <c>true</c>.
			/// </summary>
			/// <param name="instance">Content handler that receives the label action.</param>
			/// <param name="localName">Element name; not read here.</param>
			/// <param name="atts">Element attributes; not read here.</param>
			/// <returns>Always <c>true</c>.</returns>
			public bool Start(NBoilerpipeContentHandler instance, string localName, HtmlAttributeCollection atts)
			{
				instance.AddLabelAction(action);
				return true;
			}
Esempio n. 22
0
 /// <summary>
 /// Returns the value of the named attribute, or an empty string when the collection
 /// does not contain an attribute with that name.
 /// </summary>
 /// <param name="p_hacCollection">Collection to read from.</param>
 /// <param name="p_strItem">Name of the attribute to look up.</param>
 /// <returns>The attribute value, or <c>""</c> when the attribute is not present.</returns>
 private static string GetHtmlAttributeValue(HtmlAttributeCollection p_hacCollection, string p_strItem)
 {
     if (!p_hacCollection.Contains(p_strItem))
     {
         return "";
     }

     HtmlAttribute attribute = (HtmlAttribute)p_hacCollection[p_strItem];
     return attribute.Value;
 }