public void TestEncoding()
 {
     // HTML entities inside markup are expected to come back as their
     // literal characters once the tags have been removed.
     HtmlCleaner cleaner = new HtmlCleaner();

     // Trailing non-breaking space: only the letter is expected back
     string nonBreakingSpaceResult = cleaner.RemoveHtml("<p>a&nbsp;</p>");
     Assert.AreEqual("a", nonBreakingSpaceResult);

     // Ampersand entity
     string ampersandResult = cleaner.RemoveHtml("<p>&amp;</p>");
     Assert.AreEqual("&", ampersandResult);

     // Pound sign entity
     string poundResult = cleaner.RemoveHtml("<p>&pound;</p>");
     Assert.AreEqual("£", poundResult);
 }
 /// <summary>
 /// Copies the descriptive text of an indicator onto a CKAN package:
 /// the name becomes the title, and the definition and source (with HTML
 /// removed) become the notes and origin.
 /// </summary>
 /// <param name="package">Package whose Title, Notes and Origin are assigned.</param>
 /// <param name="indicatorMetadata">Metadata whose Descriptive dictionary supplies the text.</param>
 private static void SetMetadataProperties(CkanPackage package, IndicatorMetadata indicatorMetadata)
 {
     var htmlCleaner = new HtmlCleaner();
     IDictionary<string, string> descriptiveMetadata = indicatorMetadata.Descriptive;

     // The name is required: a missing key still throws here
     package.Title = descriptiveMetadata[IndicatorMetadataTextColumnNames.Name];

     // Definition is optional text, so fall back to an empty string
     // instead of throwing KeyNotFoundException when it is absent
     string definition;
     if (!descriptiveMetadata.TryGetValue(IndicatorMetadataTextColumnNames.Definition, out definition))
     {
         definition = string.Empty;
     }
     package.Notes = htmlCleaner.RemoveHtml(definition);

     // Source is handled the same way as the definition
     string source;
     if (!descriptiveMetadata.TryGetValue(IndicatorMetadataTextColumnNames.Source, out source))
     {
         source = string.Empty;
     }
     package.Origin = htmlCleaner.RemoveHtml(source);
 }
        /// <summary>
        /// Fills the indicator metadata worksheet: names the sheet, writes the
        /// header, then writes one row per indicator containing its name,
        /// definition, value type, unit label and each displayable text property.
        /// </summary>
        /// <param name="metadata">Indicators to write, one worksheet row each.</param>
        public void AddIndicatorMetadata(IList<IndicatorMetadata> metadata)
        {
            IWorksheet ws = IndicatorMetadataWorksheet;
            ws.Name = "Indicator Metadata";

            // Property names to export
            IOrderedEnumerable<IndicatorMetadataTextProperty> propertiesToDisplay = GetPropertiesToDisplay(metadata);

            // Headers
            AddIndicatorMetadataHeader(ws, propertiesToDisplay);

            // Data
            HtmlCleaner htmlCleaner = new HtmlCleaner();
            IRange cells = ws.Cells;
            int metadataRow = IndicatorMetadataRow;
            foreach (IndicatorMetadata indicatorMetadata in metadata)
            {
                IDictionary<string, string> m = indicatorMetadata.Descriptive;
                int column = 0;
                cells[metadataRow, column++].Value = m[IndicatorMetadataTextColumnNames.Name];

                // Definition may be missing, in which case an empty string is cleaned and written
                string val;
                if (!m.TryGetValue(IndicatorMetadataTextColumnNames.Definition, out val))
                {
                    val = string.Empty;
                }
                cells[metadataRow, column++].Value = htmlCleaner.RemoveHtml(val);

                cells[metadataRow, column++].Value = indicatorMetadata.ValueType.Name;
                cells[metadataRow, column++].Value = indicatorMetadata.Unit.Label;

                foreach (var property in propertiesToDisplay)
                {
                    // Single lookup: the cell is only written when the indicator has this property
                    string text;
                    if (m.TryGetValue(property.ColumnName, out text))
                    {
                        if (property.IsHtml)
                        {
                            // Remove HTML
                            text = htmlCleaner.RemoveHtml(text);
                        }

                        cells[metadataRow, column].Value = text;
                    }

                    // Always advance so every property keeps its own column
                    column++;
                }

                // Format the whole row that was just written.
                // NOTE(review): column is now one past the last written cell, so if the
                // range end is inclusive this also formats one unused column - confirm.
                IRange range = cells.Cells[metadataRow, 0, metadataRow, column];
                range.WrapText = true;
                range.VerticalAlignment = VAlign.Top;
                metadataRow++;
            }
        }
        public void TestEncodingWithTextBlock()
        {
            // A realistic block of text containing several kinds of HTML entity
            const string textBlock =
                @"The Public Health Outcomes Framework&nbsp;Healthy lives, healthy people: Improving outcomes and supporting transparency&nbsp;sets out a vision for public health, desired outcomes and the indicators that will help us understand how well public health is being improved and protected.&nbsp;The framework concentrates on two high-level outcomes to be achieved across the public health system, and groups further indicators into four &lsquo;domains&rsquo; that cover the full spectrum of public health. The outcomes reflect a focus not only on how long people live, but on how well they live at all stages of life.&nbsp;The data published in the tool are the baselines for the Public Health Outcomes Framework, with more recent and historical trend data where these are available. The baseline period is 2010 or equivalent, unless these data are unavailable or not deemed to be of sufficient quality.";

            HtmlCleaner cleaner = new HtmlCleaner();
            string cleaned = cleaner.RemoveHtml(textBlock);

            // None of the encoded entities may survive in the cleaned text
            string[] encodedEntities = { "&nbsp;", "&rsquo;", "&lsquo;" };
            foreach (string entity in encodedEntities)
            {
                Assert.IsFalse(cleaned.Contains(entity));
            }
        }
        public void TestTransformLinks()
        {
            // An anchor element is expected to be replaced by the URL from its href
            const string singleUrl = "url";
            HtmlCleaner cleaner = new HtmlCleaner();

            // href delimited by single quotes
            string fromSingleQuotes = cleaner.TransformLinks("<a href='url'>text</a>");
            Assert.AreEqual(singleUrl, fromSingleQuotes);

            // href delimited by double quotes
            string fromDoubleQuotes = cleaner.TransformLinks("<a href=\"url\">text</a>");
            Assert.AreEqual(singleUrl, fromDoubleQuotes);

            // Upper-case tag name
            string fromUpperCaseTag = cleaner.TransformLinks("<A href='url'>text</A>");
            Assert.AreEqual(singleUrl, fromUpperCaseTag);

            // Two adjacent links
            string fromTwoLinks = cleaner.TransformLinks("<a href=\"url\">b</a><a href=\"url2\">c</a>");
            Assert.AreEqual("url  url2", fromTwoLinks);

            // Links with text immediately before and after: URLs are separated from it
            string fromLinksInText = cleaner.TransformLinks("ww<a href=\"url\">b</a><a href=\"url2\">c</a>ww");
            Assert.AreEqual("ww url  url2 ww", fromLinksInText);
        }
 public void TestRemoveVoidHtmlElements()
 {
     // A self-closing element followed by one character should leave
     // a result that is exactly one character long.
     HtmlCleaner cleaner = new HtmlCleaner();
     string cleaned = cleaner.RemoveHtml("<img />a");

     Assert.AreEqual(1, cleaned.Length);
 }
 public void TestRemoveHtml()
 {
     // A paragraph wrapping one character should leave a result
     // that is exactly one character long.
     HtmlCleaner cleaner = new HtmlCleaner();
     string cleaned = cleaner.RemoveHtml("<p>a</p>");

     Assert.AreEqual(1, cleaned.Length);
 }
 /// <summary>
 /// Stores the supplied content reader and HTML cleaner in the instance fields.
 /// </summary>
 /// <param name="contentReader">Reader assigned to the content reader field.</param>
 /// <param name="htmlCleaner">Cleaner assigned to the HTML cleaner field.</param>
 public ContentProvider(IContentReader contentReader, HtmlCleaner htmlCleaner)
 {
     // Both dependencies are kept exactly as supplied
     this._htmlCleaner = htmlCleaner;
     this._contentReader = contentReader;
 }