Exemplo n.º 1
0
        public async Task HtmlToText_TableElement_RowSpansWithMultiline()
        {
            // Table mixing rowspans with a cell that holds a paragraph and a list.
            // NOTE(review): the list is ended with a second `<ul>` rather than `</ul>`.
            // Kept as-is because the snapshot presumably reflects this exact markup - confirm
            // whether that is intentional.
            const string html = @"
                <table>
                    <tbody>
                        <tr>
                            <td rowspan=""3"">Cell 1</td>
                            <td>Cell 2</td>
                            <td rowspan=""2"">Cell 3</td>
                        </tr>
                        <tr>
                            <td rowspan=""2"">
                                <p>Cell 4 paragraph 1</p>
                                <ul>
                                    <li>Cell 4 list item 1</li>
                                    <li>Cell 4 list item 2</li>
                                <ul>
                            </td>
                        </tr>
                        <tr>
                            <td>Cell 5</td>
                        </tr>
                    </tbody>
                </table>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            // The converted text is checked with Snapshot.Match rather than an inline literal.
            Snapshot.Match(result);
        }
Exemplo n.º 2
0
        public async Task HtmlToText_TableElement_ColumnAndRowHeaders()
        {
            // Table with a header row in <thead> plus a leading <th> in each body row.
            const string html = @"
                <table>
                    <thead>
                        <tr>
                            <th>Header 1</th>
                            <th>Header 2</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <th>Header 3</th>
                            <td>Cell 1</td>
                        </tr>
                        <tr>
                            <th>Header 4</th>
                            <td>Cell 2</td>
                        </tr>
                    </tbody>
                </table>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 3
0
        public async Task HtmlToText_Blockquote()
        {
            // A lone blockquote is expected to convert to just its inner text.
            const string html = "<blockquote>Test quote</blockquote>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Assert.Equal("Test quote", result);
        }
        /// <summary>
        /// Writes the whole document to <paramref name="file"/>: the publication title, the
        /// release's time period label, the release's guidance text (when present) and finally
        /// the data files section produced by <c>WriteDataFiles</c>.
        /// </summary>
        /// <param name="file">Writer receiving the generated text.</param>
        /// <param name="release">Release supplying the title, time period and guidance HTML.</param>
        /// <param name="subjects">Subjects passed through to <c>WriteDataFiles</c>.</param>
        private static async Task DoWrite(
            TextWriter file,
            Release release,
            IList<DataGuidanceSubjectViewModel> subjects)
        {
            // Header line 1: publication title
            await file.WriteLineAsync(release.Publication.Title);

            // Header line 2: full time period label for the release
            var timePeriodLabel = TimePeriodLabelFormatter.Format(
                release.Year,
                release.TimePeriodCoverage,
                TimePeriodLabelFormat.FullLabel);
            await file.WriteLineAsync(timePeriodLabel);

            var hasGuidance = !release.DataGuidance.IsNullOrWhitespace();

            if (hasGuidance)
            {
                // Blank line, then the guidance HTML converted to plain text, then a line break
                await file.WriteLineAsync();

                var guidanceText = await HtmlToTextUtils.HtmlToText(release.DataGuidance);
                await file.WriteAsync(guidanceText);

                await file.WriteLineAsync();
            }

            await WriteDataFiles(file, subjects);
        }
Exemplo n.º 5
0
        public async Task HtmlToText_TableElement_EmptyCells()
        {
            // 3x3 table with one empty <td> in each row, at a different column each time.
            const string html = @"
                <table>
                    <tbody>
                        <tr>
                            <td>Cell 1</td>
                            <td></td>
                            <td>Cell 2</td>
                        </tr>
                        <tr>
                            <td></td>
                            <td>Cell 4</td>
                            <td>Cell 5</td>
                        </tr>
                        <tr>
                            <td>Cell 6</td>
                            <td>Cell 7</td>
                            <td></td>
                        </tr>
                    </tbody>
                </table>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 6
0
        public async Task HtmlToText_SingleElementInDiv()
        {
            // A paragraph wrapped in a div is expected to convert to the paragraph text only.
            const string html = "<div><p>Test paragraph</p></div>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Assert.Equal("Test paragraph", result);
        }
Exemplo n.º 7
0
        public async Task HtmlToText_TableElement_RowHeaderWithRowSpans()
        {
            // Two header rows in <thead>, the first header cell spanning both of them.
            const string html = @"
                <table>
                    <thead>
                        <tr>
                            <th rowspan=""2"">Header 1</th>
                            <th>Header 2</th>
                            <th>Header 3</th>
                        </tr>
                        <tr>
                            <th>Header 4</th>
                            <th>Header 5</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td>Cell 1</td>
                            <td>Cell 2</td>
                            <td>Cell 3</td>
                        </tr>
                        <tr>
                            <td>Cell 4</td>
                            <td>Cell 5</td>
                            <td>Cell 6</td>
                        </tr>
                    </tbody>
                </table>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 8
0
        public async Task HtmlToText_TableElement_MultilineCell()
        {
            // 3x3 table whose centre cell contains a paragraph and a list.
            // NOTE(review): the list is ended with a second `<ul>` rather than `</ul>`.
            // Kept as-is because the snapshot presumably reflects this exact markup - confirm
            // whether that is intentional.
            const string html = @"
                <table>
                    <tbody>
                        <tr>
                            <td>Cell 1</td>
                            <td>Cell 2</td>
                            <td>Cell 3</td>
                        </tr>
                        <tr>
                            <td>Cell 4</td>
                            <td>
                                <p>Cell 5 paragraph 1</p>
                                <ul>
                                    <li>Cell 5 list item 1</li>
                                    <li>Cell 5 list item 2</li>
                                <ul>
                            </td>
                            <td>Cell 6</td>
                        </tr>
                        <tr>
                            <td>Cell 7</td>
                            <td>Cell 8</td>
                            <td>Cell 9</td>
                        </tr>
                    </tbody>
                </table>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 9
0
        public async Task HtmlToText_InlineElements()
        {
            // Inline <strong>/<em> tags are expected to disappear, leaving the sentence intact.
            const string html =
                "<p>Test paragraph with <strong>bold text</strong> and <em>italic text</em></p>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Assert.Equal("Test paragraph with bold text and italic text", result);
        }
Exemplo n.º 10
0
        public async Task HtmlToText_UnorderedList_OverTenItemsWithMultiline()
        {
            // Eleven-item list where items 9 and 11 each contain two paragraphs.
            const string html = @"
                <ul>
                    <li>List item 1</li>
                    <li>List item 2</li>
                    <li>List item 3</li>
                    <li>List item 4</li>
                    <li>List item 5</li>
                    <li>List item 6</li>
                    <li>List item 7</li>
                    <li>List item 8</li>
                    <li>
                        <p>List item 9</p>
                        <p>Over multiple lines</p>
                    </li>
                    <li>List item 10</li>
                    <li>
                        <p>List item 11</p>
                        <p>Over multiple lines</p>
                    </li>
                </ul>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 11
0
        public async Task HtmlToText_TableElement_RowAndColSpansInCenterRight()
        {
            // Four-row table with a 2x2 cell (colspan and rowspan) in the centre-right.
            // NOTE(review): "Cell 6" appears twice in the fixture; left unchanged because the
            // snapshot presumably reflects it - confirm whether that is intentional.
            const string html = @"
                <table>
                    <tbody>
                        <tr>
                            <td>Cell 1</td>
                            <td>Cell 2</td>
                            <td>Cell 3</td>
                        </tr>
                        <tr>
                            <td>Cell 4</td>
                            <td colspan=""2"" rowspan=""2"">Cell 5</td>
                        </tr>
                        <tr>
                            <td>Cell 6</td>
                        </tr>
                        <tr>
                            <td>Cell 6</td>
                            <td>Cell 7</td>
                            <td>Cell 8</td>
                        </tr>
                    </tbody>
                </table>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 12
0
        public async Task HtmlToText_TestHtml1()
        {
            // Read the HTML fixture from the Resources folder located under _dir.
            var fixturePath = Path.Combine(_dir, "Resources/test-html-1.html");
            var html = await File.ReadAllTextAsync(fixturePath);

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 13
0
        public async Task HtmlToText_Blockquote_HasLineAfter()
        {
            // Blockquote immediately followed by a paragraph.
            const string html = @"
                <blockquote>Test quote</blockquote>
                <p>Paragraph after</p>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 14
0
        public async Task HtmlToText_HorizontalLineElement()
        {
            // Two paragraphs separated by an <hr/>.
            const string html = @"
                <p>Test paragraph 1</p>
                <hr/>
                <p>Test paragraph 2</p>
                ";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 15
0
        public async Task HtmlToText_Blockquote_WithParagraphs()
        {
            // Blockquote containing two paragraphs.
            const string html = @"
                <blockquote>
                    <p>Test paragraph quote 1</p>
                    <p>Test paragraph quote 2</p>
                </blockquote>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 16
0
        public async Task HtmlToText_OrderedList()
        {
            // Simple two-item ordered list.
            const string html = @"
                <ol>
                    <li>List item 1</li>
                    <li>List item 2</li>
                </ol>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 17
0
        public async Task HtmlToText_InlineElementsWithMultilineFormatting()
        {
            // Inline elements spread over several source lines are expected to collapse
            // into a single sentence separated by single spaces.
            const string html = @"<p>
                    Test paragraph with 
                    <strong>bold text</strong> 
                    and <em>italic text</em> and
                    <small>small text</small>
                  </p>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Assert.Equal("Test paragraph with bold text and italic text and small text", result);
        }
Exemplo n.º 18
0
        public async Task HtmlToText_LineBreakElements()
        {
            // One <br/> inside a paragraph and one standing between two paragraphs.
            const string html = @"
                <p>Test paragraph 1
                    <br/>with line break</p>
                <br/>
                <p>Test paragraph 2</p>
                ";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 19
0
        public async Task HtmlToText_OrderedList_HasLineAfter()
        {
            // Ordered list immediately followed by a paragraph.
            const string html = @"
                <ol>
                    <li>List item 1</li>
                    <li>List item 2</li>
                </ol>
                <p>Paragraph after</p>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 20
0
        public async Task HtmlToText_InlineElementsWithPunctuation()
        {
            // Punctuation that directly follows an inline element is expected to stay
            // attached to the preceding word, with no space inserted before it.
            const string html = @"<p>
                    Test paragraph with 
                    <strong>bold text</strong>, 
                    <em>italic text</em>! And
                    <small>small text</small>.
                    <strong>Next sentence?</strong>
                    Over here.
                  </p>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Assert.Equal("Test paragraph with bold text, italic text! And small text. Next sentence? Over here.", result);
        }
Exemplo n.º 21
0
        public async Task HtmlToText_MultipleElements()
        {
            // Headings h1-h6 followed by a paragraph and a span, all as siblings.
            const string html = @"
                <h1>Test heading 1</h1>
                <h2>Test heading 2</h2>
                <h3>Test heading 3</h3>
                <h4>Test heading 4</h4>
                <h5>Test heading 5</h5>
                <h6>Test heading 6</h6>
                <p>Test paragraph 1</p>
                <span>Test span 1</span>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 22
0
        public async Task HtmlToText_DescriptionList_HasLineAfter()
        {
            // Three-term description list immediately followed by a paragraph.
            const string html = @"
                <dl>
                    <dt>Term 1</dt>
                    <dd>Description 1</dd>
                    <dt>Term 2</dt>
                    <dd>Description 2</dd>
                    <dt>Term 3</dt>
                    <dd>Description 3</dd>
                </dl>
                <p>Paragraph after</p>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 23
0
        public async Task HtmlToText_UnorderedList_WithNestedText()
        {
            // The second list item holds bare text followed by two paragraphs.
            const string html = @"
                <ul>
                    <li>List item 1</li>
                    <li>
                        List item 2
                        <p>List item 2 paragraph 1</p>
                        <p>List item 2 paragraph 2</p>
                    </li>
                    <li>List item 3</li>
                </ul>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 24
0
        public async Task HtmlToText_Blockquote_WithCaption()
        {
            // Blockquote inside a <figure>, accompanied by a <figcaption> holding a <cite>.
            const string html = @"
                <figure>
                    <blockquote>
                        <p>Test paragraph quote 1</p>
                        <p>Test paragraph quote 2</p>
                    </blockquote>
                    <figcaption>
                        <cite>Test citation</cite>
                    </figcaption>
                </figure>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 25
0
        public async Task HtmlToText_UnorderedList_WithNestedList()
        {
            // The second list item holds bare text followed by a nested two-item list.
            const string html = @"
                <ul>
                    <li>List item 1</li>
                    <li>
                        Nested list
                        <ul>
                            <li>Nested list item 1</li>
                            <li>Nested list item 2</li>
                        </ul>
                    </li>
                </ul>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 26
0
        public async Task HtmlToText_TableElement_PadsToLargestCell()
        {
            // 2x2 table where one first-column cell is noticeably longer than the other.
            const string html = @"
                <table>
                    <tbody>
                        <tr>
                            <td>Cell 1</td>
                            <td>Cell 2</td>
                        </tr>
                        <tr>
                            <td>Cell 3 with more text</td>
                            <td>Cell 4</td>
                        </tr>
                    </tbody>
                </table>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 27
0
        public async Task HtmlToText_TableElement_ColumnHeaderWithColSpans()
        {
            // First row has a <th> spanning two columns; second row has two single <th> cells.
            const string html = @"
                <table>
                    <tbody>
                        <tr>
                            <th colspan=""2"">Header 1</th>
                            <td>Cell 1</td>
                        </tr>
                        <tr>
                            <th>Header 2</th>
                            <th>Header 3</th>
                            <td>Cell 2</td>
                        </tr>
                    </tbody>
                </table>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 28
0
        public async Task HtmlToText_DescriptionList_WithMultilineItem()
        {
            // The second <dd> holds bare text, a paragraph and a nested list.
            const string html = @"
                <dl>
                    <dt>Term 1</dt>
                    <dd>Description 1</dd>
                    <dt>Term 2</dt>
                    <dd>
                        Description 2
                        <p>Description 2 paragraph 1</p>
                        <ul>
                            <li>Description 2 list item 1</li>
                            <li>Description 2 list item 2</li>
                        </ul>
                    </dd>
                    <dt>Term 3</dt>
                    <dd>Description 3</dd>
                </dl>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
Exemplo n.º 29
0
        public async Task HtmlToText_TableElement_RowSpans()
        {
            // Three-row table with overlapping rowspans of 3, 2 and 2.
            const string html = @"
                <table>
                    <tbody>
                        <tr>
                            <td rowspan=""3"">Cell 1</td>
                            <td>Cell 2</td>
                            <td rowspan=""2"">Cell 3</td>
                        </tr>
                        <tr>
                            <td rowspan=""2"">Cell 4</td>
                        </tr>
                        <tr>
                            <td>Cell 5</td>
                        </tr>
                    </tbody>
                </table>";

            var result = await HtmlToTextUtils.HtmlToText(html);

            Snapshot.Match(result);
        }
        /// <summary>
        /// Writes the 'Data files' section to <paramref name="file"/>. Each subject gets its name,
        /// filename, optional geographic levels, time period and content summary, then a table
        /// of variable names/descriptions and a numbered list of footnotes when it has any.
        /// Nothing is written when <paramref name="subjects"/> is empty.
        /// </summary>
        /// <param name="file">Writer receiving the generated text.</param>
        /// <param name="subjects">Subjects to describe, written in list order.</param>
        private static async Task WriteDataFiles(TextWriter file, IList<DataGuidanceSubjectViewModel> subjects)
        {
            if (subjects.Count == 0)
            {
                return;
            }

            // Add 'Data files' section
            await file.WriteLineAsync();
            await file.WriteLineAsync("Data files");
            await file.WriteLineAsync();

            // Plain sequential loops: the collections are already in memory, so wrapping them
            // in async enumerables adds nothing.
            for (var index = 0; index < subjects.Count; index++)
            {
                var subject = subjects[index];

                await file.WriteLineAsync(subject.Name);
                await file.WriteLineAsync();

                await file.WriteLineAsync("Filename: " + subject.Filename);

                if (subject.GeographicLevels.Any())
                {
                    await file.WriteLineAsync(
                        "Geographic levels: " + string.Join("; ", subject.GeographicLevels));
                }

                var timePeriodsLabel = subject.TimePeriods.ToLabel();

                if (!timePeriodsLabel.IsNullOrWhitespace())
                {
                    await file.WriteLineAsync($"Time period: {timePeriodsLabel}");
                }

                if (!subject.Content.IsNullOrWhitespace())
                {
                    var content = await HtmlToTextUtils.HtmlToText(subject.Content);
                    await file.WriteLineAsync($"Content summary: {content}");
                }

                // Keep variables that have at least one of label/value filled in
                var variables = subject.Variables
                    .Where(variable =>
                        !variable.Label.IsNullOrWhitespace() ||
                        !variable.Value.IsNullOrWhitespace())
                    .ToList();

                if (variables.Count > 0)
                {
                    await file.WriteLineAsync();
                    await file.WriteLineAsync(
                        "Variable names and descriptions for this file are provided below:");
                    await file.WriteLineAsync();

                    // The filter above only guarantees that ONE of Value/Label is filled in, so
                    // the other may still be null here. Treat null as empty instead of throwing
                    // a NullReferenceException.
                    var valueWidth = variables.Max(variable => variable.Value?.Length ?? 0);
                    var labelWidth = variables.Max(variable => variable.Label?.Length ?? 0);

                    // Adds a table header for variable names/descriptions
                    await file.WriteLineAsync(
                        "Variable name".PadRight(valueWidth) + VariableSeparator + "Variable description");
                    await file.WriteLineAsync(
                        new string('-', valueWidth) + VariableSeparator + new string('-', labelWidth));

                    // Add table body for variable names/descriptions
                    foreach (var variable in variables)
                    {
                        var paddedValue = (variable.Value ?? string.Empty).PadRight(valueWidth);

                        // Concatenating a null Label yields an empty description column
                        await file.WriteLineAsync(paddedValue + VariableSeparator + variable.Label);
                    }
                }

                var footnotes = subject.Footnotes
                    .Where(footnote => !footnote.Label.IsNullOrWhitespace())
                    .ToList();

                if (footnotes.Count > 0)
                {
                    await file.WriteLineAsync();
                    await file.WriteLineAsync("Footnotes:");
                    await file.WriteLineAsync();

                    for (var footnoteIndex = 0; footnoteIndex < footnotes.Count; footnoteIndex++)
                    {
                        var listItemStart = $"{footnoteIndex + 1}. ";

                        await file.WriteAsync(listItemStart);

                        // Continuation lines are indented to line up under the first line's text
                        var indent = new string(' ', listItemStart.Length);
                        var isFirstLine = true;

                        foreach (var line in footnotes[footnoteIndex].Label.ToLines())
                        {
                            // The first line follows the "N. " prefix already written above
                            await file.WriteLineAsync(isFirstLine ? line : indent + line);
                            isFirstLine = false;
                        }
                    }
                }

                // Add some extra lines between data files
                if (index < subjects.Count - 1)
                {
                    await file.WriteLineAsync();
                    await file.WriteLineAsync();
                }
            }
        }