Example #1
0
        /// <summary>
        /// Converts each input item to HTML via <c>GetHtmlFromFile</c>, parses the HTML into
        /// posts and writes them to <paramref name="outputFilePath"/> as CSV: one header row
        /// followed by one record per post.
        /// </summary>
        /// <param name="inputFiles">Items handed one by one to <c>GetHtmlFromFile</c> (presumably file paths).</param>
        /// <param name="outputFilePath">Path opened with a <see cref="StreamWriter"/>; an existing file is overwritten.</param>
        static void ProcessFileList(IEnumerable <string> inputFiles, string outputFilePath)
        {
            string[] htmlFiles = inputFiles.Select(GetHtmlFromFile).ToArray();

            PostParser postParser = new PostParser(new HtmlPostContentParser(new HtmlDocument()));
            postParser.SetPosts(htmlFiles);
            Post[] posts = postParser.ToPosts();

            // The CSV writer is disposable as well. Stacking the usings disposes it before the
            // stream writer, so anything it still holds is flushed while the stream is open.
            using (TextWriter writer = new StreamWriter(outputFilePath))
            using (CsvWriter csv = new CsvWriter(writer))
            {
                csv.WriteHeader <Post>();
                csv.NextRecord();

                foreach (Post post in posts)
                {
                    csv.WriteRecord(post);
                    csv.NextRecord();
                }
            }
        }
Example #2
0
        /// <summary>
        /// Parses the settings of series-sample-test-3.md and expects the "layout" and
        /// "title" entries to carry the values "post" and "some title".
        /// </summary>
        public void Given_Header_That_Contains_Two_Properties_With_Values_Should_Map_To_Dictionary_Correctly()
        {
            // Arrange
            string markdown = File.ReadAllText("SettingParsers/TestFiles/series-sample-test-3.md");

            // Act
            var settings = PostParser.ParseSettings(markdown);

            // Assert
            Assert.Equal("post", settings["layout"]);
            Assert.Equal("some title", settings["title"]);
        }
Example #3
0
        /// <summary>
        /// A header made of two delimiter lines with only a blank line between them
        /// must produce an empty settings collection.
        /// </summary>
        public void Given_Header_With_Empty_Settings_Should_Return_Empty_Dictionary()
        {
            // Arrange: opening and closing delimiters with nothing but a blank line inside.
            const string emptyHeader = @"---

---";

            // Act
            var settings = PostParser.ParseSettings(emptyHeader);

            // Assert
            Assert.Empty(settings);
        }
Example #4
0
        /// <summary>
        /// Parses the settings of series-sample-test-4.md and expects exactly two entries:
        /// "layout" = "post" and "title" = "some title".
        /// </summary>
        public void Given_Header_Which_Contains_Blank_Lines_Should_Only_Parse_Out_Valid_Lines_Correctly()
        {
            // Arrange
            string markdown = File.ReadAllText("SettingParsers/TestFiles/series-sample-test-4.md");

            // Act
            var settings = PostParser.ParseSettings(markdown);

            // Assert: both values are present and nothing else was picked up.
            Assert.Equal("post", settings["layout"]);
            Assert.Equal("some title", settings["title"]);
            Assert.Equal(2, settings.Count);
        }
Example #5
0
        /// <summary>
        /// Splits series-sample-test-2.md and expects an empty first tuple item and the
        /// full post text as the second tuple item.
        /// </summary>
        public void Given_A_File_With_No_Header_Should_Return_Raw_Post_And_Empty_Header()
        {
            // Arrange
            const string expectedPost = @"Lorem ipsum dolor sit amet, consectetur adipiscing elit.

Donec porttitor non velit nec feugiat.";
            string markdown = File.ReadAllText("SettingParsers/TestFiles/series-sample-test-2.md");

            // Act
            var parsed = PostParser.ParseDataFromFile(markdown);

            // Assert: Item1 is compared against the empty string, Item2 against the post text.
            Assert.Equal(string.Empty, parsed.Item1);
            Assert.Equal(expectedPost, parsed.Item2);
        }
Example #6
0
        /// <summary>
        /// Splits series-sample-test-1.md and expects the second tuple item to equal the
        /// post text, including the two leading line breaks.
        /// </summary>
        public void Given_File_Should_Return_Tuple_With_Item2_Containing_Post()
        {
            // Arrange
            const string expectedPost = @"

Lorem ipsum dolor sit amet, consectetur adipiscing elit.

Donec porttitor non velit nec feugiat.";
            string markdown = File.ReadAllText("SettingParsers/TestFiles/series-sample-test-1.md");

            // Act
            var parsed = PostParser.ParseDataFromFile(markdown);

            // Assert
            Assert.Equal(expectedPost, parsed.Item2);
        }
Example #7
0
        /// <summary>
        /// Parses the settings of series-sample-test-5.md and expects a "series" entry that
        /// casts to <c>Series</c> with a non-empty part list, name "123" and current part 2.
        /// </summary>
        public void Given_Header_Which_Contains_Series_Should_Return_Series_Key_With_Series_Object()
        {
            // Arrange
            string markdown = File.ReadAllText("SettingParsers/TestFiles/series-sample-test-5.md");

            // Act
            var settings = PostParser.ParseSettings(markdown);

            // Assert: the key has to exist before its value is cast and inspected.
            Assert.True(settings.ContainsKey("series"));

            var parsedSeries = (Series)settings["series"];

            Assert.NotEmpty(parsedSeries.Parts);
            Assert.Equal("123", parsedSeries.Name);
            Assert.Equal(2, parsedSeries.Current);
        }
Example #8
0
        /// <summary>
        /// Splits series-sample-test-1.md and expects the first tuple item to equal the
        /// complete header block, delimiter lines included.
        /// </summary>
        public void Given_File_Should_Return_Tuple_With_Item1_Containing_Header()
        {
            // Arrange
            const string expectedHeader = @"---
layout: post
series:
    name: 123
    current: 1
    part: test part 1
    part: test part 2
    part: test part 3
title: some title
---";
            string markdown = File.ReadAllText("SettingParsers/TestFiles/series-sample-test-1.md");

            // Act
            var parsed = PostParser.ParseDataFromFile(markdown);

            // Assert
            Assert.Equal(expectedHeader, parsed.Item1);
        }
Example #9
0
        /// <summary>
        /// Creates the parser described by <paramref name="setting"/>.
        /// </summary>
        /// <param name="setting">
        /// Supplies the block-begin, block-continue, block-end and escape values for the
        /// <c>DefaultParser</c>, plus the <c>Optimize</c> flag.
        /// </param>
        /// <returns>
        /// The <c>DefaultParser</c> itself when <c>Optimize</c> is false; otherwise a
        /// <c>PostParser</c> wrapping it together with the four optimizer instances.
        /// </returns>
        public static IParser BuildParser(ISetting setting)
        {
            IParser baseParser = new DefaultParser(
                setting.BlockBegin,
                setting.BlockContinue,
                setting.BlockEnd,
                setting.Escape);

            // Without optimization the plain parser is all that is needed.
            if (!setting.Optimize)
            {
                return baseParser;
            }

            var optimizers = new IOptimizer[]
            {
                ConstantInvokeOptimizer.Instance,
                ConstantMapOptimizer.Instance,
                IfOptimizer.Instance,
                ReturnOptimizer.Instance
            };

            return new PostParser(baseParser, optimizers);
        }
Example #10
0
 /// <summary>
 /// Initializes the page's components and creates the <c>PostParser</c> assigned to <c>parser</c>.
 /// </summary>
 public PostPage()
 {
     InitializeComponent();

     parser = new PostParser();
 }
Example #11
0
        /// <summary>
        /// Walks the pages from <c>_start</c> up to the parsed page count, loads every post
        /// linked from each page, parses it and saves it to <c>_storage</c>.
        /// </summary>
        /// <remarks>
        /// Progress is reported through <c>RaiseReport</c> and <c>RaisePage</c>. Exceptions are
        /// caught at three levels (per post, per page, whole run) and reported through
        /// <c>RaiseError</c> with their message only, so a failing post or page does not stop
        /// the run. The walk ends early once a post URL equals the last URL returned by
        /// <c>_storage.GetLastPostUrl()</c>.
        /// </remarks>
        /// <returns>A task that completes when the walk has finished.</returns>
        public async Task ParseAsync()
        {
            try
            {
                RaiseReport("START");
                var pages = await ParsePagesCountAsync().ConfigureAwait(false);   // Page count; upper bound of the loop below

                RaiseReport($"Pages: { pages }");
                var lastUrl = _storage.GetLastPostUrl();    // URL of the last post already parsed; reported as "New session" when null
                RaiseReport($"Last: { lastUrl ?? "New session" }");
                // Set when lastUrl is reached; checked after each page to leave the outer loop too.
                var isEnd = false;
                for (int i = _start; i <= pages; i++)
                {
                    try
                    {
                        if (i == 1)
                        {
                            continue;           // Page 1 is never requested (NOTE(review): reason not visible here - confirm)
                        }
                        var pageUrl = string.Format(PagePattern, i);

                        RaisePage(new Uri(pageUrl));
                        var postLinkTags = await GetPostUrlsFromPageAsync(pageUrl).ConfigureAwait(false);    // Post URLs found on this page

                        // One stopwatch is reused (restarted) for the load, parse and save timings of every post.
                        var stopWatch = Stopwatch.StartNew();
                        foreach (var postUrl in postLinkTags)
                        {
                            try
                            {
                                RaisePage(new Uri(postUrl));
                                if (postUrl == lastUrl)     // Reached the last stored post: stop the whole walk
                                {
                                    isEnd = true;
                                    break;
                                }
                                else if (_start != 0 && _storage.IsExists(postUrl))     // Non-zero start: skip posts that are already stored
                                {
                                    continue;
                                }

                                stopWatch.Restart();
                                var html = await LoadPageAsync(postUrl).ConfigureAwait(false);    // Load post html

                                RaiseReport($"Page loaded: [{ stopWatch.Elapsed.TotalMilliseconds }]");
                                if (string.IsNullOrEmpty(html))
                                {
                                    // Nothing to parse; report it and move on to the next post.
                                    RaiseError($"Can't load page: { postUrl }");
                                    continue;
                                }
                                stopWatch.Restart();

                                // Parse the post, its comments and (optionally) its files, then save to storage

                                using var postParser = new PostParser(html);
                                var postDto = await postParser.GetPostDtoAsync().ConfigureAwait(false);

                                postDto.Comments = await postParser.GetPostCommentsAsync().ConfigureAwait(false);

                                // Files are only collected when _saveFiles is set; otherwise an empty list is stored.
                                postDto.Files = _saveFiles ? await postParser.GetPostFilesAsync().ConfigureAwait(false) : new List <string>();

                                RaiseReport($"Post parsed: [{ stopWatch.Elapsed.TotalMilliseconds }] ms");
                                stopWatch.Restart();
                                await _storage.SavePostAsync(postUrl, postDto).ConfigureAwait(false);

                                RaiseReport($"Post saved: [{ stopWatch.Elapsed.TotalMilliseconds }] ms");
                            }
                            catch (Exception ex)
                            {
                                // A failure on one post is reported; the loop continues with the next post.
                                RaiseError(ex.Message);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failure on one page is reported; the loop continues with the next page.
                        RaiseError(ex.Message);
                    }
                    // The break above only leaves the post loop; this leaves the page loop as well.
                    if (isEnd)
                    {
                        break;
                    }
                }
                RaiseReport("DONE!");
            }
            catch (Exception ex)
            {
                // Failures outside the page loop (e.g. while getting the page count) end the run after being reported.
                RaiseError(ex.Message);
            }
        }
Example #12
0
        /// <summary>
        /// Parsing an empty settings string must produce an empty settings collection.
        /// </summary>
        public void Given_Empty_RawSettings_Should_Return_Empty_Dictionary()
        {
            // Arrange
            const string rawSettings = "";

            // Act
            var settings = PostParser.ParseSettings(rawSettings);

            // Assert
            Assert.Empty(settings);
        }