Пример #1
0
        /// <summary>
        /// Builds the settings for the job-openings feed: two Indeed RSS searches
        /// combined into <c>job_feed.xml</c> and narrowed by three filters.
        /// </summary>
        /// <returns>A new <see cref="PercolatorSettings"/> holding one pipeline and three filters.</returns>
        private static PercolatorSettings JobOpenings()
        {
            var pipelineSettings = new PercolatorSettings
            {
                Pipelines = new[]
                {
                    new PipelineSettings
                    {
                        Inputs = new []
                        {
                            // Location is "San Francisco, CA" (URL-encoded); it was previously
                            // misspelled as "San+Fransisco" in both queries.
                            "http://rss.indeed.com/rss?q=machine+learning&l=San+Francisco%2C+CA&sort=date",
                            "http://rss.indeed.com/rss?q=data+mining&l=San+Francisco%2C+CA&sort=date",
                        },

                        Output      = "job_feed.xml",
                        Title       = "Jobs",
                        Description = "Aggregated & filtered feed of ML job openings"
                    }
                },
                Filters = new[]
                {
                    // First, exclude all: glob "*" against any field.
                    new FilterSettings
                    {
                        Action      = FilterAction.Exclude,
                        Field       = FeedField.Any,
                        PatternType = PatternType.Glob,
                        Patterns    = new [] { "*" }
                    },

                    // Only include posts with the following keywords (any field).
                    new FilterSettings
                    {
                        Action      = FilterAction.Include,
                        Field       = FeedField.Any,
                        PatternType = PatternType.String,
                        Patterns    = new []
                        {
                            ".net", "sql", "c#", "java", "python", "data mining", "machine learning"
                        }
                    },

                    // Remove irrelevant posts & spam based on the title.
                    // NOTE(review): if String patterns match as substrings, "intern" would also
                    // hit titles such as "international" - confirm against the filter implementation.
                    new FilterSettings
                    {
                        Action      = FilterAction.Exclude,
                        Field       = FeedField.Title,
                        PatternType = PatternType.String,
                        Patterns    = new []
                        {
                            "associate", "clerk", "webmaster", "specialist", "sales", "insurance", "junior", "entry", "jr.", "intern"
                        }
                    },
                }
            };

            return pipelineSettings;
        }
Пример #2
0
        /// <summary>
        /// Entry point: builds the three sample configurations and percolates each one.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Every configuration is constructed up front; they are then executed
            // in the order bikes, GitHub, jobs.
            PercolatorSettings[] allSettings =
            {
                CraigslistBikes(),
                GitHub(),
                JobOpenings()
            };

            foreach (PercolatorSettings settings in allSettings)
            {
                Percolate(settings);
            }
        }
Пример #3
0
        /// <summary>
        /// Builds the settings for the GitHub feed: four commit feed URLs combined
        /// into <c>github_feed.xml</c>, with one exclude filter and one include filter.
        /// </summary>
        /// <returns>A new <see cref="PercolatorSettings"/> holding one pipeline and two filters.</returns>
        private static PercolatorSettings GitHub()
        {
            // Commit feeds of the tracked repositories.
            var commitFeeds = new string[]
            {
                "https://github.com/StackExchange/dapper-dot-net/commits.atom",
                "https://github.com/JamesNK/Newtonsoft.Json/commits/master.atom",
                "https://github.com/StackExchange/StackExchange.Redis/commits/master.atom",
                "https://github.com/olviko/RssPercolator/commits/master.atom"
            };

            var feedPipeline = new PipelineSettings
            {
                Inputs      = commitFeeds,
                Output      = "github_feed.xml",
                Title       = "Github - Project releases",
                Description = "Aggregated feed of multiple projects"
            };

            // Exclude rule: glob "*" against any field.
            var excludeEverything = new FilterSettings
            {
                Action      = FilterAction.Exclude,
                Field       = FeedField.Any,
                PatternType = PatternType.Glob,
                Patterns    = new string[] { "*" }
            };

            // Include rule: the strings "Release" and "Version" against any field.
            var includeReleases = new FilterSettings
            {
                Action      = FilterAction.Include,
                Field       = FeedField.Any,
                PatternType = PatternType.String,
                Patterns    = new string[] { "Release", "Version" }
            };

            return new PercolatorSettings
            {
                Pipelines = new PipelineSettings[] { feedPipeline },
                Filters   = new FilterSettings[] { excludeEverything, includeReleases }
            };
        }
Пример #4
0
        /// <summary>
        /// Builds the settings for the bikes-for-sale feed: one Craigslist RSS search
        /// written to <c>bikes4sale.xml</c>, with one exclude filter and one include filter.
        /// </summary>
        /// <returns>A new <see cref="PercolatorSettings"/> holding one pipeline and two filters.</returns>
        private static PercolatorSettings CraigslistBikes()
        {
            var bikePipeline = new PipelineSettings
            {
                Inputs      = new string[] { "http://miami.craigslist.org/search/pbc/bik?format=rss" },
                Output      = "bikes4sale.xml",
                Title       = "Bikes for sale",
                Description = "Aggregated & filtered feed of bikes for sale on craigslist.org"
            };

            // Exclude rule: glob "*" against any field.
            var excludeEverything = new FilterSettings
            {
                Action      = FilterAction.Exclude,
                Field       = FeedField.Any,
                PatternType = PatternType.Glob,
                Patterns    = new string[] { "*" }
            };

            // Include rule: brand names matched against the title only.
            var includeBrands = new FilterSettings
            {
                Action      = FilterAction.Include,
                Field       = FeedField.Title,
                PatternType = PatternType.String,
                Patterns    = new string[] { "cannondale", "trek", "giant", "specialized" }
            };

            return new PercolatorSettings
            {
                Pipelines = new PipelineSettings[] { bikePipeline },
                Filters   = new FilterSettings[] { excludeEverything, includeBrands }
            };
        }
Пример #5
0
        /// <summary>
        /// Creates the filters described by <paramref name="percolatorSettings"/> and executes
        /// every configured pipeline with them.
        /// </summary>
        /// <param name="percolatorSettings">Filter and pipeline definitions to run.</param>
        /// <exception cref="ArgumentNullException"><paramref name="percolatorSettings"/> is null.</exception>
        /// <remarks>
        /// A pipeline whose <c>Output</c> is not a rooted path has that property rewritten in place
        /// to a path under the application's base directory before it is executed.
        /// </remarks>
        private static void Percolate(PercolatorSettings percolatorSettings)
        {
            if (percolatorSettings == null)
            {
                throw new ArgumentNullException("percolatorSettings");
            }

            // A missing filter list is treated as "no filters".
            IFilter[] filters = percolatorSettings.Filters != null ?
                percolatorSettings.Filters.Select(filterSettings => Filter.Create(filterSettings)).ToArray() :
                new IFilter[0];

            // A missing pipeline list is treated the same way: nothing to run, rather than
            // a NullReferenceException from the loop below.
            if (percolatorSettings.Pipelines == null)
            {
                return;
            }

            IPipelineEvaluator pipeline = PipelineEvaluator.Create();

            foreach (var pipelineSettings in percolatorSettings.Pipelines)
            {
                // Anchor relative output paths at the application's base directory.
                if (!Path.IsPathRooted(pipelineSettings.Output))
                {
                    pipelineSettings.Output = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, pipelineSettings.Output);
                }

                pipeline.Execute(filters, pipelineSettings);
            }
        }
Пример #6
0
        /// <summary>
        /// Runs each pipeline in <paramref name="percolatorSettings"/> through a pipeline
        /// evaluator, passing along the filters built from the same settings.
        /// </summary>
        /// <param name="percolatorSettings">Settings that supply the filters and pipelines.</param>
        /// <exception cref="ArgumentNullException"><paramref name="percolatorSettings"/> is null.</exception>
        /// <remarks>
        /// Non-rooted <c>Output</c> paths are replaced, on the settings object itself, by a path
        /// combined with the application's base directory.
        /// </remarks>
        private static void Percolate(PercolatorSettings percolatorSettings)
        {
            if (percolatorSettings == null)
            {
                throw new ArgumentNullException("percolatorSettings");
            }

            // No filter settings means an empty filter set.
            IFilter[] filters = percolatorSettings.Filters != null
                ? percolatorSettings.Filters.Select(filterSettings => Filter.Create(filterSettings)).ToArray()
                : new IFilter[0];

            IPipelineEvaluator pipeline = PipelineEvaluator.Create();

            // Mirror the tolerance applied to Filters: a null pipeline list simply means there
            // is nothing to execute (previously this threw a NullReferenceException).
            if (percolatorSettings.Pipelines != null)
            {
                foreach (var pipelineSettings in percolatorSettings.Pipelines)
                {
                    // Relative output paths are resolved against the application's base directory.
                    if (!Path.IsPathRooted(pipelineSettings.Output))
                    {
                        pipelineSettings.Output = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, pipelineSettings.Output);
                    }

                    pipeline.Execute(filters, pipelineSettings);
                }
            }
        }
Пример #7
0
        /// <summary>
        /// Creates the configuration for the job-openings feed: two Indeed RSS queries
        /// written to <c>job_feed.xml</c> and narrowed by three filters.
        /// </summary>
        /// <returns>A new <see cref="PercolatorSettings"/> with one pipeline and three filters.</returns>
        private static PercolatorSettings JobOpenings()
        {
            var pipelineSettings = new PercolatorSettings
            {
                Pipelines = new[]
                {
                    new PipelineSettings
                    {
                        Inputs = new []
                        {
                            // The city in the location parameter was previously misspelled
                            // ("San+Fransisco"); both queries now use "San+Francisco".
                            "http://rss.indeed.com/rss?q=machine+learning&l=San+Francisco%2C+CA&sort=date",
                            "http://rss.indeed.com/rss?q=data+mining&l=San+Francisco%2C+CA&sort=date",
                        },

                        Output = "job_feed.xml",
                        Title = "Jobs",
                        Description = "Aggregated & filtered feed of ML job openings"
                    }
                },
                Filters = new[]
                {
                    // First, exclude all: glob "*" against any field.
                    new FilterSettings
                    {
                        Action = FilterAction.Exclude,
                        Field = FeedField.Any,
                        PatternType = PatternType.Glob,
                        Patterns = new [] { "*" }
                    },

                    // Only include posts with the following keywords (any field).
                    new FilterSettings
                    {
                        Action = FilterAction.Include,
                        Field = FeedField.Any,
                        PatternType = PatternType.String,
                        Patterns = new []
                        {
                            ".net", "sql", "c#", "java", "python", "data mining", "machine learning"
                        }
                    },

                    // Remove irrelevant posts & spam based on the title.
                    new FilterSettings
                    {
                        Action = FilterAction.Exclude,
                        Field = FeedField.Title,
                        PatternType = PatternType.String,
                        Patterns = new []
                        {
                            "associate", "clerk", "webmaster", "specialist", "sales", "insurance", "junior", "entry", "jr.", "intern"
                        }
                    },
                }
            };

            return pipelineSettings;
        }
Пример #8
0
        /// <summary>
        /// Creates the configuration for the bikes-for-sale feed: a single Craigslist RSS
        /// search written to <c>bikes4sale.xml</c>, filtered by two rules.
        /// </summary>
        /// <returns>A new <see cref="PercolatorSettings"/> with one pipeline and two filters.</returns>
        private static PercolatorSettings CraigslistBikes()
        {
            var source = new PipelineSettings
            {
                Inputs = new string[]
                {
                    "http://miami.craigslist.org/search/pbc/bik?format=rss"
                },
                Output = "bikes4sale.xml",
                Title = "Bikes for sale",
                Description = "Aggregated & filtered feed of bikes for sale on craigslist.org"
            };

            // Rule 1: exclude, glob "*" on any field.
            var dropAll = new FilterSettings
            {
                Action = FilterAction.Exclude,
                Field = FeedField.Any,
                PatternType = PatternType.Glob,
                Patterns = new string[] { "*" }
            };

            // Rule 2: include, brand names checked against the title.
            var keepBrands = new FilterSettings
            {
                Action = FilterAction.Include,
                Field = FeedField.Title,
                PatternType = PatternType.String,
                Patterns = new string[] { "cannondale", "trek", "giant", "specialized" }
            };

            var result = new PercolatorSettings
            {
                Pipelines = new PipelineSettings[] { source },
                Filters = new FilterSettings[] { dropAll, keepBrands }
            };

            return result;
        }
Пример #9
0
        /// <summary>
        /// Creates the configuration for the GitHub feed: four commit feed URLs written to
        /// <c>github_feed.xml</c>, filtered by two rules.
        /// </summary>
        /// <returns>A new <see cref="PercolatorSettings"/> with one pipeline and two filters.</returns>
        private static PercolatorSettings GitHub()
        {
            var source = new PipelineSettings
            {
                Inputs = new string[]
                {
                    "https://github.com/StackExchange/dapper-dot-net/commits.atom",
                    "https://github.com/JamesNK/Newtonsoft.Json/commits/master.atom",
                    "https://github.com/StackExchange/StackExchange.Redis/commits/master.atom",
                    "https://github.com/olviko/RssPercolator/commits/master.atom"
                },
                Output = "github_feed.xml",
                Title = "Github - Project releases",
                Description = "Aggregated feed of multiple projects"
            };

            // Rule 1: exclude, glob "*" on any field.
            var dropAll = new FilterSettings
            {
                Action = FilterAction.Exclude,
                Field = FeedField.Any,
                PatternType = PatternType.Glob,
                Patterns = new string[] { "*" }
            };

            // Rule 2: include, the strings "Release" and "Version" on any field.
            var keepReleases = new FilterSettings
            {
                Action = FilterAction.Include,
                Field = FeedField.Any,
                PatternType = PatternType.String,
                Patterns = new string[] { "Release", "Version" }
            };

            var result = new PercolatorSettings
            {
                Pipelines = new PipelineSettings[] { source },
                Filters = new FilterSettings[] { dropAll, keepReleases }
            };

            return result;
        }