Example #1
0
        /// <summary>
        /// Requests the guest login URL of bbs.sjtu.edu.cn to collect the session cookies, then runs a
        /// <c>WebCrawlerSourceNode</c> over the PPPerson board page with those cookies and prints every
        /// entity that passes the monitor to the console.
        /// </summary>
        public void CrawlerTest()
        {
            CookieContainer cookieContainer = new CookieContainer();
            HttpWebRequest  request         = (HttpWebRequest)WebRequest.Create(@"https://bbs.sjtu.edu.cn/bbslogin?id=guest");

            request.ProtocolVersion = HttpVersion.Version10;
            request.ContentType     = "application/x-www-form-urlencoded";
            request.UserAgent       = @"Mozilla/5.0";
            // HttpWebResponse.Cookies is only populated when the request carries a CookieContainer;
            // without this assignment the container handed to the crawler would stay empty.
            request.CookieContainer = cookieContainer;

            // Dispose the response so the underlying connection is released before crawling starts.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                cookieContainer.Add(response.Cookies);
            }

            string url   = @"https://bbs.sjtu.edu.cn/bbsdoc?board=PPPerson";
            // Every table row except the first one becomes an entity; its first cell is the "ID" column.
            var    model = new XMLEntityModel(@".//tr[position() > 1]");

            model.AddXMLColumn("ID", @"./td[1]");

            WebCrawlerSourceNode crawler = new WebCrawlerSourceNode(new string[] { url }, model, cookieContainer, @"bbs.sjtu.edu.cn");

            PipelineTask.Create(crawler)
            .AddMonitor(
                (entity) =>
            {
                Console.WriteLine(entity);
            })
            .Start();
        }
Example #2
0
        /// <summary>
        /// Hands the original and the actual file path of the task to <c>DeleteFile</c> and then
        /// clears both path properties.
        /// The same task instance is returned so that calls can be chained.
        /// WARNING: DEBUG builds skip the <c>DeleteFile</c> calls and only clear the paths.
        /// </summary>
        /// <param name="task">The pipeline task with the input data</param>
        /// <returns>The pipeline task with the input and the output data</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="task"/> is null</exception>
        public PipelineTask DoWork(PipelineTask task)
        {
            if (task == null)
            {
                Logger.Error("Null task received");
                throw new ArgumentNullException("task");
            }

            // Files are kept in DEBUG builds because some tests may depend on them
            #if !DEBUG
            DeleteFile(task.OriginalFilePath);
            #endif
            task.OriginalFilePath = string.Empty;

            // Same rule for the actual (working) file
            #if !DEBUG
            DeleteFile(task.ActualFilePath);
            #endif
            task.ActualFilePath = string.Empty;

            Logger.Info("DeleterPostProcessor.DoWork finshed");

            return task;
        }
Example #3
0
        /// <summary>
        /// Classifies the extracted text of the task through <c>GetOpinionOfText</c> and stores a
        /// readable label for the result in <c>AnalyzerResult</c>.
        /// The same task instance is returned so that calls can be chained.
        /// </summary>
        /// <param name="task">The pipeline task with the input data</param>
        /// <returns>The pipeline task with the input and the output data</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="task"/> is null</exception>
        public PipelineTask DoWork(PipelineTask task)
        {
            if (task == null)
            {
                Logger.Error("Null task received");
                throw new ArgumentNullException("task");
            }

            var opinion = GetOpinionOfText(task.ExtractedText);

            // No default case: any other value leaves AnalyzerResult untouched
            switch (opinion)
            {
            case OpinionType.Positive:
                task.AnalyzerResult = "Positive Opinion";
                break;

            case OpinionType.Neutral:
                task.AnalyzerResult = "Neutral Opinion";
                break;

            case OpinionType.Negative:
                task.AnalyzerResult = "Negative Opinion";
                break;

            case OpinionType.Unknown:
                task.AnalyzerResult = "Unknown Opinion";
                break;
            }

            Logger.Info("OpinionAnalyzer.DoWork finshed, result: {0}", task.AnalyzerResult);

            return task;
        }
Example #4
0
        /// <summary>
        /// Pipes the CSV sample source into a text file and asserts that
        /// <c>TestHelper.CompareTwoFile</c> reports true for the source and the written output.
        /// </summary>
        public void CsvBasic()
        {
            var pipeline = PipelineTask.FromCsvFile(SampleSource).ToTextFile(Output);

            pipeline.Start();

            var filesMatch = TestHelper.CompareTwoFile(SampleSource, Output);

            Assert.IsTrue(filesMatch);
        }
Example #5
0
        /// <summary>
        /// Reads the tab-separated sample line by line, splits the default column on tabs, writes the
        /// result through a <c>TextFileConsumer</c> and compares the output with <c>SimpleSource</c>.
        /// </summary>
        public void SpiltByT()
        {
            var tabSeparatedSource = new SingleLineFileSourceNode(SimpleSourceT);

            PipelineTask.Create(tabSeparatedSource)
            .Spilt(Entity.DefaultColumn, separator: "\t")
            .To(new TextFileConsumer(SimpleFileOutput))
            .Start();

            var filesMatch = TestHelper.CompareTwoFile(SimpleSource, SimpleFileOutput);

            Assert.IsTrue(filesMatch);
        }
Example #6
0
        /// <summary>
        /// Splits the tab-separated sample on tabs and writes every entity through the template
        /// "##col1## dddd ##col2##" into <c>TemplateFileOutput</c>.
        /// </summary>
        public void FromFileToTemplateFile()
        {
            var tabSeparatedSource = new SingleLineFileSourceNode(SimpleSourceT);

            PipelineTask.Create(tabSeparatedSource)
            .Spilt(Entity.DefaultColumn, separator: "\t")
            .ToTemplateFile(TemplateFileOutput, "##col1## dddd ##col2##")
            .Start();

            // NOTE(review): the pipeline writes TemplateFileOutput, but this assertion compares
            // SimpleSource with SimpleFileOutput - confirm that this is the intended check.
            var filesMatch = TestHelper.CompareTwoFile(SimpleSource, SimpleFileOutput);

            Assert.IsTrue(filesMatch);
        }
Example #7
0
        /// <summary>
        /// Splits the tab-separated sample on tabs, adds a "Template" column built from a template
        /// string, writes the entities to <c>TemplateFileOutput</c> and compares that file with
        /// <c>SampleTemplateFileOutput</c>.
        /// </summary>
        public void AddTemplateColumn()
        {
            var tabSeparatedSource = new SingleLineFileSourceNode(SimpleSourceT);

            // NOTE(review): the second placeholder has no closing "##" - confirm this matches the
            // expected content of SampleTemplateFileOutput.
            PipelineTask.Create(tabSeparatedSource)
            .Spilt(Entity.DefaultColumn, separator: "\t")
            .AddTemplateColumn("Template", "##col1## ##col2")
            .ToTextFile(TemplateFileOutput)
            .Start();

            var filesMatch = TestHelper.CompareTwoFile(SampleTemplateFileOutput, TemplateFileOutput);

            Assert.IsTrue(filesMatch);
        }
Example #8
0
        /// <summary>
        /// Wraps a logical pipeline task for emission: keeps the logical task, assigns a fresh GUID,
        /// creates a message named after that GUID and starts with an empty component list.
        /// </summary>
        /// <param name="objETL">The logical pipeline task; also forwarded to the base constructor</param>
        /// <param name="context">The emitter context forwarded to the base constructor</param>
        public SsisPipelineTask(PipelineTask objETL, SSISEmitterContext context)
            : base(objETL, context)
        {
            _logicalETL    = objETL;
            _componentList = new List <SsisComponent>();
            _guid          = Guid.NewGuid();

            // TODO: Do this for everything
            string messageName = String.Format(System.Globalization.CultureInfo.InvariantCulture, "__SSIS2008Emitter:SSISDataFlow {0}", _guid.ToString());

            _message = MessageEngine.Create(messageName);
        }
Example #9
0
        /// <summary>
        /// Reads the "Course" JSON source with a model that maps every <c>Results</c> node to an entity
        /// with the columns "Name" and "Desc", writes the entities to a text file and compares the
        /// output with <c>SampleJsonOutput</c>.
        /// </summary>
        public void JsonBasic()
        {
            var resultModel = new XMLEntityModel(@".//Results");

            resultModel.AddXMLColumn("Name", "./Name");
            resultModel.AddXMLColumn("Desc", "./Desc");

            var pipeline = PipelineTask.FromJsonFile("Course", resultModel).ToTextFile(Output);

            pipeline.Start();

            var filesMatch = TestHelper.CompareTwoFile(SampleJsonOutput, Output);

            Assert.IsTrue(filesMatch);
        }
Example #10
0
        /// <summary>
        /// Reads the XML source with a model that maps every <c>Entity</c> node to an entity with the
        /// columns "col1" and "col2", writes the entities to a text file and compares the output with
        /// <c>SampleXMLOutput</c>.
        /// </summary>
        public void XmlBasic()
        {
            var entityModel = new XMLEntityModel(@".//Entity");

            entityModel.AddXMLColumn("col1", "./col1");
            entityModel.AddXMLColumn("col2", "./col2");

            var pipeline = PipelineTask.FromXmlFile(XmlSource, entityModel).ToTextFile(Output);

            pipeline.Start();

            var filesMatch = TestHelper.CompareTwoFile(SampleXMLOutput, Output);

            Assert.IsTrue(filesMatch);
        }
Example #11
0
        /// <summary>
        /// Attaches a monitor to a line-based file pipeline and asserts that the monitor callback
        /// fired exactly twice.
        /// </summary>
        public void MonitorConsumer()
        {
            int monitorCalls = 0;
            var lineSource   = new SingleLineFileSourceNode(SimpleSource);

            PipelineTask.Create(lineSource)
            .AddMonitor((source, e) =>
            {
                monitorCalls += 1;
            })
            .Start();

            Assert.AreEqual(2, monitorCalls);
        }
Example #12
0
        /// <summary>
        /// Round-trip check: the first pipeline splits <c>SimpleSource</c> and writes it with
        /// <c>ToFile</c>; the second pipeline reads that file back through a <c>FileSourceNode</c>
        /// and writes it as text. The text output is then compared with the original source.
        /// </summary>
        public void FileSerilization()
        {
            // Step 1: source lines -> file written by ToFile
            var lineSource = new SingleLineFileSourceNode(SimpleSource);

            PipelineTask.Create(lineSource)
            .Spilt(Entity.DefaultColumn)
            .ToFile(SimpleFileOutput)
            .Start();

            // Step 2: that file -> plain text file
            var writtenFile = new FileSourceNode(SimpleFileOutput);

            PipelineTask.Create(writtenFile)
            .ToTextFile(SimpleSourceT)
            .Start();

            var filesMatch = TestHelper.CompareTwoFile(SimpleSource, SimpleSourceT);

            Assert.IsTrue(filesMatch);
        }
        /// <summary>
        /// Runs <c>ExtractText</c> on the actual file of the task and stores the result in
        /// <c>ExtractedText</c>.
        /// The same task instance is returned so that calls can be chained.
        /// </summary>
        /// <param name="task">The pipeline task with the input data</param>
        /// <returns>The pipeline task with the input and the output data</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="task"/> is null</exception>
        public PipelineTask DoWork(PipelineTask task)
        {
            if (task == null)
            {
                Logger.Error("Null task received");
                throw new ArgumentNullException("task");
            }

            var extracted = this.ExtractText(task.ActualFilePath);

            task.ExtractedText = extracted;

            Logger.Info("BaseTextExtractor.DoWork finshed");

            return task;
        }
        /// <summary>
        /// Runs <c>UnzipFile</c> on the original file of the task and stores the returned path in
        /// <c>ActualFilePath</c>.
        /// The same task instance is returned so that calls can be chained.
        /// </summary>
        /// <param name="task">The pipeline task with the input data</param>
        /// <returns>The pipeline task with the input and the output data</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="task"/> is null</exception>
        public PipelineTask DoWork(PipelineTask task)
        {
            if (task == null)
            {
                Logger.Error("Null task received");
                throw new ArgumentNullException("task");
            }

            var unzippedPath = UnzipFile(task.OriginalFilePath);

            task.ActualFilePath = unzippedPath;

            Logger.Info("UnzipperPreProcessor.DoWork finshed");

            return task;
        }
Example #15
0
        /// <summary>
        /// Runs a pipeline over a <c>WebSourceNode</c> for http://www.bing.com and asserts that the
        /// monitor fired once and captured a non-null value from the default column.
        /// </summary>
        public void WebSource()
        {
            string pageContent  = null;
            int    monitorCalls = 0;
            var    webSource    = new WebSourceNode(@"http://www.bing.com");

            PipelineTask.Create(webSource)
            .AddMonitor((source, e) =>
            {
                monitorCalls += 1;
                pageContent   = e.CurrentEntity.GetValue <string>(Entity.DefaultColumn);
            })
            .Start();

            Assert.AreEqual(1, monitorCalls);
            Assert.IsNotNull(pageContent);
        }
Example #16
0
 /// <summary>
 /// Checks that <c>Extend</c> adds a value to each entity: the monitor before the extension
 /// expects two keys per entity, the monitor after it expects three.
 /// </summary>
 public void Extend()
 {
     var lineSource = new SingleLineFileSourceNode(SimpleSource);

     PipelineTask.Create(lineSource)
     .Spilt(Entity.DefaultColumn)
     .AddMonitor((source, e) =>
     {
         var keyCount = e.CurrentEntity.Values.Keys.Count;
         Assert.AreEqual(2, keyCount);
     })
     .Extend((current) =>
     {
         current.SetValue("a", "");
     })
     .AddMonitor((source, e) =>
     {
         var keyCount = e.CurrentEntity.Values.Keys.Count;
         Assert.AreEqual(3, keyCount);
     })
     .Start();
 }
Example #17
0
        /// <summary>
        /// Downloads the Wikipedia page on Chinese provinces, parses the attribute-less rows of the
        /// sortable wiki table into entities with the columns "GB" and "Province", writes them to a
        /// text file and asserts that the output matches the <c>Province</c> reference file.
        /// </summary>
        public void HtmlBasic()
        {
            XMLEntityModel model = new XMLEntityModel(@"//table[@class='wikitable sortable']/tr[not(@*)]");

            model.AddXMLColumn("GB", "./td[1]");
            model.AddXMLColumn("Province", "./td[3]");

            PipelineTask.FromWeb("http://en.wikipedia.org/wiki/China_provinces")
            .ParseHtml(model)
            .AddMonitor((entity) =>
            {
                Console.WriteLine();
            })
            .ToTextFile(Output, model)
            .Start();

            // The comparison result used to be discarded, so the test could never fail on a mismatch.
            Assert.IsTrue(TestHelper.CompareTwoFile(Province, Output));
        }
Example #18
0
 /// <summary>
 /// Checks that <c>Convert</c> replaces each entity: the monitor before the conversion expects
 /// two keys per entity, the monitor after it expects none because a new empty entity is returned.
 /// </summary>
 public void Convert()
 {
     var lineSource = new SingleLineFileSourceNode(SimpleSource);

     PipelineTask.Create(lineSource)
     .Spilt(Entity.DefaultColumn)
     .AddMonitor((source, e) =>
     {
         var keyCount = e.CurrentEntity.Values.Keys.Count;
         Assert.AreEqual(2, keyCount);
     })
     .Convert((current) =>
     {
         return new Entity();
     })
     .AddMonitor((source, e) =>
     {
         var keyCount = e.CurrentEntity.Values.Keys.Count;
         Assert.AreEqual(0, keyCount);
     })
     .Start();
 }
        /// <summary>
        /// Console entry point: loads the pipeline with the given id from the "pipelines" collection,
        /// loads each of its tasks from the "tasks" collection, attaches them to the pipeline and
        /// prints the sum of the tasks' <c>AverageTime</c> values followed by " seconds".
        /// </summary>
        /// <param name="args">Expected command line: <c>-pid PIPELINE_ID</c></param>
        static async System.Threading.Tasks.Task Main(string[] args)
        {
            // Check the length before indexing: a missing flag or a missing value must show the
            // usage text instead of crashing with an IndexOutOfRangeException.
            if (args == null || args.Length < 2 || args[0] != "-pid")
            {
                Console.WriteLine("Please, start program with pipeline id:\nPipelineConsole.exe -pid YOUR_PIPELINE_ID_HERE");
                Environment.Exit(0);
                return;
            }

            string pid = args[1];

            // Reject malformed ids up front instead of letting ObjectId.Parse throw.
            ObjectId pipelineId;
            if (!ObjectId.TryParse(pid, out pipelineId))
            {
                Console.Error.WriteLine("Invalid pipeline id: " + pid);
                Environment.Exit(1);
                return;
            }

            string         connectionString = ConfigurationManager.ConnectionStrings["MongoDb"].ConnectionString;
            MongoClient    client           = new MongoClient(connectionString);
            IMongoDatabase database         = client.GetDatabase("test");
            var            collection       = database.GetCollection <BsonDocument>("pipelines");
            var            taskCollection   = database.GetCollection <BsonDocument>("tasks");

            int pipelineRunTime = 0;

            // One filter builder serves both the pipeline lookup and the per-task lookups.
            var          builder = Builders <BsonDocument> .Filter;
            var          filter  = builder.Eq("_id", pipelineId);
            BsonDocument bson    = await collection.Find(filter).FirstOrDefaultAsync();

            // FirstOrDefaultAsync yields null when no document matches.
            if (bson == null)
            {
                Console.Error.WriteLine("Pipeline not found: " + pid);
                Environment.Exit(1);
                return;
            }

            Pipeline pipeline = BsonSerializer.Deserialize <Pipeline>(bson);

            foreach (var taskId in pipeline.TaskIds)
            {
                var          taskFilter = builder.Eq("_id", taskId);
                BsonDocument bsonTask   = await taskCollection.Find(taskFilter).FirstOrDefaultAsync();

                // A dangling task id would make the total meaningless, so stop with an error.
                if (bsonTask == null)
                {
                    Console.Error.WriteLine("Task not found: " + taskId);
                    Environment.Exit(1);
                    return;
                }

                PipelineTask task = BsonSerializer.Deserialize <PipelineTask>(bsonTask);
                pipeline.Tasks.Add(task);
                pipelineRunTime += task.AverageTime;
            }

            Console.WriteLine(pipelineRunTime + " seconds");
            Environment.Exit(0);
        }
Example #20
0
        /// <summary>
        /// Reads all rows of the <c>datasetinfo</c> table from the local "DatasetInfo" database over a
        /// trusted connection, checks that every entity is non-empty, writes the rows to a CSV file
        /// and asserts that exactly 71 entities passed the monitor.
        /// </summary>
        public void LoadFromSql()
        {
            int count = 0;

            PipelineTask.FromSql("select * from datasetinfo",
                                 new ConnectInfo()
            {
                Db      = "DatasetInfo",
                Server  = ".",
                IsTrust = true
            })
            .AddMonitor((entity) =>
            {
                count++;
                Assert.IsFalse(entity.IsEmpty());
            })
            .ToCsvFile("CSVOutput")
            .Start();

            // AreEqual reports expected and actual values on failure, unlike IsTrue(count == 71).
            Assert.AreEqual(71, count);
        }
Example #21
0
        /// <summary>
        /// Writes the "SimpleAzureSource" CSV rows to the Azure table described by <c>info</c>, then
        /// reads the table back through an <c>AzureTableSourceNode</c> and asserts that five entities
        /// passed the monitor.
        /// </summary>
        public void AzureTableTest()
        {
            int rowsRead = 0;

            // Upload: partition key from col1, row key from col1 followed by col2
            PipelineTask.FromCsvFile("SimpleAzureSource")
            .AddMonitor((row) =>
            {
                Console.WriteLine();
            })
            .ToAzureTable(info, "##col1##", "##col1####col2##")
            .Start();

            // Read back and count
            var tableSource = new AzureTableSourceNode(info);

            PipelineTask.Create(tableSource)
            .AddMonitor((row) =>
            {
                rowsRead += 1;
            })
            .Start();

            Assert.AreEqual(5, rowsRead);
        }
Example #22
0
        /// <summary>
        /// Counts the entities seen by a monitor before and after a <c>Filter</c> step whose predicate
        /// always returns true, and asserts one entity before and none after.
        /// </summary>
        public void Filter()
        {
            int beforeFilter = 0;
            int afterFilter  = 0;
            var lineSource   = new SingleLineFileSourceNode(SimpleSource);

            // NOTE(review): the expected counts depend on how Filter interprets its predicate and on
            // how entities flow through the monitors - neither is visible here; confirm.
            PipelineTask.Create(lineSource)
            .Spilt(Entity.DefaultColumn)
            .AddMonitor((source, e) =>
            {
                beforeFilter += 1;
            })
            .Filter((current) =>
            {
                return true;
            })
            .AddMonitor((source, e) =>
            {
                afterFilter += 1;
            })
            .Start();

            Assert.AreEqual(1, beforeFilter);
            Assert.AreEqual(0, afterFilter);
        }
Example #23
0
 /// <summary>
 /// Creates the attribute and stores the given core task in the <c>PipelineTask</c> member.
 /// </summary>
 /// <param name="coreTask">The task to store</param>
 public PipelineTaskAttribute(PipelineTask coreTask)
 {
     this.PipelineTask = coreTask;
 }
Example #24
0
 /// <summary>
 /// Appends an <c>AzureTableConsumer</c> built from the given arguments to the pipeline.
 /// </summary>
 /// <param name="pipelineTask">The pipeline to extend</param>
 /// <param name="azureTableInfo">Passed to the consumer as the table description</param>
 /// <param name="partitionKeyTemplate">Passed to the consumer as the partition key template</param>
 /// <param name="rowKeyTemplate">Passed to the consumer as the row key template</param>
 /// <param name="maxParallelCount">Passed to the consumer; defaults to 10</param>
 /// <returns>The result of <c>pipelineTask.To</c> for the new consumer</returns>
 public static PipelineTask ToAzureTable(this PipelineTask pipelineTask, AzureTableInfo azureTableInfo, string partitionKeyTemplate, string rowKeyTemplate, int maxParallelCount = 10)
 {
     var tableConsumer = new AzureTableConsumer(azureTableInfo, partitionKeyTemplate, rowKeyTemplate, maxParallelCount);

     return pipelineTask.To(tableConsumer);
 }
Example #25
0
 /// <summary>
 /// Stores the given task in the <c>PipelineTask</c> member; only callable from derived types.
 /// </summary>
 /// <param name="pipelineTask">The task to store</param>
 protected PipelineTaskEventAttribute(PipelineTask pipelineTask)
 {
     this.PipelineTask = pipelineTask;
 }
Example #26
0
 /// <summary>
 /// Creates the attribute; the task is forwarded unchanged to the base constructor.
 /// </summary>
 /// <param name="pipelineTask">The task forwarded to the base constructor</param>
 public BeforePipelineTaskAttribute(PipelineTask pipelineTask)
     : base(pipelineTask)
 {
     // No additional state
 }
Example #27
0
 /// <summary>
 /// Creates the attribute; the task is forwarded unchanged to the base constructor.
 /// </summary>
 /// <param name="pipelineTask">The task forwarded to the base constructor</param>
 public AfterPipelineTaskAttribute(PipelineTask pipelineTask)
     : base(pipelineTask)
 {
     // No additional state
 }