예제 #1
0
        /// <summary>
        /// Entry point of the Dacq process. Configures web access and logging, optionally starts
        /// an HTTP server that answers "help", "components" and "sources" requests with XML,
        /// builds <c>Config.NumPipes</c> processing pipelines behind a load-balancing component,
        /// starts all data readers and then blocks until Ctrl-C is pressed. After that the HTTP
        /// server, the readers and the consumers are shut down in this order.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        /// <exception cref="Exception">The data sources file declares the same site identifier twice.</exception>
        static void Main(string[] args)
        {
            // global web-access settings (values are presumably milliseconds -- TODO confirm against WebUtils)
            WebUtils.DefaultTimeout     = 10000;
            WebUtils.NumRetries         = 3;
            WebUtils.WaitBetweenRetries = 5000;
            WebUtils.ReadWriteTimeout   = 30000;
            ServicePointManager.DefaultConnectionLimit = 8;

            // stays at MinValue until all components have been started (see below)
            DateTime startTime  = DateTime.MinValue;
            Logger   rootLogger = Logger.GetRootLogger();

            // log to the console and, if a log file is configured, also append to that file
            rootLogger.LocalLevel      = Logger.Level.Debug;
            rootLogger.LocalOutputType = Logger.OutputType.Console;
            if (Config.LogFileName != null)
            {
                rootLogger.LocalOutputWriter = new StreamWriter(Config.LogFileName, /*append=*/ true);
                rootLogger.LocalOutputType  |= Logger.OutputType.Writer;
            }
            ArrayList <StreamDataProducerPoll>    dataReaders   = new ArrayList <StreamDataProducerPoll>();
            ArrayList <StreamDataConsumer>        dataConsumers = new ArrayList <StreamDataConsumer>();
            Dictionary <IWorkflowComponent, Guid> components    = new Dictionary <IWorkflowComponent, Guid>();
            // logger used by this method and by the HTTP server thread
            Logger logger = Logger.GetLogger("Latino.Workflows.Dacq");
            // flags shared (via closure capture) between this thread, the HTTP server thread
            // and the Ctrl-C handler
            bool exit = false;
            bool httpServerRunning = false;

            // start HTTP server (only if a client IP is configured)
            if (Config.ClientIp != null)
            {
                new Thread(new ThreadStart(delegate()
                {
                    HttpListener listener          = new HttpListener();
                    listener.AuthenticationSchemes = AuthenticationSchemes.Anonymous;
                    listener.Prefixes.Add(string.Format("http://localhost/{0}/", Config.WebSiteId));
                    listener.Prefixes.Add(string.Format("http://first.ijs.si/{0}/", Config.WebSiteId));
                    listener.Start();
                    logger.Info("Main.HttpServer", "HTTP server started.");
                    httpServerRunning        = true;
                    // time of the previous "components" request; MinValue means "none yet"
                    DateTime prevRequestTime = DateTime.MinValue;
                    while (!exit)
                    {
                        try
                        {
                            HttpListenerContext ctx = null;
                            bool acceptFailed       = false;
                            listener.BeginGetContext(new AsyncCallback(delegate(IAsyncResult ar)
                            {
                                try { ctx = listener.EndGetContext(ar); }
                                catch (Exception acceptError)
                                {
                                    // a failure after exit has been requested is not reported;
                                    // any other failure is logged and a new accept is issued below
                                    if (!exit)
                                    {
                                        logger.Warn("Main.HttpServer", acceptError);
                                    }
                                    acceptFailed = true;
                                }
                            }), /*state=*/ null);
                            // poll instead of blocking so that the exit flag is noticed
                            while (!exit && ctx == null && !acceptFailed)
                            {
                                Thread.Sleep(500);
                            }
                            if (!exit && ctx != null)
                            {
                                // process requests one by one
                                try
                                {
                                    ctx.Response.AppendHeader("Content-Type", "application/xml");
                                    XmlWriterSettings settings = new XmlWriterSettings();
                                    settings.Encoding          = Encoding.UTF8;
                                    settings.CheckCharacters   = false;
                                    settings.Indent            = true;
                                    XmlWriter w = XmlTextWriter.Create(ctx.Response.OutputStream, settings);
                                    w.WriteStartElement("DacqResponse");
                                    w.WriteElementString("DacqStartTime", startTime.ToString(TIME_FORMAT));
                                    if (prevRequestTime == DateTime.MinValue)
                                    {
                                        prevRequestTime = startTime;
                                    }
                                    DateTime thisRequestTime = DateTime.Now;
                                    string command           = GetHttpRequestCommand(ctx.Request.Url.ToString());
                                    if (command == "components")
                                    {
                                        w.WriteElementString("PreviousRequestTime", prevRequestTime.ToString(TIME_FORMAT));
                                        w.WriteElementString("ThisRequestTime", thisRequestTime.ToString(TIME_FORMAT));
                                    }
                                    w.WriteElementString("Request", ctx.Request.Url.ToString());
                                    w.WriteElementString("Command", command);
                                    w.WriteStartElement("ResponseBody");
                                    if (command == "help")
                                    {
                                        WriteSupportedCommands(w);
                                    }
                                    else if (command == "components")
                                    {
                                        WriteComponentInfo(w, components, thisRequestTime, prevRequestTime);
                                        // the next "components" request reports relative to this one
                                        prevRequestTime = thisRequestTime;
                                    }
                                    else if (command == "sources")
                                    {
                                        WriteRssInfo(w, components);
                                    }
                                    w.WriteEndElement(); // ResponseBody
                                    w.WriteEndElement(); // DacqResponse
                                    w.Close();
                                }
                                finally
                                {
                                    // always release the response, even if writing it failed,
                                    // so that the client is not left waiting
                                    ctx.Response.Close();
                                }
                            }
                            else if (!exit)
                            {
                                // the accept failed: pause before retrying so that a persistently
                                // failing listener does not turn this loop into a busy loop
                                Thread.Sleep(500);
                            }
                        }
                        catch (Exception e)
                        {
                            logger.Warn("Main.HttpServer", e);
                        }
                    }
                    listener.Stop();
                    logger.Info("Main.HttpServer", "HTTP server stopped.");
                    httpServerRunning = false;
                })).Start();
            }
            // Ctrl-C does not kill the process; it requests a graceful shutdown and logs
            // which components are still running at that moment
            Console.CancelKeyPress += delegate(object sender, ConsoleCancelEventArgs e)
            {
                logger.Info("Main", "*** Ctrl-C command received. ***");
                e.Cancel = true;
                exit     = true;
                string componentsStr = "";
                foreach (StreamDataProducerPoll c in dataReaders)
                {
                    if (c.IsRunning)
                    {
                        componentsStr += "\r\n" + c.GetType() + " : " + c.Name;
                    }
                }
                foreach (StreamDataConsumer dataConsumer in dataConsumers)
                {
                    if (dataConsumer.IsRunning)
                    {
                        componentsStr += "\r\n" + dataConsumer.GetType() + " : " + dataConsumer.Load;
                    }
                }
                logger.Info("Main", "Active components:" + componentsStr);
            };
            logger.Info("Main", "Starting Dacq ...");
            // all readers feed this component, which dispatches to the pipelines built below
            PassOnComponent lb = new PassOnComponent(); // data load balancer

            lb.DispatchPolicy = DispatchPolicy.BalanceLoadMax;
            dataConsumers.Add(lb);
            if (Config.DbConnectionString != null)
            {
                if (!Config.SkipBoilerplateHistoryInit)
                {
                    UrlTreeBoilerplateRemoverComponent.InitializeHistory(Config.DbConnectionString);
                    // NOTE(review): the connection string for RssFeedComponent is only set when the
                    // history initialization is NOT skipped -- confirm that this is intended
                    RssFeedComponent.DatabaseConnectionString = Config.DbConnectionString;
                }
            }
            // build the processing pipelines (one per iteration)
            for (int i = 0; i < Config.NumPipes; i++)
            {
                DocumentWriterComponent            dwc = new DocumentWriterComponent(Config.DbConnectionStringDump, /*cmdTimeout=*/ 0, Config.XmlDataDumpRoot, Config.HtmlDataDumpRoot, Config.HtmlDumpViewRoot);
                UrlTreeBoilerplateRemoverComponent bpr = new UrlTreeBoilerplateRemoverComponent();
                DocumentWriterComponent            dw  = new DocumentWriterComponent(Config.DbConnectionString, /*cmdTimeout=*/ 0, Config.XmlDataRoot, Config.HtmlDataRoot, Config.HtmlViewRoot);
                HtmlTokenizerComponent             htc = new HtmlTokenizerComponent();
                // the linguistic components are only created for English
                SentenceSplitterComponent          ssc = null;
                EnglishTokenizerComponent          tok = null;
                EnglishLemmatizerComponent         lem = null;
                EnglishPosTaggerComponent          pt  = null;
                if (Config.Language == "English")
                {
                    ssc = new SentenceSplitterComponent();
                    tok = new EnglishTokenizerComponent();
                    lem = new EnglishLemmatizerComponent(EnglishLemmatizerComponent.Type.Both);
                    pt  = new EnglishPosTaggerComponent();
                }
                LanguageDetectorComponent ld = new LanguageDetectorComponent();
                DocumentFilterComponent   df = new DocumentFilterComponent();
                // filter handler: returns false (and logs the reason) for documents to be rejected
                df.OnFilterDocument += new DocumentFilterComponent.FilterDocumentHandler(delegate(Document document, Logger dfLogger) {
                    string docId = document.Features.GetFeatureValue("guid");
                    // reject documents whose content type is not Text
                    if (document.Features.GetFeatureValue("contentType") != "Text")
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: contentType not Text (id={0}).", docId);
                        return(false);
                    }
                    // reject documents with blacklisted URL
                    if (document.Features.GetFeatureValue("blacklisted") == "True")
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: responseUrl blacklisted (id={0}).", docId);
                        return(false);
                    }
                    // reject documents with not enough content
                    if (Convert.ToInt32(document.Features.GetFeatureValue("bprContentCharCount")) < 100)
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: bprContentCharCount < 100 (id={0}).", docId);
                        return(false);
                    }
                    // reject unsupported languages
                    if (document.Features.GetFeatureValue("detectedCharRange") != "Basic Latin")
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: detectedCharRange not Basic Latin (id={0}).", docId);
                        return(false);
                    }
                    if (document.Features.GetFeatureValue("detectedLanguage") != Config.Language)
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: detectedLanguage not {1} but {2} (id={0}).", docId, Config.Language, document.Features.GetFeatureValue("detectedLanguage"));
                        return(false);
                    }
                    // reject exact duplicates
                    if (document.Features.GetFeatureValue("unseenContent") == "No")
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: no unseen content (id={0}).", docId);
                        return(false);
                    }
                    return(true);
                });
                ld.BlockSelector = "TextBlock/Content"; // due to problems with itar-tass.com
                df.SubscribeDumpConsumer(dwc);

                // common front part: balancer -> HTML tokenizer -> boilerplate remover -> language detector -> filter
                lb.Subscribe(htc);
                htc.Subscribe(bpr);
                bpr.Subscribe(ld);
                ld.Subscribe(df);

                if (Config.Language == "English")
                {
                    // English: filter -> sentence splitter -> tokenizer -> lemmatizer -> POS tagger -> writer
                    df.Subscribe(ssc);
                    ssc.Subscribe(tok);
                    tok.Subscribe(lem);
                    lem.Subscribe(pt);
                    pt.Subscribe(dw);
                    dataConsumers.AddRange(new StreamDataConsumer[] { dwc, df, ld, htc, ssc, tok, pt, dw, lem, bpr });
                }
                else
                {
                    // other languages: filter -> writer
                    df.Subscribe(dw);
                    dataConsumers.AddRange(new StreamDataConsumer[] { dwc, df, ld, htc, dw, bpr });
                }
            }
            // initialize stream simulator
            if (Config.OfflineSource != null)
            {
                DocumentStreamReaderComponent dsr = new DocumentStreamReaderComponent(Config.OfflineSource);
                dsr.Name = Config.OfflineSource;
                dataReaders.Add(dsr);
                dsr.Subscribe(lb);
            }
            // initialize RSS feed components; the sources file consists of "site: <id>" lines,
            // each followed by the source URLs of that site; empty lines and lines starting
            // with '#' are ignored
            RssFeedComponent rssComp = null;
            Set <string>     sites   = new Set <string>();

            if (Config.DataSourcesFileName != null)
            {
                string[] sources = File.ReadAllLines(Config.DataSourcesFileName);
                foreach (string _url in sources)
                {
                    string url = _url.Trim();
                    if (url != "" && !url.StartsWith("#"))
                    {
                        Match m;
                        if ((m = Regex.Match(url, @"^site\s*:(?<siteId>.*)$", RegexOptions.IgnoreCase)).Success)
                        {
                            // a new site starts here: create its feed component
                            string siteId = m.Result("${siteId}").Trim().ToLower();
                            if (sites.Contains(siteId))
                            {
                                throw new Exception(string.Format("Duplicated site identifier ({0}).", siteId));
                            }
                            sites.Add(siteId);
                            rssComp = new RssFeedComponent(siteId);
                            if (Config.Language != "")
                            {
                                rssComp.RssXmlCodePageDetectorLanguage = (Language)Enum.Parse(typeof(Language), Config.Language);
                            }
                            rssComp.MaxDocsPerCorpus   = Config.MaxDocsPerCorpus;
                            rssComp.RandomDelayAtStart = Config.RandomDelayAtStart;
                            rssComp.Name             = siteId;
                            rssComp.TimeBetweenPolls = Config.SleepBetweenPolls;
                            rssComp.IncludeRssXml    = true;
                            rssComp.Initialize();
                            rssComp.IncludeRawData = true;
                            rssComp.Subscribe(lb);
                            dataReaders.Add(rssComp);
                        }
                        else if (rssComp != null)
                        {
                            // a source line belonging to the most recently declared site;
                            // source lines before the first "site:" line are ignored
                            rssComp.AddSource(url);
                        }
                    }
                }
            }
            foreach (StreamDataProducerPoll c in dataReaders)
            {
                c.Start();
            }
            // register all components (with a fresh GUID each) for the HTTP server's reports
            foreach (IWorkflowComponent obj in dataReaders)
            {
                components.Add(obj, Guid.NewGuid());
            }
            foreach (IWorkflowComponent obj in dataConsumers)
            {
                components.Add(obj, Guid.NewGuid());
            }
            startTime = DateTime.Now;
            // idle until the Ctrl-C handler sets the exit flag
            while (!exit)
            {
                Thread.Sleep(500);
            }
            // shut down gracefully
            logger.Info("Main", "Please wait while shutting down ...");
            // wait for HTTP server shutdown
            while (httpServerRunning)
            {
                Thread.Sleep(500);
            }
            // stop the data readers: first signal all of them, then wait for each one
            foreach (StreamDataProducerPoll c in dataReaders)
            {
                c.Stop();
            }
            foreach (StreamDataProducerPoll c in dataReaders)
            {
                while (c.IsRunning)
                {
                    Thread.Sleep(500);
                }
            }
            // wait for all data consumers to finish
            foreach (StreamDataConsumer dataConsumer in dataConsumers)
            {
                if (dataConsumer.IsRunning)
                {
                    while (!dataConsumer.IsSuspended)
                    {
                        Thread.Sleep(500);
                    }
                }
                dataConsumer.Dispose();
            }
            logger.Info("Main", "Dacq successfully stopped.");
        }
예제 #2
0
        /// <summary>
        /// Entry point of the Dacq process. Configures web access and logging, reads its settings
        /// from the application configuration, optionally starts an HTTP server that answers
        /// "help", "components" and "sources" requests with XML, builds NUM_WRITERS processing
        /// pipelines behind a load-balancing component, starts all data readers and then blocks
        /// until Ctrl-C is pressed. After that the HTTP server, the readers and the consumers are
        /// shut down in this order.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        /// <exception cref="Exception">The data sources file declares the same site identifier twice.</exception>
        static void Main(string[] args)
        {
            // global web-access settings (values are presumably milliseconds -- TODO confirm against WebUtils)
            WebUtils.DefaultTimeout = 10000;
            WebUtils.NumRetries = 3;
            WebUtils.WaitBetweenRetries = 5000;
            WebUtils.ReadWriteTimeout = 30000;
            ServicePointManager.DefaultConnectionLimit = 8;

            // stays at MinValue until all components have been started (see below)
            DateTime startTime = DateTime.MinValue;
            // log to the console and, if a log file is configured, also append to that file
            Logger rootLogger = Logger.GetRootLogger();
            rootLogger.LocalLevel = Logger.Level.Debug;
            string LOG_FILE_NAME = ConfigurationManager.AppSettings["logFileName"];
            rootLogger.LocalOutputType = Logger.OutputType.Console;
            if (LOG_FILE_NAME != null)
            {
                rootLogger.LocalOutputWriter = new StreamWriter(LOG_FILE_NAME, /*append=*/true);
                rootLogger.LocalOutputType |= Logger.OutputType.Writer;
            }
            // application settings; a missing key yields null (webSiteId falls back to "dacq")
            string SOURCES_FILE_NAME = ConfigurationManager.AppSettings["dataSourcesFileName"];
            string WEB_SITE_ID = Utils.GetConfigValue("webSiteId", "dacq");
            string DB_CONNECTION_STRING = ConfigurationManager.AppSettings["dbConnectionString"];
            string SQL_DB_CONNECTION_STRING = ConfigurationManager.AppSettings["SqlDbConnectionString"];
            string DB_CONNECTION_STRING_DUMP = ConfigurationManager.AppSettings["dbConnectionStringDump"];
            string CLIENT_IP = ConfigurationManager.AppSettings["clientIp"];
            string XML_DATA_ROOT = ConfigurationManager.AppSettings["xmlDataRoot"];
            string XML_DATA_ROOT_DUMP = ConfigurationManager.AppSettings["xmlDataRootDump"];
            string HTML_DATA_ROOT = ConfigurationManager.AppSettings["htmlDataRoot"];
            string HTML_DATA_ROOT_DUMP = ConfigurationManager.AppSettings["htmlDataRootDump"];
            // the "New" variants are the configured roots with a "\New" sub-folder appended
            string XML_DATA_ROOT_NEW = XML_DATA_ROOT == null ? null : (XML_DATA_ROOT.TrimEnd('\\') + "\\" + "New");
            string HTML_DATA_ROOT_NEW = HTML_DATA_ROOT == null ? null : (HTML_DATA_ROOT.TrimEnd('\\') + "\\" + "New");
            string XML_DATA_ROOT_DUMP_NEW = XML_DATA_ROOT_DUMP == null ? null : (XML_DATA_ROOT_DUMP.TrimEnd('\\') + "\\" + "New");
            string HTML_DATA_ROOT_DUMP_NEW = HTML_DATA_ROOT_DUMP == null ? null : (HTML_DATA_ROOT_DUMP.TrimEnd('\\') + "\\" + "New");
            string DB_CONNECTION_STRING_NEW = ConfigurationManager.AppSettings["SqlDbConnectionStringNew"];
            // ZeroMQ output is enabled by any of: true, 1, yes, on (case-insensitive)
            string tmp = ConfigurationManager.AppSettings["enableZeroMQ"];
            bool ENABLE_ZEROMQ = tmp != null && new List<string>(new string[] { "true", "1", "yes", "on" }).Contains(tmp.ToLower());
            const int NUM_WRITERS = 8;
            const int SLEEP_BETWEEN_POLLS = 15 * 60000; // 15 minutes
            ArrayList<StreamDataProducerPoll> dataReaders = new ArrayList<StreamDataProducerPoll>();
            ArrayList<StreamDataConsumer> dataConsumers = new ArrayList<StreamDataConsumer>();
            Dictionary<IWorkflowComponent, Guid> components = new Dictionary<IWorkflowComponent, Guid>();
            // logger used by this method and by the HTTP server thread
            Logger logger = Logger.GetLogger("Latino.Workflows.Dacq");
            // flags shared (via closure capture) between this thread, the HTTP server thread
            // and the Ctrl-C handler
            bool exit = false;
            bool httpServerRunning = false;
            // start HTTP server (only if a client IP is configured)
            if (CLIENT_IP != null)
            {
                new Thread(new ThreadStart(delegate()
                {
                    HttpListener listener = new HttpListener();
                    listener.AuthenticationSchemes = AuthenticationSchemes.Anonymous;
                    listener.Prefixes.Add(string.Format("http://localhost/{0}/", WEB_SITE_ID));
                    listener.Prefixes.Add(string.Format("http://first.ijs.si/{0}/", WEB_SITE_ID));
                    listener.Start();
                    logger.Info("Main.HttpServer", "HTTP server started.");
                    httpServerRunning = true;
                    // time of the previous "components" request; MinValue means "none yet"
                    DateTime prevRequestTime = DateTime.MinValue;
                    while (!exit)
                    {
                        try
                        {
                            HttpListenerContext ctx = null;
                            bool acceptFailed = false;
                            listener.BeginGetContext(new AsyncCallback(delegate(IAsyncResult ar)
                            {
                                try { ctx = listener.EndGetContext(ar); }
                                catch (Exception acceptError)
                                {
                                    // a failure after exit has been requested is not reported;
                                    // any other failure is logged and a new accept is issued below
                                    if (!exit) { logger.Warn("Main.HttpServer", acceptError); }
                                    acceptFailed = true;
                                }
                            }), /*state=*/null);
                            // poll instead of blocking so that the exit flag is noticed
                            while (!exit && ctx == null && !acceptFailed) { Thread.Sleep(500); }
                            if (!exit && ctx != null)
                            {
                                // process requests one by one
                                try
                                {
                                    ctx.Response.AppendHeader("Content-Type", "application/xml");
                                    XmlWriterSettings settings = new XmlWriterSettings();
                                    settings.Encoding = Encoding.UTF8;
                                    settings.CheckCharacters = false;
                                    settings.Indent = true;
                                    XmlWriter w = XmlTextWriter.Create(ctx.Response.OutputStream, settings);
                                    w.WriteStartElement("DacqResponse");
                                    w.WriteElementString("DacqStartTime", startTime.ToString(TIME_FORMAT));
                                    if (prevRequestTime == DateTime.MinValue) { prevRequestTime = startTime; }
                                    DateTime thisRequestTime = DateTime.Now;
                                    string command = GetHttpRequestCommand(ctx.Request.Url.ToString());
                                    if (command == "components")
                                    {
                                        w.WriteElementString("PreviousRequestTime", prevRequestTime.ToString(TIME_FORMAT));
                                        w.WriteElementString("ThisRequestTime", thisRequestTime.ToString(TIME_FORMAT));
                                    }
                                    w.WriteElementString("Request", ctx.Request.Url.ToString());
                                    w.WriteElementString("Command", command);
                                    w.WriteStartElement("ResponseBody");
                                    if (command == "help")
                                    {
                                        WriteSupportedCommands(w);
                                    }
                                    else if (command == "components")
                                    {
                                        WriteComponentInfo(w, components, thisRequestTime, prevRequestTime);
                                        // the next "components" request reports relative to this one
                                        prevRequestTime = thisRequestTime;
                                    }
                                    else if (command == "sources")
                                    {
                                        WriteRssInfo(w, components);
                                    }
                                    w.WriteEndElement(); // ResponseBody
                                    w.WriteEndElement(); // DacqResponse
                                    w.Close();
                                }
                                finally
                                {
                                    // always release the response, even if writing it failed,
                                    // so that the client is not left waiting
                                    ctx.Response.Close();
                                }
                            }
                            else if (!exit)
                            {
                                // the accept failed: pause before retrying so that a persistently
                                // failing listener does not turn this loop into a busy loop
                                Thread.Sleep(500);
                            }
                        }
                        catch (Exception e)
                        {
                            logger.Warn("Main.HttpServer", e);
                        }
                    }
                    listener.Stop();
                    logger.Info("Main.HttpServer", "HTTP server stopped.");
                    httpServerRunning = false;
                })).Start();
            }
            // Ctrl-C does not kill the process; it requests a graceful shutdown and logs
            // which components are still running at that moment
            Console.CancelKeyPress += delegate(object sender, ConsoleCancelEventArgs e)
            {
                logger.Info("Main", "*** Ctrl-C command received. ***");
                e.Cancel = true;
                exit = true;
                string componentsStr = "";
                foreach (StreamDataProducerPoll c in dataReaders)
                {
                    if (c.IsRunning) { componentsStr += "\r\n" + c.GetType() + " : " + c.Name; }
                }
                foreach (StreamDataConsumer dataConsumer in dataConsumers)
                {
                    if (dataConsumer.IsRunning) { componentsStr += "\r\n" + dataConsumer.GetType() + " : " + dataConsumer.Load; }
                }
                logger.Info("Main", "Active components:" + componentsStr);
            };
            logger.Info("Main", "Starting Dacq ...");
            // all readers feed this component, which dispatches to the pipelines built below
            PassOnComponent lb = new PassOnComponent(); // data load balancer
            lb.DispatchPolicy = DispatchPolicy.BalanceLoadMax;
            dataConsumers.Add(lb);
            // a single ZeroMQ emitter (if enabled) is shared by all pipelines
            ZeroMqEmitterComponent zmq = null;
            if (ENABLE_ZEROMQ)
            {
                zmq = new ZeroMqEmitterComponent();
                dataConsumers.Add(zmq);
            }
            // initialize the boilerplate remover's history over a temporary database connection
            DatabaseConnection dbConnection = new DatabaseConnection();
            if (DB_CONNECTION_STRING != null)
            {
                dbConnection.ConnectionString = DB_CONNECTION_STRING;
                dbConnection.Connect();
                UrlTreeBoilerplateRemoverComponent.InitializeHistory(dbConnection);
                dbConnection.Disconnect();
                RssFeedComponent.DatabaseConnectionString = SQL_DB_CONNECTION_STRING;
            }
            // build the processing pipelines (one per iteration)
            for (int i = 0; i < NUM_WRITERS; i++)
            {
                DocumentCorpusWriterComponent dcw = new DocumentCorpusWriterComponent(DB_CONNECTION_STRING_DUMP, /*xmlDataRoot=*/null);
                DocumentWriterComponent dwc = new DocumentWriterComponent(/*connectionString=*/null, XML_DATA_ROOT_DUMP_NEW, HTML_DATA_ROOT_DUMP_NEW);
                dcw.IsDumpWriter = true;
                UrlTreeBoilerplateRemoverComponent bpr = new UrlTreeBoilerplateRemoverComponent(DB_CONNECTION_STRING);
                DocumentCorpusWriterComponent cw = new DocumentCorpusWriterComponent(DB_CONNECTION_STRING, XML_DATA_ROOT);
                DocumentWriterComponent dw = new DocumentWriterComponent(DB_CONNECTION_STRING_NEW, XML_DATA_ROOT_NEW, HTML_DATA_ROOT_NEW);
                HtmlTokenizerComponent htc = new HtmlTokenizerComponent();
                SentenceSplitterComponent ssc = new SentenceSplitterComponent();
                EnglishTokenizerComponent tok = new EnglishTokenizerComponent();
                EnglishLemmatizerComponent lem = new EnglishLemmatizerComponent(EnglishLemmatizerComponent.Type.Both);
                EnglishPosTaggerComponent pt = new EnglishPosTaggerComponent();
                LanguageDetectorComponent ld = new LanguageDetectorComponent();
                DocumentFilterComponent df = new DocumentFilterComponent();
                // filter handler: returns false (and logs the reason) for documents to be rejected
                df.OnFilterDocument += new DocumentFilterComponent.FilterDocumentHandler(delegate(Document document, Logger dfLogger) {
                    string docId = document.Features.GetFeatureValue("guid");
                    // reject documents whose content type is not Text
                    if (document.Features.GetFeatureValue("contentType") != "Text")
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: contentType not Text (id={0}).", docId);
                        return false;
                    }
                    // reject documents with blacklisted URL
                    if (document.Features.GetFeatureValue("blacklisted") == "True")
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: responseUrl blacklisted (id={0}).", docId);
                        return false;
                    }
                    // reject documents with not enough content
                    if (Convert.ToInt32(document.Features.GetFeatureValue("bprContentCharCount")) < 100)
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: bprContentCharCount < 100 (id={0}).", docId);
                        return false;
                    }
                    // reject non-English documents
                    if (document.Features.GetFeatureValue("detectedCharRange") != "Basic Latin")
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: detectedCharRange not Basic Latin (id={0}).", docId);
                        return false;
                    }
                    if (document.Features.GetFeatureValue("detectedLanguage") != "English")
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: detectedLanguage not English (id={0}).", docId);
                        return false;
                    }
                    // reject exact duplicates
                    if (document.Features.GetFeatureValue("unseenContent") == "No")
                    {
                        dfLogger.Info("OnFilterDocument", "Document rejected: no unseen content (id={0}).", docId);
                        return false;
                    }
                    return true;
                });
                ld.BlockSelector = "TextBlock/Content"; // due to problems with itar-tass.com
                df.SubscribeDumpConsumer(dcw);
                df.SubscribeDumpConsumer(dwc);

                // balancer -> HTML tokenizer -> boilerplate remover -> language detector -> filter
                lb.Subscribe(htc);
                htc.Subscribe(bpr);
                bpr.Subscribe(ld);
                ld.Subscribe(df);

                // filter -> sentence splitter -> tokenizer -> lemmatizer -> POS tagger
                df.Subscribe(ssc);
                ssc.Subscribe(tok);
                tok.Subscribe(lem);
                lem.Subscribe(pt);

                // the POS tagger output goes to both writers and, if enabled, to ZeroMQ
                pt.Subscribe(cw);
                pt.Subscribe(dw);
                if (ENABLE_ZEROMQ) { pt.Subscribe(zmq); }

                dataConsumers.AddRange(new StreamDataConsumer[] { dcw, dwc, df, ld, htc, ssc, tok, pt, cw, dw, lem, bpr });
            }
            // initialize stream simulator
            string offlineSource = ConfigurationManager.AppSettings["offlineSource"];
            if (offlineSource != null)
            {
                DocumentStreamReaderComponent dsr = new DocumentStreamReaderComponent(offlineSource);
                dsr.Name = offlineSource;
                dataReaders.Add(dsr);
                dsr.Subscribe(lb);
            }
            // initialize RSS feed components; the sources file consists of "site: <id>" lines,
            // each followed by the source URLs of that site; empty lines and lines starting
            // with '#' are ignored
            RssFeedComponent rssComp = null;
            Set<string> sites = new Set<string>();
            if (SOURCES_FILE_NAME != null)
            {
                string[] sources = File.ReadAllLines(SOURCES_FILE_NAME);
                foreach (string _url in sources)
                {
                    string url = _url.Trim();
                    if (url != "" && !url.StartsWith("#"))
                    {
                        Match m;
                        if ((m = Regex.Match(url, @"^site\s*:(?<siteId>.*)$", RegexOptions.IgnoreCase)).Success)
                        {
                            // a new site starts here: create its feed component
                            string siteId = m.Result("${siteId}").Trim().ToLower();
                            if (sites.Contains(siteId)) { throw new Exception(string.Format("Duplicated site identifier ({0}).", siteId)); }
                            sites.Add(siteId);
                            rssComp = new RssFeedComponent(siteId);
                            rssComp.MaxDocsPerCorpus = Convert.ToInt32(Utils.GetConfigValue("MaxDocsPerCorpus", "50"));
                            rssComp.RandomDelayAtStart = new ArrayList<string>("yes,on,true,1,y".Split(','))
                                .Contains(Utils.GetConfigValue("RandomDelayAtStart", "true").ToLower());
                            rssComp.Name = siteId;
                            rssComp.TimeBetweenPolls = SLEEP_BETWEEN_POLLS;
                            rssComp.IncludeRssXml = true;
                            if (SQL_DB_CONNECTION_STRING != null)
                            {
                                rssComp.Initialize(SQL_DB_CONNECTION_STRING);
                            }
                            rssComp.IncludeRawData = true;
                            rssComp.Subscribe(lb);
                            dataReaders.Add(rssComp);
                        }
                        else if (rssComp != null)
                        {
                            // a source line belonging to the most recently declared site;
                            // source lines before the first "site:" line are ignored
                            rssComp.AddSource(url);
                        }
                    }
                }
            }
            foreach (StreamDataProducerPoll c in dataReaders)
            {
                c.Start();
            }
            // register all components (with a fresh GUID each) for the HTTP server's reports
            foreach (IWorkflowComponent obj in dataReaders) { components.Add(obj, Guid.NewGuid()); }
            foreach (IWorkflowComponent obj in dataConsumers) { components.Add(obj, Guid.NewGuid()); }
            startTime = DateTime.Now;
            // idle until the Ctrl-C handler sets the exit flag
            while (!exit) { Thread.Sleep(500); }
            // shut down gracefully
            logger.Info("Main", "Please wait while shutting down ...");
            // wait for HTTP server shutdown
            while (httpServerRunning) { Thread.Sleep(500); }
            // stop the data readers: first signal all of them, then wait for each one
            foreach (StreamDataProducerPoll c in dataReaders)
            {
                c.Stop();
            }
            foreach (StreamDataProducerPoll c in dataReaders)
            {
                while (c.IsRunning) { Thread.Sleep(500); }
            }
            // wait for all data consumers to finish
            foreach (StreamDataConsumer dataConsumer in dataConsumers)
            {
                if (dataConsumer.IsRunning) { while (!dataConsumer.IsSuspended) { Thread.Sleep(500); } }
                dataConsumer.Dispose();
            }
            logger.Info("Main", "Dacq successfully stopped.");
        }