示例#1
0
        //The Content Type supported by the parser

        #endregion

        #region Constructor and Singleton Instance members

        /// <summary>
        /// Private constructor: instances can only be created from inside the class itself.
        /// Sets up the mutex, compiles the regular expressions used by the parser and
        /// fetches the shared filter and settings instances.
        /// </summary>
        private TextParser()
        {
            // Every pattern below is compiled with the same set of options.
            const RegexOptions sharedOptions =
                RegexOptions.CultureInvariant
                | RegexOptions.Multiline
                | RegexOptions.IgnoreCase
                | RegexOptions.Compiled;

            // Synchronization primitive.
            this.mutex = new Mutex();

            // Absolute http/https URLs.
            // To also accept ftp urls use:
            // "(http|ftp|https)://[\w]+(\.[\w]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?"
            this.hrefRegex = new Regex(
                @"(http|https)://[\w]+(\.[\w]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?",
                sharedOptions);

            // Session identifiers: either a run of 40-64 hex digits, or a GUID-shaped token
            // (optionally wrapped in braces/parentheses, dashes optional).
            // NOTE(review): the trailing '$' binds only to the second alternative, and the '|'
            // inside the bracket expressions is a literal character - confirm both are intended.
            // Alternative pattern left by the original author:
            // @"^([0-9a-f]{32})|(\{?[0-9a-f]{8}-([0-9a-f]{4}-){3}-[0-9a-f]{12}\}?)$"
            this.sessionIDRegex = new Regex(
                @"([0-9a-fA-F]{40,64})|([\{|\(]?[0-9a-fA-F]{8}[-]?([0-9a-fA-F]{4}[-]?){3}[0-9a-fA-F]{12}[\)|\}]?)$",
                sharedOptions);

            // One or more consecutive whitespace characters.
            this.spacesRegex = new Regex(@"\s+", sharedOptions);

            // Shared filter instances.
            this.robotsFilter = RobotsFilter.Instance();
            this.domainFilter = DomainFilter.Instance();

            // Global variables and application settings.
            this.globals = Globals.Instance();
        }