Example #1
0
 /// <summary>
 /// Creates a GithubScraper bound to the given corpus context. The new
 /// scraper receives a freshly generated GUID and the status "init"; its
 /// progress, priority, download count, download limit and time limit all
 /// start at zero, and its stopwatch is created but not started.
 /// </summary>
 /// <param name="context">Corpus context stored in m_context.</param>
 public GithubScraper(ICorpusContext context)
 {
     // Collaborators
     m_context = context;
     m_timer   = new Stopwatch();

     // Identity and lifecycle state
     m_guid     = System.Guid.NewGuid();
     m_status   = "init";
     m_progress = 0.0f;
     m_priority = 0;

     // Counters and limits
     m_downloadCount = 0;
     m_downloadLimit = 0;
     m_timeLimit     = 0;
 }
Example #2
0
        /// <summary>
        /// Creates corpus content for a Project Gutenberg text file: builds a
        /// CorpusContent record from the supplied metadata and raw bytes, sets
        /// its Hash from hashContent(Content), and adds the record to the
        /// context's CorpusContentRepository.
        /// </summary>
        /// <param name="Name">Stored as the record's Name.</param>
        /// <param name="Type">Stored as the record's Type.</param>
        /// <param name="ScraperGuid">Stored as the record's ScraperGuid.</param>
        /// <param name="ScraperType">Stored as the record's ScraperType.</param>
        /// <param name="DownloadDate">Stored as the record's DownloadDate.</param>
        /// <param name="DownloadURL">Stored as the record's URL.</param>
        /// <param name="Content">Raw bytes stored as the record's Content and passed to hashContent.</param>
        /// <param name="m_context">Context whose CorpusContentRepository receives the new record.</param>
        /// <param name="corpusId">Stored as the record's CorpusId.</param>
        public static void addCorpusContent(string Name, string Type,
                                            Guid ScraperGuid, string ScraperType, DateTime DownloadDate, string DownloadURL,
                                            byte[] Content, ICorpusContext m_context, int corpusId)
        {
            // Properties are initialized in the same order as before; the hash
            // is computed last, from the same bytes that are stored.
            CorpusContent entry = new CorpusContent
            {
                CorpusId     = corpusId,
                Name         = Name,
                Type         = Type,
                ScraperGuid  = ScraperGuid,
                ScraperType  = ScraperType,
                DownloadDate = DownloadDate,
                URL          = DownloadURL,
                Content      = Content,
                Hash         = hashContent(Content)
            };

            m_context.CorpusContentRepository.Add(entry);
        }
Example #3
0
        /* Constructors */

        /// <summary>
        /// Creates a ScraperFactory that uses the given corpus context and
        /// registers the built-in scraper types. In builds where Debug.Assert
        /// is active, each registered type is checked to implement IScraper.
        /// </summary>
        /// <param name="context">Corpus context stored in m_context.</param>
        public ScraperFactory(ICorpusContext context)
        {
            m_context = context;

            /* Fill our array of scraper types */
            m_scraperTypes = new List<Type>
            {
                typeof(DebianScraper),
                typeof(TestScraper),
                typeof(TextScraper),
                typeof(TwitterScraper),
                typeof(GithubScraper)
            };

            /* TODO: Add scrapers from DLL assemblies */

            /* Ensure that each scraper type implements IScraper */
            Type scraperInterface = typeof(IScraper);
            foreach (Type scraperType in m_scraperTypes)
            {
                Debug.Assert(scraperType.GetInterfaces().Contains(scraperInterface));
            }

            /* TODO: Ensure that each scraper type implements the needed
             * static methods (with appropriate signatures) */
        }
Example #4
0
        /// <summary>
        /// Creates a corpus content record from a tweet and adds it to the
        /// context's CorpusContentRepository. The tweet text is stored as
        /// ASCII-encoded bytes and hashed with hashContent; the tweet's
        /// creation time, URL, id, author name and language are copied onto
        /// the record, plus latitude/longitude when the tweet has coordinates.
        /// </summary>
        /// <param name="Name">Stored as the record's Name.</param>
        /// <param name="Type">Stored as the record's Type.</param>
        /// <param name="ScraperGuid">Stored as the record's ScraperGuid.</param>
        /// <param name="ScraperType">Stored as the record's ScraperType.</param>
        /// <param name="tweet">Tweet supplying the content and its metadata.</param>
        /// <param name="m_context">Context whose CorpusContentRepository receives the new record.</param>
        /// <param name="corpusId">Stored as the record's CorpusId.</param>
        public static void addCorpusContent(string Name, string Type,
                                            Guid ScraperGuid, string ScraperType, ITweet tweet, ICorpusContext m_context, int corpusId)
        {
            // Encode the text once so the stored content and its hash are
            // guaranteed to be derived from the same bytes.
            // NOTE(review): ASCII encoding replaces every non-ASCII character
            // (accents, emoji, non-Latin scripts) with '?'. UTF-8 would be
            // lossless, but confirm how stored content is decoded and whether
            // existing hashes must stay comparable before switching.
            byte[] tweetBytes = Encoding.ASCII.GetBytes(tweet.Text);

            CorpusContent corpContent = new CorpusContent();

            corpContent.CorpusId     = corpusId;
            corpContent.Name         = Name;
            corpContent.Type         = Type;
            corpContent.ScraperGuid  = ScraperGuid;
            corpContent.ScraperType  = ScraperType;
            corpContent.Content      = tweetBytes;
            corpContent.DownloadDate = tweet.CreatedAt;
            corpContent.URL          = tweet.Url;
            if (tweet.Coordinates != null) //may be null if tweet does not have a location
            {
                corpContent.Lat  = (float)tweet.Coordinates.Latitude;
                corpContent.Long = (float)tweet.Coordinates.Longitude;
            }
            corpContent.TweetID    = tweet.Id;
            corpContent.AuthorName = tweet.CreatedBy.Name;

            // Store the language value itself. The previous
            // GetType().FullName recorded the CLR type name of the language
            // value, which is the same string for every tweet.
            corpContent.Language = tweet.Language.ToString();

            // TODO: Hashtags and Source are not populated yet.

            corpContent.Hash = hashContent(tweetBytes);
            m_context.CorpusContentRepository.Add(corpContent);
        }
Example #5
0
 /// <summary>
 /// Creates a DebianScraper bound to the given corpus context, with a
 /// freshly generated GUID and the status "init".
 /// </summary>
 /// <param name="context">Corpus context stored in m_context.</param>
 public DebianScraper(ICorpusContext context)
 {
     m_context = context;
     m_status  = "init";
     m_guid    = System.Guid.NewGuid();
 }
Example #6
0
 /* Constructors */

 /// <summary>
 /// Creates a CorpusController that uses the given corpus context.
 /// </summary>
 /// <param name="context">Corpus context stored in m_context.</param>
 public CorpusController(ICorpusContext context)
 {
     this.m_context = context;
 }