/// <summary>
/// Creates a GitHub scraper bound to the given corpus context.
/// The scraper gets a freshly generated GUID, starts in the "init" status,
/// and has its progress, priority, download counters and limits set to zero.
/// </summary>
/// <param name="context">Corpus context stored by this scraper.</param>
public GithubScraper(ICorpusContext context)
{
    // Collaborators and identity.
    m_context = context;
    m_guid = System.Guid.NewGuid();

    // Lifecycle state.
    m_status = "init";
    m_priority = 0;
    m_progress = 0.0f;

    // Download accounting.
    m_downloadLimit = 0;
    m_downloadCount = 0;

    // Timing; the stopwatch is only created here, not started.
    m_timeLimit = 0;
    m_timer = new Stopwatch();
}
/// <summary>
/// Builds a <c>CorpusContent</c> record from already-downloaded bytes
/// (e.g. a Project Gutenberg text file) and adds it to the content
/// repository of the supplied context.
/// </summary>
/// <param name="Name">Name stored on the record.</param>
/// <param name="Type">Content type stored on the record.</param>
/// <param name="ScraperGuid">GUID of the scraper that produced the content.</param>
/// <param name="ScraperType">Type name of the scraper that produced the content.</param>
/// <param name="DownloadDate">Date stored as the record's download date.</param>
/// <param name="DownloadURL">URL stored on the record.</param>
/// <param name="Content">Raw content bytes; also passed to <c>hashContent</c> to fill the hash.</param>
/// <param name="m_context">Context whose <c>CorpusContentRepository</c> receives the record.</param>
/// <param name="corpusId">Identifier of the corpus the record belongs to.</param>
public static void addCorpusContent(string Name, string Type, Guid ScraperGuid, string ScraperType, DateTime DownloadDate, string DownloadURL, byte[] Content, ICorpusContext m_context, int corpusId)
{
    // Members are initialized in the same order as before; the hash is computed last.
    CorpusContent entry = new CorpusContent
    {
        CorpusId = corpusId,
        Name = Name,
        Type = Type,
        ScraperGuid = ScraperGuid,
        ScraperType = ScraperType,
        DownloadDate = DownloadDate,
        URL = DownloadURL,
        Content = Content,
        Hash = hashContent(Content)
    };

    m_context.CorpusContentRepository.Add(entry);
}
/* Constructors */

/// <summary>
/// Creates a scraper factory bound to the given corpus context and
/// registers the built-in scraper types.
/// </summary>
/// <param name="context">Corpus context stored by this factory.</param>
public ScraperFactory(ICorpusContext context)
{
    m_context = context;

    /* Fill our list of scraper types */
    m_scraperTypes = new List<Type>
    {
        typeof(DebianScraper),
        typeof(TestScraper),
        typeof(TextScraper),
        typeof(TwitterScraper),
        typeof(GithubScraper)
    };

    /* TODO: Add scrapers from DLL assemblies */

    /* Ensure that each scraper type implements IScraper */
    Type scraperInterface = typeof(IScraper);
    foreach (Type scraperType in m_scraperTypes)
    {
        Debug.Assert(scraperType.GetInterfaces().Contains(scraperInterface));
    }

    /* TODO: Ensure that each scraper type implements the needed
     * static methods (with appropriate signatures) */
}
/// <summary>
/// Builds a <c>CorpusContent</c> record from a tweet and adds it to the
/// content repository of the supplied context.
/// </summary>
/// <param name="Name">Name stored on the record.</param>
/// <param name="Type">Content type stored on the record.</param>
/// <param name="ScraperGuid">GUID of the scraper that produced the content.</param>
/// <param name="ScraperType">Type name of the scraper that produced the content.</param>
/// <param name="tweet">Tweet supplying the text, creation date, URL, id, author, language and optional coordinates.</param>
/// <param name="m_context">Context whose <c>CorpusContentRepository</c> receives the record.</param>
/// <param name="corpusId">Identifier of the corpus the record belongs to.</param>
public static void addCorpusContent(string Name, string Type, Guid ScraperGuid, string ScraperType, ITweet tweet, ICorpusContext m_context, int corpusId)
{
    // Encode once and reuse for both the stored content and the hash.
    // UTF-8 is used instead of ASCII because ASCII encoding replaces every
    // non-ASCII character (accents, CJK, emoji) with '?', corrupting the
    // tweet text. For pure-ASCII text the resulting bytes are identical.
    byte[] textBytes = Encoding.UTF8.GetBytes(tweet.Text);

    CorpusContent corpContent = new CorpusContent();
    corpContent.CorpusId = corpusId;
    corpContent.Name = Name;
    corpContent.Type = Type;
    corpContent.ScraperGuid = ScraperGuid;
    corpContent.ScraperType = ScraperType;
    corpContent.Content = textBytes;
    corpContent.DownloadDate = tweet.CreatedAt;
    corpContent.URL = tweet.Url;

    if (tweet.Coordinates != null) // may be null if tweet does not have a location
    {
        corpContent.Lat = (float)tweet.Coordinates.Latitude;
        corpContent.Long = (float)tweet.Coordinates.Longitude;
    }

    corpContent.TweetID = tweet.Id;
    corpContent.AuthorName = tweet.CreatedBy.Name;
    //corpContent.Hashtags = tweet.Hashtags;

    // Store the language value itself. The previous GetType().FullName
    // recorded the CLR type name of the language object, which is the same
    // string for every tweet regardless of its actual language.
    corpContent.Language = tweet.Language.ToString();
    // corpContent.Source = source;

    corpContent.Hash = hashContent(textBytes);

    m_context.CorpusContentRepository.Add(corpContent);
}
/// <summary>
/// Creates a Debian scraper bound to the given corpus context, with a
/// freshly generated GUID and the initial status "init".
/// </summary>
/// <param name="context">Corpus context stored by this scraper.</param>
public DebianScraper(ICorpusContext context)
{
    m_context = context;

    // Identity and initial lifecycle state.
    m_guid = System.Guid.NewGuid();
    m_status = "init";
}
/* Constructors */

/// <summary>
/// Creates a corpus controller that works against the given corpus context.
/// </summary>
/// <param name="context">Corpus context stored by this controller.</param>
public CorpusController(ICorpusContext context)
{
    this.m_context = context;
}