/// <summary>
        /// Scans one downloaded page for text entries and for the link to the next page.
        /// Each entry starts at <c>StartTag</c>; its id runs from just after <c>StartTag</c>
        /// up to <c>EndOfId</c>, and its message runs from just after <c>EndOfStartTag</c>
        /// up to <c>EndTag</c>.
        /// </summary>
        /// <param name="pageData">
        /// Raw page content. A null or empty value yields an empty result with
        /// <c>HasNextPage</c> set to false.
        /// </param>
        /// <returns>
        /// A result whose <c>Sins</c> holds one <see cref="Sin"/> per complete entry, and
        /// whose <c>HasNextPage</c>/<c>NextPageUrl</c> describe the next-page link when a
        /// complete one is present. Truncated entries or links are ignored instead of
        /// raising <see cref="System.ArgumentOutOfRangeException"/>.
        /// </returns>
        private TrawlerResult ParseTextsInPage(string pageData)
        {
            // Collect into a local list and hand it to the result, rather than
            // downcasting whatever collection the result happens to start with.
            var sins = new List<Sin>();
            var result = new TrawlerResult();
            result.Sins = sins;
            result.HasNextPage = false;

            if (string.IsNullOrEmpty(pageData))
            {
                return result;
            }

            int index = pageData.IndexOf(StartTag);
            while (index >= 0)
            {
                int endOfStartTag = pageData.IndexOf(EndOfStartTag, index);
                if (endOfStartTag < 0)
                {
                    // Opening tag is never closed: the page is truncated here.
                    break;
                }

                // The message starts 2 characters after the match; offset kept from the
                // original code (presumably the length of EndOfStartTag - TODO confirm).
                int startTagEnd = endOfStartTag + 2;
                if (startTagEnd > pageData.Length)
                {
                    break;
                }

                int end = pageData.IndexOf(EndTag, startTagEnd);
                int idStart = index + StartTag.Length;
                int idEnd = pageData.IndexOf(EndOfId, idStart);
                if (end < 0 || idEnd < 0)
                {
                    // All searches move forward, so a delimiter missing here is also
                    // missing for every later entry; stop instead of throwing.
                    break;
                }

                string message = pageData.Substring(startTagEnd, end - startTagEnd);
                string textId = pageData.Substring(idStart, idEnd - idStart);
                sins.Add(new Sin() { Content = message, SourceSinId = textId, Source = SourceName });

                index = pageData.IndexOf(StartTag, end);
            }

            int nextLinkTextIndex = pageData.IndexOf(NextLinkStartText);
            if (nextLinkTextIndex >= 0)
            {
                int nextPageLinkStart = nextLinkTextIndex + NextLinkStartText.Length;
                int nextPageLinkEnd = pageData.IndexOf(NextLinkEndtext, nextPageLinkStart);
                if (nextPageLinkEnd >= 0)
                {
                    string nextPageUrl = pageData.Substring(nextPageLinkStart, nextPageLinkEnd - nextPageLinkStart);
                    result.HasNextPage = true;
                    result.NextPageUrl = BaseDomain + nextPageUrl;
                }
            }

            return result;
        }
        /// <summary>
        /// Downloads the initial page and every page reachable through "next page" links,
        /// and returns all sins parsed from them in a single result.
        /// </summary>
        /// <returns>A result whose <c>Sins</c> contains the entries of every fetched page, in fetch order.</returns>
        public TrawlerResult GetSins()
        {
            var collected = new List<Sin>();
            string url = InitialPageUrl;
            TrawlerResult page;

            do
            {
                log.DebugFormat("Fetch:{0}", url);
                string html = _pageDownloader.GetPage(url);

                page = ParseTextsInPage(html);
                collected.AddRange(page.Sins);

                // Only used for the next iteration, i.e. when HasNextPage is true.
                url = page.NextPageUrl;
            }
            while (page.HasNextPage);

            var combined = new TrawlerResult();
            combined.Sins = collected;
            return combined;
        }
Beispiel #3
0
        /// <summary>
        /// Parses a single page: collects every entry delimited by <c>StartTag</c> ...
        /// <c>EndTag</c> as a <see cref="Sin"/> and records the next-page link, if any.
        /// </summary>
        /// <param name="pageData">
        /// Raw page content. Null or empty input produces an empty result with
        /// <c>HasNextPage</c> false.
        /// </param>
        /// <returns>
        /// The parsed entries plus next-page information. Entries or links whose closing
        /// delimiter is missing are skipped rather than causing
        /// <see cref="System.ArgumentOutOfRangeException"/>.
        /// </returns>
        private TrawlerResult ParseTextsInPage(string pageData)
        {
            // Build the list locally and assign it, so no downcast of result.Sins is needed.
            var sins   = new List <Sin>();
            var result = new TrawlerResult();

            result.Sins        = sins;
            result.HasNextPage = false;

            if (string.IsNullOrEmpty(pageData))
            {
                return(result);
            }

            int index = pageData.IndexOf(StartTag);

            while (index >= 0)
            {
                int endOfStartTag = pageData.IndexOf(EndOfStartTag, index);

                // The message begins 2 characters after the match; the offset is kept
                // from the original code (presumably EndOfStartTag's length - TODO confirm).
                int startTagEnd = endOfStartTag + 2;

                if (endOfStartTag < 0 || startTagEnd > pageData.Length)
                {
                    // Start tag never closes: nothing further can be parsed.
                    break;
                }

                int end     = pageData.IndexOf(EndTag, startTagEnd);
                int idStart = index + StartTag.Length;
                int idEnd   = pageData.IndexOf(EndOfId, idStart);

                if (end < 0 || idEnd < 0)
                {
                    // Searches only move forward, so a delimiter missing here is missing
                    // for all later entries as well.
                    break;
                }

                sins.Add(new Sin()
                {
                    Content     = pageData.Substring(startTagEnd, end - startTagEnd),
                    SourceSinId = pageData.Substring(idStart, idEnd - idStart),
                    Source      = SourceName
                });
                index = pageData.IndexOf(StartTag, end);
            }

            int nextLinkTextIndex = pageData.IndexOf(NextLinkStartText);

            if (nextLinkTextIndex < 0)
            {
                return(result);
            }

            int nextPageLinkStart = nextLinkTextIndex + NextLinkStartText.Length;
            int nextPageLinkEnd   = pageData.IndexOf(NextLinkEndtext, nextPageLinkStart);

            if (nextPageLinkEnd >= 0)
            {
                string nextPageUrl = pageData.Substring(nextPageLinkStart, nextPageLinkEnd - nextPageLinkStart);
                result.HasNextPage = true;
                result.NextPageUrl = BaseDomain + nextPageUrl;
            }
            return(result);
        }
Beispiel #4
0
        /// <summary>
        /// Fetches the initial page, then follows next-page links until a page reports
        /// none, gathering the sins parsed from every page.
        /// </summary>
        /// <returns>A new result whose <c>Sins</c> is the combined list from all pages.</returns>
        public TrawlerResult GetSins()
        {
            log.DebugFormat("Fetch:{0}", InitialPageUrl);
            string firstPage = _pageDownloader.GetPage(InitialPageUrl);

            var combined      = new TrawlerResult();
            var collectedSins = new List <Sin>();
            var currentPage   = ParseTextsInPage(firstPage);

            collectedSins.AddRange(currentPage.Sins);

            // Keep following the chain for as long as the last parsed page links onward.
            for (; currentPage.HasNextPage; collectedSins.AddRange(currentPage.Sins))
            {
                log.DebugFormat("Fetch:{0}", currentPage.NextPageUrl);
                string followUpPage = _pageDownloader.GetPage(currentPage.NextPageUrl);

                currentPage = ParseTextsInPage(followUpPage);
            }

            combined.Sins = collectedSins;

            return(combined);
        }
Beispiel #5
0
 /// <summary>
 /// Passes the sins held by <paramref name="sins"/> to <c>_indulgeMeService.SaveSins</c>.
 /// </summary>
 /// <param name="sins">Result whose <c>Sins</c> collection is handed to the service.</param>
 private void StoreSins(TrawlerResult sins)
 {
     var sinsToSave = sins.Sins;

     _indulgeMeService.SaveSins(sinsToSave);
 }