/// <summary>
/// Extracts every sin text from one page of markup and records whether the
/// page links to a following page.
/// </summary>
/// <param name="pageData">Raw markup of the page to scan.</param>
/// <returns>
/// A <see cref="TrawlerResult"/> whose <c>Sins</c> contains the texts found on
/// this page; <c>HasNextPage</c>/<c>NextPageUrl</c> describe pagination.
/// </returns>
private TrawlerResult ParseTextsInPage(string pageData)
{
    var result = new TrawlerResult();
    var sins = new List<Sin>();

    int index = pageData.IndexOf(StartTag);
    while (index >= 0)
    {
        // FIX: was a magic "+ 2"; use the marker's actual length so the offset
        // stays correct if EndOfStartTag ever changes.
        int startTagEnd = pageData.IndexOf(EndOfStartTag, index) + EndOfStartTag.Length;
        int end = pageData.IndexOf(EndTag, startTagEnd);
        if (end < 0)
        {
            // FIX: malformed page (start tag with no closing tag) previously
            // threw on a negative substring length; stop scanning instead.
            break;
        }
        string message = pageData.Substring(startTagEnd, end - startTagEnd);

        // The text id sits between the start tag and the EndOfId marker.
        int idStart = index + StartTag.Length;
        int idEnd = pageData.IndexOf(EndOfId, idStart);
        string textId = pageData.Substring(idStart, idEnd - idStart);

        sins.Add(new Sin() { Content = message, SourceSinId = textId, Source = SourceName });
        index = pageData.IndexOf(StartTag, end);
    }

    // FIX: assign a freshly built list instead of down-casting result.Sins to
    // List<Sin>, which would break if TrawlerResult used another collection type.
    result.Sins = sins;

    // IndexOf yields -1 when the next-page marker is absent, so the sum is
    // NextLinkStartText.Length - 1 and the guard below correctly fails.
    int nextPageLinkStart = pageData.IndexOf(NextLinkStartText) + NextLinkStartText.Length;
    if (nextPageLinkStart >= NextLinkStartText.Length)
    {
        int nextPageLinkEnd = pageData.IndexOf(NextLinkEndtext, nextPageLinkStart);
        string nextPageUrl = pageData.Substring(nextPageLinkStart, nextPageLinkEnd - nextPageLinkStart);
        result.HasNextPage = true;
        result.NextPageUrl = BaseDomain + nextPageUrl;
    }
    else
    {
        result.HasNextPage = false;
    }

    return result;
}
/// <summary>
/// Walks the source starting at <c>InitialPageUrl</c>, following next-page
/// links until none remain, and aggregates every parsed sin into one result.
/// </summary>
/// <returns>A <see cref="TrawlerResult"/> holding all sins from all pages.</returns>
public TrawlerResult GetSins()
{
    var collected = new List<Sin>();

    string url = InitialPageUrl;
    TrawlerResult page;
    do
    {
        log.DebugFormat("Fetch:{0}", url);
        page = ParseTextsInPage(_pageDownloader.GetPage(url));
        collected.AddRange(page.Sins);
        url = page.NextPageUrl;
    } while (page.HasNextPage);

    var aggregate = new TrawlerResult();
    aggregate.Sins = collected;
    return aggregate;
}
/// <summary>
/// Scans one page of markup for sin texts and determines whether a link to a
/// next page exists.
/// </summary>
/// <param name="pageData">Raw markup of the page being parsed.</param>
/// <returns>
/// A <see cref="TrawlerResult"/> with the sins found on this page plus the
/// pagination state (<c>HasNextPage</c>, <c>NextPageUrl</c>).
/// </returns>
private TrawlerResult ParseTextsInPage(string pageData)
{
    var result = new TrawlerResult();
    var parsedSins = new List<Sin>();

    int index = pageData.IndexOf(StartTag);
    while (index >= 0)
    {
        // FIX: replace the magic "+ 2" with the marker's length so the offset
        // tracks EndOfStartTag if it is ever redefined.
        int startTagEnd = pageData.IndexOf(EndOfStartTag, index) + EndOfStartTag.Length;
        int end = pageData.IndexOf(EndTag, startTagEnd);
        if (end < 0)
        {
            // FIX: a start tag without a matching end tag previously caused a
            // negative substring length and an exception; bail out cleanly.
            break;
        }
        string message = pageData.Substring(startTagEnd, end - startTagEnd);

        // The source id lies between the start tag and the EndOfId marker.
        int idStart = index + StartTag.Length;
        int idEnd = pageData.IndexOf(EndOfId, idStart);
        string textId = pageData.Substring(idStart, idEnd - idStart);

        parsedSins.Add(new Sin() { Content = message, SourceSinId = textId, Source = SourceName });
        index = pageData.IndexOf(StartTag, end);
    }

    // FIX: set the property directly rather than casting result.Sins to
    // List<Sin>; the cast coupled this method to TrawlerResult's internals.
    result.Sins = parsedSins;

    // When NextLinkStartText is missing, IndexOf returns -1 and the sum equals
    // NextLinkStartText.Length - 1, so the condition below is false as intended.
    int nextPageLinkStart = pageData.IndexOf(NextLinkStartText) + NextLinkStartText.Length;
    if (nextPageLinkStart >= NextLinkStartText.Length)
    {
        int nextPageLinkEnd = pageData.IndexOf(NextLinkEndtext, nextPageLinkStart);
        string nextPageUrl = pageData.Substring(nextPageLinkStart, nextPageLinkEnd - nextPageLinkStart);
        result.HasNextPage = true;
        result.NextPageUrl = BaseDomain + nextPageUrl;
    }
    else
    {
        result.HasNextPage = false;
    }

    return result;
}
/// <summary>
/// Fetches the initial page and every linked follow-up page, merging the sins
/// parsed from each into a single aggregate result.
/// </summary>
/// <returns>A <see cref="TrawlerResult"/> containing all collected sins.</returns>
public TrawlerResult GetSins()
{
    log.DebugFormat("Fetch:{0}", InitialPageUrl);
    var pageResult = ParseTextsInPage(_pageDownloader.GetPage(InitialPageUrl));

    // Seed the aggregate with the first page, then drain the pagination chain.
    var sins = new List<Sin>(pageResult.Sins);
    while (pageResult.HasNextPage)
    {
        log.DebugFormat("Fetch:{0}", pageResult.NextPageUrl);
        pageResult = ParseTextsInPage(_pageDownloader.GetPage(pageResult.NextPageUrl));
        sins.AddRange(pageResult.Sins);
    }

    var combined = new TrawlerResult();
    combined.Sins = sins;
    return combined;
}
/// <summary>
/// Persists the sins carried by the given trawl result via the indulge-me service.
/// </summary>
/// <param name="sins">Trawl result whose <c>Sins</c> collection is saved.</param>
private void StoreSins(TrawlerResult sins)
{
    var sinsToPersist = sins.Sins;
    _indulgeMeService.SaveSins(sinsToPersist);
}