/// <summary>
/// Formats the information known about a link into a single message and passes it to
/// <paramref name="post"/>. The message has the shape
/// <c>[original -&gt; ]link ::|:!: info[ annotation]</c>, where <c>:!:</c> is used when
/// <c>linkAndInfo.IsError</c> is set.
/// </summary>
/// <param name="linkAndInfo">The link, its info text, its error level and (if any) the original link.</param>
/// <param name="post">Callback that receives the formatted message.</param>
protected void PostLinkInfo(LinkAndInfo linkAndInfo, Action<string> post)
{
    // Look for an annotation for the host name or one of its parent domains, trying the
    // most specific name first. The loop stops before the last label would be tried on its own.
    // TryGetValue leaves domainAnnotation null when nothing matches.
    string domainAnnotation = null;
    string[] hostNamePieces = linkAndInfo.Link.Host.ToLowerInvariant().Split('.');
    for (int i = 0; i < hostNamePieces.Length - 1; ++i)
    {
        string domainPiece = hostNamePieces.Skip(i).StringJoin(".");
        if (Config.DomainAnnotations.TryGetValue(domainPiece, out domainAnnotation))
        {
            break;
        }
    }
    string domainAnnotationString = (domainAnnotation == null) ? "" : (" " + domainAnnotation);

    string linkString = linkAndInfo.Link.AbsoluteUri;

    // A configured fake response replaces the info text, but only for successful fetches.
    // A single TryGetValue avoids looking the key up twice (ContainsKey + indexer).
    string info = linkAndInfo.Info;
    string fakeResponse;
    if (linkAndInfo.ErrorLevel == FetchErrorLevel.Success
        && Config.FakeResponses.TryGetValue(linkString, out fakeResponse))
    {
        info = fakeResponse;
    }

    // Prefix the original link only if one is recorded.
    string redirectedString = (linkAndInfo.OriginalLink != null)
        ? $"{ShortenLink(linkAndInfo.OriginalLink.AbsoluteUri)} -> "
        : "";

    post($"{redirectedString}{ShortenLink(linkString)} {(linkAndInfo.IsError ? ":!:" : "::")} {ShortenInfo(info)}{domainAnnotationString}");
}
/// <summary>
/// Remembers the last link contained in an outgoing channel message as the new "last link",
/// unless that link is already the remembered one.
/// </summary>
/// <param name="sender">The event source (unused).</param>
/// <param name="args">Event arguments carrying the outgoing message.</param>
protected void HandleOutgoingChannelMessage(object sender, OutgoingMessageEventArgs args)
{
    var foundLinks = FindLinks(args.OutgoingMessage);
    if (foundLinks.Count <= 0)
    {
        // no links in this message; keep whatever is currently remembered
        return;
    }

    var newestLink = foundLinks[foundLinks.Count - 1];

    // only replace the remembered entry if it refers to a different link
    if (LastLinkAndInfo == null || LastLinkAndInfo.Link != newestLink)
    {
        LastLinkAndInfo = LinkAndInfo.CreateUnfetched(newestLink);
    }
}
/// <summary>
/// Processes an incoming channel message: extracts its links, remembers the last one as an
/// unfetched "last link" and passes all links on to <c>LinksAction</c>.
/// Messages flagged with <c>MessageFlags.UserBanned</c> are ignored.
/// </summary>
/// <param name="sender">The event source (unused).</param>
/// <param name="args">Event arguments carrying the message text.</param>
/// <param name="flags">Flags describing the message.</param>
protected void HandleChannelMessage(object sender, IChannelMessageEventArgs args, MessageFlags flags)
{
    if (flags.HasFlag(MessageFlags.UserBanned))
    {
        return;
    }

    // collect every link in the message text
    string messageText = args.Message;
    IList<Uri> foundLinks = FindLinks(messageText);

    // the final link in the message becomes the new "last link"
    if (foundLinks.Count > 0)
    {
        Uri newestLink = foundLinks[foundLinks.Count - 1];
        LastLinkAndInfo = LinkAndInfo.CreateUnfetched(newestLink);
    }

    // LinksAction is invoked even when the message contained no links
    LinksAction(args, flags, foundLinks);
}
/// <summary>
/// Formats the given link info via <c>PostLinkInfo</c> and sends the result to a channel.
/// </summary>
/// <param name="linkAndInfo">The link and its info to post.</param>
/// <param name="channel">The channel the message is sent to.</param>
protected void PostLinkInfoToChannel(LinkAndInfo linkAndInfo, string channel)
{
    Action<string> sendToChannel = text => ConnectionManager.SendChannelMessage(channel, text);
    PostLinkInfo(linkAndInfo, sendToChannel);
}
/// <summary>
/// Fetches <paramref name="link"/> with an HTTP GET request and produces a description of what it
/// points to. Redirects (a response carrying a <c>Location</c> header) are followed manually by
/// recursion, up to <c>Config.MaxRedirects</c> times. The downloaded body is offered to each
/// plugin in <c>Plugins</c>; if none handles it, a generic description based on the content type
/// is returned.
/// </summary>
/// <param name="link">The link to fetch.</param>
/// <param name="originalLink">
/// The link the redirect chain started from, or <c>null</c> if <paramref name="link"/> was not
/// reached through a redirect.
/// </param>
/// <param name="redirectCount">The number of redirects followed so far.</param>
/// <returns>
/// The link with the obtained info. Name resolution failures, blacklisted addresses, HTTP
/// failures, read timeouts and oversized downloads are reported as error results with a
/// parenthesized info text rather than thrown.
/// </returns>
public virtual LinkAndInfo RealObtainLinkInfo([NotNull] Uri link, [CanBeNull] Uri originalLink = null, int redirectCount = 0)
{
    // hyperrecursion?
    if (redirectCount > Config.MaxRedirects)
    {
        return new LinkAndInfo(link, "(too many redirections)", FetchErrorLevel.TransientError, originalLink);
    }

    // the fragment is not sent with the request
    var linkBuilder = new UriBuilder(link);
    linkBuilder.Fragment = "";

    // check URL blacklist
    // NOTE(review): SyncWait presumably blocks on the task and rethrows its exception unwrapped
    // (the SocketException catch below relies on that) -- confirm against its definition.
    IPAddress[] addresses;
    try
    {
        linkBuilder.Host = IDNMapping.GetAscii(link.Host);
        addresses = Dns.GetHostAddressesAsync(linkBuilder.Host).SyncWait();
    }
    catch (SocketException se)
    {
        Logger.LogWarning("socket exception when resolving {Host}: {Exception}", linkBuilder.Host, se);
        return new LinkAndInfo(link, "(cannot resolve)", FetchErrorLevel.TransientError, originalLink);
    }

    if (addresses.Length == 0)
    {
        Logger.LogWarning("no addresses found when resolving {Host}", linkBuilder.Host);
        return new LinkAndInfo(link, "(cannot resolve)", FetchErrorLevel.TransientError, originalLink);
    }

    // NOTE(review): the request below is sent to the host name, not to one of the addresses
    // checked here, so the name is presumably resolved a second time when connecting.
    if (addresses.Any(IPAddressBlacklist.IsIPAddressBlacklisted))
    {
        return new LinkAndInfo(link, "(I refuse to access this IP address)", FetchErrorLevel.LastingError, originalLink);
    }

    // redirects are handled manually so that every hop passes through the checks above
    var httpClientHandler = new HttpClientHandler
    {
        AllowAutoRedirect = false
    };

    using (httpClientHandler)
    using (var httpClient = new HttpClient(httpClientHandler))
    using (var request = new HttpRequestMessage(HttpMethod.Get, linkBuilder.Uri))
    using (var respStore = new MemoryStream())
    {
        // assumed content type if the response does not declare one
        var contentType = new MediaTypeHeaderValue("application/octet-stream");

        httpClient.Timeout = TimeSpan.FromSeconds(Config.TimeoutSeconds);
        request.Headers.UserAgent.TryParseAdd(Config.FakeUserAgent);
        request.Headers.AcceptLanguage.TryParseAdd(Config.AcceptLanguage);

        // The request is sent inside the try block so that a failure to obtain any response
        // at all (resp still null) reaches the "(HTTP error)" branch of the catch handler
        // instead of escaping from this method.
        HttpResponseMessage resp = null;
        try
        {
            resp = httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead).SyncWait();

            // redirect?
            Uri location = resp.Headers.Location;
            if (location != null)
            {
                // go there instead
                Logger.LogDebug(
                    "{AbsoluteURI} (originally {OriginalAbsoluteURI}) redirects to {Location}",
                    link.AbsoluteUri, originalLink?.AbsoluteUri ?? link.AbsoluteUri, location
                );
                // the Location value is combined with the current link via the two-argument Uri constructor
                return RealObtainLinkInfo(new Uri(link, location), originalLink ?? link, redirectCount + 1);
            }

            // success?
            if (!resp.IsSuccessStatusCode)
            {
                // handled by the catch block below, which reports the status code
                throw new HttpRequestException("unsuccessful");
            }

            // find the content-type
            contentType = resp.Content.Headers.ContentType ?? contentType;

            // start timing
            var readTimeout = TimeSpan.FromSeconds(Config.TimeoutSeconds);
            var timer = Stopwatch.StartNew();

            // copy the body into respStore, bounded in both time and size
            var buf = new byte[DownloadBufferSize];
            Stream responseStream = resp.Content.ReadAsStreamAsync().SyncWait();
            if (responseStream.CanTimeout)
            {
                responseStream.ReadTimeout = (int)readTimeout.TotalMilliseconds;
            }

            long totalBytesRead = 0;
            for (;;)
            {
                int bytesRead = responseStream.Read(buf, 0, buf.Length);
                if (bytesRead == 0)
                {
                    // end of stream
                    break;
                }
                totalBytesRead += bytesRead;

                if (timer.Elapsed > readTimeout)
                {
                    return new LinkAndInfo(link, "(reading timed out)", FetchErrorLevel.TransientError, originalLink);
                }
                if (totalBytesRead > Config.MaxDownloadSizeBytes)
                {
                    return new LinkAndInfo(link, "(file too large)", FetchErrorLevel.LastingError, originalLink);
                }

                respStore.Write(buf, 0, bytesRead);
            }
        }
        catch (HttpRequestException we)
        {
            if (resp != null)
            {
                // a response was received; report its status code
                return new LinkAndInfo(link, $"(HTTP {resp.StatusCode})", FetchErrorLevel.TransientError, originalLink);
            }

            // no response at all
            Logger.LogWarning("HTTP exception thrown: {Exception}", we);
            return new LinkAndInfo(link, "(HTTP error)", FetchErrorLevel.TransientError, originalLink);
        }
        finally
        {
            resp?.Dispose();
        }

        // let the plugins have a go; the first one returning a non-null result wins
        var linkToResolve = new LinkToResolve(link, originalLink, respStore.ToArray(), contentType);
        foreach (ILinkResolverPlugin plugin in Plugins)
        {
            LinkAndInfo ret = plugin.ResolveLink(linkToResolve);
            if (ret != null)
            {
                return ret;
            }
        }

        // fallback
        switch (contentType.MediaType)
        {
            case "application/octet-stream":
                return new LinkAndInfo(link, "(can't figure out the content type, sorry)", FetchErrorLevel.LastingError, originalLink);
            case "text/html":
            case "application/xhtml+xml":
                return new LinkAndInfo(link, "HTML", FetchErrorLevel.Success, originalLink);
            case "image/png":
                return new LinkAndInfo(link, "PNG image", FetchErrorLevel.Success, originalLink);
            case "image/jpeg":
                return new LinkAndInfo(link, "JPEG image", FetchErrorLevel.Success, originalLink);
            case "image/gif":
                return new LinkAndInfo(link, "GIF image", FetchErrorLevel.Success, originalLink);
            case "application/json":
                return new LinkAndInfo(link, "JSON", FetchErrorLevel.Success, originalLink);
            case "text/xml":
            case "application/xml":
                return new LinkAndInfo(link, "XML", FetchErrorLevel.Success, originalLink);
            default:
                return new LinkAndInfo(link, $"file of type {contentType.MediaType}", FetchErrorLevel.Success, originalLink);
        }
    }
}