/// <summary>
/// Extract the host and port to connect to from either the HTTP request
/// line or, failing that, the <c>Host</c> request header; update the
/// request line to remove the scheme, host name and port.
/// </summary>
/// <param name="hrl">
/// The HTTP request line. When the host is taken from its URI and
/// <c>RelayHttpProxyHost</c> is <c>null</c>, the URI is rewritten
/// without the scheme and authority.
/// </param>
/// <param name="hh_rq">The HTTP request headers</param>
/// <param name="port">
/// When this method returns, contains the request port: the explicit
/// port if one was given, otherwise 443 for <c>CONNECT</c> and
/// <c>https://</c> requests and 80 for everything else.
/// </param>
/// <returns>The destination host name, without any port suffix</returns>
/// <exception cref="HttpProtocolBroken">
/// The URI carries a scheme other than <c>http</c> or <c>https</c>, or
/// no host is present in either the URI or the <c>Host</c> header.
/// </exception>
/// <remarks>
/// May modify the URI of <c>hrl</c>. A non-numeric port is not
/// translated: the exception raised by <c>int.Parse</c> propagates.
/// </remarks>
protected string ParseDestinationHostAndPort(HttpRequestLine hrl, HttpHeaders hh_rq, out int port)
{
    string host = null;
    bool bIsConnect = hrl.Method.Equals("CONNECT");
    port = bIsConnect ? 443 : 80;

    bool bIsHTTP1_0 = hrl.ProtocolVersion.Equals("1.0");
    if (hrl.URI.Equals("*"))
    {
        Debug.Assert(!bIsHTTP1_0);
        goto hostname_from_header;
    }

    // At this point, hrl.URI follows one of these forms:
    // - scheme:(//authority)/abs_path
    // - authority
    // - /abs_path

    int prefix = 0; // current parse position

    // Only look for a scheme when the URI is not in "/abs_path" form:
    // such a URI may legitimately contain "://" inside its query string.
    if (!hrl.URI.StartsWith("/", StringComparison.Ordinal) && hrl.URI.Contains("://"))
    {
        if (hrl.URI.StartsWith("http://", StringComparison.Ordinal))
        {
            prefix = 7; // length of "http://"
        }
        else if (hrl.URI.StartsWith("https://", StringComparison.Ordinal))
        {
            prefix = 8; // length of "https://"
            port = 443;
        }
        else
        {
            throw new HttpProtocolBroken("Expected scheme missing or unsupported");
        }
    }

    // Starting from offset prefix, we now have either:
    // 1) authority
    // 2) authority/abs_path
    // 3) /abs_path
    int slash = hrl.URI.IndexOf('/', prefix);
    string authority = null;
    if (slash == -1)
    {
        // case 1: everything after the scheme is the authority. The
        // scheme itself must be excluded, otherwise its ':' would be
        // mistaken for the host/port separator.
        if (hrl.URI.Length > prefix)
        {
            authority = hrl.URI.Substring(prefix);
        }
    }
    else if (slash > prefix)
    {
        // case 2 (strict inequality: an empty authority is no authority)
        authority = hrl.URI.Substring(prefix, slash - prefix);
    }

    if (authority != null)
    {
        // authority is either:
        // a) hostname
        // b) hostname:
        // c) hostname:port
        int c = authority.IndexOf(':');
        if (c < 0)
        {
            // case a)
            host = authority;
        }
        else
        {
            // cases b) and c): the host stops at the colon
            host = authority.Substring(0, c);
            if (c < authority.Length - 1)
            {
                // case c)
                port = int.Parse(authority.Substring(c + 1));
            }
        }
        prefix += authority.Length;
    }

    if (host != null)
    {
#if false
        // XXX Not sure whether this can happen (without doing ad
        // replacement) or if we want to prevent it
        if (hh_rq.Host != null)
        {
            // Does hh_rq.Host and host match? (disregarding
            // the potential ":port" suffix of hh_rq.Host)
            int c2 = hh_rq.Host.IndexOf(':');
            string rq_host = c2 < 0 ? hh_rq.Host :
                hh_rq.Host.Substring(0, c2);
            if (!rq_host.Equals(host))
            {
                // Host discrepancy: fix the 'Host' header
                hh_rq.Host = host;
            }
        }
#endif

        // Remove the host from the request URI, unless the "server"
        // is actually a proxy, in which case the URI should remain
        // unchanged. (RFC 2616, section 5.1.2)
        if (RelayHttpProxyHost == null)
        {
            string remainder = hrl.URI.Substring(prefix);
            if (remainder.Length == 0 && !bIsConnect)
            {
                // An absolute URI without a path must be forwarded as
                // "/" ("*" for OPTIONS). (RFC 2616, section 5.1.2)
                remainder = hrl.Method.Equals("OPTIONS") ? "*" : "/";
            }
            hrl.URI = remainder;
        }
        return host;
    }

hostname_from_header:
    host = hh_rq.Host;
    if (host == null)
    {
        throw new HttpProtocolBroken("No host specified");
    }
    int cp = host.IndexOf(':');
    if (cp >= 0)
    {
        // Take the port text before truncating the host: both are
        // slices of the same original header value.
        string portText = host.Substring(cp + 1);
        host = host.Substring(0, cp);
        if (portText.Length > 0)
        {
            port = int.Parse(portText);
        }
    }
    return host;
}
/// <summary>
/// Pipeline step: reads the request line and headers from the browser
/// socket, sets the next step to <c>SendRequest</c>, and hands the
/// request to <c>OnReceiveRequest</c>. If a next step is still set
/// afterwards, connects to the destination (or the relay proxy) and
/// records the request body framing. The <c>Proxy-Connection</c>
/// persistence hints are recorded in either case.
/// </summary>
protected virtual void ReadRequest()
{
    // The request line is the first thing a client sends. When it does
    // not show up in time we get an IoBroken (or a SocketException);
    // both are routine, so the request is aborted without rethrowing.
    try
    {
        RequestLine = new HttpRequestLine(BrowserSocket);
    }
    catch (IoBroken)
    {
        AbortRequest();
        return;
    }
    catch (SocketException)
    {
        AbortRequest();
        return;
    }

    RequestHeaders = new HttpHeaders(BrowserSocket);

    // CONNECT is answered with a 501 and the request is dropped.
    if (RequestLine.Method.Equals("CONNECT"))
    {
        BrowserSocket.Send501();
        AbortRequest();
        return;
    }

    // OnReceiveRequest has to run before Connect(), because Connect()
    // will modify the request URI.
    State.NextStep = SendRequest;
    var transfer = new TransferItem
    {
        Headers = RequestHeaders,
        HttpRequestLine = RequestLine,
        BrowserSocket = BrowserSocket,
        RemoteSocket = RemoteSocket,
        State = State
    };
    OnReceiveRequest(transfer);
    RequestLine.URI = transfer.HttpRequestLine.URL;

    // What remains is to work out:
    //   1) where to connect
    //   2) whether the request carries a message body
    //   3) whether the BrowserSocket connection should be kept alive
    if (State.NextStep != null)
    {
        // 1) Destination: the relay proxy when one is configured,
        //    otherwise whatever the request itself designates.
        if (RelayHttpProxyHost != null)
        {
            Connect(RelayHttpProxyHost, RelayHttpProxyPort);
        }
        else
        {
            int destinationPort;
            string destinationHost = ParseDestinationHostAndPort(
                RequestLine, RequestHeaders, out destinationPort);
            Console.WriteLine("Connect to {0}", destinationHost);
            Connect(destinationHost, destinationPort);
        }

        // 2) Message body (RFC 2616, section 4.3): Transfer-Encoding
        //    takes precedence over Content-Length.
        State.RequestHasMessage = false;
        State.RequestMessageLength = 0;
        State.RequestMessageChunked = false;

        if (RequestHeaders.TransferEncoding != null)
        {
            State.RequestHasMessage = true;
            State.RequestMessageChunked =
                Array.IndexOf(RequestHeaders.TransferEncoding, "chunked") >= 0;
            Debug.Assert(State.RequestMessageChunked);
        }
        else if (RequestHeaders.ContentLength != null)
        {
            State.RequestMessageLength = (uint)RequestHeaders.ContentLength;

            // HTTP 1.0 expects "Content-Length: 0" when there is no
            // entity body (RFC 1945, section 7.2), so a zero length
            // does not count as a message.
            if (State.RequestMessageLength > 0)
            {
                State.RequestHasMessage = true;
            }
        }
    }

    // 3) Persistence of the browser connection
    State.UseDefaultPersistBrowserSocket = true;
    if (RequestHeaders.ProxyConnection != null)
    {
        // Proxy-Connection is not part of the HTTP 1.1 standard. See
        // http://homepage.ntlworld.com./jonathan.deboynepollard/FGA/web-proxy-connection-header.html
        // The first "close" or "keep-alive" token decides.
        foreach (string token in RequestHeaders.ProxyConnection)
        {
            bool wantsClose = token.Equals("close");
            if (wantsClose || token.Equals("keep-alive"))
            {
                State.PersistConnectionBrowserSocket = !wantsClose;
                State.UseDefaultPersistBrowserSocket = false;
                break;
            }
        }

        // The header is cleared unless a relay proxy is configured.
        if (RelayHttpProxyHost == null)
        {
            RequestHeaders.ProxyConnection = null;
        }
    }

    // Note: fields named in the 'Connection' header are not removed,
    // although the specs say they should be.
}
/// <summary>
/// Extract the host and port to connect to from either the HTTP request
/// line or, failing that, the <c>Host</c> request header; update the
/// request line to remove the scheme, host name and port.
/// </summary>
/// <param name="hrl">
/// The HTTP request line. When the host is taken from its URI and
/// <c>RelayHttpProxyHost</c> is <c>null</c>, the URI is rewritten
/// without the scheme and authority.
/// </param>
/// <param name="hh_rq">The HTTP request headers</param>
/// <param name="port">
/// When this method returns, contains the request port: the explicit
/// port if one was given, otherwise 443 for <c>CONNECT</c> and
/// <c>https://</c> requests and 80 for everything else.
/// </param>
/// <returns>The destination host name, without any port suffix</returns>
/// <exception cref="HttpProtocolBroken">
/// The URI carries a scheme other than <c>http</c> or <c>https</c>, or
/// no host is present in either the URI or the <c>Host</c> header.
/// </exception>
/// <remarks>
/// May modify the URI of <c>hrl</c>. A non-numeric port is not
/// translated: the exception raised by <c>int.Parse</c> propagates.
/// </remarks>
protected string ParseDestinationHostAndPort(HttpRequestLine hrl, HttpHeaders hh_rq, out int port)
{
    string host = null;
    bool bIsConnect = hrl.Method.Equals("CONNECT");
    port = bIsConnect ? 443 : 80;

    bool bIsHTTP1_0 = hrl.ProtocolVersion.Equals("1.0");
    if (hrl.URI.Equals("*"))
    {
        Debug.Assert(!bIsHTTP1_0);
        goto hostname_from_header;
    }

    // At this point, hrl.URI follows one of these forms:
    // - scheme:(//authority)/abs_path
    // - authority
    // - /abs_path

    int prefix = 0; // current parse position

    // Only look for a scheme when the URI is not in "/abs_path" form:
    // such a URI may legitimately contain "://" inside its query string.
    if (!hrl.URI.StartsWith("/", StringComparison.Ordinal) && hrl.URI.Contains("://"))
    {
        if (hrl.URI.StartsWith("http://", StringComparison.Ordinal))
        {
            prefix = 7; // length of "http://"
        }
        else if (hrl.URI.StartsWith("https://", StringComparison.Ordinal))
        {
            prefix = 8; // length of "https://"
            port = 443;
        }
        else
        {
            throw new HttpProtocolBroken("Expected scheme missing or unsupported");
        }
    }

    // Starting from offset prefix, we now have either:
    // 1) authority
    // 2) authority/abs_path
    // 3) /abs_path
    int slash = hrl.URI.IndexOf('/', prefix);
    string authority = null;
    if (slash == -1)
    {
        // case 1: everything after the scheme is the authority. The
        // scheme itself must be excluded, otherwise its ':' would be
        // mistaken for the host/port separator.
        if (hrl.URI.Length > prefix)
        {
            authority = hrl.URI.Substring(prefix);
        }
    }
    else if (slash > prefix)
    {
        // case 2 (strict inequality: an empty authority is no authority)
        authority = hrl.URI.Substring(prefix, slash - prefix);
    }

    if (authority != null)
    {
        // authority is either:
        // a) hostname
        // b) hostname:
        // c) hostname:port
        int c = authority.IndexOf(':');
        if (c < 0)
        {
            // case a)
            host = authority;
        }
        else
        {
            // cases b) and c): the host stops at the colon
            host = authority.Substring(0, c);
            if (c < authority.Length - 1)
            {
                // case c)
                port = int.Parse(authority.Substring(c + 1));
            }
        }
        prefix += authority.Length;
    }

    if (host != null)
    {
#if false
        // XXX Not sure whether this can happen (without doing ad
        // replacement) or if we want to prevent it
        if (hh_rq.Host != null)
        {
            // Does hh_rq.Host and host match? (disregarding
            // the potential ":port" suffix of hh_rq.Host)
            int c2 = hh_rq.Host.IndexOf(':');
            string rq_host = c2 < 0 ? hh_rq.Host :
                hh_rq.Host.Substring(0, c2);
            if (!rq_host.Equals(host))
            {
                // Host discrepancy: fix the 'Host' header
                hh_rq.Host = host;
            }
        }
#endif

        // Remove the host from the request URI, unless the "server"
        // is actually a proxy, in which case the URI should remain
        // unchanged. (RFC 2616, section 5.1.2)
        if (RelayHttpProxyHost == null)
        {
            string remainder = hrl.URI.Substring(prefix);
            if (remainder.Length == 0 && !bIsConnect)
            {
                // An absolute URI without a path must be forwarded as
                // "/" ("*" for OPTIONS). (RFC 2616, section 5.1.2)
                remainder = hrl.Method.Equals("OPTIONS") ? "*" : "/";
            }
            hrl.URI = remainder;
        }
        return host;
    }

hostname_from_header:
    host = hh_rq.Host;
    if (host == null)
    {
        throw new HttpProtocolBroken("No host specified");
    }
    int cp = host.IndexOf(':');
    if (cp >= 0)
    {
        // Take the port text before truncating the host: both are
        // slices of the same original header value.
        string portText = host.Substring(cp + 1);
        host = host.Substring(0, cp);
        if (portText.Length > 0)
        {
            port = int.Parse(portText);
        }
    }
    return host;
}