/// <summary>
/// Extract the host and port to use from either the HTTP request
/// line, or the HTTP headers
/// </summary>
/// <param name="hrl">
/// The HTTP request line. In this version the code that strips the
/// host name and port number from the URI is commented out, so the
/// URI is left untouched</param>
/// <param name="hh_rq">The HTTP request headers; <c>Host</c> is used
/// when the request URI does not carry an authority</param>
/// <param name="port">
/// When this method returns, contains the request port. It starts as
/// <c>DestinationPort</c> (or 80 when that is 0), becomes 443 for an
/// <c>https://</c> URI, and is overridden by an explicit
/// <c>:port</c> suffix of the authority
/// </param>
/// <remarks>
/// NOTE(review): this copy of the method is truncated; see the note
/// at the end of the body
/// </remarks>
protected string ParseDestinationHostAndPort(
    HttpRequestLine hrl, HttpHeaders hh_rq, out int port)
{
    string host = null;
    // Default port: the configured DestinationPort, or 80 when unset (0)
    port = DestinationPort;
    if (DestinationPort == 0)
    {
        port = 80;
    }
    // Previous default, kept disabled:
    //bool bIsConnect = hrl.Method.Equals("CONNECT");
    //port = bIsConnect ? 443 : 80;

    bool bIsHTTP1_0 = hrl.ProtocolVersion.Equals("1.0");
    if (hrl.URI.Equals("*"))
    {
        // A "*" request target has no authority: use the Host header
        System.Diagnostics.Debug.Assert(!bIsHTTP1_0);
        goto hostname_from_header;
    }

    // At this point, hrl.URI follows one of these forms:
    // - scheme:(//authority)/abs_path
    // - authority
    // - /abs_path

    int prefix = 0; // current parse position
    if (hrl.URI.Contains("://"))
    {
        // NOTE(review): "://" anywhere in the URI (e.g. inside a query
        // string) reaches this branch, not only a leading scheme
        if (hrl.URI.StartsWith("http://"))
        {
            prefix = 7; // length of "http://"
        }
        else if (hrl.URI.StartsWith("https://"))
        {
            prefix = 8; // length of "https://"
            port = 443;
        }
        else
        {
            throw new HttpProtocolBroken(
                "Expected scheme missing or unsupported");
        }
    }

    // Starting from offset prefix, we now have either:
    // 1) authority (only for CONNECT)
    // 2) authority/abs_path
    // 3) /abs_path

    int slash = hrl.URI.IndexOf('/', prefix);
    string authority = null;
    if (slash == -1)
    {
        // case 1
        // NOTE(review): this takes the whole URI, including any scheme
        // skipped by prefix; for "http://host" the ':' of the scheme
        // is then the first ':' seen below - confirm this is intended
        authority = hrl.URI;
        //System.Diagnostics.Debug.Assert(bIsConnect);
    }
    else if (slash > 0) // Strict inequality
    // case 2
    {
        authority = hrl.URI.Substring(prefix, slash - prefix);
    }
    // case 3 (slash == 0) leaves authority null

    if (authority != null)
    {
        // authority is either:
        // a) hostname
        // b) hostname:
        // c) hostname:port

        int c = authority.IndexOf(':');
        if (c < 0)
        {
            // case a)
            host = authority;
        }
        else if (c == authority.Length - 1)
        {
            // case b)
            // NOTE(review): TrimEnd('/') strips '/' characters only, so
            // the trailing ':' stays in host - ':' was probably meant
            host = authority.TrimEnd('/');
        }
        else
        {
            // case c)
            // int.Parse throws on a non-numeric port
            host = authority.Substring(0, c);
            port = int.Parse(authority.Substring(c + 1));
        }
        // Advance the parse position past the authority
        prefix += authority.Length;
    }

    if (host != null)
    {
#if false
        // XXX Not sure whether this can happen (without doing ad
        // replacement) or if we want to prevent it
        if (hh_rq.Host != null)
        {
            // Does hh_rq.Host and host match? (disregarding
            // the potential ":port" prefix of hh_rq.Host)
            int c2 = hh_rq.Host.IndexOf(':');
            string rq_host = c2 < 0 ? hh_rq.Host :
                hh_rq.Host.Substring(0, c2);
            if (!rq_host.Equals(host))
            {
                // Host discrepancy: fix the 'Host' header
                hh_rq.Host = host;
            }
        }
#endif

        // Remove the host from the request URI, unless the "server"
        // is actually a proxy, in which case the URI should remain
        // unchanged. (RFC 2616, section 5.1.2)
        // (Disabled in this version: the URI is never rewritten.)
        //if (!UsingHttpProxy)
        //{
        //    hrl.URI = hrl.URI.Substring(prefix);
        //    log.Debug("Rewriting request line as: " +
        //        hrl.RequestLine);
        //}
        return(host);
    }

// No authority in the request URI: fall back to the Host header
hostname_from_header:
    host = hh_rq.Host;
    if (host == null)
    {
        throw new HttpProtocolBroken("No host specified");
    }
    int cp = host.IndexOf(':');
    // NOTE(review): the text stops here in the middle of the statement;
    // the handling of "host:" / "host:port" and the final return are
    // missing, and the braces below are unbalanced as found.
    if (cp < 0)
        /* nothing */ } {
/// <summary>
/// Extract the host and port to use from either the HTTP request
/// line, or the HTTP headers; update the request line to remove
/// the hostname and port
/// </summary>
/// <param name="hrl">
/// The HTTP request line; when <c>RelayHttpProxyHost</c> is null the
/// URI is updated to remove the host name and port number</param>
/// <param name="hh_rq">The HTTP request headers; <c>Host</c> is used
/// when the request URI does not carry an authority</param>
/// <param name="port">
/// When this method returns, contains the request port. It defaults
/// to 443 for CONNECT and 80 otherwise, becomes 443 for an
/// <c>https://</c> URI, and is overridden by an explicit
/// <c>:port</c> suffix of the authority
/// </param>
/// <remarks>
/// May modify the URI of <c>hrl</c>.
/// NOTE(review): this copy of the method is truncated; see the note
/// at the end of the body
/// </remarks>
protected string ParseDestinationHostAndPort(
    HttpRequestLine hrl, HttpHeaders hh_rq, out int port)
{
    string host = null;
    // Default port depends on the method only
    bool bIsConnect = hrl.Method.Equals("CONNECT");
    port = bIsConnect ? 443 : 80;

    bool bIsHTTP1_0 = hrl.ProtocolVersion.Equals("1.0");
    if (hrl.URI.Equals("*"))
    {
        // A "*" request target has no authority: use the Host header
        System.Diagnostics.Debug.Assert(!bIsHTTP1_0);
        goto hostname_from_header;
    }

    // At this point, hrl.URI follows one of these forms:
    // - scheme:(//authority)/abs_path
    // - authority
    // - /abs_path

    int prefix = 0; // current parse position
    if (hrl.URI.Contains("://"))
    {
        // NOTE(review): "://" anywhere in the URI (e.g. inside a query
        // string) reaches this branch, not only a leading scheme
        if (hrl.URI.StartsWith("http://"))
        {
            prefix = 7; // length of "http://"
        }
        else if (hrl.URI.StartsWith("https://"))
        {
            prefix = 8; // length of "https://"
            port = 443;
        }
        else
        {
            throw new HttpProtocolBroken(
                "Expected scheme missing or unsupported");
        }
    }

    // Starting from offset prefix, we now have either:
    // 1) authority (only for CONNECT)
    // 2) authority/abs_path
    // 3) /abs_path

    int slash = hrl.URI.IndexOf('/', prefix);
    string authority = null;
    if (slash == -1)
    {
        // case 1
        // NOTE(review): this takes the whole URI, including any scheme
        // skipped by prefix; for "http://host" the ':' of the scheme
        // is then the first ':' seen below - confirm this is intended
        authority = hrl.URI;
        System.Diagnostics.Debug.Assert(bIsConnect);
    }
    else if (slash > 0) // Strict inequality
    // case 2
    {
        authority = hrl.URI.Substring(prefix, slash - prefix);
    }
    // case 3 (slash == 0) leaves authority null

    if (authority != null)
    {
        // authority is either:
        // a) hostname
        // b) hostname:
        // c) hostname:port

        int c = authority.IndexOf(':');
        if (c < 0)
        {
            // case a)
            host = authority;
        }
        else if (c == authority.Length - 1)
        {
            // case b)
            // NOTE(review): TrimEnd('/') strips '/' characters only, so
            // the trailing ':' stays in host - ':' was probably meant
            host = authority.TrimEnd('/');
        }
        else
        {
            // case c)
            // int.Parse throws on a non-numeric port
            host = authority.Substring(0, c);
            port = int.Parse(authority.Substring(c + 1));
        }
        // Advance the parse position past the authority
        prefix += authority.Length;
    }

    if (host != null)
    {
        // Remove the host from the request URI, unless the "server"
        // is actually a proxy, in which case the URI should remain
        // unchanged. (RFC 2616, section 5.1.2)
        if (RelayHttpProxyHost == null)
        {
            hrl.URI = hrl.URI.Substring(prefix);
            log.Debug("Rewriting request line as: " +
                hrl.RequestLine);
        }
        return(host);
    }

// No authority in the request URI: fall back to the Host header
hostname_from_header:
    host = hh_rq.Host;
    if (host == null)
    {
        throw new HttpProtocolBroken("No host specified");
    }
    int cp = host.IndexOf(':');
    // NOTE(review): the text stops here in the middle of the statement;
    // the handling of "host:" / "host:port" and the final return are
    // missing, and the braces below are unbalanced as found.
    if (cp < 0)
        /* nothing */ } {
/// <summary>
/// Extract the host and port to use from either the HTTP request
/// line, or the HTTP headers; update the request line to remove
/// the hostname and port
/// </summary>
/// <param name="hrl">
/// The HTTP request line; when the host comes from the URI and
/// <c>RelayHttpProxyHost</c> is null, the URI is rewritten to drop the
/// scheme, host name and port number</param>
/// <param name="hh_rq">The HTTP request headers; <c>Host</c> is used
/// when the request URI does not carry an authority</param>
/// <param name="port">
/// When this method returns, contains the request port: the explicit
/// <c>:port</c> if there is one, otherwise 443 for CONNECT requests
/// and <c>https://</c> URIs, and 80 in all other cases
/// </param>
/// <returns>The destination host name, without the port</returns>
/// <exception cref="HttpProtocolBroken">
/// The URI scheme is neither http nor https, the port is not a number
/// in the range 0-65535, or no host could be found in either the URI
/// or the <c>Host</c> header
/// </exception>
/// <remarks>
/// May modify the URI of <c>hrl</c>. Host and port are split at the
/// first ':' of the authority.
/// </remarks>
protected string ParseDestinationHostAndPort(
    HttpRequestLine hrl, HttpHeaders hh_rq, out int port)
{
    bool bIsConnect = hrl.Method.Equals("CONNECT");
    port = bIsConnect ? 443 : 80;

    // "hostname", "hostname:" or "hostname:port"; taken from the
    // request URI when it has an authority, else from the Host header
    string hostport = null;
    int prefix = 0; // current parse position in hrl.URI

    if (hrl.URI.Equals("*"))
    {
        // No authority in a "*" target: the Host header decides
        System.Diagnostics.Debug.Assert(
            !hrl.ProtocolVersion.Equals("1.0"));
    }
    else if (!hrl.URI.StartsWith("/", System.StringComparison.Ordinal))
    {
        // Not "/abs_path", so hrl.URI is one of:
        // - scheme://authority(/abs_path)
        // - authority(/abs_path)
        // ("/abs_path" is excluded up front so that a "://" inside
        // its query string is not mistaken for a scheme.)
        if (hrl.URI.IndexOf("://",
                System.StringComparison.Ordinal) >= 0)
        {
            if (hrl.URI.StartsWith("http://",
                    System.StringComparison.Ordinal))
                prefix = 7; // length of "http://"
            else if (hrl.URI.StartsWith("https://",
                    System.StringComparison.Ordinal))
            {
                prefix = 8; // length of "https://"
                port = 443;
            }
            else
                throw new HttpProtocolBroken(
                    "Expected scheme missing or unsupported");
        }

        // Starting from offset prefix, we now have either:
        // 1) authority (CONNECT, or an absolute URI with no path)
        // 2) authority/abs_path
        int slash = hrl.URI.IndexOf('/', prefix);
        string authority;
        if (slash == -1)
        {
            // case 1: everything after the scheme is the authority
            authority = hrl.URI.Substring(prefix);
            System.Diagnostics.Debug.Assert(bIsConnect || prefix > 0);
        }
        else
            // case 2
            authority = hrl.URI.Substring(prefix, slash - prefix);

        // An empty authority ("http:///x") names no host; leave it
        // to the Host header
        if (authority.Length > 0)
        {
            hostport = authority;
            prefix += authority.Length;
        }
    }

    bool bHostFromUri = hostport != null;
    if (!bHostFromUri)
    {
        hostport = hh_rq.Host;
        if (hostport == null)
            throw new HttpProtocolBroken("No host specified");
    }

    // Split "hostname[:[port]]". The port text is extracted before
    // anything is truncated, and a bare trailing ':' is dropped.
    string host = hostport;
    int colon = hostport.IndexOf(':');
    if (colon >= 0)
    {
        host = hostport.Substring(0, colon);
        string portText = hostport.Substring(colon + 1);
        if (portText.Length > 0)
        {
            // The port comes straight from the client: reject bad
            // input as a protocol error rather than letting
            // FormatException/OverflowException escape
            int parsedPort;
            if (!int.TryParse(portText,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out parsedPort)
                || parsedPort > 65535)
                throw new HttpProtocolBroken("Invalid port number");
            port = parsedPort;
        }
    }

    if (bHostFromUri)
    {
#if false
        // XXX Not sure whether this can happen (without doing ad
        // replacement) or if we want to prevent it
        if (hh_rq.Host != null)
        {
            // Does hh_rq.Host and host match? (disregarding
            // the potential ":port" prefix of hh_rq.Host)
            int c2 = hh_rq.Host.IndexOf(':');
            string rq_host = c2 < 0 ? hh_rq.Host :
                hh_rq.Host.Substring(0, c2);
            if (!rq_host.Equals(host))
                // Host discrepancy: fix the 'Host' header
                hh_rq.Host = host;
        }
#endif

        // Remove the host from the request URI, unless the "server"
        // is actually a proxy, in which case the URI should remain
        // unchanged. (RFC 2616, section 5.1.2)
        if (RelayHttpProxyHost == null)
        {
            string rewritten = hrl.URI.Substring(prefix);
            // An absolute URI without a path must be sent as "/"
            if (rewritten.Length == 0 && !bIsConnect)
                rewritten = "/";
            hrl.URI = rewritten;
            log.Debug("Rewriting request line as: " +
                hrl.RequestLine);
        }
    }

    return host;
}
/// <summary>
/// Pipeline step: read the HTTP request from the client, schedule
/// the next step to be <c>SendRequest</c>, and call
/// <c>OnReceiveRequest</c>
/// </summary>
protected virtual void ReadRequest()
{
    // The request line is the very first thing a client sends. When
    // it does not arrive, reading fails with IoBroken or
    // SocketException; both are routine, so the request is simply
    // aborted instead of letting the exception propagate.
    try
    {
        RequestLine = new HttpRequestLine(SocketBP);
    }
    catch (TrotiNet.IoBroken)
    {
        AbortRequest();
        return;
    }
    catch (SocketException)
    {
        AbortRequest();
        return;
    }

    RequestHeaders = new HttpHeaders(SocketBP);

    bool bConnectMethod = RequestLine.Method.Equals("CONNECT");
    if (bConnectMethod)
    {
        log.Debug("Method CONNECT not implemented");
        SocketBP.Send501();
        AbortRequest();
        return;
    }

    log.Info("Got request " + RequestLine.RequestLine);

    // OnReceiveRequest must run before Connect(), since connecting
    // rewrites the request URI.
    State.NextStep = SendRequest;
    OnReceiveRequest();

    // What remains is to work out:
    //   1) which host to connect to
    //   2) whether the request carries a message body
    //   3) whether the BP connection is to be kept alive
    // 1) and 2) are skipped when no next step is scheduled any more.
    if (State.NextStep != null)
    {
        // 1) Destination: the relay proxy when one is configured,
        // otherwise whatever the request itself designates
        if (RelayHttpProxyHost != null)
        {
            Connect(RelayHttpProxyHost, RelayHttpProxyPort);
        }
        else
        {
            int targetPort;
            string targetHost = ParseDestinationHostAndPort(
                RequestLine, RequestHeaders, out targetPort);
            Connect(targetHost, targetPort);
        }

        // 2) Message body and its length (RFC 2616, section 4.3)
        State.bRequestHasMessage = false;
        State.RequestMessageLength = 0;
        State.bRequestMessageChunked = false;

        string[] transferEncodings = RequestHeaders.TransferEncoding;
        if (transferEncodings != null)
        {
            State.bRequestHasMessage = true;
            int chunkedAt =
                Array.IndexOf<string>(transferEncodings, "chunked");
            State.bRequestMessageChunked = chunkedAt >= 0;
            System.Diagnostics.Debug.Assert(
                State.bRequestMessageChunked);
        }
        else if (RequestHeaders.ContentLength != null)
        {
            State.RequestMessageLength =
                (uint)RequestHeaders.ContentLength;

            // HTTP 1.0 sends "Content-Length: 0" when there is no
            // entity body (RFC 1945, section 7.2), so only a
            // non-zero length counts as a message.
            if (State.RequestMessageLength > 0)
            {
                State.bRequestHasMessage = true;
            }
        }
    }

    // 3) Persistence of the BP connection
    State.bUseDefaultPersistBP = true;
    if (RequestHeaders.ProxyConnection != null)
    {
        // Proxy-Connection is not part of the HTTP 1.1 standard. See
        // http://homepage.ntlworld.com./jonathan.deboynepollard/FGA/web-proxy-connection-header.html
        // The first "close" or "keep-alive" token decides.
        foreach (string token in RequestHeaders.ProxyConnection)
        {
            bool bClose = token.Equals("close");
            if (bClose || token.Equals("keep-alive"))
            {
                State.bPersistConnectionBP = !bClose;
                State.bUseDefaultPersistBP = false;
                break;
            }
        }

        // The header is dropped unless the request goes on to
        // another proxy.
        if (RelayHttpProxyHost == null)
        {
            RequestHeaders.ProxyConnection = null;
        }
    }

    // Note: we do not remove fields mentioned in the
    // 'Connection' header (the specs say we should).
}
/// <summary>
/// Extract the host and port to use from either the HTTP request
/// line, or the HTTP headers; update the request line to remove
/// the hostname and port
/// </summary>
/// <param name="hrl">
/// The HTTP request line. An absolute URI without a path gets a
/// trailing "/" appended; when the host comes from the URI and
/// <c>RelayHttpProxyHost</c> is null, the URI is then rewritten to
/// drop the scheme, host name and port number</param>
/// <param name="hh_rq">The HTTP request headers; <c>Host</c> is used
/// when the request URI does not carry an authority</param>
/// <param name="port">
/// When this method returns, contains the request port: the explicit
/// <c>:port</c> if there is one, otherwise 443 for CONNECT requests
/// and <c>https://</c> URIs, and 80 in all other cases
/// </param>
/// <returns>The destination host name, without the port</returns>
/// <exception cref="HttpProtocolBroken">
/// The URI scheme is neither http nor https, the port is not a number
/// in the range 0-65535, or no host could be found in either the URI
/// or the <c>Host</c> header
/// </exception>
/// <remarks>
/// May modify the URI of <c>hrl</c>. Host and port are split at the
/// first ':' of the authority.
/// </remarks>
protected string ParseDestinationHostAndPort(
    HttpRequestLine hrl, HttpHeaders hh_rq, out int port)
{
    bool bIsConnect = hrl.Method.Equals("CONNECT");
    port = bIsConnect ? 443 : 80;

    // "hostname", "hostname:" or "hostname:port"; taken from the
    // request URI when it has an authority, else from the Host header
    string hostport = null;
    int prefix = 0; // current parse position in hrl.URI

    if (hrl.URI.Equals("*"))
    {
        // No authority in a "*" target: the Host header decides
        System.Diagnostics.Debug.Assert(
            !hrl.ProtocolVersion.Equals("1.0"));
    }
    else if (!hrl.URI.StartsWith("/", System.StringComparison.Ordinal))
    {
        // Not "/abs_path", so hrl.URI is one of:
        // - scheme://authority(/abs_path)
        // - authority(/abs_path)
        // ("/abs_path" is excluded up front so that a "://" inside
        // its query string is not mistaken for a scheme.)
        if (hrl.URI.IndexOf("://",
                System.StringComparison.Ordinal) >= 0)
        {
            // An ordinary URL like this needs normalizing: a root URL
            // with no trailing "/" in ip:port form used to make the
            // port parsing below throw, which stopped some
            // online-banking clients from working.
            // Do NOT normalize with new Uri(hrl.URI).ToString(): it
            // turns URL-encoded search terms in the query back into
            // raw (e.g. Chinese) characters, and search pages then
            // show nothing but "?" results.
            if (hrl.URI.StartsWith("http://",
                    System.StringComparison.Ordinal))
                prefix = 7; // length of "http://"
            else if (hrl.URI.StartsWith("https://",
                    System.StringComparison.Ordinal))
            {
                log.Error("Unexpected https request!");
                prefix = 8; // length of "https://"
                port = 443;
            }
            else
                throw new HttpProtocolBroken(
                    "Expected scheme missing or unsupported");
        }

        // Starting from offset prefix, we now have either:
        // 1) authority (CONNECT, or an absolute URI with no path)
        // 2) authority/abs_path
        int slash = hrl.URI.IndexOf('/', prefix);

        // No "/" after an http(s) scheme: the URI designates the root
        // and the trailing "/" was left out, so add it. This is
        // limited to URIs with a scheme (prefix > 0) so that an
        // authority-only target is left as it is.
        if (slash == -1 && prefix > 0)
        {
            hrl.URI = hrl.URI + "/";
            slash = hrl.URI.IndexOf('/', prefix);
            log.Info("URI is not normal: " + hrl.URI);
        }

        string authority;
        if (slash == -1)
        {
            // case 1 without a scheme: the whole URI is the authority
            authority = hrl.URI;
            System.Diagnostics.Debug.Assert(bIsConnect);
            if (!bIsConnect)
                log.Error(
                    "URI only authority but not connect request!");
        }
        else
            // case 2
            authority = hrl.URI.Substring(prefix, slash - prefix);

        // An empty authority ("http:///x") names no host; leave it
        // to the Host header
        if (authority.Length > 0)
        {
            hostport = authority;
            prefix += authority.Length;
        }
    }

    bool bHostFromUri = hostport != null;
    if (!bHostFromUri)
    {
        hostport = hh_rq.Host;
        if (hostport == null)
            throw new HttpProtocolBroken("No host specified");
    }

    // Split "hostname[:[port]]". The port text is extracted before
    // anything is truncated, and a bare trailing ':' is dropped.
    string host = hostport;
    int colon = hostport.IndexOf(':');
    if (colon >= 0)
    {
        host = hostport.Substring(0, colon);
        string portText = hostport.Substring(colon + 1);
        if (portText.Length > 0)
        {
            // The port comes straight from the client: reject bad
            // input as a protocol error rather than letting
            // FormatException/OverflowException escape
            int parsedPort;
            if (!int.TryParse(portText,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out parsedPort)
                || parsedPort > 65535)
                throw new HttpProtocolBroken("Invalid port number");
            port = parsedPort;
        }
    }

    // Remove the host from the request URI, unless the "server"
    // is actually a proxy, in which case the URI should remain
    // unchanged. (RFC 2616, section 5.1.2)
    if (bHostFromUri && RelayHttpProxyHost == null)
    {
        string rewritten = hrl.URI.Substring(prefix);
        // A non-CONNECT target that was only an authority still
        // needs a path
        if (rewritten.Length == 0 && !bIsConnect)
            rewritten = "/";
        hrl.URI = rewritten;
        log.Debug("Rewriting request line as: " +
            hrl.RequestLine);
    }

    return host;
}