Beispiel #1
0
        /// <summary>
        /// Parses a URL string with a hand-written state machine and returns
        /// its components as a <c>URL</c> object.
        /// NOTE(review): the state names and error handling look like an early
        /// revision of the WHATWG URL parsing algorithm - confirm against the
        /// project's documentation before relying on exact conformance.
        /// </summary>
        /// <param name="s">The string to parse. Leading and trailing tab, LF,
        /// FF, CR and space characters are ignored.</param>
        /// <param name="baseurl">Base URL used to resolve input that has no
        /// scheme or that is relative to the base; may be null.</param>
        /// <param name="encoding">Name of a text encoding, looked up through
        /// <c>TextEncoding.getEncoder</c>; may be null. As written, the query
        /// is always percent-encoded as UTF-8, so the encoder obtained from
        /// this argument is not used for the result.</param>
        /// <param name="strict">If true, any recoverable syntax violation
        /// noticed while parsing makes the method return null.</param>
        /// <returns>The parsed URL, or null if the input has no scheme and
        /// cannot be resolved against <paramref name="baseurl"/>, if the host
        /// or port is invalid, or if <paramref name="strict"/> is true and a
        /// syntax violation was found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is
        /// null.</exception>
        /// <exception cref="ArgumentException">The string contains an unpaired
        /// surrogate, or the host of a "file" URL cannot be parsed.</exception>
        public static URL parse(string s, URL baseurl, string encoding, bool strict)
        {
            if(s==null)
                throw new ArgumentNullException("s");
            int beginning=0;
            int ending=s.Length-1;
            bool relative=false;
            URL url=new URL();
            ITextEncoder encoder=null;
            ParseState state=ParseState.SchemeStart;
            if(encoding!=null){
                encoder=TextEncoding.getEncoder(encoding);
            }
            if(s.StartsWith("http://",StringComparison.Ordinal)){
                // Fast path: skip the scheme states for the most common prefix.
                state=ParseState.AuthorityIgnoreSlashes;
                url.scheme="http";
                beginning=7;
                relative=true;
            } else {
                // Skip leading whitespace.
                while(beginning<s.Length){
                    char c=s[beginning];
                    if(c!=0x09 && c!=0x0a && c!=0x0c && c!=0x0d && c!=0x20){
                        break;
                    }
                    beginning++;
                }
            }
            // Skip trailing whitespace. After this loop "ending" is an
            // exclusive bound: one past the last significant character.
            while(ending>=beginning){
                char c=s[ending];
                if(c!=0x09 && c!=0x0a && c!=0x0c && c!=0x0d && c!=0x20){
                    ending++;
                    break;
                }
                ending--;
            }
            if(ending<beginning){
                ending=beginning;
            }
            bool atflag=false;
            bool bracketflag=false;
            IntList buffer=new IntList();
            IntList query=null;
            IntList fragment=null;
            IntList password=null;
            IntList username=null;
            IntList schemeData=null;
            bool error=false;
            IList<string> path=new List<string>();
            int index=beginning;
            int hostStart=-1;
            int portstate=0;
            // The loop runs once more after the last character (index==ending)
            // so that every state sees an end-of-input marker (c==-1).
            while(index<=ending){
                int oldindex=index;
                int c=-1;
                if(index>=ending){
                    c=-1;
                    index++;
                } else {
                    c=s[index];
                    if(c>=0xD800 && c<=0xDBFF && index+1<ending &&
                        s[index+1]>=0xDC00 && s[index+1]<=0xDFFF){
                        // Get the Unicode code point for the surrogate pair
                        c=0x10000+(c-0xD800)*0x400+(s[index+1]-0xDC00);
                        index++;
                    } else if(c>=0xD800 && c<=0xDFFF)
                        // illegal surrogate
                        throw new ArgumentException();
                    index++;
                }
                switch(state){
                case ParseState.SchemeStart:
                    if(c>='A' && c<='Z'){
                        buffer.appendInt(c+0x20);
                        state=ParseState.Scheme;
                    } else if(c>='a' && c<='z'){
                        buffer.appendInt(c);
                        state=ParseState.Scheme;
                    } else {
                        // Not a scheme; reprocess this character.
                        index=oldindex;
                        state=ParseState.NoScheme;
                    }
                    break;
                case ParseState.Scheme:
                    if(c>='A' && c<='Z'){
                        // Schemes are stored in lower case.
                        buffer.appendInt(c+0x20);
                    } else if((c>='a' && c<='z') || c=='.' || c=='-' || c=='+'){
                        buffer.appendInt(c);
                    } else if(c==':'){
                        url.scheme=buffer.ToString();
                        buffer.clearAll();
                        if(url.scheme.Equals("http") ||
                            url.scheme.Equals("https") ||
                            url.scheme.Equals("ftp") ||
                            url.scheme.Equals("gopher") ||
                            url.scheme.Equals("ws") ||
                            url.scheme.Equals("wss") ||
                            url.scheme.Equals("file")){
                            relative=true;
                        }
                        if(url.scheme.Equals("file")){
                            state=ParseState.Relative;
                            relative=true;
                        } else if(relative && baseurl!=null && url.scheme.Equals(baseurl.scheme)){
                            state=ParseState.RelativeOrAuthority;
                        } else if(relative){
                            state=ParseState.AuthorityFirstSlash;
                        } else {
                            schemeData=new IntList();
                            state=ParseState.SchemeData;
                        }
                    } else {
                        // What looked like a scheme was not one; start over
                        // from the first significant character.
                        buffer.clearAll();
                        index=beginning;
                        state=ParseState.NoScheme;
                    }
                    break;
                case ParseState.SchemeData:
                    if(c=='?'){
                        query=new IntList();
                        state=ParseState.Query;
                        break;
                    } else if(c=='#'){
                        fragment=new IntList();
                        state=ParseState.Fragment;
                        break;
                    }
                    // Flag invalid code points and malformed percent escapes.
                    if((c>=0 && (!isUrlCodePoint(c) && c!='%') || (c=='%' &&
                        (index+2>ending ||
                         !isHexDigit(s[index]) ||
                         !isHexDigit(s[index+1]))))){
                        error=true;
                    }
                    // Tab, LF and CR are dropped silently.
                    if(c>=0 && c!=0x09 && c!=0x0a && c!=0x0d){
                        if(c<0x20 || c==0x7F){
                            percentEncode(schemeData,c);
                        } else if(c<0x7F){
                            schemeData.appendInt(c);
                        } else {
                            percentEncodeUtf8(schemeData,c);
                        }
                    }
                    break;
                case ParseState.NoScheme:
                    if(baseurl==null)
                        return null;
                    if(!(baseurl.scheme.Equals("http") ||
                        baseurl.scheme.Equals("https") ||
                        baseurl.scheme.Equals("ftp") ||
                        baseurl.scheme.Equals("gopher") ||
                        baseurl.scheme.Equals("ws") ||
                        baseurl.scheme.Equals("wss") ||
                        baseurl.scheme.Equals("file")
                        ))
                        return null;
                    state=ParseState.Relative;
                    index=oldindex;
                    break;
                case ParseState.RelativeOrAuthority:
                    if(c=='/' && index<ending && s[index]=='/'){
                        index++;
                        state=ParseState.AuthorityIgnoreSlashes;
                    } else {
                        error=true;
                        state=ParseState.Relative;
                        index=oldindex;
                    }
                    break;
                case ParseState.Relative:{
                    relative=true;
                    if(!"file".Equals(url.scheme)){
                        url.scheme=baseurl.scheme;
                    }
                    // baseurl can be null here only for "file" input without a
                    // base; in that case there is nothing to inherit.
                    if(c<0){
                        if(baseurl!=null){
                            url.host=baseurl.host;
                            url.port=baseurl.port;
                            path=pathList(baseurl.path);
                            url.query=baseurl.query;
                        }
                    } else if(c=='/' || c=='\\'){
                        if(c=='\\'){
                            error=true;
                        }
                        state=ParseState.RelativeSlash;
                    } else if(c=='?'){
                        if(baseurl!=null){
                            url.host=baseurl.host;
                            url.port=baseurl.port;
                            path=pathList(baseurl.path);
                        }
                        query=new IntList();
                        state=ParseState.Query;
                    } else if(c=='#'){
                        if(baseurl!=null){
                            url.host=baseurl.host;
                            url.port=baseurl.port;
                            path=pathList(baseurl.path);
                            url.query=baseurl.query;
                        }
                        fragment=new IntList();
                        state=ParseState.Fragment;
                    } else {
                        if(baseurl!=null){
                            url.host=baseurl.host;
                            url.port=baseurl.port;
                            path=pathList(baseurl.path);
                        }
                        if(path.Count>0){ // Pop path
                            path.RemoveAt(path.Count-1);
                        }
                        state=ParseState.RelativePath;
                        index=oldindex;
                    }
                    break;
                }
                case ParseState.RelativeSlash:
                    if(c=='/' || c=='\\'){
                        if(c=='\\'){
                            error=true;
                        }
                        if("file".Equals(url.scheme)){
                            state=ParseState.FileHost;
                        } else {
                            state=ParseState.AuthorityIgnoreSlashes;
                        }
                    } else {
                        if(baseurl!=null){
                            url.host=baseurl.host;
                            url.port=baseurl.port;
                        }
                        state=ParseState.RelativePath;
                        index=oldindex;
                    }
                    break;
                case ParseState.AuthorityFirstSlash:
                    if(c=='/'){
                        state=ParseState.AuthoritySecondSlash;
                    } else {
                        error=true;
                        state=ParseState.AuthorityIgnoreSlashes;
                        index=oldindex;
                    }
                    break;
                case ParseState.AuthoritySecondSlash:
                    if(c=='/'){
                        state=ParseState.AuthorityIgnoreSlashes;
                    } else {
                        error=true;
                        state=ParseState.AuthorityIgnoreSlashes;
                        index=oldindex;
                    }
                    break;
                case ParseState.AuthorityIgnoreSlashes:
                    if(c!='/' && c!='\\'){
                        username=new IntList();
                        index=oldindex;
                        hostStart=index;
                        state=ParseState.Authority;
                    } else {
                        // Extra slashes are skipped but count as an error.
                        error=true;
                    }
                    break;
                case ParseState.Authority:
                    if(c=='@'){
                        if(atflag){
                            // A second '@': the earlier one was part of the
                            // credentials, so store it percent-encoded.
                            IntList sink=(password==null) ? username : password;
                            error=true;
                            sink.appendInt('%');
                            sink.appendInt('4');
                            sink.appendInt('0');
                        }
                        atflag=true;
                        int[] array=buffer.array();
                        for(int i=0;i<buffer.Count;i++){
                            int cp=array[i];
                            if(cp==0x9 || cp==0xa || cp==0xd){
                                error=true;
                                continue;
                            }
                            // Validate the buffered code point itself and, for
                            // '%', the two buffered code points that follow it.
                            if((!isUrlCodePoint(cp) && cp!='%') || (cp=='%' &&
                                (i+3>buffer.Count ||
                                 !isHexDigit(array[i+1]) ||
                                 !isHexDigit(array[i+2])))){
                                error=true;
                            }
                            if(cp==':' && password==null){
                                // First ':' separates user name from password.
                                password=new IntList();
                                continue;
                            }
                            IntList target=(password==null) ? username : password;
                            if(cp<=0x20 || cp>=0x7F || ((cp&0x7F)==cp && "#<>?`\"".IndexOf((char)cp)>=0)){
                                percentEncodeUtf8(target,cp);
                            } else {
                                target.appendInt(cp);
                            }
                        }
                        buffer.clearAll();
                        // The host starts after this '@'.
                        hostStart=index;
                    } else if(c<0 || ((c&0x7F)==c && "/\\?#".IndexOf((char)c)>=0)){
                        // End of authority: rescan from the host start.
                        buffer.clearAll();
                        state=ParseState.Host;
                        index=hostStart;
                    } else {
                        buffer.appendInt(c);
                    }
                    break;
                case ParseState.FileHost:
                    if(c<0 || ((c&0x7F)==c && "/\\?#".IndexOf((char)c)>=0)){
                        index=oldindex;
                        if(buffer.Count==2){
                            int c1=buffer[0];
                            int c2=buffer[1];
                            if((c2=='|' || c2==':') && ((c1>='A' && c1<='Z') || (c1>='a' && c1<='z'))){
                                // Drive letter, not a host: keep the buffer so it
                                // becomes the first path segment.
                                state=ParseState.RelativePath;
                                break;
                            }
                        }
                        string host=hostParse(buffer.ToString());
                        if(host==null)
                            throw new ArgumentException();
                        url.host=host;
                        buffer.clearAll();
                        state=ParseState.RelativePathStart;
                    } else if(c==0x09 || c==0x0a || c==0x0d){
                        error=true;
                    } else {
                        buffer.appendInt(c);
                    }
                    break;
                case ParseState.Host:
                case ParseState.HostName:
                    if(c==':' && !bracketflag){
                        string host=hostParse(buffer.ToString());
                        if(host==null)
                            return null;
                        url.host=host;
                        buffer.clearAll();
                        state=ParseState.Port;
                    } else if(c<0 || ((c&0x7F)==c && "/\\?#".IndexOf((char)c)>=0)){
                        string host=hostParse(buffer.ToString());
                        if(host==null)
                            return null;
                        url.host=host;
                        buffer.clearAll();
                        index=oldindex;
                        state=ParseState.RelativePathStart;
                    } else if(c==0x09 || c==0x0a || c==0x0d){
                        error=true;
                    } else {
                        // Track brackets so that ':' inside [...] is not taken
                        // as the port separator.
                        if(c=='['){
                            bracketflag=true;
                        } else if(c==']'){
                            bracketflag=false;
                        }
                        buffer.appendInt(c);
                    }
                    break;
                case ParseState.Port:
                    if(c>='0' && c<='9'){
                        // Leading zeros are not buffered.
                        if(c!='0'){
                            portstate=2; // first non-zero found
                        } else if(portstate==0){
                            portstate=1; // have a port number
                        }
                        if(portstate==2){
                            buffer.appendInt(c);
                        }
                    } else if(c<0 || ((c&0x7F)==c && "/\\?#".IndexOf((char)c)>=0)){
                        string bufport="";
                        if(portstate==1){
                            bufport="0";
                        } else if(portstate==2){
                            bufport=buffer.ToString();
                        }
                        // Drop the port when it is the scheme's default.
                        if((url.scheme.Equals("http") || url.scheme.Equals("ws"))
                            && bufport.Equals("80")){
                            bufport="";
                        }
                        if((url.scheme.Equals("https") || url.scheme.Equals("wss"))
                            && bufport.Equals("443")){
                            bufport="";
                        }
                        if((url.scheme.Equals("gopher"))
                            && bufport.Equals("70")){
                            bufport="";
                        }
                        if((url.scheme.Equals("ftp"))
                            && bufport.Equals("21")){
                            bufport="";
                        }
                        url.port=bufport;
                        buffer.clearAll();
                        state=ParseState.RelativePathStart;
                        index=oldindex;
                    } else if(c==0x09 || c==0x0a || c==0x0d){
                        error=true;
                    } else
                        return null;
                    break;
                case ParseState.Query:
                    if(c<0 || c=='#'){
                        // NOTE(review): utf8 is always true, so the encoder
                        // branch below is never taken and the query is always
                        // encoded as UTF-8. Confirm whether the encoder was
                        // meant to apply to some inputs.
                        bool utf8=true;
                        if(utf8 || encoder==null){
                            // NOTE: Encoder errors can never happen in
                            // this case
                            for(int i=0;i<buffer.Count;i++){
                                int ch=buffer[i];
                                if(ch<0x21 || ch>0x7e || ch==0x22 || ch==0x23 ||
                                    ch==0x3c || ch==0x3e || ch==0x60){
                                    percentEncodeUtf8(query,ch);
                                } else {
                                    query.appendInt(ch);
                                }
                            }
                        } else {
                            MemoryOutputStream baos=new MemoryOutputStream();
                            try {
                                encoder.encode(baos,buffer.array(),0,buffer.Count,encodingError);
                                byte[] bytes=baos.toByteArray();
                                foreach(var ch in bytes){
                                    if(ch<0x21 || ch>0x7e || ch==0x22 || ch==0x23 ||
                                        ch==0x3c || ch==0x3e || ch==0x60){
                                        percentEncode(query,ch);
                                    } else {
                                        query.appendInt(ch);
                                    }
                                }
                            } finally {
                                // Close the stream even if encoding fails;
                                // exceptions propagate with their stack trace.
                                baos.Close();
                            }
                        }
                        buffer.clearAll();
                        if(c=='#'){
                            fragment=new IntList();
                            state=ParseState.Fragment;
                        }
                    } else if(c==0x09 || c==0x0a || c==0x0d){
                        error=true;
                    } else {
                        if((!isUrlCodePoint(c) && c!='%') || (c=='%' &&
                            (index+2>ending ||
                             !isHexDigit(s[index]) ||
                             !isHexDigit(s[index+1])))){
                            error=true;
                        }
                        buffer.appendInt(c);
                    }
                    break;
                case ParseState.RelativePathStart:
                    if(c=='\\'){
                        error=true;
                    }
                    state=ParseState.RelativePath;
                    // A leading slash is consumed; anything else is reprocessed.
                    if((c!='/' && c!='\\')){
                        index=oldindex;
                    }
                    break;
                case ParseState.RelativePath:
                    if((c<0 || c=='/' || c=='\\') ||
                        (c=='?' || c=='#')){
                        if(c=='\\'){
                            error=true;
                        }
                        if(buffer.Count==2 && buffer[0]=='.'
                            && buffer[1]=='.'){
                            // ".." removes the previous segment.
                            if(path.Count>0){
                                path.RemoveAt(path.Count-1);
                            }
                            if((c!='/' && c!='\\')){
                                path.Add("");
                            }
                        } else if(buffer.Count==1 && buffer[0]=='.'){
                            // "." leaves the path as it is.
                            if((c!='/' && c!='\\')){
                                path.Add("");
                            }
                        } else {
                            if("file".Equals(url.scheme) && path.Count==0 &&
                                buffer.Count==2){
                                int c1=buffer[0];
                                int c2=buffer[1];
                                if((c2=='|' || c2==':') && ((c1>='A' && c1<='Z') || (c1>='a' && c1<='z'))){
                                    // Normalize a drive letter "X|" to "X:".
                                    buffer[1]=':';
                                }
                            }
                            path.Add(buffer.ToString());
                        }
                        buffer.clearAll();
                        if(c=='?'){
                            query=new IntList();
                            state=ParseState.Query;
                        }
                        if(c=='#'){
                            fragment=new IntList();
                            state=ParseState.Fragment;
                        }
                    } else if(c=='%' && index+2<=ending &&
                        s[index]=='2' &&
                        (s[index+1]=='e' || s[index+1]=='E')){
                        // "%2e" is decoded to "." so dot segments are recognized.
                        index+=2;
                        buffer.appendInt('.');
                    } else if(c==0x09 || c==0x0a || c==0x0d){
                        error=true;
                    } else {
                        if((!isUrlCodePoint(c) && c!='%') || (c=='%' &&
                            (index+2>ending ||
                             !isHexDigit(s[index]) ||
                             !isHexDigit(s[index+1])))){
                            error=true;
                        }
                        if(c<=0x20 || c>=0x7F || ((c&0x7F)==c && "#<>?`\"".IndexOf((char)c)>=0)){
                            percentEncodeUtf8(buffer,c);
                        } else {
                            buffer.appendInt(c);
                        }
                    }
                    break;
                case ParseState.Fragment:
                    if(c<0){
                        break;
                    }
                    if(c==0x09 || c==0x0a || c==0x0d){
                        error=true;
                    } else {
                        if((!isUrlCodePoint(c) && c!='%') || (c=='%' &&
                            (index+2>ending ||
                             !isHexDigit(s[index]) ||
                             !isHexDigit(s[index+1])))){
                            error=true;
                        }
                        if(c<0x20 || c==0x7F){
                            percentEncode(fragment,c);
                        } else if(c<0x7F){
                            fragment.appendInt(c);
                        } else {
                            percentEncodeUtf8(fragment,c);
                        }
                    }
                    break;
                default:
                    throw new InvalidOperationException();
                }
            }
            if(error && strict)
                return null;
            if(schemeData!=null){
                url.schemeData=schemeData.ToString();
            }
            // Join the path segments; an empty path becomes "/".
            StringBuilder builder=new StringBuilder();
            if(path.Count==0){
                builder.Append('/');
            } else {
                foreach(var segment in path){
                    builder.Append('/');
                    builder.Append(segment);
                }
            }
            url.path=builder.ToString();
            if(query!=null){
                url.query=query.ToString();
            }
            if(fragment!=null){
                url.fragment=fragment.ToString();
            }
            if(password!=null){
                url.password=password.ToString();
            }
            if(username!=null){
                url.username=username.ToString();
            }
            return url;
        }
Beispiel #2
0
   /// <summary>
   /// Appends the bytes held in <paramref name="baos"/> to
   /// <paramref name="builder"/> in form-urlencoded style: a space byte
   /// becomes '+'; the bytes for '*', '-', '.', '_', the ASCII digits and
   /// the ASCII letters are copied unchanged; every other byte is written
   /// as '%' followed by two hexadecimal digits taken from the
   /// <c>hex</c> lookup table.
   /// </summary>
   /// <param name="builder">Receives the encoded characters.</param>
   /// <param name="baos">Source of the bytes to encode.</param>
   private static void appendOutputBytes(StringBuilder builder,
 MemoryOutputStream baos)
   {
       for(int i=0;i<baos.Length;i++){
           int c=baos[i];
           if(c==0x20){
               builder.Append('+');
           } else if(c==0x2a || c==0x2d || c==0x2e ||
               (c>=0x30 && c<=0x39) ||
               (c>=0x41 && c<=0x5a) ||
               // Only the underscore itself is safe. Testing c>=0x5f would
               // let '`', '{', '|', '}', '~', DEL and every non-ASCII byte
               // through without percent-encoding.
               c==0x5f ||
               (c>=0x61 && c<=0x7a)){
               builder.Append((char)c);
           } else {
               builder.Append('%');
               builder.Append(hex[(c>>4)&0x0F]);
               builder.Append(hex[c&0x0F]);
           }
       }
   }
Beispiel #3
0
   /// <summary>
   /// Serializes a list of name/value pairs into a query string. Each name
   /// and value is encoded to bytes with the requested text encoding and
   /// then escaped by <c>appendOutputBytes</c>; name and value are joined
   /// with '=' and pairs are joined with the delimiter.
   /// </summary>
   /// <param name="pairs">The pairs to serialize. Each element must be an
   /// array with at least two entries: the name at index 0 and the value
   /// at index 1. Further entries are ignored.</param>
   /// <param name="delimiter">Text placed between pairs; "&amp;" is used
   /// when this is null.</param>
   /// <param name="encoding">Name of the text encoding; "utf-8" is used
   /// when this is null.</param>
   /// <returns>The serialized query string; empty when
   /// <paramref name="pairs"/> has no elements.</returns>
   /// <exception cref="ArgumentNullException"><paramref name="pairs"/> is
   /// null.</exception>
   /// <exception cref="ArgumentException">No encoder is available for
   /// <paramref name="encoding"/>, or an element of
   /// <paramref name="pairs"/> is null or has fewer than two
   /// entries.</exception>
   public static string toQueryString(IList<string[]> pairs,
 string delimiter, string encoding)
   {
       if(pairs==null)
           throw new ArgumentNullException("pairs");
       if(encoding==null){
           encoding="utf-8";
       }
       ITextEncoder encoder=TextEncoding.getEncoder(encoding);
       if(encoder==null)
           throw new ArgumentException("No encoder available for the given encoding.","encoding");
       // Resolve the default once instead of on every iteration.
       string separator=(delimiter==null) ? "&" : delimiter;
       StringBuilder builder=new StringBuilder();
       // One scratch stream is reused for every name and value.
       MemoryOutputStream baos=new MemoryOutputStream();
       bool first=true;
       foreach(var pair in pairs){
           if(pair==null || pair.Length<2)
               throw new ArgumentException("Each pair must contain a name and a value.","pairs");
           if(!first){
               builder.Append(separator);
           }
           first=false;
           baos.reset();
           TextEncoding.encodeString(pair[0], baos, encoder, querySerializerError);
           appendOutputBytes(builder,baos);
           builder.Append('=');
           baos.reset();
           TextEncoding.encodeString(pair[1], baos, encoder, querySerializerError);
           appendOutputBytes(builder,baos);
       }
       return builder.ToString();
   }