/// <summary>
/// Writes <paramref name="str"/> to the file <paramref name="filename"/> using the
/// system default encoding (<see cref="System.Text.Encoding.Default"/>).
/// </summary>
/// <param name="str">Text to write; must not be null.</param>
/// <param name="filename">Path of the file to create, overwrite or extend.</param>
/// <param name="append">true to append to an existing file, false to overwrite it.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
public static void writeFile(System.String str, System.String filename, bool append)
{
    // Fail before the file is opened so that a null argument never creates or truncates it.
    if (str == null)
    {
        throw new System.ArgumentNullException("str");
    }

    // NOTE: this code was converted from java.io.FileWriter; StreamWriter is the .NET counterpart.
    // The using block guarantees the file handle is released even when Write throws.
    using (System.IO.StreamWriter writer = new System.IO.StreamWriter(filename, append, System.Text.Encoding.Default))
    {
        writer.Write(str);
    }
}
/// <summary>
/// Escapes the special characters in <paramref name="text"/> and trims the result.
/// </summary>
/// <remarks>
/// Special character sequences (for example \.br\) must not have their escape
/// characters encoded; such sequences are copied through verbatim up to and
/// including their closing escape character.
/// </remarks>
/// <param name="text">Text to escape.</param>
/// <param name="encChars">Encoding characters; supplies the escape character and the escape table.</param>
/// <returns>The escaped text with surrounding whitespace trimmed.</returns>
public static System.String escape(System.String text, EncodingCharacters encChars)
{
    System.Text.StringBuilder buffer = new System.Text.StringBuilder(text.Length);
    // Lookup from a special character to the text that replaces it
    // (presumably the escape-sequence table inverted -- confirm against InvertHash).
    Hashtable replacements = InvertHash(getEscapeSequences(encChars));
    bool insideLiteralSequence = false;

    for (int pos = 0; pos < text.Length; pos++)
    {
        char current = text[pos];

        if (insideLiteralSequence)
        {
            // Copy the sequence untouched; its closing escape character ends it.
            if (current.Equals(encChars.EscapeCharacter))
            {
                insideLiteralSequence = false;
            }
            buffer.Append(current);
            continue;
        }

        object replacement = replacements[current];
        if (replacement == null)
        {
            // Ordinary character.
            buffer.Append(current);
            continue;
        }

        // An escape character that opens a known two-character prefix starts a
        // sequence that has to be passed through rather than encoded.
        bool opensLiteralSequence =
            current.Equals(encChars.EscapeCharacter)
            && pos < text.Length - 1
            && _nonEscapeCharacterMapping[current.ToString() + text[pos + 1].ToString()] != null;

        if (opensLiteralSequence)
        {
            insideLiteralSequence = true;
            buffer.Append(current);
        }
        else
        {
            buffer.Append(replacement);
        }
    }

    return buffer.Length > 0 ? buffer.ToString().Trim() : "";
}
/// <summary>
/// Computes the Levenshtein distance between this instance's source characters
/// (<c>sa</c>, of length <c>n</c>) and <paramref name="other"/>.
/// </summary>
/// <param name="other">String to compare against.</param>
/// <returns>The value left in the last cell of the distance matrix.</returns>
public int GetDistance(System.String other)
{
    char[] ta = other.ToCharArray();
    int m = ta.Length;

    // When either side is empty the distance is the length of the other side.
    if (n == 0)
    {
        return m;
    }
    if (m == 0)
    {
        return n;
    }

    // Matrices are cached per target length while that length fits in the cache;
    // longer targets get a matrix that is not cached.
    int[][] grid;
    if (m >= cache.Length)
    {
        grid = Form(n, m);
    }
    else
    {
        if (cache[m] == null)
        {
            cache[m] = Form(n, m);
        }
        grid = cache[m];
    }

    for (int row = 1; row <= n; row++)
    {
        char sourceChar = sa[row - 1];
        int[] above = grid[row - 1];
        int[] current = grid[row];

        for (int col = 1; col <= m; col++)
        {
            int cost = sourceChar == ta[col - 1] ? 0 : 1;
            current[col] = Min3(above[col] + 1, current[col - 1] + 1, above[col - 1] + cost);
        }
    }

    return grid[n][m];
}
/// <summary>
/// Builds a Java-style accessor name ("get" + CamelCase words) from a field or
/// component description by dropping every character that is not a letter or digit.
/// </summary>
/// <remarks>
/// Some descriptions carry their data type in brackets, which must not become part
/// of the name, while other bracketed text is a critical distinguishing detail.
/// Bracketed text is therefore buffered and passed through
/// <c>filterBracketedText</c> before being appended in capitalized form.
/// </remarks>
/// <param name="fieldDesc">Field or component description.</param>
/// <returns>The accessor name.</returns>
public static System.String makeAccessorName(System.String fieldDesc)
{
    System.Text.StringBuilder name = new System.Text.StringBuilder("get");
    System.Text.StringBuilder pendingBracketText = new System.Text.StringBuilder();
    bool startOfWord = true;
    int depth = 0;

    foreach (char ch in fieldDesc)
    {
        if (ch == '(')
        {
            depth++;
        }
        else if (ch == ')')
        {
            depth--;
        }

        if (!System.Char.IsLetterOrDigit(ch))
        {
            // Any separator (brackets included) makes the next letter start a new word.
            startOfWord = true;
            continue;
        }

        if (depth > 0)
        {
            // Buffer everything inside brackets until they close.
            pendingBracketText.Append(ch);
            continue;
        }

        // Flush buffered bracket text ahead of the first character that follows it.
        if (pendingBracketText.Length > 0)
        {
            name.Append(capitalize(filterBracketedText(pendingBracketText.ToString())));
            pendingBracketText.Length = 0;
        }

        // The first letter of each word is upper-cased.
        name.Append(startOfWord ? System.Char.ToUpper(ch) : ch);
        startOfWord = false;
    }

    // Bracket text at the very end of the description is still pending here.
    name.Append(capitalize(filterBracketedText(pendingBracketText.ToString())));
    return name.ToString();
}
/// <summary>
/// Removes leading whitespace from <paramref name="value_Renamed"/>.
/// </summary>
/// <param name="value_Renamed">Value to correct; may be null.</param>
/// <returns>
/// The value starting at its first non-whitespace character, or null when the value
/// is null, empty, or consists only of whitespace.
/// </returns>
public virtual System.String correct(System.String value_Renamed)
{
    if (value_Renamed == null)
    {
        return null;
    }

    for (int start = 0; start < value_Renamed.Length; start++)
    {
        if (!System.Char.IsWhiteSpace(value_Renamed[start]))
        {
            return value_Renamed.Substring(start);
        }
    }

    // No non-whitespace character was found.
    return null;
}
/// <summary>
/// Removes unnecessary delimiters from the end of a field or segment.
/// </summary>
/// <param name="in_Renamed">Text that may end with superfluous delimiters.</param>
/// <param name="delim">Delimiter character to strip.</param>
/// <returns>
/// The text up to and including its last non-delimiter character, or an empty string
/// when the text contains nothing but delimiters.
/// </returns>
private static System.String stripExtraDelimiters(System.String in_Renamed, char delim)
{
    // Trimming a single character from the end is exactly "keep everything up to
    // the last character that differs from the delimiter".
    return in_Renamed.TrimEnd(delim);
}
/// <summary>
/// Decodes a URL-encoded string: '+' becomes a space and "%xy" becomes the character
/// whose code is the hexadecimal value xy.
/// </summary>
/// <remarks>
/// Malformed input is tolerated rather than rejected: a '%' that is not followed by
/// two more characters, or that is followed by non-hexadecimal digits, is copied to
/// the output unchanged.
/// </remarks>
/// <param name="escaped">The encoded text; may be null.</param>
/// <returns>The decoded text, the same instance when nothing needs decoding, or null for null input.</returns>
private static System.String urlDecode(System.String escaped)
{
    // A platform URL decoder is deliberately not used here (the Java original had to run on JavaME).
    if (escaped == null)
    {
        return null;
    }

    char[] escapedArray = escaped.ToCharArray();
    int first = findFirstEscape(escapedArray);
    if (first < 0)
    {
        return escaped;
    }

    int max = escapedArray.Length;
    // The result is usually at least 2 shorter than the input. Clamp the hint so that an
    // input shorter than two characters cannot request a negative capacity, which
    // StringBuilder rejects with an exception.
    System.Text.StringBuilder unescaped = new System.Text.StringBuilder(System.Math.Max(max - 2, 0));

    // Everything before the first escape character can be copied as-is.
    unescaped.Append(escapedArray, 0, first);

    for (int i = first; i < max; i++)
    {
        char c = escapedArray[i];
        if (c == '+')
        {
            // '+' is translated directly into a space.
            unescaped.Append(' ');
        }
        else if (c == '%')
        {
            if (i >= max - 2)
            {
                // Fewer than two characters follow: keep the '%' and carry on.
                unescaped.Append('%');
            }
            else
            {
                int firstDigitValue = parseHexDigit(escapedArray[++i]);
                int secondDigitValue = parseHexDigit(escapedArray[++i]);
                if (firstDigitValue < 0 || secondDigitValue < 0)
                {
                    // Bad digit: emit the original three characters unchanged.
                    unescaped.Append('%');
                    unescaped.Append(escapedArray[i - 1]);
                    unescaped.Append(escapedArray[i]);
                }
                else
                {
                    // Only decode when both digits are valid; without this else-branch a
                    // garbage character built from the negative digit values was appended too.
                    unescaped.Append((char) ((firstDigitValue << 4) + secondDigitValue));
                }
            }
        }
        else
        {
            unescaped.Append(c);
        }
    }

    return unescaped.ToString();
}
/// <summary>
/// Removes backslash escaping: each unescaped backslash is dropped and the character
/// that follows it is kept literally (so a doubled backslash becomes a single one).
/// </summary>
/// <param name="escaped">Text that may contain backslash escapes; may be null.</param>
/// <returns>
/// The unescaped text, or <paramref name="escaped"/> itself when it is null or contains
/// no backslash. A lone trailing backslash is dropped.
/// </returns>
protected internal static System.String unescapeBackslash(System.String escaped)
{
    if (escaped == null)
    {
        return escaped;
    }

    int firstBackslash = escaped.IndexOf('\\');
    if (firstBackslash < 0)
    {
        return escaped;
    }

    int length = escaped.Length;
    // At least one backslash is going to be removed.
    System.Text.StringBuilder result = new System.Text.StringBuilder(length - 1);
    result.Append(escaped, 0, firstBackslash);

    bool pendingEscape = false;
    for (int pos = firstBackslash; pos < length; pos++)
    {
        char current = escaped[pos];
        if (current == '\\' && !pendingEscape)
        {
            // Swallow the escape character and take the next character literally.
            pendingEscape = true;
            continue;
        }

        result.Append(current);
        pendingEscape = false;
    }

    return result.ToString();
}
/// <summary>
/// Returns a string where the escape character (backslash) has been removed, or kept
/// only once if there was a double escape.
/// </summary>
/// <remarks>
/// The input is scanned with an "escape pending" flag instead of looking at the previous
/// character. Looking back one character treats the second backslash of an escaped pair
/// as a new escape, so runs of three or more backslashes kept too many of them (four
/// backslashes produced three instead of two).
/// </remarks>
/// <param name="input">Text containing backslash escapes.</param>
/// <returns>The text with escapes resolved; a lone trailing backslash is dropped.</returns>
private System.String DiscardEscapeChar(System.String input)
{
    System.Text.StringBuilder result = new System.Text.StringBuilder(input.Length);
    bool escapePending = false;

    foreach (char c in input)
    {
        if (c == '\\' && !escapePending)
        {
            // Unescaped backslash: drop it and take the next character literally.
            escapePending = true;
            continue;
        }

        result.Append(c);
        escapePending = false;
    }

    return result.ToString();
}
/// <summary>
/// Returns the next token from the source string, using the provided
/// token delimiters.
/// </summary>
/// <param name="delimiters">String containing the delimiters to use</param>
/// <returns>The string value of the token</returns>
/// <exception cref="System.ArgumentOutOfRangeException">The source has been fully consumed.</exception>
public System.String NextToken(System.String delimiters)
{
    // The documentation suggests the received delimiters apply to this call only, but in
    // practice they stay in effect afterwards, so they are stored on the instance.
    this.delimiters = delimiters;

    // Nothing left to tokenize.
    if (this.currentPos == this.chars.Length)
    {
        throw new System.ArgumentOutOfRangeException();
    }

    char[] delimiterChars = delimiters.ToCharArray();
    bool onDelimiter = System.Array.IndexOf(delimiterChars, this.chars[this.currentPos]) != -1;

    // Sitting on a delimiter that has to be reported as a token of its own.
    if (onDelimiter && this.includeDelims)
    {
        return this.chars[this.currentPos++].ToString();
    }

    // Otherwise fetch the next token without delimiters.
    return nextToken(delimiterChars);
}
/// <summary>
/// Creates a tokenizer over the given string.
/// </summary>
/// <param name="source">String to tokenize; its characters are copied into the tokenizer's buffer.</param>
public Tokenizer(System.String source)
{
    char[] sourceChars = source.ToCharArray();
    this.chars = sourceChars;
}
/// <summary>
/// Replaces the character at the specified position of <paramref name="str1"/> with the
/// first character of <paramref name="str2"/>.
/// </summary>
/// <param name="str1">Base string.</param>
/// <param name="cur">Index of the character to replace.</param>
/// <param name="str2">String whose first character is used as the replacement.</param>
/// <returns>A copy of <paramref name="str1"/> with the character replaced.</returns>
/// <exception cref="System.ArgumentException"><paramref name="str2"/> is empty.</exception>
private System.String replace(System.String str1, int cur, System.String str2)
{
    char[] array = str1.ToCharArray();

    // An empty replacement is a caller error. It used to terminate the whole process with
    // exit code 0 (success); raise it as an exception so the host can handle or report it.
    if (str2.Length == 0)
    {
        throw new System.ArgumentException("Exp.java: replace(): str2 is too short", "str2");
    }

    array[cur] = str2[0];
    return new string(array);
}
/// <summary>
/// Converts a .NET string into a <c>java.lang.String</c> by wrapping the string's
/// characters in an array adapter and initialising a new instance from it.
/// </summary>
/// <param name="str">The .NET string to convert.</param>
/// <returns>The newly created <c>java.lang.String</c>.</returns>
public static java.lang.String toJava(System.String str)
{
    char[] characters = str.ToCharArray();
    global::org.xmlvm._nArrayAdapter<char> adapter = new global::org.xmlvm._nArrayAdapter<char>(characters);

    java.lang.String converted = new java.lang.String();
    converted.@this(adapter);
    return converted;
}
/// <summary>
/// Writes a string to the stream.
/// </summary>
/// <remarks>
/// An empty string is written as nil. A string whose characters all fit in one byte is
/// written with the string tag, a length and the raw bytes. Any other string is written
/// as a list of characters terminated by nil.
/// <para>
/// The byte buffer used to be produced by truncating every character to a byte, so its
/// length always equalled the string length: the list ("Unicode") branch could never run
/// and characters above 0xFF were silently corrupted. The decision is now made by
/// inspecting the characters themselves.
/// </para>
/// </remarks>
/// <param name="s">The string to write.</param>
public virtual void write_string(System.String s)
{
    int len = s.Length;
    if (len == 0)
    {
        this.write_nil();
        return;
    }

    // The compact form carries one byte per character and a 2-byte length field
    // (Erlang STRING_EXT; NOTE(review): assumes write2BE emits exactly two bytes),
    // so it is only usable for strings of at most 65535 one-byte characters.
    bool fitsCompactForm = len <= 65535;
    for (int i = 0; fitsCompactForm && i < len; i++)
    {
        if (s[i] > 0xFF)
        {
            fitsCompactForm = false;
        }
    }

    if (fitsCompactForm)
    {
        /* Usual */
        byte[] bytebuf = new byte[len];
        for (int i = 0; i < len; i++)
        {
            bytebuf[i] = (byte) s[i];
        }

        this.write1(OtpExternal.stringTag);
        this.write2BE(len);
        this.writeN(bytebuf);
    }
    else
    {
        /* Unicode (or too long for the compact form): encode as a list */
        this.write_list_head(len);
        for (int i = 0; i < len; i++)
        {
            this.write_char(s[i]);
        }
        this.write_nil();
    }
}
/// <summary>
/// Removes leading whitespace from the given string.
/// </summary>
/// <remarks>
/// This exists because of frequent problems parsing messages hand-written on Windows:
/// pressing ENTER at the end of each segment produces a carriage return and a line feed,
/// so the parser would otherwise see the line feed as the first character of the next
/// segment.
/// </remarks>
/// <param name="in_Renamed">Text to strip.</param>
/// <returns>The text starting at its first non-whitespace character (empty if there is none).</returns>
public static System.String stripLeadingWhitespace(System.String in_Renamed)
{
    int start = 0;
    while (start < in_Renamed.Length && System.Char.IsWhiteSpace(in_Renamed[start]))
    {
        start++;
    }

    return in_Renamed.Substring(start);
}
/// <summary>
/// Checks that an email address conforms to RFCs 5321, 5322 and others, collecting
/// verbose information about the outcome in <c>ResultInfo</c>.
/// </summary>
/// <remarks>
/// <c>ResultInfo</c> is replaced by a fresh list on every call. When the method returns
/// false the reason has been appended to it; when it returns true the list may still
/// contain warnings (for example about folding white space or comments).
/// No DNS lookup is performed.
/// <para>
/// Relevant specifications:
/// http://tools.ietf.org/html/rfc5321, http://tools.ietf.org/html/rfc5322,
/// http://tools.ietf.org/html/rfc4291#section-2.2, http://tools.ietf.org/html/rfc1123#section-2.1,
/// http://tools.ietf.org/html/rfc3696 (guidance only).
/// </para>
/// </remarks>
/// <param name="email">The email address to check; null is treated as an empty string.</param>
/// <returns>true when the address passes every check, false otherwise.</returns>
public bool IsEmailValid(System.String email)
{
    ResultInfo = new List<string>();

    if (email == null)
    {
        email = string.Empty;
    }

    // The upper limit on address lengths should normally be considered to be 254
    // (http://www.rfc-editor.org/errata_search.php?rfc=3696): the maximum total length of
    // a reverse-path or forward-path is 256 characters including the mandatory
    // 2-character wrapper round the actual address
    // (http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3).
    int emailLength = email.Length;
    if (emailLength > 254)
    {
        this.ResultInfo.Add(@" Email is too long. The maximum total length of a reverse-path or forward-path is 256 characters (including the punctuation and element separators) (http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3) ");
        return false;
    }

    // Contemporary email addresses consist of a "local part" separated from
    // a "domain part" (a fully-qualified domain name) by an at-sign ("@").
    // (http://tools.ietf.org/html/rfc3696#section-3)
    // These three diagnostics used to start with "Email is too long", which was wrong.
    const string atSignRule = @"Contemporary email addresses consist of a ""local part"" separated from a ""domain part"" (a fully-qualified domain name) by an at-sign (""@""). (http://tools.ietf.org/html/rfc3696#section-3) ";
    int atIndex = email.LastIndexOf('@');
    if (atIndex == -1)
    {
        this.ResultInfo.Add(" Email has no at-sign. " + atSignRule);
        return false;
    }
    if (atIndex == 0)
    {
        this.ResultInfo.Add(" Email has no local part. " + atSignRule);
        return false;
    }
    if (atIndex == emailLength - 1)
    {
        this.ResultInfo.Add(" Email has no domain part. " + atSignRule);
        return false;
    }

    // Sanitize comments
    // - remove nested comments, quotes and dots in comments
    // - remove parentheses and dots from quoted strings
    int braceDepth = 0;
    bool inQuote = false;
    bool escapeThisChar = false;
    for (int i = 0; i < emailLength; ++i)
    {
        char charX = email[i];
        bool replaceChar = false;

        if (charX == '\\')
        {
            // Escape the next character?
            escapeThisChar = !escapeThisChar;
        }
        else
        {
            switch (charX)
            {
                case '(':
                    if (escapeThisChar || inQuote)
                    {
                        replaceChar = true;
                    }
                    else if (braceDepth++ > 0)
                    {
                        // Nested opening brace (the depth is incremented either way).
                        replaceChar = true;
                    }
                    break;

                case ')':
                    if (escapeThisChar || inQuote)
                    {
                        replaceChar = true;
                    }
                    else
                    {
                        if (--braceDepth > 0)
                        {
                            // Nested closing brace.
                            replaceChar = true;
                        }
                        if (braceDepth < 0)
                        {
                            braceDepth = 0;
                        }
                    }
                    break;

                case '"':
                    if (escapeThisChar)
                    {
                        replaceChar = true;
                    }
                    else if (braceDepth == 0)
                    {
                        // Are we inside a quoted string?
                        inQuote = !inQuote;
                    }
                    else
                    {
                        replaceChar = true;
                    }
                    break;

                case '.':
                    // Dots don't help us either
                    if (escapeThisChar || braceDepth > 0)
                    {
                        replaceChar = true;
                    }
                    break;
            }

            escapeThisChar = false;
            if (replaceChar)
            {
                // Replace the offending character with something harmless
                email = replaceCharAt(email, i, 'x');
            }
        }
    }

    System.String localPart = email.Substring(0, atIndex);
    System.String domain = email.Substring(atIndex + 1);

    // Folding white space
    System.String FWS = "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))";

    // The patterns are built once here rather than once per element.
    // A dot that is not inside a quoted string: distinguishes "first.last" (one element)
    // from "first"."last" (two elements).
    Regex unquotedDotRegex = new Regex("(?m)\\.(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*(?![^\\\"]*\\\"))");
    // Leading or trailing folding white space.
    Regex edgeFwsRegex = new Regex("^" + FWS + "|" + FWS + "$");
    // A parenthesis that is not preceded by a backslash.
    Regex unescapedParenRegex = new Regex("(?<!\\\\)[\\(\\)]");

    // Let's check the local part for RFC compliance...
    //
    // local-part = dot-atom / quoted-string / obs-local-part
    // obs-local-part = word *("." word)
    // (http://tools.ietf.org/html/rfc5322#section-3.4.1)
    Regex quotedStringRegex = new Regex("(?s)^\"(?:.)*\"$");
    Regex unescapedFwsRegex = new Regex("(?<!\\\\)" + FWS);
    Regex escapedBackslashRegex = new Regex("\\\\\\\\");
    Regex badQuotedContentRegex = new Regex("(?<!\\\\|^)[\"\\r\\n\\x00](?!$)|\\\\\"$|\"\"");
    // Excluded characters: 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, comma, period, "
    Regex unquotedForbiddenRegex = new Regex("[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\.\"]");

    System.String[] dotArray = unquotedDotRegex.Split(localPart);
    int partLength = 0;

    foreach (System.String element in dotArray)
    {
        // Remove any leading or trailing FWS
        System.String working_element = edgeFwsRegex.Replace(element, string.Empty);
        if (!element.Equals(working_element))
        {
            // FWS is unlikely in the real world
            this.ResultInfo.Add(@" Folding White Space local-part = dot-atom / quoted-string / obs-local-part obs-local-part = word *(""."" word) (http://tools.ietf.org/html/rfc5322#section-3.4.1) ");
        }

        int elementLength = working_element.Length;
        if (elementLength == 0)
        {
            // Can't have empty element (consecutive dots or
            // dots at the start or end)
            this.ResultInfo.Add(@" Can't have empty element (consecutive dots or dots at the start or end) (http://tools.ietf.org/html/rfc5322#section-3.4.1) ");
            return false;
        }

        // We need to remove any valid comments (i.e. those at the start or
        // end of the element).
        // The leading test used to compare the WHOLE element with "(", so leading
        // comments were never removed; test the first character, as the domain
        // handling below does.
        if (working_element[0] == '(')
        {
            int indexBrace = working_element.IndexOf(')');
            if (indexBrace != -1)
            {
                if (unescapedParenRegex.IsMatch(working_element.Substring(1, indexBrace - 1)))
                {
                    // Illegal characters in comment
                    this.ResultInfo.Add(@" Illegal characters in comment ");
                    return false;
                }
                working_element = working_element.Substring(indexBrace + 1, elementLength - indexBrace - 1);
                elementLength = working_element.Length;
            }
        }

        // The element may be empty once a leading comment is gone, so guard the index.
        if (elementLength > 0 && working_element[elementLength - 1] == ')')
        {
            int indexBrace = working_element.LastIndexOf('(');
            if (indexBrace != -1)
            {
                if (unescapedParenRegex.IsMatch(working_element.Substring(indexBrace + 1, elementLength - indexBrace - 2)))
                {
                    // Illegal characters in comment
                    this.ResultInfo.Add(@" Illegal characters in comment ");
                    return false;
                }
                working_element = working_element.Substring(0, indexBrace);
                elementLength = working_element.Length;
            }
        }

        // Remove any remaining leading or trailing FWS around the element
        // (having removed any comments)
        working_element = edgeFwsRegex.Replace(working_element, string.Empty);

        // What's left counts towards the maximum length for this part
        if (partLength > 0)
        {
            partLength++; // for the dot
        }
        partLength += working_element.Length;

        // Each dot-delimited component can be an atom or a quoted string
        // (because of the obs-local-part provision)
        if (quotedStringRegex.IsMatch(working_element))
        {
            // Quoted-string tests: remove any FWS
            working_element = unescapedFwsRegex.Replace(working_element, string.Empty);

            // Telling \" \\" \\\" \\\\" etc. apart is hard with a regular expression,
            // so remove all \\ from the string first...
            working_element = escapedBackslashRegex.Replace(working_element, string.Empty);

            if (badQuotedContentRegex.IsMatch(working_element))
            {
                // ", CR, LF and NUL must be escaped
                // (""@example.com is allowed because it's technically valid)
                this.ResultInfo.Add(@" "", CR, LF and NUL must be escaped ");
                return false;
            }
        }
        else
        {
            // Unquoted string tests:
            //
            // Period (".") may...appear, but may not be used to start or end the
            // local part, nor may two or more consecutive periods appear.
            // (http://tools.ietf.org/html/rfc3696#section-3)
            //
            // A zero-length element implies a period at the beginning or end of the
            // local part, or two periods together. Either way it's not allowed.
            if (string.IsNullOrEmpty(working_element))
            {
                // Dots in wrong place
                this.ResultInfo.Add(@" A zero-length element implies a period at the beginning or end of the local part, or two periods together. Either way it's not allowed. 
");
                return false;
            }

            // Any ASCII graphic (printing) character other than the at-sign ("@"),
            // backslash, double quote, comma, or square brackets may appear without
            // quoting. If any of that list of excluded characters are to appear, they
            // must be quoted (http://tools.ietf.org/html/rfc3696#section-3)
            if (unquotedForbiddenRegex.IsMatch(working_element))
            {
                // These characters must be in a quoted string
                this.ResultInfo.Add(@" Any ASCII graphic (printing) character other than the at-sign (""@""), backslash, double quote, comma, or square brackets may appear without quoting. If any of that list of excluded characters are to appear, they must be quoted (http://tools.ietf.org/html/rfc3696#section-3) ");
                return false;
            }
        }
    }

    if (partLength > 64)
    {
        // Local part must be 64 characters or less
        this.ResultInfo.Add(@" Local part must be 64 characters or less ");
        return false;
    }

    // Now let's check the domain part...

    // The domain name can also be replaced by an IP address in square brackets
    // (http://tools.ietf.org/html/rfc3696#section-3)
    // (http://tools.ietf.org/html/rfc5321#section-4.1.3)
    // (http://tools.ietf.org/html/rfc4291#section-2.2)
    //
    // IPv4 is the default format for address literals. Non-IPv4 formats must be tagged
    // to show what type of address literal they are; at the time of writing only IPv6
    // has been defined. The registry of current tags is here:
    // http://www.iana.org/assignments/address-literal-tags
    if (Regex.IsMatch(domain, "^\\[(.)+]$"))
    {
        // It's an address-literal
        System.String addressLiteral = domain.Substring(1, domain.Length - 2);
        System.String IPv6;
        int groupMax = 8;
        const string colon = ":";
        const string double_colon = "::";
        const string IPv6tag = "IPv6:";

        // Extract IPv4 part from the end of the address-literal (if there is one)
        Regex ipv4AtEndRegex = new Regex("\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$");
        Match ipv4Match = ipv4AtEndRegex.Match(addressLiteral);
        if (ipv4Match.Success)
        {
            int index = addressLiteral.LastIndexOf(ipv4Match.Value, System.StringComparison.Ordinal);
            if (index == 0)
            {
                // Nothing there except a valid IPv4 address, so...
                return true;
            }

            // Assume it's an attempt at a mixed address (IPv6 + IPv4)
            if (!addressLiteral.StartsWith(IPv6tag, System.StringComparison.Ordinal))
            {
                // RFC5321 section 4.1.3
                this.ResultInfo.Add(@" Character preceding IPv4 address must be ':' RFC5321 section 4.1.3 ");
                return false;
            }

            // Convert IPv4 part to IPv6 format
            IPv6 = addressLiteral.Substring(5, index - 5) + "0000:0000";
        }
        else
        {
            // It must be an attempt at pure IPv6.
            // StartsWith (rather than Substring(0, 5)) keeps literals shorter than the
            // tag, such as "::1", from raising ArgumentOutOfRangeException.
            if (!addressLiteral.StartsWith(IPv6tag, System.StringComparison.Ordinal))
            {
                // RFC5321 section 4.1.3
                this.ResultInfo.Add(@" Invalid IPV6 address RFC5321 section 4.1.3 ");
                return false;
            }
            IPv6 = addressLiteral.Substring(5);
        }

        string[] matchesIP = IPv6.Split(':');
        int groupCount = matchesIP.Length;
        int currIndex = IPv6.IndexOf(double_colon, System.StringComparison.Ordinal);

        if (currIndex == -1)
        {
            // We need exactly the right number of groups
            if (groupCount != groupMax)
            {
                // RFC5321 section 4.1.3
                this.ResultInfo.Add(@" Invalid IPV6 groupcount RFC5321 section 4.1.3 ");
                return false;
            }
        }
        else
        {
            if (currIndex != IPv6.LastIndexOf(double_colon, System.StringComparison.Ordinal))
            {
                // More than one '::'
                this.ResultInfo.Add(@" IPV6 double double colon present RFC5321 section 4.1.3 ");
                return false;
            }
            if ((currIndex == 0) || (currIndex == (IPv6.Length - 2)))
            {
                // RFC 4291 allows :: at the start or end of an
                // address with 7 other groups in addition
                groupMax++;
            }
            if (groupCount > groupMax)
            {
                // Too many IPv6 groups in address
                this.ResultInfo.Add(@" Too many groups in section RFC5321 section 4.1.3 ");
                return false;
            }
            if (groupCount == groupMax)
            {
                // Warning only: the address is still accepted.
                this.ResultInfo.Add(@"Eliding a single group with :: is deprecated by RFCs 5321 & 5952");
            }
        }

        // Check for single : at start and end of address.
        // The two messages below used to say "must start/end with a single colon",
        // the opposite of the rule being enforced.
        if (IPv6.StartsWith(colon, System.StringComparison.Ordinal) && (!IPv6.StartsWith(double_colon, System.StringComparison.Ordinal)))
        {
            // Address starts with a single colon
            this.ResultInfo.Add(@" IPV6 must not start with a single colon RFC5321 section 4.1.3 ");
            return false;
        }
        if (IPv6.EndsWith(colon, System.StringComparison.Ordinal) && (!IPv6.EndsWith(double_colon, System.StringComparison.Ordinal)))
        {
            // Address ends with a single colon
            this.ResultInfo.Add(@" IPV6 must not end with a single colon RFC5321 section 4.1.3 ");
            return false;
        }

        // Check for unmatched characters
        Regex hexGroupRegex = new Regex("^[0-9A-Fa-f]{0,4}$");
        foreach (System.String s in matchesIP)
        {
            if (!hexGroupRegex.IsMatch(s))
            {
                this.ResultInfo.Add(@" IPV6 address contains bad characters RFC5321 section 4.1.3 ");
                return false;
            }
        }

        // It's a valid IPv6 address, so...
        return true;
    }
    else
    {
        // It's a domain name...

        // The syntax of a legal Internet host name was specified in RFC-952.
        // One aspect of host name syntax is hereby changed: the restriction on the
        // first character is relaxed to allow either a letter or a digit.
        // (http://tools.ietf.org/html/rfc1123#section-2.1)
        //
        // Most common applications, including email and the Web, will generally not
        // permit...escaped strings (http://tools.ietf.org/html/rfc3696#section-2)
        //
        // Characters outside the set of alphabetic characters, digits, and hyphen
        // MUST NOT appear in domain name labels for SMTP clients or servers, and
        // RFC5321 precludes the use of a trailing dot in a domain name for SMTP
        // purposes (http://tools.ietf.org/html/rfc5321#section-4.1.2)
        const string emptyLabelMessage = @" Each dot-delimited component must be of type atext A zero-length element implies a period at the beginning or end of the local part, or two periods together. Either way it's not allowed. ";
        // Excluded characters as for the local part, plus a hyphen at either end of a label.
        Regex labelForbiddenRegex = new Regex("[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\.\"]|^-|-$");

        dotArray = unquotedDotRegex.Split(domain);
        partLength = 0;

        // The last element is inspected after the loop, so make sure it has a value.
        System.String lastElement = "";

        if (dotArray.Length == 1)
        {
            // Warning only.
            this.ResultInfo.Add(@"The mail host probably isn't a TLD");
        }

        foreach (System.String element in dotArray)
        {
            lastElement = element;

            // Remove any leading or trailing FWS
            System.String working_element = edgeFwsRegex.Replace(element, string.Empty);
            if (!element.Equals(working_element))
            {
                this.ResultInfo.Add(@"FWS is unlikely in the real world");
            }

            int elementLength = working_element.Length;

            // Each dot-delimited component must be of type atext.
            // A zero-length element implies a period at the beginning or end of the
            // domain, or two periods together. Either way it's not allowed.
            if (elementLength == 0)
            {
                // Dots in wrong place
                this.ResultInfo.Add(emptyLabelMessage);
                return false;
            }

            // Then we need to remove all valid comments (i.e. those at the
            // start or end of the element)
            if (working_element[0] == '(')
            {
                this.ResultInfo.Add(@"Comments are unlikely in the real world");
                int indexBrace = working_element.IndexOf(')');
                if (indexBrace != -1)
                {
                    if (unescapedParenRegex.IsMatch(working_element.Substring(1, indexBrace - 1)))
                    {
                        // Illegal characters in comment
                        this.ResultInfo.Add(@" Illegal characters in comment ");
                        return false;
                    }
                    working_element = working_element.Substring(indexBrace + 1, elementLength - indexBrace - 1);
                    elementLength = working_element.Length;
                }
            }

            // Guarded: a label that consisted only of a comment is empty by now, and
            // indexing its last character used to raise ArgumentOutOfRangeException.
            if (elementLength > 0 && working_element[elementLength - 1] == ')')
            {
                int indexBrace = working_element.LastIndexOf('(');
                if (indexBrace != -1)
                {
                    if (unescapedParenRegex.IsMatch(working_element.Substring(indexBrace + 1, elementLength - indexBrace - 2)))
                    {
                        // Illegal characters in comment
                        this.ResultInfo.Add(@" Illegal characters in comment ");
                        return false;
                    }
                    working_element = working_element.Substring(0, indexBrace);
                    elementLength = working_element.Length;
                }
            }

            // A label with nothing but a comment in it is an empty label.
            if (elementLength == 0)
            {
                this.ResultInfo.Add(emptyLabelMessage);
                return false;
            }

            // Remove any leading or trailing FWS around the element (inside
            // any comments)
            working_element = edgeFwsRegex.Replace(working_element, string.Empty);

            // What's left counts towards the maximum length for this part
            if (partLength > 0)
            {
                partLength++; // for the dot
            }
            partLength += working_element.Length;

            // The DNS defines domain name syntax very generally -- a string of labels
            // each containing up to 63 8-bit octets, separated by dots, and with a
            // maximum total of 255 octets.
            // (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
            if (elementLength > 63)
            {
                // Label must be 63 characters or less
                this.ResultInfo.Add(@" The DNS defines domain name syntax very generally -- a string of labels each containing up to 63 8-bit octets, separated by dots, and with a maximum total of 255 octets. (http://tools.ietf.org/html/rfc1123#section-6.1.3.5) ");
                return false;
            }

            // If the hyphen is used, it is not permitted to appear at either the
            // beginning or end of a label. (http://tools.ietf.org/html/rfc3696#section-2)
            if (labelForbiddenRegex.IsMatch(working_element))
            {
                // Illegal character in domain name
                this.ResultInfo.Add(@" Illegal character in domain name ");
                return false;
            }
        }

        if (partLength > 255)
        {
            // Domain part must be 255 characters or less
            // (http://tools.ietf.org/html/rfc1123#section-6.1.3.5)
            this.ResultInfo.Add(@" Domain part must be 255 characters or less (http://tools.ietf.org/html/rfc1123#section-6.1.3.5) ");
            return false;
        }

        if (Regex.IsMatch(lastElement, "^[0-9]+$"))
        {
            // Warning only.
            this.ResultInfo.Add(@"TLD probably isn't all-numeric (http://www.apps.ietf.org/rfc/rfc3696.html#sec-2) ");
        }
    }

    // Eliminate all other factors, and the one which remains must be the
    // truth. (Sherlock Holmes, The Sign of Four)
    return true;
}
/// <summary> Replaces all instances of oldString with newString in line.
/// Taken from the Jive forum package.
/// Matching is ordinal (character-by-character), so the result does not
/// depend on the current culture.
/// </summary>
/// <param name="line">original string.
/// </param>
/// <param name="oldString">string in line to replace. If it is empty,
/// newString is inserted once in front of line.
/// </param>
/// <param name="newString">replace oldString with this.
/// </param>
/// <returns>line with every occurrence of oldString replaced, or line itself
/// when oldString does not occur.
/// </returns>
public static System.String sub(System.String line, System.String oldString, System.String newString)
{
    // Ordinal search: the code below advances by oldString.Length, which is only
    // valid when a match consumes exactly that many characters. A culture-sensitive
    // search can match zero-width/ignorable characters and break that assumption.
    int match = line.IndexOf(oldString, 0, System.StringComparison.Ordinal);
    if (match < 0)
    {
        return line;
    }

    int oldLength = oldString.Length;
    System.Text.StringBuilder buf = new System.Text.StringBuilder(line.Length);
    int copyFrom = 0;
    do
    {
        // Copy the untouched text before the match, then the replacement.
        buf.Append(line, copyFrom, match - copyFrom).Append(newString);
        match += oldLength;
        copyFrom = match;
        match = line.IndexOf(oldString, match, System.StringComparison.Ordinal);
    }
    // "> 0" (not ">= 0"): for a non-empty oldString every later match is past
    // index 0 anyway, and for an empty oldString this stops after one insertion
    // instead of looping forever.
    while (match > 0);

    // Copy whatever follows the last match.
    buf.Append(line, copyFrom, line.Length - copyFrom);
    return buf.ToString();
}
//****************************************************************************************************
/// <summary>
/// Parses a literal into a variable object.
/// Recognised forms are a double-quoted string (returned as StringVar without
/// the quotes), an optionally signed run of digits (IntVar), and an optionally
/// signed number with a fraction and/or an 'e'/'E' exponent (RealVar).
/// </summary>
/// <param name="str">text to parse</param>
/// <returns>
/// The parsed variable, or null when the text is null, empty, malformed, or is
/// an integer literal that does not fit in a 32-bit int. Null is also returned
/// if anything throws while parsing.
/// </returns>
public static SingleVar FromString(System.String str)
{
    // Largest magnitude an integer literal may have: |int.MinValue|.
    const long IntMagnitudeLimit = 2147483648L;
    // Exponents at or above this already drive any double to 0 or Infinity,
    // so further exponent digits cannot change the result.
    const int ExponentCap = 10000;

    try
    {
        if (str == null || str.Length == 0)
        {
            return null;
        }
        int len = str.Length;

        // Quoted string: must start and end with a double quote.
        if (str[0] == '\"')
        {
            if ((len < 2) || str[len - 1] != '\"')
            {
                return null;
            }
            return new StringVar(str.Substring(1, len - 2));
        }

        // Optional sign, then at least one digit.
        bool nsign = (str[0] == '-');
        int ind1 = (nsign || (str[0] == '+')) ? 1 : 0;
        if ((len <= ind1) || !System.Char.IsDigit(str[ind1]))
        {
            return null;
        }

        // Integer part. Tracked exactly in a long (for IntVar range checking) and
        // as a double (for RealVar), so no 32-bit wraparound can occur.
        long intPart = 0;
        bool intOverflow = false;
        double whole = 0;
        for (; (ind1 < len) && System.Char.IsDigit(str[ind1]); ind1++)
        {
            int digit = DigitVal(str[ind1]);
            whole = whole * 10 + digit;
            if (!intOverflow)
            {
                intPart = intPart * 10 + digit;
                if (intPart > IntMagnitudeLimit)
                {
                    intOverflow = true;
                }
            }
        }

        if (ind1 == len)
        {
            // Pure integer literal: reject values outside the int range
            // instead of returning a wrapped-around number.
            if (intOverflow)
            {
                return null;
            }
            long signedValue = nsign ? -intPart : intPart;
            if (signedValue > System.Int32.MaxValue)
            {
                return null;
            }
            return new IntVar((int) signedValue);
        }

        // Optional fraction: accumulate the digits, then shift them right of the point.
        double fract = 0;
        if (str[ind1] == '.')
        {
            int fractlen = 0;
            ind1++;
            for (; (ind1 < len) && System.Char.IsDigit(str[ind1]); ind1++, fractlen++)
            {
                fract *= 10;
                fract += DigitVal(str[ind1]);
            }
            for (; fractlen > 0; fractlen--)
            {
                fract /= 10;
            }
        }

        if (len == ind1)
        {
            double value = whole + fract;
            return new RealVar(nsign ? -value : value);
        }

        // Anything else must be an exponent.
        if ((str[ind1] != 'e') && (str[ind1] != 'E'))
        {
            return null;
        }
        ind1++;
        if (ind1 == len)
        {
            return null;
        }

        bool expnsign = (str[ind1] == '-');
        if ((str[ind1] == '+') || expnsign)
        {
            ind1++;
            if (ind1 == len)
            {
                return null;
            }
        }

        int exp = 0;
        for (; (ind1 < len) && System.Char.IsDigit(str[ind1]); ind1++)
        {
            int digit = DigitVal(str[ind1]);
            // Saturate rather than overflow; see ExponentCap.
            if (exp < ExponentCap)
            {
                exp = exp * 10 + digit;
            }
        }
        if (ind1 != len)
        {
            // Trailing characters after the exponent.
            return null;
        }

        // Apply the exponent by repeated scaling.
        double res = whole + fract;
        if (expnsign)
        {
            for (; exp > 0; exp--)
            {
                res /= 10;
            }
        }
        else
        {
            for (; exp > 0; exp--)
            {
                res *= 10;
            }
        }
        return new RealVar(nsign ? -res : res);
    }
    catch (Exception)
    {
        // Best effort: any failure while parsing means "not a valid literal".
        return null;
    }
}
/// <summary> Creates a distance calculator for a fixed target string.
/// Optimized to run a bit faster than the static getDistance().
/// In one benchmark times were 5.3sec using ctr vs 8.5sec w/ static method, thus 37% faster.
/// </summary>
/// <param name="target">string whose characters and length are stored in this instance</param>
public TRStringDistance(System.String target)
{
    char[] targetChars = target.ToCharArray();
    sa = targetChars;
    n = targetChars.Length;
}
/// <summary> Stems a word supplied as a String and returns the result as a String.
/// The word is passed to the char[] overload; if that overload returns true the
/// value of ToString() is returned, otherwise the input is returned as is.
/// </summary>
/// <param name="s">word to stem</param>
/// <returns>ToString() of this object, or <paramref name="s"/> itself</returns>
public virtual System.String Stem(System.String s)
{
    bool stemmed = Stem(s.ToCharArray(), s.Length);
    return stemmed ? ToString() : s;
}
/// <summary>Adds the given String to the set by delegating to the char[] overload.</summary>
/// <param name="text">string to add</param>
/// <returns>whatever the char[] overload of Add returns</returns>
public virtual bool Add(System.String text)
{
    char[] chars = text.ToCharArray();
    return Add(chars);
}
/// <summary> Extends the morpheme chart to handle the phoneme change phenomenon.
/// Every dictionary entry for the front part whose tag and phoneme pass the tag-set
/// checks is added to the chart and registered at the given position.
/// If checkChart returns true for such an entry, a message is written to standard
/// error and the process is terminated with exit code 0.
/// </summary>
/// <param name="from">- the index of the start segment position
/// </param>
/// <param name="front">- the front part of the string
/// </param>
/// <param name="back">- the next part of the string
/// </param>
/// <param name="ftag">- the morpheme tag of the front part
/// </param>
/// <param name="btag">- the morpheme tag of the next part
/// </param>
/// <param name="phoneme">- phoneme
/// </param>
public virtual void phonemeChange(int from, System.String front, System.String back, int ftag, int btag, int phoneme)
{
    // Look the front part up in the system dictionary.
    TNODE entry = systemDic.fetch(front.ToCharArray());
    int candidateCount = (entry != null && entry.info_list != null) ? entry.info_list.Count : 0;

    Position pos = sp.getPosition(from);

    for (int idx = 0; idx < candidateCount; idx++)
    {
        INFO info = entry.info_list.Get_Renamed(idx);

        // Both checks are always evaluated: morpheme tag first, then phoneme.
        bool tagMatches = tagSet.checkTagType(ftag, info.tag);
        bool phonemeMatches = tagSet.checkPhonemeType(phoneme, info.phoneme);
        if (!tagMatches || !phonemeMatches)
        {
            continue;
        }

        int nextIndex = altSegment(back);
        bool checkResult = checkChart(pos.morpheme, pos.morphCount, info.tag, info.phoneme, nextIndex, btag, front);
        if (checkResult)
        {
            System.Console.Error.WriteLine("phonemeChange: exit");
            System.Environment.Exit(0);
        }
        else
        {
            // Add a new chart node for the front part and register it at this position.
            int newIndex = addMorpheme(info.tag, info.phoneme, nextIndex, btag);
            chart[newIndex].str = front;
            pos.morpheme[pos.morphCount++] = newIndex;
        }
    }
}