/// <summary>
/// Splits the "Directors" column of every input row into individual names and yields
/// one output row per name that is accepted by the <c>NameFilter</c> checks.
/// </summary>
/// <param name="input_rowset">Rows whose "Directors" column is read.</param>
/// <param name="output_row">
/// Row whose "Directors" column is overwritten for each name. The same instance is
/// yielded for every accepted name.
/// </param>
/// <param name="args">Not read by this method.</param>
/// <returns>A lazily evaluated sequence containing <paramref name="output_row"/> once per accepted name.</returns>
public override IEnumerable<Row> Process(RowSet input_rowset, Row output_row, string[] args)
{
    NameFilter name_filter = new NameFilter();

    foreach (Row input_row in input_rowset.Rows)
    {
        // A null column value is handled like an empty one, so one bad row cannot
        // abort the whole enumeration with a NullReferenceException.
        string input_string = input_row["Directors"].String ?? string.Empty;

        string[] input_split = input_string.Split(name_filter.SplitChars, StringSplitOptions.RemoveEmptyEntries);

        foreach (string item in input_split)
        {
            string name = item.Trim();

            if (name_filter.isAppropriateLanguage(name)
                && name_filter.isAppropriateLength(name, 1, 6)
                && !name_filter.isNoiseEntity(name))
            {
                output_row["Directors"].Set(name);
                yield return output_row;
            }
            else
            {
                // Rejected names are not yielded; they are only written to the debug
                // stream, tagged with a prefix that identifies this processor.
                output_row["Directors"].Set("000DirectorNameProcessor" + name);
                DebugStream.WriteLine(output_row.ToString());
            }
        }
    }
}
/// <summary>
/// Write the JSON message to the output stream, preceded by a Content-Length header.
/// </summary>
/// <param name="message">
/// The message to write.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="message"/> is null.
/// </exception>
private void WriteJsonMessage(string message)
{
    if (message == null)
    {
        throw new ArgumentNullException("message");
    }

    // Encode the body first. The header has to announce the size of the encoded body
    // in bytes; message.Length counts UTF-16 chars and under-reports any message
    // that contains non-ASCII text.
    var bodyData = Encoding.UTF8.GetBytes(message);

    // Build the header message: "Content-Length: <bytes>" followed by two CR LF pairs.
    var header = string.Format("Content-Length: {0}{1}{2}{1}{2}", bodyData.Length, (char)0x0D, (char)0x0A);
    var headerData = Encoding.UTF8.GetBytes(header);
    SourceStream.Write(headerData, 0, headerData.Length);

    // Write the body data.
    SourceStream.Write(bodyData, 0, bodyData.Length);
    SourceStream.Flush();

    // debug the new message.
    if (DebugStream != null)
    {
        // The lock keeps the lines of one message together in the debug output.
        lock (DebugStream)
        {
            DebugStream.WriteLine("*****************************************");
            DebugStream.WriteLine(" Direction: {0}", Direction);
            DebugStream.WriteLine(" Timestamp: {0}", DateTime.Now);
            DebugStream.WriteLine("*****************************************");
            DebugStream.WriteLine(message);
            DebugStream.WriteLine("*****************************************");
            DebugStream.Flush();
        }
    }
}
/// <summary>
/// Splits the "CountryName" column of every input row into individual names and yields
/// one output row per name that is accepted by the <c>NameFilter</c> checks.
/// </summary>
/// <param name="input_rowset">Rows whose "CountryName" column is read.</param>
/// <param name="output_row">
/// Row whose "CountryName" column is overwritten for each name. The same instance is
/// yielded for every accepted name.
/// </param>
/// <param name="args">Not read by this method.</param>
/// <returns>A lazily evaluated sequence containing <paramref name="output_row"/> once per accepted name.</returns>
public override IEnumerable<Row> Process(RowSet input_rowset, Row output_row, string[] args)
{
    NameFilter name_filter = new NameFilter();

    foreach (Row input_row in input_rowset.Rows)
    {
        // A null column value is handled exactly like an empty string, so one bad row
        // cannot abort the whole enumeration with a NullReferenceException.
        string input_string = input_row["CountryName"].String ?? string.Empty;

        // Empty entries are deliberately kept here (no RemoveEmptyEntries); each one
        // goes through the same filter as any other name.
        string[] input_split = input_string.Split(name_filter.SplitChars);

        foreach (string item in input_split)
        {
            string name = item.Trim();

            if (NameFilter.isAppropriateLanguage(name) && NameFilter.isAppropriateLength(name, 1, 6))
            {
                output_row["CountryName"].Set(name);
                yield return output_row;
            }
            else
            {
                // Rejected names are not yielded; they are only written to the debug
                // stream, tagged with a prefix that identifies this processor.
                output_row["CountryName"].Set("000CountryNameProcessor" + name);
                DebugStream.WriteLine(output_row.ToString());
            }
        }
    }
}
/// <summary>
/// Tests <paramref name="value"/> against a string goal property using the property's
/// comparison operator.
/// </summary>
/// <param name="prop">Goal property supplying the expected text and the comparison operator.</param>
/// <param name="value">Text to test; it is trimmed before any comparison.</param>
/// <param name="compareUrls">
/// When true and the plain EqualsTo/BeginsWith comparison fails, the comparison is repeated
/// on both strings after removing trailing '/' characters and passing them through
/// <c>StripHeaders</c>.
/// </param>
/// <returns>
/// True when the value satisfies the comparison. False when it does not, when
/// <paramref name="prop"/> is null, when either text is null/whitespace, when the regular
/// expression cannot be constructed, or when regex matching times out.
/// </returns>
public static bool CompareStringProperty(GoalProperty<string> prop, string value, bool compareUrls = false)
{
    if (prop == null || String.IsNullOrWhiteSpace(prop.PropertyValue) || String.IsNullOrWhiteSpace(value))
    {
        return false;
    }

    var expected = prop.PropertyValue;
    var candidate = value.Trim();

    // Hard coded & int casted because we will soon be dropping Conman + rewriting/merging the Operator enums.
    // 7: Operator value denoting "Contains", e.g. does the destination url contain the given property value.
    if ((int)prop.ComparisonOperator == 7)
    {
        return candidate.IndexOf(expected, StringComparison.OrdinalIgnoreCase) >= 0;
    }

    if (prop.ComparisonOperator == GoalComparisonOperator.RegularExpression)
    {
        if (!prop.RegexAssigned)
        {
            // Flagged before construction so a pattern that fails to parse is not retried on later calls.
            prop.RegexAssigned = true;

            var pattern = expected.TrimEnd('\\', '/');

            if (pattern.StartsWith("*"))
            {
                // Common customer mistake: forgetting the '.' before '*'.
                // ".*thankyou.html" is correct, "*thankyou.html" makes the Regex constructor throw.
                pattern = "." + pattern;
            }
            else if (pattern.StartsWith("?"))
            {
                // Common customer mistake: forgetting the '\' before '?'.
                // "\?thankyou.html" is correct, "?thankyou.html" makes the Regex constructor throw.
                pattern = @"\" + pattern;
            }

            if (pattern.Contains(@"\_"))
            {
                // Customers write '\_' for '_', but '_' is not an escape character in C#.
                // The first pass unescapes every "\_"; the second pass restores an original "\\_",
                // which the first pass reduced to "\_" and which must not be replaced.
                pattern = pattern.Replace(@"\_", @"_").Replace(@"\_", @"\\_");
            }

            try
            {
                // Only the trimmed pattern is tried; trying the untrimmed one first is unnecessary.
                prop.Regex = new Regex(pattern, RegexOptions.IgnoreCase, RegexTimeOut);
            }
            catch (ArgumentException)
            {
                // TODO: UI needs to validate the regex
                return false;
            }
        }

        if (prop.Regex == null)
        {
            return false;
        }

        try
        {
            return prop.Regex.IsMatch(candidate);
        }
        catch (RegexMatchTimeoutException)
        {
            DebugStream.WriteLine("Timeout occured for " + expected + " when matching " + candidate);
            return false;
        }
    }

    var comparison = prop.ComparisonOperator;
    var wantsEquals = comparison == GoalComparisonOperator.EqualsTo;
    var wantsPrefix = comparison == GoalComparisonOperator.BeginsWith;

    var matched = false;
    if (wantsEquals)
    {
        matched = String.Equals(candidate, expected, StringComparison.OrdinalIgnoreCase);
    }
    else if (wantsPrefix)
    {
        matched = candidate.StartsWith(expected, StringComparison.OrdinalIgnoreCase);
    }

    if (matched || !compareUrls)
    {
        return matched;
    }

    // If the original strings do not match, try matching pure URLs (without http or www).
    var bareExpected = StripHeaders(expected.TrimEnd('/'));
    var bareCandidate = StripHeaders(candidate.TrimEnd('/'));

    if (wantsEquals)
    {
        return String.Equals(bareCandidate, bareExpected, StringComparison.OrdinalIgnoreCase);
    }

    if (wantsPrefix)
    {
        return bareCandidate.StartsWith(bareExpected, StringComparison.OrdinalIgnoreCase);
    }

    return false;
}