/// <summary>
/// Adapt the value specified in the context.
/// </summary>
/// <remarks>
/// Only non-null values from the "ApplicationInfo" context are processed; everything
/// else is returned untouched. Depending on the property name the value is trimmed
/// ("MainModule"), normalized (memory properties, "ThreadCount", "HandleCount") or
/// replaced by custom fields ("StartTime"). Any other property is flagged as ignored.
/// </remarks>
/// <param name="context">Context information</param>
/// <param name="currentValue">Value which might have been adapted</param>
/// <returns>The new value (or same as the current value if no modification has been made)</returns>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>null</c>.</exception>
public object Adapt(ValueAdapterContext context, object currentValue)
{
    if (context == null)
    {
        throw new ArgumentNullException("context");
    }

    if (currentValue == null || context.ContextName != "ApplicationInfo")
    {
        return currentValue;
    }

    var text = currentValue.ToString();

    if (context.PropertyName == "MainModule")
    {
        const string modulePrefix = "System.Diagnostics.ProcessModule";

        // Ordinal comparison: the prefix is a type name, not linguistic text.
        if (text.StartsWith(modulePrefix, StringComparison.Ordinal))
        {
            // Strip the type name and the surrounding " (...)" decoration.
            return text.Substring(modulePrefix.Length).Trim(' ', '(', ')');
        }
    }

    if (MemoryProperties.Any(x => x.Equals(context.PropertyName, StringComparison.OrdinalIgnoreCase)))
    {
        // Parse as long: values above int.MaxValue (about 2.1 billion) would fail an
        // int parse and be returned without normalization.
        long rawAmount;
        if (!long.TryParse(text, out rawAmount))
        {
            return currentValue;
        }

        // Scale down first, then clamp so the narrowing cast cannot overflow.
        long scaled = rawAmount / 1000000;
        int value = (int)Math.Max(int.MinValue, Math.Min(int.MaxValue, scaled));
        return MemoryNormalizer.Divide(value, context.TypeOfApplication == "Mobile" ? 32 : 512);
    }

    if (context.PropertyName == "ThreadCount")
    {
        return NumberNormalizer.Normalize(text, 10, 50);
    }

    if (context.PropertyName == "HandleCount")
    {
        return NumberNormalizer.Normalize(text, 100, 5000);
    }

    if (context.PropertyName == "StartTime")
    {
        DateTime dt;
        if (!DateTime.TryParse(text, out dt))
        {
            return currentValue;
        }

        // The raw timestamp is dropped; only the hour and weekday are reported.
        context.IgnoreProperty = true;
        context.AddCustomField("ApplicationInfo", "StartTime.Hour", dt.Hour);
        context.AddCustomField("ApplicationInfo", "StartTime.DayOfWeek", dt.DayOfWeek.ToString());
    }

    // Anything that reaches this point (including an unrecognized "MainModule"
    // value and a parsed "StartTime") is excluded from the report.
    context.IgnoreProperty = true;
    return currentValue;
}
/// <summary>
/// Extracts matched expressions from the given annotation.
/// </summary>
/// <remarks>
/// When the annotation has no <c>NumerizedTokensAnnotation</c> entry yet, the result of
/// <c>NumberNormalizer.FindAndMergeNumbers</c> is stored under that key before extraction.
/// </remarks>
/// <param name="annotation">Annotation to extract from; may be updated with numerized tokens.</param>
/// <returns>The expressions produced by the underlying extractor.</returns>
public virtual IList<MatchedExpression> Extract(ICoreMap annotation)
{
    var numerizedKey = typeof(CoreAnnotations.NumerizedTokensAnnotation);
    bool alreadyNumerized = annotation.ContainsKey(numerizedKey);

    if (!alreadyNumerized)
    {
        IList<ICoreMap> numerizedTokens = NumberNormalizer.FindAndMergeNumbers(annotation);
        annotation.Set(numerizedKey, numerizedTokens);
    }

    return extractor.ExtractExpressions(annotation);
}
/// <summary>
/// Extracts the time expressions found in an annotation and resolves them against a
/// reference date.
/// </summary>
/// <param name="annotation">Annotation to extract from; numerized tokens are added when missing.</param>
/// <param name="refDate">Reference date; when <c>null</c> the date cached in <paramref name="timeIndex"/> is used.</param>
/// <param name="timeIndex">Index whose <c>docDate</c> caches the document date across calls.</param>
/// <returns>The extracted time expressions, sorted with <c>ExprTokenOffsetsNestedFirstComparator</c>.</returns>
public virtual IList<TimeExpression> ExtractTimeExpressions(ICoreMap annotation, SUTime.Time refDate, SUTime.TimeIndex timeIndex)
{
    // Make sure numerized tokens are available before running the extractor.
    if (!annotation.ContainsKey(typeof(CoreAnnotations.NumerizedTokensAnnotation)))
    {
        try
        {
            IList<ICoreMap> numerized = NumberNormalizer.FindAndMergeNumbers(annotation);
            annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), numerized);
        }
        catch (NumberFormatException badNumber)
        {
            // A malformed number must not abort extraction; continue with no numerized tokens.
            logger.Warn("Caught bad number: " + badNumber.Message);
            annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), new List<ICoreMap>());
        }
    }

    IList<MatchedExpression> matches = expressionExtractor.ExtractExpressions(annotation);
    IList<TimeExpression> timeExpressions = new List<TimeExpression>(matches.Count);
    foreach (MatchedExpression match in matches)
    {
        // TODO: Fix the extraction pipeline so it creates TimeExpression instead of MatchedExpressions
        // For now, take the time expression attached to the match's annotation, which
        // avoids creating duplicate copies.
        TimeExpression attached = match.GetAnnotation().Get(typeof(TimeExpression.Annotation));
        if (attached == null)
        {
            continue;
        }
        timeExpressions.Add(attached);
    }

    // Cache the document date in the time index.
    if (timeIndex.docDate == null)
    {
        if (refDate != null)
        {
            timeIndex.docDate = refDate;
        }
        else if (options.searchForDocDate)
        {
            // No document date was supplied, but the options ask us to look for one.
            timeIndex.docDate = FindReferenceDate(timeExpressions);
        }
    }

    // No reference date supplied: fall back to the cached document date.
    if (refDate == null)
    {
        refDate = timeIndex.docDate;
    }

    // Some resolving is done even if refDate is null.
    ResolveTimeExpressions(annotation, timeExpressions, refDate);

    if (options.restrictToTimex3)
    {
        // Keep only TIMEX3-compatible expressions; when a parent is not compatible,
        // its children are resolved and checked instead.
        IList<TimeExpression> compatible = new List<TimeExpression>(timeExpressions.Count);
        foreach (TimeExpression candidate in timeExpressions)
        {
            if (candidate.GetTemporal() != null && candidate.GetTemporal().GetTimexValue() != null)
            {
                compatible.Add(candidate);
                continue;
            }

            IList<ICoreMap> childMaps = candidate.GetAnnotation().Get(typeof(TimeExpression.ChildrenAnnotation));
            if (childMaps == null)
            {
                continue;
            }

            foreach (ICoreMap childMap in childMaps)
            {
                TimeExpression childExpression = childMap.Get(typeof(TimeExpression.Annotation));
                if (childExpression == null)
                {
                    continue;
                }

                ResolveTimeExpression(annotation, childExpression, refDate);
                if (childExpression.GetTemporal() != null && childExpression.GetTemporal().GetTimexValue() != null)
                {
                    compatible.Add(childExpression);
                }
            }
        }
        timeExpressions = compatible;
    }

    // Add back nested time expressions for ranges (only one level of nesting for now).
    if (options.includeNested)
    {
        IList<TimeExpression> nested = new List<TimeExpression>();
        foreach (TimeExpression parent in timeExpressions)
        {
            if (!parent.IsIncludeNested())
            {
                continue;
            }

            IList<ICoreMap> childMaps = parent.GetAnnotation().Get(typeof(TimeExpression.ChildrenAnnotation));
            if (childMaps == null)
            {
                continue;
            }

            foreach (ICoreMap childMap in childMaps)
            {
                TimeExpression childExpression = childMap.Get(typeof(TimeExpression.Annotation));
                if (childExpression != null)
                {
                    nested.Add(childExpression);
                }
            }
        }

        ResolveTimeExpressions(annotation, nested, refDate);
        Sharpen.Collections.AddAll(timeExpressions, nested);
    }

    timeExpressions.Sort(MatchedExpression.ExprTokenOffsetsNestedFirstComparator);

    // Some resolving is done even if refDate is null.
    ResolveTimeExpressions(annotation, timeExpressions, refDate);
    return timeExpressions;
}
/// <summary>
/// Adapts values collected for the "OperatingSystem" context.
/// </summary>
/// <remarks>
/// Values that are not non-empty strings, or that belong to another context, are returned
/// untouched. For the OperatingSystem context the original property is flagged as ignored
/// and, for recognized property names, a translated or normalized value is published as a
/// custom field ("OS.Metadata", "OS.Environment" or "OS.Localization").
/// </remarks>
/// <param name="context">Context information</param>
/// <param name="currentValue">Value which might have been adapted</param>
/// <returns>
/// The (possibly translated) value. Note that localization properties return <c>null</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>null</c>.</exception>
public object Adapt(ValueAdapterContext context, object currentValue)
{
    if (context == null)
    {
        throw new ArgumentNullException("context");
    }

    // Only non-empty strings are handled; from here on val == currentValue.ToString().
    var val = currentValue as string;
    if (string.IsNullOrEmpty(val))
    {
        return currentValue;
    }

    if (!context.ContextName.Equals("OperatingSystem", StringComparison.OrdinalIgnoreCase))
    {
        return currentValue;
    }

    context.IgnoreProperty = true;

    if (context.PropertyName.Equals("OperatingSystemSKU", StringComparison.OrdinalIgnoreCase))
    {
        var name = OperatingSystemSku.GetName(val) ?? currentValue;
        context.AddCustomField("OS.Metadata", "Edition", name);
        // No early return here: the value falls through the remaining checks.
    }

    if (context.PropertyName.Equals("CSDVersion", StringComparison.OrdinalIgnoreCase))
    {
        context.AddCustomField("OS.Metadata", "ServicePack", val);
        return currentValue;
    }

    if (context.PropertyName.Equals("OSProductSuite", StringComparison.OrdinalIgnoreCase))
    {
        var value = OsProductSuite.GetNames(val) ?? currentValue;
        context.AddCustomField("OS.Metadata", "ProductSuite", value);
        return currentValue;
    }

    if (context.PropertyName.Equals("ProductType", StringComparison.OrdinalIgnoreCase))
    {
        switch (val)
        {
            case "1":
                currentValue = "Work Station";
                break;
            case "2":
                currentValue = "Domain Controller";
                break;
            case "3":
                currentValue = "Server";
                break;
        }

        context.AddCustomField("OS.Metadata", "ProductType", currentValue);
        return currentValue;
    }

    if (context.PropertyName.Equals("SuiteMask", StringComparison.OrdinalIgnoreCase))
    {
        int bitMask;
        if (int.TryParse(val, out bitMask))
        {
            var suites = DescribeSuiteMask(bitMask);

            // Keep the raw value when none of the known bits is set.
            if (suites != "")
            {
                currentValue = suites;
            }
        }

        context.AddCustomField("OS.Metadata", "Suite2", currentValue);
        return currentValue;
    }

    if (context.PropertyName.Equals("QuantumLength", StringComparison.OrdinalIgnoreCase))
    {
        switch (val)
        {
            case "0":
                currentValue = "Unknown ";
                break;
            case "1":
                currentValue = "One tick";
                break;
            case "2":
                currentValue = "Two ticks";
                break;
        }

        context.AddCustomField("OS.Metadata", "QuantumLength", currentValue);
        return currentValue;
    }

    if (context.PropertyName.Equals("QuantumType", StringComparison.OrdinalIgnoreCase))
    {
        switch (val)
        {
            case "0":
                currentValue = "Unknown ";
                break;
            case "1":
                currentValue = "Fixed";
                break;
            case "2":
                currentValue = "Variable";
                break;
        }

        context.AddCustomField("OS.Metadata", "QuantumType", currentValue);
        return currentValue;
    }

    if (context.PropertyName == "InstallDate")
    {
        return AdaptInstallDate(context, currentValue);
    }

    if (context.PropertyName == "LastBootUpTime")
    {
        // NOTE(review): this parses context.Value rather than currentValue, unlike the
        // other branches - confirm that is intentional (context.Value could be null).
        DateTime time;
        if (WmiDateConverter.TryParse(context.Value.ToString(), out time))
        {
            context.AddCustomField("OS.Environment", "LastBootup.Hour", time.Hour);
            context.AddCustomField("OS.Environment", "LastBootup.DayOfWeek", time.DayOfWeek.ToString());
        }
    }

    if (context.PropertyName == "LocalDateTime")
    {
        return AdaptLocalTime(context, currentValue);
    }

    if (context.PropertyName.StartsWith("Free", StringComparison.OrdinalIgnoreCase)
        || context.PropertyName.StartsWith("Total", StringComparison.OrdinalIgnoreCase)
        || context.PropertyName.Equals("SizeStoredInPagingFiles", StringComparison.OrdinalIgnoreCase))
    {
        var divisor = context.TypeOfApplication == "Server" ? 512 : 256;
        var value = MemoryNormalizer.Divide(val, divisor);
        if (!string.IsNullOrEmpty(value))
        {
            context.AddCustomField("OS.Environment", context.PropertyName, value);
        }

        return currentValue;
    }

    // Allowed as-is.
    if (context.PropertyName.StartsWith("DataExecutionPrevention", StringComparison.OrdinalIgnoreCase))
    {
        context.AddCustomField("OS.Metadata", context.PropertyName, currentValue);
        return currentValue;
    }

    if (LocalizationProperties.Any(x => x.Equals(context.PropertyName, StringComparison.OrdinalIgnoreCase)))
    {
        if (context.PropertyName == "Locale")
        {
            // The locale is parsed as a hexadecimal LCID.
            int lcid;
            if (int.TryParse(val, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo, out lcid))
            {
                try
                {
                    currentValue = CultureInfo.GetCultureInfo(lcid).Name;
                }
                catch (ArgumentException)
                {
                    // GetCultureInfo throws (CultureNotFoundException / ArgumentOutOfRangeException,
                    // both ArgumentException) for an LCID this runtime does not know.
                    // Report the raw value instead of failing the whole adaptation.
                }
            }
        }

        context.AddCustomField("OS.Localization", context.PropertyName, currentValue);

        // NOTE(review): unlike every other branch this returns null - kept as-is.
        return null;
    }

    if (OsEnvironment.Any(x => x.Equals(context.PropertyName, StringComparison.OrdinalIgnoreCase)))
    {
        if (context.PropertyName == "NumberOfUsers")
        {
            // Report exact counts only for zero or one user; bucket everything else.
            if (val != "0" && val != "1")
            {
                currentValue = "> 1";
            }

            context.AddCustomField("OS.Environment", "NumberOfLoggedInUsers", currentValue);
            return currentValue;
        }

        if (context.PropertyName == "NumberOfProcesses")
        {
            currentValue = NumberNormalizer.Normalize(val, 20, 1000);
        }

        context.AddCustomField("OS.Environment", context.PropertyName, currentValue);
        return currentValue;
    }

    return currentValue;
}

/// <summary>
/// Translates the suite bit mask into a comma separated list of suite names.
/// </summary>
/// <param name="bitMask">Bit mask where bit <c>n</c> (value <c>1 &lt;&lt; n</c>) selects the n-th name.</param>
/// <returns>The names of all set bits joined with ", ", or an empty string when none is set.</returns>
private static string DescribeSuiteMask(int bitMask)
{
    // Index in this table == bit position in the mask (1, 2, 4, ... 1024).
    string[] suiteNames =
    {
        "Small Business",
        "Enterprise",
        "BackOffice",
        "Communications",
        "Terminal Services",
        "Small Business Restricted",
        "Embedded Edition",
        "Datacenter Edition",
        "Single User",
        "Home Edition",
        "Web Server Edition"
    };

    var description = "";
    for (var bit = 0; bit < suiteNames.Length; bit++)
    {
        if ((bitMask & (1 << bit)) == 0)
        {
            continue;
        }

        if (description != "")
        {
            description += ", ";
        }

        description += suiteNames[bit];
    }

    return description;
}
/// <summary>
/// Adds features that only make sense for particular subject/object type combinations.
/// </summary>
/// <remarks>
/// Two groups of indicator features are produced:
/// number features when the object is tagged as a number, and title / top-employee
/// trigger features when subject and object are a person and an organization (either order).
/// </remarks>
/// <param name="input">Relation candidate (subject/object types, spans and object text).</param>
/// <param name="sentence">Sentence the candidate was found in.</param>
/// <param name="feats">Counter receiving the indicator features.</param>
private static void RelationSpecificFeatures(KBPRelationExtractor.KBPInput input, Sentence sentence, ClassicCounter<string> feats)
{
    // Number of tokens inspected on each side of the relation span.
    const int contextWindow = 5;

    if (input.objectType.Equals(KBPRelationExtractor.NERTag.Number))
    {
        // Bucket the object value if it is a number.
        // This is to prevent things like "age:9000" and to soft penalize "age:one".
        // The following features are extracted:
        //   1. Whether the object parses as a number (should always be true)
        //   2. Whether the object is an integer
        //   3. If the object is an integer, around what value it is (bucketed around common age values)
        //   4. Whether the number was spelled out or written with digits
        try
        {
            Number number = NumberNormalizer.WordToNumber(input.GetObjectText());
            if (number != null)
            {
                Indicator(feats, "obj_parsed_as_num", "t");

                // NOTE(review): comparing the number with itself is always true, so
                // "obj_isint" = "f" is never emitted. This looks like it was meant to
                // compare against the integer value - confirm against the Number API.
                if (number.Equals(number))
                {
                    Indicator(feats, "obj_isint", "t");
                    int numAsInt = number;

                    // Negative values land in "<5" because that is the first range
                    // check they satisfy.
                    string bucket;
                    if (numAsInt == 0)
                    {
                        bucket = "0";
                    }
                    else if (numAsInt == 1)
                    {
                        bucket = "1";
                    }
                    else if (numAsInt < 5)
                    {
                        bucket = "<5";
                    }
                    else if (numAsInt < 18)
                    {
                        bucket = "<18";
                    }
                    else if (numAsInt < 25)
                    {
                        bucket = "<25";
                    }
                    else if (numAsInt < 50)
                    {
                        bucket = "<50";
                    }
                    else if (numAsInt < 80)
                    {
                        bucket = "<80";
                    }
                    else if (numAsInt < 125)
                    {
                        bucket = "<125";
                    }
                    else
                    {
                        bucket = ">125";
                    }

                    Indicator(feats, "obj_number_bucket", bucket);
                }
                else
                {
                    Indicator(feats, "obj_isint", "f");
                }

                // If the text (minus thousands separators) equals the parsed number's
                // string form it was written with digits, otherwise it was spelled out.
                bool writtenWithDigits = Sharpen.Runtime.EqualsIgnoreCase(
                    input.GetObjectText().Replace(",", string.Empty), number.ToString());
                Indicator(feats, "obj_spelledout_num", writtenWithDigits ? "f" : "t");
            }
            else
            {
                Indicator(feats, "obj_parsed_as_num", "f");
            }
        }
        catch (NumberFormatException)
        {
            Indicator(feats, "obj_parsed_as_num", "f");
        }

        // Special case dashes and the string "one".
        Indicator(feats, "obj_num_has_dash", input.GetObjectText().Contains("-") ? "t" : "f");
        Indicator(feats, "obj_num_is_one", Sharpen.Runtime.EqualsIgnoreCase(input.GetObjectText(), "one") ? "t" : "f");
    }

    bool personAndOrganization =
        (input.subjectType == KBPRelationExtractor.NERTag.Person && input.objectType.Equals(KBPRelationExtractor.NERTag.Organization))
        || (input.subjectType == KBPRelationExtractor.NERTag.Organization && input.objectType.Equals(KBPRelationExtractor.NERTag.Person));

    if (personAndOrganization)
    {
        // Try to capture some denser features for employee_of. These are:
        //   1. Whether a TITLE tag occurs before, after, or inside the relation span
        //   2. Whether a top employee trigger occurs before, after, or inside the relation span
        Span relationSpan = Span.Union(input.subjectSpan, input.objectSpan);

        // (triggers before span)
        for (int i = Math.Max(0, relationSpan.Start() - contextWindow); i < relationSpan.Start(); ++i)
        {
            if ("TITLE".Equals(sentence.NerTag(i)))
            {
                Indicator(feats, "title_before", "t");
            }
            if (TopEmployeeTriggers.Contains(sentence.Word(i).ToLowerInvariant()))
            {
                Indicator(feats, "top_employee_trigger_before", "t");
            }
        }

        // (triggers after span)
        // The upper bound previously was Min(length, End()), which made this loop a
        // no-op; look at the same number of tokens as the "before" window instead.
        int afterLimit = Math.Min(sentence.Length(), relationSpan.End() + contextWindow);
        for (int i = relationSpan.End(); i < afterLimit; ++i)
        {
            if ("TITLE".Equals(sentence.NerTag(i)))
            {
                Indicator(feats, "title_after", "t");
            }
            if (TopEmployeeTriggers.Contains(sentence.Word(i).ToLowerInvariant()))
            {
                Indicator(feats, "top_employee_trigger_after", "t");
            }
        }

        // (triggers inside span)
        foreach (int i in relationSpan)
        {
            if ("TITLE".Equals(sentence.NerTag(i)))
            {
                Indicator(feats, "title_inside", "t");
            }
            if (TopEmployeeTriggers.Contains(sentence.Word(i).ToLowerInvariant()))
            {
                Indicator(feats, "top_employee_trigger_inside", "t");
            }
        }
    }
}