/// <summary>
/// Builds a <see cref="NodeInfo"/> for an XML element and registers its property names
/// with the schema registry.
/// </summary>
/// <param name="element">The XML element whose local name, attributes and leading text are read.</param>
/// <param name="parentSignature">
/// The signature of the parent node, or <c>null</c>. When non-null, this node's signature is
/// the parent signature, a dot, and the element's local name; otherwise it is the local name alone.
/// </param>
/// <param name="predicate">
/// Filter applied to the direct child elements: only children for which it returns
/// <c>true</c> are wrapped in child <see cref="NodeInfo"/> instances.
/// </param>
/// <param name="schemaRegistry">Receives this node's signature together with its property names.</param>
private NodeInfo(XElement element, string parentSignature, Func<XElement, bool> predicate, SchemaRegistry schemaRegistry)
{
    var localName = element.Name.LocalName;
    var path = parentSignature == null
        ? localName
        : parentSignature + "." + localName;

    // Deferred LINQ query: child NodeInfo instances are constructed (and therefore
    // registered with the schema registry) each time Children is enumerated.
    var childNodes = element
        .Elements()
        .Where(predicate)
        .Select(child => new NodeInfo(child, path, predicate, schemaRegistry));

    var propertyMap = new Dictionary<string, Bigram>();

    // Keyed by local name only, so attributes that differ just by namespace
    // overwrite one another (the last one enumerated wins).
    foreach (var attr in element.Attributes())
    {
        propertyMap[attr.Name.LocalName] = new Bigram(attr.Value);
    }

    // Only text that is the element's very first child node is captured; it is
    // stored under the reserved key "&text".
    if (element.FirstNode is XText leadingText && leadingText.Value != null)
    {
        propertyMap["&text"] = new Bigram(leadingText.Value);
    }

    schemaRegistry.AddPropertyNames(path, propertyMap.Keys);

    Children = childNodes;
    Element = element;
    Properties = propertyMap;
    Signature = path;
}
/// <summary>
/// Projects a property map onto this schema: one value per entry of <c>PropertyNames</c>,
/// in the order <c>PropertyNames</c> enumerates them.
/// </summary>
/// <param name="properties">The properties to look values up in, keyed by property name.</param>
/// <param name="id">Receives the <c>Id</c> of the current schema.</param>
/// <returns>
/// An immutable array with one entry per schema property name; names that are missing
/// from <paramref name="properties"/> yield <c>Bigram.Empty</c>.
/// </returns>
internal ImmutableArray<Bigram> Format(IReadOnlyDictionary<string, Bigram> properties, out int id)
{
    id = Id;

    var formatted = ImmutableArray.CreateBuilder<Bigram>(PropertyNames.Count);

    foreach (var name in PropertyNames)
    {
        if (properties.TryGetValue(name, out var found))
        {
            formatted.Add(found);
        }
        else
        {
            // Keep the slot so positions stay aligned with the schema's property order.
            formatted.Add(Bigram.Empty);
        }
    }

    return formatted.ToImmutable();
}
/// <summary>
/// Scores the similarity of two bigrams as the number of tokens they share divided by
/// the larger of the two token counts.
/// </summary>
/// <param name="a">The first bigram.</param>
/// <param name="b">The second bigram.</param>
/// <returns>
/// <c>1</c> when both token sequences are empty, <c>0</c> when exactly one is empty,
/// otherwise <c>shared / max(a.Tokens.Length, b.Tokens.Length)</c>.
/// </returns>
private static float Compare(Bigram a, Bigram b)
{
    var left = a.Tokens;
    var right = b.Tokens;

    if (left.IsEmpty || right.IsEmpty)
    {
        // Two empty bigrams count as a perfect match; one empty side as no match.
        return left.IsEmpty && right.IsEmpty ? 1.0f : 0.0f;
    }

    // Two-cursor walk that counts pairwise-equal tokens.
    // NOTE(review): assumes both token sequences are sorted ascending by CompareTo - confirm
    // where Bigram builds its tokens.
    var i = 0;
    var j = 0;
    var shared = 0;

    while (i < left.Length && j < right.Length)
    {
        var order = left[i].CompareTo(right[j]);

        if (order == 0)
        {
            shared++;
            i++;
            j++;
        }
        else if (order < 0)
        {
            i++;
        }
        else
        {
            j++;
        }
    }

    return (float)shared / Math.Max(left.Length, right.Length);
}
/// <summary>
/// Scores how similar this bigram is to another one by delegating to <c>Compare</c>:
/// the number of shared tokens divided by the larger of the two token counts.
/// </summary>
/// <remarks>
/// NOTE(review): this member was previously described as the Sørensen–Dice coefficient,
/// which is 2 * shared / (countA + countB). The ratio actually computed divides by the
/// larger token count instead, so the two agree only when both sides have the same number
/// of tokens. Confirm which measure is intended.
/// </remarks>
/// <param name="other">The other bigram.</param>
/// <returns>A value between 0 and 1, where 1 is a perfect match.</returns>
internal float CompareTo(Bigram other) => Compare(this, other);