/// <summary>
/// Calculates the Hamming distance of this instance to a specified instance.
/// </summary>
/// <param name="vector">The instance to compare against.</param>
/// <returns>
/// The number of positions at which the two feature sequences differ, or
/// <see cref="int.MaxValue"/> when the sequences have different lengths.
/// </returns>
/// <remarks></remarks>
public int HammingDistance(InstanceS vector)
{
    // Sequences of different lengths are not compared position by position;
    // they are reported as maximally distant.
    if (Features.Length != vector.Features.Length)
    {
        return int.MaxValue;
    }

    // Both sequences have the same length from here on, so the distance is
    // simply the count of mismatching positions.
    int length = Features.Length;
    int distance = 0;
    for (int i = 0; i < length; i++)
    {
        if (Features[i] != vector.Features[i])
        {
            distance++;
        }
    }

    return distance;
}
/// <summary>
/// Empirical confusion probabilities. Each key has the form "X, Y", where X is the
/// symbol taken from the instance passed to
/// <see cref="ConfusionWeightedHammingDistance"/> and Y is the symbol at the same
/// position in the instance the method is called on.
/// </summary>
private static readonly Dictionary<string, float> ConfusionProbabilities = new Dictionary<string, float>()
{
    { "F, E", 0.314f }, { "E, F", 0.147f }, { "Q, O", 0.125f }, { "O, Q", 0.125f },
    { "D, O", 0.105f }, { "O, D", 0.105f }, { "I, J", 0.105f }, { "R, K", 0.118f },
    { "N, H", 0.102f }, { "P, F", 0.094f }, { "H, N", 0.093f }, { "L, E", 0.090f },
    { "I, Z", 0.085f }, { "J, I", 0.080f }, { "X, Z", 0.074f }, { "Y, A", 0.073f },
    { "X, I", 0.072f }, { "A, V", 0.063f }, { "N, O", 0.057f }, { "V, A", 0.057f },
    { "W, M", 0.055f }, { "A, Y", 0.054f }, { "B, R", 0.053f }, { "Z, I", 0.052f }
};

/// <summary>
/// Calculates the weighted Hamming distance of this instance to a specified instance.
/// </summary>
/// <param name="vector">The instance to compare against.</param>
/// <returns>
/// The weighted Hamming distance, or <see cref="float.MaxValue"/> when the two
/// feature sequences have different lengths.
/// </returns>
/// <remarks>
/// Every mismatching position costs 1, unless the symbol pair is listed in the
/// confusion table, in which case it costs max(0, 1 - 2.5 * p) where p is the
/// listed probability. Afterwards a penalty of (1 - Weight) * 0.5 per mismatch
/// is added.
/// </remarks>
public float ConfusionWeightedHammingDistance(InstanceS vector)
{
    if (Features.Length != vector.Features.Length)
    {
        return float.MaxValue;
    }

    const float confusionMultiplier = 2.5f;

    float distance = 0;
    int errCount = 0;

    for (int i = 0; i < Features.Length; i++)
    {
        if (Features[i] != vector.Features[i])
        {
            string conf = "" + vector.Features[i] + ", " + Features[i];

            float probability;
            if (ConfusionProbabilities.TryGetValue(conf, out probability))
            {
                // Listed confusions are discounted; the cost is clamped so it never goes negative.
                distance += Math.Max(0, 1 - (confusionMultiplier * probability));
            }
            else
            {
                distance += 1;
            }

            errCount += 1;
        }
    }

    // Per-mismatch penalty that shrinks as this instance's Weight approaches 1.
    distance += (1 - Weight) * errCount * 0.5f;

    return distance;
}
/// <summary>
/// Calculates the Hamming distance of this instance to a specified instance.
/// </summary>
/// <param name="vector">The instance to compare against.</param>
/// <returns>
/// The count of positions whose features differ, or <see cref="int.MaxValue"/>
/// when the two feature sequences are not the same length.
/// </returns>
/// <remarks></remarks>
public int HammingDistance(InstanceS vector)
{
    // A length mismatch short-circuits to the sentinel value, so no
    // length-difference term is ever needed below.
    if (Features.Length != vector.Features.Length)
    {
        return int.MaxValue;
    }

    int distance = 0;
    int len = Features.Length;

    for (int i = 0; i < len; i++)
    {
        distance += (Features[i] != vector.Features[i]) ? 1 : 0;
    }

    return distance;
}
/// <summary>
/// Finds the K nearest instances to a test instance. String based version.
/// </summary>
/// <param name="test">The test instance.</param>
/// <returns>
/// An array with the first k results after sorting, where k is the smaller of
/// K and the number of training instances.
/// </returns>
/// <remarks>
/// Ordering comes from <c>List&lt;Result&gt;.Sort()</c>, i.e. whatever default
/// comparison <c>Result</c> provides.
/// </remarks>
public Result[] FindKNearest(InstanceS test)
{
    var scored = new List<Result>();

    foreach (InstanceS candidate in TrainingInstances)
    {
        double distance;

        switch (Metric)
        {
            case Metric.HammingDistance:
                distance = candidate.HammingDistance(test);
                break;

            case Metric.WeightedHammingDistance:
                distance = candidate.ConfusionWeightedHammingDistance(test);
                break;

            default:
                // Only the two string-based metrics are handled here.
                throw new NotSupportedException();
        }

        scored.Add(new Result(candidate.Class, distance));
    }

    // Never return more entries than there are training instances.
    int take = (int)Math.Min(K, TrainingInstances.Count);

    scored.Sort();

    Result[] nearest = new Result[take];
    for (int i = 0; i < take; i++)
    {
        nearest[i] = scored[i];
    }

    return nearest;
}
/// <summary>
/// Loads the valid-word dictionary and fills the word classifier with one
/// training instance per line of the word-probability file.
/// </summary>
/// <param name="wordsPath">Path handed to the <c>WordDict</c> constructor.</param>
/// <param name="wordProbPath">
/// Path of a text file holding one entry per line: a word, a single space, and
/// the word's weight. Blank lines are ignored.
/// </param>
private void setupWordClassifier(string wordsPath, string wordProbPath)
{
    Game.ValidWords = new WordDict(wordsPath);

    WordClassifier = new KNearestClassifier(1, Metric.WeightedHammingDistance, WeightMode.Modal);
    WordClassifier.TrainingInstances = new List<Instance>();

    foreach (string line in File.ReadAllLines(wordProbPath))
    {
        // Tolerate blank lines instead of failing on the missing weight field.
        if (line.Trim().Length == 0)
        {
            continue;
        }

        string[] fields = line.Split(' ');

        InstanceS instance = new InstanceS(fields[0]);

        // The file is machine-readable data, so parse the weight independently of
        // the current culture; otherwise "0.314" misparses under comma-decimal locales.
        instance.Weight = float.Parse(fields[1], System.Globalization.CultureInfo.InvariantCulture);

        WordClassifier.TrainingInstances.Add(instance);
    }
}
/// <summary>
/// Empirical confusion probabilities. Each key has the form "X, Y", where X is the
/// symbol taken from the instance passed to
/// <see cref="ConfusionWeightedHammingDistance"/> and Y is the symbol at the same
/// position in the instance the method is called on.
/// </summary>
private static readonly Dictionary<string, float> ConfusionProbabilities = new Dictionary<string, float>()
{
    { "F, E", 0.314f }, { "E, F", 0.147f }, { "Q, O", 0.125f }, { "O, Q", 0.125f },
    { "D, O", 0.105f }, { "O, D", 0.105f }, { "I, J", 0.105f }, { "R, K", 0.118f },
    { "N, H", 0.102f }, { "P, F", 0.094f }, { "H, N", 0.093f }, { "L, E", 0.090f },
    { "I, Z", 0.085f }, { "J, I", 0.080f }, { "X, Z", 0.074f }, { "Y, A", 0.073f },
    { "X, I", 0.072f }, { "A, V", 0.063f }, { "N, O", 0.057f }, { "V, A", 0.057f },
    { "W, M", 0.055f }, { "A, Y", 0.054f }, { "B, R", 0.053f }, { "Z, I", 0.052f }
};

/// <summary>
/// Calculates the weighted Hamming distance of this instance to a specified instance.
/// </summary>
/// <param name="vector">The instance to compare against.</param>
/// <returns>
/// The weighted Hamming distance, or <see cref="float.MaxValue"/> when the two
/// feature sequences have different lengths.
/// </returns>
/// <remarks>
/// A mismatch whose symbol pair appears in the confusion table costs
/// max(0, 1 - 2.5 * p), with p the listed probability; any other mismatch costs 1.
/// A further (1 - Weight) * 0.5 is added for every mismatch.
/// </remarks>
public float ConfusionWeightedHammingDistance(InstanceS vector)
{
    if (Features.Length != vector.Features.Length)
    {
        return float.MaxValue;
    }

    const float confusionMultiplier = 2.5f;

    float distance = 0;
    int errCount = 0;

    for (int i = 0; i < Features.Length; i++)
    {
        if (Features[i] == vector.Features[i])
        {
            continue;
        }

        string conf = "" + vector.Features[i] + ", " + Features[i];

        float probability;
        if (ConfusionProbabilities.TryGetValue(conf, out probability))
        {
            // Known confusions are cheaper than an arbitrary mismatch, but never negative.
            distance += Math.Max(0, 1 - (confusionMultiplier * probability));
        }
        else
        {
            distance += 1;
        }

        errCount += 1;
    }

    // Per-mismatch penalty that shrinks as this instance's Weight approaches 1.
    distance += (1 - Weight) * errCount * 0.5f;

    return distance;
}