예제 #1
0
        /// <summary>
        /// Calculates the Hamming distance of this instance to a specified instance.
        /// </summary>
        /// <param name="vector">The instance to compare against.</param>
        /// <returns>
        /// The number of positions at which the two feature sequences differ, or
        /// <see cref="int.MaxValue"/> when the two sequences have different lengths.
        /// </returns>
        /// <remarks>
        /// Only equal-length instances are compared position by position; any length
        /// mismatch is reported with the <see cref="int.MaxValue"/> sentinel.
        /// </remarks>
        public int HammingDistance(InstanceS vector)
        {
            int len = Features.Length;

            // Instances of different lengths are not comparable position by position.
            if (len != vector.Features.Length)
            {
                return(int.MaxValue);
            }

            // Lengths are equal from here on, so the distance starts at zero and
            // every position of both sequences is inspected.
            int distance = 0;

            for (int i = 0; i < len; i++)
            {
                if (Features[i] != vector.Features[i])
                {
                    distance++;
                }
            }

            return(distance);
        }
예제 #2
0
        /// <summary>
        /// Empirical confusion probabilities. Each key has the form "X, Y", matching the
        /// lookup string built in <see cref="ConfusionWeightedHammingDistance"/>:
        /// the argument's feature first, then this instance's feature.
        /// Built once and shared because the table never changes between calls.
        /// </summary>
        private static readonly Dictionary<string, float> ConfusionProbabilities = new Dictionary<string, float>()
        {
            { "F, E", 0.314f },
            { "E, F", 0.147f },
            { "Q, O", 0.125f },
            { "O, Q", 0.125f },
            { "D, O", 0.105f },
            { "O, D", 0.105f },
            { "I, J", 0.105f },
            { "R, K", 0.118f },
            { "N, H", 0.102f },
            { "P, F", 0.094f },
            { "H, N", 0.093f },
            { "L, E", 0.090f },
            { "I, Z", 0.085f },
            { "J, I", 0.080f },
            { "X, Z", 0.074f },
            { "Y, A", 0.073f },
            { "X, I", 0.072f },
            { "A, V", 0.063f },
            { "N, O", 0.057f },
            { "V, A", 0.057f },
            { "W, M", 0.055f },
            { "A, Y", 0.054f },
            { "B, R", 0.053f },
            { "Z, I", 0.052f }
        };

        /// <summary>
        /// Calculates the weighted Hamming distance of this instance to a specified instance.
        /// </summary>
        /// <param name="vector">The instance to compare against.</param>
        /// <returns>
        /// The weighted Hamming distance, or <see cref="float.MaxValue"/> when the two
        /// feature sequences have different lengths.
        /// </returns>
        /// <remarks>
        /// Every mismatching position costs 1, unless the pair appears in the confusion
        /// table, in which case it costs <c>max(0, 1 - 2.5 * probability)</c>. On top of
        /// that, each mismatch adds <c>(1 - Weight) * 0.5</c> to the result.
        /// </remarks>
        public float ConfusionWeightedHammingDistance(InstanceS vector)
        {
            if (Features.Length != vector.Features.Length)
            {
                return float.MaxValue;
            }

            // Scales a confusion probability into the discount applied to a mismatch.
            const float confusionMultiplier = 2.5f;

            float distance = 0;
            int errCount = 0;

            for (int i = 0; i < Features.Length; i++)
            {
                if (Features[i] != vector.Features[i])
                {
                    // Key order matters: the argument's feature first, then this instance's.
                    string conf = "" + vector.Features[i] + ", " + Features[i];

                    float probability;
                    if (ConfusionProbabilities.TryGetValue(conf, out probability))
                    {
                        // Known confusion: cheaper than a full mismatch, never negative.
                        distance += Math.Max(0, 1 - (confusionMultiplier * probability));
                    }
                    else
                    {
                        distance += 1;
                    }
                    errCount += 1;
                }
            }

            // Per-mismatch penalty that shrinks as this instance's Weight approaches 1.
            distance += (1 - Weight) * errCount * 0.5f;

            return distance;
        }
예제 #3
0
        /// <summary>
        /// Calculates the Hamming distance of this instance to a specified instance.
        /// </summary>
        /// <param name="vector">The instance to compare against.</param>
        /// <returns>
        /// The count of positions whose features differ, or <see cref="int.MaxValue"/>
        /// when the two feature sequences do not have the same length.
        /// </returns>
        /// <remarks>
        /// A length mismatch is always reported with the <see cref="int.MaxValue"/>
        /// sentinel, so the position-wise comparison only runs on equal-length inputs.
        /// </remarks>
        public int HammingDistance(InstanceS vector)
        {
            // Different lengths: treated as maximally distant.
            if (Features.Length != vector.Features.Length)
                return int.MaxValue;

            // Both sequences have the same length here, so no length-difference
            // term is needed and every index is valid for both.
            int distance = 0;

            for (int i = 0; i < Features.Length; i++)
            {
                if (Features[i] != vector.Features[i])
                    distance += 1;
            }

            return distance;
        }
예제 #4
0
        /// <summary>
        /// Scores every training instance against a test instance and returns the
        /// closest ones. String based version.
        /// </summary>
        /// <param name="test">The test instance.</param>
        /// <returns>
        /// The first <c>min(K, TrainingInstances.Count)</c> results after sorting all
        /// scored results with <c>Result</c>'s default ordering.
        /// </returns>
        /// <exception cref="NotSupportedException">
        /// The configured metric is neither the Hamming nor the weighted Hamming distance.
        /// </exception>
        public Result[] FindKNearest(InstanceS test)
        {
            var scored = new List<Result>();

            foreach (InstanceS candidate in TrainingInstances)
            {
                double measured;

                // Dispatch on the configured metric; anything else is unsupported.
                if (Metric == Metric.HammingDistance)
                {
                    measured = candidate.HammingDistance(test);
                }
                else if (Metric == Metric.WeightedHammingDistance)
                {
                    measured = candidate.ConfusionWeightedHammingDistance(test);
                }
                else
                {
                    throw new NotSupportedException();
                }

                Result entry = new Result(candidate.Class, measured);
                scored.Add(entry);
            }

            // Never ask for more neighbours than there are training instances.
            int take = (int)Math.Min(K, TrainingInstances.Count);

            scored.Sort();

            Result[] nearest = new Result[take];
            scored.CopyTo(0, nearest, 0, take);
            return nearest;
        }
예제 #5
0
        /// <summary>
        /// Loads the valid-word dictionary and builds the word classifier: a 1-nearest
        /// classifier using the weighted Hamming distance, trained with one weighted
        /// instance per line of the word-probability file.
        /// </summary>
        /// <param name="wordsPath">Path handed to the <c>WordDict</c> constructor.</param>
        /// <param name="wordProbPath">
        /// Path of a text file in which each non-blank line holds a word and its weight,
        /// separated by a single space.
        /// </param>
        private void setupWordClassifier(string wordsPath, string wordProbPath)
        {
            Game.ValidWords = new WordDict(wordsPath);

            WordClassifier = new KNearestClassifier(1, Metric.WeightedHammingDistance, WeightMode.Modal);
            WordClassifier.TrainingInstances = new List<Instance>();
            string[] lines = File.ReadAllLines(wordProbPath);

            foreach (string line in lines)
            {
                // Blank lines carry no word/weight pair; skip them instead of
                // failing on the missing second token.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                string[] word = line.Split(' ');
                InstanceS x = new InstanceS(word[0]);

                // Parse with the invariant culture so the data file loads the same way
                // regardless of the machine's regional settings.
                // NOTE(review): assumes weights are written with '.' as the decimal
                // separator; confirm against the data file.
                x.Weight = float.Parse(word[1], System.Globalization.CultureInfo.InvariantCulture);
                WordClassifier.TrainingInstances.Add(x);
            }
        }
예제 #6
0
        /// <summary>
        /// Empirical confusion probabilities. Each key has the form "X, Y", matching the
        /// lookup string built in <see cref="ConfusionWeightedHammingDistance"/>:
        /// the argument's feature first, then this instance's feature.
        /// Built once and shared because the table never changes between calls.
        /// </summary>
        private static readonly Dictionary <string, float> ConfusionProbabilities = new Dictionary <string, float>()
        {
            { "F, E", 0.314f },
            { "E, F", 0.147f },
            { "Q, O", 0.125f },
            { "O, Q", 0.125f },
            { "D, O", 0.105f },
            { "O, D", 0.105f },
            { "I, J", 0.105f },
            { "R, K", 0.118f },
            { "N, H", 0.102f },
            { "P, F", 0.094f },
            { "H, N", 0.093f },
            { "L, E", 0.090f },
            { "I, Z", 0.085f },
            { "J, I", 0.080f },
            { "X, Z", 0.074f },
            { "Y, A", 0.073f },
            { "X, I", 0.072f },
            { "A, V", 0.063f },
            { "N, O", 0.057f },
            { "V, A", 0.057f },
            { "W, M", 0.055f },
            { "A, Y", 0.054f },
            { "B, R", 0.053f },
            { "Z, I", 0.052f }
        };

        /// <summary>
        /// Calculates the weighted Hamming distance of this instance to a specified instance.
        /// </summary>
        /// <param name="vector">The instance to compare against.</param>
        /// <returns>
        /// The weighted Hamming distance, or <see cref="float.MaxValue"/> when the two
        /// feature sequences have different lengths.
        /// </returns>
        /// <remarks>
        /// Every mismatching position costs 1, unless the pair appears in the confusion
        /// table, in which case it costs <c>max(0, 1 - 2.5 * probability)</c>. On top of
        /// that, each mismatch adds <c>(1 - Weight) * 0.5</c> to the result.
        /// </remarks>
        public float ConfusionWeightedHammingDistance(InstanceS vector)
        {
            if (Features.Length != vector.Features.Length)
            {
                return(float.MaxValue);
            }

            // Scales a confusion probability into the discount applied to a mismatch.
            const float confusionMultiplier = 2.5f;

            float       distance            = 0;
            int         errCount            = 0;

            for (int i = 0; i < Features.Length; i++)
            {
                if (Features[i] != vector.Features[i])
                {
                    // Key order matters: the argument's feature first, then this instance's.
                    string conf = "" + vector.Features[i] + ", " + Features[i];

                    float probability;
                    if (ConfusionProbabilities.TryGetValue(conf, out probability))
                    {
                        // Known confusion: cheaper than a full mismatch, never negative.
                        distance += Math.Max(0, 1 - (confusionMultiplier * probability));
                    }
                    else
                    {
                        distance += 1;
                    }
                    errCount += 1;
                }
            }

            // Per-mismatch penalty that shrinks as this instance's Weight approaches 1.
            distance += (1 - Weight) * errCount * 0.5f;

            return(distance);
        }