Example #1
0
        public double CalculateMeasure(DataMining.Model.IClassifier classifier, Data.Dataset dataset)
        {
            double MSE = 0;

            Example example = null;

            for (int i = 0; i < dataset.Size; i++)
            {
                example = dataset[i];

                int actual = example.Label;

                Prediction prediction = classifier.Classify(example);

                // Accumulate the squared complement of the probability assigned to the true class.
                MSE += Math.Pow(1 - prediction.Probabilities[actual], 2);
            }

            // Note: this is Math.Sqrt(sum) / n, which differs from the conventional RMSE of Math.Sqrt(sum / n).
            MSE = Math.Sqrt(MSE) / dataset.Size;

            return(MSE);
        }
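For reference, the measure above accumulates (1 - p_actual)^2 per example and then takes the square root of the sum divided by the dataset size. Below is a minimal, self-contained sketch of the same arithmetic on a plain probability array; the class name and the probability values are made up for illustration and are not part of the project code.

        // Illustration only: mirrors the arithmetic of Example #1 on hard-coded values.
        using System;

        class MseMeasureSketch
        {
            static void Main()
            {
                // Probability assigned to the true class of each test example (illustrative values).
                double[] trueClassProbabilities = { 0.9, 0.6, 0.75, 0.4 };

                double sum = 0;
                foreach (double p in trueClassProbabilities)
                {
                    sum += Math.Pow(1 - p, 2);      // same per-example term as the loop above
                }

                // Same final step as Example #1: square root of the sum, divided by the number of examples.
                double measure = Math.Sqrt(sum) / trueClassProbabilities.Length;
                Console.WriteLine(measure);
            }
        }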
Example #2
0
        public double CalculateMeasure(DataMining.Model.IClassifier classifier, Data.Dataset dataset)
        {
            double QLF = 0;

            Example example = null;

            for (int i = 0; i < dataset.Size; i++)
            {
                example = dataset[i];

                int actual = example.Label;

                Prediction prediction  = classifier.Classify(example);
                double     probability = prediction.Probabilities[prediction.Label];

                // Quadratic-loss-style penalty accumulated over the class probability vector.
                double value = 0;

                for (int index = 0; index < dataset.Metadata.Target.Length; index++)
                {
                    if (index == actual)
                    {
                        value -= 2 * probability;
                    }
                    else
                    {
                        value += Math.Pow(prediction.Probabilities[index], 2);
                    }
                }

                QLF += (2 + value) / 3;
            }

            QLF = QLF / dataset.Size;

            return(QLF);
        }
Example #3
0
        public double CalculateMeasure(DataMining.Model.IClassifier classifier, Data.Dataset dataset)
        {
            double BIReward = 0;

            // Prior probability of each class, estimated from the class frequencies in the dataset.
            double[] classProbabilities = new double[dataset.Metadata.Target.Length];

            for (int i = 0; i < classProbabilities.Length; i++)
            {
                classProbabilities[i] = (double)dataset.Filter(i).Count / (double)dataset.Size;
            }

            Example example = null;

            for (int i = 0; i < dataset.Size; i++)
            {
                example = dataset[i];

                int actual = example.Label;

                Prediction prediction  = classifier.Classify(example);
                int        predicted   = prediction.Label;
                double     probability = prediction.Probabilities[prediction.Label];

                if (predicted == actual)
                {
                    // Correct prediction: reward confidence above the class prior.
                    BIReward += Math.Log(probability / classProbabilities[actual]);
                }
                else
                {
                    // Incorrect prediction: compare the probability mass not given to the wrong class
                    // against one minus the actual class prior.
                    BIReward += Math.Log((1 - probability) / (1 - classProbabilities[actual]));
                }
            }

            BIReward = (BIReward / dataset.Size) + 0.5;

            return(BIReward);
        }
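The per-example reward term above compares the classifier's confidence against the class prior: log(p / prior) when the prediction is correct and log((1 - p) / (1 - prior)) otherwise. The following self-contained sketch isolates that term; the class name, the RewardTerm helper, and the probability values are illustrative assumptions, not part of the project code.

        // Illustration only: the per-example Bayesian information reward term from Example #3.
        using System;

        class BiRewardSketch
        {
            static double RewardTerm(bool correct, double probability, double prior)
            {
                // Correct prediction: reward confidence above the class prior.
                // Incorrect prediction: reward the probability mass that was not placed on the wrong class.
                return correct
                    ? Math.Log(probability / prior)
                    : Math.Log((1 - probability) / (1 - prior));
            }

            static void Main()
            {
                // Illustrative values: a confident correct prediction and a confident wrong one.
                Console.WriteLine(RewardTerm(true,  0.9, 0.5));   // positive reward
                Console.WriteLine(RewardTerm(false, 0.9, 0.5));   // negative reward
            }
        }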
Example #4
0
        public static void SaveDatasetToArff(Data.Dataset dataset, string filePath)
        {
            using (StreamWriter writer = new StreamWriter(filePath))
            {
                writer.WriteLine("@relation " + dataset.Metadata.DatasetName);
                writer.WriteLine();

                foreach (Data.Attribute attribute in dataset.Metadata.Attributes)
                {
                    writer.WriteLine("@attribute " + attribute.ToString());
                }
                writer.WriteLine("@attribute " + dataset.Metadata.Target.ToString());
                writer.WriteLine();

                writer.WriteLine("@data");
                foreach (Data.Instance instance in dataset)
                {
                    writer.WriteLine(instance.ToString());
                }

            }
        }
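The writer above produces the usual ARFF layout: an @relation line, one @attribute line per attribute plus one for the target, and the examples under @data. The sketch below writes a tiny file with the same layout; the relation name, attribute definitions, file name, and data values are all made up for illustration.

        // Illustration only: the ARFF section layout produced by SaveDatasetToArff, with hard-coded content.
        using System.IO;

        class ArffLayoutSketch
        {
            static void Main()
            {
                using (StreamWriter writer = new StreamWriter("tiny.arff"))
                {
                    writer.WriteLine("@relation weather");
                    writer.WriteLine();

                    writer.WriteLine("@attribute temperature numeric");
                    writer.WriteLine("@attribute outlook {sunny,rainy}");
                    writer.WriteLine("@attribute class {yes,no}");
                    writer.WriteLine();

                    writer.WriteLine("@data");
                    writer.WriteLine("21.5,sunny,yes");
                    writer.WriteLine("12.0,rainy,no");
                }
            }
        }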
Example #5
0
        public double CalculateMeasure(DataMining.Model.IClassifier classifier, Data.Dataset dataset)
        {
            return(this.CalculateMeasure(ConfusionMatrix.ComputeConfusionMatrixes(classifier, dataset)));
        }
Example #6
0
        public static ConfusionMatrix[] ComputeConfusionMatrixes(Model.IClassifier classifier, Data.Dataset testset)
        {
            // One confusion matrix per class value (one-vs-rest counts).
            ConfusionMatrix[] list = new ConfusionMatrix[testset.Metadata.Target.Values.Length];

            Instance instance = null;

            for (int i = 0; i < testset.Size; i++)
            {
                instance = testset[i];
                bool correct = false;
                int  actual  = instance.Label;

                Prediction prediction  = classifier.Classify(instance);
                int        predicted   = prediction.Label;
                double     probability = prediction.Probabilities[prediction.Label];

                if (predicted == actual)
                {
                    correct = true;
                }

                for (int classIndex = 0; classIndex < list.Length; classIndex++)
                {
                    if (correct)
                    {
                        if (classIndex == actual)
                        {
                            list[classIndex].TP++;
                            list[classIndex].TP_Prob += probability;
                        }

                        else
                        {
                            list[classIndex].TN++;
                            list[classIndex].TN_Prob += probability;
                        }
                    }
                    else
                    {
                        if (classIndex == actual)
                        {
                            list[classIndex].FN++;
                            list[classIndex].FN_Prob += probability;
                        }
                        else if (classIndex == predicted)
                        {
                            list[classIndex].FP++;
                            list[classIndex].FP_Prob += probability;
                        }
                        else
                        {
                            list[classIndex].TN++;
                            list[classIndex].TN_Prob += probability;
                        }
                    }
                }
            }

            return(list);
        }
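Each per-class matrix built above holds one-vs-rest counts (TP, FP, FN, TN), from which the usual per-class metrics follow directly. A self-contained sketch with the counts hard-coded for illustration (the class name and values are assumptions, not taken from the project):

        // Illustration only: per-class precision, recall and accuracy from one-vs-rest confusion counts.
        using System;

        class ConfusionMetricsSketch
        {
            static void Main()
            {
                // Hard-coded counts for a single class (illustrative values).
                int tp = 40, fp = 10, fn = 5, tn = 45;

                double precision = (double)tp / (tp + fp);                  // 0.800
                double recall    = (double)tp / (tp + fn);                  // ~0.889
                double accuracy  = (double)(tp + tn) / (tp + fp + fn + tn); // 0.850

                Console.WriteLine($"precision={precision:F3} recall={recall:F3} accuracy={accuracy:F3}");
            }
        }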
Example #7
0
        public ClassBasedSimilarityMeasure(Data.Dataset dataset)
        {
            this._dataset = dataset;
            this.CalculateBaseSimilarities();
        }
Example #8
0
        private static Data.Example ParseHierarchicalExampleLine(int exampleIndex, string line, Data.Dataset dataset, bool skipFirstAttribute)
        {
            string[] parts = line.Split(',');
            List<double> values = new List<double>();

            List<int> label = new List<int>();
            string[] labelParts = parts[parts.Length - 1].Split(';');
            foreach (string target in labelParts)
                if (!string.IsNullOrWhiteSpace(target))
                    label.Add(dataset.Metadata.Target.GetIndex(target));



            for (int index = 0; index < parts.Length - 1; index++)
            {
                if (dataset.Metadata.Attributes[index] is Data.NominalAttribute)
                {
                    Data.NominalAttribute attribute = dataset.Metadata.Attributes[index] as Data.NominalAttribute;
                    string value = parts[index];
                    values.Add(attribute.GetIndex(value));
                }
                else
                {
                    double value = double.NaN;
                    if (parts[index] != "?")
                        value = double.Parse(parts[index]);
                    values.Add(value);

                }

            }


            Data.Example example = new Data.Example(dataset.Metadata, exampleIndex, values.ToArray(), label);
            return example;
        }
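The last comma-separated field of each hierarchical example line carries one or more class labels separated by ';'. The sketch below shows that split in isolation; the class name and the sample line are made up for illustration.

        // Illustration only: splitting a hierarchical example line the way ParseHierarchicalExampleLine does.
        using System;

        class LabelSplitSketch
        {
            static void Main()
            {
                string line = "5.1,3.5,1.4,node_A;node_B";   // made-up example line

                string[] parts      = line.Split(',');
                string[] labelParts = parts[parts.Length - 1].Split(';');

                foreach (string label in labelParts)
                {
                    if (!string.IsNullOrWhiteSpace(label))
                    {
                        Console.WriteLine(label);            // node_A, node_B
                    }
                }
            }
        }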
Example #9
0
        public static Data.Dataset LoadHierarchicalDatasetFromTxt(string filePath, bool skipfirstAttribute)
        {
            StreamReader reader = new StreamReader(filePath);
            
            string datasetName = null;
            Data.Metadata metadata = null;
            Data.Dataset dataset = null;

            List<Data.Attribute> attributes = new List<Data.Attribute>();
            List<Data.Example> examples = new List<Data.Example>();
            List<Data.Node> nodes = new List<Data.Node>();

            int attributeIndex = 0;
            int exampleIndex = 0;

            string mode = "start";
            


            while (!reader.EndOfStream)
            {
                string line = reader.ReadLine();
                if (!string.IsNullOrEmpty(line))
                {
                    if (line.Contains("%"))
                        continue;
                    if (line.Contains("@relation"))
                    {
                        datasetName = line.Substring(line.IndexOf(' ')).Trim();
                        datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
                    }

                    else if (line.Contains("@attribute"))
                    {

                        Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);

                        if (attribute != null)
                        {
                            attributeIndex++;
                            attributes.Add(attribute);
                        }


                    }

                    else if (line.Contains("@ontology"))
                    {
                        mode = "ontolog";

                    }

                    else if (line.Contains("@data"))
                    {
                        List<string> classValues = new List<string>();

                        int counter = 0;

                        // Assign class value indexes to every node except nodes[0], which is taken to be the hierarchy root.
                        for (int i = 1; i < nodes.Count; i++)
                        {
                            Data.Node node = nodes[i];
                            node.ValueIndex = counter;
                            classValues.Add(node.Name);
                            counter++;
                        }

                        Data.ClassHierarchy classHierarchy = new Data.ClassHierarchy(nodes.ToArray());

                        Data.HierarchicalAttribute target = new Data.HierarchicalAttribute("class", attributes.Count, classValues.ToArray(), classHierarchy);
                        metadata = new Data.Metadata(datasetName, attributes.ToArray(), target,true);
                        dataset = new Data.Dataset(metadata);

                        mode = "data";

                    }
                    else
                    {
                        if (mode == "ontolog")
                        {
                            Data.Node node = ParseOntologyLine(line);
                            if (!nodes.Exists(n => n.Name == node.Name))
                                nodes.Add(node);

                        }
                        else
                        {

                            Data.Example example = ParseHierarchicalExampleLine(exampleIndex, line, dataset, skipfirstAttribute);
                            exampleIndex++;
                            examples.Add(example);

                        }

                    }
                }

            }

            reader.Close();

            dataset.SetExamples(examples.ToArray());

            return dataset;
        }
Example #10
0
        public static Data.Dataset LoadDatasetFromArff(string filePath)
        {
            StreamReader reader = new StreamReader(filePath);

            string datasetName = null;
            Data.Metadata metadata = null;
            Data.Dataset dataset = null;

            List<Data.Attribute> attributes = new List<Data.Attribute>();
            List<Data.Example> examples = new List<Data.Example>();
            int attributeIndex = 0;
            int exampleIndex = 0;

            List<string> exampleLines = new List<string>();

            while (!reader.EndOfStream)
            {
                string line = reader.ReadLine();
                if (!string.IsNullOrEmpty(line))
                {
                    if (line.Contains("%"))
                        continue;
                    if (line.Contains("@relation"))
                    {
                        datasetName = line.Substring(line.IndexOf(' ')).Trim();
                        datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
                    }

                    else if (line.Contains("@attribute"))
                    {

                        Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);

                        if (attribute != null)
                        {
                            attributeIndex++;
                            attributes.Add(attribute);
                        }

                    }

                    else if (line.Contains("@data"))
                    {
                        int targetIndex = attributes.FindIndex(m => string.Compare(m.Name, "Class", true) == 0);
                        Data.NominalAttribute target = attributes[targetIndex] as Data.NominalAttribute;

                        // Remove the class attribute from the input attributes; it becomes the prediction target.
                        attributes.RemoveAt(targetIndex);
                        metadata = new Data.Metadata(datasetName, attributes.ToArray(), target,false);
                        dataset = new Data.Dataset(metadata);


                    }
                    else
                    {
                        if (exampleLines.Contains(line))
                            continue;

                        Data.Example example = ParseExampleLine(exampleIndex, line, dataset);
                        exampleIndex++;
                        examples.Add(example);
                        exampleLines.Add(line);

                    }
                }

            }

            reader.Close();

            dataset.SetExamples(examples.ToArray());

            return dataset;
        }
Example #11
0
        private static Data.Instance ParseinstanceLine(int instanceIndex, string line, Data.Dataset dataset)
        {
            string[]      parts  = line.Split(',');
            List <double> values = new List <double>();
            int           label  = dataset.Metadata.Target.GetIndex(parts[parts.Length - 1]);

            for (int index = 0; index < parts.Length - 1; index++)
            {
                if (dataset.Metadata.Attributes[index] is Data.NominalAttribute)
                {
                    Data.NominalAttribute attribute = dataset.Metadata.Attributes[index] as Data.NominalAttribute;
                    string value = parts[index];
                    values.Add(attribute.GetIndex(value));
                }
                else
                {
                    double value = double.NaN;
                    if (parts[index] != "?")
                    {
                        value = double.Parse(parts[index]);
                    }
                    values.Add(value);
                }
            }


            Data.Instance instance = new Data.Instance(dataset.Metadata, instanceIndex, values.ToArray(), label);
            return(instance);
        }
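One caveat with double.Parse(parts[index]) as used in the parsers above: it honours the current thread culture, so on machines where ',' is the decimal separator a value such as "5.1" can fail to parse or parse incorrectly. Below is a hedged, self-contained sketch of a culture-invariant alternative; the class name and the ParseNumericValue helper are hypothetical, not part of the project code.

        // Sketch only: culture-invariant parsing of a numeric attribute value, with "?" treated as missing.
        using System;
        using System.Globalization;

        class NumericParseSketch
        {
            static double ParseNumericValue(string token)
            {
                if (token == "?")
                {
                    return double.NaN;                        // missing value, as in the parsers above
                }
                return double.Parse(token, CultureInfo.InvariantCulture);
            }

            static void Main()
            {
                Console.WriteLine(ParseNumericValue("5.1"));  // 5.1 regardless of the machine's culture
                Console.WriteLine(ParseNumericValue("?"));    // NaN
            }
        }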
Example #12
0
        public static ConfusionMatrix[] GetConfusionMatrixes(Model.Hierarchical.IHierarchicalClassifier classifier, Data.Dataset testset)
        {
            ConfusionMatrix[] list = new ConfusionMatrix[testset.Metadata.Target.Values.Length];


            foreach (Data.Example example in testset)
            {
                // Hierarchical case: both the prediction and the true label are sets of class indexes.
                int[] predicted = classifier.Classify(example);
                int[] actual    = example.HierarchicalLabel;


                for (int classIndex = 0; classIndex < list.Length; classIndex++)
                {
                    if (predicted.Contains(classIndex))
                    {
                        if (actual.Contains(classIndex))
                        {
                            list[classIndex].TP++;
                        }
                        else
                        {
                            list[classIndex].FP++;
                        }
                    }
                    else
                    {
                        if (actual.Contains(classIndex))
                        {
                            list[classIndex].FN++;
                        }
                        else
                        {
                            list[classIndex].TN++;
                        }
                    }
                }
            }

            return(list);
        }
Example #13
0
        public static ConfusionMatrix[] GetConfusionMatrixes(Model.IClassifier classifier, Data.Dataset testset)
        {
            ConfusionMatrix[] list = new ConfusionMatrix[testset.Metadata.Target.Values.Length];


            foreach (Data.Example example in testset)
            {
                bool correct = false;
                int  actual  = example.Label;

                Prediction prediction  = classifier.Classify(example);
                int        predicted   = prediction.Label;
                double     probability = prediction.Probability;

                if (predicted == actual)
                {
                    correct = true;
                }

                for (int classIndex = 0; classIndex < list.Length; classIndex++)
                {
                    if (correct)
                    {
                        if (classIndex == actual)
                        {
                            list[classIndex].TP++;
                            list[classIndex].TP_Prob += probability;
                        }

                        else
                        {
                            list[classIndex].TN++;
                            list[classIndex].TN_Prob += probability;
                        }
                    }
                    else
                    {
                        if (classIndex == actual)
                        {
                            list[classIndex].FN++;
                            list[classIndex].FN_Prob += probability;
                        }
                        else if (classIndex == predicted)
                        {
                            list[classIndex].FP++;
                            list[classIndex].FP_Prob += probability;
                        }
                        else
                        {
                            list[classIndex].TN++;
                            list[classIndex].TN_Prob += probability;
                        }
                    }
                }
            }

            return(list);
        }
Example #14
0
        private static Data.Example ParseHierarchicalExampleLine(int exampleIndex, string line, Data.Dataset dataset, bool skipFirstAttribute)
        {
            string[]   parts  = line.Split(',');
            List <int> values = new List <int>();

            List <int> label = new List <int>();

            string[] labelParts = parts[parts.Length - 1].Split(';');
            foreach (string target in labelParts)
            {
                if (!string.IsNullOrWhiteSpace(target))
                {
                    label.Add(dataset.Metadata.Target.GetIndex(target));
                }
            }



            int skips = 0;
            int step  = skipFirstAttribute ? 1 : 0;
            int index = skipFirstAttribute ? 1 : 0;

            for (; index < parts.Length - 1; index++)
            {
                string value = parts[index];

                // Values containing "All" are skipped; 'skips' keeps the remaining values aligned with the attribute indexes.
                if (!value.Contains("All"))
                {
                    values.Add(dataset.Metadata.Attributes[index - step - skips].GetIndex(value));
                }
                else
                {
                    skips++;
                }
            }


            Data.Example example = new Data.Example(dataset.Metadata, exampleIndex, values.ToArray(), label);
            return(example);
        }
Example #15
0
        public static Data.Dataset LoadDatasetFromArff(string filePath)
        {
            StreamReader reader = new StreamReader(filePath);

            string datasetName = null;

            Data.Metadata metadata = null;
            Data.Dataset  dataset  = null;

            List <Data.Attribute> attributes = new List <Data.Attribute>();
            List <Data.Example>   examples   = new List <Data.Example>();

            string[] target = null;

            int attributeIndex = 0;
            int exampleIndex   = 0;

            List <int> nominalAttributesIndexes = new List <int>();
            int        aindex = 0;

            List <string> exampleLines = new List <string>();

            while (!reader.EndOfStream)
            {
                string line = reader.ReadLine();
                if (!string.IsNullOrEmpty(line))
                {
                    if (line.Contains("%"))
                    {
                        continue;
                    }
                    if (line.Contains("@relation"))
                    {
                        datasetName = line.Substring(line.IndexOf(' ')).Trim();
                        datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
                    }

                    else if (line.Contains("@attribute"))
                    {
                        if (line.ToUpper().Contains("CLASS"))
                        {
                            target = ParseClassAttributeLine(line);
                        }

                        else
                        {
                            Data.Attribute [] currentAttributes = ParseAttributeLine(line, attributeIndex);
                            attributeIndex += currentAttributes.Length;
                            attributes.AddRange(currentAttributes);

                            if (currentAttributes.Length > 1 || !line.Contains(currentAttributes[0].Name))
                            {
                                nominalAttributesIndexes.Add(aindex);
                            }
                            aindex++;
                        }
                    }

                    else if (line.Contains("@data"))
                    {
                        metadata = new Data.Metadata(datasetName, attributes.ToArray(), target);
                        dataset  = new Data.Dataset(metadata);
                    }
                    else
                    {
                        if (exampleLines.Contains(line))
                        {
                            continue;
                        }

                        Data.Example example = ParseExampleLine(exampleIndex, line, dataset.Metadata, nominalAttributesIndexes);
                        exampleIndex++;
                        examples.Add(example);
                        exampleLines.Add(line);
                    }
                }
            }

            reader.Close();

            dataset.SetExamples(examples.ToArray());

            return(dataset);
        }
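Both ARFF loaders above filter duplicate example lines with exampleLines.Contains(line), which scans the whole list for every line and makes loading quadratic in the number of examples. The sketch below shows the same duplicate check with a HashSet, assuming exact string equality is still the intended criterion; the class name and the sample lines are made up for illustration.

        // Sketch only: constant-time duplicate detection for example lines.
        using System;
        using System.Collections.Generic;

        class DuplicateFilterSketch
        {
            static void Main()
            {
                string[] lines = { "5.1,3.5,setosa", "6.2,2.9,versicolor", "5.1,3.5,setosa" };

                HashSet<string> seen = new HashSet<string>();
                foreach (string line in lines)
                {
                    if (!seen.Add(line))                      // Add returns false if the line was already present
                    {
                        continue;                             // duplicate: skip it, as the loaders above do
                    }
                    Console.WriteLine(line);                  // the line would be parsed into an example here
                }
            }
        }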