Exemplo n.º 1
0
        /// <summary>
        /// Runs k-means on the single variable <paramref name="target"/> and reports, per cluster,
        /// the smallest and largest target value that was assigned to it.
        /// </summary>
        /// <param name="data">Dataset to cluster; it is cast to <c>Dataset</c>.</param>
        /// <param name="target">Name of the double variable used as the only clustering input.</param>
        /// <param name="contours">Number of clusters requested from k-means.</param>
        /// <param name="contourCluster">Receives the model of the k-means solution.</param>
        /// <param name="contourNames">Receives one label per cluster index, formatted as "[min;max]".</param>
        /// <param name="borders">Receives one { min, max } pair per cluster index.</param>
        private static void CreateClusters(IDataset data, string target, int contours, out IClusteringModel contourCluster, out Dictionary <int, string> contourNames, out double[][] borders)
        {
            var problemData = new ClusteringProblemData((Dataset)data, new[] { target });

            // The third argument (3) is passed through unchanged; its meaning is defined by CreateKMeansSolution.
            var solution = KMeansClustering.CreateKMeansSolution(problemData, contours, 3);
            contourCluster = solution.Model;

            // Every cluster starts with an inverted range so that the first value seen replaces both ends.
            // A local alias is used because the same array is read again further down.
            var ranges = Enumerable.Range(0, contours)
                                   .Select(_ => new[] { double.MaxValue, double.MinValue })
                                   .ToArray();
            borders = ranges;

            var assignments = contourCluster.GetClusterValues(problemData.Dataset, problemData.AllIndices).ToArray();
            var values      = problemData.Dataset.GetDoubleValues(target).ToArray();

            foreach (var row in problemData.AllIndices)
            {
                // NOTE(review): the -1 presumably maps 1-based cluster values to array slots - confirm against GetClusterValues.
                var range = ranges[assignments[row] - 1];
                var value = values[row];

                if (value < range[0])
                {
                    range[0] = value;
                }
                if (value > range[1])
                {
                    range[1] = value;
                }
            }

            contourNames = new Dictionary <int, string>();
            var slot = 0;
            foreach (var range in ranges)
            {
                contourNames.Add(slot, "[" + range[0] + ";" + range[1] + "]");
                slot++;
            }
        }
        /// <summary>
        /// Builds clustering problem data from an already parsed table file, optionally shuffling
        /// the rows and splitting them into a training and a test partition.
        /// </summary>
        /// <param name="path">Path of the imported file; only its file name is used, as the problem data name.</param>
        /// <param name="type">Import options; <c>Shuffle</c> and <c>TrainingPercentage</c> are read.</param>
        /// <param name="csvFileParser">Parser that already holds the variable names, values and row count.</param>
        /// <returns>The problem data with its input variables, partitions and name set.</returns>
        protected override IClusteringProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser)
        {
            List <IList> columns = csvFileParser.Values;
            if (type.Shuffle)
            {
                columns = Shuffle(columns);
            }

            Dataset dataset = new Dataset(csvFileParser.VariableNames, columns);

            // Rows [0, trainingPartEnd) make up the training partition (integer percentage of all rows).
            int trainingPartEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
            var trainingRows    = Enumerable.Range(0, trainingPartEnd);

            // Drop input variables that are constant in the training partition. With fewer than
            // two training rows no filtering is done, so every double variable is kept.
            List <string> allowedInputVars = trainingPartEnd >= 2
                ? dataset.DoubleVariables
                         .Where(name => dataset.GetDoubleValues(name, trainingRows).Range() > 0)
                         .ToList()
                : new List <string>(dataset.DoubleVariables);

            ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

            clusteringData.TrainingPartition.Start = 0;
            clusteringData.TrainingPartition.End   = trainingPartEnd;
            clusteringData.TestPartition.Start     = trainingPartEnd;
            clusteringData.TestPartition.End       = csvFileParser.Rows;

            clusteringData.Name = Path.GetFileName(path);

            return clusteringData;
        }
        /// <summary>
        /// Parses the table file at <paramref name="path"/> and builds clustering problem data from it,
        /// using the first two thirds of the rows (integer division) to decide which input variables to keep.
        /// </summary>
        /// <param name="path">Path of the file to parse; its file name becomes the problem data name.</param>
        /// <returns>The problem data with its input variables, partitions and name set.</returns>
        /// <remarks>
        /// Files with fewer than two rows yield zero training rows. They are imported with an empty
        /// training partition instead of failing with an <see cref="InvalidOperationException"/>.
        /// </remarks>
        public override IClusteringProblemData ImportData(string path)
        {
            var csvFileParser = new TableFileParser();

            csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

            Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

            int trainingRowCount = (csvFileParser.Rows * 2) / 3;
            var trainingIndizes  = Enumerable.Range(0, trainingRowCount);

            // Turn off input variables that are constant in the inspected training rows. With fewer
            // than two such rows no filtering is done, so every double variable is kept.
            var allowedInputVars = new List <string>();

            if (trainingRowCount >= 2)
            {
                foreach (var variableName in dataset.DoubleVariables)
                {
                    if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
                    {
                        allowedInputVars.Add(variableName);
                    }
                }
            }
            else
            {
                allowedInputVars.AddRange(dataset.DoubleVariables);
            }

            ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

            // Same boundary as the former trainingIndizes.Last() (index of the last inspected row),
            // but computed arithmetically so an empty range no longer throws.
            // NOTE(review): the overload that takes a DataAnalysisImportType ends the training
            // partition at the row count, not at the last index - confirm which boundary is intended.
            int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;

            clusteringData.TrainingPartition.Start = 0;
            clusteringData.TrainingPartition.End   = trainingPartEnd;
            clusteringData.TestPartition.Start     = trainingPartEnd;
            clusteringData.TestPartition.End       = csvFileParser.Rows;

            clusteringData.Name = Path.GetFileName(path);

            return clusteringData;
        }
 /// <summary>
 /// Creates clustering problem data from the exported dataset, the double input variables
 /// obtained with an empty argument, and the current transformations.
 /// </summary>
 /// <param name="oldProblemData">Not read by this method.</param>
 /// <returns>A newly constructed <c>ClusteringProblemData</c>.</returns>
 private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData)
 {
     var exported       = ExportedDataset;
     var inputVariables = GetDoubleInputVariables(String.Empty);
     var transforms     = Transformations;

     return new ClusteringProblemData(exported, inputVariables, transforms);
 }