Ejemplo n.º 1
0
    /// <summary>
    /// Checks a feature matrix and its target vector for basic consistency and
    /// reports each violated rule as an error through <c>TraceListeners.Log</c>.
    /// </summary>
    /// <param name="labels">
    /// Class labels. Only <c>labels[0]</c> and <c>labels[1]</c> are checked for
    /// presence in <paramref name="y"/>, so at least two entries are expected.
    /// </param>
    /// <param name="datasetName">Dataset identifier; used only to build log messages.</param>
    /// <param name="x">
    /// Feature matrix. Dimension 0 is treated as the feature count and
    /// dimension 1 as the observation count.
    /// </param>
    /// <param name="y">
    /// Target values, one per observation. Each element is cast to
    /// <see cref="string"/> when compared against the labels.
    /// </param>
    /// <remarks>
    /// NOTE(review): the trailing boolean arguments of <c>TraceListeners.Log</c>
    /// presumably make error-level calls abort (throw) - confirm. This method
    /// does not rely on that: when <paramref name="x"/> or <paramref name="y"/>
    /// is null it returns right after logging, because every later check
    /// dereferences them.
    /// </remarks>
    public void Dataset(
        string[] labels,
        DatasetName datasetName,
        object[,] x,
        object[] y)
    {
        bool isMissing = x == null || y == null;

        // An object[,] always has rank 2, so no separate rank check is needed.
        if (isMissing ||
            x.GetLength(0) == 0 ||
            y.Length == 0)
        {
            TraceListeners.Log(TraceEventType.Error, 0,
                               "Invalid number of records in the " + datasetName.ToString() + " dataset!", true, true);

            // Nothing below can be evaluated without both arrays.
            if (isMissing)
            {
                return;
            }
        }

        if (x.GetLength(1) != y.Length)
        {
            TraceListeners.Log(TraceEventType.Error, 0,
                               "Number of observations in features and target do not match!", true, true);
        }

        // Each of the two labels must occur at least once in the target.
        if (y.All(v => (string)v != labels[0]) ||
            y.All(v => (string)v != labels[1]))
        {
            TraceListeners.Log(TraceEventType.Error, 0,
                               "All the labels must be present in the " + datasetName.ToString() + " dataset!", true, true);
        }

        if (x.GetLength(1) < x.GetLength(0))
        {
            TraceListeners.Log(TraceEventType.Error, 0,
                               "Number of observations is less than number of features!", true, true);
        }
    }
Ejemplo n.º 2
0
    /// <summary>
    /// Validates the supplied data and stores it as the observed target
    /// (<c>_y</c>) and observed feature vectors (<c>_x</c>) for the dataset.
    /// </summary>
    /// <param name="datasetName">Dataset identifier; appended to the variable names and remembered in <c>_availableDatasetName</c>.</param>
    /// <param name="x">Feature matrix indexed as [feature, observation]; every cell must unbox to <see cref="double"/>.</param>
    /// <param name="y">Target values; each is converted with <c>System.Convert.ToInt64</c> and mapped to <c>true</c> when greater than zero.</param>
    public override void InitialiseFeatures(
        DatasetName datasetName,
        object[,] x,
        object[] y)
    {
        Debug.Assert(x != null, "The feature vector must not be null.");
        Debug.Assert(y != null, "The targe variable must not be null.");

        // Validate the inputs against the known class labels.
        _validate.Dataset(
            labels: _mapping.GetClassLabels().ToArray(),
            datasetName: datasetName,
            x: x,
            y: y);

        // Record the dimensions of the incoming matrix.
        _numFeatures     = x.GetLength(0);
        _numObservations = x.GetLength(1);

        // Transpose [feature, observation] into one double[] per observation.
        double[][] rows = new double[_numObservations][];
        for (int obs = 0; obs < _numObservations; obs++)
        {
            double[] row = new double[_numFeatures];
            for (int feat = 0; feat < _numFeatures; feat++)
            {
                row[feat] = (double)x[feat, obs];
            }
            rows[obs] = row;
        }

        // Observed target: true when the numeric value of the label is positive.
        bool[] targets = Array.ConvertAll(y, v => System.Convert.ToInt64(v) > 0);
        _y = Variable.Observed(targets).Named("y." + datasetName.ToString());

        // Observed features, sharing the range of the target variable.
        Vector[] vectors = new Vector[_numObservations];
        Range    person  = _y.Range.Named("person");
        for (int obs = 0; obs < _numObservations; obs++)
        {
            vectors[obs] = Vector.FromArray(rows[obs]);
        }
        _x = Variable.Observed(vectors, person).Named("x." + datasetName.ToString());

        _availableDatasetName = datasetName;
    }
Ejemplo n.º 3
0
    /// <summary>
    /// Infers the posterior weights, runs the Bayes point machine on the
    /// currently loaded features (<c>_x</c>) and stores, per observation, the
    /// predicted label and the predicted label distribution.
    /// </summary>
    /// <param name="inputModelFileName">
    /// Not used by the visible code; it is only referenced by the
    /// commented-out validation call.
    /// </param>
    /// <param name="distributionName">Passed through to <c>InferWeights</c>.</param>
    /// <param name="inferenceEngineAlgorithm">Algorithm handed to <c>SetInferenceEngine</c>.</param>
    /// <param name="noise">Noise value handed to <c>BayesPointMachine</c>.</param>
    /// <remarks>
    /// Results are written to <c>_yPredicDistrib</c> and <c>_yPredicLabel</c>.
    /// Each distribution maps "0" to the probability of <c>false</c> and "1" to
    /// the probability of <c>true</c>. The keys are fixed rather than derived
    /// from the cut-off comparison: deriving them produced two identical keys
    /// (and an <see cref="ArgumentException"/> from the dictionary) whenever
    /// both probabilities were on the same side of <c>_cutoffPoint</c>, and
    /// swapped the two entries whenever the <c>false</c> outcome won.
    /// </remarks>
    public override void Predict(
        string inputModelFileName,
        DistributionName distributionName,
        InferenceAlgorithm inferenceEngineAlgorithm,
        double noise)
    {
        // Validation is currently disabled:
        // _validate.Predict(inputModelFileName);

        // Label strings; they match Convert.ToInt32(bool).ToString().
        const string falseLabel = "0";
        const string trueLabel  = "1";

        // Initialise
        List <IDictionary <string, double> > yPredicDistrib = new List <IDictionary <string, double> >();
        List <string>        yPredicLabel = new List <string>();
        VariableArray <bool> yTest        = Variable.Array <bool>(new Range(_numObservations)).
                                            Named("y." + _availableDatasetName.ToString()); // to do: correct the size

        Bernoulli[] yPredic;

        // The inference engine
        _engine[DistributionType.Posterior] = SetInferenceEngine(
            inferenceEngineAlgorithm, 1);

        // Infer posterior weights from training
        _w[DistributionType.Posterior] = InferWeights(
            distributionType: DistributionType.Posterior,
            distributionName: distributionName,
            hyperParameters: null);

        // BPM
        BayesPointMachine(_x, yTest, _w[DistributionType.Posterior], noise);

        // Predict
        yPredic = _engine[DistributionType.Posterior].Infer <Bernoulli[]>(yTest);
        for (int i = 0; i < yPredic.Length; i++)
        {
            double probFalse = yPredic[i].GetProbFalse();
            double probTrue  = yPredic[i].GetProbTrue();

            yPredicDistrib.Add(new Dictionary <string, double>()
            {
                { falseLabel, probFalse },
                { trueLabel, probTrue }
            });

            // The observation is labelled "1" only when P(true) exceeds the cut-off.
            yPredicLabel.Add(probTrue > _cutoffPoint ? trueLabel : falseLabel);
        }
        _yPredicDistrib = yPredicDistrib;
        _yPredicLabel   = yPredicLabel;
    }
Ejemplo n.º 4
0
    /// <summary>
    /// Returns the converted target values stored for the given dataset.
    /// </summary>
    /// <param name="datasetName">Key into <c>_targetConverted</c>.</param>
    /// <returns>The array stored under <paramref name="datasetName"/>.</returns>
    /// <remarks>
    /// An error is logged when the key is absent; the lookup is attempted
    /// afterwards regardless.
    /// </remarks>
    public object[] GetTargetConverted(DatasetName datasetName)
    {
        bool isKnown = _targetConverted.ContainsKey(datasetName);

        if (!isKnown)
        {
            string message = "Dataset does not exist: " + datasetName.ToString();
            TraceListeners.Log(TraceEventType.Error, 0, message, true, true);
        }

        return _targetConverted[datasetName];
    }
Ejemplo n.º 5
0
    /// <summary>
    /// Returns the raw string table stored for the given dataset.
    /// </summary>
    /// <param name="datasetName">Key into <c>_datasets</c>.</param>
    /// <returns>The table stored under <paramref name="datasetName"/>.</returns>
    /// <remarks>
    /// An error is logged when the key is absent; the lookup is attempted
    /// afterwards regardless.
    /// </remarks>
    public string[,] GetDataset(DatasetName datasetName)
    {
        bool isKnown = _datasets.ContainsKey(datasetName);

        if (!isKnown)
        {
            string message = "Dataset does not exist: " + datasetName.ToString();
            TraceListeners.Log(TraceEventType.Error, 0, message, true, true);
        }

        return _datasets[datasetName];
    }
Ejemplo n.º 6
0
    /// <summary>
    /// Stores a column-oriented dataset as a [feature, observation] string
    /// table and records the row index assigned to each feature name.
    /// </summary>
    /// <param name="dataset">Feature name mapped to that feature's observations.</param>
    /// <param name="datasetName">Key under which the table and the feature index are stored.</param>
    /// <param name="featuresByOrder">Feature names in the order their rows should appear in the table.</param>
    /// <remarks>
    /// The table is sized from the number of keys in <paramref name="dataset"/>
    /// and the length of its first column. Any exception raised while filling
    /// it is caught and logged as an error; the dimension lookup above the
    /// <c>try</c> block is not covered by that handler.
    /// </remarks>
    public void Set(
        Dictionary <string, List <string> > dataset,
        DatasetName datasetName,
        string[] featuresByOrder)
    {
        TraceListeners.Log(TraceEventType.Information, 0, "Features::Set...", false, true);

        int featureCount     = dataset.Count;
        int observationCount = dataset.Values.First().Count;

        // Warn when an existing entry is about to be replaced.
        bool alreadyStored = _datasets.ContainsKey(datasetName);
        if (alreadyStored)
        {
            TraceListeners.Log(TraceEventType.Warning, 0,
                               "Overwriting: " + datasetName.ToString(), false, true);
        }

        try
        {
            // Register fresh containers first, then fill them in place.
            _featuresKey[datasetName] = new Dictionary <string, int>();
            _datasets[datasetName]    = new string[featureCount, observationCount];

            for (int row = 0; row < featuresByOrder.Length; row++)
            {
                string feature = featuresByOrder[row];
                _featuresKey[datasetName][feature] = row;

                for (int col = 0; col < observationCount; col++)
                {
                    _datasets[datasetName][row, col] = dataset[feature][col];
                }
            }
        }
        catch (Exception e)
        {
            TraceListeners.Log(TraceEventType.Error, 0, e.ToString(), true, true);
        }
    }
Ejemplo n.º 7
0
    /// <summary>
    /// Converts the raw string table of a dataset into a feature matrix
    /// (<c>_featuresConverted</c>) and a target vector (<c>_targetConverted</c>).
    /// </summary>
    /// <param name="datasetName">Dataset to convert; must already be present in <c>_datasets</c>.</param>
    /// <param name="standardConversion">
    /// When true, features are converted with type name "double" and the label
    /// with "string"; otherwise a null type name is passed to <c>Convert</c>.
    /// </param>
    /// <remarks>
    /// The row whose name equals <c>_varsMetadataLabelName</c> is left out of
    /// the feature matrix and converted separately into the target vector, so
    /// the feature matrix has one row fewer than the raw table.
    /// </remarks>
    public void ConvertFeatures(
        DatasetName datasetName,
        bool standardConversion = false)
    {
        TraceListeners.Log(TraceEventType.Information, 0,
                           "Features::ConvertFeatures...", false, true);

        // Evaluate
        bool hasDataset = _datasets.ContainsKey(datasetName);
        if (!hasDataset)
        {
            TraceListeners.Log(TraceEventType.Error, 0,
                               "Dataset does not exist: " + datasetName.ToString(), true, true);
        }
        bool hasConverted = _featuresConverted.ContainsKey(datasetName);
        if (hasConverted)
        {
            TraceListeners.Log(TraceEventType.Warning, 0,
                               "Overwriting: " + datasetName.ToString(), false, true);
        }

        // Dimensions: one raw row is the label, the rest are features.
        int rawRowCount     = _datasets[datasetName].GetLength(0);
        int numFeatures     = rawRowCount - 1;
        int numObservations = _datasets[datasetName].GetLength(1);

        SetFeaturesMetdata();

        // Convert features
        string featureType = standardConversion ? "double" : null;
        _featuresConverted[datasetName] = new object[numFeatures, numObservations];

        int targetRow = 0; // next free row in the converted feature matrix
        for (int sourceRow = 0; sourceRow < numFeatures + 1; sourceRow++)
        {
            // Reverse lookup: the feature name registered for this raw row.
            string featureName =
                _featuresKey[datasetName].FirstOrDefault(v => v.Value == sourceRow).Key;

            // The label row does not take up a row in the feature matrix.
            if (featureName == _varsMetadataLabelName)
            {
                continue;
            }

            TraceListeners.Log(TraceEventType.Information, 0,
                               " ...Convert: " + featureName + " to " +
                               featureType + " ...", false, true);

            for (int obs = 0; obs < numObservations; obs++)
            {
                _featuresConverted[datasetName][targetRow, obs] =
                    Convert(datasetName, featureName, sourceRow, obs, featureType);
            }

            targetRow++;
        }

        // Convert the label variable to labelType
        featureType = standardConversion ? "string" : null;
        _targetConverted[datasetName] = new object[numObservations];

        int labelRow = _featuresKey[datasetName][_varsMetadataLabelName];
        for (int obs = 0; obs < numObservations; obs++)
        {
            _targetConverted[datasetName][obs] =
                Convert(datasetName, _varsMetadataLabelName, labelRow, obs, featureType);
        }
    }