//TODO
/// <summary>
/// Distance based on the uncentered correlation of two vectors:
/// 1 - mean(x*y) / sqrt(mean(x*x) * mean(y*y)), taken over the usable positions.
/// A position is skipped when <c>x[i] - y[i]</c> is NaN.
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector, indexed over the same positions as <paramref name="x"/>.</param>
/// <returns>One minus the correlation, or NaN when fewer than three positions are usable.</returns>
public static double Calc(BaseVector x, BaseVector y) {
    int length = x.Length;
    int usable = 0;
    for (int pos = 0; pos < length; pos++) {
        if (!double.IsNaN(x[pos] - y[pos])) {
            usable++;
        }
    }
    if (usable < 3) {
        return double.NaN;
    }
    double meanXx = 0;
    double meanYy = 0;
    double meanXy = 0;
    for (int pos = 0; pos < length; pos++) {
        if (double.IsNaN(x[pos] - y[pos])) {
            continue;
        }
        double xv = x[pos];
        double yv = y[pos];
        meanXx += xv * xv;
        meanYy += yv * yv;
        meanXy += xv * yv;
    }
    meanXx /= usable;
    meanYy /= usable;
    meanXy /= usable;
    return 1 - meanXy / Math.Sqrt(meanXx * meanYy);
}
/// <summary>
/// Computes "significance B" values for ratios using their intensities.
/// Only rows where both ratio and intensity are finite numbers are handed to
/// <c>NumUtils.MovingBoxPlot</c>; every other row keeps the value 1.
/// </summary>
/// <param name="ratios">Ratio per row.</param>
/// <param name="intens">Intensity per row.</param>
/// <param name="side">Test side forwarded to <c>NumUtils.MovingBoxPlot</c>.</param>
/// <returns>An array with one entry per element of <paramref name="ratios"/>.</returns>
public static double[] CalcSignificanceB(BaseVector ratios, BaseVector intens, TestSide side) {
    double[] significance = new double[ratios.Length];
    for (int row = 0; row < significance.Length; row++) {
        significance[row] = 1;
    }
    List<double> finiteRatios = new List<double>();
    List<double> finiteIntensities = new List<double>();
    List<int> sourceRows = new List<int>();
    for (int row = 0; row < ratios.Length; row++) {
        // The intensity is only inspected once the ratio is known to be finite.
        if (double.IsNaN(ratios[row]) || double.IsInfinity(ratios[row])) {
            continue;
        }
        if (double.IsNaN(intens[row]) || double.IsInfinity(intens[row])) {
            continue;
        }
        finiteRatios.Add(ratios[row]);
        finiteIntensities.Add(intens[row]);
        sourceRows.Add(row);
    }
    double[] computed = NumUtils.MovingBoxPlot(finiteRatios.ToArray(), finiteIntensities.ToArray(), -1, side);
    // Scatter the computed values back to the rows they came from.
    for (int n = 0; n < sourceRows.Count; n++) {
        significance[sourceRows[n]] = computed[n];
    }
    return significance;
}
/// <summary>
/// Builds a k-nearest-neighbour classification model from the training data.
/// Reads "Number of neighbours" and the distance function from <paramref name="param"/>.
/// </summary>
/// <param name="x">Training vectors.</param>
/// <param name="y">Group assignments per training vector.</param>
/// <param name="ngroups">Number of groups.</param>
/// <param name="param">Parameters holding the neighbour count and the distance settings.</param>
/// <param name="nthreads">Not used by this implementation.</param>
/// <param name="reportProgress">Not used by this implementation.</param>
/// <returns>A new <c>KnnClassificationModel</c>.</returns>
public override ClassificationModel Train(BaseVector[] x, int[][] y, int ngroups, Parameters param, int nthreads, Action<double> reportProgress) {
    int neighbourCount = param.GetParam<int>("Number of neighbours").Value;
    IDistance metric = Distances.GetDistanceFunction(param);
    KnnClassificationModel model = new KnnClassificationModel(x, y, ngroups, neighbourCount, metric);
    return model;
}
/// <summary>
/// Reorders the rows of <paramref name="mdata"/> by the values of one column.
/// Indices below <c>mdata.ColumnCount</c> address main columns; larger indices address
/// numeric columns (offset by <c>mdata.ColumnCount</c>).
/// </summary>
/// <param name="mdata">Matrix whose rows are reordered in place via <c>ExtractRows</c>.</param>
/// <param name="param">Provides "Column" (index) and "Descending" (flag).</param>
/// <param name="supplTables">Not touched.</param>
/// <param name="documents">Not touched.</param>
/// <param name="processInfo">Not used.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int columnIndex = param.GetParam<int>("Column").Value;
    bool descending = param.GetParam<bool>("Descending").Value;
    int[] rowOrder;
    if (columnIndex < mdata.ColumnCount) {
        BaseVector mainColumn = mdata.Values.GetColumn(columnIndex);
        rowOrder = ArrayUtils.Order(mainColumn);
    } else {
        double[] numericColumn = mdata.NumericColumns[columnIndex - mdata.ColumnCount];
        rowOrder = ArrayUtils.Order(numericColumn);
    }
    if (descending) {
        ArrayUtils.Revert(rowOrder);
    }
    mdata.ExtractRows(rowOrder);
}
/// <summary>
/// Trains a classification model, delegating to <c>TrainGroupWise</c> when
/// <c>groupWiseSelection</c> is set and to <c>TrainGlobal</c> otherwise.
/// </summary>
/// <param name="x">Training vectors.</param>
/// <param name="y">Group assignments per training vector.</param>
/// <param name="ngroups">Number of groups.</param>
/// <param name="data">Group data provider passed through to the selected trainer.</param>
/// <param name="nthreads">Thread count passed through to the selected trainer.</param>
/// <param name="reportProgress">Progress callback; only forwarded to <c>TrainGlobal</c>.</param>
/// <returns>The trained model.</returns>
public ClassificationModel Train(BaseVector[] x, int[][] y, int ngroups, IGroupDataProvider data, int nthreads, Action<double> reportProgress) {
    if (groupWiseSelection) {
        return TrainGroupWise(x, y, ngroups, data, nthreads);
    }
    return TrainGlobal(x, y, ngroups, data, nthreads, reportProgress);
}
/// <summary>
/// Collects one strength per wrapped classifier: the first element of each
/// classifier's own <c>PredictStrength</c> result.
/// </summary>
/// <param name="x">Vector to classify.</param>
/// <returns>An array with one entry per element of <c>classifiers</c>.</returns>
public override float[] PredictStrength(BaseVector x) {
    int classifierCount = classifiers.Length;
    float[] strengths = new float[classifierCount];
    for (int c = 0; c < classifierCount; c++) {
        strengths[c] = classifiers[c].PredictStrength(x)[0];
    }
    return strengths;
}
/// <summary>
/// Restricts every vector in <paramref name="x"/> to the feature positions in <paramref name="inds"/>.
/// </summary>
/// <param name="x">Input vectors.</param>
/// <param name="inds">Feature indices passed to <c>SubArray</c> of each vector.</param>
/// <returns>One reduced vector per input vector, in the same order.</returns>
public static BaseVector[] ExtractFeatures(IList<BaseVector> x, IList<int> inds) {
    BaseVector[] reduced = new BaseVector[x.Count];
    int slot = 0;
    while (slot < x.Count) {
        reduced[slot] = x[slot].SubArray(inds);
        slot++;
    }
    return reduced;
}
/// <summary>
/// Evaluates <paramref name="distance"/> between each vector in <paramref name="x"/> and <paramref name="xTest"/>.
/// </summary>
/// <param name="x">Reference vectors.</param>
/// <param name="xTest">Vector every reference vector is compared with.</param>
/// <param name="distance">Distance function; called as <c>Get(x[i], xTest)</c>.</param>
/// <returns>One distance per element of <paramref name="x"/>, in the same order.</returns>
private static double[] CalcDistances(IList<BaseVector> x, BaseVector xTest, IDistance distance) {
    double[] distances = new double[x.Count];
    for (int row = 0; row < x.Count; row++) {
        BaseVector reference = x[row];
        distances[row] = distance.Get(reference, xTest);
    }
    return distances;
}
/// <summary>
/// Builds, for each input vector, the sub-vector made of the positions listed in <paramref name="inds"/>.
/// </summary>
/// <param name="x">Input vectors.</param>
/// <param name="inds">Positions handed to each vector's <c>SubArray</c>.</param>
/// <returns>Array of sub-vectors, parallel to <paramref name="x"/>.</returns>
public static BaseVector[] ExtractFeatures(IList<BaseVector> x, IList<int> inds) {
    BaseVector[] selected = new BaseVector[x.Count];
    for (int row = 0; row < x.Count; row++) {
        BaseVector full = x[row];
        selected[row] = full.SubArray(inds);
    }
    return selected;
}
/// <summary>
/// Queries every classifier in <c>classifiers</c> and keeps element 0 of each answer.
/// </summary>
/// <param name="x">Vector to classify.</param>
/// <returns>Strengths, one per classifier, in classifier order.</returns>
public override float[] PredictStrength(BaseVector x) {
    float[] perClassifier = new float[classifiers.Length];
    int index = 0;
    while (index < perClassifier.Length) {
        perClassifier[index] = classifiers[index].PredictStrength(x)[0];
        index++;
    }
    return perClassifier;
}
/// <summary>
/// Projects <paramref name="x"/> with <c>projection</c> and scores each group by the negated
/// Euclidean distance between the projected vector and that group's projected mean.
/// </summary>
/// <param name="x">Vector to classify.</param>
/// <returns>One value per group; values closer to zero mean a smaller distance.</returns>
public override float[] PredictStrength(BaseVector x) {
    double[] projected = MatrixUtils.VectorTimesMatrix(x, projection);
    float[] strengths = new float[ngroups];
    IDistance euclidean = new EuclideanDistance();
    int group = 0;
    while (group < ngroups) {
        strengths[group] = -(float) euclidean.Get(projected, projectedGroupMeans[group]);
        group++;
    }
    return strengths;
}
/// <summary>
/// Predicts a value for <paramref name="xTest"/> as the average of <c>y</c> over the
/// neighbours returned by <c>KnnClassificationModel.GetNeighborInds</c>.
/// </summary>
/// <param name="xTest">Vector to predict for.</param>
/// <returns>The average of the neighbours' target values.</returns>
public override float Predict(BaseVector xTest) {
    int[] neighbours = KnnClassificationModel.GetNeighborInds(x, xTest, k, distance);
    float total = 0;
    for (int n = 0; n < neighbours.Length; n++) {
        total += y[neighbours[n]];
    }
    total /= neighbours.Length;
    return total;
}
/// <summary>
/// Evaluates the kernel function configured in <paramref name="param"/> for two vectors.
/// The self dot products are only computed when the kernel reports <c>UsesSquares</c>;
/// otherwise NaN is passed in their place.
/// </summary>
/// <param name="x">First vector.</param>
/// <param name="y">Second vector.</param>
/// <param name="param">SVM parameters supplying <c>kernelFunction</c>.</param>
/// <returns>The kernel value.</returns>
public static double KFunction(BaseVector x, BaseVector y, SvmParameter param) {
    IKernelFunction kernel = param.kernelFunction;
    if (!kernel.UsesSquares) {
        return kernel.Evaluate(x, y, double.NaN, double.NaN);
    }
    double xSelfDot = x.Dot(x);
    double ySelfDot = y.Dot(y);
    return kernel.Evaluate(x, y, xSelfDot, ySelfDot);
}
/// <summary>
/// Trains an epsilon-SVR regression model. The kernel comes from the "Kernel" parameter
/// (with its sub-parameters) and the cost from the "C" parameter.
/// </summary>
/// <param name="x">Training vectors.</param>
/// <param name="y">Target values.</param>
/// <param name="param">Parameters holding "Kernel" and "C".</param>
/// <param name="nthreads">Not used by this implementation.</param>
/// <returns>A new <c>SvmRegressionModel</c> wrapping the trained SVM.</returns>
public RegressionModel Train(BaseVector[] x, float[] y, Parameters param, int nthreads) {
    ParameterWithSubParams<int> kernelChoice = param.GetParamWithSubParams<int>("Kernel");
    SvmParameter settings = new SvmParameter();
    settings.kernelFunction = KernelFunctions.GetKernelFunction(kernelChoice.Value, kernelChoice.GetSubParameters());
    settings.svmType = SvmType.EpsilonSvr;
    settings.c = param.GetParam<double>("C").Value;
    SvmProblem problem = new SvmProblem(x, y);
    SvmModel trained = SvmMain.SvmTrain(problem, settings);
    return new SvmRegressionModel(trained);
}
/// <summary>
/// Returns the indices of all classes whose predicted strength is strictly positive.
/// </summary>
/// <param name="x">Vector to classify.</param>
/// <returns>Class indices in ascending order; empty when no strength is above zero.</returns>
public int[] PredictClasses(BaseVector x) {
    float[] strengths = PredictStrength(x);
    List<int> positive = new List<int>();
    for (int cls = 0; cls < strengths.Length; cls++) {
        if (!(strengths[cls] > 0)) {
            continue;
        }
        positive.Add(cls);
    }
    return positive.ToArray();
}
/// <summary>
/// Trains the regression method, optionally on a ranked subset of features.
/// When there is no ranker, or <c>nfeatures</c> is at least the length of the first vector,
/// all features are used. Otherwise the features are ranked and the first
/// <c>nfeatures</c> of the ranking are kept.
/// </summary>
/// <param name="x">Training vectors; <c>x[0]</c> is read to determine the feature count.</param>
/// <param name="y">Target values.</param>
/// <param name="data">Group data provider handed to the ranker.</param>
/// <returns>The plain model, or a <c>RegressionOnSubFeatures</c> wrapping a model trained on the selected features.</returns>
public RegressionModel Train(BaseVector[] x, float[] y, IGroupDataProvider data) {
    bool useAllFeatures = ranker == null || nfeatures >= x[0].Length;
    if (useAllFeatures) {
        return regressionMethod.Train(x, y, regressionParam, 1);
    }
    int[] ranking = ranker.Rank(x, y, rankerParam, data, 1);
    int[] inds;
    if (nfeatures < ranking.Length) {
        inds = ArrayUtils.SubArray(ranking, nfeatures);
    } else {
        inds = ranking;
    }
    var reducedModel = regressionMethod.Train(ClassificationWithRanking.ExtractFeatures(x, inds), y, regressionParam, 1);
    return new RegressionOnSubFeatures(reducedModel, inds);
}
/// <summary>
/// Applies the single-vector <c>ToOneHotEncodingMixed</c> to every vector in <paramref name="x"/>.
/// The encoded length is computed once from <paramref name="nominal"/> via <c>CalcTotalLength</c>.
/// </summary>
/// <param name="x">Vectors to encode.</param>
/// <param name="nominal">Per-feature description forwarded to the single-vector overload.</param>
/// <returns>Encoded vectors, parallel to <paramref name="x"/>.</returns>
private static BaseVector[] ToOneHotEncodingMixed(BaseVector[] x, int[] nominal) {
    int encodedLength = CalcTotalLength(nominal);
    BaseVector[] encoded = new BaseVector[x.Length];
    int row = 0;
    while (row < encoded.Length) {
        encoded[row] = ToOneHotEncodingMixed(x[row], nominal, encodedLength);
        row++;
    }
    return encoded;
}
/// <summary>
/// Multiplies the row vector <paramref name="a"/> by the matrix <paramref name="b"/>.
/// Entry <c>i</c> of the result is the sum over <c>k</c> of <c>a[k] * b[k, i]</c>.
/// </summary>
/// <param name="a">Vector; its length determines how many rows of <paramref name="b"/> are read.</param>
/// <param name="b">Matrix; its second dimension determines the result length.</param>
/// <returns>The product as a new array.</returns>
public static double[] VectorTimesMatrix(BaseVector a, double[,] b) {
    int columnCount = b.GetLength(1);
    double[] product = new double[columnCount];
    for (int col = 0; col < columnCount; col++) {
        for (int row = 0; row < a.Length; row++) {
            product[col] += a[row] * b[row, col];
        }
    }
    return product;
}
/// <summary>
/// Trains one classifier for all groups, optionally on a ranked subset of features.
/// When there is no ranker, or <c>nfeatures</c> is at least the length of the first vector,
/// the classifier is trained on all features. Otherwise the features are ranked, the first
/// <c>nfeatures</c> of the ranking are kept, and the classifier is trained on those.
/// </summary>
/// <param name="x">Training vectors; <c>x[0]</c> is read to determine the feature count.</param>
/// <param name="y">Group assignments per training vector.</param>
/// <param name="ngroups">Number of groups.</param>
/// <param name="data">Group data provider handed to the ranker.</param>
/// <param name="nthreads">Thread count handed to the ranker and the classifier.</param>
/// <param name="reportProgress">Progress callback handed to the classifier on both paths.</param>
/// <returns>The plain model, or a <c>ClassificationOnSubFeatures</c> wrapping a model trained on the selected features.</returns>
public ClassificationModel TrainGlobal(BaseVector[] x, int[][] y, int ngroups, IGroupDataProvider data, int nthreads, Action<double> reportProgress) {
    if (ranker == null || nfeatures >= x[0].Length) {
        return classifier.Train(x, y, ngroups, classifierParam, nthreads, reportProgress);
    }
    int[] o = ranker.Rank(x, y, ngroups, rankerParam, data, nthreads);
    int[] inds = nfeatures < o.Length ? ArrayUtils.SubArray(o, nfeatures) : o;
    // Pass the progress callback on the sub-feature path too, so that training reports
    // progress the same way as the all-feature path above.
    return new ClassificationOnSubFeatures(
        classifier.Train(ExtractFeatures(x, inds), y, ngroups, classifierParam, nthreads, reportProgress), inds);
}
/// <summary>
/// Averages the target values <c>y</c> of the neighbours of <paramref name="xTest"/>
/// found by <c>KnnClassificationModel.GetNeighborInds</c>.
/// </summary>
/// <param name="xTest">Vector to predict for.</param>
/// <returns>The neighbours' average target value.</returns>
public override float Predict(BaseVector xTest) {
    int[] nearest = KnnClassificationModel.GetNeighborInds(x, xTest, k, distance);
    float accumulated = 0;
    int position = 0;
    while (position < nearest.Length) {
        accumulated += y[nearest[position]];
        position++;
    }
    accumulated /= nearest.Length;
    return accumulated;
}
/// <summary>
/// Scores each group by minus the Euclidean distance between the projected input
/// (<paramref name="x"/> times <c>projection</c>) and the group's projected mean.
/// </summary>
/// <param name="x">Vector to classify.</param>
/// <returns>Array of length <c>ngroups</c> holding the negated distances.</returns>
public override float[] PredictStrength(BaseVector x) {
    double[] projectedInput = MatrixUtils.VectorTimesMatrix(x, projection);
    float[] negatedDistances = new float[ngroups];
    IDistance metric = new EuclideanDistance();
    for (int group = 0; group < ngroups; group++) {
        negatedDistances[group] = -(float) metric.Get(projectedInput, projectedGroupMeans[group]);
    }
    return negatedDistances;
}
/// <summary>
/// For each selected column, computes "Significance A" p-values and appends them to
/// <paramref name="mdata"/> as a numeric column (" Significance A") together with a
/// category column (" A significant").
/// </summary>
/// <param name="mdata">Matrix that is read and extended with the new columns.</param>
/// <param name="param">Provides "Columns", "Use for truncation", "Threshold value" and "Side".</param>
/// <param name="supplTables">Not touched.</param>
/// <param name="documents">Not touched.</param>
/// <param name="processInfo">Not used.</param>
/// <exception cref="Exception">
/// Thrown for a "Side" value outside 0..2, and, while processing a column, for any truncation
/// other than p-value or Benjamini-Hochberg (this includes the permutation-based choice).
/// </exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] cols = param.GetParam<int[]>("Columns").Value;
    int truncIndex = param.GetParam<int>("Use for truncation").Value;
    TestTruncation truncation;
    if (truncIndex == 0) {
        truncation = TestTruncation.Pvalue;
    } else if (truncIndex == 1) {
        truncation = TestTruncation.BenjaminiHochberg;
    } else {
        truncation = TestTruncation.PermutationBased;
    }
    double threshold = param.GetParam<double>("Threshold value").Value;
    int sideInd = param.GetParam<int>("Side").Value;
    TestSide side;
    if (sideInd == 0) {
        side = TestSide.Both;
    } else if (sideInd == 1) {
        side = TestSide.Left;
    } else if (sideInd == 2) {
        side = TestSide.Right;
    } else {
        throw new Exception("Never get here.");
    }
    foreach (int col in cols) {
        BaseVector columnValues = mdata.Values.GetColumn(col);
        double[] pvals = CalcSignificanceA(columnValues, side);
        string[][] significant;
        if (truncation == TestTruncation.Pvalue) {
            significant = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
        } else if (truncation == TestTruncation.BenjaminiHochberg) {
            double[] fdrs;
            significant = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold, pvals.Length, out fdrs);
        } else {
            throw new Exception("Never get here.");
        }
        mdata.AddNumericColumn(mdata.ColumnNames[col] + " Significance A", "", pvals);
        mdata.AddCategoryColumn(mdata.ColumnNames[col] + " A significant", "", significant);
    }
}
/// <summary>
/// Creates a kernel over a shallow copy of <paramref name="x1"/>. When the kernel function
/// reports <c>UsesSquares</c>, the self dot product of the first <paramref name="l"/> vectors
/// is precomputed into <c>xSquare</c>; otherwise <c>xSquare</c> is null.
/// </summary>
/// <param name="l">Number of vectors for which squares are precomputed.</param>
/// <param name="x1">Vectors; the array is cloned, the vectors themselves are shared.</param>
/// <param name="param">SVM parameters supplying <c>kernelFunction</c>.</param>
internal SvmKernel(int l, BaseVector[] x1, SvmParameter param) {
    kernelFunction = param.kernelFunction;
    x = (BaseVector[]) x1.Clone();
    if (!kernelFunction.UsesSquares) {
        xSquare = null;
        return;
    }
    xSquare = new double[l];
    for (int row = 0; row < l; row++) {
        BaseVector vector = x[row];
        xSquare[row] = vector.Dot(vector);
    }
}
/// <summary>
/// Computes the kernel value of two vectors with the kernel function from <paramref name="param"/>.
/// Self dot products are supplied only if the kernel reports <c>UsesSquares</c>; NaN otherwise.
/// </summary>
/// <param name="x">First vector.</param>
/// <param name="y">Second vector.</param>
/// <param name="param">SVM parameters supplying <c>kernelFunction</c>.</param>
/// <returns>The kernel value.</returns>
public static double KFunction(BaseVector x, BaseVector y, SvmParameter param) {
    IKernelFunction function = param.kernelFunction;
    bool needsSquares = function.UsesSquares;
    double squareX = needsSquares ? x.Dot(x) : double.NaN;
    double squareY = needsSquares ? y.Dot(y) : double.NaN;
    return function.Evaluate(x, y, squareX, squareY);
}
/// <summary>
/// Ranks features by the score that <c>CalcScore</c> assigns to each feature's values
/// (cast to float) against the targets, and returns <c>ArrayUtils.Order</c> of those scores.
/// </summary>
/// <param name="x">Training vectors; <c>x[0]</c> is read to determine the feature count.</param>
/// <param name="y">Target values.</param>
/// <param name="param">Not used by this implementation.</param>
/// <param name="data">Not used by this implementation.</param>
/// <param name="nthreads">Not used by this implementation.</param>
/// <returns>Feature indices in the order produced by <c>ArrayUtils.Order</c>.</returns>
public int[] Rank(BaseVector[] x, float[] y, Parameters param, IGroupDataProvider data, int nthreads) {
    int featureCount = x[0].Length;
    double[] scores = new double[featureCount];
    for (int feature = 0; feature < featureCount; feature++) {
        // Gather this feature's value from every sample.
        float[] featureValues = new float[x.Length];
        for (int sample = 0; sample < featureValues.Length; sample++) {
            featureValues[sample] = (float) x[sample][feature];
        }
        scores[feature] = CalcScore(featureValues, y);
    }
    return ArrayUtils.Order(scores);
}
/// <summary>
/// Lists every class index for which <c>PredictStrength</c> yields a value greater than zero.
/// </summary>
/// <param name="x">Vector to classify.</param>
/// <returns>Matching class indices, ascending; possibly empty.</returns>
public int[] PredictClasses(BaseVector x) {
    float[] strengthPerClass = PredictStrength(x);
    List<int> chosen = new List<int>();
    int cls = 0;
    while (cls < strengthPerClass.Length) {
        if (strengthPerClass[cls] > 0) {
            chosen.Add(cls);
        }
        cls++;
    }
    return chosen.ToArray();
}
//TODO
/// <summary>
/// Euclidean-style distance that tolerates missing values. Positions whose difference is
/// NaN are skipped, and the sum of squared differences is rescaled by <c>n / c</c>
/// (total positions over usable positions) before the square root is taken.
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector.</param>
/// <returns>The distance, or NaN when no position is usable.</returns>
public static double Calc(BaseVector x, BaseVector y) {
    int length = x.Length;
    double squaredSum = 0;
    int usable = 0;
    for (int pos = 0; pos < length; pos++) {
        double diff = x[pos] - y[pos];
        if (!double.IsNaN(diff)) {
            squaredSum += diff * diff;
            usable++;
        }
    }
    if (usable == 0) {
        return double.NaN;
    }
    return Math.Sqrt(squaredSum / usable * length);
}
/// <summary>
/// Create distance matrix from <see cref="IDistance"/>.
/// Only the pairs (i, j) with i &lt; j are stored, row by row, in a flat array of
/// N * (N - 1) / 2 entries.
/// </summary>
/// <param name="data">Source of the rows; <c>RowCount</c> becomes N.</param>
/// <param name="distance">Distance evaluated for every pair of rows.</param>
public GenericDistanceMatrix(MatrixIndexer data, IDistance distance) {
    N = data.RowCount;
    distances = new double[N * (N - 1) / 2];
    int next = 0;
    for (int first = 0; first < N; first++) {
        BaseVector firstRow = data.GetRow(first);
        for (int second = first + 1; second < N; second++) {
            distances[next] = distance.Get(firstRow, data.GetRow(second));
            next++;
        }
    }
}
//TODO
/// <summary>
/// Pearson correlation distance: one minus the correlation of the mean-centered values of
/// <paramref name="x"/> and <paramref name="y"/>. Positions where <c>x[i] - y[i]</c> is NaN are skipped.
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector.</param>
/// <returns>One minus the correlation, or NaN when fewer than three positions are usable.</returns>
public static double Calc(BaseVector x, BaseVector y) {
    int length = x.Length;
    double meanX = 0;
    double meanY = 0;
    int usable = 0;
    for (int pos = 0; pos < length; pos++) {
        if (double.IsNaN(x[pos] - y[pos])) {
            continue;
        }
        meanX += x[pos];
        meanY += y[pos];
        usable++;
    }
    if (usable < 3) {
        return double.NaN;
    }
    meanX /= usable;
    meanY /= usable;
    double varX = 0;
    double varY = 0;
    double covariance = 0;
    for (int pos = 0; pos < length; pos++) {
        if (double.IsNaN(x[pos] - y[pos])) {
            continue;
        }
        double centeredX = x[pos] - meanX;
        double centeredY = y[pos] - meanY;
        varX += centeredX * centeredX;
        varY += centeredY * centeredY;
        covariance += centeredX * centeredY;
    }
    varX /= usable;
    varY /= usable;
    covariance /= usable;
    return 1 - covariance / Math.Sqrt(varX * varY);
}
/// <summary>
/// Ranks features by the value <c>CalcPvalue</c> computes for each feature's values across
/// the rearranged groups, and returns <c>ArrayUtils.Order</c> of those values.
/// </summary>
/// <param name="x">Training vectors; <c>x[0]</c> is read to determine the feature count.</param>
/// <param name="y">Group assignments per training vector.</param>
/// <param name="ngroups">Number of groups.</param>
/// <param name="param">Provides the "s0" parameter.</param>
/// <param name="data">Not used by this implementation.</param>
/// <param name="nthreads">Not used by this implementation.</param>
/// <param name="reportProgress">Not used by this implementation.</param>
/// <returns>Feature indices in the order produced by <c>ArrayUtils.Order</c>.</returns>
public override int[] Rank(BaseVector[] x, int[][] y, int ngroups, Parameters param, IGroupDataProvider data, int nthreads, Action<double> reportProgress) {
    double s0 = param.GetParam<double>("s0").Value;
    int featureCount = x[0].Length;
    int[][] groupMembers = RearrangeGroups(y, ngroups);
    double[] pvalues = new double[featureCount];
    for (int feature = 0; feature < featureCount; feature++) {
        // Gather this feature's value from every sample.
        double[] featureValues = new double[x.Length];
        for (int sample = 0; sample < featureValues.Length; sample++) {
            featureValues[sample] = x[sample][feature];
        }
        pvalues[feature] = CalcPvalue(featureValues, groupMembers, ngroups, s0);
    }
    return ArrayUtils.Order(pvalues);
}
/// <summary>
/// Exchanges entries <paramref name="i"/> and <paramref name="j"/> of <c>x</c> and, when the
/// squares are cached, of <c>xSquare</c> as well.
/// </summary>
/// <param name="i">First index.</param>
/// <param name="j">Second index.</param>
internal override void SwapIndex(int i, int j) {
    BaseVector heldVector = x[i];
    x[i] = x[j];
    x[j] = heldVector;
    if (xSquare == null) {
        return;
    }
    double heldSquare = xSquare[i];
    xSquare[i] = xSquare[j];
    xSquare[j] = heldSquare;
}
//TODO
/// <summary>
/// Maximum distance: the largest absolute coordinate difference between
/// <paramref name="x"/> and <paramref name="y"/>. Differences that are NaN or infinite are ignored.
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector.</param>
/// <returns>The largest absolute difference, or NaN when no difference is usable.</returns>
public static double Calc(BaseVector x, BaseVector y) {
    int length = x.Length;
    // double.MinValue doubles as the "nothing seen yet" marker.
    double largest = double.MinValue;
    for (int pos = 0; pos < length; pos++) {
        double diff = x[pos] - y[pos];
        bool usable = !double.IsNaN(diff) && !double.IsInfinity(diff);
        if (!usable) {
            continue;
        }
        double magnitude = Math.Abs(diff);
        if (magnitude > largest) {
            largest = magnitude;
        }
    }
    if (largest == double.MinValue) {
        return double.NaN;
    }
    return largest;
}
/// <summary>
/// Ranks features with linear C-SVC models: the problems from <c>CreateProblems</c> are each
/// ranked by <c>RankBinary</c> and the rankings are merged by <c>CombineRankedFeaturesLists</c>.
/// </summary>
/// <param name="x">Training vectors.</param>
/// <param name="y">Group assignments per training vector.</param>
/// <param name="ngroups">Number of groups.</param>
/// <param name="param">Provides the "C" parameter.</param>
/// <param name="data">Not used by this implementation.</param>
/// <param name="nthreads">Not used by this implementation.</param>
/// <param name="reportProgress">Not used by this implementation.</param>
/// <returns>The combined feature ranking.</returns>
public override int[] Rank(BaseVector[] x, int[][] y, int ngroups, Parameters param, IGroupDataProvider data, int nthreads, Action<double> reportProgress) {
    SvmParameter settings = new SvmParameter();
    settings.kernelFunction = new LinearKernelFunction();
    settings.svmType = SvmType.CSvc;
    settings.c = param.GetParam<double>("C").Value;
    bool[] invert;
    SvmProblem[] problems = CreateProblems(x, y, ngroups, out invert);
    int[][] rankingPerProblem = new int[problems.Length][];
    int p = 0;
    while (p < problems.Length) {
        rankingPerProblem[p] = RankBinary(problems[p], settings);
        p++;
    }
    return CombineRankedFeaturesLists(rankingPerProblem);
}
/// <summary>
/// Dot product of this vector with <paramref name="y"/>, dispatched on the runtime type of
/// <paramref name="y"/>. Types are tested in the order sparse float, sparse bool, double array,
/// float array; anything else is cast to <c>BoolArrayVector</c>.
/// </summary>
/// <param name="y">The other vector.</param>
/// <returns>The dot product.</returns>
public override double Dot(BaseVector y) {
    return y is SparseFloatVector
        ? SparseFloatVector.Dot(this, (SparseFloatVector) y)
        : y is SparseBoolVector
            ? SparseBoolVector.Dot(this, (SparseBoolVector) y)
            : y is DoubleArrayVector
                ? Dot(this, (DoubleArrayVector) y)
                : y is FloatArrayVector
                    ? Dot(this, (FloatArrayVector) y)
                    : Dot(this, (BoolArrayVector) y);
}
/// <summary>
/// Trains one regression model per feature-set size returned by <c>GetSizes</c>.
/// Features are ranked once. For each size the first <c>sizes[i]</c> ranked features are used,
/// except that a leading size equal to the full feature count trains on all features directly.
/// </summary>
/// <param name="x">Training vectors; <c>x[0]</c> is read to determine the feature count.</param>
/// <param name="y">Target values.</param>
/// <param name="data">Group data provider handed to the ranker.</param>
/// <returns>One model per size, in the order of the sizes.</returns>
public RegressionModel[] Train(BaseVector[] x, float[] y, IGroupDataProvider data) {
    int[] ranking = ranker.Rank(x, y, rankerParam, data, 1);
    int[] sizes = GetSizes(x[0].Length, reductionFactor, maxFeatures);
    RegressionModel[] models = new RegressionModel[sizes.Length];
    for (int s = 0; s < models.Length; s++) {
        bool allFeatures = s == 0 && sizes[0] == x[0].Length;
        if (allFeatures) {
            models[0] = classifier.Train(x, y, classifierParam, 1);
            continue;
        }
        int[] inds = ArrayUtils.SubArray(ranking, sizes[s]);
        var reducedModel = classifier.Train(ClassificationWithRanking.ExtractFeatures(x, inds), y, classifierParam, 1);
        models[s] = new RegressionOnSubFeatures(reducedModel, inds);
    }
    return models;
}
//TODO
/// <summary>
/// L1-style distance that tolerates missing values: the sum of absolute differences over the
/// positions whose difference is not NaN, rescaled by <c>n / c</c> (total over usable positions).
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector.</param>
/// <returns>The distance, or NaN when no position is usable.</returns>
public static double Calc(BaseVector x, BaseVector y) {
    int length = x.Length;
    int usable = 0;
    double absoluteSum = 0;
    for (int pos = 0; pos < length; pos++) {
        double diff = x[pos] - y[pos];
        if (double.IsNaN(diff)) {
            continue;
        }
        absoluteSum += Math.Abs(diff);
        usable++;
    }
    return usable == 0 ? double.NaN : absoluteSum / usable * length;
}
//TODO
/// <summary>
/// Rank-based correlation distance: keeps the positions where both values are finite numbers,
/// ranks the kept values of each vector with <c>ArrayUtils.Rank</c>, and returns
/// <c>PearsonCorrelationDistance.Calc</c> of the two rank vectors.
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector.</param>
/// <returns>The distance, or NaN when fewer than three positions are kept.</returns>
public static double Calc(BaseVector x, BaseVector y) {
    int length = x.Length;
    List<int> kept = new List<int>();
    for (int pos = 0; pos < length; pos++) {
        double xv = x[pos];
        double yv = y[pos];
        bool bothFinite = !(double.IsNaN(xv) || double.IsNaN(yv) || double.IsInfinity(xv) || double.IsInfinity(yv));
        if (bothFinite) {
            kept.Add(pos);
        }
    }
    if (kept.Count < 3) {
        return double.NaN;
    }
    var xRanks = ArrayUtils.Rank(x.SubArray(kept));
    var yRanks = ArrayUtils.Rank(y.SubArray(kept));
    return PearsonCorrelationDistance.Calc(xRanks, yRanks);
}
/// <summary>
/// Scores each group by the number of neighbours of <paramref name="xTest"/> assigned to it,
/// divided by <c>k</c>.
/// </summary>
/// <param name="xTest">Vector to classify.</param>
/// <returns>Array of length <c>ngroups</c> with the per-group score.</returns>
public override double[] PredictStrength(BaseVector xTest) {
    int[] neighbours = GetNeighborInds(x, xTest, k, distance);
    double[] votes = new double[ngroups];
    for (int n = 0; n < neighbours.Length; n++) {
        // A neighbour may be assigned to several groups; each one gets a vote.
        foreach (int group in y[neighbours[n]]) {
            votes[group]++;
        }
    }
    int g = 0;
    while (g < ngroups) {
        votes[g] /= k;
        g++;
    }
    return votes;
}
/// <summary>
/// Returns the SVM decision value of each model for <paramref name="x"/>, negated where the
/// matching <c>invert</c> flag is set. With exactly one model the result has two entries:
/// the (possibly negated) decision value and its negative.
/// </summary>
/// <param name="x">Vector to classify.</param>
/// <returns>Two entries for a single model, otherwise one entry per model.</returns>
public override float[] PredictStrength(BaseVector x) {
    int modelCount = models.Length;
    if (modelCount != 1) {
        float[] strengths = new float[modelCount];
        for (int m = 0; m < strengths.Length; m++) {
            double[] buffer = new double[1];
            SvmMain.SvmPredictValues(models[m], x, buffer);
            float value = (float) buffer[0];
            strengths[m] = invert[m] ? -value : value;
        }
        return strengths;
    }
    float[] pair = new float[2];
    double[] decision = new double[1];
    SvmMain.SvmPredictValues(models[0], x, decision);
    float first = (float) decision[0];
    pair[0] = invert[0] ? -first : first;
    pair[1] = -pair[0];
    return pair;
}
/// <summary>
/// Predicts with <paramref name="model"/> for <paramref name="x"/> by allocating the decision
/// value buffer and delegating to <c>SvmPredictValues</c>. The buffer has one entry for
/// one-class and regression models and <c>nrClass * (nrClass - 1) / 2</c> entries otherwise.
/// </summary>
/// <param name="model">Trained SVM model.</param>
/// <param name="x">Vector to predict for.</param>
/// <returns>The value returned by <c>SvmPredictValues</c>.</returns>
public static float SvmPredict(SvmModel model, BaseVector x) {
    int nrClass = model.nrClass;
    SvmType svmType = model.param.svmType;
    bool singleDecisionValue = svmType == SvmType.OneClass || svmType == SvmType.EpsilonSvr || svmType == SvmType.NuSvr;
    int bufferLength = singleDecisionValue ? 1 : nrClass * (nrClass - 1) / 2;
    double[] decValues = new double[bufferLength];
    return SvmPredictValues(model, x, decValues);
}
//TODO
/// <summary>
/// Euclidean-style distance over the positions whose difference is not NaN. The sum of
/// squared differences is divided by the usable count and multiplied by the total length
/// before taking the square root.
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector.</param>
/// <returns>The distance, or NaN when every difference is NaN (or the vector is empty).</returns>
public static double Calc(BaseVector x, BaseVector y) {
    int total = x.Length;
    int counted = 0;
    double sumOfSquares = 0;
    int pos = 0;
    while (pos < total) {
        double delta = x[pos] - y[pos];
        pos++;
        if (double.IsNaN(delta)) {
            continue;
        }
        sumOfSquares += delta * delta;
        counted++;
    }
    return counted == 0 ? double.NaN : Math.Sqrt(sumOfSquares / counted * total);
}
/// <summary>
/// Finds the indices of the vectors in <paramref name="x"/> nearest to <paramref name="xTest"/>.
/// Candidates are visited in the order given by <c>ArrayUtils.Order</c> of their distances.
/// Those with a NaN or infinite distance are skipped, and the scan stops once
/// <paramref name="k"/> indices have been collected.
/// </summary>
/// <param name="x">Reference vectors.</param>
/// <param name="xTest">Query vector.</param>
/// <param name="k">Number of neighbours wanted.</param>
/// <param name="distance">Distance function.</param>
/// <returns>The collected indices; fewer than <paramref name="k"/> when not enough candidates are usable.</returns>
public static int[] GetNeighborInds(IList<BaseVector> x, BaseVector xTest, int k, IDistance distance) {
    double[] dists = CalcDistances(x, xTest, distance);
    int[] visitOrder = ArrayUtils.Order(dists);
    List<int> nearest = new List<int>();
    for (int rank = 0; rank < dists.Length; rank++) {
        int candidate = visitOrder[rank];
        double candidateDistance = dists[candidate];
        bool usable = !double.IsNaN(candidateDistance) && !double.IsInfinity(candidateDistance);
        if (usable) {
            nearest.Add(candidate);
        }
        if (nearest.Count >= k) {
            break;
        }
    }
    return nearest.ToArray();
}
/// <summary>
/// Validates training input and returns an error message, or null when nothing is wrong.
/// Note the side effect: each entry of <paramref name="y"/> is sorted in place.
/// </summary>
/// <param name="x">Training vectors (not inspected).</param>
/// <param name="y">Group assignments per item; every entry must be non-empty.</param>
/// <param name="ngroups">Number of groups; must be at least two.</param>
/// <returns>
/// "Number of groups has to be at least two.", "There are unassigned items", or null.
/// </returns>
internal static string CheckInput(BaseVector[] x, int[][] y, int ngroups) {
    if (ngroups < 2) {
        return "Number of groups has to be at least two.";
    }
    for (int item = 0; item < y.Length; item++) {
        int[] groups = y[item];
        if (groups.Length == 0) {
            return "There are unassigned items";
        }
        Array.Sort(groups);
    }
    int[] vals = ArrayUtils.UniqueValues(ArrayUtils.Concat(y));
    for (int i = 0; i < vals.Length; i++) {
        if (vals[i] != i) {
            // Disabled check, kept as in the original source:
            //return "At least one group has no training example.";
        }
    }
    return null;
}
/// <summary>
/// Combines the given rows column by column and writes the combined value into the first
/// listed row. Main and numeric columns use <paramref name="combineNumeric"/>; string, category
/// and multi-numeric columns use their respective combiner. Does nothing when
/// <paramref name="rowIdxs"/> is empty. The other listed rows are left as they are.
/// </summary>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="rowIdxs">Rows to combine; the first entry is the row that receives the result.</param>
/// <param name="combineNumeric">Combiner for main-matrix and numeric-column values.</param>
/// <param name="combineString">Combiner for string-column values.</param>
/// <param name="combineCategory">Combiner for category-column values.</param>
/// <param name="combineMultiNumeric">Combiner for multi-numeric-column values.</param>
public static void CombineRows(this IMatrixData mdata, List<int> rowIdxs, Func<double[], double> combineNumeric, Func<string[], string> combineString, Func<string[][], string[]> combineCategory, Func<double[][], double[]> combineMultiNumeric) {
    if (!rowIdxs.Any()) {
        return;
    }
    int targetRow = rowIdxs[0];
    // Main matrix columns.
    for (int col = 0; col < mdata.Values.ColumnCount; col++) {
        BaseVector picked = mdata.Values.GetColumn(col).SubArray(rowIdxs);
        mdata.Values[targetRow, col] = combineNumeric(ArrayUtils.ToDoubles(picked));
    }
    // Numeric annotation columns.
    for (int col = 0; col < mdata.NumericColumnCount; col++) {
        double[] numeric = mdata.NumericColumns[col];
        double[] picked = ArrayUtils.SubArray(numeric, rowIdxs);
        numeric[targetRow] = combineNumeric(picked);
    }
    // String annotation columns.
    for (int col = 0; col < mdata.StringColumnCount; col++) {
        string[] text = mdata.StringColumns[col];
        string[] picked = ArrayUtils.SubArray(text, rowIdxs);
        text[targetRow] = combineString(picked);
    }
    // Category columns are fetched, updated and written back explicitly.
    for (int col = 0; col < mdata.CategoryColumnCount; col++) {
        string[][] category = mdata.GetCategoryColumnAt(col);
        string[][] picked = ArrayUtils.SubArray(category, rowIdxs);
        category[targetRow] = combineCategory(picked);
        mdata.SetCategoryColumnAt(category, col);
    }
    // Multi-numeric annotation columns.
    for (int col = 0; col < mdata.MultiNumericColumnCount; col++) {
        double[][] multi = mdata.MultiNumericColumns[col];
        double[][] picked = ArrayUtils.SubArray(multi, rowIdxs);
        multi[targetRow] = combineMultiNumeric(picked);
    }
}
/// <summary>
/// Returns the SVM decision value of each model for <paramref name="x"/>, negated where the
/// matching <c>invert</c> flag is set. With exactly one model the result has two entries:
/// the (possibly negated) decision value and its negative.
/// </summary>
/// <remarks>
/// Decision values are kept in double precision. The result type is <c>double[]</c>, so
/// narrowing them through <c>float</c> first only discarded precision and could turn very
/// large or very small magnitudes into infinity or zero.
/// </remarks>
/// <param name="x">Vector to classify.</param>
/// <returns>Two entries for a single model, otherwise one entry per model.</returns>
public override double[] PredictStrength(BaseVector x) {
    if (models.Length == 1) {
        double[] result = new double[2];
        double[] decVal = new double[1];
        SvmMain.SvmPredictValues(models[0], x, decVal);
        result[0] = invert[0] ? -decVal[0] : decVal[0];
        result[1] = -result[0];
        return result;
    }
    double[] result1 = new double[models.Length];
    for (int i = 0; i < result1.Length; i++) {
        double[] decVal = new double[1];
        SvmMain.SvmPredictValues(models[i], x, decVal);
        result1[i] = invert[i] ? -decVal[0] : decVal[0];
    }
    return result1;
}
//TODO
/// <summary>
/// Largest absolute coordinate difference between the two vectors, ignoring positions whose
/// difference is NaN or infinite.
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector.</param>
/// <returns>The maximum absolute difference, or NaN when no position qualifies.</returns>
public static double Calc(BaseVector x, BaseVector y) {
    int total = x.Length;
    // Stays at double.MinValue until the first usable difference is seen.
    double best = double.MinValue;
    int pos = 0;
    while (pos < total) {
        double delta = x[pos] - y[pos];
        pos++;
        if (double.IsNaN(delta) || double.IsInfinity(delta)) {
            continue;
        }
        double size = Math.Abs(delta);
        if (size > best) {
            best = size;
        }
    }
    return best == double.MinValue ? double.NaN : best;
}
//TODO
/// <summary>
/// Correlation distance on ranks. Positions where either value is NaN or infinite are dropped;
/// the remaining values of each vector are ranked with <c>ArrayUtils.Rank</c> and compared with
/// <c>PearsonCorrelationDistance.Calc</c>.
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector.</param>
/// <returns>The distance, or NaN when fewer than three positions remain.</returns>
public static double Calc(BaseVector x, BaseVector y) {
    List<int> usablePositions = new List<int>();
    int total = x.Length;
    for (int pos = 0; pos < total; pos++) {
        double left = x[pos];
        double right = y[pos];
        if (double.IsNaN(left) || double.IsNaN(right)) {
            continue;
        }
        if (double.IsInfinity(left) || double.IsInfinity(right)) {
            continue;
        }
        usablePositions.Add(pos);
    }
    if (usablePositions.Count < 3) {
        return double.NaN;
    }
    return PearsonCorrelationDistance.Calc(
        ArrayUtils.Rank(x.SubArray(usablePositions)),
        ArrayUtils.Rank(y.SubArray(usablePositions)));
}
//TODO
/// <summary>
/// Sum of absolute differences over the positions whose difference is not NaN, divided by the
/// number of such positions and multiplied by the total length.
/// </summary>
/// <param name="x">First vector; its length defines how many positions are scanned.</param>
/// <param name="y">Second vector.</param>
/// <returns>The distance, or NaN when no difference is usable.</returns>
public static double Calc(BaseVector x, BaseVector y) {
    int total = x.Length;
    int counted = 0;
    double accumulated = 0;
    int pos = 0;
    while (pos < total) {
        double delta = x[pos] - y[pos];
        pos++;
        if (double.IsNaN(delta)) {
            continue;
        }
        accumulated += Math.Abs(delta);
        counted++;
    }
    if (counted == 0) {
        return double.NaN;
    }
    return accumulated / counted * total;
}
/// <summary>
/// Trains C-SVC models for the problems produced by <c>CreateProblems</c>, one model per
/// problem, distributing the work with a <c>ThreadDistributor</c>.
/// </summary>
/// <param name="x">Training vectors.</param>
/// <param name="y">Group assignments per training vector.</param>
/// <param name="ngroups">Number of groups.</param>
/// <param name="param">Provides "Kernel" (with sub-parameters) and "C".</param>
/// <param name="nthreads">Thread count handed to the <c>ThreadDistributor</c>.</param>
/// <param name="reportProgress">Optional progress callback; may be null.</param>
/// <returns>A new <c>SvmClassificationModel</c> holding the models and the inversion flags.</returns>
/// <exception cref="Exception">Thrown with the message from <c>CheckInput</c> when the input is rejected.</exception>
public override ClassificationModel Train(BaseVector[] x, int[][] y, int ngroups, Parameters param, int nthreads, Action<double> reportProgress) {
    string problemDescription = CheckInput(x, y, ngroups);
    if (problemDescription != null) {
        throw new Exception(problemDescription);
    }
    ParameterWithSubParams<int> kernelChoice = param.GetParamWithSubParams<int>("Kernel");
    SvmParameter settings = new SvmParameter();
    settings.kernelFunction = KernelFunctions.GetKernelFunction(kernelChoice.Value, kernelChoice.GetSubParameters());
    settings.svmType = SvmType.CSvc;
    settings.c = param.GetParam<double>("C").Value;
    bool[] invert;
    SvmProblem[] problems = CreateProblems(x, y, ngroups, out invert);
    SvmModel[] models = new SvmModel[problems.Length];
    ThreadDistributor distributor = new ThreadDistributor(nthreads, models.Length,
        index => {
            models[index] = SvmMain.SvmTrain(problems[index], settings);
        },
        fractionDone => {
            if (reportProgress != null) {
                reportProgress(fractionDone);
            }
        });
    distributor.Start();
    return new SvmClassificationModel(models, invert);
}
/// <summary>
/// Encodes one vector into a float vector of length <paramref name="len"/>. A feature whose
/// <paramref name="nominal"/> entry is at most 2 is copied as a single float. Any other feature
/// occupies <c>nominal[i]</c> slots, with a 1 in the slot selected by the rounded feature value.
/// </summary>
/// <param name="x">Vector to encode.</param>
/// <param name="nominal">Per-feature slot count, as interpreted above.</param>
/// <param name="len">Length of the encoded vector.</param>
/// <returns>A new <c>FloatArrayVector</c> with the encoded values.</returns>
private static BaseVector ToOneHotEncodingMixed(BaseVector x, int[] nominal, int len) {
    float[] encoded = new float[len];
    int offset = 0;
    for (int feature = 0; feature < x.Length; feature++) {
        int levels = nominal[feature];
        double value = x[feature];
        if (levels > 2) {
            encoded[offset + (int) Math.Round(value)] = 1;
            offset += levels;
        } else {
            encoded[offset] = (float) value;
            offset++;
        }
    }
    return new FloatArrayVector(encoded);
}
/// <summary>
/// Encodes one vector into a boolean vector of length <paramref name="len"/>. A feature whose
/// <paramref name="nominal"/> entry equals 2 takes one slot that is true when the rounded value
/// is 1. Any other feature occupies <c>nominal[i]</c> slots, with true in the slot selected by
/// the rounded value.
/// </summary>
/// <param name="x">Vector to encode; each value is rounded to an integer.</param>
/// <param name="nominal">Per-feature slot count, as interpreted above.</param>
/// <param name="len">Length of the encoded vector.</param>
/// <returns>A new <c>BoolArrayVector</c> with the encoded values.</returns>
private static BaseVector ToOneHotEncodingBoolean(BaseVector x, int[] nominal, int len) {
    bool[] encoded = new bool[len];
    int offset = 0;
    for (int feature = 0; feature < x.Length; feature++) {
        int levels = nominal[feature];
        int level = (int) Math.Round(x[feature]);
        if (levels != 2) {
            encoded[offset + level] = true;
            offset += levels;
            continue;
        }
        encoded[offset] = level == 1;
        offset++;
    }
    return new BoolArrayVector(encoded);
}
/// <summary>
/// Sorts the rows of <paramref name="mdata"/> by one column via <c>SortByValues</c>.
/// The "Column" index addresses main columns first, then numeric columns, then string columns.
/// </summary>
/// <param name="mdata">Matrix to sort.</param>
/// <param name="param">Provides "Column" (index) and "Descending" (flag).</param>
/// <param name="supplTables">Not touched.</param>
/// <param name="documents">Not touched.</param>
/// <param name="processInfo">Not used.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int columnIndex = param.GetParam<int>("Column").Value;
    bool descending = param.GetParam<bool>("Descending").Value;
    if (columnIndex < mdata.Values.ColumnCount) {
        BaseVector mainColumn = mdata.Values.GetColumn(columnIndex);
        SortByValues(mdata, mainColumn.ToArray(), descending);
        return;
    }
    if (columnIndex < mdata.Values.ColumnCount + mdata.NumericColumnCount) {
        double[] numericColumn = mdata.NumericColumns[columnIndex - mdata.ColumnCount];
        SortByValues(mdata, numericColumn, descending);
        return;
    }
    string[] stringColumn = mdata.StringColumns[columnIndex - mdata.ColumnCount - mdata.NumericColumnCount];
    SortByValues(mdata, stringColumn, descending);
}
/// <summary>
/// Predicts the label for <paramref name="x"/> and fills <paramref name="probEstimates"/> with
/// per-class probability estimates. This only happens for C-SVC / nu-SVC models that carry
/// <c>probA</c> and <c>probB</c>; for any other model the call falls back to <c>SvmPredict</c>
/// and <paramref name="probEstimates"/> is not written.
/// </summary>
/// <param name="model">Trained SVM model.</param>
/// <param name="x">Vector to predict for.</param>
/// <param name="probEstimates">Output buffer, indexed by class position up to <c>nrClass</c>.</param>
/// <returns>The label with the highest estimate, or the result of <c>SvmPredict</c>.</returns>
public static float SvmPredictProbability(SvmModel model, BaseVector x, double[] probEstimates) {
    SvmType svmType = model.param.svmType;
    bool isClassifier = svmType == SvmType.CSvc || svmType == SvmType.NuSvc;
    if (!isClassifier || model.probA == null || model.probB == null) {
        return SvmPredict(model, x);
    }
    int nrClass = model.nrClass;
    double[] decValues = new double[nrClass * (nrClass - 1) / 2];
    SvmPredictValues(model, x, decValues);
    const double minProb = 1e-7;
    double[][] pairwise = new double[nrClass][];
    for (int row = 0; row < nrClass; row++) {
        pairwise[row] = new double[nrClass];
    }
    int pair = 0;
    for (int i = 0; i < nrClass; i++) {
        for (int j = i + 1; j < nrClass; j++) {
            // Clamp each pairwise estimate into [minProb, 1 - minProb].
            double clamped = Math.Min(Math.Max(SigmoidPredict(decValues[pair], model.probA[pair], model.probB[pair]), minProb), 1 - minProb);
            pairwise[i][j] = clamped;
            pairwise[j][i] = 1 - clamped;
            pair++;
        }
    }
    MulticlassProbability(nrClass, pairwise, probEstimates);
    int best = 0;
    for (int cls = 1; cls < nrClass; cls++) {
        if (probEstimates[cls] > probEstimates[best]) {
            best = cls;
        }
    }
    return model.label[best];
}
/// <summary>
/// Predicts a value for <paramref name="x"/> by delegating to <c>SvmMain.SvmPredict</c> with the wrapped model.
/// </summary>
/// <param name="x">Vector to predict for.</param>
/// <returns>The value returned by <c>SvmMain.SvmPredict</c>.</returns>
public override float Predict(BaseVector x) {
    float prediction = SvmMain.SvmPredict(model, x);
    return prediction;
}
/// <summary>
/// Restricts <paramref name="x"/> to the positions in <c>featureInds</c> and forwards the
/// reduced vector to the wrapped classifier.
/// </summary>
/// <param name="x">Full-length vector to classify.</param>
/// <returns>The strengths reported by the wrapped classifier.</returns>
public override float[] PredictStrength(BaseVector x) {
    var reduced = x.SubArray(featureInds);
    return classifier.PredictStrength(reduced);
}
//
// Interface functions
//
/// <summary>
/// Trains an SVM model for <paramref name="prob"/> using <paramref name="param"/>.
/// For one-class and regression SVM types a single decision function is trained on the whole
/// problem. For the remaining (classification) types the samples are grouped by class and one
/// binary decision function is trained for every pair of classes.
/// </summary>
/// <param name="prob">Training problem; <c>prob.x</c> and <c>prob.Count</c> are read here.</param>
/// <param name="param">SVM parameters; stored in the returned model as <c>model.param</c>.</param>
/// <returns>The populated model (support vectors, coefficients, rho and, if requested, probability data).</returns>
public static SvmModel SvmTrain(SvmProblem prob, SvmParameter param) {
    SvmModel model = new SvmModel { param = param };
    if (param.svmType == SvmType.OneClass || param.svmType == SvmType.EpsilonSvr || param.svmType == SvmType.NuSvr) {
        // regression or one-class-svm
        model.nrClass = 2;
        model.label = null;
        model.nSv = null;
        model.probA = null;
        model.probB = null;
        model.svCoef = new double[1][];
        // Probability information is only produced for the two regression types.
        if (param.probability && (param.svmType == SvmType.EpsilonSvr || param.svmType == SvmType.NuSvr)) {
            model.probA = new double[1];
            model.probA[0] = SvmSvrProbability(prob, param);
        }
        DecisionFunction f = SvmTrainOne(prob, param, 0, 0);
        model.rho = new double[1];
        model.rho[0] = f.rho;
        // First pass: count the samples with a non-zero alpha.
        int nSv = 0;
        int i;
        for (i = 0; i < prob.Count; i++) {
            if (Math.Abs(f.alpha[i]) > 0) {
                ++nSv;
            }
        }
        model.l = nSv;
        model.sv = new BaseVector[nSv];
        model.svCoef[0] = new double[nSv];
        // Second pass: copy those samples and their alpha values into the model.
        int j = 0;
        for (i = 0; i < prob.Count; i++) {
            if (Math.Abs(f.alpha[i]) > 0) {
                model.sv[j] = prob.x[i];
                model.svCoef[0][j] = f.alpha[i];
                ++j;
            }
        }
    } else {
        // classification
        int l = prob.Count;
        // Single-element arrays serve as output slots for SvmGroupClasses.
        int[] tmpNrClass = new int[1];
        int[][] tmpLabel = new int[1][];
        int[][] tmpStart = new int[1][];
        int[][] tmpCount = new int[1][];
        int[] perm = new int[l];
        // group training data of the same class
        SvmGroupClasses(prob, tmpNrClass, tmpLabel, tmpStart, tmpCount, perm);
        int nrClass = tmpNrClass[0];
        int[] label = tmpLabel[0];
        int[] start = tmpStart[0];
        int[] count = tmpCount[0];
        if (nrClass == 1) {
            Info("WARNING: training data in only one class. See README for details.\n");
        }
        // x holds the samples reordered by perm; start/count index into this order below.
        BaseVector[] x = new BaseVector[l];
        int i;
        for (i = 0; i < l; i++) {
            x[i] = prob.x[perm[i]];
        }
        // calculate weighted C
        double[] weightedC = new double[nrClass];
        for (i = 0; i < nrClass; i++) {
            weightedC[i] = param.c;
        }
        for (i = 0; i < param.nrWeight; i++) {
            int j;
            // Find the class whose label matches this weight entry.
            for (j = 0; j < nrClass; j++) {
                if (param.weightLabel[i] == label[j]) {
                    break;
                }
            }
            if (j == nrClass) {
                Info("WARNING: class label " + param.weightLabel[i] + " specified in weight is not found\n");
            } else {
                weightedC[j] *= param.weight[i];
            }
        }
        // train k*(k-1)/2 models
        // nonzero[t] becomes true once sample t has a non-zero alpha in any pairwise model.
        bool[] nonzero = new bool[l];
        for (i = 0; i < l; i++) {
            nonzero[i] = false;
        }
        DecisionFunction[] f = new DecisionFunction[nrClass * (nrClass - 1) / 2];
        double[] probA = null, probB = null;
        if (param.probability) {
            probA = new double[nrClass * (nrClass - 1) / 2];
            probB = new double[nrClass * (nrClass - 1) / 2];
        }
        // p counts the class pairs (i, j) with i < j in iteration order.
        int p = 0;
        for (i = 0; i < nrClass; i++) {
            for (int j = i + 1; j < nrClass; j++) {
                int si = start[i], sj = start[j];
                int ci = count[i], cj = count[j];
                int c = ci + cj;
                // Binary sub-problem: class i samples labelled +1 followed by class j samples labelled -1.
                SvmProblem subProb = new SvmProblem { x = new BaseVector[c], y = new float[c] };
                int k;
                for (k = 0; k < ci; k++) {
                    subProb.x[k] = x[si + k];
                    subProb.y[k] = +1;
                }
                for (k = 0; k < cj; k++) {
                    subProb.x[ci + k] = x[sj + k];
                    subProb.y[ci + k] = -1;
                }
                if (param.probability) {
                    double[] probAb = new double[2];
                    SvmBinarySvcProbability(subProb, param, weightedC[i], weightedC[j], probAb);
                    probA[p] = probAb[0];
                    probB[p] = probAb[1];
                }
                f[p] = SvmTrainOne(subProb, param, weightedC[i], weightedC[j]);
                // Mark the samples of both classes that received a non-zero alpha.
                for (k = 0; k < ci; k++) {
                    if (!nonzero[si + k] && Math.Abs(f[p].alpha[k]) > 0) {
                        nonzero[si + k] = true;
                    }
                }
                for (k = 0; k < cj; k++) {
                    if (!nonzero[sj + k] && Math.Abs(f[p].alpha[ci + k]) > 0) {
                        nonzero[sj + k] = true;
                    }
                }
                ++p;
            }
        }
        // build output
        model.nrClass = nrClass;
        model.label = new int[nrClass];
        for (i = 0; i < nrClass; i++) {
            model.label[i] = label[i];
        }
        model.rho = new double[nrClass * (nrClass - 1) / 2];
        for (i = 0; i < nrClass * (nrClass - 1) / 2; i++) {
            model.rho[i] = f[i].rho;
        }
        if (param.probability) {
            model.probA = new double[nrClass * (nrClass - 1) / 2];
            model.probB = new double[nrClass * (nrClass - 1) / 2];
            for (i = 0; i < nrClass * (nrClass - 1) / 2; i++) {
                model.probA[i] = probA[i];
                model.probB[i] = probB[i];
            }
        } else {
            model.probA = null;
            model.probB = null;
        }
        // Count the marked samples per class (nzCount / model.nSv) and in total (nnz).
        int nnz = 0;
        int[] nzCount = new int[nrClass];
        model.nSv = new int[nrClass];
        for (i = 0; i < nrClass; i++) {
            int nSv = 0;
            for (int j = 0; j < count[i]; j++) {
                if (nonzero[start[i] + j]) {
                    ++nSv;
                    ++nnz;
                }
            }
            model.nSv[i] = nSv;
            nzCount[i] = nSv;
        }
        Info("Total nSV = " + nnz + "\n");
        model.l = nnz;
        model.sv = new BaseVector[nnz];
        // Copy the marked samples into the model, keeping the grouped order.
        p = 0;
        for (i = 0; i < l; i++) {
            if (nonzero[i]) {
                model.sv[p++] = x[i];
            }
        }
        // nzStart[i] is the offset of class i's first marked sample within model.sv.
        int[] nzStart = new int[nrClass];
        nzStart[0] = 0;
        for (i = 1; i < nrClass; i++) {
            nzStart[i] = nzStart[i - 1] + nzCount[i - 1];
        }
        model.svCoef = new double[nrClass - 1][];
        for (i = 0; i < nrClass - 1; i++) {
            model.svCoef[i] = new double[nnz];
        }
        p = 0;
        for (i = 0; i < nrClass; i++) {
            for (int j = i + 1; j < nrClass; j++) {
                // classifier (i,j): coefficients with
                // i are in sv_coef[j-1][nz_start[i]...],
                // j are in sv_coef[i][nz_start[j]...]
                int si = start[i];
                int sj = start[j];
                int ci = count[i];
                int cj = count[j];
                int q = nzStart[i];
                int k;
                for (k = 0; k < ci; k++) {
                    if (nonzero[si + k]) {
                        model.svCoef[j - 1][q++] = f[p].alpha[k];
                    }
                }
                q = nzStart[j];
                for (k = 0; k < cj; k++) {
                    if (nonzero[sj + k]) {
                        model.svCoef[i][q++] = f[p].alpha[ci + k];
                    }
                }
                ++p;
            }
        }
    }
    return(model);
}
/// <summary>
/// Sum of squared differences between this vector and <paramref name="y"/>, dispatched on the
/// runtime type of <paramref name="y"/>. Types are tested in the order float array, double
/// array, bool array; anything else is cast to <c>SparseFloatVector</c>. For the three array
/// types <paramref name="y"/> is passed first and this vector second; for the sparse case this
/// vector is passed first.
/// </summary>
/// <param name="y">The other vector.</param>
/// <returns>The sum of squared differences.</returns>
public override double SumSquaredDiffs(BaseVector y) {
    double total;
    if (y is FloatArrayVector) {
        total = SumSquaredDiffs((FloatArrayVector) y, this);
    } else if (y is DoubleArrayVector) {
        total = SumSquaredDiffs((DoubleArrayVector) y, this);
    } else if (y is BoolArrayVector) {
        total = SumSquaredDiffs((BoolArrayVector) y, this);
    } else {
        total = SumSquaredDiffs(this, (SparseFloatVector) y);
    }
    return total;
}
/// <summary>
/// Builds a k-nearest-neighbour regression model from the training data.
/// Reads "Number of neighbours" and the distance function from <paramref name="param"/>.
/// </summary>
/// <param name="x">Training vectors.</param>
/// <param name="y">Target values.</param>
/// <param name="param">Parameters holding the neighbour count and the distance settings.</param>
/// <param name="ntheads">Not used by this implementation.</param>
/// <returns>A new <c>KnnRegressionModel</c>.</returns>
public RegressionModel Train(BaseVector[] x, float[] y, Parameters param, int ntheads) {
    int neighbourCount = param.GetParam<int>("Number of neighbours").Value;
    IDistance metric = Distances.GetDistanceFunction(param);
    KnnRegressionModel model = new KnnRegressionModel(x, y, neighbourCount, metric);
    return model;
}
/// <summary>
/// Evaluates the kernel tanh(Gamma * (xi . xj) + Offset).
/// </summary>
/// <param name="xi">First vector.</param>
/// <param name="xj">Second vector.</param>
/// <param name="xSquarei">Not used by this kernel.</param>
/// <param name="xSquarej">Not used by this kernel.</param>
/// <returns>The kernel value.</returns>
public double Evaluate(BaseVector xi, BaseVector xj, double xSquarei, double xSquarej) {
    var argument = Gamma * xi.Dot(xj) + Offset;
    return Math.Tanh(argument);
}
/// <summary>
/// Instance entry point for the distance: forwards to <c>Calc</c>.
/// </summary>
/// <param name="x">First vector.</param>
/// <param name="y">Second vector.</param>
/// <returns>The value returned by <c>Calc(x, y)</c>.</returns>
public double Get(BaseVector x, BaseVector y) {
    double value = Calc(x, y);
    return value;
}