/// <summary>
/// Trains a multiclass model on <paramref name="dataset"/>, replacing any model currently held.
/// </summary>
/// <param name="dataset">Labeled sparse vectors; rejected when null or empty.</param>
public void Train(ILabeledExampleCollection<LblT, SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
    Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
    Dispose();
    // Dispose() resets the label maps only when a model is loaded; clear them here as well so
    // that entries left behind by a failed training run or a model-less Load() cannot shift label ids.
    mLblToId.Clear();
    mIdxToLbl.Clear();
    int[] trainSet = new int[dataset.Count];
    int vecCount = 0; // number of native feature vectors created so far
    try
    {
        foreach (LabeledExample<LblT, SparseVector<double>> lblEx in dataset)
        {
            SparseVector<double> vec = lblEx.Example;
            int[] idx = new int[vec.Count];
            float[] val = new float[vec.Count];
            for (int i = 0; i < vec.Count; i++)
            {
                idx[i] = vec.InnerIdx[i] + 1; // *** indices are 1-based in SvmLightLib
                val[i] = (float)vec.InnerDat[i]; // *** loss of precision (double -> float)
            }
            int lbl;
            if (!mLblToId.TryGetValue(lblEx.Label, out lbl))
            {
                mLblToId.Add(lblEx.Label, lbl = mLblToId.Count + 1); // *** labels start with 1 in SvmLightLib
                mIdxToLbl.Add(lblEx.Label);
            }
            trainSet[vecCount] = SvmLightLib.NewFeatureVector(idx.Length, idx, val, lbl);
            vecCount++;
        }
        mModelId = SvmLightLib.TrainMulticlassModel(
            string.Format("-c {0} -e {1}",
                mC.ToString(CultureInfo.InvariantCulture),
                mEps.ToString(CultureInfo.InvariantCulture)),
            trainSet.Length, trainSet);
    }
    finally
    {
        // delete training vectors (only those actually created), even when training failed
        for (int i = 0; i < vecCount; i++)
        {
            SvmLightLib.DeleteFeatureVector(trainSet[i]);
        }
    }
}
/// <summary>
/// Restores the classifier settings, the label list, the label comparer and (when present)
/// the binary model from <paramref name="reader"/>. Any currently held model is disposed first.
/// </summary>
/// <param name="reader">Serializer to read from; rejected when null.</param>
public void Load(BinarySerializer reader)
{
    Utils.ThrowException(reader == null ? new ArgumentNullException("reader") : null);
    Dispose();

    // the following statements throw serialization-related exceptions
    // (the read order mirrors the order used by Save)
    mVerbosityLevel = (SvmLightVerbosityLevel)reader.ReadInt();
    mC = reader.ReadDouble();
    mBiasedHyperplane = reader.ReadBool();
    mKernelType = (SvmLightKernelType)reader.ReadInt();
    mKernelParamGamma = reader.ReadDouble();
    mKernelParamD = reader.ReadDouble();
    mKernelParamS = reader.ReadDouble();
    mKernelParamC = reader.ReadDouble();
    mBiasedCostFunction = reader.ReadBool();
    mCustomParams = reader.ReadString();
    mEps = reader.ReadDouble();
    mMaxIter = reader.ReadInt();
    mIdxToLbl.Load(reader);
    mLblCmp = reader.ReadObject<IEqualityComparer<LblT>>();

    bool modelStored = reader.ReadBool();
    if (!modelStored)
    {
        return;
    }

    // the native library pulls the model bytes through this callback
    SvmLightLib.ReadByteCallback readByte = delegate()
    {
        return reader.ReadByte();
    };
    try
    {
        mModelId = SvmLightLib.LoadModelBinCallback(readByte);
    }
    catch (BadImageFormatException ex)
    {
        // rethrow with the assembly directory appended to the message
        string assemblyDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        throw new BadImageFormatException(ex.Message + "\n assembly path: " + assemblyDir, ex);
    }
    GC.KeepAlive(readByte);
}
// *** ISerializable interface implementation ***

/// <summary>
/// Writes the classifier settings, the label list, the label comparer and (when a model is
/// loaded) the binary model to <paramref name="writer"/>.
/// </summary>
/// <param name="writer">Serializer to write to; rejected when null.</param>
public void Save(BinarySerializer writer)
{
    Utils.ThrowException(writer == null ? new ArgumentNullException("writer") : null);

    // the following statements throw serialization-related exceptions
    writer.WriteInt((int)mVerbosityLevel);
    writer.WriteDouble(mC);
    writer.WriteBool(mBiasedHyperplane);
    writer.WriteInt((int)mKernelType);
    writer.WriteDouble(mKernelParamGamma);
    writer.WriteDouble(mKernelParamD);
    writer.WriteDouble(mKernelParamS);
    writer.WriteDouble(mKernelParamC);
    writer.WriteBool(mBiasedCostFunction);
    writer.WriteString(mCustomParams);
    writer.WriteDouble(mEps);
    writer.WriteInt(mMaxIter);
    mIdxToLbl.Save(writer);
    writer.WriteObject(mLblCmp);

    // a flag tells the reader whether model bytes follow
    bool hasModel = mModelId != -1;
    writer.WriteBool(hasModel);
    if (hasModel)
    {
        // the native library pushes the model bytes through this callback
        SvmLightLib.WriteByteCallback writeByte = delegate(byte b)
        {
            writer.WriteByte(b);
        };
        SvmLightLib.SaveModelBinCallback(mModelId, writeByte);
        GC.KeepAlive(writeByte);
    }
}
/// <summary>
/// Restores the classifier settings, the label list, the label comparer and (when present)
/// the binary model from <paramref name="reader"/>. Any currently held model is disposed first.
/// </summary>
/// <param name="reader">Serializer to read from; rejected when null.</param>
public void Load(BinarySerializer reader)
{
    Utils.ThrowException(reader == null ? new ArgumentNullException("reader") : null);
    Dispose();

    // the following statements throw serialization-related exceptions
    // (the read order mirrors the order used by Save)
    mVerbosityLevel = (SvmLightVerbosityLevel)reader.ReadInt();
    mC = reader.ReadDouble();
    mBiasedHyperplane = reader.ReadBool();
    mKernelType = (SvmLightKernelType)reader.ReadInt();
    mKernelParamGamma = reader.ReadDouble();
    mKernelParamD = reader.ReadDouble();
    mKernelParamS = reader.ReadDouble();
    mKernelParamC = reader.ReadDouble();
    mBiasedCostFunction = reader.ReadBool();
    mCustomParams = reader.ReadString();
    mEps = reader.ReadDouble();
    mMaxIter = reader.ReadInt();
    mIdxToLbl.Load(reader);
    mLblCmp = reader.ReadObject<IEqualityComparer<LblT>>();

    bool modelStored = reader.ReadBool();
    if (!modelStored)
    {
        return;
    }

    // the native library pulls the model bytes through this callback
    SvmLightLib.ReadByteCallback readByte = delegate()
    {
        return reader.ReadByte();
    };
    mModelId = SvmLightLib.LoadModelBinCallback(readByte);
    GC.KeepAlive(readByte);
}
/// <summary>
/// Scores every feature and returns (score, feature index) pairs sorted with DescSort
/// (Guyon et al. 2002).
/// </summary>
/// <returns>One entry per feature; the key is the score, the value is the feature index.</returns>
public ArrayList<KeyDat<double, int>> RankFeatures()
{
    Utils.ThrowException(mModelId == -1 ? new InvalidOperationException() : null);
    ArrayList<KeyDat<double, int>> ranking = new ArrayList<KeyDat<double, int>>();
    if (mKernelType == SvmLightKernelType.Linear)
    {
        // linear kernel (fast): the score is half the squared weight
        double[] weights = GetLinearWeights();
        for (int featIdx = 0; featIdx < weights.Length; featIdx++)
        {
            double score = 0.5 * weights[featIdx] * weights[featIdx];
            ranking.Add(new KeyDat<double, int>(score, featIdx));
        }
    }
    else
    {
        // any kernel: the score is the absolute change in half the cost when the feature is removed
        int featCount = SvmLightLib.GetFeatureCount(mModelId);
        double baseline = 0.5 * ComputeCost(-1);
        for (int featIdx = 0; featIdx < featCount; featIdx++)
        {
            double score = Math.Abs(baseline - 0.5 * ComputeCost(/*rmvFeatIdx=*/ featIdx));
            ranking.Add(new KeyDat<double, int>(score, featIdx));
        }
    }
    ranking.Sort(DescSort<KeyDat<double, int>>.Instance);
    return ranking;
}
/// <summary>
/// Classifies <paramref name="example"/> with the loaded multiclass model and returns one
/// (score, label) pair per score reported by the native library, sorted with DescSort.
/// </summary>
/// <param name="example">Sparse vector to classify; rejected when null.</param>
/// <returns>The prediction after sorting and <c>Trim()</c>.</returns>
public Prediction<LblT> Predict(SparseVector<double> example)
{
    Utils.ThrowException(mModelId == -1 ? new InvalidOperationException() : null);
    Utils.ThrowException(example == null ? new ArgumentNullException("example") : null);
    Prediction<LblT> result = new Prediction<LblT>();
    int[] idx = new int[example.Count];
    float[] val = new float[example.Count];
    for (int i = 0; i < example.Count; i++)
    {
        idx[i] = example.InnerIdx[i] + 1; // *** indices are 1-based in SvmLightLib
        val[i] = (float)example.InnerDat[i]; // *** loss of precision (double -> float)
    }
    int vecId = SvmLightLib.NewFeatureVector(idx.Length, idx, val, 0);
    try
    {
        SvmLightLib.MulticlassClassify(mModelId, 1, new int[] { vecId });
        int n = SvmLightLib.GetFeatureVectorClassifScoreCount(vecId);
        for (int i = 0; i < n; i++)
        {
            double score = SvmLightLib.GetFeatureVectorClassifScore(vecId, i);
            LblT lbl = mIdxToLbl[i];
            result.Inner.Add(new KeyDat<double, LblT>(score, lbl));
        }
    }
    finally
    {
        // delete feature vector even when classification or score retrieval fails
        SvmLightLib.DeleteFeatureVector(vecId);
    }
    result.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
    result.Trim();
    return result;
}
/// <summary>
/// Classifies <paramref name="example"/> with the loaded binary model. The label at index 0 is
/// reported first when the score is positive, otherwise the label at index 1; the two entries
/// carry +|score| and -|score| respectively.
/// </summary>
/// <param name="example">Sparse vector to classify; rejected when null.</param>
/// <returns>A prediction holding two (score, label) pairs.</returns>
public Prediction<LblT> Predict(SparseVector<double> example)
{
    Utils.ThrowException(mModelId == -1 ? new InvalidOperationException() : null);
    Utils.ThrowException(example == null ? new ArgumentNullException("example") : null);
    Prediction<LblT> result = new Prediction<LblT>();
    int[] idx = new int[example.Count];
    float[] val = new float[example.Count];
    for (int i = 0; i < example.Count; i++)
    {
        idx[i] = example.InnerIdx[i] + 1; // feature indices are shifted by one for SvmLightLib
        val[i] = (float)example.InnerDat[i]; // *** cast to float
    }
    int vecId = SvmLightLib.NewFeatureVector(idx.Length, idx, val, 0);
    double score;
    try
    {
        SvmLightLib.Classify(mModelId, 1, new int[] { vecId });
        score = SvmLightLib.GetFeatureVectorClassifScore(vecId, 0);
    }
    finally
    {
        // delete feature vector even when classification fails
        SvmLightLib.DeleteFeatureVector(vecId);
    }
    LblT lbl = mIdxToLbl[score > 0 ? 0 : 1];
    LblT otherLbl = mIdxToLbl[score > 0 ? 1 : 0];
    result.Inner.Add(new KeyDat<double, LblT>(Math.Abs(score), lbl));
    result.Inner.Add(new KeyDat<double, LblT>(-Math.Abs(score), otherLbl));
    return result;
}
/// <summary>
/// Loads a model from <paramref name="fileName"/> through SvmLightLib and registers the labels
/// 1 and -1 (in that order). Only permitted when <c>LblT</c> is <c>int</c>.
/// </summary>
/// <param name="fileName">File to load; validated with <c>Utils.VerifyFileNameOpen</c>.</param>
public void LoadModel(string fileName)
{
    Utils.ThrowException(typeof(LblT) != typeof(int) ? new InvalidOperationException() : null);
    Utils.ThrowException(!Utils.VerifyFileNameOpen(fileName) ? new ArgumentValueException("fileName") : null);
    Dispose();
    // Dispose() clears the label list only when a model is loaded; clear it here as well so
    // that labels left over without a model cannot end up in front of the two added below.
    mIdxToLbl.Clear();
    mIdxToLbl.Add((LblT)(object)1);
    mIdxToLbl.Add((LblT)(object)-1);
    mModelId = SvmLightLib.LoadModel(fileName);
}
// *** IDisposable interface implementation ***

/// <summary>
/// Deletes the native multiclass model (if one is loaded), clears both label maps and marks
/// the instance as holding no model. Does nothing when no model is loaded.
/// </summary>
public void Dispose()
{
    if (mModelId == -1)
    {
        return; // nothing to release
    }
    SvmLightLib.DeleteMulticlassModel(mModelId);
    mLblToId.Clear();
    mIdxToLbl.Clear();
    mModelId = -1;
}
// *** IDisposable interface implementation ***

/// <summary>
/// Deletes the native model (if one is loaded), clears the label list, drops the cached
/// weights and marks the instance as holding no model. Does nothing when no model is loaded.
/// </summary>
public void Dispose()
{
    if (mModelId == -1)
    {
        return; // nothing to release
    }
    SvmLightLib.DeleteModel(mModelId);
    mIdxToLbl.Clear();
    mModelId = -1;
    mWeights = null;
}
/// <summary>
/// Copies the support vector at position <paramref name="idx"/> of the loaded model into a
/// new sparse vector.
/// </summary>
/// <param name="idx">Position of the support vector within the model.</param>
/// <returns>A sparse vector holding the (feature, weight) pairs reported by SvmLightLib.</returns>
private SparseVector<double> GetSupportVector(int idx)
{
    SparseVector<double> supportVec = new SparseVector<double>();
    int pairCount = SvmLightLib.GetSupportVectorFeatureCount(mModelId, idx);
    int pos = 0;
    while (pos < pairCount)
    {
        // NOTE(review): feature ids are stored exactly as returned by SvmLightLib; whether
        // they are 0- or 1-based cannot be told from this method - confirm against the wrapper.
        supportVec.InnerIdx.Add(SvmLightLib.GetSupportVectorFeature(mModelId, idx, pos));
        supportVec.InnerDat.Add(SvmLightLib.GetSupportVectorWeight(mModelId, idx, pos));
        pos++;
    }
    return supportVec;
}
/// <summary>
/// Returns pairs (support vector index, alpha * y) for every support vector of the loaded model.
/// </summary>
/// <returns>One <c>IdxDat</c> per support vector, in the order reported by SvmLightLib.</returns>
public ArrayList<IdxDat<double>> GetAlphas()
{
    Utils.ThrowException(mModelId == -1 ? new InvalidOperationException() : null);
    ArrayList<IdxDat<double>> alphas = new ArrayList<IdxDat<double>>();
    // query the native library once; the model is not modified inside the loop
    int numSv = SvmLightLib.GetSupportVectorCount(mModelId);
    for (int i = 0; i < numSv; i++)
    {
        double alpha = SvmLightLib.GetSupportVectorAlpha(mModelId, i);
        int idx = SvmLightLib.GetSupportVectorIndex(mModelId, i);
        alphas.Add(new IdxDat<double>(idx, alpha));
    }
    return alphas;
}
// *** ISerializable interface implementation ***

/// <summary>
/// Writes C, eps, the label list, the label comparer and (when a model is loaded) the binary
/// multiclass model to <paramref name="writer"/>.
/// </summary>
/// <param name="writer">Serializer to write to; rejected when null.</param>
public void Save(BinarySerializer writer)
{
    Utils.ThrowException(writer == null ? new ArgumentNullException("writer") : null);

    // the following statements throw serialization-related exceptions
    writer.WriteDouble(mC);
    writer.WriteDouble(mEps);
    mIdxToLbl.Save(writer);
    writer.WriteObject(mLblCmp);

    // a flag tells the reader whether model bytes follow
    bool hasModel = mModelId != -1;
    writer.WriteBool(hasModel);
    if (hasModel)
    {
        // the native library pushes the model bytes through this callback
        SvmLightLib.WriteByteCallback writeByte = delegate(byte b)
        {
            writer.WriteByte(b);
        };
        SvmLightLib.SaveMulticlassModelBinCallback(mModelId, writeByte);
        GC.KeepAlive(writeByte);
    }
}
/// <summary>
/// Trains a binary model on <paramref name="dataset"/>, replacing any model currently held.
/// The first distinct label encountered is passed to SvmLightLib as +1 and the second as -1;
/// a third distinct label is rejected.
/// </summary>
/// <param name="dataset">Labeled sparse vectors; rejected when null, empty or holding more than two distinct labels.</param>
public void Train(ILabeledExampleCollection<LblT, SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
    Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
    Dispose();
    // Dispose() clears the label list only when a model is loaded; clear it here as well so
    // that labels left behind by a failed training run cannot shift the index-to-label mapping.
    mIdxToLbl.Clear();
    int[] trainSet = new int[dataset.Count];
    Dictionary<LblT, int> lblToIdx = new Dictionary<LblT, int>(mLblCmp);
    MultiSet<int> lblCount = new MultiSet<int>();
    int vecCount = 0; // number of native feature vectors created so far
    try
    {
        foreach (LabeledExample<LblT, SparseVector<double>> lblEx in dataset)
        {
            SparseVector<double> vec = lblEx.Example;
            int[] idx = new int[vec.Count];
            float[] val = new float[vec.Count];
            for (int i = 0; i < vec.Count; i++)
            {
                idx[i] = vec.InnerIdx[i] + 1; // feature indices are shifted by one for SvmLightLib
                val[i] = (float)vec.InnerDat[i]; // *** cast to float
            }
            int lbl;
            if (!lblToIdx.TryGetValue(lblEx.Label, out lbl))
            {
                // reject a third distinct label before it is recorded anywhere
                Utils.ThrowException(lblToIdx.Count >= 2 ? new ArgumentValueException("dataset") : null);
                lblToIdx.Add(lblEx.Label, lbl = lblToIdx.Count);
                mIdxToLbl.Add(lblEx.Label);
            }
            int svmLbl = lbl == 0 ? 1 : -1;
            trainSet[vecCount] = SvmLightLib.NewFeatureVector(idx.Length, idx, val, svmLbl);
            vecCount++;
            lblCount.Add(svmLbl);
        }
        string costFactor = "";
        if (mBiasedCostFunction)
        {
            // format with the invariant culture so the ratio never contains a decimal comma
            double ratio = (double)lblCount.GetCount(-1) / (double)lblCount.GetCount(1);
            costFactor = "-j " + ratio.ToString(CultureInfo.InvariantCulture);
        }
        mModelId = SvmLightLib.TrainModel(
            string.Format(CultureInfo.InvariantCulture,
                "-v {0} -c {1} -t {2} -g {3} -d {4} -s {5} -r {6} -b {7} -e {8} -# {9} {10} {11}",
                (int)mVerbosityLevel, mC, (int)mKernelType, mKernelParamGamma, mKernelParamD,
                mKernelParamS, mKernelParamC, mBiasedHyperplane ? 1 : 0, mEps, mMaxIter,
                mCustomParams, costFactor),
            trainSet.Length, trainSet);
    }
    finally
    {
        // delete training vectors (only those actually created), even when training failed
        for (int i = 0; i < vecCount; i++)
        {
            SvmLightLib.DeleteFeatureVector(trainSet[i]);
        }
    }
}
/// <summary>
/// Returns the per-feature weights of the loaded linear model. The array is fetched from
/// SvmLightLib on first use and cached in <c>mWeights</c>; later calls return the cached array.
/// </summary>
/// <returns>The weight array (the cached instance itself, not a copy).</returns>
public double[] GetLinearWeights()
{
    Utils.ThrowException(mModelId == -1 ? new InvalidOperationException() : null);
    Utils.ThrowException(mKernelType != SvmLightKernelType.Linear ? new InvalidOperationException() : null);
    if (mWeights == null)
    {
        int featCount = SvmLightLib.GetFeatureCount(mModelId);
        double[] fetched = new double[featCount];
        for (int featIdx = 0; featIdx < featCount; featIdx++)
        {
            fetched[featIdx] = SvmLightLib.GetLinearWeight(mModelId, featIdx);
        }
        // publish the cache only once the array is completely filled
        mWeights = fetched;
    }
    return mWeights;
}
/// <summary>
/// Restores C, eps, the label list, the label comparer and (when present) the binary
/// multiclass model from <paramref name="reader"/>. Any currently held model is disposed first.
/// </summary>
/// <param name="reader">Serializer to read from; rejected when null.</param>
public void Load(BinarySerializer reader)
{
    Utils.ThrowException(reader == null ? new ArgumentNullException("reader") : null);
    Dispose();
    // the following statements throw serialization-related exceptions
    mC = reader.ReadDouble();
    mEps = reader.ReadDouble();
    mIdxToLbl.Load(reader);
    // Dispose() clears the label map only when a model is loaded; clear it here as well so
    // that rebuilding it cannot collide with entries left over from an earlier Load() or Train().
    mLblToId.Clear();
    for (int i = 0; i < mIdxToLbl.Count; i++)
    {
        mLblToId.Add(mIdxToLbl[i], i + 1); // label ids are 1-based, matching Train
    }
    // NOTE(review): mLblToId is filled before mLblCmp is read, so it keeps whatever comparer
    // it was created with - confirm that this is intended.
    mLblCmp = reader.ReadObject<IEqualityComparer<LblT>>();
    if (reader.ReadBool())
    {
        // the native library pulls the model bytes through this callback
        SvmLightLib.ReadByteCallback rb = delegate() { return reader.ReadByte(); };
        mModelId = SvmLightLib.LoadMulticlassModelBinCallback(rb);
        GC.KeepAlive(rb);
    }
}
/// <summary>
/// Writes the loaded model to <paramref name="fileName"/> through SvmLightLib.
/// </summary>
/// <param name="fileName">Target file; validated with <c>Utils.VerifyFileNameCreate</c>.</param>
public void SaveModel(string fileName)
{
    // the file name is validated first, then the presence of a model
    bool canCreate = Utils.VerifyFileNameCreate(fileName);
    Utils.ThrowException(canCreate ? null : new ArgumentValueException("fileName"));
    bool hasModel = mModelId != -1;
    Utils.ThrowException(hasModel ? null : new InvalidOperationException());
    SvmLightLib.SaveModel(mModelId, fileName);
}
/// <summary>
/// Returns the hyperplane bias that SvmLightLib reports for the loaded model.
/// </summary>
public double GetHyperplaneBias()
{
    bool noModel = mModelId == -1;
    Utils.ThrowException(noModel ? new InvalidOperationException() : null);
    return SvmLightLib.GetHyperplaneBias(mModelId);
}
/// <summary>
/// Builds the kernel matrix over the support vectors of the loaded model, optionally with one
/// feature column removed first.
/// </summary>
/// <param name="rmvFeatIdx">Column to remove before computing the kernel; negative values remove nothing.</param>
/// <returns>A square jagged array with one row per support vector.</returns>
private double[][] GetKernel(int rmvFeatIdx)
{
    int svCount = SvmLightLib.GetSupportVectorCount(mModelId);
    double[][] gram = new double[svCount][];

    // collect the support vectors as rows of a sparse matrix
    SparseMatrix<double> svRows = new SparseMatrix<double>();
    for (int r = 0; r < svCount; r++)
    {
        svRows[r] = GetSupportVector(r);
    }
    if (rmvFeatIdx >= 0)
    {
        svRows.RemoveColAt(rmvFeatIdx);
    }

    // linear kernel: dot products of every row against all rows
    SparseMatrix<double> svCols = svRows.GetTransposedCopy();
    for (int r = 0; r < svCount; r++)
    {
        gram[r] = ModelUtils.GetDotProductSimilarity(svCols, svCount, svRows[r]);
    }

    // non-linear kernels are derived from the dot products in place
    if (mKernelType == SvmLightKernelType.Polynomial)
    {
        for (int r = 0; r < gram.Length; r++)
        {
            for (int c = 0; c < gram.Length; c++)
            {
                gram[r][c] = Math.Pow(mKernelParamS * gram[r][c] + mKernelParamC, mKernelParamD);
            }
        }
    }
    else if (mKernelType == SvmLightKernelType.RadialBasisFunction)
    {
        // save the diagonal first: it is overwritten while the matrix is transformed
        double[] selfDot = new double[gram.Length];
        for (int r = 0; r < gram.Length; r++)
        {
            selfDot[r] = gram[r][r];
        }
        for (int r = 0; r < gram.Length; r++)
        {
            for (int c = 0; c < gram.Length; c++)
            {
                gram[r][c] = Math.Exp(-mKernelParamGamma * (selfDot[r] + selfDot[c] - 2.0 * gram[r][c]));
            }
        }
    }
    else if (mKernelType == SvmLightKernelType.Sigmoid)
    {
        for (int r = 0; r < gram.Length; r++)
        {
            for (int c = 0; c < gram.Length; c++)
            {
                gram[r][c] = Math.Tanh(mKernelParamS * gram[r][c] + mKernelParamC);
            }
        }
    }
    // any other kernel type keeps the plain dot products
    return gram;
}