/// <summary>
/// Builds a matrix holding one row per distinct label found in <paramref name="dataset"/>.
/// Each row is the converted example vector of that label; when a label occurs more than
/// once, the new vector is merged with the existing row using <c>SumOperator</c>.
/// </summary>
/// <param name="dataset">Labeled binary examples; enumerated twice.</param>
/// <param name="idx_to_lbl">Receives the labels, positioned by their row index (order of first appearance).</param>
/// <returns>The per-label matrix.</returns>
private static SparseMatrix<double> CreateObservationMatrix<LblT>(IExampleCollection<LblT, BinaryVector<int>.ReadOnly> dataset, ref LblT[] idx_to_lbl)
{
    // Pass 1: number the labels in the order they are first seen.
    Dictionary<LblT, int> rowOfLabel = new Dictionary<LblT, int>();
    ArrayList<LblT> labelsInOrder = new ArrayList<LblT>();
    foreach (LabeledExample<LblT, BinaryVector<int>.ReadOnly> entry in dataset)
    {
        if (rowOfLabel.ContainsKey(entry.Label))
        {
            continue;
        }
        rowOfLabel.Add(entry.Label, rowOfLabel.Count);
        labelsInOrder.Add(entry.Label);
    }

    // Pass 2: accumulate every example into the row that belongs to its label.
    SparseMatrix<double> observations = new SparseMatrix<double>();
    int processed = 0;
    foreach (LabeledExample<LblT, BinaryVector<int>.ReadOnly> entry in dataset)
    {
        processed++;
        Utils.Verbose("{0} / {1}\r", processed, dataset.Count);
        int rowIdx = rowOfLabel[entry.Label];
        SparseVector<double> contribution = ModelUtils.ConvertExample<SparseVector<double>>(entry.Example);
        if (observations.ContainsRowAt(rowIdx))
        {
            // Fold the row accumulated so far into the fresh vector, which then replaces it.
            contribution.Merge(observations[rowIdx], new SumOperator());
        }
        observations[rowIdx] = contribution;
    }
    Utils.VerboseLine("");

    idx_to_lbl = labelsInOrder.ToArray();
    return observations;
}
/// <summary>
/// Scores <paramref name="bin_vec"/> against every row of <paramref name="lambdas"/>.
/// The score of a row is <c>exp(dot(row, example))</c>; it is paired with the label found
/// at the row's index in <paramref name="idx_to_lbl"/>.
/// </summary>
/// <param name="bin_vec">Binary example to classify.</param>
/// <param name="lambdas">Weight matrix; each stored row yields one score.</param>
/// <param name="idx_to_lbl">Maps a row index of <paramref name="lambdas"/> to its label.</param>
/// <returns>A <c>ClassifierResult</c> constructed from the (score, label) pairs.</returns>
public static ClassifierResult<LblT> Classify<LblT>(BinaryVector<int>.ReadOnly bin_vec, SparseMatrix<double>.ReadOnly lambdas, LblT[] idx_to_lbl)
{
    DotProductSimilarity similarity = new DotProductSimilarity();
    SparseVector<double> features = ModelUtils.ConvertExample<SparseVector<double>>(bin_vec);
    ArrayList<KeyDat<double, LblT>> labelScores = new ArrayList<KeyDat<double, LblT>>();
    foreach (IdxDat<SparseVector<double>.ReadOnly> lambdaRow in lambdas)
    {
        double dot = similarity.GetSimilarity(lambdaRow.Dat, features);
        LblT label = idx_to_lbl[lambdaRow.Idx];
        labelScores.Add(new KeyDat<double, LblT>(Math.Exp(dot), label));
    }
    // NOTE: the original author also tried a hand-written merge-style dot product that walked
    // the raw index arrays of both vectors directly; it was recorded as slower than the
    // DotProductSimilarity call used above and was left disabled.
    return new ClassifierResult<LblT>(labelScores);
}
/// <summary>
/// Converts every item of this dataset to <paramref name="newExType"/> and wraps the
/// results in an <c>UnlabeledDataset</c> of that example type. Wrapped target types:
/// <c>SparseVector&lt;double&gt;</c>, <c>SparseVector&lt;double&gt;.ReadOnly</c>,
/// <c>BinaryVector</c> and <c>BinaryVector.ReadOnly</c>.
/// </summary>
/// <param name="newExType">Target example type.</param>
/// <param name="move">
/// When true, each source slot is reset to <c>default(ExT)</c> right after its item has
/// been converted, and the source list is cleared once all items are done.
/// </param>
/// <returns>The new dataset.</returns>
/// <remarks>
/// The target-type dispatch runs after the conversion loop, so the final
/// <c>ArgumentNotSupportedException</c> is only reached when <c>ModelUtils.ConvertExample</c>
/// did not throw first (for instance on an empty dataset).
/// </remarks>
public IUnlabeledDataset ConvertDataset(Type newExType, bool move)
{
    Utils.ThrowException(newExType == null ? new ArgumentNullException("newExType") : null);
    Utils.ThrowException(move && typeof(ExT).IsValueType ? new ArgumentValueException("newExType") : null);

    ArrayList<object> converted = new ArrayList<object>(mItems.Count);
    for (int idx = 0; idx < mItems.Count; idx++)
    {
        // Per the original author's note, ConvertExample throws ArgumentValueException.
        object newExample = ModelUtils.ConvertExample(mItems[idx], newExType);
        converted.Add(newExample);
        if (move)
        {
            // Original note: the second check above guarantees this default value is null.
            mItems[idx] = default(ExT);
        }
    }
    if (move)
    {
        mItems.Clear();
    }

    if (newExType == typeof(SparseVector<double>))
    {
        return new UnlabeledDataset<SparseVector<double>>(converted);
    }
    if (newExType == typeof(SparseVector<double>.ReadOnly))
    {
        return new UnlabeledDataset<SparseVector<double>.ReadOnly>(converted);
    }
    if (newExType == typeof(BinaryVector))
    {
        return new UnlabeledDataset<BinaryVector>(converted);
    }
    if (newExType == typeof(BinaryVector.ReadOnly))
    {
        return new UnlabeledDataset<BinaryVector.ReadOnly>(converted);
    }
    throw new ArgumentNotSupportedException("newExType");
}
/// <summary>
/// Converts the example of every labeled item to <paramref name="newExType"/>, keeping the
/// labels, and wraps the results in a <c>LabeledDataset</c> of that example type. Wrapped
/// target types: <c>SparseVector&lt;double&gt;</c>, <c>SparseVector&lt;double&gt;.ReadOnly</c>,
/// <c>BinaryVector</c> and <c>BinaryVector.ReadOnly</c>.
/// </summary>
/// <param name="newExType">Target example type.</param>
/// <param name="move">
/// When true, each source slot is set to null right after its item has been converted,
/// and the source list is cleared once all items are done.
/// </param>
/// <returns>The new labeled dataset.</returns>
/// <remarks>
/// The target-type dispatch runs after the conversion loop, so the final
/// <c>ArgumentNotSupportedException</c> is only reached when <c>ModelUtils.ConvertExample</c>
/// did not throw first (for instance on an empty dataset).
/// </remarks>
public ILabeledDataset<LblT> ConvertDataset(Type newExType, bool move)
{
    Utils.ThrowException(newExType == null ? new ArgumentNullException("newExType") : null);

    ArrayList<LabeledExample<LblT, object>> converted = new ArrayList<LabeledExample<LblT, object>>(mItems.Count);
    for (int idx = 0; idx < mItems.Count; idx++)
    {
        LblT label = mItems[idx].Label;
        // Per the original author's note, ConvertExample throws ArgumentValueException.
        object newExample = ModelUtils.ConvertExample(mItems[idx].Example, newExType);
        converted.Add(new LabeledExample<LblT, object>(label, newExample));
        if (move)
        {
            mItems[idx] = null;
        }
    }
    if (move)
    {
        mItems.Clear();
    }

    if (newExType == typeof(SparseVector<double>))
    {
        return new LabeledDataset<LblT, SparseVector<double>>(converted);
    }
    if (newExType == typeof(SparseVector<double>.ReadOnly))
    {
        return new LabeledDataset<LblT, SparseVector<double>.ReadOnly>(converted);
    }
    if (newExType == typeof(BinaryVector))
    {
        return new LabeledDataset<LblT, BinaryVector>(converted);
    }
    if (newExType == typeof(BinaryVector.ReadOnly))
    {
        return new LabeledDataset<LblT, BinaryVector.ReadOnly>(converted);
    }
    throw new ArgumentNotSupportedException("newExType");
}
/// <summary>
/// Converts each example of <paramref name="dataset"/> to a <c>SparseVector&lt;double&gt;</c>,
/// stores it as consecutive rows (0, 1, 2, ...) of a matrix in enumeration order, and
/// returns a transposed copy of that matrix.
/// </summary>
/// <param name="dataset">Labeled binary examples.</param>
/// <param name="clearDataset">When true, every source example is cleared right after it has been converted.</param>
/// <returns>The result of <c>GetTransposedCopy()</c> on the row matrix.</returns>
private static SparseMatrix<double> TransposeDataset<LblT>(ILabeledExampleCollection<LblT, BinaryVector> dataset, bool clearDataset)
{
    SparseMatrix<double> rows = new SparseMatrix<double>();
    int rowIdx = 0;
    foreach (LabeledExample<LblT, BinaryVector> entry in dataset)
    {
        rows[rowIdx] = ModelUtils.ConvertExample<SparseVector<double>>(entry.Example);
        rowIdx++;
        if (clearDataset)
        {
            entry.Example.Clear();
        }
    }
    return rows.GetTransposedCopy();
}
/// <summary>
/// Converts each example of <paramref name="dataset"/> to a <c>SparseVector&lt;double&gt;</c>,
/// stores it as consecutive rows (0, 1, 2, ...) of a matrix in enumeration order, and
/// returns a transposed copy of that matrix.
/// </summary>
/// <param name="dataset">Labeled read-only binary examples.</param>
/// <param name="clear_dataset">
/// When true, the vector behind each read-only example (<c>Inner</c>) is cleared right
/// after conversion; the original author did this deliberately to save space.
/// </param>
/// <returns>The result of <c>GetTransposedCopy()</c> on the row matrix.</returns>
private static SparseMatrix<double> TransposeDataset<LblT>(IExampleCollection<LblT, BinaryVector<int>.ReadOnly> dataset, bool clear_dataset)
{
    SparseMatrix<double> rows = new SparseMatrix<double>();
    int rowIdx = 0;
    foreach (LabeledExample<LblT, BinaryVector<int>.ReadOnly> entry in dataset)
    {
        rows[rowIdx] = ModelUtils.ConvertExample<SparseVector<double>>(entry.Example);
        rowIdx++;
        if (clear_dataset)
        {
            // Reaches through the read-only wrapper on purpose to release the storage.
            entry.Example.Inner.Clear();
        }
    }
    return rows.GetTransposedCopy();
}
/// <summary>
/// Scores <paramref name="binVec"/> against every row of <paramref name="lambdas"/> and
/// returns the (score, label) pairs sorted with <c>DescSort</c>.
/// </summary>
/// <param name="binVec">Binary example to classify.</param>
/// <param name="lambdas">Weight matrix; each stored row yields one score.</param>
/// <param name="idxToLbl">Maps a row index of <paramref name="lambdas"/> to its label.</param>
/// <param name="normalize">
/// When false, each score is the raw <c>exp(dot(row, example))</c>. When true, the scores are
/// divided by their sum so that they add up to 1.
/// </param>
/// <returns>The sorted prediction.</returns>
/// <remarks>
/// In normalized mode the largest dot product is subtracted before exponentiating. This leaves
/// the normalized values mathematically unchanged but keeps <c>Math.Exp</c> from overflowing to
/// infinity (which previously produced 0 or NaN scores) or from underflowing every score to 0
/// (which previously skipped normalization). If any dot product is NaN or infinite, the
/// unshifted computation is used.
/// </remarks>
public static Prediction<LblT> Classify<LblT>(BinaryVector binVec, SparseMatrix<double>.ReadOnly lambdas, LblT[] idxToLbl, bool normalize)
{
    SparseVector<double> vec = ModelUtils.ConvertExample<SparseVector<double>>(binVec);
    Prediction<LblT> scores = new Prediction<LblT>();

    // First collect the raw dot products; they are exponentiated in place below.
    double maxDot = double.NegativeInfinity;
    bool allFinite = true;
    foreach (IdxDat<SparseVector<double>.ReadOnly> row in lambdas)
    {
        double dot = DotProductSimilarity.Instance.GetSimilarity(row.Dat, vec);
        scores.Inner.Add(new KeyDat<double, LblT>(dot, idxToLbl[row.Idx]));
        if (double.IsNaN(dot) || double.IsInfinity(dot))
        {
            allFinite = false;
        }
        else if (dot > maxDot)
        {
            maxDot = dot;
        }
    }

    // Shifting is only valid (and only needed) when the result is going to be normalized.
    double shift = (normalize && allFinite && scores.Count > 0) ? maxDot : 0;

    double sum = 0;
    for (int i = 0; i < scores.Count; i++)
    {
        KeyDat<double, LblT> entry = scores[i];
        double score = Math.Exp(entry.Key - shift);
        scores.Inner[i] = new KeyDat<double, LblT>(score, entry.Dat);
        sum += score;
    }

    if (normalize && sum > 0)
    {
        for (int i = 0; i < scores.Count; i++)
        {
            KeyDat<double, LblT> entry = scores[i];
            scores.Inner[i] = new KeyDat<double, LblT>(entry.Key / sum, entry.Dat);
        }
    }

    scores.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
    return scores;
}
/// <summary>
/// Converts the example of every item to <paramref name="new_ex_type"/>, keeping the labels,
/// and returns the results as a new <c>Dataset</c> of that example type. Supported target
/// types: <c>SparseVector&lt;double&gt;</c>, <c>SparseVector&lt;double&gt;.ReadOnly</c>,
/// <c>BinaryVector&lt;int&gt;</c> and <c>BinaryVector&lt;int&gt;.ReadOnly</c>.
/// </summary>
/// <param name="new_ex_type">Target example type.</param>
/// <param name="move">
/// When true, each source slot is overwritten with an empty <c>LabeledExample</c> right after
/// its item has been converted, and the source list is cleared once all items are done.
/// </param>
/// <returns>The new dataset.</returns>
/// <remarks>
/// An unsupported target type raises <c>ArgumentNotSupportedException</c> before any item is
/// touched. A branch for <c>SvmFeatureVector</c> existed in the original but was disabled.
/// </remarks>
public IDataset<LblT> ConvertDataset(Type new_ex_type, bool move)
{
    Utils.ThrowException(new_ex_type == null ? new ArgumentNullException("new_ex_type") : null);
    if (new_ex_type == typeof(SparseVector<double>))
    {
        return ConvertItemsTo<SparseVector<double>>(move);
    }
    else if (new_ex_type == typeof(SparseVector<double>.ReadOnly))
    {
        return ConvertItemsTo<SparseVector<double>.ReadOnly>(move);
    }
    else if (new_ex_type == typeof(BinaryVector<int>))
    {
        return ConvertItemsTo<BinaryVector<int>>(move);
    }
    else if (new_ex_type == typeof(BinaryVector<int>.ReadOnly))
    {
        return ConvertItemsTo<BinaryVector<int>.ReadOnly>(move);
    }
    else
    {
        throw new ArgumentNotSupportedException("new_ex_type");
    }
}

// Shared conversion loop for ConvertDataset: converts each item's example to NewExT and,
// when move is set, releases the source items as it goes.
private Dataset<LblT, NewExT> ConvertItemsTo<NewExT>(bool move)
{
    Dataset<LblT, NewExT> new_dataset = new Dataset<LblT, NewExT>();
    for (int i = 0; i < m_items.Count; i++)
    {
        LabeledExample<LblT, ExT> example = m_items[i];
        new_dataset.Add(example.Label, ModelUtils.ConvertExample<NewExT>(example.Example));
        if (move)
        {
            // Drop the reference to the converted source item straight away.
            m_items[i] = new LabeledExample<LblT, ExT>();
        }
    }
    if (move)
    {
        m_items.Clear();
    }
    return new_dataset;
}