예제 #1
0
        /// <summary>
        /// Builds a matrix with one row per distinct label. Each row is the converted feature
        /// vector of the first example with that label, merged (via <c>Merge</c> with a
        /// <c>SumOperator</c>) with the converted vectors of all later examples of the same label.
        /// </summary>
        /// <typeparam name="LblT">Label type; used as a dictionary key.</typeparam>
        /// <param name="dataset">Labeled examples to aggregate. Enumerated exactly once.</param>
        /// <param name="idx_to_lbl">
        /// Overwritten on return with the distinct labels, ordered by the row index assigned to
        /// them (i.e. by order of first appearance in <paramref name="dataset"/>).
        /// </param>
        /// <returns>The aggregated matrix; row <c>i</c> corresponds to <c>idx_to_lbl[i]</c>.</returns>
        private static SparseMatrix<double> CreateObservationMatrix<LblT>(IExampleCollection<LblT, BinaryVector<int>.ReadOnly> dataset, ref LblT[] idx_to_lbl)
        {
            SparseMatrix<double> mtx = new SparseMatrix<double>();
            ArrayList<LblT> labels = new ArrayList<LblT>();
            Dictionary<LblT, int> lbl_to_idx = new Dictionary<LblT, int>();
            int i = 0;

            // Single pass: a label receives the next free row index the first time it is seen,
            // which yields the same label-to-row mapping as indexing the labels in a separate
            // pass beforehand, without enumerating the dataset twice.
            foreach (LabeledExample<LblT, BinaryVector<int>.ReadOnly> labeled_example in dataset)
            {
                Utils.Verbose("{0} / {1}\r", ++i, dataset.Count);

                int lbl_idx;
                if (!lbl_to_idx.TryGetValue(labeled_example.Label, out lbl_idx))
                {
                    lbl_idx = lbl_to_idx.Count;
                    lbl_to_idx.Add(labeled_example.Label, lbl_idx);
                    labels.Add(labeled_example.Label);
                }

                SparseVector<double> vec = ModelUtils.ConvertExample<SparseVector<double>>(labeled_example.Example);
                if (mtx.ContainsRowAt(lbl_idx))
                {
                    // Fold the row accumulated so far into the freshly converted vector.
                    vec.Merge(mtx[lbl_idx], new SumOperator());
                }
                mtx[lbl_idx] = vec;
            }
            Utils.VerboseLine("");
            idx_to_lbl = labels.ToArray();
            return mtx;
        }
예제 #2
0
        /// <summary>
        /// Scores a binary feature vector against every row of <paramref name="lambdas"/>.
        /// The score for a row is <c>exp(dot(row, vec))</c>, where <c>vec</c> is the input
        /// converted to a <c>SparseVector&lt;double&gt;</c>.
        /// </summary>
        /// <typeparam name="LblT">Label type.</typeparam>
        /// <param name="bin_vec">Example to classify.</param>
        /// <param name="lambdas">Weight matrix; each row is paired with the label at its row index.</param>
        /// <param name="idx_to_lbl">Maps a row index of <paramref name="lambdas"/> to its label.</param>
        /// <returns>A <c>ClassifierResult</c> constructed from the (score, label) pairs.</returns>
        public static ClassifierResult<LblT> Classify<LblT>(BinaryVector<int>.ReadOnly bin_vec, SparseMatrix<double>.ReadOnly lambdas, LblT[] idx_to_lbl)
        {
            DotProductSimilarity similarity = new DotProductSimilarity();
            SparseVector<double> features = ModelUtils.ConvertExample<SparseVector<double>>(bin_vec);
            ArrayList<KeyDat<double, LblT>> label_scores = new ArrayList<KeyDat<double, LblT>>();

            foreach (IdxDat<SparseVector<double>.ReadOnly> lambda_row in lambdas)
            {
                double dot = similarity.GetSimilarity(lambda_row.Dat, features);
                double score = Math.Exp(dot);
                LblT label = idx_to_lbl[lambda_row.Idx];
                label_scores.Add(new KeyDat<double, LblT>(score, label));
            }

            // NOTE: a hand-written variant that walked the index lists of bin_vec and of each
            // row in lock-step (summing the row weights at matching indices) and then sorted the
            // result explicitly was tried here; the original author recorded it as slower than
            // the DotProductSimilarity-based loop above, so it is not used.
            return new ClassifierResult<LblT>(label_scores);
        }
예제 #3
0
        /// <summary>
        /// Converts every example of this dataset to <paramref name="newExType"/> and wraps the
        /// converted examples in a new unlabeled dataset of that example type.
        /// </summary>
        /// <param name="newExType">
        /// Target example type. Supported: <c>SparseVector&lt;double&gt;</c>,
        /// <c>SparseVector&lt;double&gt;.ReadOnly</c>, <c>BinaryVector</c>, <c>BinaryVector.ReadOnly</c>.
        /// </param>
        /// <param name="move">
        /// When true, each source item is reset to <c>default(ExT)</c> right after it has been
        /// converted and the source list is cleared afterwards. Rejected if <c>ExT</c> is a value type.
        /// </param>
        /// <returns>The new dataset holding the converted examples.</returns>
        /// <exception cref="ArgumentNotSupportedException">
        /// <paramref name="newExType"/> is not one of the supported types.
        /// </exception>
        public IUnlabeledDataset ConvertDataset(Type newExType, bool move)
        {
            Utils.ThrowException(newExType == null ? new ArgumentNullException("newExType") : null);
            Utils.ThrowException(move && typeof(ExT).IsValueType ? new ArgumentValueException("newExType") : null);

            int itemCount = mItems.Count;
            ArrayList<object> converted = new ArrayList<object>(itemCount);

            for (int idx = 0; idx < itemCount; idx++)
            {
                // Per the original note, the conversion may throw ArgumentValueException.
                converted.Add(ModelUtils.ConvertExample(mItems[idx], newExType));
                if (move)
                {
                    // ExT cannot be a value type here (checked above), so default(ExT) is null
                    // and this drops the reference to the source example.
                    mItems[idx] = default(ExT);
                }
            }
            if (move)
            {
                mItems.Clear();
            }

            // NOTE(review): the supported-type check only happens here, after the source may
            // already have been emptied; whether ConvertExample rejects unsupported types earlier
            // is not visible from this method.
            if (newExType == typeof(SparseVector<double>))
            {
                return new UnlabeledDataset<SparseVector<double>>(converted);
            }
            if (newExType == typeof(SparseVector<double>.ReadOnly))
            {
                return new UnlabeledDataset<SparseVector<double>.ReadOnly>(converted);
            }
            if (newExType == typeof(BinaryVector))
            {
                return new UnlabeledDataset<BinaryVector>(converted);
            }
            if (newExType == typeof(BinaryVector.ReadOnly))
            {
                return new UnlabeledDataset<BinaryVector.ReadOnly>(converted);
            }
            throw new ArgumentNotSupportedException("newExType");
        }
예제 #4
0
        /// <summary>
        /// Converts the example part of every labeled item to <paramref name="newExType"/>,
        /// keeping the labels, and wraps the result in a new labeled dataset of that example type.
        /// </summary>
        /// <param name="newExType">
        /// Target example type. Supported: <c>SparseVector&lt;double&gt;</c>,
        /// <c>SparseVector&lt;double&gt;.ReadOnly</c>, <c>BinaryVector</c>, <c>BinaryVector.ReadOnly</c>.
        /// </param>
        /// <param name="move">
        /// When true, each source item is set to null right after it has been converted and the
        /// source list is cleared afterwards.
        /// </param>
        /// <returns>The new dataset holding the labels with their converted examples.</returns>
        /// <exception cref="ArgumentNotSupportedException">
        /// <paramref name="newExType"/> is not one of the supported types.
        /// </exception>
        public ILabeledDataset<LblT> ConvertDataset(Type newExType, bool move)
        {
            Utils.ThrowException(newExType == null ? new ArgumentNullException("newExType") : null);

            int itemCount = mItems.Count;
            ArrayList<LabeledExample<LblT, object>> converted = new ArrayList<LabeledExample<LblT, object>>(itemCount);

            for (int idx = 0; idx < itemCount; idx++)
            {
                // Per the original note, the conversion may throw ArgumentValueException.
                object newExample = ModelUtils.ConvertExample(mItems[idx].Example, newExType);
                converted.Add(new LabeledExample<LblT, object>(mItems[idx].Label, newExample));
                if (move)
                {
                    // Drop the reference to the source item as soon as it has been converted.
                    mItems[idx] = null;
                }
            }
            if (move)
            {
                mItems.Clear();
            }

            // NOTE(review): the supported-type check only happens here, after the source may
            // already have been emptied; whether ConvertExample rejects unsupported types earlier
            // is not visible from this method.
            if (newExType == typeof(SparseVector<double>))
            {
                return new LabeledDataset<LblT, SparseVector<double>>(converted);
            }
            if (newExType == typeof(SparseVector<double>.ReadOnly))
            {
                return new LabeledDataset<LblT, SparseVector<double>.ReadOnly>(converted);
            }
            if (newExType == typeof(BinaryVector))
            {
                return new LabeledDataset<LblT, BinaryVector>(converted);
            }
            if (newExType == typeof(BinaryVector.ReadOnly))
            {
                return new LabeledDataset<LblT, BinaryVector.ReadOnly>(converted);
            }
            throw new ArgumentNotSupportedException("newExType");
        }
예제 #5
0
파일: MaxEnt.cs 프로젝트: mgrcar/Detextive
        /// <summary>
        /// Stacks the examples of <paramref name="dataset"/> as consecutive matrix rows
        /// (row index = enumeration position, starting at 0) and returns the transposed copy
        /// of that matrix.
        /// </summary>
        /// <typeparam name="LblT">Label type; labels are not used.</typeparam>
        /// <param name="dataset">Examples to stack, in enumeration order.</param>
        /// <param name="clearDataset">
        /// When true, <c>Clear()</c> is called on each source example right after it has been
        /// converted, so the caller's examples are emptied.
        /// </param>
        /// <returns>The result of <c>GetTransposedCopy()</c> on the stacked matrix.</returns>
        private static SparseMatrix<double> TransposeDataset<LblT>(ILabeledExampleCollection<LblT, BinaryVector> dataset, bool clearDataset)
        {
            SparseMatrix<double> rows = new SparseMatrix<double>();
            int rowIdx = 0;

            foreach (LabeledExample<LblT, BinaryVector> item in dataset)
            {
                rows[rowIdx] = ModelUtils.ConvertExample<SparseVector<double>>(item.Example);
                rowIdx++;
                if (clearDataset)
                {
                    // The converted copy is already stored; empty the source vector.
                    item.Example.Clear();
                }
            }
            return rows.GetTransposedCopy();
        }
예제 #6
0
        /// <summary>
        /// Stacks the examples of <paramref name="dataset"/> as consecutive matrix rows
        /// (row index = enumeration position, starting at 0) and returns the transposed copy
        /// of that matrix.
        /// </summary>
        /// <typeparam name="LblT">Label type; labels are not used.</typeparam>
        /// <param name="dataset">Examples to stack, in enumeration order.</param>
        /// <param name="clear_dataset">
        /// When true, the inner vector behind each read-only example is cleared right after the
        /// example has been converted, so the caller's examples are emptied.
        /// </param>
        /// <returns>The result of <c>GetTransposedCopy()</c> on the stacked matrix.</returns>
        private static SparseMatrix<double> TransposeDataset<LblT>(IExampleCollection<LblT, BinaryVector<int>.ReadOnly> dataset, bool clear_dataset)
        {
            SparseMatrix<double> rows = new SparseMatrix<double>();
            int row_idx = 0;

            foreach (LabeledExample<LblT, BinaryVector<int>.ReadOnly> item in dataset)
            {
                rows[row_idx] = ModelUtils.ConvertExample<SparseVector<double>>(item.Example);
                row_idx++;
                if (clear_dataset)
                {
                    // Deliberately reaches through the read-only wrapper to free the
                    // underlying vector and save space.
                    item.Example.Inner.Clear();
                }
            }
            return rows.GetTransposedCopy();
        }
예제 #7
0
파일: MaxEnt.cs 프로젝트: mgrcar/Detextive
        /// <summary>
        /// Scores a binary feature vector against every row of <paramref name="lambdas"/> and
        /// returns the (score, label) pairs sorted with <c>DescSort</c>.
        /// </summary>
        /// <remarks>
        /// Without normalization the score of a row is <c>exp(dot(row, vec))</c>. With
        /// normalization each score is divided by the sum of all scores. To keep that division
        /// meaningful when the dot products are large in magnitude, the largest dot product is
        /// subtracted before exponentiating: <c>exp(d - m) / sum(exp(d_k - m))</c> is
        /// mathematically equal to <c>exp(d) / sum(exp(d_k))</c> but cannot overflow to Infinity
        /// (which would produce NaN) and cannot underflow to an all-zero sum (which would skip
        /// the normalization altogether).
        /// </remarks>
        /// <typeparam name="LblT">Label type.</typeparam>
        /// <param name="binVec">Example to classify.</param>
        /// <param name="lambdas">Weight matrix; each row is paired with the label at its row index.</param>
        /// <param name="idxToLbl">Maps a row index of <paramref name="lambdas"/> to its label.</param>
        /// <param name="normalize">When true, scores are scaled so that they sum to 1.</param>
        /// <returns>The sorted prediction.</returns>
        public static Prediction<LblT> Classify<LblT>(BinaryVector binVec, SparseMatrix<double>.ReadOnly lambdas, LblT[] idxToLbl, bool normalize)
        {
            SparseVector<double> vec = ModelUtils.ConvertExample<SparseVector<double>>(binVec);
            Prediction<LblT> scores = new Prediction<LblT>();
            double maxDot = double.NegativeInfinity;

            // Pass 1: store the raw dot products and remember the largest one.
            foreach (IdxDat<SparseVector<double>.ReadOnly> row in lambdas)
            {
                double dot = DotProductSimilarity.Instance.GetSimilarity(row.Dat, vec);
                scores.Inner.Add(new KeyDat<double, LblT>(dot, idxToLbl[row.Idx]));
                if (dot > maxDot)
                {
                    maxDot = dot;
                }
            }

            // Shift only when normalizing (the shift cancels out in the ratio) and only when the
            // maximum is finite; otherwise exponentiate the raw values exactly as before.
            double shift = (normalize && !double.IsInfinity(maxDot)) ? maxDot : 0;
            double sum = 0;

            // Pass 2: replace each stored dot product by its exponential.
            for (int i = 0; i < scores.Count; i++)
            {
                KeyDat<double, LblT> raw = scores[i];
                double score = Math.Exp(raw.Key - shift);
                scores.Inner[i] = new KeyDat<double, LblT>(score, raw.Dat);
                sum += score;
            }

            if (normalize && sum > 0)
            {
                for (int i = 0; i < scores.Count; i++)
                {
                    KeyDat<double, LblT> score = scores[i];
                    scores.Inner[i] = new KeyDat<double, LblT>(score.Key / sum, score.Dat);
                }
            }
            scores.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
            return scores;
        }
예제 #8
0
파일: Dataset.cs 프로젝트: mgrcar/Detextive
 /// <summary>
 /// Creates a new dataset whose examples are this dataset's examples converted to
 /// <paramref name="new_ex_type"/>; labels are carried over unchanged.
 /// </summary>
 /// <param name="new_ex_type">
 /// Target example type. Supported: <c>SparseVector&lt;double&gt;</c>,
 /// <c>SparseVector&lt;double&gt;.ReadOnly</c>, <c>BinaryVector&lt;int&gt;</c>,
 /// <c>BinaryVector&lt;int&gt;.ReadOnly</c>.
 /// </param>
 /// <param name="move">
 /// When true, each source item is replaced by an empty <c>LabeledExample</c> right after it
 /// has been converted and the source list is cleared afterwards.
 /// </param>
 /// <returns>The converted dataset.</returns>
 /// <exception cref="ArgumentNotSupportedException">
 /// <paramref name="new_ex_type"/> is not one of the supported types. The type is checked
 /// before any item is touched, so the source is left intact in that case.
 /// </exception>
 public IDataset<LblT> ConvertDataset(Type new_ex_type, bool move)
 {
     Utils.ThrowException(new_ex_type == null ? new ArgumentNullException("new_ex_type") : null);
     if (new_ex_type == typeof(SparseVector<double>))
     {
         return ConvertItemsTo<SparseVector<double>>(move);
     }
     if (new_ex_type == typeof(SparseVector<double>.ReadOnly))
     {
         return ConvertItemsTo<SparseVector<double>.ReadOnly>(move);
     }
     if (new_ex_type == typeof(BinaryVector<int>))
     {
         return ConvertItemsTo<BinaryVector<int>>(move);
     }
     if (new_ex_type == typeof(BinaryVector<int>.ReadOnly))
     {
         return ConvertItemsTo<BinaryVector<int>.ReadOnly>(move);
     }
     // NOTE: a branch for SvmFeatureVector (converting via ModelUtils.ConvertVector) existed
     // here only as commented-out code; that type is therefore reported as unsupported.
     throw new ArgumentNotSupportedException("new_ex_type");
 }

 /// <summary>
 /// Shared conversion loop for <see cref="ConvertDataset"/>: converts every item's example
 /// to <typeparamref name="NewExT"/> and optionally empties the source while doing so.
 /// </summary>
 private IDataset<LblT> ConvertItemsTo<NewExT>(bool move)
 {
     Dataset<LblT, NewExT> new_dataset = new Dataset<LblT, NewExT>();
     for (int i = 0; i < m_items.Count; i++)
     {
         LabeledExample<LblT, ExT> example = m_items[i];
         new_dataset.Add(example.Label, ModelUtils.ConvertExample<NewExT>(example.Example));
         if (move)
         {
             // Release the source item as soon as its converted copy is stored.
             m_items[i] = new LabeledExample<LblT, ExT>();
         }
     }
     if (move)
     {
         m_items.Clear();
     }
     return new_dataset;
 }