Esempio n. 1
0
        /// <summary>
        /// Computes the centroid of the vectors in <paramref name="dataset"/> selected by <paramref name="vecIdxList"/>.
        /// </summary>
        /// <param name="vecIdxList">Indices into <paramref name="dataset"/>; each must be at least 0 and less than <c>dataset.Count</c>.</param>
        /// <param name="dataset">Collection the indices refer to.</param>
        /// <param name="type">
        /// Scaling applied to the per-index sums: <c>Sum</c> keeps them as they are, <c>Avg</c> divides by the
        /// number of selected vectors, <c>NrmL2</c> divides by the Euclidean length of the summed vector.
        /// </param>
        /// <returns>
        /// The centroid after a call to <c>Sort()</c>. A newly constructed vector to which nothing has been added is
        /// returned when no index is supplied; likewise nothing is added when <c>NrmL2</c> is requested and the summed
        /// vector has zero length, or when <paramref name="type"/> is not one of the three handled values.
        /// </returns>
        /// <remarks>
        /// Argument checks are reported through <c>Utils.ThrowException</c> (<c>ArgumentNullException</c> for null
        /// arguments, <c>ArgumentValueException</c> for an out-of-range index).
        /// </remarks>
        public static SparseVector<double> ComputeCentroid(IEnumerable<int> vecIdxList, IUnlabeledExampleCollection<SparseVector<double>> dataset, CentroidType type)
        {
            Utils.ThrowException(vecIdxList == null ? new ArgumentNullException("vecIdxList") : null);
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Dictionary<int, double> sums = new Dictionary<int, double>();
            int vecCount = 0;

            foreach (int vecIdx in vecIdxList)
            {
                Utils.ThrowException((vecIdx < 0 || vecIdx >= dataset.Count) ? new ArgumentValueException("vecIdxList") : null);
                foreach (IdxDat<double> item in dataset[vecIdx])
                {
                    // Single lookup per element instead of ContainsKey followed by the indexer.
                    double sum;
                    sums[item.Idx] = sums.TryGetValue(item.Idx, out sum) ? sum + item.Dat : item.Dat;
                }
                vecCount++;
            }

            SparseVector<double> centroid = new SparseVector<double>();
            if (vecCount == 0)
            {
                // No input vectors: return the untouched vector rather than throwing.
                return centroid;
            }

            // Note: a type without a matching case below leaves the centroid without entries.
            switch (type)
            {
                case CentroidType.Sum:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value);
                    }
                    break;

                case CentroidType.Avg:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value / (double)vecCount);
                    }
                    break;

                case CentroidType.NrmL2:
                    double vecLen = 0;
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        vecLen += entry.Value * entry.Value;
                    }
                    vecLen = Math.Sqrt(vecLen);
                    // A zero-length sum cannot be normalized; the centroid is left without entries instead of throwing.
                    if (vecLen > 0)
                    {
                        foreach (KeyValuePair<int, double> entry in sums)
                        {
                            centroid.InnerIdx.Add(entry.Key);
                            centroid.InnerDat.Add(entry.Value / vecLen);
                        }
                    }
                    break;
            }
            centroid.Sort();
            return centroid;
        }
Esempio n. 2
0
        /// <summary>
        /// Computes a weighted centroid from a sequence of (weight, vector) pairs.
        /// </summary>
        /// <param name="wgtVecList">
        /// Pairs whose <c>First</c> is the weight (must not be negative) and whose <c>Second</c> is the vector
        /// (must not be null). Each vector component is multiplied by the weight before being summed.
        /// </param>
        /// <param name="type">
        /// Scaling applied to the weighted per-index sums: <c>Sum</c> keeps them as they are, <c>Avg</c> divides by
        /// the sum of the weights, <c>NrmL2</c> divides by the Euclidean length of the summed vector.
        /// </param>
        /// <returns>
        /// The centroid after a call to <c>Sort()</c>. A newly constructed vector to which nothing has been added is
        /// returned when the weights sum to zero; likewise nothing is added when <c>NrmL2</c> is requested and the
        /// summed vector has zero length, or when <paramref name="type"/> is not one of the three handled values.
        /// </returns>
        /// <remarks>
        /// Argument checks are reported through <c>Utils.ThrowException</c> (<c>ArgumentNullException</c> for a null
        /// list, <c>ArgumentValueException</c> for a negative weight or a null vector).
        /// </remarks>
        public static SparseVector<double> ComputeCentroidWgt(IEnumerable<Pair<double, SparseVector<double>>> wgtVecList, CentroidType type)
        {
            Utils.ThrowException(wgtVecList == null ? new ArgumentNullException("wgtVecList") : null);
            Dictionary<int, double> sums = new Dictionary<int, double>();
            double wgtSum = 0;

            foreach (Pair<double, SparseVector<double>> wgtVec in wgtVecList)
            {
                Utils.ThrowException(wgtVec.First < 0 || wgtVec.Second == null ? new ArgumentValueException("wgtVecList") : null);
                foreach (IdxDat<double> item in wgtVec.Second)
                {
                    // Single lookup per element instead of ContainsKey followed by the indexer.
                    double sum;
                    double term = wgtVec.First * item.Dat;
                    sums[item.Idx] = sums.TryGetValue(item.Idx, out sum) ? sum + term : term;
                }
                wgtSum += wgtVec.First;
            }

            SparseVector<double> centroid = new SparseVector<double>();
            if (wgtSum == 0)
            {
                // Nothing carries weight: return the untouched vector rather than throwing.
                return centroid;
            }

            // Note: a type without a matching case below leaves the centroid without entries.
            switch (type)
            {
                case CentroidType.Sum:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value);
                    }
                    break;

                case CentroidType.Avg:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value / wgtSum);
                    }
                    break;

                case CentroidType.NrmL2:
                    double vecLen = 0;
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        vecLen += entry.Value * entry.Value;
                    }
                    vecLen = Math.Sqrt(vecLen);
                    // A zero-length sum cannot be normalized; the centroid is left without entries instead of throwing.
                    if (vecLen > 0)
                    {
                        foreach (KeyValuePair<int, double> entry in sums)
                        {
                            centroid.InnerIdx.Add(entry.Key);
                            centroid.InnerDat.Add(entry.Value / vecLen);
                        }
                    }
                    break;
            }
            centroid.Sort();
            return centroid;
        }
Esempio n. 3
0
        /// <summary>
        /// Computes the weighted centroid of this object's items, looking each item's vector up in
        /// <paramref name="dataset"/>.
        /// </summary>
        /// <typeparam name="LblT">Label type of the dataset; not used by the computation itself.</typeparam>
        /// <param name="dataset">
        /// Collection indexed by the second component of each entry in <c>m_items</c>; the vector is read from the
        /// <c>Example</c> member of the indexed element.
        /// </param>
        /// <param name="type">
        /// Scaling applied to the weighted per-index sums: <c>Sum</c> keeps them as they are, <c>Avg</c> divides by
        /// the sum of the weights, <c>NrmL2</c> divides by the Euclidean length of the summed vector.
        /// </param>
        /// <returns>
        /// The centroid after a call to <c>Sort()</c>. When <paramref name="type"/> is not one of the three handled
        /// values, no entries are added to it.
        /// </returns>
        /// <remarks>
        /// Failures are reported through <c>Utils.ThrowException</c>: <c>ArgumentNullException</c> for a null dataset,
        /// <c>IndexOutOfRangeException</c> for an item index outside the dataset, <c>ArgumentValueException</c> when
        /// the weights sum to zero, and <c>InvalidOperationException</c> when <c>NrmL2</c> is requested for a
        /// zero-length sum.
        /// </remarks>
        public SparseVector<double>.ReadOnly ComputeCentroid<LblT>(IExampleCollection<LblT, SparseVector<double>.ReadOnly> dataset, CentroidType type)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Dictionary<int, double> sums = new Dictionary<int, double>();
            double wgtSum = 0;

            // Each entry of m_items pairs a weight (First) with a dataset index (Second).
            foreach (Pair<double, int> wgtVec in m_items)
            {
                Utils.ThrowException((wgtVec.Second < 0 || wgtVec.Second >= dataset.Count) ? new IndexOutOfRangeException("Items (dataset index)") : null);
                foreach (IdxDat<double> item in dataset[wgtVec.Second].Example)
                {
                    // Single lookup per element instead of ContainsKey followed by the indexer.
                    double sum;
                    double term = wgtVec.First * item.Dat;
                    sums[item.Idx] = sums.TryGetValue(item.Idx, out sum) ? sum + term : term;
                }
                wgtSum += wgtVec.First;
            }
            Utils.ThrowException(wgtSum == 0 ? new ArgumentValueException("Items (weights)") : null);
            SparseVector<double> centroid = new SparseVector<double>();

            // Note: a type without a matching case below leaves the centroid without entries.
            switch (type)
            {
                case CentroidType.Sum:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value);
                    }
                    break;

                case CentroidType.Avg:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value / wgtSum);
                    }
                    break;

                case CentroidType.NrmL2:
                    double vecLen = 0;
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        vecLen += entry.Value * entry.Value;
                    }
                    // Checked on the squared length, before the square root is taken.
                    Utils.ThrowException(vecLen == 0 ? new InvalidOperationException() : null);
                    vecLen = Math.Sqrt(vecLen);
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value / vecLen);
                    }
                    break;
            }
            centroid.Sort();
            return centroid;
        }
Esempio n. 4
0
 /// <summary>
 /// Computes the centroid for this object's items by delegating to <c>ModelUtils.ComputeCentroid</c>,
 /// passing <c>mItems</c> together with the given arguments.
 /// </summary>
 /// <param name="dataset">Forwarded unchanged to <c>ModelUtils.ComputeCentroid</c>.</param>
 /// <param name="type">Forwarded unchanged to <c>ModelUtils.ComputeCentroid</c>.</param>
 /// <returns>The value returned by <c>ModelUtils.ComputeCentroid</c>.</returns>
 public SparseVector<double> ComputeCentroid(IUnlabeledExampleCollection<SparseVector<double>> dataset, CentroidType type)
 {
     // The delegate call throws ArgumentValueException.
     SparseVector<double> centroid = ModelUtils.ComputeCentroid(mItems, dataset, type);
     return centroid;
 }
Esempio n. 5
0
 /// <summary>
 /// Computes the centroid for this object's items by delegating to <c>ModelUtils.ComputeCentroid</c>,
 /// passing <c>mItems</c> together with the given arguments.
 /// </summary>
 /// <param name="dataset">Forwarded unchanged to <c>ModelUtils.ComputeCentroid</c>.</param>
 /// <param name="type">Forwarded unchanged to <c>ModelUtils.ComputeCentroid</c>.</param>
 /// <returns>The value returned by <c>ModelUtils.ComputeCentroid</c>.</returns>
 public SparseVector<double> ComputeCentroid(IUnlabeledExampleCollection<SparseVector<double>> dataset, CentroidType type)
 {
     // The delegate call throws ArgumentNullException, ArgumentValueException, InvalidOperationException.
     SparseVector<double> centroid = ModelUtils.ComputeCentroid(mItems, dataset, type);
     return centroid;
 }
Esempio n. 6
0
        // *** Computation of centroids ***

        /// <summary>
        /// Computes the centroid of a sequence of sparse vectors.
        /// </summary>
        /// <param name="vecList">Vectors to combine; components are summed per index.</param>
        /// <param name="type">
        /// Scaling applied to the per-index sums: <c>Sum</c> keeps them as they are, <c>Avg</c> divides by the
        /// number of vectors, <c>AvgNonZero</c> divides each sum by the number of stored entries that contributed
        /// to that index, <c>NrmL2</c> divides by the Euclidean length of the summed vector.
        /// </param>
        /// <returns>
        /// The centroid after a call to <c>Sort()</c>. A newly constructed vector to which nothing has been added is
        /// returned when <paramref name="vecList"/> yields no vectors; likewise nothing is added when <c>NrmL2</c>
        /// is requested and the summed vector has zero length, or when <paramref name="type"/> is not one of the
        /// four handled values.
        /// </returns>
        /// <remarks>
        /// A null <paramref name="vecList"/> is reported through <c>Utils.ThrowException</c> as an
        /// <c>ArgumentNullException</c>.
        /// </remarks>
        public static SparseVector<double> ComputeCentroid(IEnumerable<SparseVector<double>> vecList, CentroidType type)
        {
            Utils.ThrowException(vecList == null ? new ArgumentNullException("vecList") : null);
            Dictionary<int, double> sums = new Dictionary<int, double>();
            Dictionary<int, int> counts = new Dictionary<int, int>();
            int vecCount = 0;

            foreach (SparseVector<double> vec in vecList)
            {
                foreach (IdxDat<double> item in vec)
                {
                    // One lookup per dictionary instead of ContainsKey followed by the indexer.
                    double sum;
                    sums[item.Idx] = sums.TryGetValue(item.Idx, out sum) ? sum + item.Dat : item.Dat;

                    // TryGetValue leaves seen at 0 for an index not encountered before.
                    int seen;
                    counts.TryGetValue(item.Idx, out seen);
                    counts[item.Idx] = seen + 1;
                }
                vecCount++;
            }

            SparseVector<double> centroid = new SparseVector<double>();
            if (vecCount == 0)
            {
                // No input vectors: return the untouched vector rather than throwing.
                return centroid;
            }

            // Note: a type without a matching case below leaves the centroid without entries.
            switch (type)
            {
                case CentroidType.Sum:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value);
                    }
                    break;

                case CentroidType.Avg:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value / (double)vecCount);
                    }
                    break;

                case CentroidType.AvgNonZero:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value / (double)counts[entry.Key]);
                    }
                    break;

                case CentroidType.NrmL2:
                    double vecLen = 0;
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        vecLen += entry.Value * entry.Value;
                    }
                    vecLen = Math.Sqrt(vecLen);
                    // A zero-length sum cannot be normalized; the centroid is left without entries.
                    if (vecLen > 0)
                    {
                        foreach (KeyValuePair<int, double> entry in sums)
                        {
                            centroid.InnerIdx.Add(entry.Key);
                            centroid.InnerDat.Add(entry.Value / vecLen);
                        }
                    }
                    break;
            }
            centroid.Sort();
            return centroid;
        }
Esempio n. 7
0
        /// <summary>
        /// Computes the centroid of a sequence of read-only sparse vectors.
        /// </summary>
        /// <param name="vec_list">Vectors to combine; components are summed per index.</param>
        /// <param name="type">
        /// Scaling applied to the per-index sums: <c>Sum</c> keeps them as they are, <c>Avg</c> divides by the
        /// number of vectors, <c>NrmL2</c> divides by the Euclidean length of the summed vector.
        /// </param>
        /// <returns>
        /// The centroid after a call to <c>Sort()</c>. When <paramref name="type"/> is not one of the three handled
        /// values, no entries are added to it.
        /// </returns>
        /// <remarks>
        /// Failures are reported through <c>Utils.ThrowException</c>: <c>ArgumentNullException</c> for a null list,
        /// <c>ArgumentValueException</c> when the list yields no vectors, and <c>InvalidOperationException</c> when
        /// <c>NrmL2</c> is requested for a zero-length sum.
        /// </remarks>
        public static SparseVector<double> ComputeCentroid(IEnumerable<SparseVector<double>.ReadOnly> vec_list, CentroidType type)
        {
            Utils.ThrowException(vec_list == null ? new ArgumentNullException("vec_list") : null);
            Dictionary<int, double> sums = new Dictionary<int, double>();
            int vecCount = 0;

            foreach (SparseVector<double>.ReadOnly vec in vec_list)
            {
                foreach (IdxDat<double> item in vec)
                {
                    // Single lookup per element instead of ContainsKey followed by the indexer.
                    double sum;
                    sums[item.Idx] = sums.TryGetValue(item.Idx, out sum) ? sum + item.Dat : item.Dat;
                }
                vecCount++;
            }
            Utils.ThrowException(vecCount == 0 ? new ArgumentValueException("vec_list") : null);
            SparseVector<double> centroid = new SparseVector<double>();

            // Note: a type without a matching case below leaves the centroid without entries.
            switch (type)
            {
                case CentroidType.Sum:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value);
                    }
                    break;

                case CentroidType.Avg:
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value / (double)vecCount);
                    }
                    break;

                case CentroidType.NrmL2:
                    double vecLen = 0;
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        vecLen += entry.Value * entry.Value;
                    }
                    // Checked on the squared length, before the square root is taken.
                    Utils.ThrowException(vecLen == 0 ? new InvalidOperationException() : null);
                    vecLen = Math.Sqrt(vecLen);
                    foreach (KeyValuePair<int, double> entry in sums)
                    {
                        centroid.InnerIdx.Add(entry.Key);
                        centroid.InnerDat.Add(entry.Value / vecLen);
                    }
                    break;
            }
            centroid.Sort();
            return centroid;
        }