Example #1
0
        /// <summary>
        /// Runs one L-BFGS optimization pass over the weight vector x; when
        /// <paramref name="orthant"/> is set, uses the orthant-wise (OWL-QN)
        /// pseudo-gradient path instead of the plain gradient.
        /// </summary>
        /// <param name="x">Weight vector (1-based; element 0 unused).</param>
        /// <param name="C">Regularization constant passed to the gradient/optimizer.</param>
        /// <param name="orthant">True for orthant-wise (L1) mode.</param>
        /// <returns>The optimizer flag, or -1 when the routine reports an error.</returns>
        public int optimize(double[] x, double C, bool orthant)
        {
            const long msize = 5;
            var dim = x.LongLength - 1;

            // Lazily allocate the L-BFGS working buffers on the first call.
            var needInit = (w == null) || (w.LongLength == 0);
            if (needInit)
            {
                iflag_ = 0;
                w = new FixedBigArray <double>(dim * (2 * msize + 1) + 2 * msize, 1);
                diag = new double[dim + 1];
                if (orthant)
                {
                    xi = new double[dim + 1];
                    v = new double[dim + 1];
                }
            }

            if (orthant)
            {
                // Orthant-wise mode: derive the pseudo-gradient for the L1 term.
                pseudo_gradient(x, C);
            }
            else
            {
                v = expected;
            }

            lbfgs_optimize(msize, x, orthant, C);

            if (iflag_ >= 0)
            {
                return iflag_;
            }

            Console.WriteLine("routine stops with unexpected error");
            return -1;
        }
Example #2
0
        /// <summary>
        /// Builds the parallel key/value lists of feature strings and their ids
        /// from the feature-frequency table.
        /// </summary>
        /// <param name="keyList">Receives the feature strings.</param>
        /// <param name="valList">Receives the matching feature id per entry.</param>
        public void GenerateLexicalIdList(out IList <string> keyList, out IList <int> valList)
        {
            var keys = new FixedBigArray <string>(Size, 0);
            var vals = new FixedBigArray <int>(Size, 0);

            keyList = keys;
            valList = vals;

            // Populate both arrays in parallel; each index is written exactly once.
            Parallel.For(0, arrayFeatureFreqSize, parallelOption, i =>
            {
                keys[i] = arrayFeatureFreq[i].strFeature;
                vals[i] = (int)(arrayFeatureFreq[i].value);
            });
        }
Example #3
0
        /// <summary>
        /// Parallel dot product of dx[dx_idx .. dx_idx+size-1] with dy[dy_idx .. dy_idx+size-1].
        /// Each partition accumulates a private partial sum, then merges it into the
        /// shared total with a lock-free compare-and-swap loop.
        /// </summary>
        private double ddot_(long size, FixedBigArray <double> dx, long dx_idx, FixedBigArray <double> dy, long dy_idx)
        {
            var total = 0.0;

            Parallel.For <double>(0, size, parallelOption,
                                  () => 0,
                                  (i, loopState, partial) => partial + dx[i + dx_idx] * dy[i + dy_idx],
                                  partial => // lock free accumulator
            {
                double seen;
                double updated;
                do
                {
                    seen    = total;            // snapshot the current total
                    updated = seen + partial;   // proposed new total
                    // Retry until no other thread changed `total` between read and CAS.
                } while (seen != Interlocked.CompareExchange(ref total, updated, seen));
            });

            return total;
        }
        /// <summary>
        /// Builds the parallel key/value lists of feature strings and their ids
        /// from the feature-frequency table.
        /// </summary>
        /// <param name="keyList">Receives the feature strings.</param>
        /// <param name="valList">Receives the matching feature id per entry.</param>
        public void GenerateLexicalIdList(out IList<string> keyList, out IList<int> valList)
        {
            var fixArrayKey = new FixedBigArray<string>(Size, 0);
            keyList = fixArrayKey;

            var fixArrayValue = new FixedBigArray<int>(Size, 0);
            valList = fixArrayValue;

            // Previously a single loop body was shared between a `for` statement and a
            // Parallel.For lambda via interleaved #if/#else directives (with the closing
            // `);` in its own preprocessor block). Keep each branch self-contained instead
            // so the code compiles and reads correctly under both configurations.
#if NO_SUPPORT_PARALLEL_LIB
            // Sequential fallback for runtimes without the Task Parallel Library.
            for (long i = 0; i < arrayFeatureFreqSize; i++)
            {
                fixArrayKey[i] = arrayFeatureFreq[i].strFeature;
                fixArrayValue[i] = (int)(arrayFeatureFreq[i].value);
            }
#else
            Parallel.For(0, arrayFeatureFreqSize, parallelOption, i =>
            {
                fixArrayKey[i] = arrayFeatureFreq[i].strFeature;
                fixArrayValue[i] = (int)(arrayFeatureFreq[i].value);
            });
#endif
        }
Example #5
0
        static void Main(string[] args)
        {
            // Smoke test: allocate a fixed big array of 1024 ints and write one slot.
            var ba = new FixedBigArray <int>(1024, 0);
            ba[1] = 1;
        }
Example #6
0
        /// <summary>
        /// Shrinks a trained CRF model: drops zero-weight features, rebuilds the
        /// double-array trie over the surviving lexical features, and writes the
        /// compact model header, alpha and feature files.
        /// </summary>
        /// <param name="strModelFileName">Path of the source (unshrinked) model file.</param>
        /// <param name="strShrinkedModelFileName">Path of the shrinked model file to produce.</param>
        /// <param name="thread_num_">Worker threads for the trie builder.</param>
        public void Process(string strModelFileName, string strShrinkedModelFileName, int thread_num_ = 1)
        {
            string strLine;
            double cost_factor_;
            long maxid_;
            uint xsize_;
            var y_ = new List<string>();
            var unigram_templs_ = new List<string>();
            var bigram_templs_ = new List<string>();

            // `using` guarantees the reader is closed even on early return or a parse failure
            // (the original only closed it on the happy path).
            using (var sr = new StreamReader(strModelFileName))
            {
                //Read the model version
                strLine = sr.ReadLine();
                var version = uint.Parse(strLine.Split(':')[1].Trim());
                if (version == CRFSharp.Utils.MODEL_TYPE_SHRINKED)
                {
                    Console.WriteLine("The input model has been shrinked");
                    return;
                }

                //Read cost_factor
                strLine = sr.ReadLine();
                cost_factor_ = double.Parse(strLine.Split(':')[1].Trim());

                //Read maxid
                strLine = sr.ReadLine();
                maxid_ = long.Parse(strLine.Split(':')[1].Trim());

                //Read xsize
                strLine = sr.ReadLine();
                xsize_ = uint.Parse(strLine.Split(':')[1].Trim());

                //Skip the empty separator line
                strLine = sr.ReadLine();

                //Read the output tag set (terminated by an empty line)
                while (true)
                {
                    strLine = sr.ReadLine();
                    // Null-check guards a truncated file: ReadLine() returns null at EOF,
                    // where the original `strLine.Length` would throw NullReferenceException.
                    if (string.IsNullOrEmpty(strLine))
                    {
                        break;
                    }
                    y_.Add(strLine);
                }

                //Read unigram and bigram templates
                while (sr.EndOfStream == false)
                {
                    strLine = sr.ReadLine();
                    if (string.IsNullOrEmpty(strLine))
                    {
                        break;
                    }
                    if (strLine[0] == 'U')
                    {
                        unigram_templs_.Add(strLine);
                    }
                    if (strLine[0] == 'B')
                    {
                        bigram_templs_.Add(strLine);
                    }
                }
            }

            //Load all features alpha data
            var filename_alpha = strModelFileName + ".alpha";
            var filename_shrink_alpha = strShrinkedModelFileName + ".alpha";
            long shrinked_alpha_size = 0;

            //Only reserve non-zero feature weights and save them into file as two-tuples format
            var alpha_ = new FixedBigArray<double>(maxid_ + 1, 0);
            using (var br_alpha = new BinaryReader(new StreamReader(filename_alpha).BaseStream))
            using (var bw_alpha = new BinaryWriter(new StreamWriter(filename_shrink_alpha).BaseStream))
            {
                for (long i = 0; i < maxid_; i++)
                {
                    // Weights are stored as 32-bit floats on disk.
                    alpha_[i] = br_alpha.ReadSingle();
                    if (alpha_[i] != 0)
                    {
                        bw_alpha.Write(i);
                        bw_alpha.Write((float)alpha_[i]);
                        shrinked_alpha_size++;
                    }
                }
            }

            //Only reserved lexical feature whose weights is non-zero
            var varValue = new VarBigArray<int>(1024);
            var varFeature = new VarBigArray<string>(1024);
            var feaCnt = 0;
            var filename_feature = strModelFileName + ".feature.raw_text";
            using (var sr_fea = new StreamReader(filename_feature))
            {
                while (sr_fea.EndOfStream == false)
                {
                    strLine = sr_fea.ReadLine();
                    var items = strLine.Split('\t');
                    var strFeature = items[0];
                    var key = int.Parse(items[1]);
                    // A unigram feature owns |y| consecutive weights, a bigram |y|^2.
                    var size = (strFeature[0] == 'U' ? y_.Count : y_.Count * y_.Count);
                    var hasAlpha = false;
                    for (var i = key; i < key + size; i++)
                    {
                        if (alpha_[i] != 0)
                        {
                            hasAlpha = true;
                            break;
                        }
                    }

                    if (hasAlpha == true)
                    {
                        varFeature[feaCnt] = strFeature;
                        varValue[feaCnt] = key;
                        feaCnt++;
                    }
                }
            }

            Console.WriteLine("Shrink feature size from {0} to {1}", maxid_, shrinked_alpha_size);
            maxid_ = shrinked_alpha_size;

            //Build new lexical feature
            var val = new FixedBigArray<int>(feaCnt, 0);
            var fea = new FixedBigArray<string>(feaCnt, 0);
            for (var i = 0; i < feaCnt; i++)
            {
                fea[i] = varFeature[i];
                val[i] = varValue[i];
            }
            varFeature = null;
            varValue = null;
            var da = new DoubleArrayTrieBuilder(thread_num_);
            if (da.build(fea, val, 0.95) == false)
            {
                Console.WriteLine("Build lexical dictionary failed.");
                return;
            }
            da.save(strShrinkedModelFileName + ".feature");

            using (var tofs = new StreamWriter(strShrinkedModelFileName))
            {
                // header
                tofs.WriteLine("version: " + CRFSharp.Utils.MODEL_TYPE_SHRINKED);
                tofs.WriteLine("cost-factor: " + cost_factor_);
                tofs.WriteLine("maxid: " + maxid_);
                tofs.WriteLine("xsize: " + xsize_);

                tofs.WriteLine();

                // y
                for (var i = 0; i < y_.Count; ++i)
                {
                    tofs.WriteLine(y_[i]);
                }
                tofs.WriteLine();

                // template
                for (var i = 0; i < unigram_templs_.Count; ++i)
                {
                    tofs.WriteLine(unigram_templs_[i]);
                }
                for (var i = 0; i < bigram_templs_.Count; ++i)
                {
                    tofs.WriteLine(bigram_templs_[i]);
                }
            }
        }
Example #7
0
        /// <summary>
        /// Line-search step (MCSRCH, from the classic L-BFGS code) that looks for a
        /// step length <paramref name="stp"/> along direction s satisfying sufficient
        /// decrease and curvature conditions. The routine is re-entrant: it returns with
        /// info = -1 to request a new function/gradient evaluation from the caller, and
        /// is called again with info == -1 to continue. State between calls lives in
        /// instance fields (stx, sty, fx, fy, dgx, dgy, brackt, stage1, ...).
        /// NOTE(review): field semantics inferred from standard MCSRCH structure — confirm
        /// against the enclosing class.
        /// </summary>
        /// <param name="x">Current point (1-based); updated to x = wa + stp * s on exit.</param>
        /// <param name="f">Function value at x supplied by the caller.</param>
        /// <param name="g">Gradient at x (1-based).</param>
        /// <param name="s">Search direction, read starting at s_idx + 1.</param>
        /// <param name="s_idx">Base offset of the direction vector inside s.</param>
        /// <param name="stp">In/out: current step length estimate.</param>
        /// <param name="info">In/out: -1 = needs evaluation, 0 = error/invalid input, 1 = converged, 2-6 = warning termination codes.</param>
        /// <param name="nfev">In/out: number of function evaluations so far.</param>
        /// <param name="wa">Workspace holding the starting point of the line search.</param>
        public void mcsrch(double[] x, double f, double[] g, FixedBigArray <double> s, long s_idx,
                           ref double stp, ref long info, ref long nfev, double[] wa)
        {
            var size = x.LongLength - 1;

            /* Parameter adjustments */
            if (info == -1)
            {
                // Continuation call: the caller has evaluated f and g at the trial point.
                info = 0;
                nfev++;

                // Directional derivative at the trial point and the sufficient-decrease target.
                var dg     = ddot_(size, g, 1, s, s_idx + 1);
                var ftest1 = finit + stp * dgtest;

                // Termination tests; the last matching code wins (checked in ascending priority).
                if (brackt && ((stp <= stmin || stp >= stmax) || infoc == 0))
                {
                    info = 6;
                    Console.WriteLine("MCSRCH warning: Rounding errors prevent further progress.There may not be a step which satisfies the sufficient decrease and curvature conditions. Tolerances may be too small.");
                    Console.WriteLine("bracket: {0}, stp:{1}, stmin:{2}, stmax:{3}, infoc:{4}", brackt, stp, stmin, stmax, infoc);
                }
                if (stp == lb3_1_stpmax && f <= ftest1 && dg <= dgtest)
                {
                    info = 5;
                    Console.WriteLine("MCSRCH warning: The step is too large.");
                }
                if (stp == lb3_1_stpmin && (f > ftest1 || dg >= dgtest))
                {
                    info = 4;
                    Console.WriteLine("MCSRCH warning: The step is too small.");
                    Console.WriteLine("stp:{0}, lb3_1_stpmin:{1}, f:{2}, ftest1:{3}, dg:{4}, dgtest:{5}", stp, lb3_1_stpmin, f, ftest1, dg, dgtest);
                }
                if (nfev >= maxfev)
                {
                    info = 3;
                    Console.WriteLine("MCSRCH warning: More than {0} function evaluations were required at the present iteration.", maxfev);
                }
                if (brackt && stmax - stmin <= xtol * stmax)
                {
                    info = 2;
                    Console.WriteLine("MCSRCH warning: Relative width of the interval of uncertainty is at most xtol.");
                }
                // Strong Wolfe conditions satisfied: normal success.
                if (f <= ftest1 && Math.Abs(dg) <= lb3_1_gtol * (-dginit))
                {
                    info = 1;
                }

                if (info != 0)
                {
                    return;
                }

                // Leave stage 1 once sufficient decrease and a curvature bound hold.
                if (stage1 && f <= ftest1 && dg >= Math.Min(ftol, lb3_1_gtol) * dginit)
                {
                    stage1 = false;
                }

                if (stage1 && f <= fx && f > ftest1)
                {
                    // Stage-1 trick: update the interval using "modified" function values
                    // (shifted by the sufficient-decrease line), then shift back.
                    var fm   = f - stp * dgtest;
                    var fxm  = fx - stx * dgtest;
                    var fym  = fy - sty * dgtest;
                    var dgm  = dg - dgtest;
                    var dgxm = dgx - dgtest;
                    var dgym = dgy - dgtest;
                    mcstep(ref stx, ref fxm, ref dgxm, ref sty, ref fym, ref dgym, ref stp, fm, dgm, ref brackt,
                           stmin, stmax, ref infoc);
                    fx  = fxm + stx * dgtest;
                    fy  = fym + sty * dgtest;
                    dgx = dgxm + dgtest;
                    dgy = dgym + dgtest;
                }
                else
                {
                    // Regular interval update.
                    mcstep(ref stx, ref fx, ref dgx, ref sty, ref fy, ref dgy, ref stp, f, dg, ref brackt,
                           stmin, stmax, ref infoc);
                }

                if (brackt)
                {
                    // Force the interval of uncertainty to shrink: if it did not contract
                    // enough (factor p66), bisect it.
                    var d1 = 0.0;
                    d1 = sty - stx;
                    if (Math.Abs(d1) >= p66 * width1)
                    {
                        stp = stx + p5 * (sty - stx);
                    }
                    width1 = width;
                    d1     = sty - stx;
                    width  = Math.Abs(d1);
                }
            }
            else
            {
                // First call: validate inputs and initialize the line-search state.
                infoc = 1;
                if (size <= 0 || stp <= 0.0)
                {
                    return;
                }

                // Initial directional derivative; must be a descent direction.
                dginit = ddot_(size, g, 1, s, s_idx + 1);
                if (dginit >= 0.0)
                {
                    return;
                }

                brackt = false;
                stage1 = true;
                nfev   = 0;
                finit  = f;
                dgtest = ftol * dginit;
                width  = lb3_1_stpmax - lb3_1_stpmin;
                width1 = width / p5;

                // Save the starting point so trial points can be rebuilt as wa + stp * s.
                Parallel.For(1, size + 1, parallelOption, i =>
                {
                    wa[i] = x[i];
                }
                             );

                // Both interval endpoints start at the origin of the search.
                stx = 0.0;
                fx  = finit;
                dgx = dginit;
                sty = 0.0;
                fy  = finit;
                dgy = dginit;
            }

            // Compute the admissible step range for the next trial point.
            if (brackt)
            {
                stmin = Math.Min(stx, sty);
                stmax = Math.Max(stx, sty);
            }
            else
            {
                stmin = stx;
                stmax = stp + xtrapf * (stp - stx);
            }

            // Clamp the step to the externally imposed bounds.
            stp = Math.Max(stp, lb3_1_stpmin);
            stp = Math.Min(stp, lb3_1_stpmax);

            // If progress is no longer possible, fall back to the best step so far.
            if ((brackt && ((stp <= stmin || stp >= stmax) ||
                            nfev >= maxfev - 1 || infoc == 0)) ||
                (brackt && (stmax - stmin <= xtol * stmax)))
            {
                stp = stx;
            }

            var stp_t = stp;

            // Move to the trial point x = wa + stp * s (parallel over coordinates).
            Parallel.For(1, size + 1, parallelOption, i =>
            {
                x[i] = (wa[i] + stp_t * s[s_idx + i]);
            });

            // Ask the caller to evaluate f and g at the new point and call back.
            info = -1;
        }
Example #8
0
        /// <summary>
        /// Shrinks a trained CRF model: drops zero-weight features, rebuilds the
        /// double-array trie over the surviving lexical features, and writes the
        /// compact model header, alpha and feature files.
        /// </summary>
        /// <param name="strModelFileName">Path of the source (unshrinked) model file.</param>
        /// <param name="strShrinkedModelFileName">Path of the shrinked model file to produce.</param>
        /// <param name="thread_num_">Worker threads for the trie builder.</param>
        public void Process(string strModelFileName, string strShrinkedModelFileName, int thread_num_ = 1)
        {
            string strLine;
            double cost_factor_;
            long maxid_;
            uint xsize_;
            var y_ = new List <string>();
            var unigram_templs_ = new List <string>();
            var bigram_templs_  = new List <string>();

            // `using` guarantees the reader is closed even on early return or a parse failure
            // (the original only closed it on the happy path).
            using (var sr = new StreamReader(strModelFileName))
            {
                //Read the model version
                strLine = sr.ReadLine();
                var version = uint.Parse(strLine.Split(':')[1].Trim());

                if (version == CRFSharp.Utils.MODEL_TYPE_SHRINKED)
                {
                    Console.WriteLine("The input model has been shrinked");
                    return;
                }

                //Read cost_factor
                strLine = sr.ReadLine();
                cost_factor_ = double.Parse(strLine.Split(':')[1].Trim());

                //Read maxid
                strLine = sr.ReadLine();
                maxid_ = long.Parse(strLine.Split(':')[1].Trim());

                //Read xsize
                strLine = sr.ReadLine();
                xsize_ = uint.Parse(strLine.Split(':')[1].Trim());

                //Skip the empty separator line
                strLine = sr.ReadLine();

                //Read the output tag set (terminated by an empty line)
                while (true)
                {
                    strLine = sr.ReadLine();
                    // Null-check guards a truncated file: ReadLine() returns null at EOF,
                    // where the original `strLine.Length` would throw NullReferenceException.
                    if (string.IsNullOrEmpty(strLine))
                    {
                        break;
                    }
                    y_.Add(strLine);
                }

                //Read unigram and bigram templates
                while (sr.EndOfStream == false)
                {
                    strLine = sr.ReadLine();
                    if (string.IsNullOrEmpty(strLine))
                    {
                        break;
                    }
                    if (strLine[0] == 'U')
                    {
                        unigram_templs_.Add(strLine);
                    }
                    if (strLine[0] == 'B')
                    {
                        bigram_templs_.Add(strLine);
                    }
                }
            }

            //Load all features alpha data
            var filename_alpha        = strModelFileName + ".alpha";
            var filename_shrink_alpha = strShrinkedModelFileName + ".alpha";
            long shrinked_alpha_size = 0;

            //Only reserve non-zero feature weights and save them into file as two-tuples format
            var alpha_ = new FixedBigArray <double>(maxid_ + 1, 0);

            using (var br_alpha = new BinaryReader(new StreamReader(filename_alpha).BaseStream))
            using (var bw_alpha = new BinaryWriter(new StreamWriter(filename_shrink_alpha).BaseStream))
            {
                for (long i = 0; i < maxid_; i++)
                {
                    // Weights are stored as 32-bit floats on disk.
                    alpha_[i] = br_alpha.ReadSingle();
                    if (alpha_[i] != 0)
                    {
                        bw_alpha.Write(i);
                        bw_alpha.Write((float)alpha_[i]);
                        shrinked_alpha_size++;
                    }
                }
            }

            //Only reserved lexical feature whose weights is non-zero
            var varValue         = new VarBigArray <int>(1024);
            var varFeature       = new VarBigArray <string>(1024);
            var feaCnt           = 0;
            var filename_feature = strModelFileName + ".feature.raw_text";

            using (var sr_fea = new StreamReader(filename_feature))
            {
                while (sr_fea.EndOfStream == false)
                {
                    strLine = sr_fea.ReadLine();
                    var items      = strLine.Split('\t');
                    var strFeature = items[0];
                    var key        = int.Parse(items[1]);
                    // A unigram feature owns |y| consecutive weights, a bigram |y|^2.
                    var size       = (strFeature[0] == 'U' ? y_.Count : y_.Count * y_.Count);
                    var hasAlpha   = false;
                    for (var i = key; i < key + size; i++)
                    {
                        if (alpha_[i] != 0)
                        {
                            hasAlpha = true;
                            break;
                        }
                    }

                    if (hasAlpha == true)
                    {
                        varFeature[feaCnt] = strFeature;
                        varValue[feaCnt]   = key;
                        feaCnt++;
                    }
                }
            }

            Console.WriteLine("Shrink feature size from {0} to {1}", maxid_, shrinked_alpha_size);
            maxid_ = shrinked_alpha_size;

            //Build new lexical feature
            var val = new FixedBigArray <int>(feaCnt, 0);
            var fea = new FixedBigArray <string>(feaCnt, 0);

            for (var i = 0; i < feaCnt; i++)
            {
                fea[i] = varFeature[i];
                val[i] = varValue[i];
            }
            varFeature = null;
            varValue   = null;
            var da = new DoubleArrayTrieBuilder(thread_num_);

            if (da.build(fea, val, 0.95) == false)
            {
                Console.WriteLine("Build lexical dictionary failed.");
                return;
            }
            da.save(strShrinkedModelFileName + ".feature");

            using (var tofs = new StreamWriter(strShrinkedModelFileName))
            {
                // header
                tofs.WriteLine("version: " + CRFSharp.Utils.MODEL_TYPE_SHRINKED);
                tofs.WriteLine("cost-factor: " + cost_factor_);
                tofs.WriteLine("maxid: " + maxid_);
                tofs.WriteLine("xsize: " + xsize_);

                tofs.WriteLine();

                // y
                for (var i = 0; i < y_.Count; ++i)
                {
                    tofs.WriteLine(y_[i]);
                }
                tofs.WriteLine();

                // template
                for (var i = 0; i < unigram_templs_.Count; ++i)
                {
                    tofs.WriteLine(unigram_templs_[i]);
                }
                for (var i = 0; i < bigram_templs_.Count; ++i)
                {
                    tofs.WriteLine(bigram_templs_[i]);
                }
            }
        }
Example #9
0
 static void Main(string[] args)
 {
     // Smoke test: allocate a fixed big array of 1024 ints and write one slot.
     var ba = new FixedBigArray<int>(1024, 0);
     ba[1] = 1;
 }