public int optimize(double[] x, double C, bool orthant)
{
    const long msize = 5;
    var size = x.LongLength - 1;

    if (w == null || w.LongLength == 0)
    {
        // First call: allocate the L-BFGS working buffers
        iflag_ = 0;
        w = new FixedBigArray<double>(size * (2 * msize + 1) + 2 * msize, 1);
        diag = new double[size + 1];
        if (orthant == true)
        {
            xi = new double[size + 1];
            v = new double[size + 1];
        }
    }

    if (orthant == true)
    {
        // OWL-QN mode: use the pseudo-gradient to handle the non-differentiable L1 term
        pseudo_gradient(x, C);
    }
    else
    {
        v = expected;
    }

    lbfgs_optimize(msize, x, orthant, C);

    if (iflag_ < 0)
    {
        Console.WriteLine("routine stops with unexpected error");
        return -1;
    }
    return iflag_;
}
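When orthant is true, optimize runs in OWL-QN mode and delegates the non-smooth part of the objective, C * ||x||1, to pseudo_gradient, which is not shown in these examples. The sketch below is a hypothetical reconstruction of the standard OWL-QN pseudo-gradient, not the library's actual code; the names v and expected and the 1-based indexing follow the surrounding snippets, everything else is an assumption.

// Hypothetical sketch of pseudo_gradient for OWL-QN (assumed, not from the library).
// For f(x) + C*|x|_1, the pseudo-gradient picks the one-sided derivative that
// gives a descent direction, and 0 at a coordinate where neither side descends.
private void pseudo_gradient(double[] x, double C)
{
    var size = x.LongLength - 1;
    for (long i = 1; i <= size; i++)          // 1-based, as in optimize/mcsrch
    {
        var g = expected[i];                  // gradient of the smooth part
        if (x[i] > 0) v[i] = g + C;           // right of zero: derivative is g + C
        else if (x[i] < 0) v[i] = g - C;      // left of zero: derivative is g - C
        else if (g + C < 0) v[i] = g + C;     // at zero: stepping right descends
        else if (g - C > 0) v[i] = g - C;     // at zero: stepping left descends
        else v[i] = 0;                        // otherwise stay at zero
    }
}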
private double ddot_(long size, FixedBigArray<double> dx, long dx_idx, FixedBigArray<double> dy, long dy_idx)
{
    double ret = 0.0;
    Parallel.For<double>(0, size, parallelOption,
        () => 0,
        (i, loop, subtotal) =>
        {
            subtotal += dx[i + dx_idx] * dy[i + dy_idx];
            return subtotal;
        },
        (subtotal) =>
        {
            // Lock-free accumulator: retry CompareExchange until no other
            // thread has updated ret between the read and the swap
            double initialValue;
            double newValue;
            do
            {
                initialValue = ret;                  // read current value
                newValue = initialValue + subtotal;  // calculate new value
            }
            while (initialValue != Interlocked.CompareExchange(ref ret, newValue, initialValue));
        });
    return ret;
}
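ddot_ accumulates its partial sums without a lock: each Parallel.For partition builds a thread-local subtotal, and the final combiner retries Interlocked.CompareExchange until no other thread has raced in between. The retry loop is needed because Interlocked has no atomic Add overload for double. A self-contained sketch of the same pattern, with illustrative names that are not from the library:

using System.Threading;

static class AtomicDouble
{
    // Illustrative helper: lock-free add for double via a CAS retry loop,
    // the same pattern ddot_ uses in its Parallel.For combiner.
    public static double Add(ref double target, double value)
    {
        double initial, computed;
        do
        {
            initial = Volatile.Read(ref target);   // snapshot current value
            computed = initial + value;            // compute proposed value
        }
        while (initial != Interlocked.CompareExchange(ref target, computed, initial));
        return computed;
    }
}

The same helper could replace the inline do/while in ddot_'s combiner; the trade-off versus a lock is that contention costs a few retries instead of a kernel-level wait.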
// Generate feature string and its id list
public void GenerateLexicalIdList(out IList<string> keyList, out IList<int> valList)
{
    var fixArrayKey = new FixedBigArray<string>(Size, 0);
    keyList = fixArrayKey;
    var fixArrayValue = new FixedBigArray<int>(Size, 0);
    valList = fixArrayValue;

#if NO_SUPPORT_PARALLEL_LIB
    for (long i = 0; i < arrayFeatureFreqSize; i++)
#else
    Parallel.For(0, arrayFeatureFreqSize, parallelOption, i =>
#endif
    {
        fixArrayKey[i] = arrayFeatureFreq[i].strFeature;
        fixArrayValue[i] = (int)(arrayFeatureFreq[i].value);
    }
#if !NO_SUPPORT_PARALLEL_LIB
    );
#endif
}
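The two out lists line up index for index: keyList[i] is a feature string and valList[i] its id, which is the shape DoubleArrayTrieBuilder consumes in Process below. A hypothetical call site under that assumption; featureLexicalDict and threadNum are illustrative names, and whether build accepts the IList-typed views directly (Process passes FixedBigArray instances) is also an assumption:

// Hypothetical usage, assuming the lists can be fed straight into the trie builder.
IList<string> keyList;
IList<int> valList;
featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

var builder = new DoubleArrayTrieBuilder(threadNum);
if (builder.build(keyList, valList, 0.95) == false)   // 0.95 as in Process below
{
    Console.WriteLine("Build lexical dictionary failed.");
}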
static void Main(string[] args)
{
    FixedBigArray<int> ba = new FixedBigArray<int>(1024, 0);
    ba[1] = 1;
}
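This smoke test only touches one element. The other snippets index FixedBigArray with long and read LongLength, so the type is evidently a fixed-size array that can address more elements than an int-indexed array. A slightly fuller sketch under those assumptions; reading the second constructor argument as a start offset (1 in optimize, 0 everywhere else) is a guess, not documented behavior:

// Sketch exercising the members the other examples rely on:
// the long-typed indexer and LongLength.
static void Demo()
{
    var ba = new FixedBigArray<int>(1024, 0);
    for (long i = 0; i < ba.LongLength; i++)
    {
        ba[i] = (int)(i * i);   // long-indexed writes
    }
    Console.WriteLine("LongLength = {0}, ba[3] = {1}", ba.LongLength, ba[3]);
}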
public void Process(string strModelFileName, string strShrinkedModelFileName, int thread_num_ = 1)
{
    var sr = new StreamReader(strModelFileName);
    string strLine;

    // Read the version number
    strLine = sr.ReadLine();
    var version = uint.Parse(strLine.Split(':')[1].Trim());
    if (version == CRFSharp.Utils.MODEL_TYPE_SHRINKED)
    {
        Console.WriteLine("The input model has already been shrunk");
        return;
    }

    // Read cost_factor
    strLine = sr.ReadLine();
    var cost_factor_ = double.Parse(strLine.Split(':')[1].Trim());

    // Read maxid
    strLine = sr.ReadLine();
    var maxid_ = long.Parse(strLine.Split(':')[1].Trim());

    // Read xsize
    strLine = sr.ReadLine();
    var xsize_ = uint.Parse(strLine.Split(':')[1].Trim());

    // Skip the blank line
    strLine = sr.ReadLine();

    // Read the output tag set
    var y_ = new List<string>();
    while (true)
    {
        strLine = sr.ReadLine();
        if (strLine.Length == 0)
        {
            break;
        }
        y_.Add(strLine);
    }

    // Read the unigram and bigram templates
    var unigram_templs_ = new List<string>();
    var bigram_templs_ = new List<string>();
    while (sr.EndOfStream == false)
    {
        strLine = sr.ReadLine();
        if (strLine.Length == 0)
        {
            break;
        }
        if (strLine[0] == 'U')
        {
            unigram_templs_.Add(strLine);
        }
        if (strLine[0] == 'B')
        {
            bigram_templs_.Add(strLine);
        }
    }
    sr.Close();

    // Load all feature alpha (weight) data
    var filename_alpha = strModelFileName + ".alpha";
    var filename_shrink_alpha = strShrinkedModelFileName + ".alpha";
    var sr_alpha = new StreamReader(filename_alpha);
    var br_alpha = new BinaryReader(sr_alpha.BaseStream);
    var sw_alpha = new StreamWriter(filename_shrink_alpha);
    var bw_alpha = new BinaryWriter(sw_alpha.BaseStream);
    long shrinked_alpha_size = 0;

    // Keep only non-zero feature weights and save them to file as (id, weight) pairs
    var alpha_ = new FixedBigArray<double>(maxid_ + 1, 0);
    for (long i = 0; i < maxid_; i++)
    {
        alpha_[i] = br_alpha.ReadSingle();
        if (alpha_[i] != 0)
        {
            bw_alpha.Write(i);
            bw_alpha.Write((float)alpha_[i]);
            shrinked_alpha_size++;
        }
    }
    br_alpha.Close();
    bw_alpha.Close();

    // Keep only lexical features whose weights are non-zero
    var varValue = new VarBigArray<int>(1024);
    var varFeature = new VarBigArray<string>(1024);
    var feaCnt = 0;
    var filename_feature = strModelFileName + ".feature.raw_text";
    var sr_fea = new StreamReader(filename_feature);
    while (sr_fea.EndOfStream == false)
    {
        strLine = sr_fea.ReadLine();
        var items = strLine.Split('\t');
        var strFeature = items[0];
        var key = int.Parse(items[1]);
        var size = (strFeature[0] == 'U' ? y_.Count : y_.Count * y_.Count);
        var hasAlpha = false;
        for (var i = key; i < key + size; i++)
        {
            if (alpha_[i] != 0)
            {
                hasAlpha = true;
                break;
            }
        }
        if (hasAlpha == true)
        {
            varFeature[feaCnt] = strFeature;
            varValue[feaCnt] = key;
            feaCnt++;
        }
    }
    sr_fea.Close();

    Console.WriteLine("Shrink feature size from {0} to {1}", maxid_, shrinked_alpha_size);
    maxid_ = shrinked_alpha_size;

    // Build the new lexical feature dictionary
    var val = new FixedBigArray<int>(feaCnt, 0);
    var fea = new FixedBigArray<string>(feaCnt, 0);
    for (var i = 0; i < feaCnt; i++)
    {
        fea[i] = varFeature[i];
        val[i] = varValue[i];
    }
    varFeature = null;
    varValue = null;

    var da = new DoubleArrayTrieBuilder(thread_num_);
    if (da.build(fea, val, 0.95) == false)
    {
        Console.WriteLine("Build lexical dictionary failed.");
        return;
    }
    da.save(strShrinkedModelFileName + ".feature");

    var tofs = new StreamWriter(strShrinkedModelFileName);

    // header
    tofs.WriteLine("version: " + CRFSharp.Utils.MODEL_TYPE_SHRINKED);
    tofs.WriteLine("cost-factor: " + cost_factor_);
    tofs.WriteLine("maxid: " + maxid_);
    tofs.WriteLine("xsize: " + xsize_);
    tofs.WriteLine();

    // y
    for (var i = 0; i < y_.Count; ++i)
    {
        tofs.WriteLine(y_[i]);
    }
    tofs.WriteLine();

    // template
    for (var i = 0; i < unigram_templs_.Count; ++i)
    {
        tofs.WriteLine(unigram_templs_[i]);
    }
    for (var i = 0; i < bigram_templs_.Count; ++i)
    {
        tofs.WriteLine(bigram_templs_[i]);
    }
    tofs.Close();
}
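Process writes the shrunk weights as consecutive (long id, float weight) records, so a loader only has to reverse the two bw_alpha.Write calls. A minimal sketch of such a loader; the method name is hypothetical and the file layout is inferred from the writes above:

using System.IO;

// Hypothetical loader for the shrunk .alpha file: each record is a long
// feature id followed by a float weight, until end of stream.
static FixedBigArray<double> LoadShrinkedAlpha(string fileName, long maxid)
{
    var alpha = new FixedBigArray<double>(maxid + 1, 0);   // entries default to 0
    using (var br = new BinaryReader(File.OpenRead(fileName)))
    {
        while (br.BaseStream.Position < br.BaseStream.Length)
        {
            var id = br.ReadInt64();        // matches bw_alpha.Write(i)
            var weight = br.ReadSingle();   // matches bw_alpha.Write((float)alpha_[i])
            alpha[id] = weight;
        }
    }
    return alpha;
}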
public void mcsrch(double[] x, double f, double[] g, FixedBigArray<double> s, long s_idx,
                   ref double stp, ref long info, ref long nfev, double[] wa)
{
    var size = x.LongLength - 1;

    /* Parameter adjustments */
    if (info == -1)
    {
        info = 0;
        nfev++;

        var dg = ddot_(size, g, 1, s, s_idx + 1);
        var ftest1 = finit + stp * dgtest;

        if (brackt && ((stp <= stmin || stp >= stmax) || infoc == 0))
        {
            info = 6;
            Console.WriteLine("MCSRCH warning: Rounding errors prevent further progress. There may not be a step which satisfies the sufficient decrease and curvature conditions. Tolerances may be too small.");
            Console.WriteLine("bracket: {0}, stp:{1}, stmin:{2}, stmax:{3}, infoc:{4}", brackt, stp, stmin, stmax, infoc);
        }
        if (stp == lb3_1_stpmax && f <= ftest1 && dg <= dgtest)
        {
            info = 5;
            Console.WriteLine("MCSRCH warning: The step is too large.");
        }
        if (stp == lb3_1_stpmin && (f > ftest1 || dg >= dgtest))
        {
            info = 4;
            Console.WriteLine("MCSRCH warning: The step is too small.");
            Console.WriteLine("stp:{0}, lb3_1_stpmin:{1}, f:{2}, ftest1:{3}, dg:{4}, dgtest:{5}", stp, lb3_1_stpmin, f, ftest1, dg, dgtest);
        }
        if (nfev >= maxfev)
        {
            info = 3;
            Console.WriteLine("MCSRCH warning: More than {0} function evaluations were required at the present iteration.", maxfev);
        }
        if (brackt && stmax - stmin <= xtol * stmax)
        {
            info = 2;
            Console.WriteLine("MCSRCH warning: Relative width of the interval of uncertainty is at most xtol.");
        }
        if (f <= ftest1 && Math.Abs(dg) <= lb3_1_gtol * (-dginit))
        {
            // Strong Wolfe conditions hold: sufficient decrease and curvature
            info = 1;
        }
        if (info != 0)
        {
            return;
        }

        if (stage1 && f <= ftest1 && dg >= Math.Min(ftol, lb3_1_gtol) * dginit)
        {
            stage1 = false;
        }

        if (stage1 && f <= fx && f > ftest1)
        {
            // Use the modified function and derivative values to update the interval
            var fm = f - stp * dgtest;
            var fxm = fx - stx * dgtest;
            var fym = fy - sty * dgtest;
            var dgm = dg - dgtest;
            var dgxm = dgx - dgtest;
            var dgym = dgy - dgtest;
            mcstep(ref stx, ref fxm, ref dgxm, ref sty, ref fym, ref dgym, ref stp, fm, dgm, ref brackt, stmin, stmax, ref infoc);
            fx = fxm + stx * dgtest;
            fy = fym + sty * dgtest;
            dgx = dgxm + dgtest;
            dgy = dgym + dgtest;
        }
        else
        {
            mcstep(ref stx, ref fx, ref dgx, ref sty, ref fy, ref dgy, ref stp, f, dg, ref brackt, stmin, stmax, ref infoc);
        }

        if (brackt)
        {
            // Bisect if the interval does not shrink fast enough
            var d1 = sty - stx;
            if (Math.Abs(d1) >= p66 * width1)
            {
                stp = stx + p5 * (sty - stx);
            }
            width1 = width;
            d1 = sty - stx;
            width = Math.Abs(d1);
        }
    }
    else
    {
        // First call: validate inputs and initialize the line search state
        infoc = 1;
        if (size <= 0 || stp <= 0.0)
        {
            return;
        }

        dginit = ddot_(size, g, 1, s, s_idx + 1);
        if (dginit >= 0.0)
        {
            // s is not a descent direction
            return;
        }

        brackt = false;
        stage1 = true;
        nfev = 0;
        finit = f;
        dgtest = ftol * dginit;
        width = lb3_1_stpmax - lb3_1_stpmin;
        width1 = width / p5;

        Parallel.For(1, size + 1, parallelOption, i =>
        {
            wa[i] = x[i];
        });

        stx = 0.0;
        fx = finit;
        dgx = dginit;
        sty = 0.0;
        fy = finit;
        dgy = dginit;
    }

    if (brackt)
    {
        stmin = Math.Min(stx, sty);
        stmax = Math.Max(stx, sty);
    }
    else
    {
        stmin = stx;
        stmax = stp + xtrapf * (stp - stx);
    }

    stp = Math.Max(stp, lb3_1_stpmin);
    stp = Math.Min(stp, lb3_1_stpmax);

    if ((brackt && ((stp <= stmin || stp >= stmax) || nfev >= maxfev - 1 || infoc == 0))
        || (brackt && (stmax - stmin <= xtol * stmax)))
    {
        stp = stx;
    }

    // Move to the trial point x = wa + stp * s and ask the caller to re-evaluate f and g
    var stp_t = stp;
    Parallel.For(1, size + 1, parallelOption, i =>
    {
        x[i] = (wa[i] + stp_t * s[s_idx + i]);
    });
    info = -1;
}
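The successful exit in mcsrch (info = 1) is the strong Wolfe test: sufficient decrease, f <= finit + stp * ftol * dginit (note dgtest = ftol * dginit), plus the curvature bound |dg| <= gtol * (-dginit). Restated as a standalone predicate for readability; a sketch whose parameter names mirror the fields above:

// Illustrative restatement of mcsrch's acceptance test (strong Wolfe conditions).
// finit and dginit are the function value and directional derivative at stp = 0.
static bool SatisfiesStrongWolfe(double f, double dg, double stp,
                                 double finit, double dginit,
                                 double ftol, double gtol)
{
    var sufficientDecrease = f <= finit + stp * ftol * dginit;  // Armijo condition
    var curvature = Math.Abs(dg) <= gtol * (-dginit);           // strong curvature bound
    return sufficientDecrease && curvature;
}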