/**
 * Init parameters for continuing estimation (or for later inference)
 * from a model previously saved on disk: reloads z and the training
 * data, then rebuilds all count matrices from the loaded assignments.
 */
public bool initEstimatedModel(LDACommandLineOptions option)
{
    if (!init(option))
    {
        return false;
    }

    p = new double[K];

    // Load the persisted model, i.e. read z (topic assignments) and trndata.
    if (!loadModel())
    {
        Console.WriteLine("Fail to load word-topic assignment file of the model!\n");
        return false;
    }

    Console.WriteLine("Model loaded:");
    Console.WriteLine("\talpha:" + alpha);
    Console.WriteLine("\tbeta:" + beta);
    Console.WriteLine("\tM:" + M);
    Console.WriteLine("\tV:" + V);

    nw = ArrayInitializers.ZerosInt(V, K);
    nd = ArrayInitializers.ZerosInt(M, K);
    nwsum = ArrayInitializers.ZerosInt(K);
    ndsum = ArrayInitializers.ZerosInt(M);

    // Reconstruct the count matrices from the loaded topic assignments.
    for (int docIdx = 0; docIdx < data.M; docIdx++)
    {
        int docLen = data.Docs[docIdx].Length;
        for (int pos = 0; pos < docLen; pos++)
        {
            int word = data.Docs[docIdx].Words[pos];
            int topic = z[docIdx][pos];

            nw[word][topic] += 1;   // instances of this word assigned to the topic
            nd[docIdx][topic] += 1; // words in this document assigned to the topic
            nwsum[topic] += 1;      // total words assigned to the topic
        }
        ndsum[docIdx] = docLen;     // total number of words in this document
    }

    theta = ArrayInitializers.Empty(M, K);
    phi = ArrayInitializers.Empty(K, V);

    dir = option.dir;
    savestep = option.savestep;

    return true;
}
/**
 * Wire up a Model from the command-line options and initialize it
 * either as a brand-new model (-est) or from a previously saved
 * model (-estc).
 */
public bool init(LDACommandLineOptions option)
{
    this.option = option;

    trnModel = new Model
    {
        dfile = option.dfile,
        dir = option.dir,
        K = option.K,
        savestep = option.savestep,
        niters = option.niters
    };

    if (option.est)
    {
        if (!trnModel.initNewModel(option))
        {
            return false;
        }
        // Persist the word <-> id mapping so later inference can reuse the vocabulary.
        trnModel.data.LocalDictionary.WriteWordMap(option.dir + "\\" + option.wordMapFileName);
        return true;
    }

    if (option.estc)
    {
        return trnModel.initEstimatedModel(option);
    }

    return true;
}
//---------------------------------------------------------------
// Init Methods
//---------------------------------------------------------------

/**
 * Copy the common model parameters (K, alpha, beta, paths, ...) out of
 * the command-line options. Shared by the initNewModel/initEstimatedModel
 * variants.
 */
protected bool init(LDACommandLineOptions option)
{
    if (option == null)
    {
        return false;
    }

    modelName = option.modelName;
    K = option.K;

    // A negative alpha means "not supplied": fall back to the 50/K heuristic.
    alpha = option.alpha;
    if (alpha < 0.0)
    {
        alpha = 50.0 / K;
    }

    // Keep the field's existing beta unless the option carries a usable value.
    if (option.beta >= 0)
    {
        beta = option.beta;
    }

    niters = option.niters;

    // Normalize the directory by stripping a single trailing backslash.
    dir = option.dir;
    if (dir.EndsWith("\\"))
    {
        dir = dir.Substring(0, dir.Length - 1);
    }

    dfile = option.dfile;
    twords = option.twords;
    wordMapFile = option.wordMapFileName;

    return true;
}
//-----------------------------------------------------
// Init method
//-----------------------------------------------------

/**
 * Load the previously estimated model and precompute its theta/phi
 * distributions for use during inference on new data.
 */
public bool init(LDACommandLineOptions option)
{
    this.option = option;

    trnModel = new Model();
    if (!trnModel.initEstimatedModel(option))
    {
        return false;
    }

    // The trained model's vocabulary becomes the global dictionary
    // against which new documents are mapped.
    globalDict = trnModel.data.LocalDictionary;

    computeTrnTheta();
    computeTrnPhi();

    return true;
}
/**
 * Init parameters for inference: read a new dataset from file, mapping
 * its words through the trained model's dictionary, then delegate to
 * the dataset-based overload.
 */
public bool initNewModel(LDACommandLineOptions option, Model trnModel)
{
    if (!init(option))
    {
        return false;
    }

    string path = dir + "\\" + dfile;
    LDADataset dataset = LDADataset.ReadDataset(path, trnModel.data.LocalDictionary);
    if (dataset == null)
    {
        Console.WriteLine("Fail to read dataset!\n");
        return false;
    }

    return initNewModel(option, dataset, trnModel);
}
/**
 * Print the command-line usage banner for the LDA tool.
 */
public static void showHelp(LDACommandLineOptions option)
{
    Console.WriteLine("LDA [options ...] [arguments...] \n");
    Console.WriteLine(option.GetUsage());
}
/**
 * Entry point: parse command-line options, then either estimate a model
 * (-est / -estc) or run inference (-inf) and print the first words of
 * each topic's phi row.
 */
static void Main(string[] args)
{
    LDACommandLineOptions option = new LDACommandLineOptions();
    var parser = new Parser();

    // Defaults, overridable from the command line below.
    // NOTE(review): the default data dir is a machine-specific absolute
    // path; consider requiring -dir explicitly or using a relative default.
    option.beta = 0.1;
    option.K = 10;
    option.niters = 1000;
    option.savestep = 100;
    option.twords = 20;
    option.dfile = "trndocs.dat";
    option.dir = @"C:\Users\Amine\Documents\visual studio 2013\Projects\GibbsLDA.NET\GibbsLDA.NET\data";
    option.est = true;
    option.modelName = "model-final";
    option.wordMapFileName = "wordmap.txt";

    var stopWatch = new Stopwatch();
    stopWatch.Start();

    try
    {
        parser.ParseArguments(args, option);

        if (option.est || option.estc)
        {
            Estimator estimator = new Estimator();
            estimator.init(option);
            estimator.estimate();
        }
        else if (option.inf)
        {
            Inferencer inferencer = new Inferencer();
            inferencer.init(option);
            Model newModel = inferencer.inference();

            for (int i = 0; i < newModel.phi.Length; ++i)
            {
                // phi: K * V
                Console.WriteLine("-----------------------\ntopic" + i + " : ");

                // Bound the display loop by the actual vocabulary size:
                // the original fixed "j < 10" loop indexed past the end of
                // phi[i] (and Id2Word) whenever V < 10.
                int wordsToShow = Math.Min(10, newModel.phi[i].Length);
                for (int j = 0; j < wordsToShow; ++j)
                {
                    Console.WriteLine(inferencer.globalDict.Id2Word[j] + "\t" + newModel.phi[i][j]);
                }
            }
        }
    }
    catch (ParserException cle)
    {
        Console.WriteLine("Command line error: " + cle.Message);
        showHelp(option);
        Console.ReadLine();
        return;
    }
    catch (Exception e)
    {
        Console.WriteLine("Error in main: " + e.Message);
        Console.WriteLine(e.StackTrace);
        Console.ReadLine();
        return;
    }

    stopWatch.Stop();
    Console.WriteLine("\n This run took : " + stopWatch.ElapsedMilliseconds / 1000.0 + " seconds");
    Console.ReadLine();
}
/**
 * Init parameters for inference on an already-loaded dataset:
 * reuse the trained model's hyper-parameters and randomly seed
 * topic assignments for the new documents.
 * @param newData dataset for which we do inference
 */
public bool initNewModel(LDACommandLineOptions option, LDADataset newData, Model trnModel)
{
    if (!init(option))
    {
        return false;
    }

    var rnd = new Random();

    // Inference reuses the trained model's hyper-parameters.
    K = trnModel.K;
    alpha = trnModel.alpha;
    beta = trnModel.beta;
    p = new double[K];
    Console.WriteLine("K:" + K);

    data = newData;

    //+ allocate memory and assign values for variables
    M = data.M;
    V = data.V;
    dir = option.dir;
    savestep = option.savestep;
    Console.WriteLine("M:" + M);
    Console.WriteLine("V:" + V);

    // K: from command line or default value
    // alpha, beta: from command line or default values
    // niters, savestep: from command line or default values
    nw = ArrayInitializers.ZerosInt(V, K);
    nd = ArrayInitializers.ZerosInt(M, K);
    nwsum = ArrayInitializers.ZerosInt(K);
    ndsum = ArrayInitializers.ZerosInt(M);

    // Randomly assign an initial topic to every word position and
    // accumulate the corresponding counts.
    z = new List<int>[M];
    for (int doc = 0; doc < data.M; doc++)
    {
        int docLen = data.Docs[doc].Length;
        z[doc] = new List<int>();

        for (int pos = 0; pos < docLen; pos++)
        {
            int topic = (int)Math.Floor(rnd.NextDouble() * K);
            z[doc].Add(topic);

            nw[data.Docs[doc].Words[pos]][topic] += 1; // word -> topic count
            nd[doc][topic] += 1;                       // doc -> topic count
            nwsum[topic] += 1;                         // topic total
        }
        ndsum[doc] = docLen; // total number of words in this document
    }

    theta = ArrayInitializers.Empty(M, K);
    phi = ArrayInitializers.Empty(K, V);

    return true;
}
/**
 * Init parameters for estimation: read the training data, then randomly
 * seed a topic for every word position and build the count matrices.
 *
 * NOTE(review): the call to init(option) is commented out below, so this
 * method relies on the caller to have populated K/alpha/beta/dir/dfile
 * beforehand. Estimator.init assigns K, dir, dfile, niters and savestep
 * but not alpha/beta — verify those fields have valid values (e.g. field
 * initializers in Model) before this runs.
 */
public bool initNewModel(LDACommandLineOptions option)
{
    //if (!init(option))
    //return false;

    var rnd = new Random();

    p = new double[K];

    data = LDADataset.ReadDataset(dir + "\\" + dfile);
    if (data == null)
    {
        Console.WriteLine("Fail to read training data!\n");
        return false;
    }

    //+ allocate memory and assign values for variables
    M = data.M;
    V = data.V;
    dir = option.dir;
    savestep = option.savestep;

    // K: from command line or default value
    // alpha, beta: from command line or default values
    // niters, savestep: from command line or default values

    // The CLR zero-initializes numeric arrays, so the original's explicit
    // element-by-element clearing loops were redundant and are dropped.
    nw = new int[V][];
    for (int w = 0; w < V; w++)
    {
        nw[w] = new int[K];
    }

    nd = new int[M][];
    for (int m = 0; m < M; m++)
    {
        nd[m] = new int[K];
    }

    nwsum = new int[K];
    ndsum = new int[M];

    // Randomly assign an initial topic to every word position and
    // accumulate the corresponding counts.
    z = new List<int>[M];
    for (int m = 0; m < data.M; m++)
    {
        int N = data.Docs[m].Length;
        z[m] = new List<int>();

        for (int n = 0; n < N; n++)
        {
            int topic = (int)Math.Floor(rnd.NextDouble() * K);
            z[m].Add(topic);

            nw[data.Docs[m].Words[n]][topic] += 1; // word -> topic count
            nd[m][topic] += 1;                     // doc -> topic count
            nwsum[topic] += 1;                     // topic total
        }
        ndsum[m] = N; // total number of words in document m
    }

    theta = new double[M][];
    for (int m = 0; m < M; m++)
    {
        theta[m] = new double[K];
    }

    phi = new double[K][];
    for (int k = 0; k < K; k++)
    {
        phi[k] = new double[V];
    }

    return true;
}