/**
 * Inference for a new model, taking the data from the specified dataset.
 *
 * Creates a fresh Model, initialises it from newData and the trained model,
 * runs niters sweeps in which every word position of every document is
 * re-sampled through infSampling, then computes the new theta and phi.
 *
 * @param newData dataset for which inference is performed
 * @return the newly inferred model (also stored in this.newModel)
 */
public Model inference(LDADataset newData)
{
    Console.WriteLine("init new model");
    Model newModel = new Model();
    // NOTE(review): the bool result of initNewModel is ignored here, as in the
    // original code; a failed initialisation is not reported to the caller.
    newModel.initNewModel(option, newData, trnModel);
    this.newModel = newModel;

    Console.WriteLine("Sampling " + niters + " iteration for inference!");
    for (newModel.liter = 1; newModel.liter <= niters; newModel.liter++)
    {
        // for all newz_i
        for (int m = 0; m < newModel.M; ++m)
        {
            for (int n = 0; n < newModel.data.Docs[m].Length; n++)
            {
                // newz_i = newz[m][n], sampled from p(z_i | z_-i, w)
                int topic = infSampling(m, n);

                // Overwrite the assignment in place. List<T>.Insert would grow
                // the list by one element per sample and shift the remaining
                // assignments to the right, so later positions would see
                // stale topics.
                newModel.z[m][n] = topic;
            }
        } // end foreach new doc
    } // end iterations

    Console.WriteLine("Gibbs sampling for inference completed!");
    computeNewTheta();
    computeNewPhi();

    // The loop leaves liter at niters + 1; step back to the last sweep run.
    newModel.liter--;
    return this.newModel;
}
/**
 * Reads a word-topic assignment file and rebuilds data and z from it.
 *
 * The file is expected to hold M lines, one per document, each made of
 * whitespace-separated "word:topic" tokens. For every document the word ids
 * become a Document stored in data, and the topic ids become z[i].
 *
 * @param tassignFile path of the assignment file to read
 * @return true when all M documents were read; false on a malformed token,
 *         a premature end of file, or any exception while reading/parsing
 */
protected bool readTAssignFile(string tassignFile)
{
    try
    {
        // using guarantees the reader is closed on every exit path,
        // including the early "return false" branches and exceptions.
        using (var reader = new StreamReader(tassignFile))
        {
            z = new List<int>[M];
            data = new LDADataset(M);
            data.V = V;

            for (int i = 0; i < M; i++)
            {
                string line = reader.ReadLine();
                if (line == null)
                {
                    Console.WriteLine("Error while loading model: unexpected end of file");
                    return false;
                }

                // Split on any whitespace and drop empty entries so that a
                // trailing or doubled separator does not produce a bogus
                // empty token that would be rejected as malformed.
                string[] tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

                var words = new List<int>(tokens.Length);
                var topics = new List<int>(tokens.Length);

                foreach (string token in tokens)
                {
                    string[] tokenParts = token.Split(':');
                    if (tokenParts.Length != 2)
                    {
                        Console.WriteLine("Invalid word-topic assignment line\n");
                        return false;
                    }

                    words.Add(Convert.ToInt32(tokenParts[0]));
                    // The topic is the part after the colon; reading index 0
                    // here would store the word id as the topic.
                    topics.Add(Convert.ToInt32(tokenParts[1]));
                } // end for each topic assignment

                // allocate and add new document to the corpus
                Document doc = new Document(words);
                data.SetDoc(doc, i);

                // assign values for z (topics is a fresh list owned by this document)
                z[i] = topics;
            } // end for each doc
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Error while loading model: " + e.Message);
        return false;
    }

    return true;
}
/**
 * Runs inference on documents supplied as strings.
 *
 * Builds an LDADataset from strs using globalDict and delegates to
 * inference(LDADataset).
 *
 * @param strs one entry per document
 * @return the model produced by inference(LDADataset)
 */
public Model inference(string[] strs)
{
    // The Model instance that used to be created here was never used;
    // inference(LDADataset) builds the model itself.
    LDADataset dataset = LDADataset.ReadDataset(strs, globalDict);
    return inference(dataset);
}
/**
 * Builds a dataset from an array of strings, one document per entry,
 * passing the given dictionary to the LDADataset constructor.
 *
 * @param strings    source strings; entry i is stored as document i
 * @param dictionary dictionary handed to the new dataset
 * @return the populated dataset
 */
public static LDADataset ReadDataset(string[] strings, WordDictionary dictionary)
{
    int documentCount = strings.Length;
    var result = new LDADataset(documentCount, dictionary);

    int position = 0;
    foreach (string text in strings)
    {
        result.SetDoc(text, position);
        position++;
    }

    return result;
}
/**
 * Builds a dataset from an array of strings, one document per entry.
 *
 * @param strings source strings; entry i is stored as document i
 * @return the populated dataset
 */
public static LDADataset ReadDataset(string[] strings)
{
    int total = strings.Length;
    var result = new LDADataset(total);

    int index = 0;
    while (index < total)
    {
        result.SetDoc(strings[index], index);
        index++;
    }

    return result;
}
/**
 * Init parameters for inference,
 * reading the new dataset from file.
 *
 * Runs init(option), reads the dataset located at dir/dfile using the
 * trained model's local dictionary, then delegates to
 * initNewModel(option, dataset, trnModel).
 *
 * @param option   command line options forwarded to init and to the overload
 * @param trnModel trained model supplying the dictionary and parameters
 * @return false when init fails or the dataset cannot be read; otherwise the
 *         result of the three-argument overload
 */
public bool initNewModel(LDACommandLineOptions option, Model trnModel)
{
    if (!init(option))
    {
        return false;
    }

    // Build the path with Path.Combine instead of a hard-coded "\\" so the
    // platform's directory separator is used. Null parts are replaced by an
    // empty string so that a missing value still ends in the
    // "Fail to read dataset" branch rather than an ArgumentNullException.
    string datasetPath = System.IO.Path.Combine(dir ?? string.Empty, dfile ?? string.Empty);

    LDADataset dataset = LDADataset.ReadDataset(datasetPath, trnModel.data.LocalDictionary);
    if (dataset == null)
    {
        Console.WriteLine("Fail to read dataset!\n");
        return false;
    }

    return initNewModel(option, dataset, trnModel);
}
/**
 * Reads a dataset from a text file.
 *
 * The first line is converted to the document count; each of the following
 * lines is stored as one document, in order.
 *
 * @param filename path of the file to read
 * @return the dataset, or null when any exception occurs while reading
 *         (the exception message is written to the console)
 */
public static LDADataset ReadDataset(string filename)
{
    try
    {
        using (StreamReader input = new StreamReader(filename))
        {
            // Header line: number of documents that follow.
            int documentCount = Convert.ToInt32(input.ReadLine());
            var result = new LDADataset(documentCount);

            int index = 0;
            while (index < documentCount)
            {
                string text = input.ReadLine();
                result.SetDoc(text, index);
                index++;
            }

            return result;
        }
    }
    catch (Exception error)
    {
        Console.WriteLine("Failed reading dataSet :" + error.Message);
        return null;
    }
}
/**
 * Init parameters for inference on an already loaded dataset.
 *
 * Copies K, alpha and beta from the trained model, takes dir and savestep
 * from the options, allocates the count arrays and gives every word
 * position a randomly drawn starting topic.
 *
 * @param option   options passed to init; also the source of dir and savestep
 * @param newData  DataSet for which we do inference
 * @param trnModel trained model providing K, alpha and beta
 * @return false when init(option) fails, true otherwise
 */
public bool initNewModel(LDACommandLineOptions option, LDADataset newData, Model trnModel)
{
    if (!init(option))
    {
        return false;
    }

    var random = new Random();

    // Hyper-parameters come from the trained model.
    K = trnModel.K;
    alpha = trnModel.alpha;
    beta = trnModel.beta;
    p = new double[K];
    Console.WriteLine("K:" + K);

    // Sizes come from the new data, run settings from the options.
    data = newData;
    M = data.M;
    V = data.V;
    dir = option.dir;
    savestep = option.savestep;
    Console.WriteLine("M:" + M);
    Console.WriteLine("V:" + V);

    // Count tables start at zero.
    nw = ArrayInitializers.ZerosInt(V, K);
    nd = ArrayInitializers.ZerosInt(M, K);
    nwsum = ArrayInitializers.ZerosInt(K);
    ndsum = ArrayInitializers.ZerosInt(M);

    z = new List<int>[M];
    for (int docIndex = 0; docIndex < data.M; docIndex++)
    {
        var document = data.Docs[docIndex];
        int wordCount = document.Length;
        var assignments = new List<int>();
        z[docIndex] = assignments;

        for (int position = 0; position < wordCount; position++)
        {
            // Random starting topic for this word position.
            int topic = (int)Math.Floor(random.NextDouble() * K);
            assignments.Add(topic);

            var wordId = document.Words[position];
            nw[wordId][topic]++;    // instances of this word assigned to the topic
            nd[docIndex][topic]++;  // words of this document assigned to the topic
            nwsum[topic]++;         // total words assigned to the topic
        }

        // Total number of words in this document.
        ndsum[docIndex] = wordCount;
    }

    theta = ArrayInitializers.Empty(M, K);
    phi = ArrayInitializers.Empty(K, V);

    return true;
}
/**
 * Init parameters for estimation.
 *
 * Reads the training data from dir/dfile, allocates the count arrays and
 * the theta/phi matrices, and gives every word position a randomly drawn
 * starting topic.
 *
 * @param option options supplying dir and savestep
 * @return false when the training data cannot be read, true otherwise
 */
public bool initNewModel(LDACommandLineOptions option)
{
    // NOTE(review): init(option) is not called here (the call was commented
    // out in the original). K, dir and dfile are read below before anything
    // in this method sets them, so presumably the caller has already run
    // init - confirm.
    var rnd = new Random();

    p = new double[K];

    // Build the path with Path.Combine instead of a hard-coded "\\" so the
    // platform's directory separator is used. Null parts are replaced by an
    // empty string so that a missing value still ends in the
    // "Fail to read training data" branch rather than an exception.
    string dataPath = System.IO.Path.Combine(dir ?? string.Empty, dfile ?? string.Empty);
    data = LDADataset.ReadDataset(dataPath);
    if (data == null)
    {
        Console.WriteLine("Fail to read training data!\n");
        return false;
    }

    //+ allocate memory and assign values for variables
    M = data.M;
    V = data.V;
    dir = option.dir;
    savestep = option.savestep;

    // K: from command line or default value
    // alpha, beta: from command line or default values
    // niters, savestep: from command line or default values

    // Newly allocated C# arrays are already zero-filled, so no explicit
    // clearing loops are needed for the count tables.
    nw = new int[V][];
    for (int word = 0; word < V; word++)
    {
        nw[word] = new int[K];
    }

    nd = new int[M][];
    for (int doc = 0; doc < M; doc++)
    {
        nd[doc] = new int[K];
    }

    nwsum = new int[K];
    ndsum = new int[M];

    z = new List<int>[M];
    for (int m = 0; m < data.M; m++)
    {
        int N = data.Docs[m].Length;
        z[m] = new List<int>();

        // initialize z with a random topic per word position
        for (int n = 0; n < N; n++)
        {
            int topic = (int)Math.Floor(rnd.NextDouble() * K);
            z[m].Add(topic);

            // number of instances of word assigned to topic j
            nw[data.Docs[m].Words[n]][topic] += 1;
            // number of words in document i assigned to topic j
            nd[m][topic] += 1;
            // total number of words assigned to topic j
            nwsum[topic] += 1;
        }

        // total number of words in document i
        ndsum[m] = N;
    }

    theta = new double[M][];
    for (int doc = 0; doc < M; doc++)
    {
        theta[doc] = new double[K];
    }

    phi = new double[K][];
    for (int topicIndex = 0; topicIndex < K; topicIndex++)
    {
        phi[topicIndex] = new double[V];
    }

    return true;
}