Пример #1
0
        /**
         * Runs inference for a new dataset.
         *
         * A fresh Model is initialised from newData and the trained model
         * (trnModel), stored in this.newModel, and then sampled for niters
         * sweeps. After sampling, computeNewTheta() and computeNewPhi() are
         * called before the model is returned.
         *
         * @param newData dataset for which topic assignments are sampled
         * @return the model stored in this.newModel after sampling
         */
        public Model inference(LDADataset newData)
        {
            Console.WriteLine("init new model");
            Model newModel = new Model();

            newModel.initNewModel(option, newData, trnModel);
            this.newModel = newModel;

            Console.WriteLine("Sampling " + niters + " iteration for inference!");
            for (newModel.liter = 1; newModel.liter <= niters; newModel.liter++)
            {
                // for all newz_i
                for (int m = 0; m < newModel.M; ++m)
                {
                    for (int n = 0; n < newModel.data.Docs[m].Length; n++)
                    {
                        // (newz_i = newz[m][n])
                        // sample from p(z_i|z_-1,w)
                        int topic = infSampling(m, n);

                        // Overwrite the assignment at position n. List<T>.Insert
                        // would add a new element and shift the rest, growing the
                        // list on every sweep and misaligning it with the words.
                        newModel.z[m][n] = topic;
                    }
                } // end foreach new doc
            }     // end iterations

            Console.WriteLine("Gibbs sampling for inference completed!");

            computeNewTheta();
            computeNewPhi();

            // The loop exits with liter == niters + 1; step back to the last sweep run.
            newModel.liter--;
            return(this.newModel);
        }
Пример #2
0
        /**
         * Loads word-topic assignments from a text file.
         *
         * The file is expected to hold M lines, one per document, each made of
         * whitespace-separated "word:topic" tokens. For every line a Document
         * is built from the word ids and stored in data, and the topic ids are
         * stored in z.
         *
         * @param tassignFile path of the assignment file to read
         * @return true on success; false if a token is malformed, the file has
         *         fewer than M lines, or any exception occurs while reading
         */
        protected bool readTAssignFile(string tassignFile)
        {
            try
            {
                // "using" guarantees the reader is closed on every exit path,
                // including the early returns below and exceptions.
                using (var reader = new StreamReader(tassignFile))
                {
                    z      = new List<int>[M];
                    data   = new LDADataset(M);
                    data.V = V;

                    for (int i = 0; i < M; i++)
                    {
                        string line = reader.ReadLine();
                        if (line == null)
                        {
                            // File ended before M documents were read.
                            Console.WriteLine("Invalid word-topic assignment file: unexpected end of file\n");
                            return(false);
                        }

                        var parts = line.Split();

                        var words  = new List<int>();
                        var topics = new List<int>();

                        for (int j = 0; j < parts.Length; j++)
                        {
                            var tokenParts = parts[j].Split(':');
                            if (tokenParts.Length != 2)
                            {
                                Console.WriteLine("Invalid word-topic assignment line\n");
                                return(false);
                            }

                            // tokenParts[0] is the word id, tokenParts[1] the topic id.
                            words.Add(Convert.ToInt32(tokenParts[0]));
                            topics.Add(Convert.ToInt32(tokenParts[1]));
                        } // end for each topic assignment

                        // allocate and add new document to the corpus
                        Document doc = new Document(words);
                        data.SetDoc(doc, i);

                        // assign values for z
                        z[i] = new List<int>(topics);
                    } // end for each doc
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Error while loading model: " + e.Message);
                return(false);
            }
            return(true);
        }
Пример #3
0
        /**
         * Runs inference on raw document strings.
         *
         * The strings are converted to an LDADataset via
         * LDADataset.ReadDataset(strs, globalDict) and passed to
         * inference(LDADataset).
         *
         * @param strs documents to run inference on, one per array element
         * @return the model returned by inference(LDADataset)
         */
        public Model inference(string[] strs)
        {
            LDADataset dataset = LDADataset.ReadDataset(strs, globalDict);

            return(inference(dataset));
        }
Пример #4
0
        /**
         * Builds a dataset from an array of strings using the given dictionary.
         * Each element of strings is passed to SetDoc together with its index.
         *
         * @param strings    source strings, one per document
         * @param dictionary dictionary handed to the LDADataset constructor
         * @return the populated dataset
         */
        public static LDADataset ReadDataset(string[] strings, WordDictionary dictionary)
        {
            int documentCount = strings.Length;
            LDADataset result = new LDADataset(documentCount, dictionary);

            int position = 0;
            foreach (string text in strings)
            {
                result.SetDoc(text, position);
                position++;
            }

            return result;
        }
Пример #5
0
        /**
         * Builds a dataset from an array of strings.
         * Each element of strings is passed to SetDoc together with its index.
         *
         * @param strings source strings, one per document
         * @return the populated dataset
         */
        public static LDADataset ReadDataset(string[] strings)
        {
            LDADataset result = new LDADataset(strings.Length);

            int position = 0;
            while (position < strings.Length)
            {
                result.SetDoc(strings[position], position);
                position++;
            }

            return result;
        }
Пример #6
0
        /**
         * Initialises this model for inference, reading the new dataset from
         * the file dir + "\" + dfile with the training model's local dictionary.
         *
         * @param option   command-line options, forwarded to init and to the
         *                 dataset-based initNewModel overload
         * @param trnModel trained model supplying the dictionary
         * @return false if init(option) fails or the dataset cannot be read;
         *         otherwise the result of initNewModel(option, dataset, trnModel)
         */
        public bool initNewModel(LDACommandLineOptions option, Model trnModel)
        {
            if (init(option))
            {
                string datasetPath = dir + "\\" + dfile;
                LDADataset loaded = LDADataset.ReadDataset(datasetPath, trnModel.data.LocalDictionary);

                if (loaded == null)
                {
                    Console.WriteLine("Fail to read dataset!\n");
                }
                else
                {
                    return initNewModel(option, loaded, trnModel);
                }
            }

            return false;
        }
Пример #7
0
 /**
  * Reads a dataset from a text file.
  * The first line is converted to the document count m; each of the next
  * m lines is passed to SetDoc together with its index.
  *
  * @param filename path of the file to read
  * @return the populated dataset, or null if any exception is thrown
  *         (the exception message is written to the console)
  */
 public static LDADataset ReadDataset(string filename)
 {
     try
     {
         using (StreamReader input = new StreamReader(filename))
         {
             string header = input.ReadLine();
             int documentCount = Convert.ToInt32(header);
             LDADataset result = new LDADataset(documentCount);

             int index = 0;
             while (index < documentCount)
             {
                 string text = input.ReadLine();
                 result.SetDoc(text, index);
                 index++;
             }

             return result;
         }
     }
     catch (Exception e)
     {
         Console.WriteLine("Failed reading dataSet :" + e.Message);
         return null;
     }
 }
Пример #8
0
        /**
         * Initialises this model for inference over an already-loaded dataset.
         *
         * K, alpha and beta are copied from the trained model; M and V come
         * from newData; dir and savestep come from option. The count arrays
         * (nw, nd, nwsum, ndsum) are allocated, every word position receives a
         * random topic in [0, K), and theta/phi are allocated.
         *
         * @param option   command-line options (passed to init; supplies dir and savestep)
         * @param newData  dataset for which inference is done
         * @param trnModel trained model supplying K, alpha and beta
         * @return false if init(option) fails, true otherwise
         */
        public bool initNewModel(LDACommandLineOptions option, LDADataset newData, Model trnModel)
        {
            if (!init(option))
            {
                return false;
            }

            var rnd = new Random();

            // hyper-parameters are inherited from the trained model
            K     = trnModel.K;
            alpha = trnModel.alpha;
            beta  = trnModel.beta;

            p = new double[K];
            Console.WriteLine("K:" + K);

            data = newData;

            // sizes come from the dataset, bookkeeping settings from the options
            M        = data.M;
            V        = data.V;
            dir      = option.dir;
            savestep = option.savestep;
            Console.WriteLine("M:" + M);
            Console.WriteLine("V:" + V);

            // count tables
            nw    = ArrayInitializers.ZerosInt(V, K);
            nd    = ArrayInitializers.ZerosInt(M, K);
            nwsum = ArrayInitializers.ZerosInt(K);
            ndsum = ArrayInitializers.ZerosInt(M);

            z = new List<int>[M];
            for (int docIndex = 0; docIndex < data.M; docIndex++)
            {
                int docLength = data.Docs[docIndex].Length;
                z[docIndex] = new List<int>();

                // random initial topic for every word position
                for (int pos = 0; pos < docLength; pos++)
                {
                    int topic = (int)Math.Floor(rnd.NextDouble() * K);
                    z[docIndex].Add(topic);

                    // number of instances of this word assigned to the topic
                    nw[data.Docs[docIndex].Words[pos]][topic]++;
                    // number of words in this document assigned to the topic
                    nd[docIndex][topic]++;
                    // total number of words assigned to the topic
                    nwsum[topic]++;
                }

                // total number of words in this document
                ndsum[docIndex] = docLength;
            }

            theta = ArrayInitializers.Empty(M, K);
            phi   = ArrayInitializers.Empty(K, V);

            return true;
        }
Пример #9
0
        /**
         * Initialises this model for estimation.
         *
         * Reads the training data from dir + "\" + dfile, allocates the count
         * arrays (nw, nd, nwsum, ndsum), assigns every word position a random
         * topic in [0, K), and allocates theta and phi.
         *
         * NOTE(review): init(option) is not called here (the call was
         * commented out in the original), and K, dir and dfile are read before
         * this method assigns dir from option. Presumably the caller has set
         * them beforehand - confirm.
         *
         * @param option command-line options supplying dir and savestep
         * @return false if the training data cannot be read, true otherwise
         */
        public bool initNewModel(LDACommandLineOptions option)
        {
            var rnd = new Random();

            p = new double[K];

            data = LDADataset.ReadDataset(dir + "\\" + dfile);
            if (data == null)
            {
                Console.WriteLine("Fail to read training data!\n");
                return false;
            }

            // sizes come from the dataset, bookkeeping settings from the options
            M        = data.M;
            V        = data.V;
            dir      = option.dir;
            savestep = option.savestep;

            // Newly allocated int arrays are zero-filled by the runtime,
            // so the counters need no explicit clearing.
            nw = new int[V][];
            for (int word = 0; word < V; word++)
            {
                nw[word] = new int[K];
            }

            nd = new int[M][];
            for (int doc = 0; doc < M; doc++)
            {
                nd[doc] = new int[K];
            }

            nwsum = new int[K];
            ndsum = new int[M];

            z = new List<int>[M];
            for (int docIndex = 0; docIndex < data.M; docIndex++)
            {
                int docLength = data.Docs[docIndex].Length;
                z[docIndex] = new List<int>();

                // random initial topic for every word position
                for (int pos = 0; pos < docLength; pos++)
                {
                    int topic = (int)Math.Floor(rnd.NextDouble() * K);
                    z[docIndex].Add(topic);

                    // number of instances of this word assigned to the topic
                    nw[data.Docs[docIndex].Words[pos]][topic]++;
                    // number of words in this document assigned to the topic
                    nd[docIndex][topic]++;
                    // total number of words assigned to the topic
                    nwsum[topic]++;
                }

                // total number of words in this document
                ndsum[docIndex] = docLength;
            }

            theta = new double[M][];
            for (int doc = 0; doc < M; doc++)
            {
                theta[doc] = new double[K];
            }

            phi = new double[K][];
            for (int topicIndex = 0; topicIndex < K; topicIndex++)
            {
                phi[topicIndex] = new double[V];
            }

            return true;
        }