Ejemplo n.º 1
0
        /**
         * Sets this model up from a previously saved model, either to continue
         * estimation or to serve as the trained model for later inference.
         *
         * Runs init(option), loads the saved model through loadModel(), rebuilds
         * the count tables (nw, nd, nwsum, ndsum) from the topic assignments in z,
         * and allocates theta and phi.
         *
         * @param option command-line options; passed to init and read for dir and savestep
         * @return false when init or loadModel fails, true otherwise
         */
        public bool initEstimatedModel(LDACommandLineOptions option)
        {
            if (!init(option))
            {
                return false;
            }

            p = new double[K];

            // Per the original note, loadModel reads z and the training data.
            bool modelLoaded = loadModel();
            if (!modelLoaded)
            {
                Console.WriteLine("Fail to load word-topic assignment file of the model!\n");
                return false;
            }

            Console.WriteLine("Model loaded:");
            Console.WriteLine("\talpha:" + alpha);
            Console.WriteLine("\tbeta:" + beta);
            Console.WriteLine("\tM:" + M);
            Console.WriteLine("\tV:" + V);

            // Count tables start at zero and are filled from the loaded assignments.
            nw    = ArrayInitializers.ZerosInt(V, K);
            nd    = ArrayInitializers.ZerosInt(M, K);
            nwsum = ArrayInitializers.ZerosInt(K);
            ndsum = ArrayInitializers.ZerosInt(M);

            for (int docIndex = 0; docIndex < data.M; docIndex++)
            {
                var document  = data.Docs[docIndex];
                int docLength = document.Length;

                for (int position = 0; position < docLength; position++)
                {
                    int wordId = document.Words[position];
                    int topic  = z[docIndex][position];

                    // occurrences of this word assigned to this topic
                    nw[wordId][topic]++;
                    // words of this document assigned to this topic
                    nd[docIndex][topic]++;
                    // all words assigned to this topic
                    nwsum[topic]++;
                }

                // total number of words in this document
                ndsum[docIndex] = docLength;
            }

            theta = ArrayInitializers.Empty(M, K);
            phi   = ArrayInitializers.Empty(K, V);

            dir      = option.dir;
            savestep = option.savestep;

            return true;
        }
Ejemplo n.º 2
0
        /**
         * Stores the options, creates a fresh training model and initializes it.
         *
         * With option.est a new model is initialized and its word map is written
         * to option.dir; with option.estc a previously estimated model is loaded.
         * When neither flag is set the model is only created and configured.
         *
         * @param option command-line options
         * @return false when the selected model initialization fails, true otherwise
         */
        public bool init(LDACommandLineOptions option)
        {
            this.option = option;

            Model model = new Model();
            trnModel = model;

            model.dfile    = option.dfile;
            model.dir      = option.dir;
            model.K        = option.K;
            model.savestep = option.savestep;
            model.niters   = option.niters;

            if (option.est)
            {
                if (!model.initNewModel(option))
                {
                    return false;
                }

                string wordMapPath = option.dir + "\\" + option.wordMapFileName;
                model.data.LocalDictionary.WriteWordMap(wordMapPath);
                return true;
            }

            if (option.estc)
            {
                return model.initEstimatedModel(option);
            }

            return true;
        }
Ejemplo n.º 3
0
        //---------------------------------------------------------------
        //	Init Methods
        //---------------------------------------------------------------

        /**
         * Copies the settings shared by every initialization path from the
         * command-line options into this model.
         *
         * - alpha: taken from the options; a negative value selects 50.0 / K.
         * - beta: only overwritten when option.beta is non-negative.
         * - dir: one trailing backslash is removed so that later
         *   dir + "\\" + file concatenations do not double the separator.
         *
         * @param option command-line options
         * @return false when option or option.dir is null, true otherwise
         */
        protected bool init(LDACommandLineOptions option)
        {
            // A missing working directory used to surface as a NullReferenceException
            // on dir.EndsWith; report it through the existing failure result instead.
            if (option == null || option.dir == null)
            {
                return(false);
            }

            modelName = option.modelName;
            K         = option.K;

            alpha = option.alpha;
            if (alpha < 0.0)
            {
                alpha = 50.0 / K;
            }

            if (option.beta >= 0)
            {
                beta = option.beta;
            }

            niters = option.niters;

            dir = option.dir;
            // Ordinal comparison: this is a path separator check, not linguistic text.
            if (dir.EndsWith("\\", StringComparison.Ordinal))
            {
                dir = dir.Substring(0, dir.Length - 1);
            }

            dfile       = option.dfile;
            twords      = option.twords;
            wordMapFile = option.wordMapFileName;

            return(true);
        }
Ejemplo n.º 4
0
        //-----------------------------------------------------
        // Init method
        //-----------------------------------------------------
        /**
         * Stores the options, loads the previously estimated training model and,
         * on success, takes over its dictionary as globalDict and calls
         * computeTrnTheta() and computeTrnPhi().
         *
         * @param option command-line options
         * @return false when the training model cannot be initialized, true otherwise
         */
        public bool init(LDACommandLineOptions option)
        {
            this.option = option;
            trnModel    = new Model();

            bool loaded = trnModel.initEstimatedModel(option);
            if (loaded)
            {
                globalDict = trnModel.data.LocalDictionary;
                computeTrnTheta();
                computeTrnPhi();
            }

            return loaded;
        }
Ejemplo n.º 5
0
        /**
         * Initializes this model for inference on a dataset read from file.
         *
         * The dataset is read from dir + "\\" + dfile using the training model's
         * local dictionary, then handed to the dataset-based overload.
         *
         * @param option   command-line options
         * @param trnModel trained model whose dictionary is used to read the data
         * @return false when init fails or the dataset cannot be read; otherwise
         *         the result of the dataset-based overload
         */
        public bool initNewModel(LDACommandLineOptions option, Model trnModel)
        {
            if (init(option))
            {
                string datasetPath = dir + "\\" + dfile;
                LDADataset dataset = LDADataset.ReadDataset(datasetPath, trnModel.data.LocalDictionary);

                if (dataset != null)
                {
                    return initNewModel(option, dataset, trnModel);
                }

                Console.WriteLine("Fail to read dataset!\n");
            }

            return false;
        }
Ejemplo n.º 6
0
 /**
  * Prints the command synopsis followed by the usage text supplied by the options object.
  */
 public static void showHelp(LDACommandLineOptions option)
 {
     const string synopsis = "LDA [options ...] [arguments...] \n";
     Console.WriteLine(synopsis);

     var usage = option.GetUsage();
     Console.WriteLine(usage);
 }
Ejemplo n.º 7
0
        /**
         * Program entry point.
         *
         * Fills in default option values, parses the command line over them and
         * then either estimates a model (est / estc) or runs inference (inf) and
         * prints the first entries of every topic's phi row. The elapsed time is
         * printed after a successful run. Every exit path waits for Enter.
         *
         * @param args command-line arguments
         */
        static void Main(string[] args)
        {
            LDACommandLineOptions option = new LDACommandLineOptions();
            var parser = new Parser();

            // Defaults assigned before the command line is parsed.
            option.beta            = 0.1;
            option.K               = 10;
            option.niters          = 1000;
            option.savestep        = 100;
            option.twords          = 20;
            option.dfile           = "trndocs.dat";
            // NOTE(review): machine-specific default directory; confirm whether it should stay.
            option.dir             = @"C:\Users\Amine\Documents\visual studio 2013\Projects\GibbsLDA.NET\GibbsLDA.NET\data";
            option.est             = true;
            option.modelName       = "model-final";
            option.wordMapFileName = "wordmap.txt";

            var stopWatch = new Stopwatch();

            stopWatch.Start();

            try
            {
                parser.ParseArguments(args, option);
                if (option.est || option.estc)
                {
                    Estimator estimator = new Estimator();
                    // init reports failure through its return value; estimating on an
                    // uninitialized model would only fail later with a less useful error.
                    if (!estimator.init(option))
                    {
                        Console.WriteLine("Fail to initialize the estimator!");
                        Console.ReadLine();
                        return;
                    }
                    estimator.estimate();
                }
                else if (option.inf)
                {
                    Inferencer inferencer = new Inferencer();
                    if (!inferencer.init(option))
                    {
                        Console.WriteLine("Fail to initialize the inferencer!");
                        Console.ReadLine();
                        return;
                    }

                    Model newModel = inferencer.inference();

                    for (int i = 0; i < newModel.phi.Length; ++i)
                    {
                        //phi: K * V
                        Console.WriteLine("-----------------------\ntopic" + i + " : ");

                        // Show at most 10 entries, but never index past the end of the row.
                        int shown = Math.Min(10, newModel.phi[i].Length);
                        for (int j = 0; j < shown; ++j)
                        {
                            Console.WriteLine(inferencer.globalDict.Id2Word[j] + "\t" + newModel.phi[i][j]);
                        }
                    }
                }
            }
            catch (ParserException cle)
            {
                Console.WriteLine("Command line error: " + cle.Message);
                showHelp(option);
                Console.ReadLine();
                return;
            }
            catch (Exception e)
            {
                Console.WriteLine("Error in main: " + e.Message);
                Console.WriteLine(e.StackTrace);
                Console.ReadLine();
                return;
            }

            stopWatch.Stop();
            Console.WriteLine("\n This run took : " + stopWatch.ElapsedMilliseconds / 1000.0 + " seconds");
            Console.ReadLine();
        }
Ejemplo n.º 8
0
        /**
         * Initializes this model for inference on an already loaded dataset.
         *
         * K, alpha and beta are taken from the trained model (overriding what
         * init read from the options). Every word is then given a random topic
         * and the count tables are built from those assignments.
         *
         * @param option   command-line options
         * @param newData  dataset on which inference is performed
         * @param trnModel trained model supplying K, alpha and beta
         * @return false when init fails, true otherwise
         */
        public bool initNewModel(LDACommandLineOptions option, LDADataset newData, Model trnModel)
        {
            if (!init(option))
            {
                return false;
            }

            var rnd = new Random();

            // Hyper-parameters come from the trained model, not from the options.
            K     = trnModel.K;
            alpha = trnModel.alpha;
            beta  = trnModel.beta;

            p = new double[K];
            Console.WriteLine("K:" + K);

            data = newData;

            M        = data.M;
            V        = data.V;
            dir      = option.dir;
            savestep = option.savestep;
            Console.WriteLine("M:" + M);
            Console.WriteLine("V:" + V);

            // Count tables start at zero and are filled while topics are drawn.
            nw    = ArrayInitializers.ZerosInt(V, K);
            nd    = ArrayInitializers.ZerosInt(M, K);
            nwsum = ArrayInitializers.ZerosInt(K);
            ndsum = ArrayInitializers.ZerosInt(M);

            z = new List<int>[M];
            for (int docIndex = 0; docIndex < data.M; docIndex++)
            {
                int docLength = data.Docs[docIndex].Length;

                var assignments = new List<int>();
                z[docIndex] = assignments;

                for (int position = 0; position < docLength; position++)
                {
                    // random initial topic for this word
                    int topic = (int)Math.Floor(rnd.NextDouble() * K);
                    assignments.Add(topic);

                    int wordId = data.Docs[docIndex].Words[position];

                    // occurrences of this word assigned to this topic
                    nw[wordId][topic]++;
                    // words of this document assigned to this topic
                    nd[docIndex][topic]++;
                    // all words assigned to this topic
                    nwsum[topic]++;
                }

                // total number of words in this document
                ndsum[docIndex] = docLength;
            }

            theta = ArrayInitializers.Empty(M, K);
            phi   = ArrayInitializers.Empty(K, V);

            return true;
        }
Ejemplo n.º 9
0
        /**
         * Initializes this model for estimation from the training data file.
         *
         * This overload does not call init(option): K, dir and dfile are read
         * before anything here assigns them, so the caller has to set them on the
         * model beforehand.
         * NOTE(review): the original carried a commented-out init(option) call;
         * confirm whether skipping it is intentional.
         *
         * The training data is read from dir + "\\" + dfile, every word is given
         * a random topic, and the count tables are built from those assignments.
         *
         * @param option command-line options; read for dir and savestep
         * @return false when the training data cannot be read, true otherwise
         */
        public bool initNewModel(LDACommandLineOptions option)
        {
            var rnd = new Random();

            p = new double[K];

            data = LDADataset.ReadDataset(dir + "\\" + dfile);
            if (data == null)
            {
                Console.WriteLine("Fail to read training data!\n");
                return(false);
            }

            //+ allocate memory and assign values for variables
            M        = data.M;
            V        = data.V;
            dir      = option.dir;
            savestep = option.savestep;

            // Newly created arrays are zero-initialized by the runtime, so the
            // count tables need no explicit clearing.
            nw = new int[V][];
            for (int w = 0; w < V; w++)
            {
                nw[w] = new int[K];
            }

            nd = new int[M][];
            for (int m = 0; m < M; m++)
            {
                nd[m] = new int[K];
            }

            nwsum = new int[K];
            ndsum = new int[M];

            z = new List <int> [M];
            for (int m = 0; m < data.M; m++)
            {
                int N = data.Docs[m].Length;
                z[m] = new List <int>();

                // initialize z with a random topic per word
                for (int n = 0; n < N; n++)
                {
                    int topic = (int)Math.Floor(rnd.NextDouble() * K);
                    z[m].Add(topic);

                    // number of instances of word assigned to topic j
                    nw[data.Docs[m].Words[n]][topic] += 1;
                    // number of words in document i assigned to topic j
                    nd[m][topic] += 1;
                    // total number of words assigned to topic j
                    nwsum[topic] += 1;
                }
                // total number of words in document i
                ndsum[m] = N;
            }

            theta = new double[M][];
            for (int m = 0; m < M; m++)
            {
                theta[m] = new double[K];
            }

            phi = new double[K][];
            for (int k = 0; k < K; k++)
            {
                phi[k] = new double[V];
            }

            return(true);
        }