Ejemplo n.º 1
0
        /// <summary>
        /// Console entry point: fits an LDA topic model on the training split of the Reuters
        /// corpus, stores it as "reuters-lda", reloads it, and prints the predicted topics for
        /// every document of the test split.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        /// <returns>A task that completes once every test document has been handled.</returns>
        static async Task Main(string[] args)
        {
            const string separator = "------------------------------------------";

            Console.OutputEncoding = Encoding.UTF8;
            ApplicationLogging.SetLoggerFactory(LoggerFactory.Create(builder => builder.AddConsole()));

            // Languages have to be registered before they can be used.
            Catalyst.Models.English.Register();

            // Keep models in the local folder ./catalyst-models/
            Storage.Current = new DiskStorage("catalyst-models");

            // Fetch the Reuters corpus (downloaded if necessary).
            var (trainingCorpus, testCorpus) = await Corpus.Reuters.GetAsync();

            // The corpus text is untokenized so far, so run it through the English pipeline.
            var pipeline      = Pipeline.For(Language.English);
            var trainingSet   = pipeline.Process(trainingCorpus).ToArray();
            var evaluationSet = pipeline.Process(testCorpus).ToArray();

            // Train an LDA topic model on the training dataset and store it.
            using (var trainer = new LDA(Language.English, 0, "reuters-lda"))
            {
                trainer.Data.NumberOfTopics = 20; // Arbitrary number of topics
                trainer.Train(trainingSet, Environment.ProcessorCount);
                await trainer.StoreAsync();
            }

            // Reload the stored model and report the topics of each test document.
            using (var model = await LDA.FromStoreAsync(Language.English, 0, "reuters-lda"))
            {
                foreach (var document in evaluationSet)
                {
                    if (!model.TryPredict(document, out var predicted))
                    {
                        continue;
                    }

                    // One line per predicted topic; a topic that cannot be described yields an empty line.
                    var lines = predicted.Select(topic =>
                    {
                        if (model.TryDescribeTopic(topic.TopicID, out var description))
                        {
                            return $"[{topic.Score:n3}] => {description.ToString()}";
                        }

                        return "";
                    });
                    var report = string.Join("\n", lines);

                    Console.WriteLine(separator);
                    Console.WriteLine(document.Value);
                    Console.WriteLine(separator);
                    Console.WriteLine(report);
                    Console.WriteLine("------------------------------------------\n\n");
                }
            }
        }
        /// <summary>
        /// Trains an LDA topic model on the rows of <c>TestApplication.dbo.Files</c> where
        /// <c>test = 0</c>, stores it as "reuters-lda", then reloads it and prints the predicted
        /// topics for every row where <c>test = 1</c>.
        /// </summary>
        /// <remarks>
        /// Any exception raised while reading, training or predicting is caught and reported on
        /// the console as "Error: ..."; it is not propagated to the caller.
        /// The model is trained once on the full training set and each test document is
        /// predicted once, instead of repeating that work for every row read.
        /// </remarks>
        /// <returns>A task that completes when the test documents have been handled or an error was reported.</returns>
        public static async Task LDAllocation()
        {
            Storage.Current = new OnlineRepositoryStorage(new DiskStorage("catalyst-models"));

            string connectionStr = "Data Source = (localdb)\\MSSQLLocalDB; Initial Catalog = master; Integrated Security = True; Connect Timeout = 30; Encrypt = False; TrustServerCertificate = False; ApplicationIntent = ReadWrite; MultiSubnetFailover = False";
            string sqlQuery      = "SELECT * FROM TestApplication.dbo.Files where test = 0";
            string sqlQuery1     = "SELECT * FROM TestApplication.dbo.Files where test = 1";
            var    nlp           = Pipeline.For(Language.English);
            var    reg           = new System.Text.RegularExpressions.Regex("/[^/]*/");

            try
            {
                // 'using' closes and disposes the connection on every path, including failures.
                using (var connection = new SqlConnection(connectionStr))
                {
                    connection.Open();

                    var train = ReadLabeledDocuments(connection, sqlQuery, reg);
                    if (train.Count > 0)
                    {
                        var trainDocs = nlp.Process(train).ToArray();
                        using (var lda = new LDA(Language.English, 0, "reuters-lda"))
                        {
                            lda.Data.NumberOfTopics = 20; //Arbitrary number of topics
                            lda.Train(trainDocs, Environment.ProcessorCount);
                            await lda.StoreAsync();
                        }
                    }

                    var test = ReadLabeledDocuments(connection, sqlQuery1, reg);
                    if (test.Count > 0)
                    {
                        var testDocs = nlp.Process(test).ToArray();

                        // Load the stored model a single time and reuse it for every test document.
                        using (var lda = await LDA.FromStoreAsync(Language.English, 0, "reuters-lda"))
                        {
                            foreach (var doc in testDocs)
                            {
                                if (lda.TryPredict(doc, out var topics))
                                {
                                    var docTopics = string.Join("\n", topics.Select(t => lda.TryDescribeTopic(t.TopicID, out var td) ? $"[{t.Score:n3}] => {td.ToString()}" : ""));

                                    Console.WriteLine("------------------------------------------");
                                    Console.WriteLine(doc.Value);
                                    Console.WriteLine("------------------------------------------");
                                    Console.WriteLine(docTopics);
                                    Console.WriteLine("------------------------------------------\n\n");
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Error: " + e.Message);
            }
        }

        /// <summary>
        /// Executes <paramref name="sqlQuery"/> on <paramref name="connection"/> and builds one
        /// English <c>Document</c> per returned row, labelled with the third match of
        /// <paramref name="labelPattern"/> in the row's second column (surrounding '/' trimmed).
        /// </summary>
        /// <param name="connection">An already opened connection.</param>
        /// <param name="sqlQuery">The query whose rows are converted to documents.</param>
        /// <param name="labelPattern">Pattern used to pick the label out of the second column.</param>
        /// <returns>The documents in the order the rows were returned; empty when there are no rows.</returns>
        private static System.Collections.Generic.List<Document> ReadLabeledDocuments(
            SqlConnection connection,
            string sqlQuery,
            System.Text.RegularExpressions.Regex labelPattern)
        {
            var documents = new System.Collections.Generic.List<Document>();

            using (var command = new SqlCommand(sqlQuery, connection))
            using (SqlDataReader reader = command.ExecuteReader())
            {
                while (reader.Read())
                {
                    NLPFile file = new NLPFile((IDataRecord)reader);
                    var     doc  = new Document(file.FetchData(), Language.English);

                    // The label comes from the reader that is currently open for this query.
                    // NOTE(review): the second column is presumably a path with at least three
                    // "/.../" segments; a row with fewer matches throws here, as it did before.
                    string label = labelPattern.Matches((string)reader[1])[2].Value;
                    label = label.Trim('/');

                    doc.Labels.Add(label);
                    documents.Add(doc);
                }
            }

            return documents;
        }