/// <summary>
/// Sample entry point: trains an LDA topic model on the Reuters training split, stores it under
/// the tag "reuters-lda", reloads it and prints the predicted topics for every test document.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static async Task Main(string[] args)
{
    const string separator = "------------------------------------------";

    Console.OutputEncoding = Encoding.UTF8;
    ApplicationLogging.SetLoggerFactory(LoggerFactory.Create(builder => builder.AddConsole()));

    // Languages have to be registered before they can be used.
    Catalyst.Models.English.Register();

    // Keep models in the local folder ./catalyst-models/.
    Storage.Current = new DiskStorage("catalyst-models");

    // Fetch the Reuters corpus (downloaded if necessary).
    var (trainingSet, testSet) = await Corpus.Reuters.GetAsync();

    // The corpus text is untokenized so far, so run it through the English pipeline.
    var pipeline = Pipeline.For(Language.English);
    var processedTraining = pipeline.Process(trainingSet).ToArray();
    var processedTest = pipeline.Process(testSet).ToArray();

    // Train an LDA topic model on the training dataset and persist it.
    using (var trainer = new LDA(Language.English, 0, "reuters-lda"))
    {
        trainer.Data.NumberOfTopics = 20; // Arbitrary number of topics
        trainer.Train(processedTraining, Environment.ProcessorCount);
        await trainer.StoreAsync();
    }

    // Reload the stored model and report the topics of each test document.
    using (var model = await LDA.FromStoreAsync(Language.English, 0, "reuters-lda"))
    {
        foreach (var document in processedTest)
        {
            if (!model.TryPredict(document, out var predicted))
            {
                continue;
            }

            // One line per predicted topic; a topic that cannot be described yields an empty line.
            var topicLines = predicted.Select(topic =>
            {
                if (model.TryDescribeTopic(topic.TopicID, out var description))
                {
                    return $"[{topic.Score:n3}] => {description.ToString()}";
                }

                return "";
            });
            var report = string.Join("\n", topicLines);

            Console.WriteLine(separator);
            Console.WriteLine(document.Value);
            Console.WriteLine(separator);
            Console.WriteLine(report);
            Console.WriteLine("------------------------------------------\n\n");
        }
    }
}
/// <summary>
/// Trains an LDA topic model on the rows of TestApplication.dbo.Files where test = 0, stores it
/// under the tag "reuters-lda", then reloads it and prints the predicted topics for the rows
/// where test = 1.
/// </summary>
/// <remarks>
/// Every exception is caught and reported on the console as "Error: ..." instead of being
/// propagated to the caller.
/// NOTE(review): Main registers the English models before building a pipeline; confirm that
/// registration has happened before this method is called.
/// </remarks>
/// <returns>A task that completes once training and prediction output have finished.</returns>
public static async Task LDAllocation()
{
    Storage.Current = new OnlineRepositoryStorage(new DiskStorage("catalyst-models"));

    string connectionStr = "Data Source = (localdb)\\MSSQLLocalDB; Initial Catalog = master; Integrated Security = True; Connect Timeout = 30; Encrypt = False; TrustServerCertificate = False; ApplicationIntent = ReadWrite; MultiSubnetFailover = False";
    string sqlQuery = "SELECT * FROM TestApplication.dbo.Files where test = 0";
    string sqlQuery1 = "SELECT * FROM TestApplication.dbo.Files where test = 1";

    var nlp = Pipeline.For(Language.English);

    try
    {
        System.Collections.Generic.List<Document> train;
        System.Collections.Generic.List<Document> test;

        // Load both document sets up front so the connection is released before the
        // long-running training and prediction steps start.
        using (var connection = new SqlConnection(connectionStr))
        {
            connection.Open();
            train = ReadLabeledDocuments(connection, sqlQuery);
            test = ReadLabeledDocuments(connection, sqlQuery1);
        }

        // Train once on the complete training set, not once per row read.
        // With no training rows nothing is trained or stored.
        if (train.Count > 0)
        {
            var trainDocs = nlp.Process(train).ToArray();
            using (var lda = new LDA(Language.English, 0, "reuters-lda"))
            {
                lda.Data.NumberOfTopics = 20; // Arbitrary number of topics
                lda.Train(trainDocs, Environment.ProcessorCount);
                await lda.StoreAsync();
            }
        }

        // Load the stored model once and report each test document exactly once.
        if (test.Count > 0)
        {
            var testDocs = nlp.Process(test).ToArray();
            using (var lda = await LDA.FromStoreAsync(Language.English, 0, "reuters-lda"))
            {
                foreach (var doc in testDocs)
                {
                    if (!lda.TryPredict(doc, out var topics))
                    {
                        continue;
                    }

                    // One line per predicted topic; a topic that cannot be described yields an empty line.
                    var docTopics = string.Join("\n", topics.Select(t => lda.TryDescribeTopic(t.TopicID, out var td) ? $"[{t.Score:n3}] => {td.ToString()}" : ""));

                    Console.WriteLine("------------------------------------------");
                    Console.WriteLine(doc.Value);
                    Console.WriteLine("------------------------------------------");
                    Console.WriteLine(docTopics);
                    Console.WriteLine("------------------------------------------\n\n");
                }
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Error: " + e.Message);
    }
}

/// <summary>
/// Runs <paramref name="query"/> on an open connection and turns every returned row into an
/// English <c>Document</c> carrying one label taken from column 1 of that row.
/// </summary>
/// <param name="connection">An already opened connection; it is not closed here.</param>
/// <param name="query">The SELECT statement to execute.</param>
/// <returns>The documents in the order the rows were read; empty when there are no rows.</returns>
private static System.Collections.Generic.List<Document> ReadLabeledDocuments(SqlConnection connection, string query)
{
    var documents = new System.Collections.Generic.List<Document>();
    var segmentPattern = new System.Text.RegularExpressions.Regex("/[^/]*/");

    // The using statements dispose the command and the reader even when a row fails to convert.
    using (var command = new SqlCommand(query, connection))
    using (var reader = command.ExecuteReader())
    {
        while (reader.Read())
        {
            var file = new NLPFile((IDataRecord)reader);
            var doc = new Document(file.FetchData(), Language.English);

            // Column 1 is presumably the file path - verify against the table schema.
            doc.Labels.Add(ExtractLabel(segmentPattern, (string)reader[1]));
            documents.Add(doc);
        }
    }

    return documents;
}

/// <summary>
/// Returns the third non-overlapping "/.../" match of <paramref name="segmentPattern"/> in
/// <paramref name="path"/>, with the surrounding slashes trimmed.
/// </summary>
/// <param name="segmentPattern">The pattern matching one slash-delimited segment.</param>
/// <param name="path">The text to extract the label from.</param>
/// <returns>The label text without leading or trailing '/'.</returns>
/// <exception cref="FormatException">The text contains fewer than three matches.</exception>
private static string ExtractLabel(System.Text.RegularExpressions.Regex segmentPattern, string path)
{
    var segments = segmentPattern.Matches(path);
    if (segments.Count < 3)
    {
        throw new FormatException($"Cannot extract a label from '{path}': expected at least three '/.../' segments but found {segments.Count}.");
    }

    return segments[2].Value.Trim('/');
}