예제 #1
0
        /// <summary>
        /// Runs TF*IDF over two sample sentences and prints each sentence followed by
        /// the components of its normalized vector.
        /// </summary>
        static void Main(string[] args)
        {
            // Sample corpus.
            string[] documents = new string[]
            {
                "The sun in the sky is bright.",
                "We can see the shining sun, the bright sun."
            };

            // Weight the documents with TF*IDF, then normalize the resulting vectors.
            double[][] weighted = TFIDF.Transform(documents, 0);
            double[][] vectors  = TFIDF.Normalize(weighted);

            // Print every document followed by its vector, one document per paragraph.
            int row = 0;
            while (row < vectors.Length)
            {
                Console.WriteLine(documents[row]);

                double[] vector = vectors[row];
                for (int column = 0; column < vector.Length; column++)
                {
                    // Each component keeps a trailing ", " separator, including the last one.
                    string cell = vector[column] + ", ";
                    Console.Write(cell);
                }

                Console.WriteLine("\n");
                row++;
            }

            // Keep the console window open until the user reacts.
            Console.WriteLine("Press any key ..");
            Console.ReadKey();
        }
예제 #2
0
        /// <summary>
        /// Runs TF*IDF over three sample sentences, prints the key/value pairs of each
        /// normalized document vector and then the term ranking in ascending order.
        /// </summary>
        static void Main(string[] args)
        {
            // Sample corpus.
            string[] documents = new string[]
            {
                "My name is James",
                "James is the best in the world",
                "I sure do love James"
            };

            // Weight the documents with TF*IDF, then normalize the result.
            var vectors = TFIDF.Transform(documents, 0);
            vectors = TFIDF.Normalize(vectors);

            // Print every document followed by the entries of its vector.
            int docIndex = 0;
            while (docIndex < vectors.Count)
            {
                Console.WriteLine(documents[docIndex]);

                foreach (var entry in vectors[docIndex])
                {
                    // Rendered as "key:value, " (the trailing separator is kept on purpose).
                    string cell = entry.Key + ":" + entry.Value;
                    Console.Write(cell + ", ");
                }

                Console.WriteLine("\n");
                docIndex++;
            }

            Console.WriteLine("ranking");

            // Ranking entries ordered by ascending value.
            var ranking = from entry in TFIDF.GetRanking(vectors)
                          orderby entry.Value
                          select entry;

            foreach (var ranked in ranking)
            {
                Console.WriteLine(ranked.Key + " - " + ranked.Value);
            }

            Console.WriteLine("\n");
        }
예제 #3
0
        /// <summary>
        /// Tweet topic experiment with two compile-time variants.
        /// With the <c>Cluster</c> symbol defined, the tweets from "Train_NN.xlsx" are turned into
        /// normalized TF*IDF vectors, clustered with k-means and written per cluster to
        /// "Train_NN_2.txt". Otherwise the first 140 tweets train a deep belief network with
        /// 7 outputs, the remaining tweets are classified with it and the result is written to
        /// "Train_NN_With_Test_2.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
#if Cluster
            // Lines collected here are written to the report file at the end.
            List<string> outputLines = new List<string>();

            // Reference point for the "time to ..." messages. This must be the current time:
            // "new DateTime()" is 0001-01-01, which made every reported duration ~2000 years.
            DateTime timeStart = DateTime.Now;

            string[] documents = new GetTweets().GetTweetsFromExcelFile("Train_NN.xlsx");

            // Apply TF*IDF to the documents and get the resulting vectors.
            double[][] inputs = TFIDF.Transform(documents, 0);
            Console.WriteLine("time to transformation " + (DateTime.Now - timeStart));
            outputLines.Add("time to transformation " + (DateTime.Now - timeStart));
            Console.WriteLine("TFIDF transformation done...");

            inputs = TFIDF.Normalize(inputs);
            Console.WriteLine("time to Normalization " + (DateTime.Now - timeStart));
            outputLines.Add("time to Normalization " + (DateTime.Now - timeStart));
            Console.WriteLine("TFIDF Normalization done...");

            // The number of topics found in the corpus is used as the number of clusters.
            string[] topics = TFIDF.Topics(documents, 5);
            Console.WriteLine("time to topics " + (DateTime.Now - timeStart));
            outputLines.Add("time to topics " + (DateTime.Now - timeStart));
            Console.WriteLine("Topics gathered...");

            KMeans cluster = new KMeans(topics.Length, Distance.Cosine);

            // index[n] is the cluster label assigned to documents[n].
            int[] index = cluster.Compute(inputs);
            Console.WriteLine("time to cluster " + (DateTime.Now - timeStart));
            outputLines.Add("time to cluster " + (DateTime.Now - timeStart));
            Console.WriteLine("Clustering done...");

            const string separator = "--------------------------------------------------------";

            // NOTE(review): this walks the labels 1 .. topics.Length + 1. If KMeans.Compute
            // returns zero-based labels, the tweets of cluster 0 are never reported and the
            // last two sections are always empty - confirm against the KMeans implementation.
            for (int i = 0; i <= topics.Length; i++)
            {
                int label = i + 1;
                outputLines.Add(Convert.ToString(label));

                // Collect the tweets of this cluster so a topic can be derived from them.
                List<string> topicDecider = new List<string>();
                for (int j = 0; j < index.Length; j++)
                {
                    if (index[j] == label)
                    {
                        topicDecider.Add(documents[j]);
                    }
                }

                string[] topicString = TFIDF.Topics(topicDecider.ToArray(), topicDecider.Count / 2);

                // Section header: first derived topic, or "other" when none was found.
                outputLines.Add(separator);
                outputLines.Add("TOPIC: " + (topicString.Length == 0 ? "other" : topicString[0]));
                outputLines.Add(separator);

                for (int j = 0; j < index.Length; j++)
                {
                    if (index[j] == label)
                    {
                        outputLines.Add("Tweet ID " + j + ":\t" + documents[j]);
                    }
                }

                // Four blank lines between sections.
                outputLines.AddRange(new string[] { "", "", "", "" });
            }

            System.IO.File.WriteAllLines(@"Train_NN_2.txt", outputLines.ToArray());
            Console.WriteLine("Output is written...");
#else
            // The first trainCount rows are training data; everything after them is test data.
            const int trainCount = 140;
            // Number of network outputs / topic classes.
            const int classCount = 7;

            // Lines collected here are written to the report file at the end.
            List<string> outputLines = new List<string>();

            // Reference point for the "time to ..." messages. This must be the current time:
            // "new DateTime()" is 0001-01-01, which made every reported duration ~2000 years.
            DateTime timeStart = DateTime.Now;

            string[]   documents_Train = new GetTweets().GetTweetsFromExcelFile("Train_NN.xlsx");
            double[][] Train_Labels    = new GetTweets().GetLabelsFromExcelFile("Train_Labels.xlsx");

            // Apply TF*IDF to the documents and get the resulting vectors.
            double[][] inputs = TFIDF.Transform(documents_Train, 0);
            Console.WriteLine("time to transformation " + (DateTime.Now - timeStart));
            outputLines.Add("time to transformation " + (DateTime.Now - timeStart));
            Console.WriteLine("TFIDF transformation done...");

            inputs = TFIDF.Normalize(inputs);
            Console.WriteLine("time to Normalization " + (DateTime.Now - timeStart));
            outputLines.Add("time to Normalization " + (DateTime.Now - timeStart));
            Console.WriteLine("TFIDF Normalization done...");

            if (inputs.Length < trainCount)
            {
                throw new InvalidOperationException(
                    "At least " + trainCount + " documents are required for training, but only " +
                    inputs.Length + " were loaded.");
            }

            // Split the vectors: rows [0, trainCount) train the network, the rest are tested.
            int        testCount   = inputs.Length - trainCount;
            double[][] train_input = new double[trainCount][];
            double[][] testInputs  = new double[testCount][];

            for (int i = 0; i < trainCount; i++)
            {
                train_input[i] = (double[])inputs[i].Clone();
            }

            for (int i = 0; i < testCount; i++)
            {
                // Offset by trainCount so that testInputs[i] belongs to documents_Train[i + trainCount];
                // copying inputs[i] here would re-use training rows and misalign the report below.
                testInputs[i] = (double[])inputs[i + trainCount].Clone();
            }

            // Setup the deep belief network and initialize with random weights.
            DeepBeliefNetwork network = new DeepBeliefNetwork(train_input.First().Length, classCount);
            new GaussianWeights(network, 0.1).Randomize();
            network.UpdateVisibleWeights();

            // Setup the learning algorithm.
            DeepBeliefNetworkLearning teacher = new DeepBeliefNetworkLearning(network)
            {
                Algorithm = (h, v, i) => new ContrastiveDivergenceLearning(h, v)
                {
                    LearningRate = 0.1,
                    Momentum     = 0.5,
                    Decay        = 0.001,
                }
            };

            // Split the training rows into random mini-batches (at least one batch).
            int          batchCount = Math.Max(1, train_input.Length / 100);
            int[]        groups     = Accord.Statistics.Tools.RandomGroups(train_input.Length, batchCount);
            double[][][] batches    = train_input.Subgroups(groups);

            // Unsupervised learning on each hidden layer, except for the output layer.
            for (int layerIndex = 0; layerIndex < network.Machines.Count - 1; layerIndex++)
            {
                teacher.LayerIndex = layerIndex;
                double[][][] layerData = teacher.GetLayerInput(batches);
                for (int i = 0; i < 200; i++)
                {
                    double error = teacher.RunEpoch(layerData) / train_input.Length;
                    if (i % 10 == 0)
                    {
                        Console.WriteLine(i + ", Error = " + error);
                    }
                }
            }

            // Supervised learning on entire network, to provide output classification.
            var teacher2 = new BackPropagationLearning(network)
            {
                LearningRate = 0.1,
                Momentum     = 0.5
            };

            // Transpose the labels: Train_Labels is indexed [class][document], while the
            // teacher is given one row per document.
            double[][] Train_Labels_T = new double[trainCount][];
            for (int i = 0; i < trainCount; i++)
            {
                Train_Labels_T[i] = new double[classCount];
                for (int j = 0; j < classCount; j++)
                {
                    Train_Labels_T[i][j] = Train_Labels[j][i];
                }
            }

            // Run supervised learning.
            for (int i = 0; i < 500; i++)
            {
                double error = teacher2.RunEpoch(train_input, Train_Labels_T) / train_input.Length;
                if (i % 10 == 0)
                {
                    Console.WriteLine(i + ", Error = " + error);
                }
            }
            outputLines.Add("time to Training " + (DateTime.Now - timeStart));

            // Classify every test row.
            double[][] outputValues = new double[testInputs.Length][];
            for (int i = 0; i < testInputs.Length; i++)
            {
                outputValues[i] = network.Compute(testInputs[i]);
            }
            outputLines.Add("time to Testing/clustering " + (DateTime.Now - timeStart));

            string[] spacer = { "", "", "" };
            outputLines.AddRange(spacer);

            // One bucket of report lines per class; topicNames[c] is the heading of bucket c.
            string[] topicNames =
            {
                "WEATHER", "MUSIC", "ITALY", "FOOD", "FASHION", "FOOTBALL", "OTHER"
            };
            List<string>[] classes = new List<string>[classCount];
            for (int c = 0; c < classCount; c++)
            {
                classes[c] = new List<string>();
            }

            const string classSeparator = "-------------------------------";

            // Build the report buckets.
            for (int i = 0; i < documents_Train.Length; i++)
            {
                if (i < trainCount)
                {
                    // Training tweets are laid out in blocks of ten per topic (0-9, 10-19, ...);
                    // everything from index 60 up to the end of the training range is "OTHER".
                    int classIndex = Math.Min(i / 10, classCount - 1);

                    // The first tweet of a class also emits the class heading.
                    if (classes[classIndex].Count == 0)
                    {
                        classes[classIndex].Add(classSeparator);
                        classes[classIndex].Add("TOPIC: " + topicNames[classIndex]);
                        classes[classIndex].Add(classSeparator);
                    }
                    classes[classIndex].Add("Training_Tweet:\t" + documents_Train[i]);
                }
                else if (i - trainCount < outputValues.Length)
                {
                    // A test tweet goes into the bucket of the strongest network output.
                    int what = outputValues[i - trainCount].IndexOf(outputValues[i - trainCount].Max());
                    if (what >= 0 && what < classCount)
                    {
                        classes[what].Add("Test_Tweet:\t" + documents_Train[i]);
                    }
                }
            }

            // Emit the buckets, each surrounded by three blank lines.
            outputLines.AddRange(spacer);
            for (int c = 0; c < classCount; c++)
            {
                outputLines.AddRange(classes[c]);
                outputLines.AddRange(spacer);
            }

            System.IO.File.WriteAllLines(@"Train_NN_With_Test_2.txt", outputLines.ToArray());

            Console.Write("Press any key to quit ..");
#endif

            Console.ReadKey();
        }
예제 #4
0
        /// <summary>
        /// Runs TF*IDF over a set of English news snippets, echoes the snippets and then prints
        /// the inverse of the vocabulary value for the entries with the lowest values, limited to
        /// one tenth of the word count.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Sample corpus. (A commented-out Russian-language sample set that used to sit at
            // the top of this list was dead code and has been removed.)
            string[] documents =
            {
                "This is why Facebook's data scandal matters — and what it means for your personal information",
                "Trump's Twitter feed over the past 72 hours reeks of that attempted character assassination, writes Chris Cillizza",
                "A total WITCH HUNT with massive conflicts of interest! President Donald J. Trump tweeted Monday morning",
                "One of the students who died in Parkland was buried in Dwyane Wade's jersey. Wade dedicated his season to the student, and the NBA star has now donated $200,000 to the March for Our Lives, a protest in Washington DC this Saturday. I do more than dribble, he tells CNN.",
                "Syrian President Bashar al Assad paraded the partial success of his months-long onslaught of a rebel-held suburb of Damascus by driving himself into the enclave in a Honda Civic",
                "Claire's says it has pierced more than 100 million ears around the world",
                "Spring begins Tuesday, but winter isn't ready to retire just yet",
                "Remember that huge back tattoo Ben Affleck said was fake? Apparently not so much.",
                "A drone captured stunning cherry blossoms and other signs of spring in China",
                "We're watching the opening bell as Facebook shares are down more than 4% after a data controversy that unfolded over the weekend.",
                "Lolade Siyonbola, a black Yale University student, was napping in her dorm's common room when a white student called police. There needs to be punitive measures for people who act out of racially motivated bias,  Siyonbola told Good Morning America https://cnn.it/2GhfTeq",
                "We’re live at the White House for the press briefing.",
                "The Humbolt Broncos junior hockey team will be in the rink again in time for the fall season. The team lost 10 players, two coaches and a trainer in a devastating crash that killed 16 people.",
                "I just don't know what goes on in that White House mentality for there not being an apology for that terrible remark",
                "Howard Forever. In an inspiring commencement speech, Chadwick Boseman, the man behind the Black Panther mask, tells Howard University graduates that this is your time, ending with the movie's iconic salute. https://cnn.it/2IjiF8X",
                "This 33-year-old writer and activist has mobilized tens of thousands of Ganzans in recent weeks",
                "The Supreme Court struck down a 1992 federal law that prohibited most states from authorizing sports betting",
                "The US officially relocated its Embassy to Jerusalem, formally upending decades of American foreign policy in a move that was met with clashes and protests along the Israeli-Gaza border.",
                "The man behind the Black Panther mask gives a stirring graduation speech, ending with the iconic salute from the movie saying, Howard Forever",
                "“By moving our embassy to Jerusalem, we have shown the world once again that the United States can be trusted... The United States stands with Israel because we believe — we know — that it is the right thing to do,” says White House senior adviser Jared Kushner at the US Embassy opening in Jerusalem https://cnn.it/2L0Gssf",
                "We're watching the markets live as the Dow goes for its eighth straight day of gains — the longest winning streak for the index since November 2017 https://cnnmon.ie/2GcUovB",
                "“Be fearless… Don’t just accept the world you inherit today. Don’t just accept the status quo. No big challenge has ever been solved… unless people dare to try something different”: Apple CEO Tim Cook shared this message of inspiration to Duke University graduates https://cnn.it/2L0Uzxp",
                "We're live in Jerusalem, where the Trump administration is officially moving the US embassy in Israel from Tel Aviv, breaking with decades of established US policy and international practice. https://cnn.it/2IkRU02",
                "The Israeli army air-dropped leaflets over Gaza warning people not to approach the fence that separates Gaza from Israel.",
                "Border Patrol counted 5,984 deaths of people crossing illegally in the border region over a 16-year period. But a CNN investigation found at least 564 people who died but weren’t counted.",
                "Is a nice stroll enough to confer the life-saving benefits we know come from exercise? We posed the question to five specialists in the field.",
                "It's the first time WHO is calling for the elimination of something other than a noncommunicable disease.",
                "Hockey moms united this Mother's Day for the mothers in Canada who lost their children in the Humboldt Broncos bus crash, sending them virtual hugs and bouquets.",
                "Leaked audio from from one of Pastor John Hagee's sermons in the 1990s seemed to suggest that Adolf Hitler had been fulfilling God's will by aiding the desire of Jews to return to Israel, while Pastor Robert Jeffress has said that Mormons, Muslims and Hindus worship a false god.",
                "Iran's Foreign minister will also meet with representatives from Germany, France and the United Kingdom to discuss the future of the nuclear deal, after Donald J. Trump announced that the US would be withdrawing from it.",
                "It's a historic step in the country's mission to build a navy capable of rivaling the world's leading maritime powers",
                "The National Rifle Association says Governor Andrew Cuomo and a state agency have coerced banks and insurance companies to withhold services to the gun lobbying group",
                "Today, the Trump administration will move the US embassy in Israel to Jerusalem"
            };

            // Apply TF*IDF to the documents and get the resulting vectors.
            // Original author's note (translated from Russian): "outputs the frequency values of
            // words that occur at least twice" - NOTE(review): not verifiable from this method.
            double[][] inputs  = TFIDF.Transform(documents, 0);
            double[][] inputs2 = TFIDF.Normalize(inputs);
            Dictionary<string, double> vocab = TFIDF._vocabularyIDF;

            // Echo every document, followed by blank lines.
            for (int index = 0; index < inputs2.Length; index++)
            {
                Console.WriteLine(documents[index]);
                Console.WriteLine();
                Console.WriteLine("\n");
            }

            List<string> words = TFIDF.words;

            // Sort the vocabulary entries by ascending value. The sorted list is used directly:
            // copying it into a Dictionary and enumerating that would rely on an enumeration
            // order that Dictionary does not guarantee.
            List<KeyValuePair<string, double>> sortedVocab = vocab.ToList();
            sortedVocab.Sort((pair1, pair2) => pair1.Value.CompareTo(pair2.Value));

            // Print only the leading tenth (measured against the word count) of the entries.
            int shown = words.Count / 10;
            for (int k = 0; k < shown && k < sortedVocab.Count; k++)
            {
                KeyValuePair<string, double> entry = sortedVocab[k];
                Console.WriteLine(Math.Round(1 / entry.Value, 3) + " " + entry.Key);
            }

            Console.WriteLine("Press any key ..");
            Console.ReadKey();
        }
예제 #5
0
        /// <summary>
        /// Loads the "description" column of the first 100 rows of
        /// [dbo].[GCC_Support_Case] and runs the TF*IDF transformation over them.
        /// The transformation result is not used by this method.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Descriptions read from the database, one entry per row.
            List<string> stList = new List<string>();

            // "using" guarantees that connection, command, adapter and data set are released
            // on every path, including when opening the connection or the query fails.
            using (SqlConnection connection = new SqlConnection(conn))
            using (SqlCommand command = new SqlCommand("SELECT  top 100 [case_number] " +
                                                      " ,[description] from [dbo].[GCC_Support_Case] ", connection))
            using (SqlDataAdapter custAdapter = new SqlDataAdapter())
            using (DataSet customerEmail = new DataSet())
            {
                command.CommandType       = CommandType.Text;
                custAdapter.SelectCommand = command;

                try
                {
                    connection.Open();
                    custAdapter.Fill(customerEmail, "tblTransaction");
                    foreach (DataRow pRow in customerEmail.Tables["tblTransaction"].Rows)
                    {
                        stList.Add(pRow["description"].ToString());
                    }
                }
                catch (SqlException ex)
                {
                    // Keep the previous best-effort behaviour (continue with whatever was
                    // read so far), but report the failure instead of hiding it.
                    Console.Error.WriteLine("Failed to load support case descriptions: " + ex.Message);
                }
            }

            string[] documents = stList.ToArray();

            // Apply TF*IDF to the documents; the returned vectors are intentionally discarded.
            TFIDF.Transform(documents, 0);

            Console.WriteLine("Press any key ..");
            Console.ReadKey();
        }