Exemplo n.º 1
0
        /// <summary>
        /// Vectorises <paramref name="standardInput"/> against the feature IDF data
        /// loaded from <paramref name="featureFile"/>.
        /// </summary>
        /// <param name="standardInput">Input text, forwarded unchanged to <c>VectoriseInput</c>.</param>
        /// <param name="featureFile">Path handed to <c>Bow_tfidf.GetFeaturesIdf</c> to load the feature IDF data.</param>
        /// <returns>The list of doubles produced by <c>VectoriseInput</c>.</returns>
        public static List<double> VectoriseStandardInput(string standardInput, string featureFile)
        {
            // Load the feature data first, then delegate the actual vectorisation.
            return VectoriseInput(standardInput, Bow_tfidf.GetFeaturesIdf(featureFile));
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs the full pipeline: reads <paramref name="inputFile"/>, writes the
        /// pre-processed text to <paramref name="processedFile"/>, builds the feature
        /// list in <paramref name="featureFile"/>, and finally invokes
        /// <c>Bow_tfidf.BoW_tfidf</c> to produce <paramref name="outputFile"/>.
        /// </summary>
        /// <param name="inputFile">Path read by <c>FileIO.ReadFileIntoVector</c>.</param>
        /// <param name="processedFile">Path the pre-processed text is written to; also the input of the feature and tf-idf steps.</param>
        /// <param name="featureFile">Path passed to <c>Bow_tfidf.CreateFeatureList</c> and <c>Bow_tfidf.BoW_tfidf</c>.</param>
        /// <param name="outputFile">Path passed to <c>Bow_tfidf.BoW_tfidf</c> as its output argument.</param>
        /// <remarks>
        /// The stop-word and round file paths come from the <c>StopWordFile</c> and
        /// <c>RoundFile</c> application settings. <c>AppSettings.Get</c> returns
        /// <c>null</c> for a missing key; how the callees treat <c>null</c> is not
        /// visible here - NOTE(review): confirm both keys are always configured.
        /// </remarks>
        public static void GenerateTFIDFMatrix(string inputFile, string processedFile, string featureFile, string outputFile)
        {
            // Pre-processing text.
            // The vectors are supplied through the out parameter, so no list is
            // allocated up front; the method's return value is not needed here.
            List<Vector> vectors;
            bool hasValueType = true;
            FileIO.ReadFileIntoVector(inputFile, out vectors, hasValueType);

            string stopWordFile = ConfigurationManager.AppSettings.Get("StopWordFile");
            var processedText = StringHelper.ReproduceText(vectors, stopWordFile);
            FileIO.WriteFile(processedText, processedFile);

            // Extract features
            Bow_tfidf.CreateFeatureList(processedFile, featureFile);

            // Calculate tf_idf
            string roundFile = ConfigurationManager.AppSettings.Get("RoundFile");
            Bow_tfidf.BoW_tfidf(processedFile, outputFile, featureFile, roundFile);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Entry point: runs the pre-processing, feature-extraction and tf-idf steps
        /// against a fixed set of files addressed relative to the working directory.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Fixed relative paths used by the pipeline.
            const string inputFile         = "../../raw_text.txt";
            const string processedTextFile = "../../processedFile.txt";
            const string featureFile       = "../../featureList.txt";
            const string roundFile         = "../../round.txt";
            const string outputFile        = "../../output.txt";
            const string stopWordFile      = "../../stop-words.txt";

            // Pre-processing text.
            // The vectors are supplied through the out parameter, so no list is
            // allocated up front; the method's return value is not needed here.
            List<Vector> vectors;
            bool hasValueType = true;
            FileIO.ReadFileIntoVector(inputFile, out vectors, hasValueType);

            var processedText = StringHelper.ReproduceText(vectors, stopWordFile);
            FileIO.WriteFile(processedText, processedTextFile);

            // Extract features
            Bow_tfidf.CreateFeatureList(processedTextFile, featureFile);

            // Calculate tf_idf
            Bow_tfidf.BoW_tfidf(processedTextFile, outputFile, featureFile, roundFile);

            // NOTE: the similar-document search step is currently disabled. It used
            // Vector.Search("../../search.txt") and wrote each result as
            // "<Key> <Value>" lines to "../../similarDocuments.txt".
        }