Ejemplo n.º 1
0
        /// <summary>
        /// Demo entry point: builds a <c>TFIDFMeasure</c> over four sample strings,
        /// writes two values to the trace listeners, and generates n-grams for "TEXT".
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Never referenced again in this method; the construction is kept because
            // the constructor may perform set-up work (NOTE(review): confirm).
            StopWordsHandler stopWords = new StopWordsHandler();

            // Sample corpus: four whitespace-separated documents.
            string[] documents =
            {
                "test b c c dd d d d d",
                "a b c c c d dd d d test",
                "c d b f y teyr etre tretr gfgd c",
                "r a e e f n l i f f f f x l"
            };

            TFIDFMeasure measure = new TFIDFMeasure(documents);

            // 10000 / 50 is integer division (exactly 200) before the natural log is taken.
            double logValue = Math.Log(10000 / 50);
            Trace.WriteLine(logValue);

            // Trace the value GetSimilarity reports for documents 0 and 1.
            Trace.WriteLine(measure.GetSimilarity(0, 1));

            // Result is not used further here; the call itself is preserved.
            string[] trigrams = NGram.GenerateNGrams("TEXT", 3);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Lower-cases <paramref name="input"/>, splits it on the delimiter set
        /// (space, tab, braces, parentheses, colon, semicolon, period, newline)
        /// and keeps the tokens that are neither delimiters, blank, nor stop words.
        /// </summary>
        /// <param name="input">Text to tokenize.</param>
        /// <returns>
        /// The kept tokens, in their original order, as converted by
        /// <c>ArrayListToArray</c>.
        /// </returns>
        public string[] Partition(string input)
        {
            Regex delimiter = new Regex("([ \\t{}():;. \n])");

            // Invariant casing keeps tokenization independent of the current thread
            // culture (e.g. under tr-TR, ToLower() maps "I" to dotless "ı", which
            // would change the tokens produced for the same text).
            input = input.ToLowerInvariant();

            // Because the pattern is a capturing group, Split also returns the
            // delimiters themselves as elements; they are filtered out below.
            String[] tokens = delimiter.Split(input);

            ArrayList filter = new ArrayList();

            foreach (string token in tokens)
            {
                // Skip delimiter elements and empty/whitespace-only fragments.
                if (delimiter.IsMatch(token) || token.Trim().Length == 0)
                {
                    continue;
                }

                if (StopWordsHandler.IsStopword(token))
                {
                    continue;
                }

                filter.Add(token);
            }

            return ArrayListToArray(filter);
        }