/// <summary>
/// Demo entry point: builds a TF-IDF measure over four sample documents and
/// writes a logarithm and one pairwise similarity to the trace listeners.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Constructed but never referenced again in this method. Kept because the
    // constructor may have side effects -- presumably it prepares the stop-word
    // data used elsewhere; confirm against StopWordsHandler.
    StopWordsHandler stopWords = new StopWordsHandler();

    // Sample corpus: one string per document.
    string[] corpus =
    {
        "test b c c dd d d d d",
        "a b c c c d dd d d test" ,
        "c d b f y teyr etre tretr gfgd c",
        "r a e e f n l i f f f f x l"
    };

    TFIDFMeasure measure = new TFIDFMeasure(corpus);

    // Integer division happens first (10000 / 50 == 200), then the natural log.
    double logOfRatio = Math.Log(10000 / 50);
    Trace.WriteLine(logOfRatio);

    // Similarity between the first and second sample documents.
    Trace.WriteLine(measure.GetSimilarity(0, 1));

    // Result is not used further; the call is retained as in the original demo.
    string[] trigrams = NGram.GenerateNGrams("TEXT", 3);
}
/// <summary>
/// Splits <paramref name="input"/> into lower-cased tokens, dropping delimiter
/// characters, blank pieces and anything StopWordsHandler reports as a stop word.
/// </summary>
/// <param name="input">
/// Text to tokenize. A null reference is treated like text with no tokens.
/// </param>
/// <returns>
/// The surviving tokens in their original order, converted by ArrayListToArray.
/// </returns>
public string[] Partition(string input)
{
    ArrayList filter = new ArrayList();

    // Null carries no tokens; return the same empty result an empty string gives
    // instead of failing with a NullReferenceException.
    if (input == null)
    {
        return ArrayListToArray(filter);
    }

    // Delimiters: space, tab, braces, parentheses, colon, semicolon, period, newline.
    // The capturing group makes Split() return the delimiters themselves as
    // separate entries, which is why they are filtered out again below.
    Regex r = new Regex("([ \\t{}():;. \n])");

    // Invariant-culture lowercasing keeps tokenization independent of the
    // machine's current culture (e.g. Turkish dotted/dotless 'i' casing).
    input = input.ToLower(System.Globalization.CultureInfo.InvariantCulture);

    string[] tokens = r.Split(input);
    for (int i = 0; i < tokens.Length; i++)
    {
        string token = tokens[i];

        // Skip entries that are (or contain) a delimiter.
        if (r.IsMatch(token))
        {
            continue;
        }

        // Skip empty or whitespace-only pieces produced between adjacent delimiters.
        if (token.Trim().Length == 0)
        {
            continue;
        }

        if (!StopWordsHandler.IsStopword(token))
        {
            filter.Add(token);
        }
    }

    return ArrayListToArray(filter);
}