예제 #1
0
 /// <summary>
 /// Ratio of link-like tokens in <paramref name="data"/> to its total word count.
 /// </summary>
 /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
 /// <returns>
 /// The count reported by <c>PrefixFrequency</c> for the link prefixes divided by the
 /// number of words, or 0 when the text contains no words.
 /// </returns>
 public static double LinkFrequency(string data)
 {
     string[] prefixes =
     {
         "http://", "https", "www."
     };

     // With no words the division below would be a floating-point division by
     // zero (NaN or infinity); report 0 instead.
     int totalWords = ContentExtraction.Words(data).Length;
     if (totalWords == 0)
     {
         return 0;
     }

     return Convert.ToDouble(PrefixFrequency(prefixes, data)) / totalWords;
 }
예제 #2
0
 /// <summary>
 /// Ratio of words ending in a numeric citation marker ("[1]" through "[9]") to the
 /// total word count of <paramref name="data"/>.
 /// </summary>
 /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
 /// <returns>
 /// The count reported by <c>SuffixFrequency</c> for the citation markers divided by
 /// the number of words, or 0 when the text contains no words.
 /// </returns>
 public static double AttributionFrequency(string data)
 {
     string[] tags =
     {
         "[1]", "[2]", "[3]", "[4]", "[5]", "[6]", "[7]", "[8]", "[9]"
     };

     // With no words the division below would be a floating-point division by
     // zero (NaN or infinity); report 0 instead.
     int totalWords = ContentExtraction.Words(data).Length;
     if (totalWords == 0)
     {
         return 0;
     }

     return Convert.ToDouble(SuffixFrequency(tags, data)) / totalWords;
 }
예제 #3
0
 /// <summary>
 /// Ratio of conjunctions (from a fixed English list) to the total word count of
 /// <paramref name="data"/>.
 /// </summary>
 /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
 /// <returns>
 /// The count reported by <c>Frequency</c> for the conjunction list divided by the
 /// number of words, or 0 when the text contains no words.
 /// </returns>
 public static double ConjunctionFrequency(string data)
 {
     string[] conjunctions =
     {
         "and",      "but",    "after", "when", "as", "because", "if",    "what",  "where",
         "which",    "how",    "than",  "or",   "so", "before",  "since", "while",
         "although", "though", "who",   "whose"
     };

     // With no words the division below would be a floating-point division by
     // zero (NaN or infinity); report 0 instead.
     int totalWords = ContentExtraction.Words(data).Length;
     if (totalWords == 0)
     {
         return 0;
     }

     return Convert.ToDouble(Frequency(conjunctions, data)) / totalWords;
 }
예제 #4
0
 /// <summary>
 /// Ratio of determiners (from a fixed English list) to the total word count of
 /// <paramref name="data"/>.
 /// </summary>
 /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
 /// <returns>
 /// The count reported by <c>Frequency</c> for the determiner list divided by the
 /// number of words, or 0 when the text contains no words.
 /// </returns>
 public static double DeterminerFrequency(string data)
 {
     string[] determiners =
     {
         "the",     "some",   "this",    "that", "every", "all",    "both", "one",
         "first",   "other",  "next",    "many", "much",  "more",   "most",
         "several", "no",     "a",       "an",   "any",   "each",   "half", "twice",
         "two",     "second", "another", "last", "few",   "little", "less",
         "least",   "own"
     };

     // With no words the division below would be a floating-point division by
     // zero (NaN or infinity); report 0 instead.
     int totalWords = ContentExtraction.Words(data).Length;
     if (totalWords == 0)
     {
         return 0;
     }

     return Convert.ToDouble(Frequency(determiners, data)) / totalWords;
 }
예제 #5
0
 /// <summary>
 /// Ratio of prepositions (from a fixed English list) to the total word count of
 /// <paramref name="data"/>.
 /// </summary>
 /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
 /// <returns>
 /// The count reported by <c>Frequency</c> for the preposition list divided by the
 /// number of words, or 0 when the text contains no words.
 /// </returns>
 public static double PrepositionFrequency(string data)
 {
     string[] prepositions =
     {
         "about", "across", "against", "along",   "around",
         "at",    "behind", "beside",  "besides", "by",    "despite",
         "down",  "during", "for",     "from",    "in",    "inside", "into",
         "near",  "of",     "off",     "on",      "onto",  "over",   "through",
         "to",    "toward", "with",    "within",  "without"
     };

     // With no words the division below would be a floating-point division by
     // zero (NaN or infinity); report 0 instead.
     int totalWords = ContentExtraction.Words(data).Length;
     if (totalWords == 0)
     {
         return 0;
     }

     return Convert.ToDouble(Frequency(prepositions, data)) / totalWords;
 }
예제 #6
0
 /// <summary>
 /// Ratio of pronouns (from a fixed English list) to the total word count of
 /// <paramref name="data"/>.
 /// </summary>
 /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
 /// <returns>
 /// The count reported by <c>Frequency</c> for the pronoun list divided by the
 /// number of words, or 0 when the text contains no words.
 /// </returns>
 public static double PronounFrequency(string data)
 {
     // Each pronoun is listed once; Frequency only tests membership, so a
     // repeated entry would not change the result.
     string[] pronouns =
     {
         "I",          "you",       "he",         "me",        "her",     "him",      "my",    "mine",
         "hers",       "his",       "myself",     "himself",   "herself", "anything",
         "everything", "anyone",    "everyone",   "ones",      "such",    "it",
         "we",         "they",      "us",         "them",      "our",     "ours",     "their", "theirs",
         "itself",     "ourselves", "themselves", "something", "nothing",
         "someone"
     };

     // With no words the division below would be a floating-point division by
     // zero (NaN or infinity); report 0 instead.
     int totalWords = ContentExtraction.Words(data).Length;
     if (totalWords == 0)
     {
         return 0;
     }

     return Convert.ToDouble(Frequency(pronouns, data)) / totalWords;
 }
예제 #7
0
        /// <summary>
        /// Counts the words of <paramref name="data"/> that are members of
        /// <paramref name="words"/>.
        /// </summary>
        /// <param name="words">Vocabulary to test each word against.</param>
        /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
        /// <returns>Number of matching words; every occurrence in the text is counted.</returns>
        private static int Frequency(string[] words, string data)
        {
            string[] tokens = ContentExtraction.Words(data);

            // One hit per token that is present in the vocabulary.
            return tokens.Count(token => words.Contains(token));
        }
예제 #8
0
        /// <summary>
        /// Fraction of words in <paramref name="data"/> whose length lies between
        /// <paramref name="min"/> and <paramref name="max"/>, both bounds inclusive.
        /// </summary>
        /// <param name="min">Smallest word length that counts.</param>
        /// <param name="max">Largest word length that counts.</param>
        /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
        /// <returns>
        /// Matching words divided by total words, or 0 when the text contains no words.
        /// </returns>
        public static double WordsOfLength(int min, int max, string data)
        {
            string[] words = ContentExtraction.Words(data);

            // With no words the ratio would be 0/0 (NaN); report 0 instead.
            if (words.Length == 0)
            {
                return 0;
            }

            int count = 0;

            foreach (string word in words)
            {
                if (word.Length >= min && word.Length <= max)
                {
                    count++;
                }
            }

            // Reuse the array already extracted rather than splitting the text again.
            return (double)count / words.Length;
        }
예제 #9
0
        /// <summary>
        /// Average number of words per sentence in <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Text to analyse; it is split by <c>ContentExtraction.Sentences</c>.</param>
        /// <returns>
        /// Total words across all sentences divided by the sentence count, or 0 when
        /// no sentences are found.
        /// </returns>
        public static double WordsPerSentence(string data)
        {
            string[] parts = ContentExtraction.Sentences(data);
            int sentenceTotal = parts.Length;

            if (sentenceTotal != 0)
            {
                int wordTotal = 0;

                // Each sentence is split into words on its own and the counts are summed.
                for (int i = 0; i < sentenceTotal; i++)
                {
                    wordTotal += ContentExtraction.Words(parts[i]).Length;
                }

                return (double)wordTotal / sentenceTotal;
            }

            return 0.0;
        }
예제 #10
0
        /// <summary>
        /// Average word length, in characters, of the words in <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
        /// <returns>
        /// Sum of all word lengths divided by the word count, or 0 when the text
        /// contains no words.
        /// </returns>
        public static double CharactersPerWord(string data)
        {
            string[] tokens = ContentExtraction.Words(data);
            int tokenTotal = tokens.Length;

            if (tokenTotal == 0)
            {
                return 0.0;
            }

            // Accumulate as double so the final division is floating-point.
            double characterTotal = 0;
            for (int i = 0; i < tokenTotal; i++)
            {
                characterTotal += tokens[i].Length;
            }

            return characterTotal / tokenTotal;
        }
예제 #11
0
        /// <summary>
        /// Counts (word, suffix) pairs where a word of <paramref name="data"/> ends
        /// with one of <paramref name="suffixes"/>.
        /// </summary>
        /// <param name="suffixes">Endings to look for.</param>
        /// <param name="data">Text to analyse; it is split into words by <c>ContentExtraction.Words</c>.</param>
        /// <returns>
        /// Number of matches; a word that ends with several of the suffixes is
        /// counted once per matching suffix.
        /// </returns>
        private static int SuffixFrequency(string[] suffixes, string data)
        {
            string[] tokens = ContentExtraction.Words(data);
            int matches = 0;

            for (int w = 0; w < tokens.Length; w++)
            {
                string token = tokens[w];

                for (int s = 0; s < suffixes.Length; s++)
                {
                    string suffix = suffixes[s];

                    // A suffix longer than the token cannot match; otherwise compare
                    // the token's tail of the same length with the suffix.
                    if (suffix.Length <= token.Length &&
                        token.Substring(token.Length - suffix.Length) == suffix)
                    {
                        matches++;
                    }
                }
            }

            return matches;
        }
예제 #12
0
 /// <summary>
 /// Number of words in <paramref name="data"/>, as split by
 /// <c>ContentExtraction.Words</c>, returned as a double.
 /// </summary>
 /// <param name="data">Text to analyse.</param>
 /// <returns>The length of the extracted word array.</returns>
 public static double WordCount(string data)
 {
     string[] tokens = ContentExtraction.Words(data);

     // The int length widens implicitly to the double return type.
     return tokens.Length;
 }