Пример #1
0
        /// <summary>
        /// Scales every term count in <paramref name="doc"/> by the largest count found and
        /// stores the Euclidean length of the scaled values in the <c>lengths</c> member
        /// (declared outside this method) under <paramref name="id"/>.
        /// </summary>
        /// <param name="id">Name passed to the returned vector and key of the entry added to <c>lengths</c>.</param>
        /// <param name="doc">Term/count pairs to normalize.</param>
        /// <returns>A vector holding <c>count / max</c> for each term, or 0 for every term when no count is positive.</returns>
        private NormalizedVector Normalize(String id, DocumentVector doc)
        {
            NormalizedVector vec = new NormalizedVector(id);

            // Largest count in the document; remains 0 when no count is positive.
            int max = 0;

            foreach (KeyValuePair<string, int> term in doc)
            {
                if (term.Value > max)
                {
                    max = term.Value;
                }
            }

            // Register the id before filling the vector, exactly as the original ordering did.
            lengths.Add(id, 0);

            // Accumulate locally instead of reading and writing lengths[id] on every iteration.
            double sumOfSquares = 0.0;

            foreach (KeyValuePair<string, int> term in doc)
            {
                // Dividing by a zero maximum would produce NaN or an infinity and
                // corrupt the stored length, so fall back to 0 in that case.
                double tf = max > 0 ? term.Value / (double)max : 0.0;
                vec.Add(term.Key, tf);
                sumOfSquares += tf * tf;
            }

            lengths[id] = Math.Sqrt(sumOfSquares);

            return vec;
        }
Пример #2
0
        /// <summary>
        /// Divides each term count of <paramref name="doc"/> by the maximum count and records
        /// the Euclidean length of the resulting values in the <c>lengths</c> member
        /// (declared outside this method) under <paramref name="id"/>.
        /// </summary>
        /// <param name="id">Name passed to the returned vector and key of the entry added to <c>lengths</c>.</param>
        /// <param name="doc">Term/count pairs to normalize.</param>
        /// <returns>A vector holding <c>count / max</c> per term; every value is 0 when no count is positive.</returns>
        private NormalizedVector Normalize(String id, DocumentVector doc)
        {
            NormalizedVector vec = new NormalizedVector(id);

            // find max value (stays 0 when the document has no positive count)
            int max = 0;
            foreach (KeyValuePair<string, int> term in doc)
            {
                if (term.Value > max)
                {
                    max = term.Value;
                }
            }

            // The entry is created up front, before any term is added to the vector.
            lengths.Add(id, 0);

            // add normalized frequencies, summing their squares in a local
            double squaredLength = 0.0;
            foreach (KeyValuePair<string, int> term in doc)
            {
                // A zero maximum would make the division yield NaN or an infinity;
                // use 0 instead so the vector and its length stay finite.
                double tf = max > 0 ? term.Value / (double)max : 0.0;
                vec.Add(term.Key, tf);
                squaredLength += tf * tf;
            }

            lengths[id] = Math.Sqrt(squaredLength);

            return vec;
        }
Пример #3
0
        /// <summary>
        /// Builds a vector of base-2 logarithms of <c>numDocs / count</c> for every
        /// term/count pair in <paramref name="df"/>.
        /// </summary>
        /// <param name="df">Term/count pairs; each count is the divisor for its term.</param>
        /// <param name="numDocs">Numerator of the ratio.</param>
        /// <returns>A vector named "InverseDocumentFrequencies" with one entry per term.</returns>
        public static NormalizedVector Compute(DocumentVector df, int numDocs)
        {
            NormalizedVector result = new NormalizedVector("InverseDocumentFrequencies");

            foreach (KeyValuePair<string, int> entry in df)
            {
                // Convert the divisor to double so the ratio is not an integer division.
                double ratio = numDocs / (double)entry.Value;
                double weight = Math.Log(ratio, 2);

                result.Add(entry.Key, weight);
            }

            return result;
        }
        /// <summary>
        /// Produces, for each term in <paramref name="df"/>, the base-2 logarithm of
        /// <paramref name="numDocs"/> divided by that term's count.
        /// </summary>
        /// <param name="df">Term/count pairs to convert.</param>
        /// <param name="numDocs">Value divided by each count before the logarithm is taken.</param>
        /// <returns>A vector named "InverseDocumentFrequencies" keyed by term.</returns>
        public static NormalizedVector Compute(DocumentVector df, int numDocs)
        {
            NormalizedVector inverseFrequencies = new NormalizedVector("InverseDocumentFrequencies");

            foreach (KeyValuePair<string, int> pair in df)
            {
                string termName = pair.Key;

                // The cast forces floating-point division.
                double quotient = numDocs / (double)pair.Value;

                inverseFrequencies.Add(termName, Math.Log(quotient, 2));
            }

            return inverseFrequencies;
        }
Пример #5
0
        /// <summary>
        /// Multiplies every term value of every vector in <paramref name="tf"/> by the
        /// value stored for the same term in <paramref name="idf"/>.
        /// </summary>
        /// <param name="tf">Collection of per-document vectors, enumerated as key/vector pairs.</param>
        /// <param name="idf">Per-term factors, looked up by term through its indexer.</param>
        /// <returns>A new collection with one product vector per entry of <paramref name="tf"/>.</returns>
        public static NormalizedVectorCollection Compute(NormalizedVectorCollection tf, NormalizedVector idf)
        {
            NormalizedVectorCollection weighted = new NormalizedVectorCollection();

            foreach (KeyValuePair<string, NormalizedVector> document in tf)
            {
                // The result vector is created with the key of the source entry.
                NormalizedVector scaled = new NormalizedVector(document.Key);

                foreach (KeyValuePair<string, double> entry in document.Value)
                {
                    double product = entry.Value * idf[entry.Key];
                    scaled.Add(entry.Key, product);
                }

                weighted.Add(scaled);
            }

            return weighted;
        }
Пример #6
0
        /// <summary>
        /// Creates a collection in which each term value from <paramref name="tf"/> has been
        /// multiplied by the matching entry of <paramref name="idf"/>.
        /// </summary>
        /// <param name="tf">Key/vector pairs supplying the term values.</param>
        /// <param name="idf">Vector indexed by term that supplies the multiplier.</param>
        /// <returns>A new collection containing one vector per entry of <paramref name="tf"/>.</returns>
        public static NormalizedVectorCollection Compute(NormalizedVectorCollection tf, NormalizedVector idf)
        {
            NormalizedVectorCollection products = new NormalizedVectorCollection();

            foreach (KeyValuePair<string, NormalizedVector> docEntry in tf)
            {
                NormalizedVector combined = new NormalizedVector(docEntry.Key);
                NormalizedVector frequencies = docEntry.Value;

                foreach (KeyValuePair<string, double> termEntry in frequencies)
                {
                    string termName = termEntry.Key;

                    // Same term key is used for the lookup and for the stored result.
                    combined.Add(termName, termEntry.Value * idf[termName]);
                }

                products.Add(combined);
            }

            return products;
        }