/// <summary>
/// Builds a vector in which every term count from <paramref name="doc"/> is
/// divided by the largest count in that document, and stores the Euclidean
/// length (square root of the sum of squares) of the resulting vector in
/// <c>lengths</c> under <paramref name="id"/>.
/// </summary>
/// <param name="id">Name given to the returned vector and key used in <c>lengths</c>.</param>
/// <param name="doc">Term/count pairs to scale.</param>
/// <returns>A vector mapping each term to its count divided by the maximum count.</returns>
private NormalizedVector Normalize(String id, DocumentVector doc)
{
    NormalizedVector vec = new NormalizedVector(id);

    // find max value
    int max = 0;
    foreach (KeyValuePair<string, int> term in doc)
    {
        if (term.Value > max)
        {
            max = term.Value;
        }
    }

    // Register the id before processing terms, in the same position as the original code.
    lengths.Add(id, 0);

    // add normalized frequencies
    double sumOfSquares = 0.0;
    foreach (KeyValuePair<string, int> term in doc)
    {
        // If no count is positive, max is still 0 and dividing by it would
        // produce NaN or an infinity; treat such frequencies as 0 instead.
        double tf = max > 0 ? term.Value / (double)max : 0.0;
        vec.Add(term.Key, tf);
        sumOfSquares += Math.Pow(tf, 2);
    }

    lengths[id] = Math.Sqrt(sumOfSquares);
    return vec;
}
/// <summary>
/// Scales the term counts of <paramref name="doc"/> by the document's largest
/// count and records the Euclidean length of the scaled vector in
/// <c>lengths</c>, keyed by <paramref name="id"/>.
/// </summary>
/// <param name="id">Name of the returned vector; also the key written to <c>lengths</c>.</param>
/// <param name="doc">Term/count pairs for one document.</param>
/// <returns>A vector holding, for each term, its count divided by the maximum count.</returns>
private NormalizedVector Normalize(String id, DocumentVector doc)
{
    NormalizedVector vec = new NormalizedVector(id);

    // Largest raw count in the document (0 when there is no positive count).
    int max = 0;
    foreach (KeyValuePair<string, int> term in doc)
    {
        max = Math.Max(max, term.Value);
    }

    // The entry is created before the terms are processed, as in the original code.
    lengths.Add(id, 0);

    double squaredLength = 0.0;
    foreach (KeyValuePair<string, int> term in doc)
    {
        double tf = 0.0;
        if (max > 0)
        {
            // Only divide when a positive maximum exists; dividing by a zero
            // maximum would yield NaN or an infinity.
            tf = term.Value / (double)max;
        }

        vec.Add(term.Key, tf);
        squaredLength += Math.Pow(tf, 2);
    }

    lengths[id] = Math.Sqrt(squaredLength);
    return vec;
}
/// <summary>
/// Produces a vector named "InverseDocumentFrequencies" that holds, for every
/// entry of <paramref name="df"/>, the base-2 logarithm of
/// <paramref name="numDocs"/> divided by that entry's value.
/// </summary>
/// <param name="df">Term/count pairs; presumably per-term document frequencies (confirm with caller).</param>
/// <param name="numDocs">Numerator of the ratio; presumably the total number of documents.</param>
/// <returns>The vector of log-scaled ratios, keyed by term.</returns>
public static NormalizedVector Compute(DocumentVector df, int numDocs)
{
    NormalizedVector result = new NormalizedVector("InverseDocumentFrequencies");

    foreach (KeyValuePair<string, int> entry in df)
    {
        // Cast forces floating-point division.
        double ratio = numDocs / (double)entry.Value;
        result.Add(entry.Key, Math.Log(ratio, 2));
    }

    return result;
}
/// <summary>
/// For each term in <paramref name="df"/>, stores log2(<paramref name="numDocs"/> / count)
/// in a new vector named "InverseDocumentFrequencies".
/// </summary>
/// <param name="df">Term/count pairs to convert.</param>
/// <param name="numDocs">Value each count is divided into before taking the logarithm.</param>
/// <returns>A vector with one log-scaled entry per term of <paramref name="df"/>.</returns>
public static NormalizedVector Compute(DocumentVector df, int numDocs)
{
    NormalizedVector inverse = new NormalizedVector("InverseDocumentFrequencies");

    foreach (KeyValuePair<string, int> pair in df)
    {
        string term = pair.Key;
        double count = pair.Value;   // widen so the division below is floating-point
        double logRatio = Math.Log(numDocs / count, 2);
        inverse.Add(term, logRatio);
    }

    return inverse;
}
/// <summary>
/// Creates a new collection in which every vector of <paramref name="tf"/> is
/// copied with each term value multiplied by the value stored for the same
/// term in <paramref name="idf"/>.
/// </summary>
/// <param name="tf">Collection of vectors whose term values are to be scaled.</param>
/// <param name="idf">Per-term multipliers, looked up by term key.</param>
/// <returns>A collection containing one scaled vector per entry of <paramref name="tf"/>.</returns>
public static NormalizedVectorCollection Compute(NormalizedVectorCollection tf, NormalizedVector idf)
{
    NormalizedVectorCollection weighted = new NormalizedVectorCollection();

    foreach (KeyValuePair<string, NormalizedVector> entry in tf)
    {
        // The output vector reuses the key of the input entry as its name.
        NormalizedVector scaled = new NormalizedVector(entry.Key);

        foreach (KeyValuePair<string, double> component in entry.Value)
        {
            string term = component.Key;
            double weight = component.Value * idf[term];
            scaled.Add(term, weight);
        }

        weighted.Add(scaled);
    }

    return weighted;
}