private NormalizedVector Normalize(String id, DocumentVector doc)
{
    NormalizedVector vec = new NormalizedVector(id);

    // find the maximum raw term frequency in the document
    // (assumes the document is non-empty)
    int max = 0;
    foreach (KeyValuePair<string, int> term in doc)
    {
        if (term.Value > max)
        {
            max = term.Value;
        }
    }

    lengths.Add(id, 0);

    // add frequencies normalized by the maximum frequency,
    // accumulating the squared values for the vector length
    foreach (KeyValuePair<string, int> term in doc)
    {
        double tf = term.Value / (double)max;
        vec.Add(term.Key, tf);
        lengths[id] += Math.Pow(tf, 2);
    }
    lengths[id] = Math.Sqrt(lengths[id]);

    return vec;
}
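// A minimal standalone sketch of the same max-frequency normalization,
// using a plain Dictionary<string, int> in place of the project's
// DocumentVector type (an assumption made for illustration only):
using System;
using System.Collections.Generic;
using System.Linq;

class NormalizeSketch
{
    static void Main()
    {
        // Raw term counts for one document.
        Dictionary<string, int> doc = new Dictionary<string, int> { { "a", 4 }, { "b", 2 }, { "c", 1 } };
        int max = doc.Values.Max();

        double lengthSquared = 0;
        foreach (KeyValuePair<string, int> term in doc)
        {
            double tf = term.Value / (double)max; // 1.0, 0.5, 0.25
            lengthSquared += tf * tf;
        }

        // Euclidean length of the normalized vector: sqrt(1 + 0.25 + 0.0625).
        Console.WriteLine(Math.Sqrt(lengthSquared)); // ~1.1456
    }
}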
public override void Compute()
{
    // Load tf and idf from the workspace, weight the vectors, and store the result.
    NormalizedVectorCollection tf = (NormalizedVectorCollection)Workspace.Load("NormalizedVectors");
    NormalizedVector idf = (NormalizedVector)Workspace.Load("InverseDocumentFrequencies");
    NormalizedVectorCollection tfidf = Models.TFIDF.Compute(tf, idf);
    Workspace.Store("WeightedVectors", tfidf);
}
public override void Compute()
{
    // Compute idf from the document frequencies and corpus size, then store it.
    int numDocs = (int)Workspace.Load("NumberOfDocuments");
    DocumentVector df = (DocumentVector)Workspace.Load("DocumentFrequencies");
    NormalizedVector idf = Models.InverseDocumentFrequency.Compute(df, numDocs);
    Workspace.Store("InverseDocumentFrequencies", idf);
}
/*
 * Return the square root of the sum of the squares of the vector:
 * sqrt(sum(v_i^2))
 */
private double ComputeLength(NormalizedVector vec)
{
    double val = 0;
    foreach (double term in vec.Values)
    {
        val += Math.Pow(term, 2);
    }
    return Math.Sqrt(val);
}
public override void Compute()
{
    // Compute query-to-document cosine similarities and store the matrix.
    DocumentVectorCollection queries = (DocumentVectorCollection)Workspace.Load("Queries");
    NormalizedVector lengths = (NormalizedVector)Workspace.Load("DocumentLengths");
    NormalizedVectorCollection docs = (NormalizedVectorCollection)Workspace.Load("Documents");
    TLSimilarityMatrix sims = Models.CosineSimilarity.Compute(docs, lengths, queries);
    Workspace.Store("SimilarityMatrix", sims);
}
/*
 * Instead of looking at every term across all documents,
 * only look at the terms in the query, because all other terms
 * will be 0, resulting in q*d = 0.
 * Typically the number of terms in a query is less than
 * the number of terms in a document.
 */
private static double ComputeProduct(DocumentVector query, NormalizedVector doc)
{
    double val = 0;
    foreach (KeyValuePair<string, int> term in query)
    {
        double d;
        doc.TryGetValue(term.Key, out d);
        val += term.Value * d;
    }
    return val;
}
public static NormalizedVector Compute(DocumentVector df, int numDocs)
{
    NormalizedVector idf = new NormalizedVector("InverseDocumentFrequencies");
    // idf(t) = log2(N / df(t)), where N is the number of documents
    // and df(t) is the number of documents containing term t.
    foreach (KeyValuePair<string, int> kvp in df)
    {
        idf.Add(kvp.Key, Math.Log(numDocs / (double)kvp.Value, 2));
    }
    return idf;
}
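// A standalone sketch of the idf formula above on a toy 4-document corpus,
// using a plain Dictionary<string, int> for the document frequencies
// (an assumption made for illustration only):
using System;
using System.Collections.Generic;

class IdfSketch
{
    static void Main()
    {
        Dictionary<string, int> df = new Dictionary<string, int> { { "parse", 4 }, { "token", 2 }, { "rhino", 1 } };
        int numDocs = 4;

        foreach (KeyValuePair<string, int> kvp in df)
        {
            // idf(t) = log2(N / df(t)): 0 for a term in every document,
            // growing as the term becomes rarer.
            Console.WriteLine(kvp.Key + ": " + Math.Log(numDocs / (double)kvp.Value, 2));
        }
        // parse: 0, token: 1, rhino: 2
    }
}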
public Normalizer(DocumentVectorCollection documents)
{
    lengths = new NormalizedVector("DocumentVectorLengths");
    vectors = new NormalizedVectorCollection();

    // Normalize each document vector; Normalize also records its length.
    foreach (KeyValuePair<string, DocumentVector> kvp in documents)
    {
        String id = kvp.Key;
        NormalizedVector vec = Normalize(id, kvp.Value);
        vectors.Add(vec);
    }
}
static void Main(string[] args)
{
    // Relative paths to the Rhino experiment inputs and output files.
    Dictionary<string, string> config = new Dictionary<string, string>();
    string relativeLocation = "..\\..\\..\\..\\";
    config.Add("idPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpusMapping.txt"));
    config.Add("docPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpus.txt"));
    config.Add("qidPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoListOfFeatures.txt"));
    config.Add("qdocPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoQueries.txt"));
    config.Add("goldSetDir", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoFeaturesToGoldSetMethodsMapping"));
    config.Add("effAllPath", System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessAllMethods.txt"));
    config.Add("effBestPath", System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessBestMethods.txt"));

    Console.WriteLine("Running experiment...");

    Console.WriteLine("Importing corpus...");
    TLArtifactsCollection corpusArtifacts = Corpus.Import(config["idPath"], config["docPath"]);

    Console.WriteLine("Computing corpus vectors...");
    Vectorizer corpusVectors = new Vectorizer(corpusArtifacts, "Ordinal");

    Console.WriteLine("Computing corpus tf...");
    Normalizer corpusTF = new Normalizer(corpusVectors.Vectors);

    Console.WriteLine("Computing corpus idf...");
    NormalizedVector corpusIDF = InverseDocumentFrequency.Compute(corpusVectors.Frequencies, corpusVectors.Vectors.Count);

    Console.WriteLine("Computing corpus tf-idf...");
    NormalizedVectorCollection corpusTFIDF = TFIDF.Compute(corpusTF.Vectors, corpusIDF);

    Console.WriteLine("Importing queries...");
    TLArtifactsCollection queryArtifacts = Corpus.Import(config["qidPath"], config["qdocPath"]);

    Console.WriteLine("Computing query vectors...");
    Vectorizer queryVectors = new Vectorizer(queryArtifacts, "Boolean");

    Console.WriteLine("Computing similarities...");
    TLSimilarityMatrix sims = CosineSimilarity.Compute(corpusTF.Vectors, corpusTF.Lengths, queryVectors.Vectors);

    Console.WriteLine("Importing gold set...");
    TLSimilarityMatrix goldset = AnswerMapping.Import(config["goldSetDir"]);

    Console.WriteLine("Calculating effectiveness measures...");
    Effectiveness.Export(queryArtifacts, sims, goldset, config["effAllPath"], config["effBestPath"]);
    Console.WriteLine("Effectiveness measures written to:\n\t" + config["effAllPath"] + "\n\t" + config["effBestPath"]);

    Console.WriteLine("Experiment complete.");
    Console.WriteLine("\nPress Enter to continue...");
    Console.ReadLine();
}
public static NormalizedVectorCollection Compute(NormalizedVectorCollection tf, NormalizedVector idf)
{
    NormalizedVectorCollection tfidf = new NormalizedVectorCollection();
    foreach (KeyValuePair<string, NormalizedVector> doc in tf)
    {
        // Weight each term's normalized frequency by its inverse document frequency.
        NormalizedVector vec = new NormalizedVector(doc.Key);
        foreach (KeyValuePair<string, double> term in doc.Value)
        {
            vec.Add(term.Key, term.Value * idf[term.Key]);
        }
        tfidf.Add(vec);
    }
    return tfidf;
}
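// A standalone sketch of the tf-idf weighting above, with plain dictionaries
// standing in for NormalizedVector (an assumption made for illustration only):
using System;
using System.Collections.Generic;

class TfidfSketch
{
    static void Main()
    {
        // Normalized term frequencies for one document.
        Dictionary<string, double> tf = new Dictionary<string, double> { { "parse", 1.0 }, { "rhino", 0.25 } };
        // Inverse document frequencies for the corpus.
        Dictionary<string, double> idf = new Dictionary<string, double> { { "parse", 0.0 }, { "rhino", 2.0 } };

        foreach (KeyValuePair<string, double> term in tf)
        {
            // Ubiquitous terms (idf = 0) are weighted away; rare terms are boosted.
            Console.WriteLine(term.Key + ": " + (term.Value * idf[term.Key]));
        }
        // parse: 0, rhino: 0.5
    }
}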
public static TLSimilarityMatrix Compute(NormalizedVectorCollection docs, NormalizedVector lengths, DocumentVectorCollection queries)
{
    TLSimilarityMatrix sims = new TLSimilarityMatrix();
    foreach (KeyValuePair<string, DocumentVector> QueryKVP in queries)
    {
        /*
         * Since the term frequencies in a query are all 1, the query's
         * vector length is the square root of the number of its terms.
         */
        double qVal = Math.Sqrt(QueryKVP.Value.Count);
        foreach (KeyValuePair<string, NormalizedVector> DocKVP in docs)
        {
            double dVal = lengths[DocKVP.Key];
            double qdVec = ComputeProduct(QueryKVP.Value, DocKVP.Value);
            sims.AddLink(QueryKVP.Key, DocKVP.Key, qdVec / (qVal * dVal));
        }
    }
    return sims;
}
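// A standalone end-to-end sketch of one cosine similarity score, combining
// the sparse product and precomputed lengths above; plain dictionaries stand
// in for the project's vector types (an assumption made for illustration only):
using System;
using System.Collections.Generic;

class CosineSketch
{
    static void Main()
    {
        // Boolean query vector: every present term has frequency 1.
        Dictionary<string, int> query = new Dictionary<string, int> { { "token", 1 }, { "rhino", 1 } };
        // Normalized document vector (term -> tf).
        Dictionary<string, double> doc = new Dictionary<string, double> { { "parse", 1.0 }, { "token", 0.5 } };

        // Sparse dot product over the query's terms only.
        double product = 0;
        foreach (KeyValuePair<string, int> term in query)
        {
            double d;
            doc.TryGetValue(term.Key, out d); // 0 when the document lacks the term
            product += term.Value * d;
        }

        double qLen = Math.Sqrt(query.Count);            // all query tf = 1
        double dLen = Math.Sqrt(1.0 * 1.0 + 0.5 * 0.5);  // precomputed per document in the pipeline
        Console.WriteLine(product / (qLen * dLen));      // ~0.3162
    }
}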
private void Dash(NormalizedVector direction, float force)
{
    // Apply an impulse in the dash direction and start the post-dash cooldown.
    rb.AddForce(direction.Value * force);
    framesToAvailableDash = dashEndLag;
}