Пример #1
0
        /*
         * Normalize a document vector by its maximum term frequency:
         * tf(t) = count(t) / max(count). Also records the Euclidean length
         * of the normalized vector in "lengths" under the document id.
         *
         * id  - document identifier (used as key into "lengths")
         * doc - raw term counts for the document
         */
        private NormalizedVector Normalize(String id, DocumentVector doc)
        {
            NormalizedVector vec = new NormalizedVector(id);

            // find max value
            int max = 0;
            foreach (KeyValuePair<string, int> term in doc)
            {
                if (term.Value > max)
                {
                    max = term.Value;
                }
            }

            lengths.Add(id, 0);

            // Guard: a non-empty document whose counts are all zero would
            // otherwise compute 0/0 = NaN below. An empty document simply
            // keeps its zero length and an empty normalized vector.
            if (max == 0)
            {
                return vec;
            }

            // add normalized frequencies
            foreach (KeyValuePair<string, int> term in doc)
            {
                double tf = term.Value / (double)max;
                vec.Add(term.Key, tf);
                lengths[id] += Math.Pow(tf, 2);
            }

            lengths[id] = Math.Sqrt(lengths[id]);

            return vec;
        }
Пример #2
0
        /*
         * Normalize a document vector by its maximum term frequency:
         * tf(t) = count(t) / max(count). Also records the Euclidean length
         * of the normalized vector in "lengths" under the document id.
         *
         * id  - document identifier (used as key into "lengths")
         * doc - raw term counts for the document
         */
        private NormalizedVector Normalize(String id, DocumentVector doc)
        {
            NormalizedVector vec = new NormalizedVector(id);

            // find max value
            int max = 0;

            foreach (KeyValuePair <string, int> term in doc)
            {
                if (term.Value > max)
                {
                    max = term.Value;
                }
            }

            lengths.Add(id, 0);

            // Guard: a non-empty document whose counts are all zero would
            // otherwise compute 0/0 = NaN below. An empty document simply
            // keeps its zero length and an empty normalized vector.
            if (max == 0)
            {
                return(vec);
            }

            // add normalized frequencies
            foreach (KeyValuePair <string, int> term in doc)
            {
                double tf = term.Value / (double)max;
                vec.Add(term.Key, tf);
                lengths[id] += Math.Pow(tf, 2);
            }

            lengths[id] = Math.Sqrt(lengths[id]);

            return(vec);
        }
Пример #3
0
        /*
         * Pipeline stage: weight normalized term frequencies by IDF.
         * Loads "NormalizedVectors" and "InverseDocumentFrequencies" from
         * the workspace and stores the tf-idf result as "WeightedVectors".
         */
        public override void Compute()
        {
            NormalizedVectorCollection termFrequencies = (NormalizedVectorCollection)Workspace.Load("NormalizedVectors");
            NormalizedVector inverseFrequencies = (NormalizedVector)Workspace.Load("InverseDocumentFrequencies");

            NormalizedVectorCollection weighted = Models.TFIDF.Compute(termFrequencies, inverseFrequencies);
            Workspace.Store("WeightedVectors", weighted);
        }
        /*
         * Pipeline stage: compute inverse document frequencies from the
         * stored document frequencies and document count, and store the
         * result as "InverseDocumentFrequencies".
         */
        public override void Compute()
        {
            int documentCount = (int)Workspace.Load("NumberOfDocuments");
            DocumentVector documentFrequencies = (DocumentVector)Workspace.Load("DocumentFrequencies");

            Workspace.Store("InverseDocumentFrequencies",
                            Models.InverseDocumentFrequency.Compute(documentFrequencies, documentCount));
        }
Пример #5
0
 /*
  * Compute the Euclidean (L2) length of a vector:
  * sqrt(sum(v_i^2)).
  */
 private double ComputeLength(NormalizedVector vec)
 {
     double sumOfSquares = 0;

     foreach (double component in vec.Values)
     {
         sumOfSquares += Math.Pow(component, 2);
     }

     return Math.Sqrt(sumOfSquares);
 }
Пример #6
0
        /*
         * Pipeline stage: score every stored query against every stored
         * document with cosine similarity and store the resulting matrix
         * as "SimilarityMatrix".
         */
        public override void Compute()
        {
            DocumentVectorCollection queries = (DocumentVectorCollection)Workspace.Load("Queries");
            NormalizedVector documentLengths = (NormalizedVector)Workspace.Load("DocumentLengths");
            NormalizedVectorCollection documents = (NormalizedVectorCollection)Workspace.Load("Documents");

            TLSimilarityMatrix similarities = Models.CosineSimilarity.Compute(documents, documentLengths, queries);
            Workspace.Store("SimilarityMatrix", similarities);
        }
Пример #7
0
        /*
         * Compute the Euclidean (L2) length of the vector:
         * sqrt(sum(v_i^2)) over all of its stored values.
         */
        private double ComputeLength(NormalizedVector vec)
        {
            double val = 0;

            foreach (double term in vec.Values)
            {
                val += Math.Pow(term, 2);
            }
            return(Math.Sqrt(val));
        }
 /*
  * Dot product of a query vector with a normalized document vector.
  * Only the query's terms are visited: a term absent from the query
  * would contribute q_i * d_i = 0 anyway, and queries typically have
  * far fewer terms than documents.
  */
 private static double ComputeProduct(DocumentVector query, NormalizedVector doc)
 {
     double product = 0;

     foreach (KeyValuePair<string, int> queryTerm in query)
     {
         double docWeight;
         doc.TryGetValue(queryTerm.Key, out docWeight);

         // docWeight stays 0 for terms the document does not contain,
         // so those terms contribute nothing to the product.
         product += queryTerm.Value * docWeight;
     }

     return product;
 }
Пример #9
0
        /*
         * Compute inverse document frequencies:
         * idf(t) = log2(numDocs / df(t)).
         *
         * df      - per-term document frequencies
         * numDocs - total number of documents in the corpus
         */
        public static NormalizedVector Compute(DocumentVector df, int numDocs)
        {
            NormalizedVector idf = new NormalizedVector("InverseDocumentFrequencies");

            foreach (KeyValuePair<string, int> frequency in df)
            {
                double ratio = numDocs / (double)frequency.Value;
                idf.Add(frequency.Key, Math.Log(ratio, 2));
            }

            return idf;
        }
        /*
         * Compute inverse document frequencies: idf(t) = log2(numDocs / df(t)).
         *
         * df      - per-term document frequencies
         * numDocs - total number of documents in the corpus
         *
         * NOTE(review): a zero document frequency would divide by zero and
         * yield Infinity — presumably df only contains terms that occur in
         * at least one document; confirm against the producer of df.
         */
        public static NormalizedVector Compute(DocumentVector df, int numDocs)
        {
            NormalizedVector idf = new NormalizedVector("InverseDocumentFrequencies");

            foreach (KeyValuePair<string, int> kvp in df)
            {
                idf.Add(kvp.Key, Math.Log(numDocs / (double) kvp.Value, 2));
            }

            return idf;
        }
Пример #11
0
        /*
         * Build a normalized vector for every document in the collection.
         * Results accumulate in "vectors"; Normalize also records each
         * document's vector length in "lengths".
         */
        public Normalizer(DocumentVectorCollection documents)
        {
            lengths = new NormalizedVector("DocumentVectorLengths");
            vectors = new NormalizedVectorCollection();

            foreach (KeyValuePair<string, DocumentVector> document in documents)
            {
                vectors.Add(Normalize(document.Key, document.Value));
            }
        }
Пример #12
0
        /*
         * Dot product of a query vector with a normalized document vector.
         *
         * Only the query's terms are iterated: a term absent from the query
         * would contribute q_i * d_i = 0 anyway, and a query typically has
         * far fewer terms than a document. TryGetValue leaves d at its
         * default of 0 for terms the document lacks, so those terms add
         * nothing to the sum.
         */
        private static double ComputeProduct(DocumentVector query, NormalizedVector doc)
        {
            double val = 0;

            foreach (KeyValuePair <string, int> term in query)
            {
                double d;
                doc.TryGetValue(term.Key, out d);
                val += term.Value * d;
            }
            return(val);
        }
Пример #13
0
        /*
         * Build a normalized vector for every document in the collection.
         * Results accumulate in "vectors"; Normalize also records each
         * document's vector length in "lengths".
         */
        public Normalizer(DocumentVectorCollection documents)
        {
            lengths = new NormalizedVector("DocumentVectorLengths");
            vectors = new NormalizedVectorCollection();

            foreach (KeyValuePair <string, DocumentVector> kvp in documents)
            {
                String           id  = kvp.Key;
                NormalizedVector vec = Normalize(id, kvp.Value);
                vectors.Add(vec);
            }
        }
Пример #14
0
        /*
         * End-to-end traceability experiment over the Rhino corpus:
         * import corpus and queries, compute tf-idf weights, score cosine
         * similarities, and export effectiveness measures against the gold set.
         */
        static void Main(string[] args)
        {
            Dictionary <string, string> config = new Dictionary <string, string>();
            string relativeLocation            = "..\\..\\..\\..\\";

            // Input/output locations, all relative to the build directory.
            config.Add("idPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpusMapping.txt"));
            config.Add("docPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpus.txt"));
            config.Add("qidPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoListOfFeatures.txt"));
            config.Add("qdocPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoQueries.txt"));
            config.Add("goldSetDir", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoFeaturesToGoldSetMethodsMapping"));
            config.Add("effAllPath", System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessAllMethods.txt"));
            config.Add("effBestPath", System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessBestMethods.txt"));

            Console.WriteLine("Running experiment...");
            Console.WriteLine("Importing corpus...");
            TLArtifactsCollection corpusArtifacts = Corpus.Import(config["idPath"], config["docPath"]);

            Console.WriteLine("Computing corpus vectors...");
            Vectorizer corpusVectors = new Vectorizer(corpusArtifacts, "Ordinal");

            Console.WriteLine("Computing corpus tf, df...");
            Normalizer corpusTF = new Normalizer(corpusVectors.Vectors);

            Console.WriteLine("Computing corpus idf...");
            NormalizedVector corpusIDF = InverseDocumentFrequency.Compute(corpusVectors.Frequencies, corpusVectors.Vectors.Count);

            Console.WriteLine("Computing corpus tf-idf...");
            NormalizedVectorCollection corpusTFIDF = TFIDF.Compute(corpusTF.Vectors, corpusIDF);

            Console.WriteLine("Importing queries...");
            TLArtifactsCollection queryArtifacts = Corpus.Import(config["qidPath"], config["qdocPath"]);

            // BUG FIX: this message previously said "Computing corpus vectors..."
            // (copy-paste from the corpus phase) while actually vectorizing queries.
            Console.WriteLine("Computing query vectors...");
            Vectorizer queryVectors = new Vectorizer(queryArtifacts, "Boolean");

            Console.WriteLine("Computing similarities...");
            TLSimilarityMatrix sims = CosineSimilarity.Compute(corpusTF.Vectors, corpusTF.Lengths, queryVectors.Vectors);

            Console.WriteLine("Importing gold set...");
            TLSimilarityMatrix goldset = AnswerMapping.Import(config["goldSetDir"]);

            Console.WriteLine("Calculating effectiveness measures...");
            Effectiveness.Export(queryArtifacts, sims, goldset, config["effAllPath"], config["effBestPath"]);
            Console.WriteLine("Effectiveness measures written to:\n\t" + config["effAllPath"] + "\n\t" + config["effBestPath"]);
            Console.WriteLine("Experiment complete.");

            Console.WriteLine("\nPress enter key to continue...");
            Console.ReadLine();
        }
Пример #15
0
        /*
         * Weight each document's term frequencies by inverse document
         * frequency: tfidf(t, d) = tf(t, d) * idf(t).
         */
        public static NormalizedVectorCollection Compute(NormalizedVectorCollection tf, NormalizedVector idf)
        {
            NormalizedVectorCollection weighted = new NormalizedVectorCollection();

            foreach (KeyValuePair<string, NormalizedVector> document in tf)
            {
                NormalizedVector documentWeights = new NormalizedVector(document.Key);

                foreach (KeyValuePair<string, double> term in document.Value)
                {
                    documentWeights.Add(term.Key, term.Value * idf[term.Key]);
                }

                weighted.Add(documentWeights);
            }

            return weighted;
        }
Пример #16
0
        /*
         * Weight each document's term frequencies by inverse document
         * frequency: tfidf(t, d) = tf(t, d) * idf(t).
         *
         * NOTE(review): assumes every term appearing in tf also has an
         * entry in idf; idf[term.Key] on a missing key would fail — confirm
         * against how df/idf are built upstream.
         */
        public static NormalizedVectorCollection Compute(NormalizedVectorCollection tf, NormalizedVector idf)
        {
            NormalizedVectorCollection tfidf = new NormalizedVectorCollection();

            foreach (KeyValuePair<string, NormalizedVector> doc in tf)
            {
                NormalizedVector vec = new NormalizedVector(doc.Key);
                foreach (KeyValuePair<string, double> term in doc.Value)
                {
                    vec.Add(term.Key, term.Value * idf[term.Key]);
                }
                tfidf.Add(vec);
            }

            return tfidf;
        }
Пример #17
0
        /*
         * Cosine similarity between every query and every document:
         * sim(q, d) = (q . d) / (|q| * |d|).
         * Document lengths are precomputed and supplied via "lengths".
         */
        public static TLSimilarityMatrix Compute(NormalizedVectorCollection docs, NormalizedVector lengths, DocumentVectorCollection queries)
        {
            TLSimilarityMatrix sims = new TLSimilarityMatrix();

            foreach (KeyValuePair<string, DocumentVector> query in queries)
            {
                /*
                 * Queries are boolean vectors (every tf is 1), so the query
                 * length |q| is simply sqrt(number of terms in the query).
                 */
                double queryLength = Math.Sqrt(query.Value.Count);

                foreach (KeyValuePair<string, NormalizedVector> document in docs)
                {
                    double documentLength = lengths[document.Key];
                    double dotProduct = ComputeProduct(query.Value, document.Value);

                    sims.AddLink(query.Key, document.Key, dotProduct / (queryLength * documentLength));
                }
            }

            return sims;
        }
        /*
         * Cosine similarity between every query and every document:
         * sim(q, d) = (q . d) / (|q| * |d|).
         * Document lengths are precomputed and supplied via "lengths".
         */
        public static TLSimilarityMatrix Compute(NormalizedVectorCollection docs, NormalizedVector lengths, DocumentVectorCollection queries)
        {
            TLSimilarityMatrix sims = new TLSimilarityMatrix();

            foreach (KeyValuePair<string, DocumentVector> QueryKVP in queries)
            {
                /*
                 * Queries are boolean vectors (every tf is 1), so the query
                 * length |q| is simply sqrt(number of terms in the query).
                 */
                double qVal = Math.Sqrt(QueryKVP.Value.Count);
                foreach (KeyValuePair<string, NormalizedVector> DocKVP in docs)
                {
                    double dVal = lengths[DocKVP.Key];
                    double qdVec = ComputeProduct(QueryKVP.Value, DocKVP.Value);
                    sims.AddLink(QueryKVP.Key, DocKVP.Key, qdVec / (qVal * dVal));
                }
            }

            return sims;
        }
Пример #19
0
 // Apply a dash impulse in the given direction, then reset the dash
 // availability counter.
 // NOTE(review): assumes rb is a physics body (e.g. Unity Rigidbody) and
 // direction.Value scales cleanly by force — confirm against the declaring
 // class; framesToAvailableDash/dashEndLag semantics are not visible here.
 private void Dash(NormalizedVector direction, float force)
 {
     rb.AddForce(direction.Value * force);
     framesToAvailableDash = dashEndLag;
 }