/// <summary>
/// Pipeline stage: loads the term-frequency vectors and the inverse document
/// frequencies from the workspace, weights them via tf-idf, and stores the
/// weighted vectors back under "WeightedVectors".
/// </summary>
public override void Compute()
{
    NormalizedVectorCollection termFrequencies = (NormalizedVectorCollection)Workspace.Load("NormalizedVectors");
    NormalizedVector inverseDocFrequencies = (NormalizedVector)Workspace.Load("InverseDocumentFrequencies");
    NormalizedVectorCollection weightedVectors = Models.TFIDF.Compute(termFrequencies, inverseDocFrequencies);
    Workspace.Store("WeightedVectors", weightedVectors);
}
/// <summary>
/// Pipeline stage: loads queries, document lengths, and document vectors from
/// the workspace, computes query-to-document cosine similarities, and stores
/// the resulting matrix under "SimilarityMatrix".
/// </summary>
public override void Compute()
{
    DocumentVectorCollection queryVectors = (DocumentVectorCollection)Workspace.Load("Queries");
    NormalizedVector documentLengths = (NormalizedVector)Workspace.Load("DocumentLengths");
    NormalizedVectorCollection documentVectors = (NormalizedVectorCollection)Workspace.Load("Documents");
    TLSimilarityMatrix similarities = Models.CosineSimilarity.Compute(documentVectors, documentLengths, queryVectors);
    Workspace.Store("SimilarityMatrix", similarities);
}
/// <summary>
/// Normalizes every document vector in the collection, collecting the
/// normalized vectors and (via <c>Normalize</c>) their lengths.
/// </summary>
/// <param name="documents">Document vectors keyed by document id.</param>
public Normalizer(DocumentVectorCollection documents)
{
    lengths = new NormalizedVector("DocumentVectorLengths");
    vectors = new NormalizedVectorCollection();
    foreach (KeyValuePair<string, DocumentVector> entry in documents)
    {
        // Normalize is presumed to also record the vector's length in
        // `lengths` — TODO(review): confirm against Normalize's definition.
        vectors.Add(Normalize(entry.Key, entry.Value));
    }
}
/// <summary>
/// Builds the normalized form of each document vector in
/// <paramref name="documents"/> and stores the results in <c>vectors</c>;
/// vector lengths accumulate in <c>lengths</c>.
/// </summary>
/// <param name="documents">Collection of raw document vectors to normalize.</param>
public Normalizer(DocumentVectorCollection documents)
{
    lengths = new NormalizedVector("DocumentVectorLengths");
    vectors = new NormalizedVectorCollection();
    foreach (KeyValuePair<string, DocumentVector> document in documents)
    {
        string documentId = document.Key;
        NormalizedVector normalized = Normalize(documentId, document.Value);
        vectors.Add(normalized);
    }
}
/// <summary>
/// Runs the Rhino feature-location experiment end to end: imports the corpus
/// and queries, computes tf/idf-based vectors, scores query-to-method cosine
/// similarities against the corpus, and writes effectiveness measures
/// (all-methods and best-methods) to the output files.
/// </summary>
/// <param name="args">Unused command-line arguments.</param>
static void Main(string[] args)
{
    // All inputs/outputs live under the experiment directory relative to the build output.
    string relativeLocation = "..\\..\\..\\..\\";
    Dictionary<string, string> config = new Dictionary<string, string>();
    config.Add("idPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpusMapping.txt"));
    config.Add("docPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpus.txt"));
    config.Add("qidPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoListOfFeatures.txt"));
    config.Add("qdocPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoQueries.txt"));
    config.Add("goldSetDir", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoFeaturesToGoldSetMethodsMapping"));
    config.Add("effAllPath", System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessAllMethods.txt"));
    config.Add("effBestPath", System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessBestMethods.txt"));
    Console.WriteLine("Running experiment...");
    Console.WriteLine("Importing corpus...");
    TLArtifactsCollection corpusArtifacts = Corpus.Import(config["idPath"], config["docPath"]);
    Console.WriteLine("Computing corpus vectors...");
    Vectorizer corpusVectors = new Vectorizer(corpusArtifacts, "Ordinal");
    Console.WriteLine("Computing corpus tf, df...");
    Normalizer corpusTF = new Normalizer(corpusVectors.Vectors);
    Console.WriteLine("Computing corpus idf...");
    NormalizedVector corpusIDF = InverseDocumentFrequency.Compute(corpusVectors.Frequencies, corpusVectors.Vectors.Count);
    Console.WriteLine("Computing corpus tf-idf...");
    // NOTE(review): corpusTFIDF is computed but the similarity computation below
    // uses the raw tf vectors (corpusTF.Vectors), not the tf-idf weighted ones.
    // Confirm whether this is intentional or the tf-idf vectors should be used.
    NormalizedVectorCollection corpusTFIDF = TFIDF.Compute(corpusTF.Vectors, corpusIDF);
    Console.WriteLine("Importing queries...");
    TLArtifactsCollection queryArtifacts = Corpus.Import(config["qidPath"], config["qdocPath"]);
    // Bug fix: this step vectorizes the queries, but the original message said
    // "Computing corpus vectors..." (copy-paste from the corpus step above).
    Console.WriteLine("Computing query vectors...");
    Vectorizer queryVectors = new Vectorizer(queryArtifacts, "Boolean");
    Console.WriteLine("Computing similarities...");
    TLSimilarityMatrix sims =
        CosineSimilarity.Compute(corpusTF.Vectors, corpusTF.Lengths, queryVectors.Vectors);
    Console.WriteLine("Importing gold set...");
    TLSimilarityMatrix goldset = AnswerMapping.Import(config["goldSetDir"]);
    Console.WriteLine("Calculating effectiveness measures...");
    Effectiveness.Export(queryArtifacts, sims, goldset, config["effAllPath"], config["effBestPath"]);
    Console.WriteLine("Effectiveness measures written to:\n\t" + config["effAllPath"] + "\n\t" + config["effBestPath"]);
    Console.WriteLine("Experiment complete.");
    Console.WriteLine("\nPress enter key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Weights each document's term frequencies by the corresponding inverse
/// document frequency (tf * idf).
/// </summary>
/// <param name="tf">Per-document normalized term frequencies.</param>
/// <param name="idf">Inverse document frequency per term.</param>
/// <returns>A new collection of tf-idf weighted vectors, one per document.</returns>
public static NormalizedVectorCollection Compute(NormalizedVectorCollection tf, NormalizedVector idf)
{
    NormalizedVectorCollection weighted = new NormalizedVectorCollection();
    foreach (KeyValuePair<string, NormalizedVector> document in tf)
    {
        NormalizedVector weightedDocument = new NormalizedVector(document.Key);
        foreach (KeyValuePair<string, double> term in document.Value)
        {
            weightedDocument.Add(term.Key, term.Value * idf[term.Key]);
        }
        weighted.Add(weightedDocument);
    }
    return weighted;
}
/// <summary>
/// Computes tf-idf weights: every term frequency in <paramref name="tf"/>
/// is multiplied by that term's inverse document frequency.
/// </summary>
/// <param name="tf">Normalized term-frequency vectors keyed by document id.</param>
/// <param name="idf">Inverse document frequencies keyed by term.</param>
/// <returns>The tf-idf weighted vector collection.</returns>
public static NormalizedVectorCollection Compute(NormalizedVectorCollection tf, NormalizedVector idf)
{
    NormalizedVectorCollection result = new NormalizedVectorCollection();
    foreach (KeyValuePair<string, NormalizedVector> docEntry in tf)
    {
        NormalizedVector docWeights = new NormalizedVector(docEntry.Key);
        foreach (KeyValuePair<string, double> termEntry in docEntry.Value)
        {
            double weight = termEntry.Value * idf[termEntry.Key];
            docWeights.Add(termEntry.Key, weight);
        }
        result.Add(docWeights);
    }
    return result;
}
/// <summary>
/// Computes the cosine similarity between every query and every document.
/// </summary>
/// <param name="docs">Normalized document vectors keyed by document id.</param>
/// <param name="lengths">Precomputed vector length per document id.</param>
/// <param name="queries">Query vectors keyed by query id.</param>
/// <returns>A similarity matrix with one link per (query, document) pair.</returns>
public static TLSimilarityMatrix Compute(NormalizedVectorCollection docs, NormalizedVector lengths, DocumentVectorCollection queries)
{
    TLSimilarityMatrix similarities = new TLSimilarityMatrix();
    foreach (KeyValuePair<string, DocumentVector> query in queries)
    {
        // All query term frequencies are 1, so the query vector's length is
        // simply the square root of its term count.
        double queryLength = Math.Sqrt(query.Value.Count);
        foreach (KeyValuePair<string, NormalizedVector> doc in docs)
        {
            double docLength = lengths[doc.Key];
            double dotProduct = ComputeProduct(query.Value, doc.Value);
            similarities.AddLink(query.Key, doc.Key, dotProduct / (queryLength * docLength));
        }
    }
    return similarities;
}
/// <summary>
/// Builds the query-to-document cosine similarity matrix: for each pair, the
/// dot product of the two vectors divided by the product of their lengths.
/// </summary>
/// <param name="docs">Normalized document vectors.</param>
/// <param name="lengths">Document vector lengths keyed by document id.</param>
/// <param name="queries">Boolean query vectors (all term frequencies are 1).</param>
/// <returns>The populated similarity matrix.</returns>
public static TLSimilarityMatrix Compute(NormalizedVectorCollection docs, NormalizedVector lengths, DocumentVectorCollection queries)
{
    TLSimilarityMatrix matrix = new TLSimilarityMatrix();
    foreach (KeyValuePair<string, DocumentVector> queryEntry in queries)
    {
        // Because every query term frequency is 1, the query length reduces
        // to sqrt(number of terms in the query vector).
        double qLength = Math.Sqrt(queryEntry.Value.Count);
        foreach (KeyValuePair<string, NormalizedVector> docEntry in docs)
        {
            double product = ComputeProduct(queryEntry.Value, docEntry.Value);
            double score = product / (qLength * lengths[docEntry.Key]);
            matrix.AddLink(queryEntry.Key, docEntry.Key, score);
        }
    }
    return matrix;
}