/// <summary>
/// Perform a search using the given query.
/// </summary>
public List<SearchResultItem> executeQuery(Query query, WeightingScheme scheme)
{
    // Find the documents that contain terms that exist in the query.
    var results = new Dictionary<int, float>();
    foreach (var term in query.terms)
    {
        float idf = scheme.computeIDF((float)documents.getDocuments().Count,
                                      (float)term.term.getOccurences().Count);
        foreach (var occurence in term.term.getOccurences())
        {
            float dl = (float)documents.documentById(occurence.documentId).length;
            float termSimilarity = scheme.computeTermSimilarity((float)occurence.frequency,
                                                                idf,
                                                                (float)term.frequency,
                                                                dl,
                                                                documents.averageDocumentLength);
            // Accumulate per-document similarity across all query terms.
            float similarity;
            if (results.TryGetValue(occurence.documentId, out similarity))
            {
                results[occurence.documentId] = similarity + termSimilarity;
            }
            else
            {
                results[occurence.documentId] = termSimilarity;
            }
        }
    }

    // Create a list of resulting documents and their similarity scores,
    // ordered by descending similarity.
    var resultList = new List<SearchResultItem>();
    foreach (var i in results)
    {
        resultList.Add(new SearchResultItem(i.Key, i.Value));
    }
    resultList.Sort((x, y) => y.similarity.CompareTo(x.similarity));
    return resultList;
}
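// A minimal sketch of one possible WeightingScheme for executeQuery above,
// using the classic Okapi BM25 formulas. This is illustrative only: the
// interface shape is inferred from the two calls above (computeIDF and
// computeTermSimilarity), and k1/b are the conventional BM25 defaults, not
// values taken from this project. Note that this WeightingScheme (an object
// with scoring methods) is distinct from the WeightingScheme enum used by
// the sentence-embedding methods below.
public class BM25WeightingScheme : WeightingScheme
{
    private const float k1 = 1.2f;   // term-frequency saturation
    private const float b = 0.75f;   // document-length normalization

    public float computeIDF(float documentCount, float documentFrequency)
    {
        // BM25 IDF: the 0.5 terms smooth very rare/common terms; +1 keeps it positive.
        return (float)System.Math.Log(
            (documentCount - documentFrequency + 0.5f) / (documentFrequency + 0.5f) + 1.0f);
    }

    public float computeTermSimilarity(float termFrequency, float idf,
                                       float queryTermFrequency,
                                       float documentLength, float averageDocumentLength)
    {
        // Saturated, length-normalized term frequency, scaled by IDF and
        // by how often the term appears in the query.
        float norm = termFrequency + k1 * (1 - b + b * documentLength / averageDocumentLength);
        return idf * (termFrequency * (k1 + 1)) / norm * queryTermFrequency;
    }
}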
public Vec SingleSentence2Vec(string sentence, WeightingScheme weightingScheme = WeightingScheme.AVG)
{
    Vec sentenceVector = new Vec();
    List<Vec> sentenceVectorList = new List<Vec>();

    // Look up a word vector for every token in the sentence.
    string[] words = sentence.Split(' ');
    foreach (string word in words)
    {
        Vec vec = Word2Vec(word.ToLower());
        sentenceVectorList.Add(vec);
    }

    if (weightingScheme == WeightingScheme.AVG)
    {
        // Average the word vectors component-wise: sum each dimension over all
        // words, then divide by the number of words (not by the dimensionality,
        // which would scale the result incorrectly).
        int dim = sentenceVectorList[0].VecNodes.Count;
        double nodeTotalValue;
        for (int k = 0; k < dim; k++)
        {
            nodeTotalValue = 0;
            for (int j = 0; j < sentenceVectorList.Count; j++)
            {
                Vec curWordVec = sentenceVectorList[j];
                double curNodeVal = curWordVec.VecNodes[k];
                nodeTotalValue += curNodeVal;
            }
            sentenceVector.VecNodes.Add(nodeTotalValue / sentenceVectorList.Count);
        }
    }
    return sentenceVector;
}
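// A minimal usage sketch: sentence embeddings are typically compared with
// cosine similarity. This helper is illustrative, not part of this codebase;
// it only assumes Vec exposes its components as VecNodes, as used above.
public static double CosineSimilarity(Vec a, Vec b)
{
    double dot = 0, normA = 0, normB = 0;
    for (int k = 0; k < a.VecNodes.Count; k++)
    {
        dot += a.VecNodes[k] * b.VecNodes[k];
        normA += a.VecNodes[k] * a.VecNodes[k];
        normB += b.VecNodes[k] * b.VecNodes[k];
    }
    return dot / (System.Math.Sqrt(normA) * System.Math.Sqrt(normB));
}

// Example:
//   Vec v1 = SingleSentence2Vec("the weather is nice");
//   Vec v2 = SingleSentence2Vec("it is a nice day");
//   double sim = CosineSimilarity(v1, v2);   // closer to 1.0 means more similar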
public List<Vec> Sentence2Vec(List<string> sentences, WeightingScheme weightingScheme = WeightingScheme.AVG)
{
    // Implementing TF-IDF: compute one weight vector per sentence so the
    // TFIDF weighting scheme has a per-word weight available below.
    TFIDFGenerator tfidfGenerator = new TFIDFGenerator();
    List<List<double>> weights = tfidfGenerator.TFIDFWeightVectorsForSentences(sentences.ToArray());

    // Build one list of word vectors per sentence.
    List<List<Vec>> matrixList = new List<List<Vec>>();
    sentences.ForEach(sentence =>
    {
        List<Vec> sentenceVectorList = new List<Vec>();
        string[] words = sentence.Split(' ');
        foreach (string word in words)
        {
            Vec vec = Word2Vec(word.ToLower());
            sentenceVectorList.Add(vec);
        }
        matrixList.Add(sentenceVectorList);
    });

    List<Vec> vectorList = new List<Vec>();
    // Traverse each sentence.
    for (int i = 0; i < sentences.Count; i++)
    {
        Vec sentenceVector = null;
        List<Vec> curVecList = matrixList[i];
        if (weightingScheme == WeightingScheme.TFIDF)
        {
            // Combine this sentence's word vectors using its TF-IDF weights.
            List<double> weight = weights[i];
            sentenceVector = TFIDFMultiply(curVecList, weight);
        }
        if (weightingScheme == WeightingScheme.AVG)
        {
            int dim = curVecList[0].VecNodes.Count;
            sentenceVector = new Vec();
            double nodeTotalValue;
            for (int k = 0; k < dim; k++)
            {
                nodeTotalValue = 0;
                for (int j = 0; j < curVecList.Count; j++)
                {
                    Vec curWordVec = curVecList[j];
                    double curNodeVal = curWordVec.VecNodes[k];
                    nodeTotalValue += curNodeVal;
                }
                // Divide by the number of word vectors to get a component-wise average.
                sentenceVector.VecNodes.Add(nodeTotalValue / curVecList.Count);
            }
        }
        vectorList.Add(sentenceVector);
    }

    // Cache each sentence vector, skipping sentences that are already cached.
    for (int i = 0; i < vectorList.Count; i++)
    {
        if (!this.dict.ContainsKey(sentences[i]))
        {
            this.dict.Add(sentences[i], vectorList[i]);
        }
    }
    return vectorList;
}
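// A plausible sketch of the TFIDFMultiply helper called above: a TF-IDF
// weighted average of a sentence's word vectors. The shape is inferred from
// the call site (one weight per word vector); the project's actual
// implementation is not shown here and may differ.
private Vec TFIDFMultiply(List<Vec> wordVectors, List<double> weights)
{
    int dim = wordVectors[0].VecNodes.Count;
    Vec sentenceVector = new Vec();

    // Total weight, used to normalize so the result is a weighted average.
    double weightSum = 0;
    for (int j = 0; j < wordVectors.Count; j++)
    {
        weightSum += weights[j];
    }

    for (int k = 0; k < dim; k++)
    {
        double nodeTotalValue = 0;
        for (int j = 0; j < wordVectors.Count; j++)
        {
            // Scale each word's contribution by its TF-IDF weight.
            nodeTotalValue += wordVectors[j].VecNodes[k] * weights[j];
        }
        sentenceVector.VecNodes.Add(weightSum > 0 ? nodeTotalValue / weightSum : 0);
    }
    return sentenceVector;
}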