/// <summary>
/// Scores every candidate tag against the question's stemmed words via
/// <c>Utils.LanguageModel</c> and stores the result in <c>tagQuestionSim</c>,
/// keyed by tag.
/// </summary>
/// <param name="question">Question whose <c>StemWords</c> are scored against each tag.</param>
private void ComputeQuestionTagSimHeriusticlly(Question question)
{
    foreach (var tag in candidates.Keys)
    {
        var score = Utils.LanguageModel(question.StemWords, tag);
        tagQuestionSim.Add(tag, score);
    }
}
/// <summary>
/// Equation 2 in the paper: for every neighbour, adds the weighted sum of its
/// question-similarity features to the matching entry of <c>questionsim</c>.
/// </summary>
/// <param name="question">Not read by this method.</param>
/// <param name="neighbours">Neighbour questions; only their count is used here.</param>
private void ComputeQuestionSim(Question question, List<Question> neighbours)
{
    for (int neighbourIndex = 0; neighbourIndex < neighbours.Count; neighbourIndex++)
    {
        for (int featureIndex = 0; featureIndex < QUESTIONFEATURENUM; featureIndex++)
        {
            var weight = questionSimWeights[featureIndex];
            var feature = questionSimFeatures[featureIndex][neighbourIndex];
            questionsim[neighbourIndex] += weight * feature;
        }
    }
}
/// <summary>
/// Runs the full tagging pipeline for one question against its neighbours.
/// </summary>
/// <param name="question">The question to tag.</param>
/// <param name="neighbours">Neighbour questions handed to the individual steps.</param>
public void TagQuestion(Question question, List<Question> neighbours)
{
    // The steps run in a fixed order; later steps presumably read state that
    // earlier ones set up (verify before reordering).
    ConstructCandidateTable(neighbours);
    Init(question,neighbours);
    ComputeTagSim();
    ComputeTagSignificance(neighbours);
    ComputeQuestionSim(question, neighbours);
    ComputeQuestionTagSimHeriusticlly(question);
    // Final step: combine the computed similarities into per-tag scores.
    RankingTags(question, neighbours);
}
/// <summary>
/// Builds <c>QFeatureNum</c> feature vectors, one entry per neighbour, from
/// <c>RandomQuestionFeature</c>, then sets feature 0 to 1 for every neighbour
/// whose related tags contain at least one tag from <c>TruthTags</c>.
/// </summary>
/// <param name="q">Not read by this method.</param>
/// <param name="neighbour">Neighbour questions the features are generated for.</param>
/// <returns>An array of <c>QFeatureNum</c> vectors, each of length <c>neighbour.Count</c>.</returns>
public Vector[] ExtractQuestionSim(Question q, List<Question> neighbour)
{
    Vector[] features = new Vector[QFeatureNum];
    for (int featureIndex = 0; featureIndex < QFeatureNum; featureIndex++)
    {
        features[featureIndex] = RandomQuestionFeature(neighbour.Count);
    }

    for (int neighbourIndex = 0; neighbourIndex < neighbour.Count; neighbourIndex++)
    {
        foreach (var truthTag in TruthTags)
        {
            if (!neighbour[neighbourIndex].RelatedTags.Contains(truthTag))
            {
                continue;
            }

            // Overwrite the random value: this neighbour shares a truth tag.
            features[0][neighbourIndex] = 1;
        }
    }

    return features;
}
/// <summary>
/// Reads questions and their neighbours from <c>..\..\..\resource\test.txt</c>
/// and runs <c>QuestionTagging.TagQuestion</c> on each question with at most
/// its first 50 neighbours.
/// </summary>
/// <remarks>
/// Each record is one tab-separated question line followed by neighbour lines,
/// terminated by a line containing only <c>###</c>. Field 0 is passed through
/// as the first constructor argument, field 1 is split on <c>"; "</c> and
/// field 2 is split on <c>'|'</c>. Neighbour lines with fewer than three
/// fields are skipped.
/// </remarks>
static void Test()
{
    string[] listSeparator = new string[] { "; " };

    // Dispose the reader even when parsing or tagging throws.
    using (StreamReader sr = new StreamReader(@"..\..\..\resource\test.txt"))
    {
        while (!sr.EndOfStream)
        {
            string question = sr.ReadLine();
            string[] q_tmp = question.Split('\t');
            Question q = new Question(
                q_tmp[0],
                q_tmp[2].Split('|').ToList(),
                q_tmp[1].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries).ToList());
            Console.WriteLine(q_tmp[0]);

            List<Question> neighbours = new List<Question>();
            while (true)
            {
                string neighbour = sr.ReadLine();

                // ReadLine returns null at end of file; treat a missing
                // terminator like "###" instead of dereferencing null.
                if (neighbour == null || neighbour == "###")
                {
                    break;
                }

                string[] tmp = neighbour.Split('\t');
                if (tmp.Length < 3)
                {
                    continue;
                }

                neighbours.Add(new Question(
                    tmp[0],
                    tmp[2].Split('|').ToList(),
                    tmp[1].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries).ToList()));
            }

            QuestionTagging tagger = new QuestionTagging();
            Console.WriteLine("Tags:");

            // GetRange throws when asked for more items than exist, so clamp
            // to the number of neighbours actually read.
            tagger.TagQuestion(q, neighbours.GetRange(0, Math.Min(neighbours.Count, 50)));
        }
    }
}
/// <summary>
/// Counts how often each tag occurs among the neighbours and stores the most
/// frequent ones in <c>question.UnRelatedTags</c>, taking as many tags as the
/// question has related tags (or fewer when the neighbours do not supply enough
/// distinct tags).
/// </summary>
/// <param name="question">Question whose <c>UnRelatedTags</c> list is replaced.</param>
/// <param name="neighbours">Neighbour questions whose related tags are counted.</param>
private void SampleUnrelatedTag(Question question, List<Question> neighbours)
{
    tagFrequency.Clear();
    foreach (var neighbour in neighbours)
    {
        foreach (var tag in neighbour.RelatedTags)
        {
            if (tagFrequency.ContainsKey(tag))
            {
                tagFrequency[tag]++;
            }
            else
            {
                tagFrequency.Add(tag, 1);
            }
        }
    }

    // Take() stops early when there are fewer distinct tags than requested;
    // indexing the ordered array directly used to throw in that case.
    // NOTE(review): tags already in question.RelatedTags are not excluded
    // here - confirm whether that is intended for "unrelated" samples.
    question.UnRelatedTags = tagFrequency
        .OrderByDescending(x => x.Value)
        .Take(question.RelatedTags.Count)
        .Select(x => x.Key)
        .ToList();
}
/// <summary>
/// Equation 1 in the paper: fills <c>tagScore</c> with one score per candidate tag.
/// </summary>
/// <remarks>
/// The score is the tag's significance multiplied by the sum of three terms:
/// the largest <c>questionsim</c> value among neighbours carrying the tag, the
/// tag's own entry in <c>tagQuestionSim</c>, and the sum over all other tags of
/// <c>tagSim</c> times that other tag's <c>tagQuestionSim</c> value.
/// </remarks>
/// <param name="question">Not read by this method.</param>
/// <param name="neighbours">Neighbour questions, index-aligned with <c>questionsim</c>.</param>
private void RankingTags(Question question, List<Question> neighbours)
{
    tagScore.Clear();
    foreach (var candidate in candidates)
    {
        string tag = candidate.Key;
        int tagindex = candidate.Value;

        double q_t = tagQuestionSim[tag];

        // Walk by index instead of calling IndexOf per neighbour: IndexOf is a
        // linear search and returns the first equal entry, which is the wrong
        // slot when the list holds duplicates.
        double q_q_t = 0;
        for (int j = 0; j < neighbours.Count; j++)
        {
            if (neighbours[j].RelatedTags.Contains(tag) && q_q_t < questionsim[j])
            {
                q_q_t = questionsim[j];
            }
        }

        double q_t_t = 0;
        foreach (var other in tagQuestionSim)
        {
            int t_index = candidates[other.Key];
            if (tagindex != t_index)
            {
                q_t_t += tagSim[tagindex, t_index] * other.Value;
            }
        }

        tagScore.Add(tag, tagsignificance[tagindex] * (q_q_t + q_t + q_t_t));
    }
}
/// <summary>
/// Loads the precomputed feature sets for <paramref name="question"/>, builds
/// the per-tag question feature vectors and allocates <c>tagSim</c> and
/// <c>questionsim</c>.
/// </summary>
/// <remarks>
/// For each candidate tag and each feature, the stored value is the maximum of
/// that feature over the neighbours whose related tags contain the tag (the
/// vector's initial value is kept when no neighbour carries the tag).
/// </remarks>
/// <param name="question">Question to look up in <c>questions</c>.</param>
/// <param name="neighbours">Neighbour questions, index-aligned with the question-similarity features.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="question"/> is not contained in <c>questions</c>.
/// </exception>
private void Init(Question question, List<Question> neighbours)
{
    // Look the instance up once; both feature lists share the same index.
    int instanceIndex = this.questions.IndexOf(question);
    if (instanceIndex < 0)
    {
        throw new ArgumentOutOfRangeException("question", "The question was not found in the questions list.");
    }

    tagSimFeatures = instanceTagSimFeatures[instanceIndex];
    questionSimFeatures = instanceQuestionSimFeatures[instanceIndex];

    tagQuestionFeature = new Dictionary<string, Vector>();
    foreach (var candidate in candidates)
    {
        Vector tmp_v = new DenseVector(QUESTIONFEATURENUM);
        if (QUESTIONFEATURENUM > 0)
        {
            // Which neighbours carry the tag does not depend on the feature,
            // so determine it once per candidate rather than once per feature.
            List<int> taggedNeighbours = new List<int>();
            for (int j = 0; j < neighbours.Count; j++)
            {
                if (neighbours[j].RelatedTags.Contains(candidate.Key))
                {
                    taggedNeighbours.Add(j);
                }
            }

            for (int i = 0; i < QUESTIONFEATURENUM; i++)
            {
                foreach (int j in taggedNeighbours)
                {
                    tmp_v[i] = Math.Max(tmp_v[i], questionSimFeatures[i][j]);
                }
            }
        }

        tagQuestionFeature.Add(candidate.Key, tmp_v);
    }

    tagSim = new DenseMatrix(candidates.Count, candidates.Count);
    questionsim = new DenseVector(neighbours.Count);
}
/// <summary>
/// Function f in the paper: rebuilds <c>tagQuestionSim</c> from scratch with
/// one <c>Utils.LanguageModel</c> score per candidate tag.
/// </summary>
/// <param name="question">Question whose <c>StemWords</c> are scored against each tag.</param>
private void ComputeQuestionTagSimHeriusticlly(Question question)
{
    tagQuestionSim = new Dictionary<string, double>();

    foreach (var tag in candidates.Keys)
    {
        double similarity = Utils.LanguageModel(question.StemWords, tag);
        tagQuestionSim.Add(tag, similarity);
    }
}
/// <summary>
/// Extracts the question-similarity features for one question and appends
/// them to <c>instanceQuestionSimFeatures</c>.
/// </summary>
/// <param name="question">Question passed to the feature extractor.</param>
/// <param name="list">Neighbour questions passed to the feature extractor.</param>
private void ComputeQuestionSimFeature(Question question, List<Question> list)
{
    var extracted = this.featureExtractor.ExtractQuestionSim(question, list);
    this.instanceQuestionSimFeatures.Add(extracted);
}
/// <summary>
/// Equation 2 in the paper: adds to each neighbour's entry in
/// <c>questionsim</c> the sum of its similarity features, each weighted by
/// <c>exp(weight)</c> divided by the sum of <c>exp(weight)</c> over all features.
/// </summary>
/// <param name="question">Not read by this method.</param>
/// <param name="neighbours">Neighbour questions; only their count is used here.</param>
private void ComputeQuestionSim(Question question, List<Question> neighbours)
{
    // The exponentiated weights do not depend on the neighbour, so compute
    // them once instead of once per neighbour.
    double[] expWeights = new double[QUESTIONFEATURENUM];
    double sum = 0;
    for (int i = 0; i < QUESTIONFEATURENUM; i++)
    {
        expWeights[i] = Math.Exp(questionSimWeights[i]);
        sum += expWeights[i];
    }

    for (int j = 0; j < neighbours.Count; j++)
    {
        for (int i = 0; i < QUESTIONFEATURENUM; i++)
        {
            // Same operation order as before (multiply, then divide) so the
            // floating-point results are unchanged.
            questionsim[j] += expWeights[i] * questionSimFeatures[i][j] / sum;
        }
    }
}
/// <summary>
/// Equation 1 in the paper: fills <c>tagScore</c> with one score per candidate
/// tag and prints the highest-scoring entries to the console.
/// </summary>
/// <remarks>
/// The score is the tag's significance multiplied by the sum of three terms:
/// the total <c>questionsim</c> of the neighbours carrying the tag, the tag's
/// own entry in <c>tagQuestionSim</c>, and the sum over all other tags of
/// <c>tagSim</c> times that other tag's <c>tagQuestionSim</c> value.
/// </remarks>
/// <param name="question">Not read by this method.</param>
/// <param name="neighbours">Neighbour questions, index-aligned with <c>questionsim</c>.</param>
private void RankingTags(Question question, List<Question> neighbours)
{
    // Number of ranked entries written to the console. The previous
    // counter-based loop (break on "i++ > 10") printed 12 entries; the count
    // is kept as-is. NOTE(review): confirm whether 10 was intended.
    const int MaxPrinted = 12;

    // Start from an empty table so a repeated call neither throws on an
    // existing key nor reports scores left over from a previous question.
    tagScore.Clear();

    foreach (var candidate in candidates)
    {
        string tag = candidate.Key;
        int tagindex = candidate.Value;

        double q_t = tagQuestionSim[tag];

        // Walk by index instead of calling IndexOf per neighbour: IndexOf is a
        // linear search and returns the first equal entry, which is the wrong
        // slot when the list holds duplicates.
        double q_q_t = 0;
        for (int j = 0; j < neighbours.Count; j++)
        {
            if (neighbours[j].RelatedTags.Contains(tag))
            {
                q_q_t += questionsim[j];
            }
        }

        double q_t_t = 0;
        foreach (var other in tagQuestionSim)
        {
            int t_index = candidates[other.Key];
            if (tagindex != t_index)
            {
                q_t_t += tagSim[tagindex, t_index] * other.Value;
            }
        }

        tagScore.Add(tag, tagsignificance[tagindex] * (q_q_t + q_t + q_t_t));
    }

    foreach (var ele in tagScore.OrderByDescending(x => x.Value).Take(MaxPrinted))
    {
        Console.WriteLine(ele);
    }
}
/// <summary>
/// Prepares the tagger for one question: creates a random feature extractor,
/// draws random tag and question weights from a continuous uniform
/// distribution, extracts both feature sets and allocates <c>tagSim</c> and
/// <c>questionsim</c>.
/// </summary>
/// <param name="question">Question passed to the question-similarity extractor.</param>
/// <param name="neighbours">Neighbour questions; they also size <c>questionsim</c>.</param>
private void Init(Question question, List<Question> neighbours)
{
    featureExtractor = new RandomFeatureExtractor();

    // Statement order is kept: both weight vectors draw from the same
    // distribution object, tag weights first.
    var uniform = new MathNet.Numerics.Distributions.ContinuousUniform();
    tagSimWeights = (Vector)Vector.Build.Random(TAGFEATURENUM, uniform);
    questionSimWeights = (Vector)Vector.Build.Random(QUESTIONFEATURENUM, uniform);

    var candidateTags = candidates.Keys.ToList();
    tagSimFeatures = featureExtractor.ExtractTagSim(candidateTags);
    questionSimFeatures = featureExtractor.ExtractQuestionSim(question, neighbours);

    int tagCount = candidates.Count;
    tagSim = new DenseMatrix(tagCount, tagCount);
    questionsim = new DenseVector(neighbours.Count);
}
/// <summary>
/// Builds <c>QFeatureNum</c> feature vectors from <c>RandomQuestionFeature</c>,
/// each with one entry per neighbour.
/// </summary>
/// <param name="q">Not read by this method.</param>
/// <param name="neighbour">Neighbour questions; only their count is used here.</param>
/// <returns>An array of <c>QFeatureNum</c> vectors, each of length <c>neighbour.Count</c>.</returns>
public Vector[] ExtractQuestionSim(Question q, List<Question> neighbour)
{
    Vector[] features = new Vector[QFeatureNum];

    for (int featureIndex = 0; featureIndex < QFeatureNum; featureIndex++)
    {
        features[featureIndex] = RandomQuestionFeature(neighbour.Count);
    }

    return features;
}
/// <summary>
/// Reads training questions and their neighbours from
/// <c>..\..\..\resource\train.txt</c>, trains the model with the settings from
/// the application configuration and writes the learned weights to
/// <c>questionfeature.weight</c> and <c>tagfeature.weight</c>.
/// </summary>
/// <remarks>
/// Each record is one tab-separated question line followed by neighbour lines,
/// terminated by a line containing only <c>###</c>. Field 0 is passed through
/// as the first constructor argument, field 1 is split on <c>"; "</c> and
/// field 2 is split on <c>'|'</c>. Neighbour lines with fewer than three
/// fields are skipped, and at most the first 50 neighbours of a question are
/// kept. Reading stops once more than 100 instances have been collected.
/// </remarks>
static void Train()
{
    List<Question> trainingInstances = new List<Question>();
    List<List<Question>> instancesNeighbours = new List<List<Question>>();
    string[] listSeparator = new string[] { "; " };

    // Dispose the reader even when parsing throws.
    using (StreamReader sr = new StreamReader(@"..\..\..\resource\train.txt"))
    {
        while (!sr.EndOfStream)
        {
            string question = sr.ReadLine();
            string[] q_tmp = question.Split('\t');
            Question q = new Question(
                q_tmp[0],
                q_tmp[2].Split('|').ToList(),
                q_tmp[1].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries).ToList());

            List<Question> neighbours = new List<Question>();
            while (true)
            {
                string neighbour = sr.ReadLine();

                // ReadLine returns null at end of file; treat a missing
                // terminator like "###" instead of dereferencing null.
                if (neighbour == null || neighbour == "###")
                {
                    break;
                }

                string[] tmp = neighbour.Split('\t');
                if (tmp.Length < 3)
                {
                    continue;
                }

                neighbours.Add(new Question(
                    tmp[0],
                    tmp[2].Split('|').ToList(),
                    tmp[1].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries).ToList()));
            }

            trainingInstances.Add(q);
            instancesNeighbours.Add(neighbours.GetRange(0, Math.Min(neighbours.Count, 50)));
            if (trainingInstances.Count > 100)
            {
                break;
            }

            Console.WriteLine("Training Instance:" + trainingInstances.Count);
        }
    }

    Training t = new Training();
    t.TrainingInstancesInit(trainingInstances, instancesNeighbours);

    int qFeatureNum = int.Parse(ConfigurationManager.AppSettings["QFeatureNum"]);
    int tFeatureNum = int.Parse(ConfigurationManager.AppSettings["TFeatureNum"]);
    int maxIter = int.Parse(ConfigurationManager.AppSettings["Max_Iter"]);
    double learningRate = double.Parse(ConfigurationManager.AppSettings["LearningRate"]);
    double lamda = double.Parse(ConfigurationManager.AppSettings["Lamda"]);
    double stopGap = double.Parse(ConfigurationManager.AppSettings["StopGap"]);
    double decay = double.Parse(ConfigurationManager.AppSettings["decay"]);
    t.Train(qFeatureNum, tFeatureNum, maxIter, learningRate, lamda, stopGap, decay);

    using (StreamWriter sw = new StreamWriter("questionfeature.weight"))
    {
        for (int i = 0; i < qFeatureNum; i++)
        {
            sw.WriteLine(t.questionSimWeights[i]);
            Console.WriteLine(t.questionSimWeights[i]);
        }
    }

    // The tag weight file must hold the tag weights, one per tag feature.
    // It previously received the question weights and iterated over the
    // question feature count.
    using (StreamWriter sw = new StreamWriter("tagfeature.weight"))
    {
        for (int i = 0; i < tFeatureNum; i++)
        {
            sw.WriteLine(t.tagSimWeights[i]);
            Console.WriteLine(t.tagSimWeights[i]);
        }
    }
}