private void ComputeQuestionTagSimHeriusticlly(Question question)
        {
            // Score each candidate tag against the question's stemmed words and
            // record the score in tagQuestionSim under the tag's key.
            foreach (var entry in candidates)
            {
                var score = Utils.LanguageModel(question.StemWords, entry.Key);
                tagQuestionSim.Add(entry.Key, score);
            }
        }
 /// <summary>
 /// Equation 2 in the paper: for every neighbour, adds the weighted sum of its
 /// question-similarity features to the matching entry of <c>questionsim</c>.
 /// </summary>
 /// <param name="question">The question being tagged (not read by this method).</param>
 /// <param name="neighbours">Neighbouring questions; only their count is used here.</param>
 private void ComputeQuestionSim(Question question, List<Question> neighbours)
 {
     for (int n = 0; n < neighbours.Count; n++)
     {
         for (int f = 0; f < QUESTIONFEATURENUM; f++)
         {
             // Feature f of neighbour n, scaled by the weight of feature f.
             var term = questionSimWeights[f] * questionSimFeatures[f][n];
             questionsim[n] += term;
         }
     }
 }
 /// <summary>
 /// Runs the tagging pipeline for one question: each step below is invoked once,
 /// in this exact order, and the final step ranks the candidate tags.
 /// </summary>
 /// <param name="question">The question to tag.</param>
 /// <param name="neighbours">Questions treated as the neighbours of <paramref name="question"/>.</param>
 public void TagQuestion(Question question, List<Question> neighbours)
 {
     // NOTE(review): presumably fills the candidate tag table from the neighbours' tags
     // (helper not visible here) - confirm. It must run before Init.
     ConstructCandidateTable(neighbours);
     Init(question,neighbours);
     // NOTE(review): ComputeTagSim / ComputeTagSignificance are defined elsewhere;
     // presumably they fill the tag-similarity matrix and tag significance - confirm.
     ComputeTagSim();
     ComputeTagSignificance(neighbours);
     ComputeQuestionSim(question, neighbours);
     ComputeQuestionTagSimHeriusticlly(question);
     // Must stay last: ranking combines the values produced by the preceding steps.
     RankingTags(question, neighbours);
 }
        /// <summary>
        /// Builds <c>QFeatureNum</c> feature vectors, each obtained from
        /// <c>RandomQuestionFeature</c> with one entry per neighbour, then overwrites
        /// feature 0 with 1 for every neighbour that carries at least one tag from
        /// <c>TruthTags</c>.
        /// </summary>
        /// <param name="q">The question being processed (not read by this method).</param>
        /// <param name="neighbour">Neighbouring questions; entry i of each vector belongs to neighbour i.</param>
        /// <returns>An array holding one vector per feature.</returns>
        public Vector[] ExtractQuestionSim(Question q, List<Question> neighbour)
        {
            Vector[] features = new Vector[QFeatureNum];
            for (int f = 0; f < QFeatureNum; f++)
            {
                features[f] = RandomQuestionFeature(neighbour.Count);
            }

            for (int n = 0; n < neighbour.Count; n++)
            {
                foreach (var truth in TruthTags)
                {
                    if (!neighbour[n].RelatedTags.Contains(truth))
                    {
                        continue;
                    }

                    // One match is enough: further matches would store the same value.
                    features[0][n] = 1;
                    break;
                }
            }

            return features;
        }
Exemple #5
0
        /// <summary>
        /// Reads test cases from <c>..\..\..\resource\test.txt</c> and runs
        /// <c>QuestionTagging.TagQuestion</c> on each of them.
        /// </summary>
        /// <remarks>
        /// Layout as parsed here: one tab-separated question line, followed by
        /// tab-separated neighbour lines, terminated by a line containing only "###".
        /// Field 0 is passed through unchanged, field 2 is split on '|' and field 1 is
        /// split on "; ". Neighbour lines with fewer than three fields are skipped.
        /// </remarks>
        static void Test()
        {
            // At most this many neighbours are handed to the tagger.
            const int MaxNeighbours = 50;
            string[] listSeparator = new string[] { "; " };

            // "using" guarantees the file handle is released even if parsing or tagging throws.
            using (StreamReader sr = new StreamReader(@"..\..\..\resource\test.txt"))
            {
                while (!sr.EndOfStream)
                {
                    string question = sr.ReadLine();
                    if (string.IsNullOrEmpty(question))
                    {
                        // An empty line (e.g. a trailing newline) cannot be a question record.
                        continue;
                    }

                    string[] q_tmp = question.Split('\t');
                    Question q = new Question(q_tmp[0]
                        , q_tmp[2].Split('|').ToList()
                        , q_tmp[1].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries).ToList()
                        );
                    Console.WriteLine(q_tmp[0]);

                    List<Question> neighbours = new List<Question>();
                    while (true)
                    {
                        string neighbour = sr.ReadLine();

                        // ReadLine returns null at end of file; treat a missing "###"
                        // terminator as the end of the block instead of dereferencing null.
                        if (neighbour == null || neighbour == "###")
                        {
                            break;
                        }

                        string[] tmp = neighbour.Split('\t');
                        if (tmp.Length < 3)
                        {
                            continue;
                        }

                        neighbours.Add(new Question(tmp[0], tmp[2].Split('|').ToList(),
                            tmp[1].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries).ToList()));
                    }

                    QuestionTagging tagger = new QuestionTagging();
                    Console.WriteLine("Tags:");

                    // GetRange throws when asked for more elements than exist, so clamp the count.
                    tagger.TagQuestion(q, neighbours.GetRange(0, Math.Min(neighbours.Count, MaxNeighbours)));
                }
            }
        }
        /// <summary>
        /// Counts how often each tag occurs among the neighbours and stores the most
        /// frequent ones in <c>question.UnRelatedTags</c>, taking as many tags as the
        /// question has related tags (or fewer when the neighbours do not supply enough
        /// distinct tags).
        /// </summary>
        /// <param name="question">Question whose <c>UnRelatedTags</c> list is replaced.</param>
        /// <param name="neighbours">Questions whose <c>RelatedTags</c> are counted.</param>
        private void SampleUnrelatedTag(Question question, List<Question> neighbours)
        {
            tagFrequency.Clear();
            foreach (var neighbour in neighbours)
            {
                foreach (var tag in neighbour.RelatedTags)
                {
                    if (tagFrequency.ContainsKey(tag))
                        tagFrequency[tag]++;
                    else
                        tagFrequency.Add(tag, 1);
                }
            }

            var orderedtag = tagFrequency.OrderByDescending(x => x.Value).ToArray();

            // The neighbours may contribute fewer distinct tags than the question has
            // related tags; clamp so the loop never indexes past the end of orderedtag.
            int wanted = Math.Min(question.RelatedTags.Count, orderedtag.Length);

            // NOTE(review): tags that are also in question.RelatedTags are not filtered
            // out here, so a "related" tag can end up in UnRelatedTags - confirm intent.
            var sampled = new List<string>(wanted);
            for (int i = 0; i < wanted; i++)
            {
                sampled.Add(orderedtag[i].Key);
            }
            question.UnRelatedTags = sampled;
        }
        /// <summary>
        /// Equation 1 in the paper: clears <c>tagScore</c> and stores a score for every
        /// candidate tag. The score is the tag's significance multiplied by the sum of
        /// three terms: the largest <c>questionsim</c> value among neighbours carrying
        /// the tag, the tag's own <c>tagQuestionSim</c> value, and the
        /// <c>tagSim</c>-weighted <c>tagQuestionSim</c> values of all other tags.
        /// </summary>
        /// <param name="question">The question being tagged (not read by this method).</param>
        /// <param name="neighbours">Neighbours, index-aligned with <c>questionsim</c>.</param>
        private void RankingTags(Question question, List<Question> neighbours)
        {
            tagScore.Clear();
            foreach (var candidate in candidates)
            {
                int tagindex = candidate.Value;
                double q_t = tagQuestionSim[candidate.Key];

                // Best similarity among the neighbours that carry this tag. Iterating by
                // index avoids a linear IndexOf search per neighbour and always reads the
                // similarity of the neighbour actually being visited.
                double q_q_t = 0;
                for (int j = 0; j < neighbours.Count; j++)
                {
                    if (neighbours[j].RelatedTags.Contains(candidate.Key) && q_q_t < questionsim[j])
                    {
                        q_q_t = questionsim[j];
                    }
                }

                // Support from every other tag, weighted by tag-to-tag similarity.
                double q_t_t = 0;
                foreach (var tag in tagQuestionSim)
                {
                    int t_index = candidates[tag.Key];
                    if (tagindex != t_index)
                    {
                        q_t_t += tagSim[tagindex, t_index] * tag.Value;
                    }
                }

                tagScore.Add(candidate.Key, tagsignificance[candidate.Value] * (q_q_t + q_t + q_t_t));
            }
        }
        /// <summary>
        /// Prepares per-question state: selects the precomputed feature sets stored for
        /// <paramref name="question"/>, builds <c>tagQuestionFeature</c>, and allocates
        /// fresh <c>tagSim</c> and <c>questionsim</c> containers.
        /// </summary>
        /// <param name="question">
        /// Question whose position in <c>this.questions</c> selects the feature sets.
        /// NOTE(review): a question that is not in the list yields index -1 and the
        /// lookups below will throw - confirm callers always pass a known instance.
        /// </param>
        /// <param name="neighbours">Neighbours, index-aligned with the question feature vectors.</param>
        private void Init(Question question, List<Question> neighbours)
        {
            // Look the instance up once; IndexOf is a linear scan.
            int instanceIndex = this.questions.IndexOf(question);
            tagSimFeatures = instanceTagSimFeatures[instanceIndex];
            questionSimFeatures = instanceQuestionSimFeatures[instanceIndex];

            tagQuestionFeature = new Dictionary<string, Vector>();
            foreach (var candidate in candidates)
            {
                // Entry i ends up as the largest value of feature i among the neighbours
                // that carry this tag (entries never drop below the vector's initial value).
                Vector tmp_v = new DenseVector(QUESTIONFEATURENUM);
                for (int j = 0; j < neighbours.Count; j++)
                {
                    // Membership does not depend on the feature index, so test it once
                    // per neighbour rather than once per (feature, neighbour) pair.
                    if (!neighbours[j].RelatedTags.Contains(candidate.Key))
                    {
                        continue;
                    }

                    for (int i = 0; i < QUESTIONFEATURENUM; i++)
                    {
                        tmp_v[i] = Math.Max(tmp_v[i], questionSimFeatures[i][j]);
                    }
                }
                tagQuestionFeature.Add(candidate.Key, tmp_v);
            }

            tagSim = new DenseMatrix(candidates.Count, candidates.Count);
            questionsim = new DenseVector(neighbours.Count);
        }
        /// <summary>
        /// Function f in the paper: replaces <c>tagQuestionSim</c> with a new table
        /// holding, for every candidate tag, the value returned by
        /// <c>Utils.LanguageModel</c> for the question's stemmed words and that tag.
        /// </summary>
        /// <param name="question">Question whose <c>StemWords</c> are scored against each tag.</param>
        private void ComputeQuestionTagSimHeriusticlly(Question question)
        {
            tagQuestionSim = new Dictionary<string, double>();
            foreach (var entry in candidates)
            {
                var score = Utils.LanguageModel(question.StemWords, entry.Key);
                tagQuestionSim.Add(entry.Key, score);
            }
        }
Exemple #10
0
 /// <summary>
 /// Extracts the question-similarity features for one question and appends them
 /// to <c>instanceQuestionSimFeatures</c>.
 /// </summary>
 /// <param name="question">The question passed to the feature extractor.</param>
 /// <param name="list">The list of questions passed along with it.</param>
 private void ComputeQuestionSimFeature(Question question, List<Question> list)
 {
     var features = this.featureExtractor.ExtractQuestionSim(question, list);
     this.instanceQuestionSimFeatures.Add(features);
 }
Exemple #11
0
 /// <summary>
 /// Equation 2 in the paper: adds to each <c>questionsim</c> entry the neighbour's
 /// features combined with exponentiated weights that are divided by the sum of
 /// all exponentiated weights.
 /// </summary>
 /// <param name="question">The question being tagged (not read by this method).</param>
 /// <param name="neighbours">Neighbouring questions; only their count is used here.</param>
 private void ComputeQuestionSim(Question question, List<Question> neighbours)
 {
     // exp(weight) per feature, evaluated once and reused for every neighbour.
     double[] expWeights = new double[QUESTIONFEATURENUM];
     double sum = 0;
     for (int f = 0; f < QUESTIONFEATURENUM; f++)
     {
         expWeights[f] = Math.Exp(questionSimWeights[f]);
         sum += expWeights[f];
     }

     for (int n = 0; n < neighbours.Count; n++)
     {
         for (int f = 0; f < QUESTIONFEATURENUM; f++)
         {
             // Multiply first, divide second, so rounding matches term by term.
             questionsim[n] += expWeights[f] * questionSimFeatures[f][n] / sum;
         }
     }
 }
        /// <summary>
        /// Equation 1 in the paper: scores every candidate tag, stores the scores in
        /// <c>tagScore</c>, and prints the highest-scoring entries to the console.
        /// The score is the tag's significance multiplied by the sum of three terms:
        /// the summed <c>questionsim</c> of neighbours carrying the tag, the tag's own
        /// <c>tagQuestionSim</c> value, and the <c>tagSim</c>-weighted
        /// <c>tagQuestionSim</c> values of all other tags.
        /// </summary>
        /// <param name="question">The question being tagged (not read by this method).</param>
        /// <param name="neighbours">Neighbours, index-aligned with <c>questionsim</c>.</param>
        private void RankingTags(Question question, List<Question> neighbours)
        {
            // Number of ranked entries written to the console.
            // NOTE(review): the previous "i++ > 10" check also let 12 entries through;
            // confirm whether a top-10 was intended.
            const int MaxPrinted = 12;

            // Start from an empty table so a repeated call does not fail on duplicate keys.
            tagScore.Clear();

            foreach (var candidate in candidates)
            {
                int tagindex = candidate.Value;
                double q_t = tagQuestionSim[candidate.Key];

                // Total similarity of the neighbours that carry this tag. Iterating by
                // index avoids a linear IndexOf search per neighbour and always reads the
                // similarity of the neighbour actually being visited.
                double q_q_t = 0;
                for (int j = 0; j < neighbours.Count; j++)
                {
                    if (neighbours[j].RelatedTags.Contains(candidate.Key))
                    {
                        q_q_t += questionsim[j];
                    }
                }

                // Support from every other tag, weighted by tag-to-tag similarity.
                double q_t_t = 0;
                foreach (var tag in tagQuestionSim)
                {
                    int t_index = candidates[tag.Key];
                    if (tagindex != t_index)
                    {
                        q_t_t += tagSim[tagindex, t_index] * tag.Value;
                    }
                }

                tagScore.Add(candidate.Key, tagsignificance[candidate.Value] * (q_q_t + q_t + q_t_t));
            }

            int printed = 0;
            foreach (var ele in tagScore.OrderByDescending(x => x.Value))
            {
                if (printed == MaxPrinted)
                {
                    break;
                }
                Console.WriteLine(ele);
                printed++;
            }
        }
        /// <summary>
        /// Prepares per-question state with a <c>RandomFeatureExtractor</c>: draws the
        /// tag and question weight vectors from a default-constructed continuous
        /// uniform distribution, extracts both feature sets, and allocates fresh
        /// <c>tagSim</c> and <c>questionsim</c> containers.
        /// </summary>
        /// <param name="question">The question handed to the feature extractor.</param>
        /// <param name="neighbours">Neighbours handed to the extractor; their count sizes <c>questionsim</c>.</param>
        private void Init(Question question, List<Question> neighbours)
        {
            featureExtractor = new RandomFeatureExtractor();

            // One distribution instance feeds both weight vectors, tag weights first.
            var uniform = new MathNet.Numerics.Distributions.ContinuousUniform();
            tagSimWeights = (Vector)Vector.Build.Random(TAGFEATURENUM, uniform);
            questionSimWeights = (Vector)Vector.Build.Random(QUESTIONFEATURENUM, uniform);

            var candidateTags = candidates.Keys.ToList();
            tagSimFeatures = featureExtractor.ExtractTagSim(candidateTags);
            questionSimFeatures = featureExtractor.ExtractQuestionSim(question, neighbours);

            // Square tag-by-tag matrix and one similarity slot per neighbour.
            tagSim = new DenseMatrix(candidates.Count, candidates.Count);
            questionsim = new DenseVector(neighbours.Count);
        }
        /// <summary>
        /// Builds <c>QFeatureNum</c> feature vectors, each obtained from
        /// <c>RandomQuestionFeature</c> with one entry per neighbour.
        /// </summary>
        /// <param name="q">The question being processed (not read by this method).</param>
        /// <param name="neighbour">Neighbouring questions; only their count is used.</param>
        /// <returns>An array holding one vector per feature.</returns>
        public Vector[] ExtractQuestionSim(Question q, List<Question> neighbour)
        {
            Vector[] features = new Vector[QFeatureNum];

            int slot = 0;
            while (slot < QFeatureNum)
            {
                features[slot] = RandomQuestionFeature(neighbour.Count);
                slot++;
            }

            return features;
        }
Exemple #15
0
        /// <summary>
        /// Loads training instances from <c>..\..\..\resource\train.txt</c>, trains the
        /// model with hyper-parameters read from the application settings, and writes
        /// the learned weights to "questionfeature.weight" and "tagfeature.weight"
        /// (echoing each weight to the console).
        /// </summary>
        /// <remarks>
        /// Layout as parsed here: one tab-separated question line, followed by
        /// tab-separated neighbour lines, terminated by a line containing only "###".
        /// Field 0 is passed through unchanged, field 2 is split on '|' and field 1 is
        /// split on "; ". Neighbour lines with fewer than three fields are skipped, and
        /// at most 50 neighbours are kept per instance.
        /// </remarks>
        static void Train()
        {
            // At most this many neighbours are kept per training instance.
            const int MaxNeighbours = 50;
            string[] listSeparator = new string[] { "; " };

            List<Question> TrainingInstances = new List<Question>();
            List<List<Question>> InstancesNeighbours = new List<List<Question>>();

            // "using" guarantees the file handle is released even if parsing throws.
            using (StreamReader sr = new StreamReader(@"..\..\..\resource\train.txt"))
            {
                while (!sr.EndOfStream)
                {
                    string question = sr.ReadLine();
                    if (string.IsNullOrEmpty(question))
                    {
                        // An empty line (e.g. a trailing newline) cannot be a question record.
                        continue;
                    }

                    string[] q_tmp = question.Split('\t');
                    Question q = new Question(q_tmp[0]
                        , q_tmp[2].Split('|').ToList()
                        , q_tmp[1].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries).ToList()
                        );

                    List<Question> neighbours = new List<Question>();
                    while (true)
                    {
                        string neighbour = sr.ReadLine();

                        // ReadLine returns null at end of file; treat a missing "###"
                        // terminator as the end of the block instead of dereferencing null.
                        if (neighbour == null || neighbour == "###")
                        {
                            break;
                        }

                        string[] tmp = neighbour.Split('\t');
                        if (tmp.Length < 3)
                        {
                            continue;
                        }

                        neighbours.Add(new Question(tmp[0], tmp[2].Split('|').ToList(),
                            tmp[1].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries).ToList()));
                    }

                    TrainingInstances.Add(q);
                    InstancesNeighbours.Add(neighbours.GetRange(0, Math.Min(neighbours.Count, MaxNeighbours)));

                    // Stop reading once more than 100 instances have been collected.
                    if (TrainingInstances.Count > 100)
                        break;
                    Console.WriteLine("Training Instance:" + TrainingInstances.Count);
                }
            }

            Training t = new Training();
            t.TrainingInstancesInit(TrainingInstances, InstancesNeighbours);
            int QFeatureNum = int.Parse(ConfigurationManager.AppSettings["QFeatureNum"]);
            int TFeatureNum = int.Parse(ConfigurationManager.AppSettings["TFeatureNum"]);
            int Max_Iter = int.Parse(ConfigurationManager.AppSettings["Max_Iter"]);
            double LearningRate = double.Parse(ConfigurationManager.AppSettings["LearningRate"]);
            double Lamda = double.Parse(ConfigurationManager.AppSettings["Lamda"]);
            double StopGap = double.Parse(ConfigurationManager.AppSettings["StopGap"]);
            double decay = double.Parse(ConfigurationManager.AppSettings["decay"]);

            t.Train(QFeatureNum, TFeatureNum, Max_Iter, LearningRate, Lamda, StopGap, decay);

            using (StreamWriter sw = new StreamWriter("questionfeature.weight"))
            {
                for (int i = 0; i < QFeatureNum; i++)
                {
                    sw.WriteLine(t.questionSimWeights[i]);
                    Console.WriteLine(t.questionSimWeights[i]);
                }
            }

            // The tag weight file must hold the tag weights, one per tag feature. It
            // previously repeated the question weights and looped to QFeatureNum.
            using (StreamWriter sw = new StreamWriter("tagfeature.weight"))
            {
                for (int i = 0; i < TFeatureNum; i++)
                {
                    sw.WriteLine(t.tagSimWeights[i]);
                    Console.WriteLine(t.tagSimWeights[i]);
                }
            }
        }