示例#1
0
        //tag sequences based on model provided
        /// <summary>
        /// Loads the tag/feature dictionaries and model weights found under
        /// <c>options.BasePath</c>, labels every instance returned by <c>ReadInstances</c>,
        /// writes the predicted tag names to <c>options.Output</c> (one tag per line, with a
        /// blank line after each instance) and prints an evaluation report to the console.
        /// </summary>
        /// <remarks>
        /// Files read from <c>options.BasePath</c>: <c>TagDictionary.txt</c>,
        /// <c>FeatureDictionary.txt</c> (only when <c>options.UseFeatureHashing</c> is false),
        /// <c>TagTransitionProbabilities.txt</c> and <c>TagFeatureProbabilities.txt</c>.
        /// Every file is whitespace separated; blank lines are ignored.
        /// The code indexes arrays of size <c>numTags</c> with the ids from the tag dictionary,
        /// so ids are assumed to be exactly 0..numTags-1 - TODO confirm against the code that
        /// writes the dictionary.
        /// </remarks>
        /// <param name="options">Run settings; this method reads <c>BasePath</c>,
        /// <c>UseFeatureHashing</c>, <c>HashBits</c> and <c>Output</c>, and forwards the object
        /// to <c>ReadInstances</c>.</param>
        static void Tag(Options options)
        {
            //read tag dictionary: "<tag> <id>" per line, stored in both directions
            Dictionary<string, int> tagMap = new Dictionary<string, int>();
            Dictionary<int, string> rTagMap = new Dictionary<int, string>();
            string tagDictionaryPath = Path.Combine(options.BasePath, "TagDictionary.txt");
            foreach (string[] tokens in ReadWhitespaceSeparatedRecords(tagDictionaryPath))
            {
                int tagId = int.Parse(tokens[1]);
                tagMap.Add(tokens[0], tagId);
                rTagMap.Add(tagId, tokens[0]);
            }

            //read feature dictionary: "<feature> <id>" per line (left empty when hashing is used)
            Dictionary<string, int> featureMap = new Dictionary<string, int>();
            if (!options.UseFeatureHashing)
            {
                string featureDictionaryPath = Path.Combine(options.BasePath, "FeatureDictionary.txt");
                foreach (string[] tokens in ReadWhitespaceSeparatedRecords(featureDictionaryPath))
                {
                    featureMap.Add(tokens[0], int.Parse(tokens[1]));
                }
            }

            //key variables
            int numTags = tagMap.Count;
            // NOTE(review): 2 << HashBits equals 2^(HashBits + 1), not 2^HashBits. Kept as-is
            // because the size has to agree with whatever produced the model files - confirm.
            int numFeatures = options.UseFeatureHashing ? (2 << options.HashBits) : featureMap.Count;

            Model model = new Model()
            {
                alphaTagFeature = new float[numTags, numFeatures],
                alphaTagPreviousTag = new float[numTags, numTags]
            };

            //populate model: each line is "<row index> <column index> <weight>"
            // NOTE(review): float.Parse uses the current culture here; the files only round-trip
            // if they were written under the same culture - confirm against the writer.
            string transitionPath = Path.Combine(options.BasePath, "TagTransitionProbabilities.txt");
            foreach (string[] tokens in ReadWhitespaceSeparatedRecords(transitionPath))
            {
                model.alphaTagPreviousTag[int.Parse(tokens[0]), int.Parse(tokens[1])] = float.Parse(tokens[2]);
            }
            string tagFeaturePath = Path.Combine(options.BasePath, "TagFeatureProbabilities.txt");
            foreach (string[] tokens in ReadWhitespaceSeparatedRecords(tagFeaturePath))
            {
                model.alphaTagFeature[int.Parse(tokens[0]), int.Parse(tokens[1])] = float.Parse(tokens[2]);
            }

            //maintain data for precision recall reports (indexed by tag id, all start at zero)
            int[] perTagCorrect = new int[numTags];     // predicted tag == labelled tag
            int[] perTagCount = new int[numTags];       // occurrences in the labelled data
            int[] perTagModelCount = new int[numTags];  // occurrences in the model output

            int instanceCorrectCount = 0;
            int instanceCount = 0;

            Tagger tagger = new Tagger(numTags, model.alphaTagFeature, model.alphaTagPreviousTag);
            // Output buffer reused for every instance; only the first
            // instance.WordsWithFeatures.Length entries are read back after each Label call.
            int[] tags = new int[Viterbi.MAX_WORDS];
            using (StreamWriter writer = new StreamWriter(options.Output))
            {
                foreach (var instance in ReadInstances(tagMap, featureMap, options))
                {
                    tagger.Label(instance.WordsWithFeatures, tags);
                    int wordCount = instance.WordsWithFeatures.Length;

                    //emit one predicted tag name per line, then a blank separator line
                    string[] predictedNames = new string[wordCount];
                    for (int i = 0; i < wordCount; i++)
                    {
                        predictedNames[i] = rTagMap[tags[i]];
                    }
                    writer.WriteLine(string.Join(Environment.NewLine, predictedNames));
                    writer.WriteLine();

                    //score the prediction against the labelled tags
                    bool allCorrect = true;
                    for (int i = 0; i < wordCount; i++)
                    {
                        int correctTag = instance.LabelledTags[i];
                        int modelTag = tags[i];

                        perTagCount[correctTag]++;
                        perTagModelCount[modelTag]++;
                        if (correctTag == modelTag)
                        {
                            perTagCorrect[correctTag]++;
                        }
                        else
                        {
                            allCorrect = false;
                        }
                    }

                    if (allCorrect)
                    {
                        instanceCorrectCount++;
                    }
                    instanceCount++;
                }
            }

            //per-tag report: name, predicted count, correct count, labelled count,
            //correct/predicted (precision), correct/labelled (recall).
            //The divisions are done in double, so a zero denominator prints NaN instead of throwing.
            for (int i = 0; i < numTags; i++)
            {
                Console.WriteLine(string.Join("\t", new object[] {
                        rTagMap[i],
                        perTagModelCount[i],
                        perTagCorrect[i],
                        perTagCount[i],
                        perTagCorrect[i] * 1.0 / perTagModelCount[i],
                        perTagCorrect[i] * 1.0 / perTagCount[i]
                    }.Select(x => x.ToString()).ToArray()));
            }

            //whole-instance report: fully correct instances, total instances, their ratio
            Console.WriteLine(string.Join("\t", new object[] {
                instanceCorrectCount,
                instanceCount,
                instanceCorrectCount * 1.0 / instanceCount
            }.Select(x => x.ToString()).ToArray()));
        }

        /// <summary>
        /// Lazily reads the text file at <paramref name="path"/> and yields every line split
        /// on runs of whitespace. Lines that are empty or whitespace-only are skipped so that
        /// stray blank lines do not break the callers, which index the tokens directly.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>One token array per non-blank line, in file order. The file is closed
        /// when enumeration completes or the enumerator is disposed.</returns>
        private static IEnumerable<string[]> ReadWhitespaceSeparatedRecords(string path)
        {
            using (StreamReader reader = new StreamReader(path))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // The line itself is not trimmed before splitting: only fully blank
                    // lines are dropped, everything else is tokenised exactly as before.
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }
                    yield return Regex.Split(line, @"\s+");
                }
            }
        }