/// <summary>
/// Tags sequences using the model files stored under <c>options.BasePath</c>.
/// Loads the tag dictionary, the feature dictionary (skipped when feature hashing is enabled)
/// and the two weight tables, labels every instance returned by <c>ReadInstances</c>,
/// writes the predicted tag names to <c>options.Output</c> (one tag per line, followed by a
/// blank line after each instance) and prints per-tag and per-instance accuracy to the console.
/// </summary>
/// <param name="options">
/// Run settings. This method reads <c>BasePath</c>, <c>UseFeatureHashing</c>, <c>HashBits</c>
/// and <c>Output</c>, and passes the object on to <c>ReadInstances</c>.
/// </param>
static void Tag(Options options)
{
    //read tag dictionary: tag name -> tag id, plus the reverse lookup used when writing output
    Dictionary<string, int> tagMap = new Dictionary<string, int>();
    Dictionary<int, string> rTagMap = new Dictionary<int, string>();
    string tagDictionaryPath = Path.Combine(options.BasePath, "TagDictionary.txt");
    foreach (string[] tokens in ReadWhitespaceDelimitedRows(tagDictionaryPath))
    {
        int tagId = int.Parse(tokens[1]);
        tagMap.Add(tokens[0], tagId);
        rTagMap.Add(tagId, tokens[0]);
    }

    //read feature dictionary; with feature hashing there is no dictionary file to load
    Dictionary<string, int> featureMap = new Dictionary<string, int>();
    if (!options.UseFeatureHashing)
    {
        string featureDictionaryPath = Path.Combine(options.BasePath, "FeatureDictionary.txt");
        foreach (string[] tokens in ReadWhitespaceDelimitedRows(featureDictionaryPath))
        {
            featureMap.Add(tokens[0], int.Parse(tokens[1]));
        }
    }

    //key variables
    int numTags = tagMap.Count;
    //NOTE(review): 2 << HashBits equals 2^(HashBits + 1), i.e. twice 1 << HashBits.
    //Kept as-is because the hashing code is not visible here - confirm this is intended.
    int numFeatures = options.UseFeatureHashing
        ? (2 << options.HashBits)
        : featureMap.Count;

    Model model = new Model()
    {
        alphaTagFeature = new float[numTags, numFeatures],
        alphaTagPreviousTag = new float[numTags, numTags]
    };

    //populate model (transition weights first, then tag/feature weights)
    LoadWeightTable(Path.Combine(options.BasePath, "TagTransitionProbabilities.txt"), model.alphaTagPreviousTag);
    LoadWeightTable(Path.Combine(options.BasePath, "TagFeatureProbabilities.txt"), model.alphaTagFeature);

    //maintain data for precision recall reports
    //the counters are keyed 0..numTags-1, so tag ids are expected to fall in that range;
    //any other id raises KeyNotFoundException when it is counted or reported
    Dictionary<int, int> perTagCorrect = new Dictionary<int, int>();
    Dictionary<int, int> perTagCount = new Dictionary<int, int>();
    Dictionary<int, int> perTagModelCount = new Dictionary<int, int>();
    for (int tagId = 0; tagId < numTags; tagId++)
    {
        perTagCount[tagId] = 0;
        perTagCorrect[tagId] = 0;
        perTagModelCount[tagId] = 0;
    }
    int instanceCorrectCount = 0;
    int instanceCount = 0;

    Tagger tagger = new Tagger(numTags, model.alphaTagFeature, model.alphaTagPreviousTag);
    //single output buffer reused for every instance; only the first
    //instance.WordsWithFeatures.Length entries are read back after each Label call
    int[] tags = new int[Viterbi.MAX_WORDS];

    using (StreamWriter writer = new StreamWriter(options.Output))
    {
        foreach (var instance in ReadInstances(tagMap, featureMap, options))
        {
            tagger.Label(instance.WordsWithFeatures, tags);
            int wordCount = instance.WordsWithFeatures.Length;

            //one predicted tag name per line, then an empty line to end the instance
            writer.WriteLine(string.Join(Environment.NewLine,
                Enumerable
                    .Range(0, wordCount)
                    .Select(x => rTagMap[tags[x]])
                    .ToArray()));
            writer.WriteLine();

            bool allCorrect = true;
            for (int i = 0; i < wordCount; i++)
            {
                int correctTag = instance.LabelledTags[i];
                int modelTag = tags[i];
                perTagCount[correctTag]++;
                perTagModelCount[modelTag]++;
                if (correctTag == modelTag)
                {
                    perTagCorrect[correctTag]++;
                }
                else
                {
                    allCorrect = false;
                }
            }
            if (allCorrect)
            {
                instanceCorrectCount++;
            }
            instanceCount++;
        }
    }

    //per-tag report columns: tag, predicted count, correct count, labelled count,
    //correct / predicted (precision), correct / labelled (recall).
    //The ratios are floating-point, so a zero denominator prints NaN instead of throwing.
    for (int i = 0; i < numTags; i++)
    {
        WriteReportRow(
            rTagMap[i],
            perTagModelCount[i],
            perTagCorrect[i],
            perTagCount[i],
            perTagCorrect[i] * 1.0 / perTagModelCount[i],
            perTagCorrect[i] * 1.0 / perTagCount[i]);
    }

    //whole-instance report: fully correct instances, total instances, ratio
    WriteReportRow(
        instanceCorrectCount,
        instanceCount,
        instanceCorrectCount * 1.0 / instanceCount);
}

//Reads a text file and yields the whitespace-separated fields of every non-blank line.
//Lines are trimmed before splitting so that leading whitespace cannot produce an empty first
//field, and blank lines (e.g. a trailing newline at the end of the file) are skipped instead
//of failing later with an out-of-range field index.
private static IEnumerable<string[]> ReadWhitespaceDelimitedRows(string path)
{
    using (StreamReader reader = new StreamReader(path))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string trimmed = line.Trim();
            if (trimmed.Length == 0)
            {
                continue;
            }
            yield return Regex.Split(trimmed, @"\s+");
        }
    }
}

//Fills a weight matrix from a file of "rowIndex columnIndex value" lines.
//NOTE(review): float.Parse uses the current culture, exactly as before; the code that writes
//these files is not visible here, so the parsing culture was deliberately left unchanged.
private static void LoadWeightTable(string path, float[,] weights)
{
    foreach (string[] tokens in ReadWhitespaceDelimitedRows(path))
    {
        weights[int.Parse(tokens[0]), int.Parse(tokens[1])] = float.Parse(tokens[2]);
    }
}

//Writes the values to the console as a single tab-separated line.
private static void WriteReportRow(params object[] values)
{
    Console.WriteLine(string.Join("\t", values.Select(x => x.ToString()).ToArray()));
}