Пример #1
0
        /// <summary>
        /// Reads the model weights, builds the Viterbi decoder and tags every sentence
        /// of the test input, writing the tagged sentences to the output file.
        /// </summary>
        /// <param name="debug">
        /// Passed to the decoder; when <c>true</c> the decoder's debug list is written
        /// together with each tagged sentence.
        /// </param>
        public void Setup(bool debug)
        {
            // The feature-to-index map is read from a companion file next to the model file.
            var modelReader = new ReadModel(InputModelFile);
            var featureMapReader = new ReadModel(string.Concat(InputModelFile, ".featuresToK"));
            _weightVector = new WeightVector(featureMapReader.GetFeatureToKdDictionary());

            foreach (var modelEntry in modelReader.ModelIterator())
            {
                _weightVector.Add(modelEntry);
            }

            _tags = new Tags(_tagList);
            _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

            // Decode the test input one sentence at a time.
            var sentenceReader = new ReadInputData(InputTestFile);
            var tagWriter = new WriteModel(_outputTestFile);
            foreach (var sentence in sentenceReader.GetSentence())
            {
                List<string> decoderTrace;
                var predictedTags = _viterbiForGlobalLinearModel.Decode(sentence, debug, out decoderTrace);
                if (!debug)
                {
                    tagWriter.WriteDataWithTag(sentence, predictedTags);
                    continue;
                }

                tagWriter.WriteDataWithTagDebug(sentence, predictedTags, decoderTrace);
            }

            tagWriter.Flush();
        }
 /// <summary>
 /// Stores the training data and hyper-parameters and precomputes the tag bigrams
 /// in two parallel arrays: a joined "first@#second" string and a (first, second) pair.
 /// </summary>
 /// <param name="inputSentence">Training sentences.</param>
 /// <param name="tagsList">Tag sequences stored alongside the sentences.</param>
 /// <param name="tagList">Tags used to generate the bigrams.</param>
 /// <param name="lambda">Stored as <c>_lambda</c>.</param>
 /// <param name="learningParam">Stored as <c>_learningParam</c>.</param>
 /// <param name="cache">Feature cache stored for later use.</param>
 /// <param name="logger">Writer assigned to <c>Logger</c>.</param>
 public ComputeGradient(List<List<string>> inputSentence, List<List<string>> tagsList,
     List<string> tagList, double lambda, double learningParam, FeatureCache cache, WriteModel logger)
 {
     Logger = logger;
     _cache = cache;
     _lambda = lambda;
     _learningParam = learningParam;
     _inputSentence = inputSentence;
     _outputTagsList = tagsList;
     _tagList = tagList;
     _weightVector = null;
     forwardBackwordAlgos = new List<ForwardBackwordAlgo>();

     // Both arrays start with four slots and grow one slot at a time beyond that.
     _twoGramsList = new string[4];
     _twoGramPair = new KeyValuePair<string, string>[4];

     var bigramSource = new Tags(_tagList);
     var slot = 0;
     foreach (var bigram in bigramSource.GetNGramTags(2))
     {
         if (slot >= _twoGramsList.Length)
         {
             var grownLength = slot + 1;
             Array.Resize(ref _twoGramsList, grownLength);
             Array.Resize(ref _twoGramPair, grownLength);
         }

         // Each bigram arrives as "first:second".
         string[] parts = bigram.Split(':');
         string first = parts[0];
         string second = parts[1];

         _twoGramsList[slot] = string.Concat(first, "@#", second);
         _twoGramPair[slot] = new KeyValuePair<string, string>(first, second);
         slot++;
     }
 }
Пример #3
0
        /// <summary>
        /// Walks the key file and the dev file in lock step and reports precision,
        /// recall and F1 score for entries whose value contains "LOCATION".
        /// The report is written to <paramref name="dumpFile"/> and also returned.
        /// </summary>
        /// <param name="keyFile">File whose LOCATION entries count as expected.</param>
        /// <param name="devFile">File whose LOCATION entries count as found.</param>
        /// <param name="dumpFile">File the report (or the mismatch message) is written to.</param>
        /// <returns>
        /// The report text. If the keys of the two files differ at some entry, only the
        /// mismatch message for that entry is written and returned.
        /// A ratio whose denominator is zero is reported as 0 rather than NaN.
        /// </returns>
        public string Evalulate(string keyFile, string devFile, string dumpFile)
        {
            var keyModel = new ReadModel(keyFile);
            var devModel = new ReadModel(devFile);

            var dumpOutputModel = new WriteModel(dumpFile);

            // Integer counters: a float counter silently stops incrementing past 2^24.
            int expected = 0;
            int correct = 0;
            int found = 0;
            int line = 0;

            string dump;

            // The using blocks dispose both enumerators on every exit path,
            // including the early return on a key mismatch.
            using (var keyIter = keyModel.ModelIterator().GetEnumerator())
            using (var devIter = devModel.ModelIterator().GetEnumerator())
            {
                while (keyIter.MoveNext() && devIter.MoveNext())
                {
                    var key = keyIter.Current;
                    var dev = devIter.Current;
                    line++;
                    if (!key.Key.Equals(dev.Key))
                    {
                        dump = "line: " + line + " " + key.Key + " doesn't match " + dev.Key + "\r\n";
                        dumpOutputModel.WriteLine(dump);
                        dumpOutputModel.Flush();
                        return dump;
                    }

                    bool keyIsLocation = key.Value.Contains("LOCATION");
                    bool devIsLocation = dev.Value.Contains("LOCATION");

                    if (keyIsLocation)
                    {
                        expected++;
                    }

                    if (devIsLocation)
                    {
                        found++;
                    }

                    if (keyIsLocation && devIsLocation)
                    {
                        correct++;
                    }
                }
            }

            dump = "found: " + found + " expected: " + expected + " correct: " + correct + "\r\n";
            dumpOutputModel.WriteLine(dump);

            // Guard the denominators so an empty or LOCATION-free input yields 0 instead of NaN.
            float precision = found > 0 ? (float)correct / found : 0f;
            float recall = expected > 0 ? (float)correct / expected : 0f;
            float f1Score = (precision + recall) > 0f
                ? (2 * precision * recall) / (precision + recall)
                : 0f;

            const string header = "precision\t recall \t f1score\t";
            string scores = precision.ToString(CultureInfo.InvariantCulture) + "\t" +
                            recall.ToString(CultureInfo.InvariantCulture) + "\t" +
                            f1Score.ToString(CultureInfo.InvariantCulture);

            dump += header + "\r\n";
            dumpOutputModel.WriteLine(header);
            dump += scores + "\r\n";
            dumpOutputModel.WriteLine(scores);
            dumpOutputModel.Flush();
            return dump;
        }
        /// <summary>
        /// Creates a mapper with empty feature/index dictionaries and a feature count of zero.
        /// </summary>
        /// <param name="outputFile">File name handed to the <c>WriteModel</c> this mapper keeps.</param>
        /// <param name="tagList">Tags stored on the instance and wrapped in a <c>Tags</c> object.</param>
        public MapFeaturesToK(string outputFile, List<string> tagList)
        {
            _writeModel = new WriteModel(outputFile);
            _tagList = tagList;
            _tags = new Tags(tagList);

            // Both lookup directions start out empty.
            FeatureCount = 0;
            DictKToFeatures = new Dictionary<int, string>();
            DictFeaturesToK = new Dictionary<string, int>();
        }
Пример #5
0
        /// <summary>
        /// Writes one "index feature weight" line per feature index to the file
        /// named <c>_outputFile</c> plus the ".preceptron" suffix. The weight comes from
        /// the averaged weight vector when <c>_useAvg</c> is set, otherwise from the plain one.
        /// </summary>
        public void Dump()
        {
            var writer = new WriteModel(string.Concat(_outputFile, ".preceptron"));

            var k = 0;
            while (k < WeightVector.FeatureCount)
            {
                var featureName = MapFeatures.DictKToFeatures[k];
                var weight = _useAvg ? AvgWeightVector.WeightArray[k] : WeightVector.WeightArray[k];
                writer.WriteLine(string.Format("{0} {1} {2}", k, featureName, weight));
                k++;
            }

            writer.Flush();
        }
Пример #6
0
 /// <summary>
 /// Announces on the console that training is complete, then writes every weight as an
 /// "index feature weight" line, ordered by descending absolute weight.
 /// </summary>
 /// <param name="outputFile">File the weights are written to.</param>
 /// <param name="dictKtoFeature">Lookup from feature index to feature name.</param>
 public void Dump(string outputFile, Dictionary<int, string> dictKtoFeature)
 {
     Console.WriteLine(DateTime.Now+" training is complete");
     var writer = new WriteModel(outputFile);

     // Largest-magnitude weights come first.
     var byMagnitude = _weightVector.WDictionary.OrderByDescending(w => Math.Abs(w.Value));
     foreach (var entry in byMagnitude)
     {
         var featureName = dictKtoFeature[entry.Key];
         writer.WriteLine(string.Format("{0} {1} {2}", entry.Key, featureName, entry.Value));
     }

     writer.Flush();
 }
Пример #7
0
 /// <summary>
 /// Writes every weight as a "feature weight" line to the file named
 /// <c>_outputFile</c> plus the ".temp" suffix, ordered by descending absolute weight.
 /// </summary>
 public void Dump()
 {
     var writer = new WriteModel(string.Concat(_outputFile, ".temp"));

     // Largest-magnitude weights come first; the feature index itself is not written.
     var byMagnitude = WeightVector.WDictionary.OrderByDescending(w => Math.Abs(w.Value));
     foreach (var entry in byMagnitude)
     {
         var featureName = MapFeatures.DictKToFeatures[entry.Key];
         writer.WriteLine(string.Format("{0} {1}", featureName, entry.Value));
     }

     writer.Flush();
 }
        /// <summary>
        /// Runs <c>Init</c>, then decodes every sentence of the test input and writes
        /// the tagged sentences to the output file.
        /// </summary>
        /// <param name="debug">
        /// Passed to the decoder; when <c>true</c> the decoder's debug list is written
        /// together with each tagged sentence.
        /// </param>
        public void Setup(bool debug)
        {
            Init();

            // Decode the test input one sentence at a time.
            var sentenceReader = new ReadInputData(InputTestFile);
            var tagWriter = new WriteModel(_outputTestFile);
            foreach (var sentence in sentenceReader.GetSentence())
            {
                List<string> decoderTrace;
                var predictedTags = ViterbiForGLM.DecodeNew(sentence, debug, out decoderTrace);
                if (!debug)
                {
                    tagWriter.WriteDataWithTag(sentence, predictedTags);
                    continue;
                }

                tagWriter.WriteDataWithTagDebug(sentence, predictedTags, decoderTrace);
            }

            tagWriter.Flush();
        }
        /// <summary>
        /// Splits each input line into space-separated words and writes them one per line
        /// to two files: <c>output + ".key"</c> (word followed by LOCATION or OTHER) and
        /// <c>output + ".key.dev"</c> (word only). A blank line follows every processed input line.
        /// Lines with fewer than four space-separated parts are skipped entirely.
        /// </summary>
        /// <param name="input">File to read lines from.</param>
        /// <param name="output">Base name of the two output files.</param>
        internal static void CreateInputForCRF(string input, string output)
        {
            var source = new ReadModel(input);
            var keyWriter = new WriteModel(string.Concat(output, ".key"));
            var devWriter = new WriteModel(string.Concat(output, ".key.dev"));

            foreach (var line in source.GetNextLine())
            {
                var tokens = line.Split(' ');
                if (tokens.Length >= 4)
                {
                    foreach (var token in tokens)
                    {
                        // Whitespace-only pieces carry no word.
                        if (token.Trim().Length == 0)
                        {
                            continue;
                        }

                        string bareWord;
                        string label;
                        if (token.EndsWith("{LOCATION}"))
                        {
                            bareWord = token.Replace("{LOCATION}", "");
                            label = "LOCATION";
                        }
                        else if (token.EndsWith("{LOCATION}."))
                        {
                            // Keep the trailing period as part of the word.
                            bareWord = token.Replace("{LOCATION}.", ".");
                            label = "LOCATION";
                        }
                        else
                        {
                            bareWord = token;
                            label = "OTHER";
                        }

                        keyWriter.WriteLine(bareWord + " " + label);
                        devWriter.WriteLine(bareWord);
                    }

                    keyWriter.WriteLine("");
                    devWriter.WriteLine("");
                }
            }

            keyWriter.Flush();
            devWriter.Flush();
        }
Пример #10
0
 /// <summary>
 /// Creates the reader for <paramref name="input"/> and the writer for
 /// <paramref name="output"/> that this instance keeps.
 /// </summary>
 /// <param name="input">Name handed to the <c>ReadModel</c>.</param>
 /// <param name="output">Name handed to the <c>WriteModel</c>.</param>
 public ProcessRawText(string input, string output)
 {
     this._reader = new ReadModel(input);
     this._writer = new WriteModel(output);
 }
        /// <summary>
        /// Converts raw text into one-token-per-line output. Each input line is passed through
        /// <c>RemoveTags</c>, <c>ReplaceTags</c> and <c>RemoveAllTags</c>, split on spaces, and
        /// every token is written to <paramref name="output"/> as "token LOCATION" or
        /// "token OTHER" and to <c>output + ".dev"</c> as the bare token.
        /// Tokens between "##LOCATIONSTARTTAG##" and "##ENDTAG##" are labelled LOCATION;
        /// the marker tokens themselves are never written.
        /// A blank line is written before a token when the previously written token ended
        /// with "." (and was not recognised by <c>IsSalutationAbbr</c>) outside a location span.
        /// </summary>
        /// <param name="input">File to read raw lines from.</param>
        /// <param name="output">Labelled output file; the unlabelled copy gets a ".dev" suffix.</param>
        public void Parse(string input, string output)
        {
            var readModel = new ReadModel(input);
            var writeModel = new WriteModel(output);
            var writeDevModel = new WriteModel(output + ".dev");
            var tokens = new List<string>();

            foreach (var line in readModel.GetNextLine())
            {
                var cleaned = RemoveTags(line);
                cleaned = ReplaceTags(cleaned);
                cleaned = RemoveAllTags(cleaned);
                if (string.IsNullOrEmpty(cleaned))
                {
                    continue;
                }

                tokens.AddRange(cleaned.Split(new[] { ' ' }));
            }

            bool insideLocation = false;
            var lastToken = string.Empty;

            foreach (var rawToken in tokens)
            {
                var token = rawToken.Trim();
                if (string.IsNullOrEmpty(token))
                {
                    lastToken = string.Empty;
                    continue;
                }

                if (!insideLocation && lastToken.EndsWith(".") && !IsSalutationAbbr(lastToken))
                {
                    // Sentence boundary: emit the separator, then fall through so the
                    // current token is still processed. Skipping it here would drop the
                    // first token of every sentence (and swallow a location start marker).
                    lastToken = string.Empty;
                    writeModel.WriteLine("");
                    writeDevModel.WriteLine("");
                }

                if (token.Equals("##ENDTAG##"))
                {
                    // Closes a location span; an end marker outside a span is simply dropped.
                    insideLocation = false;
                    lastToken = string.Empty;
                    continue;
                }

                if (!insideLocation && token.Equals("##LOCATIONSTARTTAG##"))
                {
                    insideLocation = true;
                    lastToken = string.Empty;
                    continue;
                }

                writeModel.WriteLine(token + " " + (insideLocation ? "LOCATION" : "OTHER"));
                writeDevModel.WriteLine(token);
                lastToken = token;
            }

            writeModel.Flush();
            writeDevModel.Flush();
        }
        /// <summary>
        /// Trains a perceptron on the hard-coded training file, builds a feature cache from
        /// its inputs, runs ten gradient iterations starting from the perceptron weights and
        /// dumps the resulting model to the hard-coded model file.
        /// </summary>
        /// <param name="tags">Tags handed to the perceptron, the feature cache and the gradient.</param>
        static void TrainingTest(List<string> tags)
        {
            // Alternative data set: "../../data/gene.key" with model "../../data/gene.key.model".
            const string modelFile = "../../data/training/tag.model.trial1";
            const string input = "../../data/training/NYT_19980403_parsed.key";
            const int threadCount = 1;

            // Each run logs to its own time-stamped file.
            string logPath = "../../Logs/Log_"+DateTime.Now.ToFileTime()+".txt";

            var perceptron = new Perceptron(input, modelFile, tags);
            perceptron.Train();
            perceptron.ReMapFeatureToK();
            perceptron.MapFeatures.Dump();
            perceptron.ReadInputs();

            var sentences = perceptron.InputSentences;
            var featureToK = perceptron.MapFeatures.DictFeaturesToK;
            var featureCache = new FeatureCache(sentences, tags, featureToK);
            featureCache.CreateCache();

            var logger = new WriteModel(logPath);
            var gradient = new ComputeGradient(perceptron.InputSentences, perceptron.TagsList,
                tags, .1, featureCache, logger);

            // The perceptron weights are the starting point; they are not reset to zero.
            gradient.RunIterations(perceptron.WeightVector, 10, threadCount);
            gradient.Dump(modelFile, perceptron.MapFeatures.DictKToFeatures);
        }
 /// <summary>
 /// Announces on the console that training is complete, then writes one
 /// "index feature weight" line per feature index, in index order.
 /// </summary>
 /// <param name="outputFile">File the weights are written to.</param>
 /// <param name="dictKtoFeature">Lookup from feature index to feature name.</param>
 public void Dump(string outputFile, Dictionary<int, string> dictKtoFeature)
 {
     Console.WriteLine(DateTime.Now+" training is complete");
     var writer = new WriteModel(outputFile);

     var k = 0;
     while (k < _weightVector.FeatureCount)
     {
         var featureName = dictKtoFeature[k];
         var weight = _weightVector.WeightArray[k];
         writer.WriteLine(string.Format("{0} {1} {2}", k, featureName, weight));
         k++;
     }

     writer.Flush();
 }