Beispiel #1
0
        /// <summary>
        /// Reads the configuration file and loads the resources it names: the term
        /// normalizing mapping, the CRF model and the lexical dictionary, and - when the
        /// ranker is enabled - the punctuation dictionary, the language model, the rank
        /// percent data and the ranking model.
        /// </summary>
        /// <param name="strConfFileName">Path of the configuration file handed to LoadConfFile.</param>
        /// <returns>
        /// false when LoadConfFile returns null or a required key is missing (the missing
        /// key is reported on the console); true otherwise, including the case where the
        /// ranker is disabled and the ranking resources are skipped.
        /// </returns>
        public bool Initialize(string strConfFileName)
        {
            //Load configuration file
            Dictionary<string, string> confDict = LoadConfFile(strConfFileName);
            if (confDict == null)
            {
                return false;
            }

            //Check required items before anything is loaded. Keys are looked up in lower case.
            string strLexicalDictFileName;
            if (confDict.TryGetValue(KEY_LEXICAL_DICT_FILE_NAME.ToLower(), out strLexicalDictFileName) == false)
            {
                Console.WriteLine("Failed to find key {0}", KEY_LEXICAL_DICT_FILE_NAME);
                return false;
            }

            string strModelFileName;
            if (confDict.TryGetValue(KEY_MODEL_FILE_NAME.ToLower(), out strModelFileName) == false)
            {
                Console.WriteLine("Failed to find key {0}", KEY_MODEL_FILE_NAME);
                return false;
            }

            //The mapping file is read unconditionally below, so its key is required as well.
            //Report a missing entry like the other required keys instead of letting the
            //dictionary indexer throw KeyNotFoundException.
            string strNormalizedTermFileName;
            if (confDict.TryGetValue(KEY_NORMALIZED_TERM_FILE_NAME.ToLower(), out strNormalizedTermFileName) == false)
            {
                Console.WriteLine("Failed to find key {0}", KEY_NORMALIZED_TERM_FILE_NAME);
                return false;
            }

            //Load term normalizing mapping file
            LoadNormalizedMappingFile(strNormalizedTermFileName);

            //Load CRF model for word formation
            crf = new CRFSharpWrapper.Decoder();
            crf.LoadModel(strModelFileName);
            featureGenerator = new CRFSharpFeatureGenerator();

            //Load lexical dictionary
            wordseg = new WordSeg.WordSeg();
            wordseg.LoadLexicalDict(strLexicalDictFileName, true);

            //The ranker switch is optional: when the key is absent bRunRankerModel keeps
            //its current value. bool.Parse throws on a value that is not a valid boolean.
            string strRunRankerModel;
            if (confDict.TryGetValue(KEY_RUN_RANKER_MODEL.ToLower(), out strRunRankerModel))
            {
                bRunRankerModel = bool.Parse(strRunRankerModel);
            }
            if (bRunRankerModel == false)
            {
                //Nothing below is needed without the ranker
                return true;
            }

            //Load punct dict
            string strPunctDictFileName;
            if (confDict.TryGetValue(KEY_PUNCT_DICT_FILE_NAME.ToLower(), out strPunctDictFileName) == false)
            {
                Console.WriteLine("Failed to find key {0}", KEY_PUNCT_DICT_FILE_NAME);
                return false;
            }
            LoadPunctDict(strPunctDictFileName);

            //Load language model
            string strLanguageModelFileName;
            if (confDict.TryGetValue(KEY_LANGUAGE_MODEL_FILE_NAME.ToLower(), out strLanguageModelFileName) == false)
            {
                Console.WriteLine("Failed to find key {0}", KEY_LANGUAGE_MODEL_FILE_NAME);
                return false;
            }
            LoadLanguageModel(strLanguageModelFileName);

            //Load term important level percent data
            string strRankPercentFileName;
            if (confDict.TryGetValue(KEY_RANKPERCENT_FILE_NAME.ToLower(), out strRankPercentFileName) == false)
            {
                Console.WriteLine("Failed to find key {0}", KEY_RANKPERCENT_FILE_NAME);
                return false;
            }
            LoadRankPercent(strRankPercentFileName);

            //Initialize feature set for ranking
            featureList = InitFeatureList();

            string strRankModelFileName;
            if (confDict.TryGetValue(KEY_RANKMODEL_FILE_NAME.ToLower(), out strRankModelFileName) == false)
            {
                Console.WriteLine("Failed to find key {0}", KEY_RANKMODEL_FILE_NAME);
                return false;
            }

            string strActiveFeatureFileName;
            if (confDict.TryGetValue(KEY_ACTIVEFEATURE_FILE_NAME.ToLower(), out strActiveFeatureFileName) == false)
            {
                Console.WriteLine("Failed to find key {0}", KEY_ACTIVEFEATURE_FILE_NAME);
                return false;
            }

            //Load ranking model
            LoadRankModel(strRankModelFileName, strActiveFeatureFileName);

            return true;
        }
Beispiel #2
0
        /// <summary>
        /// Tags every record of the input file with the CRF model using options.thread
        /// parallel workers and writes the results to the configured output files in the
        /// order in which the records were read.
        /// </summary>
        /// <param name="options">
        /// Decoder settings: input, model and output file names, worker count, n-best
        /// size, maximum word count and probability level.
        /// </param>
        /// <returns>
        /// false when the input or model file does not exist or the model fails to load;
        /// true once all records have been processed.
        /// </returns>
        bool Decode(CRFSharpWrapper.DecoderArgs options)
        {
            var parallelOption = new ParallelOptions();

            if (File.Exists(options.strInputFileName) == false)
            {
                Console.WriteLine("FAILED: Open {0} file failed.", options.strInputFileName);
                return false;
            }

            if (File.Exists(options.strModelFileName) == false)
            {
                Console.WriteLine("FAILED: Open {0} file failed.", options.strModelFileName);
                return false;
            }

            StreamReader sr = null;
            StreamWriter sw = null, swSeg = null;

            //All streams are closed in the finally block so that a failed model load or an
            //exception raised while decoding does not leave the files open.
            try
            {
                sr = new StreamReader(options.strInputFileName);

                //Each output file is optional; a null or empty name disables that output
                if (string.IsNullOrEmpty(options.strOutputFileName) == false)
                {
                    sw = new StreamWriter(options.strOutputFileName);
                }
                if (string.IsNullOrEmpty(options.strOutputSegFileName) == false)
                {
                    swSeg = new StreamWriter(options.strOutputSegFileName);
                }

                //Create CRFSharp wrapper instance. It is shared by all workers
                var crfWrapper = new CRFSharpWrapper.Decoder();
                //Load model from file
                if (crfWrapper.LoadModel(options.strModelFileName) == false)
                {
                    return false;
                }

                //One queue per output file. The head of a queue is the record whose result
                //must be written next, which keeps the output in input order.
                var queueRecords = new ConcurrentQueue<List<List<string>>>();
                var queueSegRecords = new ConcurrentQueue<List<List<string>>>();

                parallelOption.MaxDegreeOfParallelism = options.thread;
                Parallel.For(0, options.thread, parallelOption, t =>
                {
                    //Create decoder tagger instance. Each worker needs its own instance
                    var tagger = crfWrapper.CreateTagger(options.nBest, options.maxword);
                    tagger.set_vlevel(options.probLevel);

                    //Initialize result
                    var crf_out = new crf_seg_out[options.nBest];
                    for (var i = 0; i < options.nBest; i++)
                    {
                        crf_out[i] = new crf_seg_out(tagger.crf_max_word_num);
                    }

                    //The same buffer instance is reused for every record of this worker.
                    //Its reference identifies the worker's pending record in the queues.
                    var inbuf = new List<List<string>>();
                    while (true)
                    {
                        //Reading and enqueueing happen under one lock so that the queue
                        //order matches the read order
                        lock (rdLocker)
                        {
                            if (ReadRecord(inbuf, sr) == false)
                            {
                                break;
                            }

                            //Only register the record with writers that exist. A queue
                            //whose writer is absent is never dequeued and would otherwise
                            //grow by one entry per record.
                            if (sw != null)
                            {
                                queueRecords.Enqueue(inbuf);
                            }
                            if (swSeg != null)
                            {
                                queueSegRecords.Enqueue(inbuf);
                            }
                        }

                        //Call CRFSharp wrapper to predict given string's tags
                        if (swSeg != null)
                        {
                            crfWrapper.Segment(crf_out, tagger, inbuf);
                        }
                        else
                        {
                            //The casts choose the Segment overload taking the base types
                            crfWrapper.Segment((crf_term_out[])crf_out, (DecoderTagger)tagger, inbuf);
                        }

                        List<List<string>> peek = null;
                        //Save segmented tagged result into file
                        if (swSeg != null)
                        {
                            var rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);
                            //Spin until this worker's record is at the head of the queue.
                            //NOTE(review): if another worker throws while its record is
                            //still queued ahead of this one, this loop never ends.
                            while (peek != inbuf)
                            {
                                queueSegRecords.TryPeek(out peek);
                            }
                            for (int index = 0; index < rstList.Count; index++)
                            {
                                var item = rstList[index];
                                swSeg.WriteLine(item);
                            }
                            queueSegRecords.TryDequeue(out peek);
                            peek = null;
                        }

                        //Save raw tagged result (with probability) into file
                        if (sw != null)
                        {
                            //Same ordering wait as above, on the raw output queue
                            while (peek != inbuf)
                            {
                                queueRecords.TryPeek(out peek);
                            }
                            OutputRawResultToFile(inbuf, crf_out, tagger, sw);
                            queueRecords.TryDequeue(out peek);
                        }
                    }
                });

                return true;
            }
            finally
            {
                if (sr != null)
                {
                    sr.Close();
                }
                if (sw != null)
                {
                    sw.Close();
                }
                if (swSeg != null)
                {
                    swSeg.Close();
                }
            }
        }
        /// <summary>
        /// Tags every record of the input file with the CRF model using options.thread
        /// parallel workers, writes the results to the configured output files in the
        /// order in which the records were read, and logs the elapsed time.
        /// </summary>
        /// <param name="options">
        /// Decoder settings: input, model and output file names, worker count, n-best
        /// size, maximum word count and probability level.
        /// </param>
        /// <returns>
        /// false when the input or model file does not exist; true once all records have
        /// been processed.
        /// </returns>
        bool Decode(CRFSharpWrapper.DecoderArgs options)
        {
            var parallelOption = new ParallelOptions();
            var watch = Stopwatch.StartNew();

            if (File.Exists(options.strInputFileName) == false)
            {
                Logger.WriteLine("FAILED: Open {0} file failed.", options.strInputFileName);
                return false;
            }

            if (File.Exists(options.strModelFileName) == false)
            {
                Logger.WriteLine("FAILED: Open {0} file failed.", options.strModelFileName);
                return false;
            }

            StreamReader sr = null;
            StreamWriter sw = null, swSeg = null;

            //All streams are closed in the finally block so that an exception raised while
            //loading the model or decoding does not leave the files open.
            try
            {
                sr = new StreamReader(options.strInputFileName);

                //Each output file is optional; a null or empty name disables that output
                if (string.IsNullOrEmpty(options.strOutputFileName) == false)
                {
                    sw = new StreamWriter(options.strOutputFileName);
                }
                if (string.IsNullOrEmpty(options.strOutputSegFileName) == false)
                {
                    swSeg = new StreamWriter(options.strOutputSegFileName);
                }

                //Create CRFSharp wrapper instance. It is shared by all workers
                var crfWrapper = new CRFSharpWrapper.Decoder();

                //Load encoded model from file
                Logger.WriteLine("Loading model from {0}", options.strModelFileName);
                crfWrapper.LoadModel(options.strModelFileName);

                //One queue per output file. The head of a queue is the record whose result
                //must be written next, which keeps the output in input order.
                var queueRecords = new ConcurrentQueue<List<List<string>>>();
                var queueSegRecords = new ConcurrentQueue<List<List<string>>>();

                parallelOption.MaxDegreeOfParallelism = options.thread;
                Parallel.For(0, options.thread, parallelOption, t =>
                {
                    //Create decoder tagger instance. Each worker needs its own instance
                    var tagger = crfWrapper.CreateTagger(options.nBest, options.maxword);
                    tagger.set_vlevel(options.probLevel);

                    //Initialize result
                    var crf_out = new crf_seg_out[options.nBest];
                    for (var i = 0; i < options.nBest; i++)
                    {
                        crf_out[i] = new crf_seg_out(tagger.crf_max_word_num);
                    }

                    //The same buffer instance is reused for every record of this worker.
                    //Its reference identifies the worker's pending record in the queues.
                    var inbuf = new List<List<string>>();
                    while (true)
                    {
                        //Reading and enqueueing happen under one lock so that the queue
                        //order matches the read order
                        lock (rdLocker)
                        {
                            if (ReadRecord(inbuf, sr) == false)
                            {
                                break;
                            }

                            //Only register the record with writers that exist. A queue
                            //whose writer is absent is never dequeued and would otherwise
                            //grow by one entry per record.
                            if (sw != null)
                            {
                                queueRecords.Enqueue(inbuf);
                            }
                            if (swSeg != null)
                            {
                                queueSegRecords.Enqueue(inbuf);
                            }
                        }

                        //Call CRFSharp wrapper to predict given string's tags
                        if (swSeg != null)
                        {
                            crfWrapper.Segment(crf_out, tagger, inbuf);
                        }
                        else
                        {
                            //The casts choose the Segment overload taking the base types
                            crfWrapper.Segment((crf_term_out[])crf_out, (DecoderTagger)tagger, inbuf);
                        }

                        List<List<string>> peek = null;
                        //Save segmented tagged result into file
                        if (swSeg != null)
                        {
                            var rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);
                            //Spin until this worker's record is at the head of the queue.
                            //NOTE(review): if another worker throws while its record is
                            //still queued ahead of this one, this loop never ends.
                            while (peek != inbuf)
                            {
                                queueSegRecords.TryPeek(out peek);
                            }
                            for (int index = 0; index < rstList.Count; index++)
                            {
                                var item = rstList[index];
                                swSeg.WriteLine(item);
                            }
                            queueSegRecords.TryDequeue(out peek);
                            peek = null;
                        }

                        //Save raw tagged result (with probability) into file
                        if (sw != null)
                        {
                            //Same ordering wait as above, on the raw output queue
                            while (peek != inbuf)
                            {
                                queueRecords.TryPeek(out peek);
                            }
                            OutputRawResultToFile(inbuf, crf_out, tagger, sw);
                            queueRecords.TryDequeue(out peek);
                        }
                    }
                });
            }
            finally
            {
                if (sr != null)
                {
                    sr.Close();
                }
                if (sw != null)
                {
                    sw.Close();
                }
                if (swSeg != null)
                {
                    swSeg.Close();
                }
            }

            //The elapsed time is only reported for a completed run, after the files are closed
            watch.Stop();
            Logger.WriteLine("Elapsed: {0} ms", watch.ElapsedMilliseconds);

            return true;
        }
Beispiel #4
0
        /// <summary>
        /// Tags every record of the input file with the CRF model using options.thread
        /// parallel workers and writes the results to the configured output files in the
        /// order in which the records were read.
        /// </summary>
        /// <param name="options">
        /// Decoder settings: input, model and output file names, worker count, n-best
        /// size and probability level.
        /// </param>
        /// <returns>
        /// false when the input or model file does not exist or the model fails to load;
        /// true once all records have been processed.
        /// </returns>
        bool Decode(CRFSharpWrapper.DecoderArgs options)
        {
            ParallelOptions parallelOption = new ParallelOptions();

            if (File.Exists(options.strInputFileName) == false)
            {
                Console.WriteLine("FAILED: Open {0} file failed.", options.strInputFileName);
                return false;
            }

            if (File.Exists(options.strModelFileName) == false)
            {
                Console.WriteLine("FAILED: Open {0} file failed.", options.strModelFileName);
                return false;
            }

            StreamReader sr = null;
            StreamWriter sw = null, swSeg = null;

            //All streams are closed in the finally block so that a failed model load or an
            //exception raised while decoding does not leave the files open.
            try
            {
                sr = new StreamReader(options.strInputFileName);

                //Each output file is optional; a null or empty name disables that output
                if (string.IsNullOrEmpty(options.strOutputFileName) == false)
                {
                    sw = new StreamWriter(options.strOutputFileName);
                }
                if (string.IsNullOrEmpty(options.strOutputSegFileName) == false)
                {
                    swSeg = new StreamWriter(options.strOutputSegFileName);
                }

                //Create CRFSharp wrapper instance. It is shared by all workers
                CRFSharpWrapper.Decoder crfWrapper = new CRFSharpWrapper.Decoder();
                //Load model from file
                if (crfWrapper.LoadModel(options.strModelFileName) == false)
                {
                    return false;
                }

                //One queue per output file. The head of a queue is the record whose result
                //must be written next, which keeps the output in input order.
                ConcurrentQueue<List<List<string>>> queueRecords = new ConcurrentQueue<List<List<string>>>();
                ConcurrentQueue<List<List<string>>> queueSegRecords = new ConcurrentQueue<List<List<string>>>();

                parallelOption.MaxDegreeOfParallelism = options.thread;
                Parallel.For(0, options.thread, parallelOption, t =>
                {
                    //Create decoder tagger instance. Each worker needs its own instance
                    SegDecoderTagger tagger = crfWrapper.CreateTagger(options.nBest);
                    tagger.set_vlevel(options.probLevel);

                    //Initialize result
                    crf_seg_out[] crf_out = new crf_seg_out[options.nBest];
                    for (int i = 0; i < options.nBest; i++)
                    {
                        crf_out[i] = new crf_seg_out();
                    }

                    //The same buffer instance is reused for every record of this worker.
                    //Its reference identifies the worker's pending record in the queues.
                    List<List<string>> inbuf = new List<List<string>>();
                    while (true)
                    {
                        //Reading and enqueueing happen under one lock so that the queue
                        //order matches the read order
                        lock (rdLocker)
                        {
                            if (ReadRecord(inbuf, sr) == false)
                            {
                                break;
                            }

                            //Only register the record with writers that exist. A queue
                            //whose writer is absent is never dequeued and would otherwise
                            //grow by one entry per record.
                            if (sw != null)
                            {
                                queueRecords.Enqueue(inbuf);
                            }
                            if (swSeg != null)
                            {
                                queueSegRecords.Enqueue(inbuf);
                            }
                        }

                        //Call CRFSharp wrapper to predict given string's tags
                        if (swSeg != null)
                        {
                            crfWrapper.Segment(crf_out, tagger, inbuf);
                        }
                        else
                        {
                            //The casts choose the Segment overload taking the base types
                            crfWrapper.Segment((crf_term_out[])crf_out, (DecoderTagger)tagger, inbuf);
                        }

                        List<List<string>> peek = null;
                        //Save segmented tagged result into file
                        if (swSeg != null)
                        {
                            List<string> rstList = ConvertCRFTermOutToStringList(inbuf, crf_out);
                            //Spin until this worker's record is at the head of the queue.
                            //NOTE(review): if another worker throws while its record is
                            //still queued ahead of this one, this loop never ends.
                            while (peek != inbuf)
                            {
                                queueSegRecords.TryPeek(out peek);
                            }
                            foreach (string item in rstList)
                            {
                                swSeg.WriteLine(item);
                            }
                            queueSegRecords.TryDequeue(out peek);
                            peek = null;
                        }

                        //Save raw tagged result (with probability) into file
                        if (sw != null)
                        {
                            //Same ordering wait as above, on the raw output queue
                            while (peek != inbuf)
                            {
                                queueRecords.TryPeek(out peek);
                            }
                            OutputRawResultToFile(inbuf, crf_out, tagger, sw);
                            queueRecords.TryDequeue(out peek);
                        }
                    }
                });

                return true;
            }
            finally
            {
                if (sr != null)
                {
                    sr.Close();
                }
                if (sw != null)
                {
                    sw.Close();
                }
                if (swSeg != null)
                {
                    swSeg.Close();
                }
            }
        }