Exemplo n.º 1
0
 /// <summary>
 ///     Loads the ambiguity (correction) dictionary into <c>AmbiguityForest</c>.
 /// </summary>
 /// <remarks>
 ///     The dictionary path is read from <c>MyStaticValue.AmbiguityLibrary</c>.
 ///     A blank path or a missing file is logged as a warning and
 ///     <c>AmbiguityForest</c> is left unassigned by this method. A failure while
 ///     building the forest is logged and traced but not rethrown.
 /// </remarks>
 private static void InitAmbiguityLibrary()
 {
     var ambiguityLibrary = MyStaticValue.AmbiguityLibrary;
     if (string.IsNullOrWhiteSpace(ambiguityLibrary))
     {
         MyStaticValue.Librarylog.Warn("init ambiguity  warning :" + ambiguityLibrary +
                                       " because : file not found or failed to read !");
         return;
     }

     var file = new FileInfo(ambiguityLibrary);
     if (!file.Exists)
     {
         MyStaticValue.Librarylog.Warn("init ambiguity  warning :" + file.FullName +
                                       " because : file not found or failed to read !");
         return;
     }

     try
     {
         AmbiguityForest = StaticLibrary.MakeForest(ambiguityLibrary);
     }
     catch (Exception e)
     {
         MyStaticValue.Librarylog.Warn("init ambiguity  error :" + file.FullName +
                                       " because : not find that file or can not to read !");
         Trace.WriteLine(e);
         // Bail out here so the success message below is never logged for a failed load.
         return;
     }

     MyStaticValue.Librarylog.Info("init ambiguityLibrary ok!");
 }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a new forest from a list of values.
        /// </summary>
        /// <param name="values">The values to insert; each one is inserted through its <c>ToString()</c> form.</param>
        /// <returns>A newly created forest containing every inserted value.</returns>
        public static Forest MakeForest(List<Value> values)
        {
            var result = new Forest();
            // Insert the entries in list order.
            values.ForEach(entry => InsertWord(result, entry.ToString()));
            return result;
        }
Exemplo n.º 3
0
        public void Test1()
        {
            // Add a new word to the user-defined dictionary (original note: fields are separated by '\t').
            UserDefineLibrary.InsertWord("ansj中文分词", "userDefine", 1000);
            foreach (var token in ToAnalysis.Parse("我觉得Ansj中文分词是一个不错的系统!我是王婆!"))
            {
                Debug.WriteLine(token.RealName, "增加新词");
            }

            // Remove a word; only words in the user-defined dictionary can be removed.
            UserDefineLibrary.RemoveWord("ansj中文分词");
            foreach (var token in ToAnalysis.Parse("我觉得ansj中文分词是一个不错的系统!我是王婆!"))
            {
                Debug.WriteLine(token.RealName, "删除用户自定义词典例子");
            }

            // Ambiguity entry: parse the same sentence before and after registering it.
            var ambiguity = new Value("济南下车", "济南", "n", "下车", "v");
            var ambiguousSentence = "我经济南下车到广州.中国经济南下势头迅猛!";
            foreach (var token in ToAnalysis.Parse(ambiguousSentence))
            {
                Debug.WriteLine(token.RealName, "歧义词1");
            }
            StaticLibrary.InsertWord(UserDefineLibrary.AmbiguityForest, ambiguity);
            foreach (var token in ToAnalysis.Parse(ambiguousSentence))
            {
                Debug.WriteLine(token.RealName, "歧义词2");
            }

            // Multiple user dictionaries: parse once without them, then with two extra forests.
            var sentence = "神探夏洛克这部电影作者.是一个dota迷";
            foreach (var token in ToAnalysis.Parse(sentence))
            {
                Debug.WriteLine(token.RealName, "多用户词典1");
            }

            // Two entries, one per dictionary.
            var firstDictionary = new Forest();
            StaticLibrary.InsertWord(firstDictionary, new Value("神探夏洛克", "define", "1000"));
            var secondDictionary = new Forest();
            StaticLibrary.InsertWord(secondDictionary, new Value("dota迷", "define", "1000"));
            foreach (var token in ToAnalysis.Parse(sentence, firstDictionary, secondDictionary))
            {
                Debug.WriteLine(token.RealName, "多用户词典2");
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Fills a forest from a dictionary reader, one entry per line.
        /// </summary>
        /// <param name="br">The reader to consume; it is closed before returning. When null, nothing is read.</param>
        /// <param name="forest">The forest that receives each line through <c>InsertWord</c>.</param>
        /// <returns>The same <paramref name="forest"/> instance that was passed in.</returns>
        private static Forest MakeLibrary(StreamReader br, Forest forest)
        {
            if (br == null)
            {
                return forest;
            }

            // Disposing the reader closes it, whether or not reading succeeds.
            using (br)
            {
                try
                {
                    for (var line = br.ReadLine(); line != null; line = br.ReadLine())
                    {
                        InsertWord(forest, line);
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: report the failure and return whatever was loaded so far.
                    Console.WriteLine(ex);
                }
            }

            return forest;
        }
Exemplo n.º 5
0
 /// <summary>
 ///     Resets <c>Forest</c> to a new instance and loads the user-defined dictionary into it.
 /// </summary>
 /// <remarks>
 ///     The dictionary location is read from <c>MyStaticValue.UserLibrary</c>.
 ///     Any exception is written to the trace output and not rethrown.
 /// </remarks>
 private static void InitUserLibrary()
 {
     try
     {
         Forest = new Forest();
         LoadLibrary(Forest, MyStaticValue.UserLibrary);
     }
     catch (Exception ex)
     {
         Trace.WriteLine(ex);
     }
 }
Exemplo n.º 6
0
 /// <summary>
 ///     Looks up the parameters stored for <paramref name="word"/> in <paramref name="forest"/>.
 /// </summary>
 /// <param name="forest">The forest to walk, starting from its root.</param>
 /// <param name="word">The word to follow, one character at a time.</param>
 /// <returns>
 ///     The <c>Param</c> of the node reached by the last character when that node's
 ///     <c>Status</c> is greater than 1; otherwise null (including when any character
 ///     has no matching child).
 /// </returns>
 public static string[] GetParams(Forest forest, string word)
 {
     IWoodInterface node = forest;
     foreach (var ch in word)
     {
         node = node.Get(ch);
         if (node == null)
         {
             return null;
         }
     }

     // NOTE(review): presumably a status above 1 marks a complete word — confirm against IWoodInterface.
     return node.Status > 1 ? node.Param : null;
 }
Exemplo n.º 7
0
 /// <summary>
 ///     Loads a user dictionary from a single file or from every <c>.dic</c> file in a directory.
 /// </summary>
 /// <param name="forest">The forest that receives the loaded words.</param>
 /// <param name="path">
 ///     Path of a dictionary file or directory; it is resolved under <c>Resources/</c>.
 ///     When null, nothing is loaded. A single file is loaded whatever its extension;
 ///     in a directory only files whose name ends with <c>.dic</c> are loaded.
 /// </param>
 public static void LoadLibrary(Forest forest, string path)
 {
     if (path == null)
     {
         return;
     }

     path = "Resources/" + path;
     var file = new FileInfo(path);

     if (file.Exists)
     {
         LoadFile(forest, file);
         return;
     }

     if (Directory.Exists(path))
     {
         foreach (var candidate in new DirectoryInfo(path).GetFiles())
         {
             // Ordinal comparison: the suffix is a fixed identifier, not linguistic text.
             if (candidate.Name.Trim().EndsWith(".dic", System.StringComparison.Ordinal))
             {
                 LoadFile(forest, candidate);
             }
         }
         return;
     }

     // Neither a file nor a directory exists at the resolved path.
     MyStaticValue.Librarylog.Warn("init userLibrary  warning :" + file.FullName +
                                   " because : file not found or failed to read !");
 }
Exemplo n.º 8
0
        /// <summary>
        ///     Loads a single dictionary file into the given forest.
        /// </summary>
        /// <remarks>
        ///     The file is read as UTF-8, one entry per line, with fields separated by tabs.
        ///     The first field is the keyword (lower-cased before insertion). A line with
        ///     exactly three fields supplies the keyword plus two further values; any other
        ///     field count falls back to <c>DefaultNature</c> and <c>DefaultFreqStr</c>.
        ///     Blank lines are skipped. Exceptions are written to the trace output and not rethrown.
        /// </remarks>
        /// <param name="forest">The forest that receives the words.</param>
        /// <param name="file">The dictionary file; a warning is logged when it does not exist.</param>
        public static void LoadFile(Forest forest, FileInfo file)
        {
            if (!file.Exists)
            {
                MyStaticValue.Librarylog.Warn("file in path " + file.FullName + " can not to read!");
                return;
            }
            TextReader br = null;
            try
            {
                br = IOUtil.GetReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read, FileShare.Read),
                    Encoding.UTF8);
                string temp;
                while ((temp = br.ReadLine()) != null)
                {
                    // Skip blank lines; otherwise an empty keyword would be inserted into the forest.
                    if (string.IsNullOrWhiteSpace(temp))
                    {
                        continue;
                    }
                    var strs = temp.Split('\t');

                    strs[0] = strs[0].ToLower();

                    // Skip the word when skipping is enabled and the core dictionary returns a positive id for it
                    // (presumably meaning the word already exists there — confirm against DatDictionary).
                    if (MyStaticValue.IsSkipUserDefine && DatDictionary.GetId(strs[0]) > 0)
                    {
                        continue;
                    }

                    Value value;
                    if (strs.Length != 3)
                    {
                        value = new Value(strs[0], DefaultNature, DefaultFreqStr);
                    }
                    else
                    {
                        value = new Value(strs[0], strs[1], strs[2]);
                    }
                    StaticLibrary.InsertWord(forest, value);
                }
                MyStaticValue.Librarylog.Info("init user userLibrary ok path is : " + file.FullName);
            }
            catch (Exception e)
            {
                Trace.WriteLine(e);
            }
            finally
            {
                IOUtil.Close(br);
            }
        }
Exemplo n.º 9
0
 /// <summary>
 ///     Creates a word reader over a character buffer.
 /// </summary>
 /// <param name="forest">The forest to match against; it is also used as the initial branch.</param>
 /// <param name="chars">The characters to scan; the array is stored as-is, not copied.</param>
 public GetWord(Forest forest, char[] chars)
 {
     _forest = forest;
     _branch = forest;
     _chars = chars;
 }
Exemplo n.º 10
0
 /// <summary>
 ///     Creates a word reader over the characters of a string.
 /// </summary>
 /// <param name="forest">The forest to match against; it is also used as the initial branch.</param>
 /// <param name="content">The text to scan; it is converted to a character array.</param>
 public GetWord(Forest forest, string content)
 {
     _forest = forest;
     _branch = forest;
     _chars = content.ToCharArray();
 }
Exemplo n.º 11
0
 /// <summary>
 ///     Inserts a value into the forest by forwarding its keyword and parameters
 ///     to the keyword-based <c>InsertWord</c> overload.
 /// </summary>
 /// <param name="forest">The forest that receives the word.</param>
 /// <param name="value">The value whose <c>Keyword</c> and <c>Paramers</c> are inserted.</param>
 public static void InsertWord(Forest forest, Value value)
 {
     var keyword = value.Keyword;
     var parameters = value.Paramers;
     InsertWord(forest, keyword, parameters);
 }
Exemplo n.º 12
0
        /// <summary>
        /// Removes a word from the forest.
        /// </summary>
        /// <remarks>
        /// Walks the forest along the characters of <paramref name="word"/>. On the last
        /// character a branch with status -1 is added to the current node (presumably the
        /// tree treats status -1 as a delete marker — confirm against <c>Add</c>). The walk
        /// stops silently as soon as a character has no matching node.
        /// </remarks>
        /// <param name="forest">The forest to modify.</param>
        /// <param name="word">The word to remove; an empty word is a no-op.</param>
        public static void RemoveWord(Forest forest, string word)
        {
            IWoodInterface node = forest;
            var letters = word.ToCharArray();
            var lastIndex = letters.Length - 1;

            var index = 0;
            while (index < letters.Length)
            {
                if (node == null)
                {
                    return;
                }

                var letter = letters[index];
                if (index == lastIndex)
                {
                    node.Add(new Branch(letter, -1, null));
                }

                node = node.Get(letter);
                index++;
            }
        }