Ejemplo n.º 1
0
 /// <summary>
 /// Checks whether any part-of-speech tag in <c>natures</c> starts with the given prefix.
 /// </summary>
 /// <param name="prefix">Prefix handed to <c>NatureHelper.StartsWith</c> for every tag.</param>
 /// <returns><c>true</c> as soon as one tag matches; <c>false</c> when none does.</returns>
 public bool PrefixMatch(string prefix)
 {
     foreach (var nature in natures)
     {
         if (NatureHelper.StartsWith(nature, prefix))
         {
             return true;
         }
     }

     return false;
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads one text dictionary file and stores an entry per line in <paramref name="dict"/>.
        /// </summary>
        /// <remarks>
        /// Fields are separated by space or tab, or by comma when <paramref name="path"/> ends with ".csv".
        /// The first field is the word; the remaining fields are read as (nature, frequency) pairs.
        /// A line without any pair gets <paramref name="defNat"/> as its only nature.
        /// Entries added before a failure stay in <paramref name="dict"/>.
        /// </remarks>
        /// <param name="path">Path of the dictionary file to read.</param>
        /// <param name="defNat">Nature assigned to words that carry no explicit nature/frequency pair.</param>
        /// <param name="dict">Destination dictionary; an existing key is overwritten.</param>
        /// <returns><c>true</c> when the whole file was processed; <c>false</c> if any exception occurred.</returns>
        private static bool Load(string path, Nature defNat, SortedDictionary<string, WordAttr> dict)
        {
            try
            {
                // Ordinal comparison: the extension is a file-system token, not linguistic text.
                var splitter = path.EndsWith(".csv", StringComparison.Ordinal)
                    ? new[] { ',' }
                    : new[] { ' ', '\t' };

                foreach (var line in File.ReadLines(path))
                {
                    var segs = line.Split(splitter);

                    // string.Split never returns an empty array (a blank line yields [""]),
                    // so a blank line has to be detected through its empty first field.
                    // Without this, blank lines would be stored under the empty key.
                    if (segs.Length == 0 || segs[0].Length == 0)
                    {
                        continue;
                    }

                    if (Config.NormalizeChar)
                    {
                        segs[0] = CharTable.Convert(segs[0]);
                    }

                    // Fields after the word come in pairs; an unpaired trailing field is ignored.
                    var natCount = (segs.Length - 1) / 2;
                    WordAttr attr;
                    if (natCount == 0)
                    {
                        attr = new WordAttr(defNat);
                    }
                    else
                    {
                        attr = new WordAttr(natCount);
                        for (int i = 0; i < natCount; i++)
                        {
                            // Pair i occupies fields 2*i + 1 (nature) and 2*i + 2 (frequency).
                            attr.natures[i] = NatureHelper.GetOrCreate(segs[2 * i + 1]);
                            attr.freqs[i]   = int.Parse(segs[2 * i + 2]);
                            attr.totalFreq += attr.freqs[i];
                        }
                    }

                    dict[segs[0]] = attr;
                }

                return true;
            }
            catch (Exception)
            {
                // Best-effort loader: any I/O or parse failure is reported to the caller as false.
                return false;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Populates <c>dat</c> with the custom dictionary.
        /// </summary>
        /// <remarks>
        /// First tries <c>LoadDat</c> on the first configured path. If that fails, every configured
        /// text dictionary is parsed into a sorted map, the trie is built from it and
        /// <c>SaveDat</c> is called with the first configured path.
        /// A configured entry may have the form "path nature"; the text after the first space
        /// is then used as the default nature for that file.
        /// </remarks>
        /// <returns><c>true</c> on success; <c>false</c> if an exception occurred while parsing, building or saving.</returns>
        private static bool Load()
        {
            if (LoadDat(Config.Custom_Dict_Path[0]))
            {
                return true;
            }

            dat = new DoubleArrayTrie<WordAttr>();
            var entries = new SortedDictionary<string, WordAttr>(StrComparer.Default);

            try
            {
                foreach (var configured in Config.Custom_Dict_Path)
                {
                    var filePath      = configured;   // path of the current custom dictionary file
                    var defaultNature = Nature.n;

                    int separatorAt = configured.IndexOf(' ');
                    if (separatorAt > 0)
                    {
                        // A default nature is given: it is the text following the space.
                        defaultNature = NatureHelper.GetOrCreate(configured.Substring(separatorAt + 1));
                        filePath      = configured.Substring(0, separatorAt);
                    }

                    // The per-file result is currently ignored; a failed file would ideally
                    // produce a warning such as "loading file failed: <path>".
                    Load(filePath, defaultNature, entries);
                }

                if (entries.Count == 0)
                {
                    // Nothing was loaded (worth a warning): insert a blank placeholder entry.
                    entries[Constants.TAG_OTHER] = null;
                }

                dat.Build(entries);
                SaveDat(Config.Custom_Dict_Path[0], entries);

                return true;
            }
            catch (Exception)
            {
                return false;
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Tries to fill <c>dat</c> from the binary file at <paramref name="path"/> plus <c>Predefine.BIN_EXT</c>.
        /// </summary>
        /// <param name="path">Dictionary path; the binary extension is appended to it.</param>
        /// <returns>
        /// <c>false</c> when the byte array cannot be created or any exception is thrown;
        /// otherwise the result of <c>dat.Load</c>.
        /// </returns>
        private static bool LoadDat(string path)
        {
            try
            {
                var bytes = ByteArray.Create(path + Predefine.BIN_EXT);
                if (bytes == null)
                {
                    return false;
                }

                int count = bytes.NextInt();
                if (count < 0)
                {
                    // Compatibility measure: a negative leading value means the header holds
                    // -count user-defined nature names ahead of the real value count.
                    for (int pending = count; pending < 0; pending++)
                    {
                        // Register the user-defined nature.
                        NatureHelper.GetOrCreate(bytes.NextString());
                    }

                    count = bytes.NextInt();
                }

                // Load the values.
                var values = new WordAttr[count];
                for (int v = 0; v < count; v++)
                {
                    int total     = bytes.NextInt();
                    int pairCount = bytes.NextInt();

                    var item = new WordAttr(pairCount);
                    item.totalFreq = total;
                    values[v]      = item;

                    for (int p = 0; p < pairCount; p++)
                    {
                        item.natures[p] = (Nature)bytes.NextInt();
                        item.freqs[p]   = bytes.NextInt();
                    }
                }

                // Load the keys.
                return dat.Load(bytes, values);
            }
            catch (Exception)
            {
                return false;
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds an <c>Attribute</c> from a space-separated "nature freq nature freq ..." string.
        /// </summary>
        /// <param name="natWithFreq">Alternating nature names and integer frequencies, separated by single spaces.</param>
        /// <returns>
        /// The populated attribute, with <c>totalFreq</c> equal to the sum of all frequencies,
        /// or <c>null</c> if any exception occurs while parsing.
        /// </returns>
        public static Attribute Create(string natWithFreq)
        {
            try
            {
                string[] tokens = natWithFreq.Split(' ');
                int pairCount   = tokens.Length / 2;   // an unpaired trailing token is ignored
                var result      = new Attribute(pairCount);

                for (int pair = 0, pos = 0; pair < pairCount; pair++, pos += 2)
                {
                    result.natures[pair] = NatureHelper.GetOrCreate(tokens[pos]);
                    result.freqs[pair]   = int.Parse(tokens[pos + 1]);
                    result.totalFreq    += result.freqs[pair];
                }

                return result;
            }
            catch (Exception)
            {
                // Creating the attribute failed (worth a warning once logging is available).
                return null;
            }
        }