예제 #1
0
        /// <summary>
        /// Populates the model from a binary buffer: the tag tables, the feature
        /// functions (handed to the feature-function trie), the feature templates
        /// and, when its stored dimension is non-zero, the square matrix.
        /// </summary>
        /// <param name="ba">Buffer to read from; may be <c>null</c>.</param>
        /// <returns>
        /// <c>true</c> when every section was read; <c>false</c> when
        /// <paramref name="ba"/> is <c>null</c> or any exception occurred while reading.
        /// Fields assigned before a failure keep whatever was written to them.
        /// </returns>
        public virtual bool LoadFromBin(ByteArray ba)
        {
            if (ba == null)
            {
                return false;
            }

            try
            {
                // Data files produced for Model targets are compatible with the
                // corresponding original data files, so only the high-byte-first
                // layout is handled here.
                int tagCount = ba.NextInt_HighFirst();
                id2tag = new string[tagCount];
                tag2id = new Dictionary<string, int>(tagCount);
                for (int id = 0; id < tagCount; id++)
                {
                    string tag = ba.NextUTFStr(true);
                    id2tag[id] = tag;
                    tag2id[tag] = id;
                }

                // Feature functions are read one after another and then passed to the trie loader.
                int functionCount = ba.NextInt_HighFirst();
                var functions = new FeatureFunction[functionCount];
                for (int k = 0; k < functions.Length; k++)
                {
                    functions[k] = new FeatureFunction();
                    functions[k].Load(ba);
                }
                _ffTrie.Load(ba, functions, true);

                // Feature templates.
                int templateCount = ba.NextInt_HighFirst();
                _ftList = new List<FeatureTemplate>(templateCount);
                for (int k = 0; k < templateCount; k++)
                {
                    var template = new FeatureTemplate();
                    template.Load(ba);
                    _ftList.Add(template);
                }

                // A dimension of zero means no matrix was stored; _matrix is left untouched then.
                int dimension = ba.NextInt_HighFirst();
                if (dimension != 0)
                {
                    _matrix = new double[dimension][];
                    for (int row = 0; row < dimension; row++)
                    {
                        var values = new double[dimension];
                        _matrix[row] = values;
                        for (int col = 0; col < dimension; col++)
                        {
                            values[col] = ba.NextDouble_HighFirst();
                        }
                    }
                }

                return true;
            }
            catch (Exception)
            {
                // Any read failure is reported to the caller as "not loaded".
                return false;
            }
        }
예제 #2
0
        /// <summary>
        /// Loads the model from a text model file, then tries to cache it as a
        /// binary file at <c>path + Predefine.BIN_EXT</c>, and finally calls
        /// <c>OnLoadTxtFinished()</c>.
        /// </summary>
        /// <remarks>
        /// Layout as consumed by this method (sections are separated by one blank line):
        /// five header lines (version, cost-factor, "maxid:xx", xsize, blank);
        /// one tag per line; feature templates, where a line equal to "B" announces
        /// that a square matrix is present; feature functions as "id text" lines
        /// (preceded by a "0 B" line when a matrix is present); then one number per
        /// line: first the matrix (row by row), then the weights of every feature
        /// function in file order.
        /// Numbers are parsed with the invariant culture, so the result does not
        /// depend on the machine's regional settings.
        /// Writing the binary cache is best-effort: a failure there does not fail the load.
        /// </remarks>
        /// <param name="path">Path of the text model file.</param>
        public virtual void LoadFromTxt(string path)
        {
            var lines = File.ReadAllLines(path);
            var numberFormat = System.Globalization.CultureInfo.InvariantCulture;

            // lines[0]: version
            // lines[1]: cost-factor
            // lines[2]: "maxid:xx"
            // lines[3]: xsize
            // lines[4]: blank line
            const int firstTagLine = 5;

            // ---- tags: one per line, id = position within the section ----
            tag2id = new Dictionary<string, int>();
            int i = firstTagLine;
            for (; i < lines.Length; i++)
            {
                if (string.IsNullOrEmpty(lines[i]))
                {
                    break;
                }
                tag2id[lines[i]] = i - firstTagLine;
            }
            int size = tag2id.Count;

            // NOTE: this relies on tag2id.Count being exactly (largest id + 1), i.e. on
            // the tag section containing no duplicate lines; otherwise the indexing
            // below goes out of range.
            id2tag = new string[size];
            foreach (var pair in tag2id)
            {
                id2tag[pair.Value] = pair.Key;
            }

            var ffMap  = new SortedDictionary<string, FeatureFunction>(StrComparer.Default);
            var ffList = new List<FeatureFunction>();

            // ---- feature templates ----
            _ftList = new List<FeatureTemplate>();
            // Forget any matrix from a previous load: "_matrix != null" is used below
            // to decide whether THIS file contains matrix data.
            _matrix = null;
            i++;                                    // skip the blank line that ended the tag section
            for (; i < lines.Length; i++)
            {
                string templateLine = lines[i];
                if (string.IsNullOrEmpty(templateLine))
                {
                    break;
                }
                if ("B" != templateLine)
                {
                    _ftList.Add(FeatureTemplate.Create(templateLine));
                }
                else
                {
                    // A "B" line announces that matrix data follows later in the file,
                    // so allocate the rows of the square matrix now.
                    _matrix = new double[size][];
                }
            }

            // ---- feature functions: "<id> <text>" ----
            i++;                                    // skip the blank line that ended the template section
            if (_matrix != null)
            {
                i++;                                // when matrix data is provided the next line is "0 B"; skip it
            }
            for (; i < lines.Length; i++)
            {
                string functionLine = lines[i];
                if (string.IsNullOrEmpty(functionLine))
                {
                    break;
                }
                var args = functionLine.Split(new[] { ' ' }, 2);
                var ff   = new FeatureFunction(args[1].ToCharArray(), size);
                ffMap[args[1]] = ff;
                ffList.Add(ff);
            }

            // ---- numbers: matrix first (if any), then per-function weights ----
            i++;                                    // skip the blank line that ended the function section
            if (_matrix != null)
            {
                for (int row = 0; row < size; row++)
                {
                    _matrix[row] = new double[size];
                    for (int col = 0; col < size; col++)
                    {
                        _matrix[row][col] = double.Parse(lines[i++], numberFormat);
                    }
                }
            }

            foreach (var ff in ffList)
            {
                for (int j = 0; j < size; j++)
                {
                    ff.w[j] = double.Parse(lines[i++], numberFormat);
                }
            }
            if (i < lines.Length)
            {
                // log: unread text remains after parsing; the file may be malformed
            }
            _ffTrie.Build(ffMap);

            // ---- cache the model as a binary file (best effort) ----
            string binPath = path + Predefine.BIN_EXT;
            try
            {
                using (var fs = new FileStream(binPath, FileMode.Create, FileAccess.Write))
                {
                    Save(fs);
                }
            }
            catch (Exception)
            {
                // log the failure; the model itself is already loaded, so only make
                // sure no truncated cache file is left behind.
                try
                {
                    File.Delete(binPath);
                }
                catch (IOException)
                {
                    // nothing more can be done about a cache file we cannot remove
                }
                catch (UnauthorizedAccessException)
                {
                    // same as above: the cache location is not writable
                }
            }
            OnLoadTxtFinished();
        }