/// <summary>
/// Compiles every "*.crf" file found directly under <paramref name="crfModelDir"/> into a single
/// binary blob written to <paramref name="outputStream"/>.
/// </summary>
/// <remarks>
/// Layout written to the stream (all integers are unsigned 32-bit):
/// tag pool offset, model pool offset, entry count, one offset per model, one offset per tag,
/// the model pool bytes, and finally the tag pool bytes. The two leading offsets are written as
/// placeholders first and patched at the end by seeking back to position 0 of the stream.
/// </remarks>
/// <param name="crfModelDir">Directory that contains the "*.crf" model files.</param>
/// <param name="outputStream">Destination stream; it must be writable and seekable.</param>
/// <param name="addedFileNames">Receives the path of every model file that was compiled.</param>
/// <param name="lang">Language of the models; zh-CN and ja-JP use localized tag names.</param>
/// <returns>The set of errors collected while compiling (empty when nothing went wrong).</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="crfModelDir"/> is null or empty, or <paramref name="outputStream"/> or
/// <paramref name="addedFileNames"/> is null.
/// </exception>
public static ErrorSet Compile(string crfModelDir, Stream outputStream, Collection<string> addedFileNames, Language lang)
{
    if (string.IsNullOrEmpty(crfModelDir))
    {
        throw new ArgumentNullException("crfModelDir");
    }

    if (outputStream == null)
    {
        throw new ArgumentNullException("outputStream");
    }

    if (addedFileNames == null)
    {
        throw new ArgumentNullException("addedFileNames");
    }

    ErrorSet errorSet = new ErrorSet();

    if (!Directory.Exists(crfModelDir))
    {
        errorSet.Add(CrfModelCompilerError.DataFolderNotFound, crfModelDir);
        return errorSet;
    }

    // The writer is intentionally not disposed: disposing it would close the caller-owned stream.
    BinaryWriter bw = new BinaryWriter(outputStream, Encoding.Unicode);

    List<byte[]> crfModels = new List<byte[]>();
    List<string> crfTags = new List<string>();
    Dictionary<string, string> localizedNameMapping = null;

    // For zh-CN and ja-JP the localized name is used as the CRF model tag.
    if (lang == Language.ZhCN || lang == Language.JaJP)
    {
        const string MappingFileName = "CRFLocalizedMapping.txt";

        // The mapping file lives next to (not inside) the model directory. A root directory has
        // no parent, in which case the mapping file cannot exist.
        DirectoryInfo parentDir = new DirectoryInfo(crfModelDir).Parent;
        string crfModelNameMappingFile = parentDir == null
            ? null
            : Path.Combine(parentDir.FullName, MappingFileName);

        if (crfModelNameMappingFile != null && File.Exists(crfModelNameMappingFile))
        {
            localizedNameMapping = LocalizeCRFModelName(crfModelNameMappingFile, errorSet);
        }
        else
        {
            errorSet.Add(CrfModelCompilerError.MappingFileNotFound, crfModelNameMappingFile ?? MappingFileName);
        }
    }

    string[] crfModelFileNames = Directory.GetFiles(crfModelDir, "*.crf", SearchOption.TopDirectoryOnly);
    foreach (string crfModelFileName in crfModelFileNames)
    {
        // Default tag is the file name without extension; a localized name replaces it when the
        // mapping has an entry for this file. Exactly one tag is added per model so that the tag
        // table and the model table always have the same number of entries.
        string crfTag = Path.GetFileNameWithoutExtension(crfModelFileName);
        if (localizedNameMapping != null)
        {
            string localizedName;
            if (localizedNameMapping.TryGetValue(Path.GetFileName(crfModelFileName), out localizedName))
            {
                crfTag = localizedName;
            }
        }

        // Tags are stored upper-cased (case insensitive); the invariant culture keeps the
        // output independent of the machine the compiler runs on.
        crfTags.Add(crfTag.ToUpperInvariant());
        crfModels.Add(File.ReadAllBytes(crfModelFileName));
        addedFileNames.Add(crfModelFileName);
    }

    using (StringPool crfModelSp = new StringPool())
    using (StringPool crfTagSp = new StringPool())
    {
        Collection<int> crfModelsOffsets = new Collection<int>();
        Collection<int> crfTagsOffsets = new Collection<int>();

        // Add models to the model pool, remembering where each one starts.
        foreach (byte[] model in crfModels)
        {
            crfModelsOffsets.Add(crfModelSp.PutBuffer(model));
        }

        // Add tags to the tag pool.
        StringPool.WordsToStringPool(crfTags, crfTagSp, crfTagsOffsets);

        // Header: placeholders for the two pool offsets, patched once the pools are written.
        uint tagOffset = 0;
        uint modelOffset = 0;
        bw.Write(tagOffset);
        bw.Write(modelOffset);
        bw.Write((uint)crfTagsOffsets.Count);

        // crfModelsOffsets[i] belongs to crfModelFileNames[i]: one model is added per file, in order.
        for (int i = 0; i < crfModelsOffsets.Count; i++)
        {
            uint offset = (uint)crfModelsOffsets[i];
            if ((offset % 4) != 0)
            {
                // A model starting off a 4-byte boundary is reported; the offset is still written.
                errorSet.Add(CrfModelCompilerError.InvalidCrfModel, crfModelFileNames[i]);
            }

            bw.Write(offset);
        }

        foreach (int offset in crfTagsOffsets)
        {
            bw.Write((uint)offset);
        }

        modelOffset = (uint)bw.BaseStream.Position;
        Debug.Assert((modelOffset % 4) == 0);
        byte[] crfModelPool = crfModelSp.ToArray();
        bw.Write(crfModelPool, 0, crfModelPool.Length);

        tagOffset = (uint)bw.BaseStream.Position;
        Debug.Assert((tagOffset % 4) == 0);
        byte[] crfTagPool = crfTagSp.ToArray();
        bw.Write(crfTagPool, 0, crfTagPool.Length);
        bw.Flush();

        // Patch the real pool offsets into the header at the start of the stream.
        bw.Seek(0, SeekOrigin.Begin);
        bw.Write(tagOffset);
        bw.Write(modelOffset);
        bw.Flush();
    }

    return errorSet;
}