//====================================================================
// Func Name  : SaveIndexTableItem
// Description: Write one index table entry to the writer: first the
//              entry's word count, then its first nCount word items
//              in array order (each via SaveWordItem).
// Parameters : writer: destination binary writer
//              item  : the index table entry to write
// Returns    : nothing
//====================================================================
private void SaveIndexTableItem(BinaryWriter writer, IndexTableItem item)
{
    // The count precedes the items so that Load can size its array.
    writer.Write(item.nCount);

    int index = 0;
    while (index < item.nCount)
    {
        SaveWordItem(writer, item.WordItems[index]);
        index++;
    }
}
//====================================================================
// Func Name  : Load
// Description: Load the dictionary from the file .dct
// Parameters : sFilename: the file name
//              bReset   : when true, every word frequency is stored
//                         as 0 instead of the value read from the file
// Returns    : success or fail. Any exception raised while opening or
//              reading the file is written to the console and reported
//              as a failure (false); indexTable may then be only
//              partially filled.
//====================================================================
public bool Load(string sFilename, bool bReset)
{
    int frequency, wordLength, pos; // frequency, word length, part of speech
    bool isSuccess = true;

    try
    {
        // 'using' disposes the reader and the stream on every exit path,
        // including when the encoding lookup or a read throws.
        using (FileStream fileStream = new FileStream(sFilename, FileMode.Open, FileAccess.Read))
        using (BinaryReader binReader = new BinaryReader(fileStream, Encoding.GetEncoding("gb2312")))
        {
            indexTable = new IndexTableItem[Predefine.CC_NUM];
            bReleased = false;

            for (int i = 0; i < Predefine.CC_NUM; i++)
            {
                // Read the number of word items stored for this index slot.
                indexTable[i] = new IndexTableItem();
                indexTable[i].nCount = binReader.ReadInt32();

                // Empty slot: WordItems is left unallocated.
                if (indexTable[i].nCount <= 0)
                {
                    continue;
                }

                indexTable[i].WordItems = new WordItem[indexTable[i].nCount];

                for (int j = 0; j < indexTable[i].nCount; j++)
                {
                    indexTable[i].WordItems[j] = new WordItem();

                    frequency = binReader.ReadInt32();  // frequency
                    wordLength = binReader.ReadInt32(); // word length in bytes
                    pos = binReader.ReadInt32();        // part of speech

                    if (wordLength > 0)
                    {
                        indexTable[i].WordItems[j].sWord = Utility.ByteArray2String(binReader.ReadBytes(wordLength));
                    }
                    else
                    {
                        indexTable[i].WordItems[j].sWord = "";
                    }

                    // Reset the frequency if requested.
                    indexTable[i].WordItems[j].nFrequency = bReset ? 0 : frequency;
                    indexTable[i].WordItems[j].nWordLen = wordLength;
                    indexTable[i].WordItems[j].nPOS = pos;
                }
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
        isSuccess = false;
    }

    return isSuccess;
}
//====================================================================
// Func Name  : MergeAndSaveIndexTableItem
// Description: Write one index table entry merged with its pending
//              modifications. The merged word count is written first,
//              then the word items of both sources interleaved in
//              (word, nPOS) order. Index table items whose nFrequency
//              is -1 are treated as deleted and are not written.
// Parameters : writer    : destination binary writer
//              item      : the original index table entry
//              modifyItem: the modification entry (linked list of
//                          added words starting at pWordItemHead)
// Returns    : nothing
//====================================================================
private void MergeAndSaveIndexTableItem(BinaryWriter writer, IndexTableItem item, ModifyTableItem modifyItem)
{
    // Number of word items after modification, derived from the counters.
    // NOTE(review): assumes nDelete equals the number of index items marked
    // with nFrequency == -1, so the header matches what is written - confirm.
    int nCount = item.nCount + modifyItem.nCount - modifyItem.nDelete;
    writer.Write(nCount);

    WordChain pCur = modifyItem.pWordItemHead;
    int j = 0;

    // Walk both sources in parallel, always emitting the smaller item.
    while (pCur != null && j < item.nCount)
    {
        // The modified word sorts before the original one (smaller word, or
        // same word with a smaller nPOS): write the modified word.
        if (Utility.CCStringCompare(pCur.data.sWord, item.WordItems[j].sWord) < 0 ||
            ((pCur.data.sWord == item.WordItems[j].sWord) && (pCur.data.nPOS < item.WordItems[j].nPOS)))
        {
            SaveWordItem(writer, pCur.data);
            pCur = pCur.next;
        }
        // nFrequency == -1 marks the original word as deleted: skip it.
        else if (item.WordItems[j].nFrequency == -1)
        {
            j++;
        }
        // Otherwise the original word sorts first, or ties on both word and
        // nPOS: write the original word. Handling the tie here guarantees the
        // loop always advances; previously a tie matched no branch and the
        // loop never terminated.
        // NOTE(review): on a tie both items end up written - presumably the
        // modification list never duplicates an index item; confirm with callers.
        else
        {
            SaveWordItem(writer, item.WordItems[j]);
            j++;
        }
    }

    // At most one of the two sources still has items at this point.

    // Remaining original items, skipping deleted ones. The deletion flag must
    // be read from the item being written (index i), not from position j.
    for (int i = j; i < item.nCount; i++)
    {
        if (item.WordItems[i].nFrequency != -1)
        {
            SaveWordItem(writer, item.WordItems[i]);
        }
    }

    // Remaining modified items.
    while (pCur != null)
    {
        SaveWordItem(writer, pCur.data);
        pCur = pCur.next;
    }
}
//====================================================================
// Func Name  : Load
// Description: Load the dictionary from the file .dct
// Parameters : sFilename: the file name
//              bReset   : when true, every word frequency is stored
//                         as 0 instead of the value read from the file
// Returns    : success or fail. Any exception raised while opening or
//              reading the file is written to the console and reported
//              as a failure (false); indexTable may then be only
//              partially filled.
//====================================================================
public bool Load(string sFilename, bool bReset)
{
    int frequency, wordLength, pos; // frequency, word length, part of speech
    bool isSuccess = true;

    try
    {
        // 'using' disposes the reader and the stream on every exit path,
        // including when the encoding lookup or a read throws.
        using (FileStream fileStream = new FileStream(sFilename, FileMode.Open, FileAccess.Read))
        using (BinaryReader binReader = new BinaryReader(fileStream, Encoding.GetEncoding("gb2312")))
        {
            indexTable = new IndexTableItem[Predefine.CC_NUM];
            bReleased = false;

            for (int i = 0; i < Predefine.CC_NUM; i++)
            {
                // Read the number of word items stored for this index slot.
                indexTable[i] = new IndexTableItem();
                indexTable[i].nCount = binReader.ReadInt32();

                // Empty slot: WordItems is left unallocated.
                if (indexTable[i].nCount <= 0)
                {
                    continue;
                }

                indexTable[i].WordItems = new WordItem[indexTable[i].nCount];

                for (int j = 0; j < indexTable[i].nCount; j++)
                {
                    indexTable[i].WordItems[j] = new WordItem();

                    frequency = binReader.ReadInt32();  // frequency
                    wordLength = binReader.ReadInt32(); // word length in bytes
                    pos = binReader.ReadInt32();        // part of speech

                    if (wordLength > 0)
                    {
                        indexTable[i].WordItems[j].sWord = Utility.ByteArray2String(binReader.ReadBytes(wordLength));
                    }
                    else
                    {
                        indexTable[i].WordItems[j].sWord = "";
                    }

                    // Reset the frequency if requested.
                    indexTable[i].WordItems[j].nFrequency = bReset ? 0 : frequency;
                    indexTable[i].WordItems[j].nWordLen = wordLength;
                    indexTable[i].WordItems[j].nPOS = pos;
                }
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
        isSuccess = false;
    }

    return isSuccess;
}
//====================================================================
// Func Name  : SaveIndexTableItem
// Description: Write one index table entry to the writer: first the
//              entry's word count, then its first nCount word items
//              in array order (each via SaveWordItem).
// Parameters : writer: destination binary writer
//              item  : the index table entry to write
// Returns    : nothing
//====================================================================
private void SaveIndexTableItem(BinaryWriter writer, IndexTableItem item)
{
    // The count precedes the items so that Load can size its array.
    writer.Write(item.nCount);

    int index = 0;
    while (index < item.nCount)
    {
        SaveWordItem(writer, item.WordItems[index]);
        index++;
    }
}
//====================================================================
// Func Name  : MergeAndSaveIndexTableItem
// Description: Write one index table entry merged with its pending
//              modifications. The merged word count is written first,
//              then the word items of both sources interleaved in
//              (word, nPOS) order. Index table items whose nFrequency
//              is -1 are treated as deleted and are not written.
// Parameters : writer    : destination binary writer
//              item      : the original index table entry
//              modifyItem: the modification entry (linked list of
//                          added words starting at pWordItemHead)
// Returns    : nothing
//====================================================================
private void MergeAndSaveIndexTableItem(BinaryWriter writer, IndexTableItem item, ModifyTableItem modifyItem)
{
    // Number of word items after modification, derived from the counters.
    // NOTE(review): assumes nDelete equals the number of index items marked
    // with nFrequency == -1, so the header matches what is written - confirm.
    int nCount = item.nCount + modifyItem.nCount - modifyItem.nDelete;
    writer.Write(nCount);

    WordChain pCur = modifyItem.pWordItemHead;
    int j = 0;

    // Walk both sources in parallel, always emitting the smaller item.
    while (pCur != null && j < item.nCount)
    {
        // The modified word sorts before the original one (smaller word, or
        // same word with a smaller nPOS): write the modified word.
        if (Utility.CCStringCompare(pCur.data.sWord, item.WordItems[j].sWord) < 0 ||
            ((pCur.data.sWord == item.WordItems[j].sWord) && (pCur.data.nPOS < item.WordItems[j].nPOS)))
        {
            SaveWordItem(writer, pCur.data);
            pCur = pCur.next;
        }
        // nFrequency == -1 marks the original word as deleted: skip it.
        else if (item.WordItems[j].nFrequency == -1)
        {
            j++;
        }
        // Otherwise the original word sorts first, or ties on both word and
        // nPOS: write the original word. Handling the tie here guarantees the
        // loop always advances; previously a tie matched no branch and the
        // loop never terminated.
        // NOTE(review): on a tie both items end up written - presumably the
        // modification list never duplicates an index item; confirm with callers.
        else
        {
            SaveWordItem(writer, item.WordItems[j]);
            j++;
        }
    }

    // At most one of the two sources still has items at this point.

    // Remaining original items, skipping deleted ones. The deletion flag must
    // be read from the item being written (index i), not from position j.
    for (int i = j; i < item.nCount; i++)
    {
        if (item.WordItems[i].nFrequency != -1)
        {
            SaveWordItem(writer, item.WordItems[i]);
        }
    }

    // Remaining modified items.
    while (pCur != null)
    {
        SaveWordItem(writer, pCur.data);
        pCur = pCur.next;
    }
}
//====================================================================
// Func Name  : MergeAndSaveIndexTableItem
// Description: Write one index table entry merged with its pending
//              modifications. The merged word count is written first,
//              then the word items of both sources interleaved in
//              (word, nPOS) order. Index table items whose nFrequency
//              is -1 are treated as deleted and are not written.
// Parameters : writer    : destination binary writer
//              item      : the original index table entry
//              modifyItem: the modification entry (linked list of
//                          added words starting at pWordItemHead)
// Returns    : nothing
//====================================================================
private void MergeAndSaveIndexTableItem(BinaryWriter writer, IndexTableItem item, ModifyTableItem modifyItem)
{
    // Number of word items after modification, derived from the counters.
    // NOTE(review): assumes nDelete equals the number of index items marked
    // with nFrequency == -1, so the header matches what is written - confirm.
    int nCount = item.nCount + modifyItem.nCount - modifyItem.nDelete;
    writer.Write(nCount);

    WordChain pCur = modifyItem.pWordItemHead;
    int j = 0;

    // Walk both sources in parallel, always emitting the smaller item.
    while (pCur != null && j < item.nCount)
    {
        // The modified word sorts before the original one (smaller word, or
        // same word with a smaller nPOS): write the modified word.
        if (Utility.CCStringCompare(pCur.data.sWord, item.WordItems[j].sWord) < 0 ||
            ((pCur.data.sWord == item.WordItems[j].sWord) && (pCur.data.nPOS < item.WordItems[j].nPOS)))
        {
            SaveWordItem(writer, pCur.data);
            pCur = pCur.next;
        }
        // nFrequency == -1 marks the original word as deleted: skip it.
        else if (item.WordItems[j].nFrequency == -1)
        {
            j++;
        }
        // Otherwise the original word sorts first, or ties on both word and
        // nPOS: write the original word. Handling the tie here guarantees the
        // loop always advances; previously a tie matched no branch and the
        // loop never terminated.
        // NOTE(review): on a tie both items end up written - presumably the
        // modification list never duplicates an index item; confirm with callers.
        else
        {
            SaveWordItem(writer, item.WordItems[j]);
            j++;
        }
    }

    // At most one of the two sources still has items at this point.

    // Remaining original items, skipping deleted ones. The deletion flag must
    // be read from the item being written (index i), not from position j.
    for (int i = j; i < item.nCount; i++)
    {
        if (item.WordItems[i].nFrequency != -1)
        {
            SaveWordItem(writer, item.WordItems[i]);
        }
    }

    // Remaining modified items.
    while (pCur != null)
    {
        SaveWordItem(writer, pCur.data);
        pCur = pCur.next;
    }
}