/// <summary>
/// Segments every file listed in <c>fileList</c> with the ICTCLAS_* functions and writes the
/// words, separated by single spaces, to <c>outDir + ".txt"</c> (one output line per non-empty
/// input line). Afterwards prints the elapsed time and throughput to the console.
/// </summary>
/// <remarks>
/// Input and output both use <c>System.Text.Encoding.Default</c>. The <c>start</c>/<c>length</c>
/// fields of each result entry are applied as byte offsets into the line's encoded bytes.
/// If <c>ICTCLAS_Init</c> fails, the method reports the failure, waits for a key press and
/// returns without processing anything.
/// </remarks>
private static void CPPictclas()
{
    if (!ICTCLAS_Init(null))
    {
        System.Console.WriteLine("Init ICTCLAS failed!");
        System.Console.Read();
        // Do not run the segmenter when initialization failed.
        return;
    }

    try
    {
        Stopwatch sp = new Stopwatch();

        using (StreamWriter sw = new StreamWriter(outDir + ".txt", false, System.Text.Encoding.Default))
        {
            sp.Start();

            // Segment each file in fileList and write the result to the text file.
            for (int i = 0; i < fileList.Count; i += 1)
            {
                using (StreamReader sr = new StreamReader(fileList[i], System.Text.Encoding.Default))
                {
                    string input;

                    // Reading in the loop condition guarantees every 'continue' below moves on
                    // to the next line (a failing line used to be retried forever).
                    while ((input = sr.ReadLine()) != null)
                    {
                        if (input == "")
                        {
                            continue;
                        }

                        result_t[] result;
                        int nWrdcnt;
                        try
                        {
                            result = new result_t[input.Length];
                            nWrdcnt = ICTCLAS_ParagraphProcessAW(input, result, eCodeType.CODE_TYPE_GB, 1);
                        }
                        catch (Exception e)
                        {
                            Console.WriteLine(e.Message);
                            continue;
                        }

                        byte[] mybyte = System.Text.Encoding.Default.GetBytes(input);

                        for (int j = 0; j < nWrdcnt; ++j)
                        {
                            byte[] byteWord;
                            try
                            {
                                byteWord = new byte[result[j].length];
                                Array.Copy(mybyte, result[j].start, byteWord, 0, result[j].length);
                            }
                            catch (Exception e)
                            {
                                // A bad offset/length is reported and the word skipped instead of
                                // copying into a stale buffer.
                                Console.WriteLine(e.Message);
                                continue;
                            }

                            string word = System.Text.Encoding.Default.GetString(byteWord);
                            if (word == " ")
                            {
                                continue;
                            }

                            sw.Write(word + " ");
                        }

                        sw.WriteLine("");
                    }
                }
            }
        }

        // The writer is flushed and closed before the clock stops, as before.
        sp.Stop();
        Console.WriteLine("ICTCLAS:" + sp.ElapsedMilliseconds + "ms" + " Word Segmentation Speed: " + (float)totalFileSizeInByte / 1024 / sp.ElapsedMilliseconds * 1000 + " KB/s");
    }
    finally
    {
        // Always pair a successful ICTCLAS_Init with ICTCLAS_Exit, even if processing throws.
        ICTCLAS_Exit();
    }
}
/// <summary>
/// Runs the ICTCLAS_* word segmentation over every file in <c>fileList</c> and writes the
/// resulting words, each followed by a space, to <c>outDir + ".txt"</c>. Each non-empty input
/// line produces one output line. Timing and throughput are printed to the console at the end.
/// </summary>
/// <remarks>
/// Files are read and written with <c>System.Text.Encoding.Default</c>; the <c>start</c> and
/// <c>length</c> of each result entry are used as byte offsets into the encoded line.
/// When <c>ICTCLAS_Init</c> fails the method prints a message, waits for a key press and returns.
/// </remarks>
private static void CPPictclas()
{
    if (!ICTCLAS_Init(null))
    {
        System.Console.WriteLine("Init ICTCLAS failed!");
        System.Console.Read();
        // Skip segmentation entirely when initialization failed.
        return;
    }

    try
    {
        Stopwatch sp = new Stopwatch();

        using (StreamWriter sw = new StreamWriter(outDir + ".txt", false, System.Text.Encoding.Default))
        {
            sp.Start();

            // Segment each file in fileList and write the result to the text file.
            for (int i = 0; i < fileList.Count; i += 1)
            {
                using (StreamReader sr = new StreamReader(fileList[i], System.Text.Encoding.Default))
                {
                    string input;

                    // The next line is fetched in the loop condition, so a 'continue' can never
                    // re-process the same line (previously a throwing line looped forever).
                    while ((input = sr.ReadLine()) != null)
                    {
                        if (input == "")
                        {
                            continue;
                        }

                        result_t[] result;
                        int nWrdcnt;
                        try
                        {
                            result = new result_t[input.Length];
                            nWrdcnt = ICTCLAS_ParagraphProcessAW(input, result, eCodeType.CODE_TYPE_GB, 1);
                        }
                        catch (Exception e)
                        {
                            Console.WriteLine(e.Message);
                            continue;
                        }

                        byte[] mybyte = System.Text.Encoding.Default.GetBytes(input);

                        for (int j = 0; j < nWrdcnt; ++j)
                        {
                            byte[] byteWord;
                            try
                            {
                                byteWord = new byte[result[j].length];
                                Array.Copy(mybyte, result[j].start, byteWord, 0, result[j].length);
                            }
                            catch (Exception e)
                            {
                                // Report an unusable offset/length and skip that word rather than
                                // continuing with a buffer left over from the previous word.
                                Console.WriteLine(e.Message);
                                continue;
                            }

                            string word = System.Text.Encoding.Default.GetString(byteWord);
                            if (word == " ")
                            {
                                continue;
                            }

                            sw.Write(word + " ");
                        }

                        sw.WriteLine("");
                    }
                }
            }
        }

        // Output is closed before the stopwatch stops, matching the original measurement.
        sp.Stop();
        Console.WriteLine("ICTCLAS:" + sp.ElapsedMilliseconds + "ms" + " Word Segmentation Speed: " + (float)totalFileSizeInByte / 1024 / sp.ElapsedMilliseconds * 1000 + " KB/s");
    }
    finally
    {
        // Release the segmenter even when reading or writing a file throws.
        ICTCLAS_Exit();
    }
}