Exemplo n.º 1
0
        /// <summary>
        /// Runs the ICTCLAS word segmenter over every file in <c>fileList</c> and writes the
        /// words of each non-empty input line, separated by single spaces, as one line of
        /// <c>outDir + ".txt"</c>. When finished, prints the elapsed time and throughput
        /// to the console.
        /// </summary>
        /// <remarks>
        /// If <c>ICTCLAS_Init</c> fails the method reports it, waits for a key press and
        /// returns without segmenting anything. Lines on which the segmenter throws are
        /// reported on the console and skipped.
        /// </remarks>
        private static void CPPictclas()
        {
            if (!ICTCLAS_Init(null))
            {
                System.Console.WriteLine("Init ICTCLAS failed!"); 
                System.Console.Read();
                // Nothing below can work without an initialised segmenter.
                return;
            }

            Stopwatch sp = new Stopwatch();
            try
            {
                // using-blocks guarantee the files are flushed and closed even if a line fails unexpectedly.
                using (StreamWriter sw = new StreamWriter(outDir + ".txt", false, System.Text.Encoding.Default))
                {
                    sp.Start();

                    // Segment each file in fileList and write the result to the text file.
                    for (int i = 0; i < fileList.Count; i += 1)
                    {
                        using (StreamReader sr = new StreamReader(fileList[i], System.Text.Encoding.Default))
                        {
                            string input;

                            // Reading in the loop condition means every 'continue' below moves on to
                            // the next line; the previous code re-processed a failing line forever.
                            while ((input = sr.ReadLine()) != null)
                            {
                                if (input == "")
                                {
                                    continue;
                                }

                                result_t[] result;
                                int nWrdcnt;
                                try
                                {
                                    // One slot per character of the line is allocated for the results.
                                    result = new result_t[input.Length];
                                    nWrdcnt = ICTCLAS_ParagraphProcessAW(input, result, eCodeType.CODE_TYPE_GB, 1);
                                }
                                catch (Exception e)
                                {
                                    Console.WriteLine(e.Message);
                                    continue;
                                }

                                // start/length of each result are applied to the encoded bytes of the
                                // line, not to its characters.
                                byte[] mybyte = System.Text.Encoding.Default.GetBytes(input);
                                for (int j = 0; j < nWrdcnt; ++j)
                                {
                                    string word;
                                    try
                                    {
                                        byte[] byteWord = new byte[result[j].length];
                                        Array.Copy(mybyte, result[j].start, byteWord, 0, result[j].length);
                                        word = System.Text.Encoding.Default.GetString(byteWord);
                                    }
                                    catch (Exception e)
                                    {
                                        // A bad offset or length spoils only this word; report it and go on.
                                        Console.WriteLine(e.Message);
                                        continue;
                                    }

                                    // Skip words that consist of a single blank.
                                    if (word == " ")
                                    {
                                        continue;
                                    }
                                    sw.Write(word + " ");
                                }
                                sw.WriteLine("");
                            }
                        }
                    }
                }
                sp.Stop();
                Console.WriteLine("ICTCLAS:" + sp.ElapsedMilliseconds + "ms" + " Word Segmentation Speed: " + (float)totalFileSizeInByte / 1024 / sp.ElapsedMilliseconds * 1000 + " KB/s");
            }
            finally
            {
                // Release the segmenter even when processing aborts with an exception.
                ICTCLAS_Exit();
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Segments every file listed in <c>fileList</c> with ICTCLAS. For each non-empty
        /// input line, the recognised words are written to <c>outDir + ".txt"</c> on one
        /// line, each followed by a space. Elapsed time and throughput are printed to the
        /// console at the end.
        /// </summary>
        /// <remarks>
        /// Returns early (after reporting and waiting for a key press) when
        /// <c>ICTCLAS_Init</c> fails. A line that makes the segmenter throw is logged to
        /// the console and skipped.
        /// </remarks>
        private static void CPPictclas()
        {
            if (!ICTCLAS_Init(null))
            {
                System.Console.WriteLine("Init ICTCLAS failed!");
                System.Console.Read();
                // Do not call into the segmenter when initialisation failed.
                return;
            }

            Stopwatch sp = new Stopwatch();

            try
            {
                // Both streams live in using-blocks so they are closed on every exit path.
                using (StreamWriter sw = new StreamWriter(outDir + ".txt", false, System.Text.Encoding.Default))
                {
                    sp.Start();

                    // Segment each file in fileList and write the result to the text file.
                    for (int i = 0; i < fileList.Count; i += 1)
                    {
                        using (StreamReader sr = new StreamReader(fileList[i], System.Text.Encoding.Default))
                        {
                            string input;

                            // Advance inside the condition: a 'continue' can then never revisit the
                            // same line (the old catch block looped forever on a failing line).
                            while ((input = sr.ReadLine()) != null)
                            {
                                if (input == "")
                                {
                                    continue;
                                }

                                result_t[] result;
                                int        nWrdcnt;
                                try
                                {
                                    // The result buffer gets one entry per character of the line.
                                    result  = new result_t[input.Length];
                                    nWrdcnt = ICTCLAS_ParagraphProcessAW(input, result, eCodeType.CODE_TYPE_GB, 1);
                                }
                                catch (Exception e)
                                {
                                    Console.WriteLine(e.Message);
                                    continue;
                                }

                                // Word positions are used as offsets into the encoded bytes of the line.
                                byte[] mybyte = System.Text.Encoding.Default.GetBytes(input);
                                for (int j = 0; j < nWrdcnt; ++j)
                                {
                                    string word;
                                    try
                                    {
                                        byte[] byteWord = new byte[result[j].length];
                                        Array.Copy(mybyte, result[j].start, byteWord, 0, result[j].length);
                                        word = System.Text.Encoding.Default.GetString(byteWord);
                                    }
                                    catch (Exception e)
                                    {
                                        // Invalid start/length: report it and drop just this word.
                                        Console.WriteLine(e.Message);
                                        continue;
                                    }

                                    // A word that is exactly one blank is not written.
                                    if (word == " ")
                                    {
                                        continue;
                                    }
                                    sw.Write(word + " ");
                                }
                                sw.WriteLine("");
                            }
                        }
                    }
                }
                sp.Stop();
                Console.WriteLine("ICTCLAS:" + sp.ElapsedMilliseconds + "ms" + " Word Segmentation Speed: " + (float)totalFileSizeInByte / 1024 / sp.ElapsedMilliseconds * 1000 + " KB/s");
            }
            finally
            {
                // Always shut the segmenter down, including after an unexpected exception.
                ICTCLAS_Exit();
            }
        }