/// <summary>
/// Reads <paramref name="inputFile"/> line by line as tab-separated text, replaces the
/// field at index <paramref name="column"/> with the result of
/// <c>ExternalWordBreaker.BreakWords</c>, and writes each processed row to
/// <paramref name="outputFile"/>.
/// </summary>
/// <remarks>
/// Lines that are empty after trimming, and rows that have no field at
/// <paramref name="column"/>, are skipped and do not appear in the output.
/// </remarks>
/// <param name="inputFile">Path of the tab-separated file to read.</param>
/// <param name="outputFile">
/// Path of the file to write. <c>PrepareDirectory</c> is called with this path before the
/// writer is opened.
/// </param>
/// <param name="market">Market that is converted with <c>MarketToString</c> and passed to the word breaker.</param>
/// <param name="column">Zero-based index of the field to word-break. Defaults to the first field.</param>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="column"/> is negative.</exception>
public void DoBreakOnFile(string inputFile, string outputFile, Markets market, int column = 0)
{
    // Reject a negative index before touching any file. Without this guard the output file
    // was created/truncated first and tmp[column] then failed with IndexOutOfRangeException
    // on the first non-empty line.
    if (column < 0)
    {
        throw new System.ArgumentOutOfRangeException("column", column, "Column index must not be negative.");
    }

    string mkt = MarketToString(market);

    using (StreamReader sr = new StreamReader(inputFile))
    {
        PrepareDirectory(outputFile);

        using (StreamWriter sw = new StreamWriter(outputFile))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // NOTE(review): Trim() also strips leading/trailing tabs, so a row whose first
                // field is empty loses that field and every column index shifts by one.
                // Kept as-is to preserve existing output; confirm whether this is intended.
                string query = line.Trim();
                if (string.IsNullOrEmpty(query))
                {
                    continue;
                }

                string[] tmp = query.Split('\t');

                // Rows without the requested field are dropped from the output.
                if (column >= tmp.Length)
                {
                    continue;
                }

                tmp[column] = ExternalWordBreaker.BreakWords(tmp[column], mkt);
                sw.WriteLine(MakeTsvString(tmp));
            }
        }
    }
}
/// <summary>
/// Word-breaks a single string by passing it to <c>ExternalWordBreaker.BreakWords</c>
/// together with the string form of <paramref name="market"/>.
/// </summary>
/// <param name="input">Text to word-break.</param>
/// <param name="market">Market converted with <c>MarketToString</c>; defaults to <c>Markets.zhCN</c>.</param>
/// <returns>The value returned by <c>ExternalWordBreaker.BreakWords</c>.</returns>
public string DoBreakOnString(string input, Markets market = Markets.zhCN)
{
    return ExternalWordBreaker.BreakWords(input, MarketToString(market));
}
/// <summary>
/// Returns the shared <c>WordBreaker</c> instance, creating it on first use.
/// </summary>
/// <remarks>
/// Creation is guarded by a lock on <c>_locker</c> with a null check both outside and
/// inside the lock, so the constructor runs at most once under that lock.
/// NOTE(review): the declaration of <c>_instance</c> is not visible here; confirm whether
/// it is declared <c>volatile</c>.
/// </remarks>
/// <returns>The single <c>WordBreaker</c> stored in <c>_instance</c>.</returns>
public static WordBreaker GetInstance()
{
    // Fast path: already created, no need to take the lock.
    if (_instance != null)
    {
        return _instance;
    }

    lock (_locker)
    {
        // Re-check inside the lock: another thread may have created it meanwhile.
        if (_instance == null)
        {
            _instance = new WordBreaker();
        }
    }

    return _instance;
}
/// <summary>
/// Private constructor: instances cannot be created from outside this class.
/// Calls <c>ExternalWordBreaker.Initialize</c> when the instance is constructed.
/// </summary>
private WordBreaker()
{
    ExternalWordBreaker.Initialize();
}