/// <summary>
/// Reads every file in <c>pathsToData</c> as UTF-8, one line at a time, splits each
/// line on whitespace into words, and writes the resulting sentence to
/// <c>outFileName</c> (also UTF-8). Any word matched by <c>utf8ArabicChart</c> is
/// first passed through <c>escaper</c> and then through <c>lexMapper.Map(null, ...)</c>.
/// A per-file summary of the number of lines read is appended to <c>toStringBuffer</c>.
/// </summary>
/// <remarks>
/// Failures are reported on standard error and are not rethrown. Each input reader is
/// closed as soon as its file has been processed, and both the current reader and the
/// output writer are released in the <c>finally</c> block on any exit path.
/// </remarks>
public virtual void Build()
{
    LineNumberReader infile = null;
    PrintWriter outfile = null;
    string currentInfile = string.Empty;
    try
    {
        outfile = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFileName), "UTF-8")));
        foreach (File path in pathsToData)
        {
            // Record the name before opening so that an error raised while opening
            // this file is attributed to it rather than to the previous file.
            currentInfile = path.GetPath();
            infile = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8")));
            while (infile.Ready())
            {
                string line = infile.ReadLine();
                if (line == null)
                {
                    // End of stream reached even though Ready() reported data.
                    break;
                }
                List<Word> sent = SentenceUtils.ToUntaggedList(line.Split("\\s+"));
                foreach (Word token in sent)
                {
                    Matcher hasArabic = utf8ArabicChart.Matcher(token.Word());
                    if (hasArabic.Find())
                    {
                        token.SetWord(escaper.Apply(token.Word()));
                        token.SetWord(lexMapper.Map(null, token.Word()));
                    }
                }
                outfile.Println(SentenceUtils.ListToString(sent));
            }
            toStringBuffer.Append(string.Format(" Read %d input lines from %s", infile.GetLineNumber(), path.GetPath()));
            // Close each reader once its file is done instead of only the last one,
            // and clear the reference so the finally block does not close it twice.
            infile.Close();
            infile = null;
        }
    }
    catch (UnsupportedEncodingException e)
    {
        System.Console.Error.Printf("%s: Filesystem does not support UTF-8 output\n", this.GetType().FullName);
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (FileNotFoundException)
    {
        System.Console.Error.Printf("%s: Could not open %s for writing\n", this.GetType().FullName, outFileName);
    }
    catch (IOException)
    {
        // infile is null when the failure happened before any reader was opened.
        int lineNumber = (infile != null) ? infile.GetLineNumber() : 0;
        System.Console.Error.Printf("%s: Error reading from %s (line %d)\n", this.GetType().FullName, currentInfile, lineNumber);
    }
    catch (Exception e)
    {
        int lineNumber = (infile != null) ? infile.GetLineNumber() : 0;
        System.Console.Error.Printf("%s: Input sentence from %s contains token mapped to null (line %d)\n", this.GetType().FullName, currentInfile, lineNumber);
        Sharpen.Runtime.PrintStackTrace(e);
    }
    finally
    {
        if (infile != null)
        {
            // Only reached when processing was interrupted mid-file; release the
            // reader on a best-effort basis so cleanup cannot mask the reported error.
            try
            {
                infile.Close();
            }
            catch (IOException)
            {
                // Nothing further can be done for a reader that fails to close.
            }
        }
        if (outfile != null)
        {
            outfile.Close();
        }
    }
}