/// <summary>
/// Sends the Japanese text of every entry loaded from a GraceNote database to the
/// Google Translate web endpoint (ja -> en) and stores the cleaned-up result.
/// </summary>
/// <param name="Filename">
/// Path of the SQLite file the entries are loaded from. Unless
/// <paramref name="outputToOtherGracesJapanese"/> is given, translations are written back to it.
/// </param>
/// <param name="FilenameGracesJapanese">
/// Path of the SQLite file passed as the second data source when loading the entries
/// (presumably the database holding the Japanese strings -- verify against
/// GraceNoteDatabaseEntry.GetAllEntriesFromDatabase).
/// </param>
/// <param name="outputToOtherGracesJapanese">
/// Optional path of a second SQLite file. When set, results are inserted into that file
/// instead of updating <paramref name="Filename"/>; the file is created from the embedded
/// template if it does not exist, and entries whose JPID is already present in its
/// Japanese table are skipped.
/// </param>
/// <remarks>
/// Requests that fail with a <see cref="WebException"/> are appended to "googletranslate.log"
/// (relative to the working directory) and echoed to the console; the run then continues
/// with the next entry. Any other exception aborts the run, but all connections and the
/// log file are still released. The method sleeps 2-8 seconds after every request.
/// </remarks>
public static void Translate( string Filename, string FilenameGracesJapanese, string outputToOtherGracesJapanese = null ) {
	SetupReplacements();
	// NOTE(review): the result is discarded; presumably this call only triggers one-time
	// setup inside the cleaner -- confirm before removing.
	CleanGoogleTranslatedString( "" );

	GraceNoteDatabaseEntry[] rawEntries = GraceNoteDatabaseEntry.GetAllEntriesFromDatabase( "Data Source=" + Filename, "Data Source=" + FilenameGracesJapanese );
	TranslatableGraceNoteEntry[] entries = new TranslatableGraceNoteEntry[rawEntries.Length];
	for ( int i = 0; i < entries.Length; ++i ) {
		entries[i] = new TranslatableGraceNoteEntry();
		entries[i].Entry = rawEntries[i];
	}

	System.Random rng = new System.Random();

	// Pass 1: strip everything that should not be sent to the translator and remember
	// what has to be restored afterwards (line breaks, leading '#', trailing escape).
	foreach ( TranslatableGraceNoteEntry e in entries ) {
		// Line-break statistics are taken from a copy without the <CLT> / <CLT nn> tags.
		// The known tag forms are removed explicitly instead of with a regex, because a
		// greedy pattern would also eat the text between two tags.
		string VariableLess = e.Entry.TextJP.Replace( "<CLT>", "" );
		for ( int i = 0; i < 100; ++i ) {
			VariableLess = VariableLess.Replace( "<CLT " + i.ToString( "D2" ) + ">", "" );
		}
		// Ordinal: these are control/markup characters, not linguistic text.
		e.NewLineAtEnd = VariableLess.EndsWith( "\n", StringComparison.Ordinal );
		e.NewLineCount = VariableLess.Count( ch => ch == '\n' );

		e.Entry.TextJP = e.Entry.TextJP.Replace( "\n", "" );

		// A backslash in the second-to-last position marks a two-character suffix that
		// is kept verbatim and re-appended after translation.
		if ( e.Entry.TextJP.Length >= 2 && e.Entry.TextJP[e.Entry.TextJP.Length - 2] == '\\' ) {
			e.PreserveStringAtEnd = e.Entry.TextJP.Substring( e.Entry.TextJP.Length - 2 );
			e.Entry.TextJP = e.Entry.TextJP.Substring( 0, e.Entry.TextJP.Length - 2 );
		}

		// A leading '#' is likewise kept out of the translated text and restored later.
		if ( e.Entry.TextJP.StartsWith( "#", StringComparison.Ordinal ) ) {
			e.PreserveStringAtStart = "#";
			e.Entry.TextJP = e.Entry.TextJP.Substring( 1 );
		}

		for ( int i = 0; i < ReplacementOriginals.Count; ++i ) {
			e.Entry.TextJP = e.Entry.TextJP.Replace( ReplacementOriginals[i], ReplacementSubstitutes[i] );
		}
	}

	// Pass 2: translate and store. Every disposable is released even if an exception
	// other than WebException escapes the loop.
	SQLiteConnection otherGJconn = null;
	try {
		if ( outputToOtherGracesJapanese != null ) {
			if ( !File.Exists( outputToOtherGracesJapanese ) ) {
				File.WriteAllBytes( outputToOtherGracesJapanese, Properties.Resources.gngj_template );
			}
			otherGJconn = new SQLiteConnection( "Data Source=" + outputToOtherGracesJapanese );
			otherGJconn.Open();
		}

		using ( WebClient webClient = new WebClient() )
		using ( SQLiteConnection conn = new SQLiteConnection( "Data Source=" + Filename ) ) {
			webClient.BaseAddress = "http://translate.google.com";
			conn.Open();

			using ( FileStream FailLog = new FileStream( "googletranslate.log", FileMode.Append ) )
			using ( StreamWriter LogWriter = new StreamWriter( FailLog ) ) {
				int entryCount = 0;
				foreach ( TranslatableGraceNoteEntry e in entries ) {
					entryCount++;

					// Skip entries with status -1, entries already machine-translated, and blanks.
					if ( e.Entry.Status == -1 ) { continue; }
					if ( e.Entry.UpdatedBy == "GoogleTranslate" ) { continue; }
					if ( e.Entry.TextJP.Trim() == "" ) { continue; }

					// When writing to the secondary database, skip IDs it already contains.
					if ( otherGJconn != null ) {
						long exists = (long)SqliteUtil.SelectScalar( otherGJconn, "SELECT COUNT(1) FROM Japanese WHERE ID = ?", new object[1] { e.Entry.JPID } );
						if ( exists > 0 ) { continue; }
					}

					// A tag at the very start of the text is cut off before translating
					// and put back in front of the English result.
					string jp = e.Entry.TextJP;
					string startTag = "";
					if ( jp.StartsWith( "<", StringComparison.Ordinal ) && jp.Contains( '>' ) ) {
						int idx = jp.IndexOf( '>' );
						startTag = jp.Substring( 0, idx + 1 );
						jp = jp.Substring( idx + 1 );
					}

					try {
						Console.WriteLine( "Processing Entry " + entryCount + "/" + entries.Length + "..." );

						webClient.Encoding = Encoding.UTF8;
						webClient.Headers["User-Agent"] = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.3) Gecko/20100402 Namoroka/3.6.3 (.NET CLR 3.5.30729)";
						webClient.QueryString.Clear();
						webClient.QueryString["client"] = "t";
						webClient.QueryString["sl"] = "ja";
						webClient.QueryString["tl"] = "en";
						webClient.QueryString["text"] = jp;

						string translateResult = webClient.DownloadString( "/translate_a/t" );
						string english = CleanGoogleTranslatedString( translateResult );
						e.Entry.TextEN = startTag + english;

						if ( otherGJconn != null ) {
							ReinsertEntry( otherGJconn, e, true );
						} else {
							ReinsertEntry( conn, e );
						}
					} catch ( WebException ex ) {
						LogWriter.WriteLine( "Failure in File " + Filename + ":" );
						LogWriter.WriteLine( "ID: " + e.Entry.ID );
						LogWriter.WriteLine( jp );
						LogWriter.WriteLine( ex.ToString() );
						LogWriter.WriteLine();
						// Persist immediately: the run can take hours and may be interrupted.
						LogWriter.Flush();

						Console.WriteLine( ex.ToString() );
						Console.WriteLine();
					}

					// Random pause between requests.
					System.Threading.Thread.Sleep( rng.Next( 2000, 8000 ) );
				}
			}
		}
	} finally {
		if ( otherGJconn != null ) {
			otherGJconn.Dispose();
		}
	}
}
/// <summary>
/// Restores line breaks and the preserved prefix/suffix on the translated text of
/// <paramref name="e"/>, prints a before/after trace to the console and writes the
/// final text to the database.
/// </summary>
/// <param name="conn">Open connection the statement is executed on.</param>
/// <param name="e">Entry whose <c>Entry.TextEN</c> holds the translation without line breaks; it is overwritten with the final text.</param>
/// <param name="isGracesJapanese">
/// When true, a new row (string, ID = JPID, debug = 0) is inserted into the Japanese table;
/// otherwise the row with the entry's ID in the Text table is updated and marked as
/// updated by 'GoogleTranslate'.
/// </param>
public static void ReinsertEntry( SQLiteConnection conn, TranslatableGraceNoteEntry e, bool isGracesJapanese = false ) {
	Console.WriteLine( "ENGLISH WITHOUT LINEBREAKS:" );
	Console.WriteLine( e.Entry.TextEN );

	// A trailing line break is re-added separately below, so it does not count
	// towards the breaks that have to be distributed inside the text.
	int innerBreaks = e.NewLineAtEnd ? e.NewLineCount - 1 : e.NewLineCount;

	if ( innerBreaks != 0 ) {
		string text = e.Entry.TextEN;
		int targetLength = text.Length / ( innerBreaks + 1 );
		List<string> lines = new List<string>( innerBreaks + 1 );

		// Cut after roughly targetLength characters, extending each cut to the next
		// space (or the end of the text) so that words stay intact.
		int pos = 0;
		while ( pos < text.Length ) {
			int begin = pos;
			pos += targetLength;
			while ( pos < text.Length && text[pos] != ' ' ) {
				pos++;
			}
			pos = Math.Min( pos, text.Length );
			lines.Add( text.Substring( begin, pos - begin ) );
			pos++; // step over the space that was used as the break position
		}

		e.Entry.TextEN = string.Join( "\n", lines.ToArray() ).TrimEnd( '\n' );
	}

	if ( e.NewLineAtEnd ) {
		e.Entry.TextEN = e.Entry.TextEN + '\n';
	}
	e.Entry.TextEN = e.PreserveStringAtStart + e.Entry.TextEN + e.PreserveStringAtEnd;

	Console.WriteLine( "JAPANESE:" );
	Console.WriteLine( e.Entry.TextJP );
	Console.WriteLine( "ENGLISH WITH LINEBREAKS:" );
	Console.WriteLine( e.Entry.TextEN );
	Console.WriteLine( "----------------------------------------" );
	Console.WriteLine();

	if ( isGracesJapanese ) {
		SqliteUtil.Update(
			conn,
			"INSERT INTO Japanese ( string, ID, debug ) VALUES ( ?, ?, 0 )",
			new Object[] { e.Entry.TextEN, e.Entry.JPID }
		);
		return;
	}

	SqliteUtil.Update(
		conn,
		"UPDATE Text SET english = ?, UpdatedBy = 'GoogleTranslate', UpdatedTimestamp = " + Util.DateTimeToUnixTime( DateTime.Now ) + " WHERE ID = ?",
		new Object[] { e.Entry.TextEN, e.Entry.ID }
	);
}