/// <summary>
/// Creates a customizer that stores the base model, the custom model, the
/// training and validation file pairs and the tag-handling options.
/// </summary>
/// <param name="model">Base model; its <c>InstallDir</c> becomes the model directory.</param>
/// <param name="customModel">Custom model; its <c>InstallDir</c> becomes the custom directory.</param>
/// <param name="inputPair">Pair whose Source/Target are stored as the custom source and target.</param>
/// <param name="indomainValidPair">Pair whose Source/Target are stored as the in-domain validation source and target.</param>
/// <param name="customLabel">Label stored for this customization.</param>
/// <param name="includePlaceholderTags">Stored placeholder-tag option.</param>
/// <param name="includeTagPairs">Stored tag-pair option.</param>
/// <param name="postCustomizationBatch">Accepted for interface compatibility; not read by this constructor.</param>
/// <param name="sourceLanguage">Source language.</param>
/// <param name="targetLanguage">Target language.</param>
/// <param name="guidedAlignment">Stored guided-alignment option (defaults to false).</param>
public MarianCustomizer(
    MTModel model,
    MTModel customModel,
    ParallelFilePair inputPair,
    ParallelFilePair indomainValidPair,
    string customLabel,
    bool includePlaceholderTags,
    bool includeTagPairs,
    List<string> postCustomizationBatch,
    IsoLanguage sourceLanguage,
    IsoLanguage targetLanguage,
    bool guidedAlignment = false)
{
    // Models and the directories they are installed in.
    this.model = model;
    this.modelDir = new DirectoryInfo(model.InstallDir);
    this.customModel = customModel;
    this.customDir = new DirectoryInfo(customModel.InstallDir);

    // Training data.
    this.customSource = inputPair.Source;
    this.customTarget = inputPair.Target;

    // In-domain validation data.
    this.inDomainValidationSource = indomainValidPair.Source;
    this.inDomainValidationTarget = indomainValidPair.Target;

    // Scalar options.
    this.customLabel = customLabel;
    this.includePlaceholderTags = includePlaceholderTags;
    this.includeTagPairs = includeTagPairs;
    this.guidedAlignment = guidedAlignment;

    // Language pair.
    this.sourceLanguage = sourceLanguage;
    this.targetLanguage = targetLanguage;
}
/// <summary>
/// Validates the token and, if the MT engine is idle, starts a customization
/// (fine-tuning) run with the given data.
/// </summary>
/// <returns>
/// <c>null</c> when the token code is invalid; otherwise the string
/// "fine-tuning started".
/// </returns>
/// <exception cref="FaultException">
/// Thrown when fine-tuning or batch translation is already ongoing.
/// </exception>
public string Customize(
    string tokenCode,
    List<Tuple<string, string>> input,
    List<Tuple<string, string>> validation,
    List<string> uniqueNewSegments,
    string srcLangCode,
    string trgLangCode,
    string modelTag,
    bool includePlaceholderTags,
    bool includeTagPairs)
{
    if (!TokenCodeGenerator.Instance.TokenCodeIsValid(tokenCode))
    {
        return null;
    }

    var sourceLang = new IsoLanguage(srcLangCode);
    var targetLang = new IsoLanguage(trgLangCode);

    var engineBusy = this.ModelManager.FinetuningOngoing || this.ModelManager.BatchTranslationOngoing;
    if (engineBusy)
    {
        //TODO: need to queue up customization, i.e. save data for starting later
        throw new FaultException("Batch translation or customization already in process in the MT engine");
    }

    this.ModelManager.StartCustomization(
        input,
        validation,
        uniqueNewSegments,
        sourceLang,
        targetLang,
        modelTag,
        includePlaceholderTags,
        includeTagPairs);

    return "fine-tuning started";
}
/// <summary>
/// Creates a batch translator for the model in <paramref name="modelDir"/> and
/// makes sure a <c>batch.yml</c> configuration exists in that directory.
/// </summary>
/// <param name="modelDir">Path of the model directory.</param>
/// <param name="sourceLang">Source language; its shortest ISO code is stored as <c>SourceCode</c>.</param>
/// <param name="targetLang">Target language; its shortest ISO code is stored as <c>TargetCode</c>.</param>
/// <param name="includePlaceholderTags">Stored placeholder-tag option.</param>
/// <param name="includeTagPairs">Stored tag-pair option.</param>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Single()</c> when <c>batch.yml</c> is missing and the directory
/// does not contain exactly one <c>decoder.yml</c>.
/// </exception>
public MarianBatchTranslator(
    string modelDir,
    IsoLanguage sourceLang,
    IsoLanguage targetLang,
    bool includePlaceholderTags,
    bool includeTagPairs)
{
    this.SourceCode = sourceLang.ShortestIsoCode;
    this.TargetCode = targetLang.ShortestIsoCode;
    this.includePlaceholderTags = includePlaceholderTags;
    this.includeTagPairs = includeTagPairs;

    this.modelDir = new DirectoryInfo(modelDir);
    this.SystemName = $"{this.SourceCode}-{this.TargetCode}_" + this.modelDir.Name;

    //Check if batch.yml exists, if not create it from decoder.yml
    var batchYaml = this.modelDir.GetFiles("batch.yml");
    if (batchYaml.Length == 0)
    {
        var decoderYaml = this.modelDir.GetFiles("decoder.yml").Single();
        var deserializer = new Deserializer();

        MarianDecoderConfig decoderSettings;

        // Dispose the reader so the handle on decoder.yml is released as soon
        // as the settings have been read.
        using (var reader = decoderYaml.OpenText())
        {
            decoderSettings = deserializer.Deserialize<MarianDecoderConfig>(reader);
        }

        // Batch-specific overrides on top of the decoder settings.
        decoderSettings.miniBatch = "16";
        decoderSettings.log = Path.Combine(this.modelDir.FullName, "batch.log");
        decoderSettings.alignment = "hard";

        var serializer = new Serializer();
        var configPath = Path.Combine(this.modelDir.FullName, "batch.yml");
        using (var writer = File.CreateText(configPath))
        {
            serializer.Serialize(writer, decoderSettings, typeof(MarianDecoderConfig));
        }
    }
}
/// <summary>
/// Translates <paramref name="input"/> and returns the translation as a
/// UTF-8 encoded in-memory stream.
/// </summary>
/// <param name="tokenCode">Token code; not checked by this method.</param>
/// <param name="input">Text to translate.</param>
/// <param name="srcLangCode">Code of the source language.</param>
/// <param name="trgLangCode">Code of the target language.</param>
/// <param name="modelTag">Tag passed through to the MT provider.</param>
/// <returns>A <see cref="MemoryStream"/> holding the UTF-8 bytes of the translation.</returns>
public Stream TranslateStream(string tokenCode = "", string input = "", string srcLangCode = "", string trgLangCode = "", string modelTag = "")
{
    var sourceLang = new IsoLanguage(srcLangCode);
    var targetLang = new IsoLanguage(trgLangCode);

    // NOTE: no response headers (content type, Connection, CORS, Server) are
    // set here. The previous header code was either commented out or sat
    // unreachable after the return statement, so it has been removed.

    // Reading Result blocks until the translation is available.
    var translation = this.mtProvider.Translate(input, sourceLang, targetLang, modelTag).Result;
    var stream = new MemoryStream(Encoding.UTF8.GetBytes(translation.Translation));
    return stream;
}
/// <summary>
/// Translates <paramref name="input"/> and wraps the result in a
/// <c>Translation</c> object.
/// </summary>
/// <param name="tokenCode">Token code; not checked by this method.</param>
/// <param name="input">Text to translate.</param>
/// <param name="srcLangCode">Code of the source language.</param>
/// <param name="trgLangCode">Code of the target language.</param>
/// <param name="modelTag">Tag passed through to the model manager.</param>
/// <returns>A <c>Translation</c> built from the result of the translate call.</returns>
public Translation TranslateJson(string tokenCode, string input, string srcLangCode, string trgLangCode, string modelTag)
{
    var pendingTranslation = this.ModelManager.Translate(
        input,
        new IsoLanguage(srcLangCode),
        new IsoLanguage(trgLangCode),
        modelTag);

    // Reading Result blocks until the translation is available.
    return new Translation(pendingTranslation.Result);
}
/// <summary>
/// Adds a permissive CORS header to the outgoing response, translates
/// <paramref name="input"/> and wraps the translated text in a
/// <c>Translation</c> object.
/// </summary>
/// <param name="tokenCode">Token code; not checked by this method.</param>
/// <param name="input">Text to translate.</param>
/// <param name="srcLangCode">Code of the source language.</param>
/// <param name="trgLangCode">Code of the target language.</param>
/// <param name="modelTag">Tag passed through to the model manager.</param>
/// <returns>A <c>Translation</c> built from the <c>Translation</c> member of the result.</returns>
public Translation TranslateJson(string tokenCode, string input, string srcLangCode, string trgLangCode, string modelTag)
{
    var outgoingHeaders = WebOperationContext.Current.OutgoingResponse.Headers;
    outgoingHeaders.Add("Access-Control-Allow-Origin: *");

    var pendingTranslation = this.ModelManager.Translate(
        input,
        new IsoLanguage(srcLangCode),
        new IsoLanguage(trgLangCode),
        modelTag);

    // Reading Result blocks until the translation is available.
    var translatedText = pendingTranslation.Result.Translation;
    return new Translation(translatedText);
}
/// <summary>
/// Translates a single string after validating the token code.
/// </summary>
/// <param name="tokenCode">The token code.</param>
/// <param name="input">The input string.</param>
/// <param name="srcLangCode">The code of the source language.</param>
/// <param name="trgLangCode">The code of the target language.</param>
/// <param name="modelTag">Tag passed through to the model manager.</param>
/// <returns>The translated input string, or <c>null</c> when the token code is invalid.</returns>
public string Translate(string tokenCode, string input, string srcLangCode, string trgLangCode, string modelTag)
{
    var tokenIsValid = TokenCodeGenerator.Instance.TokenCodeIsValid(tokenCode);
    if (!tokenIsValid)
    {
        return null;
    }

    var pendingTranslation = this.ModelManager.Translate(
        input,
        new IsoLanguage(srcLangCode),
        new IsoLanguage(trgLangCode),
        modelTag);

    // Reading Result blocks until the translation is available.
    return pendingTranslation.Result;
}
/// <summary>
/// Validates the token code and asks the model manager for the status of the
/// model identified by the language pair and tag.
/// </summary>
/// <param name="tokenCode">The token code.</param>
/// <param name="sourceCode">The code of the source language.</param>
/// <param name="targetCode">The code of the target language.</param>
/// <param name="modelTag">Tag passed through to the model manager.</param>
/// <returns>The status string from the model manager, or <c>null</c> when the token code is invalid.</returns>
public string CheckModelStatus(string tokenCode, string sourceCode, string targetCode, string modelTag)
{
    var tokenIsValid = TokenCodeGenerator.Instance.TokenCodeIsValid(tokenCode);
    if (!tokenIsValid)
    {
        return null;
    }

    return this.ModelManager.CheckModelStatus(
        new IsoLanguage(sourceCode),
        new IsoLanguage(targetCode),
        modelTag);
}
/// <summary>
/// Translates <paramref name="input"/> and returns an HTTP 200 response that
/// carries the translation and a permissive CORS header.
/// </summary>
/// <param name="tokenCode">Token code; not checked by this method.</param>
/// <param name="input">Text to translate.</param>
/// <param name="srcLangCode">Code of the source language.</param>
/// <param name="trgLangCode">Code of the target language.</param>
/// <param name="modelTag">Tag passed through to the MT provider.</param>
/// <returns>The response message containing a <c>Translation</c> payload.</returns>
public HttpResponseMessage TranslateJson(string tokenCode = "", string input = "", string srcLangCode = "", string trgLangCode = "", string modelTag = "")
{
    var pendingTranslation = this.mtProvider.Translate(
        input,
        new IsoLanguage(srcLangCode),
        new IsoLanguage(trgLangCode),
        modelTag);

    // Reading Result blocks until the translation is available.
    var reply = Request.CreateResponse<Translation>(
        HttpStatusCode.OK,
        new Translation(pendingTranslation.Result.Translation));

    // The CORS header is set on the response message itself.
    reply.Headers.Add("Access-Control-Allow-Origin", "*");
    return reply;
}
/// <summary>
/// Extracts the source and target segments of a TMX file into two parallel
/// text files named <c>{tmxFile}.{langCode}.txt</c>, one segment per line.
/// Only translation units that contain a segment for both languages are written.
/// </summary>
/// <param name="tmxFile">Path of the TMX file to parse.</param>
/// <param name="sourceLang">Source language to look for in each translation unit.</param>
/// <param name="targetLang">Target language to look for in each translation unit.</param>
/// <param name="includePlaceholderTags">Passed through to <c>TmxToTxtParser.FilterTextAndTags</c>.</param>
/// <param name="includeTagPairs">Passed through to <c>TmxToTxtParser.FilterTextAndTags</c>.</param>
/// <returns>
/// The pair of written files, or <c>null</c> when the TMX file is not well-formed XML.
/// </returns>
public static ParallelFilePair ParseTmxToParallelFiles(
    string tmxFile,
    IsoLanguage sourceLang,
    IsoLanguage targetLang,
    bool includePlaceholderTags,
    bool includeTagPairs)
{
    var sourceFile = new FileInfo($"{tmxFile}.{sourceLang.ShortestIsoCode}.txt");
    var targetFile = new FileInfo($"{tmxFile}.{targetLang.ShortestIsoCode}.txt");

    XDocument tmx;
    try
    {
        tmx = XDocument.Load(tmxFile);
    }
    catch (System.Xml.XmlException)
    {
        Log.Error($"{tmxFile} is not a valid tmx file");
        return null;
    }

    var tus = tmx.Descendants("tu");

    using (var sourceWriter = sourceFile.CreateText())
    using (var targetWriter = targetFile.CreateText())
    {
        foreach (var tu in tus)
        {
            var sourceSeg = FindCompatibleSegment(tu, sourceLang);
            var targetSeg = FindCompatibleSegment(tu, targetLang);

            // Write a line only when both sides exist, so the files stay aligned.
            if (sourceSeg != null && targetSeg != null)
            {
                var sourceText = TmxToTxtParser.FilterTextAndTags(sourceSeg, includePlaceholderTags, includeTagPairs);
                sourceWriter.WriteLine(sourceText);

                var targetText = TmxToTxtParser.FilterTextAndTags(targetSeg, includePlaceholderTags, includeTagPairs);
                targetWriter.WriteLine(targetText);
            }
        }
    }

    return new ParallelFilePair(sourceFile, targetFile);
}

// Returns the first "seg" descendant of the translation unit whose parent has an
// xml:lang attribute compatible with the given language, or null if there is none.
private static XElement FindCompatibleSegment(XElement tu, IsoLanguage lang)
{
    return tu.Descendants("seg").FirstOrDefault(seg =>
    {
        // A parent without xml:lang is skipped instead of causing a
        // NullReferenceException.
        var langCode = seg.Parent?.Attribute(XNamespace.Xml + "lang")?.Value;

        // Language codes are not linguistic text: lower-case them
        // culture-invariantly so the match does not depend on the current culture.
        return langCode != null && lang.IsCompatibleTmxLang(langCode.ToLowerInvariant());
    });
}
/// <summary>
/// Translates <paramref name="input"/> and returns the translation as a UTF-8
/// plain-text stream, after setting the outgoing response headers.
/// </summary>
/// <param name="tokenCode">Token code; not checked by this method.</param>
/// <param name="input">Text to translate.</param>
/// <param name="srcLangCode">Code of the source language.</param>
/// <param name="trgLangCode">Code of the target language.</param>
/// <param name="modelTag">Tag passed through to the model manager.</param>
/// <returns>A <see cref="MemoryStream"/> holding the UTF-8 bytes of the translation.</returns>
public Stream TranslateStream(string tokenCode, string input, string srcLangCode, string trgLangCode, string modelTag)
{
    var sourceLang = new IsoLanguage(srcLangCode);
    var targetLang = new IsoLanguage(trgLangCode);

    var outgoing = WebOperationContext.Current.OutgoingResponse;
    outgoing.ContentType = "text/plain; charset=utf-8";
    outgoing.Headers.Add("Connection: close");
    outgoing.Headers.Add("Access-Control-Allow-Origin: *");

    // Wordfast Anywhere (and probably other Wordfast versions) does not accept a
    // response carrying the default Server header, so it is set to an empty value.
    outgoing.Headers.Add(HttpResponseHeader.Server.ToString(), string.Empty);

    // Reading Result blocks until the translation is available.
    var translatedText = this.ModelManager.Translate(input, sourceLang, targetLang, modelTag).Result;
    var utf8Bytes = Encoding.UTF8.GetBytes(translatedText);
    return new MemoryStream(utf8Bytes);
}
/// <summary>
/// Translates several strings, one after another, after validating the token code.
/// NOTE: this is not currently used, for pretranslation PretranslateBatch is better.
/// </summary>
/// <param name="tokenCode">The token code.</param>
/// <param name="input">The input strings.</param>
/// <param name="srcLangCode">The code of the source language.</param>
/// <param name="trgLangCode">The code of the target language.</param>
/// <param name="modelTag">Tag passed through to the model manager.</param>
/// <returns>
/// The translated input strings in input order, or <c>null</c> when the token
/// code is invalid.
/// </returns>
public List<string> BatchTranslate(string tokenCode, List<string> input, string srcLangCode, string trgLangCode, string modelTag)
{
    var tokenIsValid = TokenCodeGenerator.Instance.TokenCodeIsValid(tokenCode);
    if (!tokenIsValid)
    {
        return null;
    }

    var sourceLang = new IsoLanguage(srcLangCode);
    var targetLang = new IsoLanguage(trgLangCode);

    // Each segment is translated in turn; reading Result blocks until that
    // segment's translation is available before moving on to the next one.
    return input.ConvertAll(
        segment => this.ModelManager.Translate(segment, sourceLang, targetLang, modelTag).Result);
}
/// <summary>
/// Sends a batch of strings to the MT engine for pretranslation, so that the
/// translations are already available when they are requested later.
/// </summary>
/// <param name="tokenCode">The token code.</param>
/// <param name="input">The strings to pretranslate.</param>
/// <param name="srcLangCode">The code of the source language.</param>
/// <param name="trgLangCode">The code of the target language.</param>
/// <param name="modelTag">Tag passed through to the model manager.</param>
/// <returns>
/// An empty string when the token code is invalid, "input was empty" when
/// <paramref name="input"/> has no items, otherwise "preorder received".
/// </returns>
public string PreOrderBatch(string tokenCode, List<string> input, string srcLangCode, string trgLangCode, string modelTag)
{
    var tokenIsValid = TokenCodeGenerator.Instance.TokenCodeIsValid(tokenCode);
    if (!tokenIsValid)
    {
        return "";
    }

    var sourceLang = new IsoLanguage(srcLangCode);
    var targetLang = new IsoLanguage(trgLangCode);

    if (input.Count == 0)
    {
        return "input was empty";
    }

    // Preordering deliberately uses the normal translate call per sentence and
    // does not wait for the results. Batch translation was used here earlier,
    // but it did not seem much quicker and only delivers all translations at
    // the end, whereas normal translation makes each sentence available as
    // soon as it is done. Normal translation is also more robust (one less
    // thing to break). Batch translation should be much quicker once the right
    // parameters are found, so this may be revisited.
    for (var index = 0; index < input.Count; index++)
    {
        this.ModelManager.Translate(input[index], sourceLang, targetLang, modelTag);
    }

    return "preorder received";
}
// Static constructor: parses the ISO 639-3 and ISO 639-5 table files that are
// embedded as resources.
static IsoLanguage()
{
    ParseIso639_3();
    ParseIso639_5();
}
/// <summary>
/// Tells whether <paramref name="lang"/> is compatible with this language.
/// Currently two languages are compatible only when their shortest ISO codes are equal.
/// </summary>
/// <param name="lang">The language to compare with.</param>
/// <returns><c>true</c> when both shortest ISO codes are equal.</returns>
//TODO: add language group compatibility matching here
internal bool IsCompatibleLanguage(IsoLanguage lang) => this.ShortestIsoCode == lang.ShortestIsoCode;