/// <summary>
/// Builds a <see cref="Segment"/> for the given culture from a mixed list of elements.
/// </summary>
/// <param name="culture">Culture handed to the <see cref="Segment"/> constructor.</param>
/// <param name="elements">
/// Items appended in order. Strings are wrapped in a <c>Text</c> element; <c>Tag</c> and
/// <c>SegmentElement</c> instances are added as they are.
/// </param>
/// <returns>The populated segment.</returns>
/// <exception cref="InvalidOperationException">
/// An element (including <c>null</c>) is none of the supported types.
/// </exception>
public Segment BuildSegment(System.Globalization.CultureInfo culture, params object[] elements)
{
    var result = new Segment(culture);

    foreach (var item in elements)
    {
        // Order matters: Tag is tested before the more general SegmentElement,
        // exactly as in the original switch.
        if (item is string plainText)
        {
            result.Add(new Text(plainText));
        }
        else if (item is Tag tag)
        {
            result.Add(tag);
        }
        else if (item is SegmentElement other)
        {
            result.Add(other);
        }
        else
        {
            throw new InvalidOperationException("Unexpected parameter");
        }
    }

    return result;
}
/// <summary>
/// Converts a line of text containing tag markup into a <see cref="Segment"/>.
/// </summary>
/// <param name="text">Text to be converted; <c>null</c> or empty yields an empty segment.</param>
/// <returns>
/// A segment holding text pieces and the tags recognised by <c>ParseTag</c>, in source order.
/// </returns>
/// <exception cref="Exception">
/// An end tag appears while no start tag is open, or start tags remain open at the end of the line.
/// </exception>
public static Segment ParseLine(string text)
{
    var result = new Segment();
    if (string.IsNullOrEmpty(text))
    {
        return result;
    }

    // Allow any character as long as it is within quotes. If there are no quotes,
    // default to allowing any character except '>'.
    var pieces = Regex.Split(text, @"(<[^>""]*>|<.*?"".*?""\/>)");
    var openTags = new Stack<Tag>();

    foreach (var piece in pieces)
    {
        if (piece == string.Empty)
        {
            continue;
        }

        var parsed = ParseTag(piece);
        if (parsed == null)
        {
            // Not a tag: keep the piece as plain text.
            result.Add(piece);
            continue;
        }

        if (parsed.Type == TagType.Start)
        {
            openTags.Push(parsed);
        }
        else if (parsed.Type == TagType.End)
        {
            if (openTags.Count == 0)
            {
                throw new Exception($"Line does not have matching starting and ending tags: {text}");
            }

            var pairsWithInnermostOpenTag = parsed.Anchor == openTags.Peek().Anchor;
            if (!pairsWithInnermostOpenTag)
            {
                // The end tag does not close the innermost open tag: keep its markup as text.
                result.Add(piece);
                continue;
            }

            // The end tag inherits the id of the start tag it closes.
            parsed.TagID = openTags.Pop().TagID;
        }

        result.Add(parsed);
    }

    if (openTags.Count != 0)
    {
        throw new Exception($"Line does not have matching starting and ending tags: {text}");
    }

    return result;
}
// Converting target text with nested tag markers back into a segment must restore
// both tag pairs and the text between them.
public void ReturnSegmentWithImbricatedStartAndEndTagAndInBetweenText_WhenSegmentHasImbricatedStartAndEndTagsAndInBetweenText()
{
    //Arrange
    const string translatedText = "Ich<tg1-1>habe<tg2-2>einen</tg2-2>neues</tg1-1>tag";
    const string expectedText = "Ich <1 id=1>habe <2 id=2>einen </2>neues</1>tag";

    var source = new Segment();
    source.Add("I ");
    source.Add(new Tag(TagType.Start, "1", 1));
    source.Add("have ");
    source.Add(new Tag(TagType.Start, "2", 2));
    source.Add("one ");
    source.Add(new Tag(TagType.End, "2", 2));
    source.Add("new");
    source.Add(new Tag(TagType.End, "1", 1));
    source.Add("tag");

    var sut = new SegmentConverter(source);
    // The source conversion runs before the target text is converted back, as in the original test.
    sut.ConvertSourceSegmentToText();

    //Act
    var actual = sut.ConvertTargetTextToSegment(translatedText);

    //Assert
    Assert.Equal(9, actual.Elements.Count);
    Assert.Equal(expectedText, actual.ToString());
}
/// <summary>
/// Returns a tagged segment built from a target string containing markup, where the target
/// string represents the translation of the class instance's source segment.
/// </summary>
/// <param name="returnedText">The translated text, possibly containing tag markers.</param>
/// <returns>
/// A segment with the tags from the tag dictionary re-inserted when formatting and tag usage
/// is enabled; otherwise a segment holding the trimmed text (empty when the text is blank).
/// </returns>
/// <exception cref="Exception">
/// Any failure is rethrown with the method name prefixed; the original exception is kept as
/// the inner exception.
/// </exception>
public Segment GetTaggedSegment(string returnedText)
{
    try
    {
        _returnedText = returnedText;
        var segment = new Segment();

        if (!_formattingAndTagUsage)
        {
            // Plain-text mode: add the text only when it is not blank.
            // (The original condition was inverted and only ever added an empty string.)
            var plainText = _returnedText.Trim();
            if (plainText.Length > 0)
            {
                segment.Add(plainText);
            }

            return segment;
        }

        foreach (var text in GetTargetElements())
        {
            if (_dict.ContainsKey(text))
            {
                // The element is a known tag marker: restore the tag and its padding.
                var tagInfo = _dict[text];
                var padLeft = tagInfo.PadLeft;
                var padRight = tagInfo.PadRight;

                if (padLeft.Length > 0)
                {
                    segment.Add(padLeft); // leading space, if present in the source text
                }

                segment.Add(tagInfo.SdlTag);

                if (padRight.Length > 0)
                {
                    segment.Add(padRight); // trailing space, if present in the source text
                }

                continue;
            }

            // Not a tag marker, so the element is just text; whitespace-only pieces are dropped.
            var trimmed = text.Trim();
            if (trimmed.Length > 0)
            {
                segment.Add(trimmed);
            }
        }

        return segment;
    }
    catch (Exception ex)
    {
        // Keep the original message format, but preserve the cause for diagnostics.
        throw new Exception($"GetTaggedSegment method: {ex.Message}\n { ex.StackTrace}", ex);
    }
}
/// <summary>
/// Looks up a translation for <paramref name="segment"/> through <c>LookupDeepl</c>.
/// </summary>
/// <param name="settings">Search settings; not read by this method.</param>
/// <param name="segment">The source segment to translate.</param>
/// <returns>
/// Search results whose source segment is a duplicate of <paramref name="segment"/>. They contain
/// one result when a translation (or the "draft not resent" message) was produced, and none when
/// the lookup returned nothing or an exception was logged.
/// </returns>
public SearchResults SearchSegment(SearchSettings settings, Segment segment)
{
    var translation = new Segment(_languageDirection.TargetCulture);
    var results = new SearchResults { SourceSegment = segment.Duplicate() };

    try
    {
        // Segments that already have a confirmation level are not sent again
        // unless resending drafts is enabled.
        var skipLookup = !_options.ResendDrafts
                         && _inputTu.ConfirmationLevel != ConfirmationLevel.Unspecified;
        if (skipLookup)
        {
            translation.Add(PluginResources.TranslationLookupDraftNotResentMessage); //later get these strings from resource file
            results.Add(CreateSearchResult(segment, translation));
            return results;
        }

        // Work on a copy so the incoming segment is never modified.
        var source = segment.Duplicate();
        DeepLTranslationProviderTagPlacer tagPlacer = null;
        string translatedText;

        if (source.HasTags)
        {
            tagPlacer = new DeepLTranslationProviderTagPlacer(source);
            translatedText = LookupDeepl(tagPlacer.PreparedSourceText);
        }
        else
        {
            translatedText = LookupDeepl(source.ToPlain());
        }

        if (string.IsNullOrEmpty(translatedText))
        {
            return results;
        }

        if (tagPlacer != null)
        {
            translation = tagPlacer.GetTaggedSegment(translatedText);
        }
        else
        {
            translation.Add(translatedText);
        }

        results.Add(CreateSearchResult(source, translation));
        return results;
    }
    catch (Exception e)
    {
        Log.Logger.Error($"SearchSegment method: {e.Message}\n {e.StackTrace}");
    }

    return results;
}
/// <summary>
/// Returns a tagged segment built from a target string containing markup, where the target
/// string represents the translation of the class instance's source segment.
/// </summary>
/// <param name="returnedText">The translated text as returned by the provider.</param>
/// <returns>The segment rebuilt from text pieces and the tags found in the tag dictionary.</returns>
public Segment GetTaggedSegment(string returnedText)
{
    // Decode the returned text; the tag dictionary is already built at this point.
    _returnedText = DecodeReturnedText(returnedText);

    var taggedSegment = new Segment();

    // Tag markers and text pieces, in the order received.
    foreach (var element in GetTargetElements())
    {
        if (!_tagsDictionary.ContainsKey(element))
        {
            // Plain text: surrounding spaces are dropped because padding is tracked with the tags.
            var trimmed = element.Trim();
            if (trimmed.Length > 0)
            {
                taggedSegment.Add(trimmed);
            }

            continue;
        }

        try
        {
            var entry = _tagsDictionary[element];
            var leftPad = entry.PadLeft;
            var rightPad = entry.PadRight;

            if (leftPad.Length > 0)
            {
                taggedSegment.Add(leftPad); // leading space, if present in the source text
            }

            taggedSegment.Add(entry.SdlTag);

            if (rightPad.Length > 0)
            {
                taggedSegment.Add(rightPad); // trailing space, if present in the source text
            }
        }
        catch (Exception)
        {
            // NOTE(review): failures while restoring a tag are silently ignored here,
            // as in the original code; consider logging them.
        }
    }

    if (taggedSegment.Elements.Any())
    {
        taggedSegment = RemoveTrailingClosingTags(taggedSegment);
    }

    return taggedSegment;
}
// A single tag at the end of the segment is expected to be rendered as "<tg1-1/>"
// for the tag type supplied by the test case.
public void ReturnStringWithSingleTag_WhenSegmentHasOneTagAtTheEnd(TagType tagType)
{
    //Arrange
    const string expected = "I contain one tag<tg1-1/>";

    var source = new Segment();
    source.Add("I contain one tag");
    source.Add(new Tag(tagType, "1", 1));

    //Act
    var sut = new SegmentConverter(source);
    var actual = sut.ConvertSourceSegmentToText();

    //Assert
    Assert.Equal(expected, actual);
}
/// <summary>
/// Looks up a translation for <paramref name="segment"/> through <c>LookupDeepl</c>.
/// </summary>
/// <param name="settings">Search settings; not read by this method.</param>
/// <param name="segment">The source segment to translate.</param>
/// <returns>
/// Search results holding exactly one result: either the "draft not resent" message, or the
/// translation (with tags re-placed when the source segment has tags).
/// </returns>
public SearchResults SearchSegment(SearchSettings settings, Segment segment)
{
    var translation = new Segment(_languageDirection.TargetCulture);
    var results = new SearchResults { SourceSegment = segment.Duplicate() };

    // Segments that already have a confirmation level are not sent for translation.
    if (_inputTu.ConfirmationLevel != ConfirmationLevel.Unspecified)
    {
        translation.Add(PluginResources.TranslationLookupDraftNotResentMessage); //later get these strings from resource file
        results.Add(CreateSearchResult(segment, translation, segment.ToString()));
        return results;
    }

    // A new segment avoids modifying the current segment object.
    var newseg = segment.Duplicate();

    if (newseg.HasTags)
    {
        // Send the prepared (marked-up) source text and let the tag placer restore the tags.
        var tagPlacer = new DeepLTranslationProviderTagPlacer(newseg);
        var translatedText = LookupDeepl(tagPlacer.PreparedSourceText);
        translation = tagPlacer.GetTaggedSegment(translatedText);
    }
    else
    {
        // Simple text: translate the plain representation as it is.
        var translatedText = LookupDeepl(newseg.ToPlain());
        translation.Add(translatedText);
    }

    results.Add(CreateSearchResult(newseg, translation, newseg.ToPlain()));
    return results;
}
/// <summary>
/// Returns a tagged segment built from a target string containing markup, where the target
/// string represents the translation of the class instance's source segment.
/// </summary>
/// <param name="_returnedText">The translated text as returned by the provider.</param>
/// <returns>
/// The segment rebuilt from text pieces and the tags found in the tag dictionary; it is empty
/// when the returned text contains nothing but whitespace.
/// </returns>
public Segment GetTaggedSegment(string _returnedText)
{
    // Decode the returned text; the tag dictionary is already built at this point.
    returnedText = DecodeReturnedText(_returnedText);

    Segment segment = new Segment();

    // Tag markers and text pieces, in the order received.
    string[] targetElements = GetTargetElements();

    foreach (string element in targetElements)
    {
        if (dict.ContainsKey(element))
        {
            try
            {
                var entry = dict[element];
                string padleft = entry.padLeft;
                string padright = entry.padRight;

                if (padleft.Length > 0)
                {
                    segment.Add(padleft); // leading space, if present in the source text
                }

                segment.Add(entry.SdlTag);

                if (padright.Length > 0)
                {
                    segment.Add(padright); // trailing space, if present in the source text
                }
            }
            catch
            {
                // NOTE(review): best-effort, as in the original code. A tag that cannot be
                // restored is skipped without any diagnostics; consider logging.
            }
        }
        else
        {
            // Plain text: surrounding spaces are dropped because padding is tracked with the tags.
            string trimmed = element.Trim();
            if (trimmed.Length > 0)
            {
                segment.Add(trimmed);
            }
        }
    }

    // Microsoft sends back closing tags that need to be removed. Skip this for an empty
    // segment: there is no last element to inspect, and the clean-up would otherwise be
    // asked to index into an empty element list.
    if (segment.Elements.Count > 0)
    {
        segment = RemoveTrailingClosingTags(segment);
    }

    return segment;
}
/// <summary>
/// Translates the plain text of <paramref name="segment"/> while holding <c>lockGuard</c>.
/// </summary>
/// <param name="settings">Only <c>Mode</c> is read; a lookup happens for <c>NormalSearch</c> only.</param>
/// <param name="segment">The source segment; its elements are fed to the shared visitor.</param>
/// <returns>
/// Search results for a duplicate of the source segment, with one result in normal search mode
/// and none otherwise.
/// </returns>
public SearchResults SearchSegment(SearchSettings settings, Segment segment)
{
    lock (lockGuard)
    {
        // Rebuild the visitor's plain text from this segment's elements.
        _visitor.Reset();
        foreach (var part in segment.Elements)
        {
            part.AcceptSegmentElementVisitor(_visitor);
        }

        DeepLSamplerTranslationProvider.log.WriteLine("SearchSegment executed for source: " + _visitor.PlainText, true);

        var results = new SearchResults { SourceSegment = segment.Duplicate() };

        // Only the normal segment lookup is supported.
        if (settings.Mode != SearchMode.NormalSearch)
        {
            return results;
        }

        var translation = new Segment(_languageDirection.TargetCulture);
        string translatedText = DeepLSamplerTranslationProvider.deepL.translateText(_visitor.PlainText);
        translation.Add(translatedText);

        results.Add(CreateSearchResult(segment, translation, _visitor.PlainText, segment.HasTags));
        DeepLSamplerTranslationProvider.log.WriteLine("--> SearchSegment gets translation: ***" + translatedText + "*** for source: " + _visitor.PlainText, true);

        return results;
    }
}
/// <summary>
/// Wraps a string in a new <see cref="Segment"/> created with the parameterless constructor.
/// </summary>
/// <param name="toConvert">The text to add to the new segment.</param>
/// <returns>A segment to which <paramref name="toConvert"/> has been added.</returns>
public static Segment ToSegment(this string toConvert)
{
    var result = new Segment();
    result.Add(toConvert);

    return result;
}
/// <summary>
/// Creates a goto for the instruction at index <paramref name="from"/>.
/// </summary>
/// <param name="instructions">The instruction list this goto belongs to; stored as given.</param>
/// <param name="from">Index of the instruction that is placed into this goto's segment.</param>
/// <param name="label">The label value stored for this goto.</param>
public Goto(List<JsmInstruction> instructions, Int32 from, Int32 label)
{
    _instructions = instructions;
    _label = label;

    // The segment is created with the bounds (from, from + 1) and holds that single instruction.
    Int32 end = from + 1;
    _segment = new ExecutableSegment(from, end);
    _segment.Add(instructions[from]);
}
/// <summary>
/// Wraps a translation in a machine-translation <see cref="SearchResult"/>.
/// </summary>
/// <param name="segment">The source segment; a duplicate is stored in the translation unit.</param>
/// <param name="tagPlacer">Tag placer used to turn <paramref name="translation"/> into a tagged segment.</param>
/// <param name="translation">The translated text.</param>
/// <returns>A search result with a default <see cref="ScoringResult"/>.</returns>
private SearchResult CreateSearchResult(Segment segment, TartuNLPTagPlacer tagPlacer, string translation)
{
    var target = tagPlacer.GetTaggedSegment(translation);

    // Handle cases where input and output tags did not match: translate the plain
    // source text again and use that result without tags.
    if (target == null)
    {
        var plainSources = new List<string> { segment.ToPlain() };
        var plainTranslation = SearchInServer(plainSources)[0];

        target = new Segment(_languageDirection.TargetCulture);
        target.Add(plainTranslation);
    }

    var unit = new TranslationUnit
    {
        SourceSegment = segment.Duplicate(),
        TargetSegment = target
    };

    // The resource id is derived from the unit's hash code before the origin is set,
    // in the same order as the original code.
    unit.ResourceId = new PersistentObjectToken(unit.GetHashCode(), Guid.Empty);
    unit.Origin = TranslationUnitOrigin.MachineTranslation;

    return new SearchResult(unit)
    {
        ScoringResult = new ScoringResult()
    };
}
/// <summary>
/// Visits a tag pair: emits a start tag, visits the pair's children, then emits the end tag.
/// </summary>
/// <param name="tagPair">The tag pair being visited.</param>
/// <remarks>
/// <c>TagId</c> is incremented once per tag pair, whether or not tags are emitted. The value is
/// captured before the children are visited, so that the start and end tag of this pair carry
/// the same number even if visiting the children advances <c>TagId</c> again (presumably for
/// nested tag pairs — confirm against <c>VisitChilderen</c>).
/// </remarks>
public void VisitTagPair(ITagPair tagPair)
{
    TagId++;

    // Remember this pair's own number; TagId itself may change while visiting children.
    var anchor = TagId;
    var tagId = tagPair.StartTagProperties.TagId.Id;

    if (!IgnoreTags)
    {
        var startTag = new Tag(
            TagType.Start, tagId, anchor, 0, null,
            tagPair.StartTagProperties.CanHide);
        Segment?.Add(startTag);
    }

    VisitChilderen(tagPair);

    if (!IgnoreTags)
    {
        var endTag = new Tag(
            TagType.End, tagId, anchor, 0, null,
            tagPair.EndTagProperties.CanHide);
        Segment?.Add(endTag);
    }
}
/// <summary>
/// Builds a sequence from the texts that start at the given offsets in <c>buffer</c>.
/// </summary>
/// <param name="indices">
/// Start offsets into <c>buffer</c>. Each text runs from its offset up to, but not including,
/// the next <c>'\0'</c> character.
/// </param>
/// <returns>
/// A sequence chaining one segment per index, in enumeration order, or
/// <see cref="ReadOnlySequence{T}.Empty"/> when <paramref name="indices"/> yields no items.
/// </returns>
/// <remarks>
/// If no <c>'\0'</c> follows an offset, <c>IndexOf</c> returns -1 and the subsequent
/// <c>Slice</c> call throws; that behavior is unchanged.
/// </remarks>
public ReadOnlySequence<char> Retrieve(IEnumerable<int> indices)
{
    Segment<char> first = null;
    Segment<char> last = null;
    int lastLength = 0;

    foreach (var idx in indices)
    {
        var tail = buffer.Slice(idx);
        lastLength = tail.Span.IndexOf('\0');
        var text = tail.Slice(0, lastLength);

        if (first == null)
        {
            first = new Segment<char>(text);
            last = first;
        }
        else
        {
            // Add returns the newly appended segment, which becomes the new tail.
            last = last.Add(text);
        }
    }

    // The segment-based ReadOnlySequence constructor rejects null segments,
    // so an empty input has to be answered without calling it.
    if (first == null)
    {
        return ReadOnlySequence<char>.Empty;
    }

    // The end index is the length of the text held by the last segment.
    return new ReadOnlySequence<char>(first, 0, last, lastLength);
}
/// <summary>
/// Microsoft always adds closing tags, but tags are not tracked that way here, so segments end
/// with leftover closing-tag markup as text. This method removes that markup from the last
/// element when it is a text element.
/// </summary>
/// <param name="segment">The segment to clean up; it is modified in place.</param>
/// <returns>
/// The same segment instance. It is returned unchanged when it is empty, when its last element
/// is not a text element, when no closing-tag markup is found, or when an error was logged.
/// </returns>
public Segment RemoveTrailingClosingTags(Segment segment)
{
    try
    {
        // An empty segment has no last element. Return early instead of relying on the
        // catch block, which would log a spurious error for a perfectly normal input.
        if (segment.Elements.Count == 0)
        {
            return segment;
        }

        var lastElement = segment.Elements[segment.Elements.Count - 1];
        var text = lastElement.ToString();

        // Matches "</tg" + {any number} + ">".
        var matches = Regex.Matches(text, @"\</tg[0-9]*\>");
        var isTextElement = lastElement.GetType().ToString().Equals("Sdl.LanguagePlatform.Core.Text");

        if (isTextElement && matches.Count > 0)
        {
            foreach (Match match in matches)
            {
                text = text.Replace(match.Value, ""); // strip the closing-tag markup
            }

            // Replace the last element with its cleaned text.
            segment.Elements.Remove(lastElement);
            segment.Add(text.TrimStart());
        }

        return segment;
    }
    catch (Exception ex)
    {
        _logger.Error($"{_constants.RemoveTrailingClosingTags} {ex.Message}\n { ex.StackTrace}");
        return segment;
    }
}
/// <summary>
/// Searches for a plain string by wrapping it in a source-culture segment.
/// </summary>
/// <param name="settings">Search settings passed through to <c>SearchSegment</c>.</param>
/// <param name="segment">The text to search for.</param>
/// <returns>The result of <c>SearchSegment</c>, called with <c>true</c> as its third argument.</returns>
public SearchResults SearchText(SearchSettings settings, string segment)
{
    var wrapped = new Segment(_languages.SourceCulture);
    wrapped.Add(segment);

    return SearchSegment(settings, wrapped, true);
}
/// <summary>
/// Builds a machine-translation search result that pairs the source text with a word.
/// </summary>
/// <param name="word">The item whose <c>Word</c> becomes the target text.</param>
/// <param name="sourceSegmentText">The source text.</param>
/// <returns>A search result with a fixed base score of 25.</returns>
public SearchResult CreateSearchResult(WordItem word, string sourceSegmentText)
{
    var source = new Segment(_sourceLanguage);
    source.Add(sourceSegmentText);

    var target = new Segment(_targetLanguage);
    target.Add(word.Word);

    var translationUnit = new TranslationUnit
    {
        SourceSegment = source,
        TargetSegment = target,
        ConfirmationLevel = ConfirmationLevel.Translated,
        Origin = TranslationUnitOrigin.MachineTranslation
    };
    translationUnit.ResourceId = new PersistentObjectToken(translationUnit.GetHashCode(), Guid.Empty);

    // We do not currently support scoring, so always say that we're 25% sure on this translation.
    return new SearchResult(translationUnit)
    {
        ScoringResult = new ScoringResult { BaseScore = 25 }
    };
}
/// <summary>
/// Searches for a plain string by wrapping it in a new segment.
/// </summary>
/// <param name="settings">Search settings passed through to <c>SearchSegment</c>.</param>
/// <param name="segment">The text to search for.</param>
/// <returns>The result of <c>SearchSegment</c> for the wrapped text.</returns>
public SearchResults SearchText(SearchSettings settings, string segment)
{
    // The segment is created without a culture, as in the original code.
    var wrapped = new Segment();
    wrapped.Add(segment);

    return SearchSegment(settings, wrapped);
}
/// <summary>
/// Microsoft always adds closing tags, but tags are not tracked that way here, so segments end
/// with leftover closing-tag markup as text. This method removes that markup from the last
/// element when it is a text element.
/// </summary>
/// <param name="segment">The segment to clean up; it is modified in place.</param>
/// <returns>
/// The same segment instance. It is returned unchanged when it is empty, when its last element
/// is not a text element, or when no closing-tag markup is found.
/// </returns>
public Segment RemoveTrailingClosingTags(Segment segment)
{
    // An empty segment has no last element; indexing Count - 1 would throw.
    if (segment.Elements.Count == 0)
    {
        return segment;
    }

    var lastElement = segment.Elements[segment.Elements.Count - 1];
    var text = lastElement.ToString();

    // Matches "</tg" + {any number} + ">".
    var matches = Regex.Matches(text, @"\</tg[0-9]*\>");
    var isTextElement = lastElement.GetType().ToString().Equals("Sdl.LanguagePlatform.Core.Text");

    if (isTextElement && matches.Count > 0)
    {
        foreach (Match match in matches)
        {
            text = text.Replace(match.Value, ""); // strip the closing-tag markup
        }

        // Replace the last element with its cleaned text.
        segment.Elements.Remove(lastElement);
        segment.Add(text.TrimStart());
    }

    return segment;
}
/// <summary>
/// Creates one <see cref="SearchResults"/> per pre-translated segment.
/// </summary>
/// <param name="preTranslateList">
/// Items carrying a translation unit and its plain translation. For sources with tags, the
/// item's <c>TranslationSegment</c> is set to the tagged translation.
/// </param>
/// <returns>A list with one entry per input item, in input order.</returns>
private List<SearchResults> GetPreTranslationSearchResults(List<PreTranslateSegment> preTranslateList)
{
    var allResults = new List<SearchResults>();

    foreach (var item in preTranslateList)
    {
        var target = new Segment(_languageDirection.TargetCulture);

        // Work on a copy of the source segment.
        var source = item.TranslationUnit.SourceSegment.Duplicate();

        if (!source.HasTags)
        {
            target.Add(item.PlainTranslation);
        }
        else
        {
            // Let the tag placer put the source tags back into the plain translation.
            var tagPlacer = new BeGlobalTagPlacer(source);
            target = tagPlacer.GetTaggedSegment(item.PlainTranslation);
            item.TranslationSegment = target;
        }

        var hit = CreateSearchResult(source, target);
        var itemResults = new SearchResults { SourceSegment = source };
        itemResults.Add(hit);

        allResults.Add(itemResults);
    }

    return allResults;
}
/// <summary>
/// Searches for a plain string by wrapping it in a source-culture segment.
/// </summary>
/// <param name="settings">Search settings passed through to <c>SearchSegment</c>.</param>
/// <param name="segment">The text to search for.</param>
/// <returns>The result of <c>SearchSegment</c> for the wrapped text.</returns>
public SearchResults SearchText(SearchSettings settings, string segment)
{
    var sourceSegment = new Segment(_languageDirection.SourceCulture);
    sourceSegment.Add(segment);

    return SearchSegment(settings, sourceSegment);
}
/// <summary>
/// Searches for a plain string by wrapping it in a source-culture segment.
/// </summary>
/// <param name="settings">Search settings passed through to <c>SearchSegment</c>.</param>
/// <param name="segment">The text to search for.</param>
/// <returns>The result of <c>SearchSegment</c> for the wrapped text.</returns>
public SearchResults SearchText(SearchSettings settings, string segment)
{
    var wrapped = new Segment(_languageDirection.SourceCulture);
    wrapped.Add(segment);

    return SearchSegment(settings, wrapped);
}
// A lone start tag is expected to be rendered as "<tg1-1>", with the text pieces
// around it joined in order.
public void ReturnStringWithStartTag_WhenSegmentHasOnlyStartTag()
{
    //Arrange
    const string expected = "I contain<tg1-1>one tag";

    var source = new Segment();
    source.Add("I contain");
    source.Add(new Tag(TagType.Start, "1", 1));
    source.Add("one ");
    source.Add("tag");

    //Act
    var sut = new SegmentConverter(source);
    var actual = sut.ConvertSourceSegmentToText();

    //Assert
    Assert.Equal(expected, actual);
}
/// <summary>
/// Creates a segment from a match string that may contain XML tags.
/// </summary>
/// <param name="scoringResult">The scoring result; not read by this method.</param>
/// <param name="searchSettings">The search settings; not read by this method.</param>
/// <param name="matchResult">The match text, with tags recognised by <c>SegmentParser.XmlRegex</c>.</param>
/// <param name="culture">The culture of the new segment.</param>
/// <returns>
/// A new segment in which the text between tags (with literals unescaped) and the parsed tags
/// alternate in source order, followed by the text after the last tag.
/// </returns>
public static Segment CreateSegment(
    ScoringResult scoringResult,
    SearchSettings searchSettings,
    string matchResult,
    CultureInfo culture)
{
    var segment = new Segment(culture);

    // Offset in matchResult just past the previously processed tag.
    int position = 0;

    // Matches all XML tags, including their attributes.
    foreach (Match match in SegmentParser.XmlRegex.Matches(matchResult))
    {
        var tagElement = SegmentParser.GetTag(match.Value);
        string adjacent = matchResult.Substring(position, match.Index - position);

        // Advance using offsets of the ORIGINAL string. Unescaping can change the length
        // of the text, so the unescaped length must not be used to track the position.
        position = match.Index + match.Length;

        segment.Add(SegmentParser.UnescapeLiterals(adjacent));
        segment.Add(tagElement);
    }

    // Text after the last tag, or the whole string when there are no tags.
    segment.Add(SegmentParser.UnescapeLiterals(matchResult.Substring(position)));

    return segment;
}
// ----------------------------------------------------------------------------------------
/// <!-- FillSegments -->
/// <summary>
///      Splits a path into its parts and adds one built segment per part to Segment
/// </summary>
/// <param name="path">path text; parts are separated by any of the characters | ! * + ^</param>
/// <param name="enRef">passed through to BuildSegment</param>
/// <param name="rawSource">passed through to BuildSegment</param>
private void FillSegments(string path, EndemeReference enRef, bool rawSource)
{
    char[] separators = { '|', '!', '*', '+', '^' };

    // Empty parts (e.g. between two adjacent separators) are skipped.
    foreach (string part in path.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        Segment.Add(BuildSegment(part, enRef, rawSource));
    }
}
/// <summary>
/// Translates a string of text by wrapping it in a source-culture segment.
/// </summary>
/// <param name="settings">Search settings passed through to <c>SearchSegment</c>.</param>
/// <param name="segment">The text to translate.</param>
/// <returns>The result of <c>SearchSegment</c> for the wrapped text.</returns>
public SearchResults SearchText(SearchSettings settings, string segment)
{
    Log.Logger.Trace("");

    var sourceSegment = new Segment(languageDirection.SourceCulture);
    sourceSegment.Add(segment);

    return SearchSegment(settings, sourceSegment);
}
/// <summary>
/// Returns a tagged segment built from a target string containing markup, where the target
/// string represents the translation of the class instance's source segment.
/// </summary>
/// <param name="returnedText">The translated text as returned by the provider.</param>
/// <returns>The segment rebuilt from text pieces and the tags found in the tag dictionary.</returns>
public Segment GetTaggedSegment(string returnedText)
{
    // Decode the returned text; the tag dictionary is already built at this point.
    _returnedText = DecodeReturnedText(returnedText);

    var result = new Segment();

    // Tag markers and text pieces, in the order received.
    foreach (var element in GetTargetElements())
    {
        if (_tagsDictionary.ContainsKey(element))
        {
            try
            {
                var entry = _tagsDictionary[element];
                var leftPad = entry.PadLeft;
                var rightPad = entry.PadRight;

                if (leftPad.Length > 0)
                {
                    result.Add(leftPad); // leading space, if present in the source text
                }

                result.Add(entry.SdlTag);

                if (rightPad.Length > 0)
                {
                    result.Add(rightPad); // trailing space, if present in the source text
                }
            }
            catch (Exception e)
            {
                // A tag that cannot be restored is reported on the console and skipped.
                Console.WriteLine(e);
            }

            continue;
        }

        // Plain text: surrounding spaces are dropped because padding is tracked with the tags.
        var trimmed = element.Trim();
        if (trimmed.Length > 0)
        {
            result.Add(trimmed);
        }
    }

    return result;
}
/// <summary>
/// Converts a line of text containing tag markup into a <see cref="Segment"/>.
/// </summary>
/// <param name="text">Text to be converted.</param>
/// <returns>
/// A segment holding text pieces and the tags recognised by <c>ParseTag</c>, in source order.
/// </returns>
/// <exception cref="System.Exception">
/// An end tag appears while no start tag is open, or start tags remain open at the end of the line.
/// </exception>
public static Segment ParseLine(string text)
{
    var result = new Segment();

    // Allow any character as long as it is within quotes. If there are no quotes,
    // default to allowing any character except '>'.
    var pieces = Regex.Split(text, @"(<[^>""]*>|<.*?"".*?""\/>)");
    var openTags = new Stack<Tag>();

    foreach (var piece in pieces)
    {
        if (piece == string.Empty)
        {
            continue;
        }

        var parsed = ParseTag(piece);
        if (parsed == null)
        {
            // Not a tag: keep the piece as plain text.
            result.Add(piece);
            continue;
        }

        if (parsed.Type == TagType.Start)
        {
            openTags.Push(parsed);
        }
        else if (parsed.Type == TagType.End)
        {
            if (openTags.Count == 0)
            {
                throw new System.Exception(string.Format("Line does not have matching starting and ending tags: {0}", text));
            }

            // The end tag takes the id of the most recently opened start tag.
            parsed.TagID = openTags.Pop().TagID;
        }

        result.Add(parsed);
    }

    if (openTags.Count != 0)
    {
        throw new System.Exception(string.Format("Line does not have matching starting and ending tags: {0}", text));
    }

    return result;
}
/// <summary>
/// Returns a tagged segment built from a target string containing markup, where the target
/// string represents the translation of the class instance's source segment.
/// </summary>
/// <param name="returnedText">The translated text as returned by the provider.</param>
/// <returns>
/// The segment rebuilt from text pieces and the tags found in the tag dictionary, or a new
/// empty segment when an error occurred (the error is logged).
/// </returns>
public Segment GetTaggedSegment(string returnedText)
{
    try
    {
        // Decode the returned text; the tag dictionary is already built at this point.
        _returnedText = DecodeReturnedText(returnedText);

        var result = new Segment();

        // Tag markers and text pieces, in the order received.
        foreach (var element in GetTargetElements())
        {
            if (!dict.ContainsKey(element))
            {
                // Plain text: surrounding spaces are dropped because padding is tracked with the tags.
                var trimmed = element.Trim();
                if (trimmed.Length > 0)
                {
                    result.Add(trimmed);
                }

                continue;
            }

            var entry = dict[element];
            var leftPad = entry.PadLeft;
            var rightPad = entry.PadRight;

            if (leftPad.Length > 0)
            {
                result.Add(leftPad); // leading space, if present in the source text
            }

            result.Add(entry.SdlTag);

            if (rightPad.Length > 0)
            {
                result.Add(rightPad); // trailing space, if present in the source text
            }
        }

        return result;
    }
    catch (Exception ex)
    {
        _logger.Error($"{_constants.GetTaggedSegment} {ex.Message}\n { ex.StackTrace}");
        return new Segment();
    }
}
/// <summary>
/// Returns a tagged segment built from a target string containing markup, where the target
/// string represents the translation of the class instance's source segment.
/// </summary>
/// <param name="_returnedText">The translated text as returned by the provider.</param>
/// <returns>
/// The segment rebuilt from text pieces and the tags found in the tag dictionary; it is empty
/// when the returned text contains nothing but whitespace.
/// </returns>
public Segment GetTaggedSegment(string _returnedText)
{
    // Decode the returned text; the tag dictionary is already built at this point.
    returnedText = DecodeReturnedText(_returnedText);

    Segment segment = new Segment();

    // Tag markers and text pieces, in the order received.
    string[] targetElements = GetTargetElements();

    foreach (string element in targetElements)
    {
        if (dict.ContainsKey(element))
        {
            try
            {
                var entry = dict[element];
                string padleft = entry.padLeft;
                string padright = entry.padRight;

                if (padleft.Length > 0)
                {
                    segment.Add(padleft); // leading space, if present in the source text
                }

                segment.Add(entry.SdlTag);

                if (padright.Length > 0)
                {
                    segment.Add(padright); // trailing space, if present in the source text
                }
            }
            catch
            {
                // NOTE(review): best-effort, as in the original code. A tag that cannot be
                // restored is skipped without any diagnostics; consider logging.
            }
        }
        else
        {
            // Plain text: surrounding spaces are dropped because padding is tracked with the tags.
            string trimmed = element.Trim();
            if (trimmed.Length > 0)
            {
                segment.Add(trimmed);
            }
        }
    }

    // Microsoft sends back closing tags that need to be removed. Skip this for an empty
    // segment: there is no last element to inspect, and the clean-up would otherwise be
    // asked to index into an empty element list.
    if (segment.Elements.Count > 0)
    {
        segment = RemoveTrailingClosingTags(segment);
    }

    return segment;
}
/// <summary>
/// Microsoft always adds closing tags, but tags are not tracked that way here, so segments end
/// with leftover closing-tag markup as text. This method removes that markup from the last
/// element when it is a text element.
/// </summary>
/// <param name="segment">The segment to clean up; it is modified in place.</param>
/// <returns>
/// The same segment instance. It is returned unchanged when it is empty, when its last element
/// is not a text element, or when no closing-tag markup is found.
/// </returns>
public Segment RemoveTrailingClosingTags(Segment segment)
{
    // An empty segment has no last element; indexing Count - 1 would throw.
    if (segment.Elements.Count == 0)
    {
        return segment;
    }

    SegmentElement lastElement = segment.Elements[segment.Elements.Count - 1];
    string text = lastElement.ToString();

    // Matches "</tg" + {any number} + ">".
    MatchCollection matches = Regex.Matches(text, @"\</tg[0-9]*\>");
    bool isTextElement = lastElement.GetType().ToString().Equals("Sdl.LanguagePlatform.Core.Text");

    if (isTextElement && matches.Count > 0)
    {
        foreach (Match match in matches)
        {
            text = text.Replace(match.Value, ""); // strip the closing-tag markup
        }

        // Replace the last element with its cleaned text.
        segment.Elements.Remove(lastElement);
        segment.Add(text.TrimStart());
    }

    return segment;
}
/// <summary>
/// Looks up a machine translation for <paramref name="segment"/> with the selected provider.
/// </summary>
/// <param name="settings">Search settings; not read by this method.</param>
/// <param name="segment">The source segment; it is duplicated and never modified.</param>
/// <returns>
/// Search results holding exactly one result: either the "draft not resent" message, or the
/// translation. The source segment of that result reflects any pre-lookup edits.
/// </returns>
public SearchResults SearchSegment(SearchSettings settings, Segment segment)
{
    //FUTURE: consider making GT and MT lookup classes static utility classes
    Segment translation = new Segment(_languageDirection.TargetCulture); // the target segment

    SearchResults results = new SearchResults();
    results.SourceSegment = segment.Duplicate();

    // Do not pay for a lookup of text that already has a status other than untranslated,
    // unless resending drafts is enabled.
    if (!_options.ResendDrafts && inputTu.ConfirmationLevel != ConfirmationLevel.Unspecified)
    {
        translation.Add(PluginResources.TranslationLookupDraftNotResentMessage); //later get these strings from resource file
        results.Add(CreateSearchResult(segment, translation, segment.ToString()));
        return results;
    }

    string translatedText = "";

    // A new segment avoids modifying the current segment object.
    Segment newseg = segment.Duplicate();

    bool sendTextOnly = _options.SendPlainTextOnly || !newseg.HasTags;
    if (!sendTextOnly)
    {
        // Pre-edit the tagged segment if that option is checked.
        if (_options.UsePreEdit)
        {
            if (preLookupSegmentEditor == null)
            {
                preLookupSegmentEditor = new SegmentEditor(_options.PreLookupFilename);
            }

            newseg = getEditedSegment(preLookupSegmentEditor, newseg);
        }

        // The tag placer produces a marked-up source string for the provider.
        MtTranslationProviderTagPlacer tagplacer = new MtTranslationProviderTagPlacer(newseg);

        if (_options.SelectedProvider == MtTranslationOptions.ProviderType.GoogleTranslate)
        {
            translatedText = LookupGT(tagplacer.PreparedSourceText, _options, "html");
        }
        else if (_options.SelectedProvider == MtTranslationOptions.ProviderType.MicrosoftTranslator)
        {
            translatedText = LookupMST(tagplacer.PreparedSourceText, _options, "text/html");
        }

        // Send the output back to the tag placer to get a properly tagged segment.
        translation = tagplacer.GetTaggedSegment(translatedText).Duplicate();

        // Post-edit the tagged translation if that option is checked.
        if (_options.UsePostEdit)
        {
            if (postLookupSegmentEditor == null)
            {
                postLookupSegmentEditor = new SegmentEditor(_options.PostLookupFilename);
            }

            translation = getEditedSegment(postLookupSegmentEditor, translation);
        }
    }
    else
    {
        // Only plain text is sent.
        string sourcetext = newseg.ToPlain();

        // Pre-edit the string if that option is checked.
        if (_options.UsePreEdit)
        {
            if (preLookupSegmentEditor == null)
            {
                preLookupSegmentEditor = new SegmentEditor(_options.PreLookupFilename);
            }

            sourcetext = getEditedString(preLookupSegmentEditor, sourcetext);

            // Change the source segment so the results window shows the text as it was sent.
            newseg.Clear();
            newseg.Add(sourcetext);
        }

        if (_options.SelectedProvider == MtTranslationOptions.ProviderType.GoogleTranslate)
        {
            translatedText = LookupGT(sourcetext, _options, "html"); //plain??
        }
        else if (_options.SelectedProvider == MtTranslationOptions.ProviderType.MicrosoftTranslator)
        {
            translatedText = LookupMST(sourcetext, _options, "text/plain");
        }

        // Post-edit the string if that option is checked.
        if (_options.UsePostEdit)
        {
            if (postLookupSegmentEditor == null)
            {
                postLookupSegmentEditor = new SegmentEditor(_options.PostLookupFilename);
            }

            translatedText = getEditedString(postLookupSegmentEditor, translatedText);
        }

        translation.Add(translatedText);
    }

    results.Add(CreateSearchResult(newseg, translation, newseg.ToPlain()));
    return results;
}
/// <summary> /// Used to do batch find-replace on a segment with tags. /// </summary> /// <param name="inSegment"></param> /// <param name="fileName"></param> /// <returns></returns> private Segment getEditedSegment(SegmentEditor editor, Segment inSegment) { Segment newSeg = new Segment(inSegment.Culture); foreach (SegmentElement element in inSegment.Elements) { System.Type elType = element.GetType(); if (elType.ToString() != "Sdl.LanguagePlatform.Core.Tag") //if other than tag, make string and edit it { string temp = editor.EditText(element.ToString()); newSeg.Add(temp); //add edited text to segment } else { newSeg.Add(element); //if tag just add the tag } } return newSeg; }