/// <summary>
/// Searches for a plain string by wrapping it in a segment that uses the
/// source culture of the language direction and delegating to
/// <c>SearchSegment</c>.
/// </summary>
/// <param name="settings">Search settings, passed through unchanged.</param>
/// <param name="segment">The text to search for.</param>
/// <returns>Whatever <c>SearchSegment</c> returns for the wrapped text.</returns>
public SearchResults SearchText(SearchSettings settings, string segment)
{
    var query = new Sdl.LanguagePlatform.Core.Segment(_languageDirection.SourceCulture);
    query.Add(segment);

    var results = SearchSegment(settings, query);
    return results;
}
/// <summary>
/// Text overload of the segment search: builds a one-element segment in the
/// source culture from the given string and runs <c>SearchSegment</c> on it.
/// </summary>
/// <param name="settings">Search settings, forwarded as-is.</param>
/// <param name="segment">The raw text to look up.</param>
/// <returns>The search results for the generated segment.</returns>
public SearchResults SearchText(SearchSettings settings, string segment)
{
    Segment textSegment = new Segment(_languageDirection.SourceCulture);
    textSegment.Add(segment);

    return SearchSegment(settings, textSegment);
}
/// <summary>
/// Serialises a segment into an HTML fragment made of text and <c>span</c> elements.
/// </summary>
/// <remarks>
/// Text elements are appended XML-escaped. A Start tag becomes an opening span
/// whose <c>class</c> is the tag type and whose <c>id</c> is the tag anchor, an
/// End tag becomes <c>&lt;/span&gt;</c>, and Standalone / TextPlaceholder /
/// LockedContent tags become an empty span followed by a single space.
/// Any other tag type, and any element that is neither text nor tag, emits nothing.
/// </remarks>
/// <param name="segment">The segment whose elements are serialised in order.</param>
/// <returns>The concatenated markup.</returns>
static string Segment2Html(Sdl.LanguagePlatform.Core.Segment segment)
{
    var html = new StringBuilder();

    foreach (var element in segment.Elements)
    {
        var textElement = element as Sdl.LanguagePlatform.Core.Text;
        if (textElement != null)
        {
            html.Append(textElement.Value.XmlEscape());
            continue;
        }

        var tagElement = element as Sdl.LanguagePlatform.Core.Tag;
        if (tagElement == null)
        {
            continue;
        }

        var tagType = tagElement.Type;
        if (tagType == Sdl.LanguagePlatform.Core.TagType.Start)
        {
            html.AppendFormat("<span class='{0}' id='{1}'>", tagType, tagElement.Anchor);
        }
        else if (tagType == Sdl.LanguagePlatform.Core.TagType.End)
        {
            html.Append("</span>");
        }
        else if (tagType == Sdl.LanguagePlatform.Core.TagType.Standalone
                 || tagType == Sdl.LanguagePlatform.Core.TagType.TextPlaceholder
                 || tagType == Sdl.LanguagePlatform.Core.TagType.LockedContent)
        {
            // The single space after the closing span is part of the emitted markup.
            html.AppendFormat("<span class='{0}' id='{1}'></span> ", tagType, tagElement.Anchor);
        }
    }

    return html.ToString();
}
// A naive tokenizer: every non-empty text run of the segment becomes exactly one token.
// Elements that are not text, and text elements with a null or empty value, are skipped.
// NOTE(review): the run counter only advances for non-empty text elements, so it is the
// ordinal of the text run rather than the element's position in segment.Elements —
// confirm that this is what SegmentRange expects when the segment contains tags.
private List<Token> Tokenize(Segment segment)
{
    var result = new List<Token>();
    var runIndex = 0;

    foreach (var element in segment.Elements)
    {
        var textRun = element as Text;
        var content = textRun != null ? textRun.Value : null;

        if (!string.IsNullOrEmpty(content))
        {
            // The token spans the whole run: first character to last character (inclusive).
            var simpleToken = new global::Sdl.LanguagePlatform.Core.Tokenization.SimpleToken(content);
            simpleToken.Span = new SegmentRange(runIndex, 0, content.Length - 1);

            result.Add(simpleToken);
            runIndex++;
        }
    }

    return result;
}
/// <summary>
/// Convenience entry point for searching with a plain string instead of a segment.
/// The string is added to a new segment created with the source culture of the
/// language direction, and that segment is handed to <c>SearchSegment</c>.
/// </summary>
/// <param name="settings">Search settings, passed on untouched.</param>
/// <param name="segment">The string to search for.</param>
/// <returns>The result of <c>SearchSegment</c> for the wrapped string.</returns>
public SearchResults SearchText(SearchSettings settings, string segment)
{
    Sdl.LanguagePlatform.Core.Segment wrapped =
        new Sdl.LanguagePlatform.Core.Segment(_languageDirection.SourceCulture);

    wrapped.Add(segment);

    return SearchSegment(settings, wrapped);
}
/// <summary>
/// Performs the actual search: the plain text of the segment is sent to the
/// TAUS TM provider and every returned segment is converted into a search result.
/// Depending on the search mode, a segment lookup or a source / target
/// concordance search is done.
/// </summary>
/// <param name="settings">
/// Search settings; <c>Mode</c> selects lookup vs. concordance scoring and
/// <c>MaxResults</c> is used as the provider's result limit.
/// </param>
/// <param name="segment">The segment to search for. A duplicate is stored as the source segment of the results.</param>
/// <returns>The collected results; empty when the provider reports "timed out".</returns>
/// <exception cref="Exception">
/// Thrown when the provider answers with any status other than "200" or "timed out".
/// </exception>
public SearchResults SearchSegment(SearchSettings settings, Segment segment)
{
    // Loop through segment elements to 'filter out' e.g. tags in order to
    // make certain that only plain text information is retrieved for
    // this simplified implementation.
    _visitor.Reset();
    foreach (var element in segment.Elements)
    {
        element.AcceptSegmentElementVisitor(_visitor);
    }

    var results = new SearchResults();
    results.SourceSegment = segment.Duplicate();

    // Build the provider request from the stored options.
    var tausTmProvider = new Processor();
    var searchSettings = new Sdl.Community.Taus.TM.Provider.Settings.SearchSettings
    {
        Timeout = Convert.ToInt32(_options.SearchTimeout),
        AppKey = _options.ConnectionAppKey,
        UserName = _options.ConnectionUserName,
        Password = _options.ConnectionUserPassword,
        AuthKey = _options.ConnectionAuthKey,
        Limit = settings.MaxResults
    };
    searchSettings.SearchSections.Add(new SegmentSection(true, _visitor.PlainText));

    // A target concordance search queries the provider in the opposite direction.
    if (settings.Mode == SearchMode.TargetConcordanceSearch)
    {
        searchSettings.SourceLanguageId = _languageDirection.TargetCultureName;
        searchSettings.TargetLanguageId = _languageDirection.SourceCultureName;
    }
    else
    {
        searchSettings.SourceLanguageId = _languageDirection.SourceCultureName;
        searchSettings.TargetLanguageId = _languageDirection.TargetCultureName;
    }

    // Only positive numeric ids are sent as filter criteria; anything else means "no filter".
    searchSettings.IndustryId = NormalizeCriteriaId(_options.SearchCriteriaIndustryId);
    searchSettings.ContentTypeId = NormalizeCriteriaId(_options.SearchCriteriaContentTypeId);
    searchSettings.ProviderId = NormalizeCriteriaId(_options.SearchCriteriaProviderId);
    searchSettings.OwnerId = NormalizeCriteriaId(_options.SearchCriteriaOwnerId);
    searchSettings.ProductId = NormalizeCriteriaId(_options.SearchCriteriaProductId);

    searchSettings.PenaltySettings.MissingFormattingPenalty = 1;
    searchSettings.PenaltySettings.DifferentFormattingPenalty = 1;

    var scoreType = settings.Mode == SearchMode.NormalSearch
        ? Processor.ScoreType.Lookup
        : Processor.ScoreType.Concordance;

    var searchResult = tausTmProvider.SearchSegment(searchSettings, scoreType);

    switch (searchResult.Status)
    {
        case "timed out":
            // Deliberately ignored: a timeout yields an empty result set instead of an error.
            break;

        case "200":
            foreach (var searchResultSegment in searchResult.Segments)
            {
                SearchResult result;
                switch (settings.Mode)
                {
                    case SearchMode.TargetConcordanceSearch:
                        // The provider was queried in the reverse direction, so swap the
                        // sides back before building the translation unit.
                        result = CreateSearchResult(settings, segment,
                            CreateSwappedSegment(searchResultSegment), _visitor.PlainText, false);
                        break;

                    case SearchMode.ConcordanceSearch:
                        result = CreateSearchResult(settings, segment,
                            searchResultSegment, _visitor.PlainText, false);
                        break;

                    case SearchMode.NormalSearch:
                        result = CreateSearchResult(settings, segment,
                            searchResultSegment, _visitor.PlainText, segment.HasTags);
                        break;

                    default:
                        // Other search modes produce no results.
                        result = null;
                        break;
                }

                if (result != null)
                {
                    results.Add(result);
                }
            }
            break;

        default:
            throw new Exception(string.Format("Query Exception: Status={0}, Reason={1}", searchResult.Status, searchResult.Reason));
    }

    return results;
}

// Returns the id unchanged when it parses as a positive integer, otherwise an empty string.
// Unlike Convert.ToInt64 this does not throw for empty, non-numeric or out-of-range values.
private static string NormalizeCriteriaId(string criteriaId)
{
    long numericId;
    if (long.TryParse(criteriaId, out numericId) && numericId > 0)
    {
        return criteriaId;
    }

    return string.Empty;
}

// Copies a provider segment with its source and target side (language, sections, text) exchanged.
private static Sdl.Community.Taus.TM.Provider.Segment.Segment CreateSwappedSegment(
    Sdl.Community.Taus.TM.Provider.Segment.Segment original)
{
    var swapped = new Sdl.Community.Taus.TM.Provider.Segment.Segment();
    swapped.ContentType = original.ContentType;
    swapped.Id = original.Id;
    swapped.Industry = original.Industry;
    swapped.MatchPercentage = original.MatchPercentage;
    swapped.Owner = original.Owner;
    swapped.Product = original.Product;
    swapped.Provider = original.Provider;

    swapped.SourceLanguage = original.TargetLanguage;
    swapped.TargetLanguage = original.SourceLanguage;
    swapped.SourceSections = original.TargetSections;
    swapped.TargetSections = original.SourceSections;
    swapped.SourceText = original.TargetText;
    swapped.TargetText = original.SourceText;
    return swapped;
}
/// <summary>
/// Creates the translation unit as it is later shown in the Translation Results
/// window of SDL Trados Studio. The base score is taken from the match percentage
/// of the provider segment; the confirmation level is Draft when the searched
/// segment has tags and Translated otherwise.
/// </summary>
/// <param name="settings">Search settings; supplies the mode, the minimum score and the configured penalties.</param>
/// <param name="searchSegment">The segment that was searched for; only its <c>HasTags</c> flag is read.</param>
/// <param name="searchResultSegment">The segment returned by the provider.</param>
/// <param name="sourceSegment">Not used by this method.</param>
/// <param name="formattingPenalty">Not used by this method.</param>
/// <returns>The search result, or <c>null</c> when the base score is below <c>settings.MinScore</c>.</returns>
private SearchResult CreateSearchResult(SearchSettings settings, Segment searchSegment,
    Sdl.Community.Taus.TM.Provider.Segment.Segment searchResultSegment, string sourceSegment, bool formattingPenalty)
{
    // NOTE(review): sourceSegment and formattingPenalty are never read. The concordance
    // ranges use _visitor.PlainText and the tag penalty is keyed on searchSegment.HasTags,
    // so callers passing formattingPenalty = false still get the penalty for tagged
    // segments. Confirm whether that is intended before wiring the parameters in.

    var tu = new TranslationUnit();
    var searchSegmentSource = new Segment();
    var searchSegmentTarget = new Segment();
    searchSegmentSource.Add(searchResultSegment.SourceText);
    searchSegmentTarget.Add(searchResultSegment.TargetText);
    tu.SourceSegment = searchSegmentSource;
    tu.TargetSegment = searchSegmentTarget;
    tu.ResourceId = new PersistentObjectToken(tu.GetHashCode(), Guid.Empty);

    var score = Convert.ToInt32(searchResultSegment.MatchPercentage);
    tu.Origin = TranslationUnitOrigin.TM;

    // Expose the provider metadata as picklist fields, formatted as "Name [Id]".
    AddPicklistField(tu, "industry", searchResultSegment.Industry.Name, searchResultSegment.Industry.Id);
    AddPicklistField(tu, "contentType", searchResultSegment.ContentType.Name, searchResultSegment.ContentType.Id);
    AddPicklistField(tu, "owner", searchResultSegment.Owner.Name, searchResultSegment.Owner.Id);
    AddPicklistField(tu, "product", searchResultSegment.Product.Name, searchResultSegment.Product.Id);
    AddPicklistField(tu, "provider", searchResultSegment.Provider.Name, searchResultSegment.Provider.Id);

    var searchResult = new SearchResult(tu)
    {
        ScoringResult = new ScoringResult { BaseScore = score }
    };

    // Concordance searches additionally report which ranges of the hit matched the query.
    if (settings.Mode == SearchMode.ConcordanceSearch)
    {
        searchSegmentSource.Tokens = Tokenize(searchSegmentSource);
        searchResult.ScoringResult.MatchingConcordanceRanges =
            CollectConcordanceMatchRanges(searchSegmentSource, _visitor.PlainText);
    }
    else if (settings.Mode == SearchMode.TargetConcordanceSearch)
    {
        searchSegmentTarget.Tokens = Tokenize(searchSegmentTarget);
        searchResult.ScoringResult.MatchingConcordanceRanges =
            CollectConcordanceMatchRanges(searchSegmentTarget, _visitor.PlainText);
    }

    var providerPenalty = settings.FindPenalty(PenaltyType.ProviderPenalty);
    if (providerPenalty != null && providerPenalty.Malus > 0)
    {
        searchResult.ScoringResult.ApplyPenalty(
            new Penalty(PenaltyType.ProviderPenalty, providerPenalty.Malus));
    }

    // The threshold is compared against the base score, i.e. before penalties.
    if (searchResult.ScoringResult.BaseScore < settings.MinScore)
    {
        return null;
    }

    searchResult.TranslationProposal = searchResult.MemoryTranslationUnit;

    if (searchSegment.HasTags)
    {
        tu.ConfirmationLevel = ConfirmationLevel.Draft;

        // FindPenalty returns null when the penalty is not configured (see the provider
        // penalty above); without this check a tagged segment raised a
        // NullReferenceException in that case.
        var tagsDeletedPenalty = settings.FindPenalty(PenaltyType.MemoryTagsDeleted);
        if (tagsDeletedPenalty != null)
        {
            searchResult.ScoringResult.ApplyPenalty(
                new Penalty(PenaltyType.MemoryTagsDeleted, tagsDeletedPenalty.Malus));
        }
    }
    else
    {
        tu.ConfirmationLevel = ConfirmationLevel.Translated;
    }

    return searchResult;
}

// Adds a picklist field "name [id]" to the translation unit; does nothing when the name is null or blank.
private static void AddPicklistField(TranslationUnit tu, string fieldName, string name, object id)
{
    if (name == null || name.Trim() == string.Empty)
    {
        return;
    }

    var fieldValue = new MultiplePicklistFieldValue(fieldName);
    fieldValue.Add(name + " [" + id + "]");
    tu.FieldValues.Add(fieldValue);
}
/// <summary>
/// Finds every whole-word, case-insensitive occurrence of the words of the search
/// string inside the text runs of a segment.
/// </summary>
/// <remarks>
/// A hit counts as a whole word when the characters directly before and after it are
/// white space or punctuation, or when there is no such character. For a hit at the
/// very start of a run the last character of the preceding text run is used as the
/// "before" character; the "after" check does not look into the following run.
/// </remarks>
/// <param name="segment">The segment whose non-empty text elements are scanned.</param>
/// <param name="searchString">The search text; it is split into words by <c>GetListOfSimpleWords</c>.</param>
/// <returns>
/// One range per hit, grouped by word. The first value of a range is the ordinal of the
/// non-empty text run, the other two are the inclusive start and end character index in that run.
/// </returns>
private List<SegmentRange> CollectConcordanceMatchRanges(Segment segment, string searchString)
{
    var concordanceMatchRanges = new List<SegmentRange>();

    foreach (var word in GetListOfSimpleWords(searchString))
    {
        // IndexOf("") always succeeds without advancing, which would never terminate below.
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        var searchLength = word.Length;
        var run = 0;
        var previousRunText = string.Empty;

        foreach (var element in segment.Elements)
        {
            var text = element as Text;
            if (text == null || string.IsNullOrEmpty(text.Value))
            {
                continue;
            }

            var value = text.Value;
            var index = value.IndexOf(word, StringComparison.OrdinalIgnoreCase);

            while (index >= 0)
            {
                var afterMatch = index + searchLength;

                // The hit must start at a boundary: either the preceding character of this
                // run, or - for a hit at position 0 - the last character of the previous run.
                var prefixBoundary = true;
                if (index > 0)
                {
                    prefixBoundary = IsConcordanceBoundary(value[index - 1]);
                }
                else if (previousRunText.Length > 0)
                {
                    prefixBoundary = IsConcordanceBoundary(previousRunText[previousRunText.Length - 1]);
                }

                // The hit must also end at a boundary. The character after the hit exists
                // whenever afterMatch is a valid index, including the last index of the run.
                var suffixBoundary = afterMatch >= value.Length || IsConcordanceBoundary(value[afterMatch]);

                if (prefixBoundary && suffixBoundary)
                {
                    concordanceMatchRanges.Add(new SegmentRange(run, index, afterMatch - 1));
                }

                // Continue behind the current hit; hits never overlap.
                index = afterMatch < value.Length
                    ? value.IndexOf(word, afterMatch, StringComparison.OrdinalIgnoreCase)
                    : -1;
            }

            run++;
            previousRunText = value;
        }
    }

    return concordanceMatchRanges;
}

// True when the character separates words for concordance matching (white space or punctuation).
private static bool IsConcordanceBoundary(char c)
{
    return char.IsWhiteSpace(c) || char.IsPunctuation(c);
}
/// <summary>
/// Rebuilds a segment from translated markup consisting of text and <c>span</c>
/// elements, re-using duplicates of the tags of the source segment.
/// </summary>
/// <remarks>
/// Each span is mapped back to a source tag through its <c>class</c> (the tag type)
/// and <c>id</c> attributes. If anything goes wrong while reading the markup, the
/// method falls back to a segment that contains the translated text with everything
/// matching <c>&lt;[^&gt;]+&gt;</c> removed.
/// </remarks>
/// <param name="sourceSegment">The original segment; its tags are looked up with <c>FindTag</c>.</param>
/// <param name="translatedText">The translated markup. It is wrapped in a <c>segment</c> root element for parsing.</param>
/// <returns>The reconstructed segment, or the plain-text fallback segment.</returns>
static Sdl.LanguagePlatform.Core.Segment TranslatedHtml2Segment(Sdl.LanguagePlatform.Core.Segment sourceSegment, string translatedText)
{
    // The only tag we feed for translation is span, so we expect the translation to contain only span tags too.
    const string htmlTagName = "span";

    var xmlFragment = "<segment>" + translatedText + "</segment>";
    var tagStack = new Stack<Sdl.LanguagePlatform.Core.Tag>();
    var translatedSegment = new Sdl.LanguagePlatform.Core.Segment();

    using (var xmlReader = new System.Xml.XmlTextReader(xmlFragment, System.Xml.XmlNodeType.Element, null))
    {
        try
        {
            while (xmlReader.Read())
            {
                switch (xmlReader.NodeType)
                {
                    case System.Xml.XmlNodeType.Element:
                        if (xmlReader.Name == htmlTagName)
                        {
                            var tagClass = xmlReader.GetAttribute("class");
                            var tagType = (Sdl.LanguagePlatform.Core.TagType)
                                Enum.Parse(typeof(Sdl.LanguagePlatform.Core.TagType), tagClass);
                            int id = Convert.ToInt32(xmlReader.GetAttribute("id"));
                            Sdl.LanguagePlatform.Core.Tag sourceTag = sourceSegment.FindTag(tagType, id);

                            // Every element that will be closed by an EndElement node has to be
                            // on the stack, standalone tags included; otherwise the matching
                            // </span> pops an unrelated tag or an empty stack.
                            if (!xmlReader.IsEmptyElement)
                            {
                                tagStack.Push(sourceTag);
                            }

                            translatedSegment.Add(sourceTag.Duplicate());

                            // The API translated <span></span> to <span/> (it does that if the tag is empty).
                            // Must fetch the end tag here as there is no EndElement to trigger the next case block.
                            if (tagType != Sdl.LanguagePlatform.Core.TagType.Standalone && xmlReader.IsEmptyElement)
                            {
                                var endTag = sourceSegment.FindTag(Sdl.LanguagePlatform.Core.TagType.End, id);
                                if (endTag != null)
                                {
                                    translatedSegment.Add(endTag.Duplicate());
                                }
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.EndElement:
                        if (xmlReader.Name == htmlTagName)
                        {
                            var startTag = tagStack.Pop();
                            if (startTag.Type != Sdl.LanguagePlatform.Core.TagType.Standalone)
                            {
                                var endTag = sourceSegment.FindTag(
                                    Sdl.LanguagePlatform.Core.TagType.End, startTag.Anchor);
                                if (endTag != null)
                                {
                                    translatedSegment.Add(endTag.Duplicate());
                                }
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.Text:
                    case System.Xml.XmlNodeType.Whitespace:
                        translatedSegment.Add(xmlReader.Value);
                        break;

                    default:
                        break;
                }
            }
        }
        catch (Exception)
        {
            // Best effort: when the markup cannot be mapped back onto the source tags,
            // return the translation without any tags instead of failing.
            var plainTextSegment = new Sdl.LanguagePlatform.Core.Segment();
            string plainText = Regex.Replace(translatedText, "<[^>]+>", "");
            plainTextSegment.Add(plainText);
            return plainTextSegment;
        }
    }

    return translatedSegment;
}