Example #1
0
        /// <summary>
        /// Searches for a plain text string by wrapping it in a segment created with the
        /// source culture of the language direction and delegating to SearchSegment.
        /// </summary>
        /// <param name="settings">The search settings, passed through unchanged.</param>
        /// <param name="segment">The text to search for.</param>
        /// <returns>Whatever SearchSegment returns for the wrapped text.</returns>
        public SearchResults SearchText(SearchSettings settings, string segment)
        {
            var querySegment = new Sdl.LanguagePlatform.Core.Segment(_languageDirection.SourceCulture);
            querySegment.Add(segment);

            return SearchSegment(settings, querySegment);
        }
Example #2
0
        /// <summary>
        /// Text-based entry point: builds a source-culture segment that holds
        /// <paramref name="segment"/> and hands it to SearchSegment.
        /// </summary>
        /// <param name="settings">The search settings, forwarded as-is.</param>
        /// <param name="segment">The text to look up.</param>
        /// <returns>The results produced by SearchSegment.</returns>
        public SearchResults SearchText(SearchSettings settings, string segment)
        {
            var sourceCulture = _languageDirection.SourceCulture;
            var wrapped       = new Segment(sourceCulture);

            wrapped.Add(segment);

            return SearchSegment(settings, wrapped);
        }
        /// <summary>
        /// Renders a segment as a markup string in which every tag becomes a span element.
        /// Text elements are appended XML-escaped. A start tag becomes an opening span whose
        /// class is the tag type and whose id is the tag anchor, an end tag becomes a closing
        /// span, and standalone / text-placeholder / locked-content tags become an empty span
        /// followed by a single space. Any other element or tag type produces no output.
        /// </summary>
        /// <param name="segment">The segment to render.</param>
        /// <returns>The markup for all elements, in element order.</returns>
        static string Segment2Html(Sdl.LanguagePlatform.Core.Segment segment)
        {
            var html = new StringBuilder();

            foreach (var element in segment.Elements)
            {
                var textElement = element as Sdl.LanguagePlatform.Core.Text;
                if (textElement != null)
                {
                    html.Append(textElement.Value.XmlEscape());
                    continue;
                }

                var tagElement = element as Sdl.LanguagePlatform.Core.Tag;
                if (tagElement == null)
                {
                    continue;
                }

                var tagType = tagElement.Type;
                if (tagType == Sdl.LanguagePlatform.Core.TagType.Start)
                {
                    html.AppendFormat("<span class='{0}' id='{1}'>", tagType, tagElement.Anchor);
                }
                else if (tagType == Sdl.LanguagePlatform.Core.TagType.End)
                {
                    html.Append("</span>");
                }
                else if (tagType == Sdl.LanguagePlatform.Core.TagType.Standalone ||
                         tagType == Sdl.LanguagePlatform.Core.TagType.TextPlaceholder ||
                         tagType == Sdl.LanguagePlatform.Core.TagType.LockedContent)
                {
                    // The space after the closing tag is part of the emitted markup.
                    html.AppendFormat("<span class='{0}' id='{1}'></span> ", tagType, tagElement.Anchor);
                }
            }

            return html.ToString();
        }
Example #4
0
        // Naive tokenizer: every non-empty text element of the segment becomes exactly one
        // token spanning the whole text. Elements that are not text, or whose text is
        // null/empty, are skipped and do not advance the run counter.
        private List <Token> Tokenize(Segment segment)
        {
            var result   = new List <Token>();
            var runIndex = 0;

            foreach (var element in segment.Elements)
            {
                var textRun = element as Text;
                if (textRun != null && !string.IsNullOrEmpty(textRun.Value))
                {
                    var simpleToken = new global::Sdl.LanguagePlatform.Core.Tokenization.SimpleToken(textRun.Value);
                    simpleToken.Span = new SegmentRange(runIndex, 0, textRun.Value.Length - 1);

                    result.Add(simpleToken);
                    runIndex++;
                }
            }

            return result;
        }
 /// <summary>
 /// Looks up a text string: the text is placed in a new segment that carries the
 /// source culture of the language direction, and that segment is searched.
 /// </summary>
 /// <param name="settings">The search settings, handed on unchanged.</param>
 /// <param name="segment">The text to search for.</param>
 /// <returns>The result of SearchSegment for the new segment.</returns>
 public SearchResults SearchText(SearchSettings settings, string segment)
 {
     var textSegment = new Sdl.LanguagePlatform.Core.Segment(_languageDirection.SourceCulture);

     textSegment.Add(segment);

     return SearchSegment(settings, textSegment);
 }
Example #6
0
        #region "SearchSegment"

        /// <summary>
        /// Sends the text of <paramref name="segment"/>, as collected by the segment element
        /// visitor, to the TAUS TM provider and converts every returned segment into a search
        /// result. A normal search is scored as a lookup; source and target concordance
        /// searches are scored as concordance. For a target concordance search the language
        /// pair is swapped in the request and each returned segment is swapped back before
        /// it is converted.
        /// </summary>
        /// <param name="settings">Supplies the search mode and the maximum number of results.</param>
        /// <param name="segment">The segment to search for.</param>
        /// <returns>
        /// The search results. No matches are added when the provider reports "timed out".
        /// </returns>
        /// <exception cref="Exception">
        /// The provider reported a status other than "200" or "timed out".
        /// </exception>
        public SearchResults SearchSegment(SearchSettings settings, Segment segment)
        {
            // Run the visitor over every element so that _visitor.PlainText holds the text
            // that is sent to the provider (presumably without tags - see the visitor).
            #region "SegmentElements"
            _visitor.Reset();
            foreach (var element in segment.Elements)
            {
                element.AcceptSegmentElementVisitor(_visitor);
            }
            #endregion

            #region "SearchResultsObject"
            var results = new SearchResults();
            results.SourceSegment = segment.Duplicate();
            #endregion

            #region  |  Taus.TM.Provider  |
            var tausTmProvider = new Processor();
            var searchSettings = new Sdl.Community.Taus.TM.Provider.Settings.SearchSettings
            {
                Timeout  = Convert.ToInt32(_options.SearchTimeout),
                AppKey   = _options.ConnectionAppKey,
                UserName = _options.ConnectionUserName,
                Password = _options.ConnectionUserPassword,
                AuthKey  = _options.ConnectionAuthKey,
                Limit    = settings.MaxResults
            };

            searchSettings.SearchSections.Add(new SegmentSection(true, _visitor.PlainText));

            // A target concordance search queries the provider in the opposite direction.
            var isTargetConcordance = settings.Mode == SearchMode.TargetConcordanceSearch;
            searchSettings.SourceLanguageId = isTargetConcordance
                ? _languageDirection.TargetCultureName
                : _languageDirection.SourceCultureName;
            searchSettings.TargetLanguageId = isTargetConcordance
                ? _languageDirection.SourceCultureName
                : _languageDirection.TargetCultureName;

            // Only positive numeric IDs are sent as filters; anything else means "no filter".
            searchSettings.IndustryId    = CriteriaIdOrEmpty(_options.SearchCriteriaIndustryId);
            searchSettings.ContentTypeId = CriteriaIdOrEmpty(_options.SearchCriteriaContentTypeId);
            searchSettings.ProviderId    = CriteriaIdOrEmpty(_options.SearchCriteriaProviderId);
            searchSettings.OwnerId       = CriteriaIdOrEmpty(_options.SearchCriteriaOwnerId);
            searchSettings.ProductId     = CriteriaIdOrEmpty(_options.SearchCriteriaProductId);

            searchSettings.PenaltySettings.MissingFormattingPenalty   = 1;
            searchSettings.PenaltySettings.DifferentFormattingPenalty = 1;

            var scoreType = settings.Mode == SearchMode.NormalSearch
                ? Processor.ScoreType.Lookup
                : Processor.ScoreType.Concordance;

            var response = tausTmProvider.SearchSegment(searchSettings, scoreType);
            #endregion

            switch (response.Status)
            {
            case "timed out":
                // Deliberately ignored: the caller receives a result set without matches.
                break;

            case "200":
                foreach (var providerSegment in response.Segments)
                {
                    SearchResult result;
                    switch (settings.Mode)
                    {
                    case SearchMode.TargetConcordanceSearch:
                        // The request was sent with swapped languages, so swap the answer back.
                        result = CreateSearchResult(settings, segment, SwapSourceAndTarget(providerSegment), _visitor.PlainText, false);
                        break;

                    case SearchMode.ConcordanceSearch:
                        result = CreateSearchResult(settings, segment, providerSegment, _visitor.PlainText, false);
                        break;

                    case SearchMode.NormalSearch:
                        result = CreateSearchResult(settings, segment, providerSegment, _visitor.PlainText, segment.HasTags);
                        break;

                    default:
                        // Other search modes produce no results.
                        continue;
                    }

                    // CreateSearchResult returns null for matches below the minimum score.
                    if (result != null)
                    {
                        results.Add(result);
                    }
                }
                break;

            default:
                throw new Exception(string.Format("Query Exception: Status={0}, Reason={1}", response.Status,
                                                  response.Reason));
            }

            return results;
        }

        /// <summary>
        /// Returns <paramref name="id"/> when it parses as an integer greater than zero,
        /// otherwise an empty string. Unlike Convert.ToInt64 this does not throw for an
        /// empty, non-numeric or out-of-range value.
        /// </summary>
        private static string CriteriaIdOrEmpty(string id)
        {
            long parsed;
            return long.TryParse(id, out parsed) && parsed > 0 ? id : string.Empty;
        }

        /// <summary>
        /// Copies a provider segment, exchanging its source and target language, sections
        /// and text. All other properties are copied unchanged.
        /// </summary>
        private static Sdl.Community.Taus.TM.Provider.Segment.Segment SwapSourceAndTarget(Sdl.Community.Taus.TM.Provider.Segment.Segment original)
        {
            var swapped = new Sdl.Community.Taus.TM.Provider.Segment.Segment();

            swapped.ContentType     = original.ContentType;
            swapped.Id              = original.Id;
            swapped.Industry        = original.Industry;
            swapped.MatchPercentage = original.MatchPercentage;
            swapped.Owner           = original.Owner;
            swapped.Product         = original.Product;
            swapped.Provider        = original.Provider;

            swapped.SourceLanguage = original.TargetLanguage;
            swapped.TargetLanguage = original.SourceLanguage;

            swapped.SourceSections = original.TargetSections;
            swapped.TargetSections = original.SourceSections;

            swapped.SourceText = original.TargetText;
            swapped.TargetText = original.SourceText;

            return swapped;
        }
Example #7
0
        #region "CreateSearchResult"

        /// <summary>
        /// Builds the search result for one segment returned by the provider. The translation
        /// unit is created from the returned source and target text, the base score is taken
        /// from the returned match percentage, and the provider metadata (industry, content
        /// type, owner, product, provider) is attached as picklist field values. For
        /// concordance searches the matching ranges are collected as well.
        /// </summary>
        /// <param name="settings">Supplies the search mode, the minimum score and the configured penalties.</param>
        /// <param name="searchSegment">The segment that was searched for; only its HasTags flag is read.</param>
        /// <param name="searchResultSegment">The segment returned by the provider.</param>
        /// <param name="sourceSegment">Not read by this method; the concordance ranges use the visitor's plain text.</param>
        /// <param name="formattingPenalty">Not read by this method.</param>
        /// <returns>
        /// The search result, or <c>null</c> when its base score is below the minimum score
        /// of <paramref name="settings"/>.
        /// </returns>
        private SearchResult CreateSearchResult(SearchSettings settings, Segment searchSegment, Sdl.Community.Taus.TM.Provider.Segment.Segment searchResultSegment,
                                                string sourceSegment, bool formattingPenalty)
        {
            #region "TranslationUnit"
            var matchSource = new Segment();
            var matchTarget = new Segment();

            matchSource.Add(searchResultSegment.SourceText);
            matchTarget.Add(searchResultSegment.TargetText);

            var tu = new TranslationUnit();
            tu.SourceSegment = matchSource;
            tu.TargetSegment = matchTarget;
            tu.ResourceId    = new PersistentObjectToken(tu.GetHashCode(), Guid.Empty);
            #endregion

            #region "TuProperties"
            var score = Convert.ToInt32(searchResultSegment.MatchPercentage);
            tu.Origin = TranslationUnitOrigin.TM;

            AddPicklistFieldValue(tu, "industry", searchResultSegment.Industry.Name, searchResultSegment.Industry.Id);
            AddPicklistFieldValue(tu, "contentType", searchResultSegment.ContentType.Name, searchResultSegment.ContentType.Id);
            AddPicklistFieldValue(tu, "owner", searchResultSegment.Owner.Name, searchResultSegment.Owner.Id);
            AddPicklistFieldValue(tu, "product", searchResultSegment.Product.Name, searchResultSegment.Product.Id);
            AddPicklistFieldValue(tu, "provider", searchResultSegment.Provider.Name, searchResultSegment.Provider.Id);

            var searchResult = new SearchResult(tu)
            {
                ScoringResult = new ScoringResult {
                    BaseScore = score
                }
            };

            // Concordance matches are highlighted on the side that was searched.
            if (settings.Mode == SearchMode.ConcordanceSearch)
            {
                matchSource.Tokens = Tokenize(matchSource);
                searchResult.ScoringResult.MatchingConcordanceRanges = CollectConcordanceMatchRanges(matchSource, _visitor.PlainText);
            }
            else if (settings.Mode == SearchMode.TargetConcordanceSearch)
            {
                matchTarget.Tokens = Tokenize(matchTarget);
                searchResult.ScoringResult.MatchingConcordanceRanges = CollectConcordanceMatchRanges(matchTarget, _visitor.PlainText);
            }

            var providerPenalty = settings.FindPenalty(PenaltyType.ProviderPenalty);
            if (providerPenalty != null && providerPenalty.Malus > 0)
            {
                searchResult.ScoringResult.ApplyPenalty(new Penalty(PenaltyType.ProviderPenalty, providerPenalty.Malus));
            }

            if (searchResult.ScoringResult.BaseScore < settings.MinScore)
            {
                return null;
            }

            searchResult.TranslationProposal = searchResult.MemoryTranslationUnit;

            if (!searchSegment.HasTags)
            {
                tu.ConfirmationLevel = ConfirmationLevel.Translated;
                return searchResult;
            }

            // The searched segment has tags but the proposal is built from plain text only,
            // so it is offered as a draft with the tags-deleted penalty.
            tu.ConfirmationLevel = ConfirmationLevel.Draft;

            // FindPenalty can return null (see the provider penalty above); apply the
            // penalty only when it is configured instead of dereferencing null.
            var tagsDeletedPenalty = settings.FindPenalty(PenaltyType.MemoryTagsDeleted);
            if (tagsDeletedPenalty != null)
            {
                searchResult.ScoringResult.ApplyPenalty(new Penalty(PenaltyType.MemoryTagsDeleted, tagsDeletedPenalty.Malus));
            }
            #endregion

            return searchResult;
        }

        /// <summary>
        /// Adds a picklist field named <paramref name="fieldName"/> with the value
        /// "name [id]" to the translation unit, unless the name is empty or whitespace.
        /// </summary>
        private static void AddPicklistFieldValue(TranslationUnit tu, string fieldName, string name, object id)
        {
            if (name.Trim() == string.Empty)
            {
                return;
            }

            var fieldValue = new MultiplePicklistFieldValue(fieldName);
            fieldValue.Add(name + " [" + id + "]");
            tu.FieldValues.Add(fieldValue);
        }
Example #8
0
        /// <summary>
        /// Finds the whole-word occurrences of every word of <paramref name="searchString"/>
        /// in the non-empty text elements of <paramref name="segment"/>. Matching ignores case
        /// (ordinal). An occurrence counts only when the characters directly before and after
        /// it are white space or punctuation, or when there is no such character. The
        /// character before a match at the start of a text element is the last character of
        /// the previous non-empty text element; the character after a match is only looked
        /// up inside the same text element.
        /// </summary>
        /// <param name="segment">The segment whose text elements are scanned.</param>
        /// <param name="searchString">The search text; it is split into words by GetListOfSimpleWords.</param>
        /// <returns>
        /// One range per accepted occurrence. The range index counts non-empty text elements
        /// only, which is the same numbering Tokenize uses.
        /// </returns>
        private List <SegmentRange> CollectConcordanceMatchRanges(Segment segment, string searchString)
        {
            var concordanceMatchRanges = new List <SegmentRange>();

            foreach (var word in GetListOfSimpleWords(searchString))
            {
                // An empty word is found at every position without ever advancing the
                // search index, so scanning for it would never terminate.
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                var   run           = 0;
                char? precedingChar = null; // last character of the previous non-empty text element

                foreach (var element in segment.Elements)
                {
                    var text = element as Text;
                    if (text == null || string.IsNullOrEmpty(text.Value))
                    {
                        continue;
                    }

                    var value = text.Value;
                    var index = value.IndexOf(word, StringComparison.OrdinalIgnoreCase);

                    while (index >= 0)
                    {
                        var end = index + word.Length; // position of the first character after the match

                        char? before = index > 0 ? value[index - 1] : precedingChar;
                        var prefixBoundary = !before.HasValue || IsWordBoundaryChar(before.Value);

                        // 'end' is a valid position whenever it is below Length. The former
                        // test (end + 1 < Length) skipped the check when the following
                        // character was the last one of the text, so "cat" matched in "cats".
                        var suffixBoundary = end >= value.Length || IsWordBoundaryChar(value[end]);

                        if (prefixBoundary && suffixBoundary)
                        {
                            concordanceMatchRanges.Add(new SegmentRange(run, index, end - 1));
                        }

                        // Continue behind this occurrence; yields -1 when nothing is left.
                        index = value.IndexOf(word, end, StringComparison.OrdinalIgnoreCase);
                    }

                    run++;
                    precedingChar = value[value.Length - 1];
                }
            }

            return concordanceMatchRanges;
        }

        /// <summary>
        /// Tells whether a character separates words, i.e. is white space or punctuation.
        /// </summary>
        private static bool IsWordBoundaryChar(char c)
        {
            return char.IsWhiteSpace(c) || char.IsPunctuation(c);
        }
        /// <summary>
        /// Rebuilds a segment from translated markup in which tags are represented by span
        /// elements whose class attribute holds the tag type and whose id attribute holds the
        /// tag anchor. Every span is replaced by a copy of the matching tag of
        /// <paramref name="sourceSegment"/>; text and white space are copied as text.
        /// When the markup cannot be processed (not well-formed, unknown tag type or id, no
        /// matching source tag) the result is a segment holding the translated text with all
        /// markup removed and character entities decoded.
        /// </summary>
        /// <param name="sourceSegment">The segment whose tags are looked up and copied.</param>
        /// <param name="translatedText">The translated markup.</param>
        /// <returns>The rebuilt segment, or the plain text segment described above.</returns>
        static Sdl.LanguagePlatform.Core.Segment TranslatedHtml2Segment(Sdl.LanguagePlatform.Core.Segment sourceSegment, string translatedText)
        {
            // span is the only element sent for translation, so it is the only one expected back.
            const string htmlTagName = "span";

            // Wrapped in a root element so that the text parses as a single XML fragment.
            var xmlFragment       = "<segment>" + translatedText + "</segment>";
            var tagStack          = new Stack <Sdl.LanguagePlatform.Core.Tag>();
            var translatedSegment = new Sdl.LanguagePlatform.Core.Segment();

            try
            {
                using (var xmlReader = new System.Xml.XmlTextReader(xmlFragment, System.Xml.XmlNodeType.Element, null))
                {
                    while (xmlReader.Read())
                    {
                        switch (xmlReader.NodeType)
                        {
                        case System.Xml.XmlNodeType.Element:
                        {
                            if (xmlReader.Name != htmlTagName)
                            {
                                break;
                            }

                            var tagClass = xmlReader.GetAttribute("class");
                            var tagType  = (Sdl.LanguagePlatform.Core.TagType)
                                           Enum.Parse(typeof(Sdl.LanguagePlatform.Core.TagType), tagClass);
                            int id          = Convert.ToInt32(xmlReader.GetAttribute("id"));
                            var isEmptySpan = xmlReader.IsEmptyElement;

                            var sourceTag = sourceSegment.FindTag(tagType, id);
                            if (sourceTag == null)
                            {
                                // The translation refers to a tag the source does not have.
                                return PlainTextFallbackSegment(translatedText);
                            }

                            translatedSegment.Add(sourceTag.Duplicate());

                            if (!isEmptySpan)
                            {
                                // Every span that will be closed by an EndElement is pushed,
                                // whatever its type, so that each pop below removes the tag
                                // that is really being closed.
                                tagStack.Push(sourceTag);
                            }
                            else if (tagType != Sdl.LanguagePlatform.Core.TagType.Standalone)
                            {
                                // The API collapsed <span></span> to <span/>. No EndElement
                                // will follow, so the end tag is added right away if the
                                // source has one for this id.
                                var endTag = sourceSegment.FindTag(Sdl.LanguagePlatform.Core.TagType.End, id);
                                if (endTag != null)
                                {
                                    translatedSegment.Add(endTag.Duplicate());
                                }
                            }
                        }
                        break;

                        case System.Xml.XmlNodeType.EndElement:
                        {
                            if (xmlReader.Name != htmlTagName)
                            {
                                break;
                            }

                            var openedTag = tagStack.Pop();
                            if (openedTag.Type != Sdl.LanguagePlatform.Core.TagType.Standalone)
                            {
                                var endTag = sourceSegment.FindTag(
                                    Sdl.LanguagePlatform.Core.TagType.End, openedTag.Anchor);
                                if (endTag != null)
                                {
                                    translatedSegment.Add(endTag.Duplicate());
                                }
                            }
                        }
                        break;

                        case System.Xml.XmlNodeType.Text:
                        case System.Xml.XmlNodeType.Whitespace:
                            translatedSegment.Add(xmlReader.Value);
                            break;

                        default:
                            break;
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Best effort by design: any parsing or lookup failure degrades to plain text.
                return PlainTextFallbackSegment(translatedText);
            }

            return translatedSegment;
        }

        /// <summary>
        /// Creates a segment that holds the translated text without markup: everything
        /// between angle brackets is removed, then character entities are decoded.
        /// </summary>
        static Sdl.LanguagePlatform.Core.Segment PlainTextFallbackSegment(string translatedText)
        {
            // Strip the tags first; decoding first could turn escaped text into new "tags".
            var withoutTags = Regex.Replace(translatedText, "<[^>]+>", "");
            var plainText   = System.Net.WebUtility.HtmlDecode(withoutTags);

            var plainTextSegment = new Sdl.LanguagePlatform.Core.Segment();
            plainTextSegment.Add(plainText);
            return plainTextSegment;
        }