예제 #1
0
        /// <summary>
        /// Convenience overload for plain strings: wraps the search string and the text in segments
        /// of the given culture, tokenizes both with a tokenizer created for that culture, and
        /// delegates to the segment-based <c>FindTerms</c> overload.
        /// Typically, the search string is a single search string (such as a concordance search string),
        /// and the text is a translation unit segment.
        /// </summary>
        /// <param name="search">The search text. Must not be null or empty.</param>
        /// <param name="text">The text to search in, usually a sentence or larger block of text.
        /// Must not be null or empty.</param>
        /// <param name="culture">The culture the search and text belong to. Must not be null.</param>
        /// <param name="expectContinuousMatch">Forwarded unchanged to the segment-based overload.</param>
        /// <returns>The result of the segment-based overload for the two tokenized segments.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="search"/> or <paramref name="text"/> is null or empty,
        /// or <paramref name="culture"/> is null.
        /// </exception>
        public static TermFinderResult FindTerms(string search,
                                                 string text,
                                                 System.Globalization.CultureInfo culture,
                                                 bool expectContinuousMatch)
        {
            // Argument validation, in declaration order.
            if (String.IsNullOrEmpty(search))
            {
                throw new ArgumentNullException("search");
            }

            if (String.IsNullOrEmpty(text))
            {
                throw new ArgumentNullException("text");
            }

            if (culture == null)
            {
                throw new ArgumentNullException("culture");
            }

            // Wrap each raw string in its own segment.
            Core.Segment query = new Core.Segment(culture);
            query.Add(search);

            Core.Segment target = new Core.Segment(culture);
            target.Add(text);

            // One tokenizer, configured for the culture, is used for both segments.
            Lingua.Tokenization.Tokenizer tokenizer =
                new Lingua.Tokenization.Tokenizer(Lingua.Tokenization.TokenizerSetupFactory.Create(culture));

            query.Tokens  = tokenizer.Tokenize(query);
            target.Tokens = tokenizer.Tokenize(target);

            return FindTerms(query, target, expectContinuousMatch);
        }
예제 #2
0
        /// <summary>
        /// Processes a segment, individuating the content text, tags and comments.
        /// All result members are reset first, then the children of the segment are visited.
        /// </summary>
        /// <param name="segment">The segment whose children are visited.</param>
        /// <param name="includeTagText">Stored in <c>IncludeTagText</c> before the visit starts.</param>
        public void ProcessSegment(ISegment segment, bool includeTagText)
        {
            // Reset everything left over from a previous call.
            TagCounter = 0;
            Segment = new Sdl.LanguagePlatform.Core.Segment();
            PlainText = new StringBuilder();
            SegmentSections = new List<SegmentSection>();
            Comments = new List<Comment>();

            // Fresh tag collections, one per tag category.
            TagPairs = new List<ITagPair>();
            LockedContentTags = new List<ILockedContent>();
            PlaceholderTags = new List<IPlaceholderTag>();
            TagUnits = new List<TagUnit>();

            // The flag has to be in place before the traversal begins.
            IncludeTagText = includeTagText;

            VisitChildren(segment);
        }
예제 #3
0
        /// <summary>
        /// Renders a segment as an HTML-like string. Text elements are appended XML-escaped; tags are
        /// rendered as <c>span</c> elements whose <c>class</c> is the tag type and whose <c>id</c> is
        /// the tag anchor.
        /// </summary>
        /// <param name="segment">The segment whose elements are rendered.</param>
        /// <returns>The concatenated markup for all text and tag elements of the segment.</returns>
        public static string ToHtml(this Sdl.LanguagePlatform.Core.Segment segment)
        {
            var html = new StringBuilder();

            foreach (var item in segment.Elements)
            {
                // Plain text: escape and move on.
                var textItem = item as Sdl.LanguagePlatform.Core.Text;
                if (textItem != null)
                {
                    html.Append(textItem.Value.XmlEscape());
                    continue;
                }

                // Anything that is neither text nor a tag is skipped.
                var tagItem = item as Sdl.LanguagePlatform.Core.Tag;
                if (tagItem == null)
                {
                    continue;
                }

                switch (tagItem.Type)
                {
                    case Sdl.LanguagePlatform.Core.TagType.Start:
                        // Opening half of a paired tag.
                        html.AppendFormat("<span class='{0}' id='{1}'>", tagItem.Type, tagItem.Anchor);
                        break;

                    case Sdl.LanguagePlatform.Core.TagType.End:
                        // Closing half of a paired tag; no placeholders to fill in.
                        html.Append("</span>");
                        break;

                    case Sdl.LanguagePlatform.Core.TagType.Standalone:
                    case Sdl.LanguagePlatform.Core.TagType.TextPlaceholder:
                    case Sdl.LanguagePlatform.Core.TagType.LockedContent:
                        // Self-contained tags become an empty span followed by a single space.
                        html.AppendFormat("<span class='{0}' id='{1}'></span> ", tagItem.Type, tagItem.Anchor);
                        break;

                    default:
                        // Other tag types produce no output.
                        break;
                }
            }

            return html.ToString();
        }
예제 #4
0
        /// <summary>
        /// Rebuilds a tagged segment from translated text in which tags are represented as
        /// <c>span</c> elements. Each span's <c>class</c> attribute is parsed as a tag type and its
        /// <c>id</c> attribute as an integer; both are used to look the tag up in the source segment,
        /// and a duplicate of that tag is added to the result.
        /// </summary>
        /// <param name="translatedText">The translated text, possibly containing <c>span</c> markup.</param>
        /// <param name="sourceSegment">The segment whose tags are looked up and duplicated.</param>
        /// <returns>
        /// A new segment containing the translated text and the duplicated tags. If any exception is
        /// raised while reading the markup, a segment is returned that holds only the translated text
        /// with every <c>&lt;...&gt;</c> sequence removed.
        /// </returns>
        public static Sdl.LanguagePlatform.Core.Segment ToSegment(this string translatedText, Sdl.LanguagePlatform.Core.Segment sourceSegment)
        {
            var htmlTagName       = "span"; // the only tag we feed for translation is span, so we expect the translation to contain only span tags too.
            var xmlFragment       = "<segment>" + translatedText + "</segment>";
            var tagStack          = new Stack<Sdl.LanguagePlatform.Core.Tag>();
            var translatedSegment = new Sdl.LanguagePlatform.Core.Segment();

            // The reader is disposable; release it on every exit path.
            using (var xmlReader = new System.Xml.XmlTextReader(xmlFragment, System.Xml.XmlNodeType.Element, null))
            {
                try
                {
                    while (xmlReader.Read())
                    {
                        switch (xmlReader.NodeType)
                        {
                            case System.Xml.XmlNodeType.Element:
                                if (xmlReader.Name == htmlTagName)
                                {
                                    var tagClass = xmlReader.GetAttribute("class");
                                    var tagType  = (Sdl.LanguagePlatform.Core.TagType)
                                                   Enum.Parse(typeof(Sdl.LanguagePlatform.Core.TagType), tagClass);
                                    int id = Convert.ToInt32(xmlReader.GetAttribute("id"));
                                    bool isCollapsed = xmlReader.IsEmptyElement;

                                    Sdl.LanguagePlatform.Core.Tag sourceTag = sourceSegment.FindTag(tagType, id);

                                    // Every span that will produce an EndElement node must be pushed, standalone
                                    // ones included. Otherwise the Pop in the EndElement case would hand back the
                                    // enclosing tag (or fail on an empty stack) for "<span ...></span>".
                                    if (!isCollapsed)
                                    {
                                        tagStack.Push(sourceTag);
                                    }

                                    // If the tag is not found in the source (assuming FindTag returns null then),
                                    // this dereference throws and the plain-text fallback below takes over.
                                    translatedSegment.Add(sourceTag.Duplicate());

                                    if (isCollapsed && tagType != Sdl.LanguagePlatform.Core.TagType.Standalone)
                                    {
                                        // The API translated <span></span> to <span/> (it does that if the tag is empty).
                                        // There is no EndElement node to trigger the next case block, so the end tag
                                        // has to be fetched here. Tags without an end tag in the source are tolerated,
                                        // exactly as in the EndElement case.
                                        var endTag = sourceSegment.FindTag(Sdl.LanguagePlatform.Core.TagType.End, id);
                                        if (endTag != null)
                                        {
                                            translatedSegment.Add(endTag.Duplicate());
                                        }
                                    }
                                }
                                break;

                            case System.Xml.XmlNodeType.EndElement:
                                if (xmlReader.Name == htmlTagName)
                                {
                                    var startTag = tagStack.Pop();
                                    if (startTag.Type != Sdl.LanguagePlatform.Core.TagType.Standalone)
                                    {
                                        var endTag = sourceSegment.FindTag(
                                            Sdl.LanguagePlatform.Core.TagType.End, startTag.Anchor);
                                        if (endTag != null)
                                        {
                                            translatedSegment.Add(endTag.Duplicate());
                                        }
                                    }
                                }
                                break;

                            case System.Xml.XmlNodeType.Text:
                            case System.Xml.XmlNodeType.Whitespace:
                                // Text and whitespace are both copied verbatim.
                                translatedSegment.Add(xmlReader.Value);
                                break;

                            default:
                                break;
                        }
                    }
                }
                catch (Exception)
                {
                    // Deliberate best effort: whatever went wrong (malformed markup, unknown tag class,
                    // missing tag), fall back to the translation with all markup stripped.
                    var    plainTextSegment = new Sdl.LanguagePlatform.Core.Segment();
                    string plainText        = Regex.Replace(translatedText, "<[^>]+>", "");
                    plainTextSegment.Add(plainText);
                    return plainTextSegment;
                }
            }

            return translatedSegment;
        }