/// <summary>
/// Attempts to identify the locations of (substrings of) the search text in a larger text.
/// Both strings are wrapped in segments of the given culture, tokenized with a tokenizer
/// created for that culture, and then handed to the segment-based <c>FindTerms</c> overload.
/// Typically, the search text is a single search string (such as a concordance search string),
/// and the text is a translation unit segment.
/// </summary>
/// <param name="search">The search text. Must not be null or empty.</param>
/// <param name="text">The text segment, usually a sentence or larger block of text. Must not be null or empty.</param>
/// <param name="culture">The culture the search and text belong to. Must not be null.</param>
/// <param name="expectContinuousMatch">Forwarded unchanged to the segment-based overload.</param>
/// <returns>The locations and other information about the coverage of the search segment
/// in the result segment.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="search"/> or <paramref name="text"/> is null or empty,
/// or <paramref name="culture"/> is null.
/// </exception>
public static TermFinderResult FindTerms(string search, string text, System.Globalization.CultureInfo culture, bool expectContinuousMatch)
{
    // Argument checks run in declaration order so the first offending argument is reported.
    if (String.IsNullOrEmpty(search))
    {
        throw new ArgumentNullException("search");
    }

    if (String.IsNullOrEmpty(text))
    {
        throw new ArgumentNullException("text");
    }

    if (culture == null)
    {
        throw new ArgumentNullException("culture");
    }

    // Wrap each raw string in its own segment.
    Core.Segment searchSegment = new Core.Segment(culture);
    searchSegment.Add(search);

    Core.Segment textSegment = new Core.Segment(culture);
    textSegment.Add(text);

    // A single tokenizer instance is shared by both segments.
    Lingua.Tokenization.Tokenizer tokenizer =
        new Lingua.Tokenization.Tokenizer(Lingua.Tokenization.TokenizerSetupFactory.Create(culture));

    searchSegment.Tokens = tokenizer.Tokenize(searchSegment);
    textSegment.Tokens = tokenizer.Tokenize(textSegment);

    return FindTerms(searchSegment, textSegment, expectContinuousMatch);
}
// Processes the segment: resets every piece of per-segment state, then visits the
// segment's children. The visit callbacks (not visible in this block) are presumably
// what separate out the content text, tags and comments.
public void ProcessSegment(ISegment segment, bool includeTagText)
{
    // Scalar state.
    TagCounter = 0;
    IncludeTagText = includeTagText;

    // Fresh output containers for the plain text and the rebuilt segment.
    Segment = new Sdl.LanguagePlatform.Core.Segment();
    PlainText = new StringBuilder(string.Empty);

    // Fresh collections, one per kind of item recorded for the segment.
    SegmentSections = new List<SegmentSection>();
    Comments = new List<Comment>();
    TagPairs = new List<ITagPair>();
    LockedContentTags = new List<ILockedContent>();
    PlaceholderTags = new List<IPlaceholderTag>();
    TagUnits = new List<TagUnit>();

    // Must run last: the traversal works against the state initialised above.
    VisitChildren(segment);
}
/// <summary>
/// Renders a segment as a flat markup string in which every tag becomes a <c>span</c> element.
/// Text elements are appended after XML-escaping. Start tags open a span whose <c>class</c>
/// is the tag type and whose <c>id</c> is the tag anchor; end tags close the span; standalone,
/// text-placeholder and locked-content tags become an empty span followed by a single space.
/// Elements that are neither text nor tag, and tags of any other type, produce no output.
/// </summary>
/// <param name="segment">The segment whose elements are rendered.</param>
/// <returns>The markup string built from the segment's elements, in order.</returns>
public static string ToHtml(this Sdl.LanguagePlatform.Core.Segment segment)
{
    var html = new StringBuilder();

    foreach (var item in segment.Elements)
    {
        var plainText = item as Sdl.LanguagePlatform.Core.Text;
        if (plainText != null)
        {
            html.Append(plainText.Value.XmlEscape());
            continue;
        }

        var markup = item as Sdl.LanguagePlatform.Core.Tag;
        if (markup == null)
        {
            continue;
        }

        var kind = markup.Type;
        if (kind == Sdl.LanguagePlatform.Core.TagType.Start)
        {
            html.AppendFormat("<span class='{0}' id='{1}'>", kind, markup.Anchor);
        }
        else if (kind == Sdl.LanguagePlatform.Core.TagType.End)
        {
            html.Append("</span>");
        }
        else if (kind == Sdl.LanguagePlatform.Core.TagType.Standalone
                 || kind == Sdl.LanguagePlatform.Core.TagType.TextPlaceholder
                 || kind == Sdl.LanguagePlatform.Core.TagType.LockedContent)
        {
            // The trailing space after the empty span is part of the emitted markup.
            html.AppendFormat("<span class='{0}' id='{1}'></span> ", kind, markup.Anchor);
        }
    }

    return html.ToString();
}
/// <summary>
/// Rebuilds a segment from translated markup in which tags are represented as <c>span</c>
/// elements carrying a <c>class</c> (the tag type name) and an <c>id</c> (the tag anchor).
/// Each span is mapped back to a duplicate of the matching tag found in
/// <paramref name="sourceSegment"/>; text and whitespace nodes are added as text.
/// </summary>
/// <param name="translatedText">The translated markup; it is wrapped in a root element and parsed as XML.</param>
/// <param name="sourceSegment">The source segment whose tags are looked up by type and anchor.</param>
/// <returns>
/// The rebuilt segment. If any exception is raised while reading the markup, a segment that
/// contains only <paramref name="translatedText"/> with its angle-bracket tags stripped.
/// </returns>
public static Sdl.LanguagePlatform.Core.Segment ToSegment(this string translatedText, Sdl.LanguagePlatform.Core.Segment sourceSegment)
{
    // Only span tags are fed in for translation, so only span tags are expected back.
    var htmlTagName = "span";
    var xmlFragment = "<segment>" + translatedText + "</segment>";
    var tagStack = new Stack<Sdl.LanguagePlatform.Core.Tag>();
    var translatedSegment = new Sdl.LanguagePlatform.Core.Segment();

    // The reader is disposed on every exit path, including the fallback return below.
    using (var xmlReader = new System.Xml.XmlTextReader(xmlFragment, System.Xml.XmlNodeType.Element, null))
    {
        try
        {
            while (xmlReader.Read())
            {
                switch (xmlReader.NodeType)
                {
                    case System.Xml.XmlNodeType.Element:
                        if (xmlReader.Name == htmlTagName)
                        {
                            var tagClass = xmlReader.GetAttribute("class");
                            var tagType = (Sdl.LanguagePlatform.Core.TagType)
                                Enum.Parse(typeof(Sdl.LanguagePlatform.Core.TagType), tagClass);
                            int id = Convert.ToInt32(xmlReader.GetAttribute("id"));
                            bool isCollapsed = xmlReader.IsEmptyElement;
                            bool isStandalone = tagType == Sdl.LanguagePlatform.Core.TagType.Standalone;

                            Sdl.LanguagePlatform.Core.Tag sourceTag = sourceSegment.FindTag(tagType, id);

                            // Remember the opening tag so the matching EndElement can find its end tag.
                            if (!isStandalone && !isCollapsed)
                            {
                                tagStack.Push(sourceTag);
                            }

                            translatedSegment.Add(sourceTag.Duplicate());

                            // The API may collapse <span></span> to <span/>. No EndElement node is
                            // reported for that form, so the end tag has to be fetched here.
                            if (!isStandalone && isCollapsed)
                            {
                                var collapsedEndTag = sourceSegment.FindTag(Sdl.LanguagePlatform.Core.TagType.End, id);

                                // Same guard as the EndElement case: a tag without an End counterpart
                                // must not abort the conversion and discard every tag in the segment.
                                if (collapsedEndTag != null)
                                {
                                    translatedSegment.Add(collapsedEndTag.Duplicate());
                                }
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.EndElement:
                        if (xmlReader.Name == htmlTagName)
                        {
                            var startTag = tagStack.Pop();
                            if (startTag.Type != Sdl.LanguagePlatform.Core.TagType.Standalone)
                            {
                                var endTag = sourceSegment.FindTag(
                                    Sdl.LanguagePlatform.Core.TagType.End, startTag.Anchor);
                                if (endTag != null)
                                {
                                    translatedSegment.Add(endTag.Duplicate());
                                }
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.Text:
                    case System.Xml.XmlNodeType.Whitespace:
                        translatedSegment.Add(xmlReader.Value);
                        break;

                    default:
                        break;
                }
            }
        }
        catch (Exception)
        {
            // Deliberate best-effort fallback: when the markup cannot be mapped back onto the
            // source tags, return the translation as plain text with the tags stripped.
            var plainTextSegment = new Sdl.LanguagePlatform.Core.Segment();
            string plainText = Regex.Replace(translatedText, "<[^>]+>", "");
            plainTextSegment.Add(plainText);
            return plainTextSegment;
        }
    }

    return translatedSegment;
}