/// <summary>
/// Checks Icu.ToLower with the "en" locale against upper-case, lower-case and
/// mixed-case ASCII input, and against punctuation, which is expected to come back unchanged.
/// Enhance JohnT: should ideally test the case where output is > 10 characters longer than
/// input. However, I have not yet been able to find a Unicode character that is IN FACT
/// longer when converted to lower case.
/// </summary>
public void TestToLower()
{
    const string locale = "en";

    // Parallel tables: inputs[i] is expected to lower-case to expected[i].
    string[] inputs = { "ABC", "abc", "Abc", ";,." };
    string[] expected = { "abc", "abc", "abc", ";,." };

    for (int i = 0; i < inputs.Length; ++i)
    {
        Assert.AreEqual(expected[i], Icu.ToLower(inputs[i], locale));
    }
}
/// <summary>
/// Produce the best-guess analysis for an occurrence, taking its position in the segment
/// into account: for a wordform at index 0 whose baseline text is not already lowercase,
/// a guess for the lowercased wordform is tried first.
/// </summary>
/// <param name="occurrence">The occurrence to guess an analysis for.</param>
/// <returns>
/// The guess found for the lowercased wordform if there is one; null when the occurrence's
/// BaselineWs is -1; otherwise the result of the (analysis, ws) overload of GetBestGuess.
/// </returns>
public IAnalysis GetBestGuess(AnalysisOccurrence occurrence)
{
    // TODO: make it look for the first word in the sentence...may not be at Index 0!
    bool fInitialWordform = occurrence.Analysis is IWfiWordform && occurrence.Index == 0;
    if (fInitialWordform)
    {
        ITsString baseline = occurrence.BaselineText;
        int hvoBaselineWs = baseline.get_WritingSystemAt(0);
        CoreWritingSystemDefinition wsDefinition = Cache.ServiceLocator.WritingSystemManager.Get(hvoBaselineWs);
        string baselineText = baseline.Text;
        string loweredText = Icu.ToLower(baselineText, wsDefinition.IcuLocale);

        // Nothing to gain from a second lookup when the baseline is already lowercase.
        if (loweredText != baselineText)
        {
            ITsString loweredTss = TsStringUtils.MakeString(loweredText, TsStringUtils.GetWsAtOffset(baseline, 0));
            IWfiWordformRepository wordforms = Cache.ServiceLocator.GetInstance<IWfiWordformRepository>();
            IWfiWordform lowercaseWordform;
            IAnalysis lowercaseGuess;
            if (wordforms.TryGetObject(loweredTss, out lowercaseWordform)
                && TryGetBestGuess(lowercaseWordform, occurrence.BaselineWs, out lowercaseGuess))
            {
                return lowercaseGuess;
            }
        }
    }

    // A BaselineWs of -1 happens with empty translation lines; there is nothing to guess.
    return occurrence.BaselineWs == -1
        ? null
        : GetBestGuess(occurrence.Analysis, occurrence.BaselineWs);
}
/// <summary>
/// Find the lexical entries matching a wordform and show them in a SummaryDialogForm.
/// If nothing matches, the lowercased wordform is tried; if that fails too, the user is
/// asked to pick an entry through the find-entry dialog.
/// </summary>
/// <param name="cache">Cache used for the lexicon lookups and passed on to the dialogs.</param>
/// <param name="owner">Owner window for the dialogs.</param>
/// <param name="mediatorIn">Mediator handed to EnsureFlexTypeSetup.</param>
/// <param name="helpProvider">Help topic provider passed to the summary dialog.</param>
/// <param name="helpFileKey">Help file key passed to the summary dialog.</param>
/// <param name="tssWfIn">The wordform to look up.</param>
internal static void DisplayEntries(FdoCache cache, IWin32Window owner, Mediator mediatorIn,
    IHelpTopicProvider helpProvider, string helpFileKey, ITsString tssWfIn)
{
    ITsString tssWf = tssWfIn;
    List<int> rghvo = LexEntryUi.FindEntriesForWordform(cache, tssWf);

    // if we do not find a match for the word then try converting it to lowercase and see if there
    // is an entry in the lexicon for the Wordform in lowercase. This is needed for occurrences of
    // words which are capitalized at the beginning of sentences. LT-7444 RickM
    if (rghvo == null || rghvo.Count == 0)
    {
        // We need to be careful when converting to lowercase, therefore use Icu.ToLower()
        // with the locale of the string's own writing system.
        int wsWf = StringUtils.GetWsAtOffset(tssWf, 0);
        var wsEngine = cache.LanguageWritingSystemFactoryAccessor.get_EngineOrNull(wsWf);
        string sText = tssWf.Text;

        // get_EngineOrNull may return null, and an empty string has nothing to lowercase;
        // in either case skip the retry instead of failing.
        if (wsEngine != null && !string.IsNullOrEmpty(sText))
        {
            string sLower = Icu.ToLower(sText, wsEngine.IcuLocale);
            ITsTextProps ttp = tssWf.get_PropertiesAt(0);
            ITsStrFactory tsf = TsStrFactoryClass.Create();
            tssWf = tsf.MakeStringWithPropsRgch(sLower, sLower.Length, ttp);
            rghvo = LexEntryUi.FindEntriesForWordform(cache, tssWf);
        }
    }

    StringTable stOrig;
    Mediator mediator;
    IVwStylesheet styleSheet;
    bool fRestore = EnsureFlexTypeSetup(cache, mediatorIn, out stOrig, out mediator, out styleSheet);
    try
    {
        if (rghvo == null || rghvo.Count == 0)
        {
            int hvoLe = ShowFindEntryDialog(cache, mediator, tssWf, owner);
            if (hvoLe == 0)
            {
                // The user did not pick an entry; the finally block restores the string table.
                return;
            }
            rghvo = new List<int>(1);
            rghvo.Add(hvoLe);
        }

        using (SummaryDialogForm form = new SummaryDialogForm(rghvo, tssWf, helpProvider, helpFileKey, styleSheet, cache, mediator))
        {
            form.ShowDialog(owner);
            if (form.ShouldLink)
            {
                form.LinkToLexicon();
            }
        }
    }
    finally
    {
        // Restore the original string table in the mediator if needed, even when one of
        // the dialogs throws.
        if (fRestore)
        {
            mediator.StringTbl = stOrig;
        }
    }
}
/// <summary>
/// Get the lead character, either a single character or a composite matching something
/// in the sort rules. (We need to support multi-graph letters. See LT-9244.)
/// </summary>
/// <param name="sEntryNFD">The entry text; the parameter name indicates it is expected in NFD form.</param>
/// <param name="sWs">Writing system identifier, passed to Icu.ToLower, GetDigraphs and the collator.</param>
/// <returns>
/// The lowercased lead character or multigraph, or "" when the input is null or empty or
/// nothing is left after skipping ignored leading characters.
/// </returns>
private string GetLeadChar(string sEntryNFD, string sWs)
{
    if (string.IsNullOrEmpty(sEntryNFD))
    {
        return "";
    }

    string sEntry = Icu.ToLower(sEntryNFD, sWs);
    Dictionary<string, string> mapChars;
    Set<string> sortChars = GetDigraphs(sWs, out mapChars);

    // Apply the equivalence mappings repeatedly until the string stops changing.
    // NOTE(review): a cyclic pair of mappings (a => b, b => a) would keep this loop
    // running forever; confirm GetDigraphs cannot produce one.
    string sEntryT = sEntry;
    bool fChanged;
    do
    {
        foreach (KeyValuePair<string, string> mapping in mapChars)
        {
            sEntry = sEntry.Replace(mapping.Key, mapping.Value);
        }
        fChanged = sEntryT != sEntry;
        sEntryT = sEntry;
    } while (fChanged);

    // Lower-casing or mapping may in principle leave nothing to take a lead character from.
    if (sEntry.Length == 0)
    {
        return "";
    }

    // Start with one code point (both halves of a surrogate pair, so that we never hand
    // half a character to the collator), then widen to the longest matching multigraph.
    string sFirst = sEntry.Substring(0, char.IsSurrogatePair(sEntry, 0) ? 2 : 1);
    foreach (string sChar in sortChars)
    {
        // Ordinal comparison: a culture-sensitive StartsWith can report a match for a
        // string that is not a literal prefix, which would make the Substring below
        // cut at the wrong place or throw.
        if (sChar.Length > sFirst.Length && sEntry.StartsWith(sChar, StringComparison.Ordinal))
        {
            sFirst = sChar;
        }
    }

    // We don't want sFirst for an ignored first character or digraph.
    // A sort key whose first byte is 1 is treated as "ignored" (presumably: no primary
    // weight before the first level separator - confirm against the ICU sort key format).
    ILgCollatingEngine lce = LgIcuCollatorClass.Create();
    lce.Open(sWs);
    byte[] ka = (byte[])lce.get_SortKeyVariant(sFirst, LgCollatingOptions.fcoDefault);
    if (ka != null && ka.Length > 0 && ka[0] == 1)
    {
        // Skip the ignored lead and look at what follows it.
        string sT = sEntry.Substring(sFirst.Length);
        return GetLeadChar(sT, sWs);
    }
    return sFirst;
}
/// <summary>
/// Lower-case a string by delegating to Icu.ToLower with the locale stored in m_icuLocale.
/// </summary>
/// <param name="input">The string to convert.</param>
/// <returns>Whatever Icu.ToLower returns for the input and m_icuLocale.</returns>
public string ToLower(string input)
{
    string lowered = Icu.ToLower(input, m_icuLocale);
    return lowered;
}
/// <summary>
/// Get the set of significant digraphs (multigraphs) for the writing system. At the
/// moment, these are derived from ICU sorting rules associated with the writing system.
/// Results are cached per writing system in m_mapWsDigraphs and m_mapWsMapChars.
/// </summary>
/// <param name="sWs">Writing system identifier.</param>
/// <param name="mapChars">
/// Receives a map from a lowercased, NFD-normalized variant to the lowercased primary
/// form it is declared equal to in the rules.
/// </param>
/// <returns>The lowercased, NFD-normalized multigraphs (more than one UTF-16 unit) found in the rules.</returns>
private Set<string> GetDigraphs(string sWs, out Dictionary<string, string> mapChars)
{
    Set<string> digraphs = null;
    if (m_mapWsDigraphs.TryGetValue(sWs, out digraphs))
    {
        mapChars = m_mapWsMapChars[sWs];
        return digraphs;
    }

    digraphs = new Set<string>();
    mapChars = new Dictionary<string, string>();
    int ws = m_cache.LanguageWritingSystemFactoryAccessor.GetWsFromStr(sWs);
    IWritingSystem wsX = null;
    ICollation coll = null;
    string sIcuRules = null;
    if (ws > 0)
    {
        wsX = m_cache.LanguageWritingSystemFactoryAccessor.get_EngineOrNull(ws);
        // get_EngineOrNull may return null; treat that like a writing system without collations.
        if (wsX != null && wsX.CollationCount > 0)
        {
            coll = wsX.get_Collation(0);
            sIcuRules = coll.IcuRules;
            if (String.IsNullOrEmpty(sIcuRules))
            {
                // The ICU rules may not be loaded for built-in languages, but are
                // still helpful for our purposes here.
                string sIcuOrig = sIcuRules;
                coll.LoadIcuRules(sWs);
                sIcuRules = coll.IcuRules;
                coll.IcuRules = sIcuOrig; // but we don't want to actually change anything!
            }
        }
    }

    if (!String.IsNullOrEmpty(sIcuRules) && sIcuRules.Contains("&"))
    {
        string[] rgsRules = sIcuRules.Split(new char[] { '&' }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string sRuleRaw in rgsRules)
        {
            string sRule = sRuleRaw;
            // This is a valid rule that specifies that the digraph aa should be ignored
            // [last tertiary ignorable] = \u02bc = aa
            // but the code here will ignore this. YAGNI the chances of a user specifying a digraph
            // as ignorable may never happen.
            int ichBracket = sRule.IndexOf('[');
            if (ichBracket >= 0)
            {
                sRule = sRule.Substring(0, ichBracket);
            }
            if (String.IsNullOrEmpty(sRule.Trim()))
            {
                continue;
            }

            // Collapse secondary and tertiary differences into plain equality.
            sRule = sRule.Replace("<<<", "=");
            sRule = sRule.Replace("<<", "=");
            if (sRule.Contains("<"))
            {
                // "&N<ng<<<Ng<ny<<<Ny" => "&N<ng=Ng<ny=Ny"
                // "&N<ñ<<<Ñ" => "&N<ñ=Ñ"
                // There are other issues we are not handling properly such as the next line
                // &N<\u006e\u0067
                string[] rgsPieces = sRule.Split(new char[] { '<', '=' }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string sPiece in rgsPieces)
                {
                    string sGraph = sPiece.Trim();
                    if (String.IsNullOrEmpty(sGraph))
                    {
                        continue;
                    }
                    sGraph = Icu.Normalize(sGraph, Icu.UNormalizationMode.UNORM_NFD);
                    // Only pieces longer than one UTF-16 unit after NFD count as multigraphs.
                    if (sGraph.Length > 1)
                    {
                        sGraph = Icu.ToLower(sGraph, sWs);
                        if (!digraphs.Contains(sGraph))
                        {
                            digraphs.Add(sGraph);
                        }
                    }
                }
            }
            else if (sRule.Contains("="))
            {
                // "&ae<<æ<<<Æ" => "&ae=æ=Æ"
                string[] rgsPieces = sRule.Split(new char[] { '=' }, StringSplitOptions.RemoveEmptyEntries);
                if (rgsPieces.Length == 0)
                {
                    // The rule consisted of nothing but separators; there is no primary to map to.
                    continue;
                }
                string sGraphPrimary = rgsPieces[0].Trim();
                Debug.Assert(!String.IsNullOrEmpty(sGraphPrimary));
                if (String.IsNullOrEmpty(sGraphPrimary))
                {
                    // Mapping variants to an empty primary would make callers delete
                    // characters; skip the malformed rule in release builds too.
                    continue;
                }
                // NOTE(review): the primary is lowercased but, unlike the variants below,
                // not NFD-normalized - confirm whether that asymmetry is intended.
                sGraphPrimary = Icu.ToLower(sGraphPrimary, sWs);
                for (int j = 1; j < rgsPieces.Length; ++j)
                {
                    string sGraph = rgsPieces[j].Trim();
                    if (String.IsNullOrEmpty(sGraph))
                    {
                        continue;
                    }
                    sGraph = Icu.Normalize(sGraph, Icu.UNormalizationMode.UNORM_NFD);
                    sGraph = Icu.ToLower(sGraph, sWs);
                    // The first mapping registered for a variant wins.
                    if (sGraph != sGraphPrimary && !mapChars.ContainsKey(sGraph))
                    {
                        mapChars.Add(sGraph, sGraphPrimary);
                    }
                }
            }
        }
    }

    m_mapWsDigraphs.Add(sWs, digraphs);
    m_mapWsMapChars.Add(sWs, mapChars);
    return digraphs;
}
/// <summary>
/// Find the lexical entry matching a wordform and show its summary dialog. If nothing
/// matches, the lowercased wordform is tried; if that fails too, the user is asked to
/// pick an entry through the find-entry dialog.
/// </summary>
/// <param name="cache">Cache used for the lookups; also stored as the mediator's "cache" property when it differs.</param>
/// <param name="owner">Owner window for the dialogs.</param>
/// <param name="mediatorIn">Mediator handed to EnsureValidMediator; a different mediator returned by that call is disposed here.</param>
/// <param name="helpProvider">Help topic provider passed to the summary dialog.</param>
/// <param name="helpFileKey">Help file key passed to the summary dialog.</param>
/// <param name="tssWfIn">The wordform to look up.</param>
internal static void DisplayEntry(FdoCache cache, IWin32Window owner, Mediator mediatorIn,
    IHelpTopicProvider helpProvider, string helpFileKey, ITsString tssWfIn)
{
    ITsString tssWf = tssWfIn;
    LexEntryUi leui = null;
    Mediator mediator = null;
    // Declared outside the try block so the finally block can restore the string table.
    bool fRestore = false;
    StringTable stOrig = null;
    try
    {
        leui = FindEntryForWordform(cache, tssWf);

        // if we do not find a match for the word then try converting it to lowercase and see if there
        // is an entry in the lexicon for the Wordform in lowercase. This is needed for occurrences of
        // words which are capitalized at the beginning of sentences. LT-7444 RickM
        if (leui == null)
        {
            string sText = tssWf.Text;
            // An empty string has nothing to lowercase; skip the retry instead of failing.
            if (!string.IsNullOrEmpty(sText))
            {
                // We need to be careful when converting to lowercase, therefore use Icu.ToLower()
                // with the locale of the string's own writing system.
                int wsWf = TsStringUtils.GetWsAtOffset(tssWf, 0);
                string wsLocale = cache.ServiceLocator.WritingSystemManager.Get(wsWf).IcuLocale;
                string sLower = Icu.ToLower(sText, wsLocale);
                ITsTextProps ttp = tssWf.get_PropertiesAt(0);
                tssWf = cache.TsStrFactory.MakeStringWithPropsRgch(sLower, sLower.Length, ttp);
                leui = FindEntryForWordform(cache, tssWf);
            }
        }

        // Ensure that we have a valid mediator with the proper string table.
        mediator = EnsureValidMediator(mediatorIn, out fRestore, out stOrig);
        FdoCache cache2 = (FdoCache)mediator.PropertyTable.GetValue("cache");
        if (cache2 != cache)
        {
            mediator.PropertyTable.SetProperty("cache", cache);
        }
        EnsureWindowConfiguration(mediator);
        IVwStylesheet styleSheet = GetStyleSheet(cache, mediator);

        if (leui == null)
        {
            ILexEntry entry = ShowFindEntryDialog(cache, mediator, tssWf, owner);
            if (entry == null)
            {
                // The user did not pick an entry; the finally block restores the string table.
                return;
            }
            leui = new LexEntryUi(entry);
        }

        // mediator has already been dereferenced above, so it cannot be null here.
        leui.Mediator = mediator;
        leui.ShowSummaryDialog(owner, tssWf, helpProvider, helpFileKey, styleSheet);
    }
    finally
    {
        // Restore the original string table in the mediator if needed, even when a
        // dialog throws. mediator is still null if we failed before obtaining one.
        if (mediator != null && fRestore)
        {
            mediator.StringTbl = stOrig;
        }
        if (leui != null)
        {
            leui.Dispose();
        }
        // Only dispose a mediator created here, and never dereference a null one
        // (that would mask the exception that brought us here).
        if (mediator != null && mediator != mediatorIn)
        {
            mediator.Dispose();
        }
    }
}