/// <summary>
/// Get the lexical chunk the specified current LSCP is within.
/// </summary>
/// <param name="lexicalService">lexical service used to analyze the collected word; when null no analysis is attempted</param>
/// <param name="lscpCurrent">the current LSCP</param>
/// <param name="lscchLim">signed number of LSCP, relative to lscpCurrent, bounding the range of interest</param>
/// <param name="isCurrentAtWordStart">flag forwarded to the store when collecting the raw word</param>
/// <param name="lscpChunk">chunk position as reported by the store's CollectRawWord</param>
/// <param name="lscchChunk">chunk length as reported by the store's CollectRawWord</param>
/// <returns>
/// A chunk wrapping the breaks produced by the lexical service, or a default-constructed
/// chunk when the word is not eligible for analysis or the analysis yields no breaks.
/// </returns>
private LexicalChunk GetChunk(
    TextLexicalService lexicalService,
    int lscpCurrent,
    int lscchLim,
    bool isCurrentAtWordStart,
    out int lscpChunk,
    out int lscchChunk
    )
{
    int lscpStart = lscpCurrent;
    int lscpLim = lscpStart + lscchLim;

    if (lscpStart > lscpLim)
    {
        // Start is always before limit
        lscpStart = lscpLim;
        lscpLim = lscpCurrent;
    }

    int cchWordMax;
    CultureInfo textCulture;
    SpanVector<int> textVector;

    char[] rawText = _store.CollectRawWord(
        lscpStart,
        isCurrentAtWordStart,
        _isSideways,
        out lscpChunk,
        out lscchChunk,
        out textCulture,
        out cchWordMax,
        out textVector
        );

    // Bail out with an empty chunk unless every precondition for analysis holds.
    // The checks are ordered so the culture query only runs once all cheaper
    // checks (including the null checks it depends on) have passed.
    if (   rawText == null
        || cchWordMax < MinCchWordToHyphenate
        || lscpLim >= lscpChunk + lscchChunk
        || textCulture == null
        || lexicalService == null
        || !lexicalService.IsCultureSupported(textCulture)
        )
    {
        return new LexicalChunk();
    }

    // analyze the collected word and wrap the resulting breaks in a lexical chunk
    TextLexicalBreaks breaks = lexicalService.AnalyzeText(
        rawText,
        rawText.Length,
        textCulture
        );

    return breaks != null
        ? new LexicalChunk(breaks, textVector)
        : new LexicalChunk();
}
/// <summary>
/// Locate the hyphen break that follows or precedes the specified current LSCP.
/// </summary>
/// <remarks>
/// The method does not test whether the current LSCP itself already sits on a
/// hyphen break; it always asks for the next or the previous one.
///
/// The sign of lscchLim selects the direction: a non-negative value searches
/// forward from lscpCurrent, a negative value searches backward for the hyphen
/// immediately before the specified character index.
/// </remarks>
/// <param name="lscpCurrent">the current LSCP</param>
/// <param name="lscchLim">the number of LSCP to search for break</param>
/// <param name="isCurrentAtWordStart">flag indicates whether lscpCurrent is the beginning of the word to hyphenate</param>
/// <param name="lscpHyphen">LSCP of the hyphen; written only when a break is found</param>
/// <param name="lshyph">Hyphen properties; always reset to a new LsHyph</param>
/// <returns>true when a break is found inside the search range, false otherwise</returns>
internal bool FindNextHyphenBreak(
    int lscpCurrent,
    int lscchLim,
    bool isCurrentAtWordStart,
    ref int lscpHyphen,
    ref LsHyph lshyph
    )
{
    // no additional hyphen properties for now
    lshyph = new LsHyph();

    if (_store.Pap.Hyphenator == null)
    {
        return false;
    }

    int chunkStart;
    int chunkLength;

    LexicalChunk chunk = GetChunk(
        _store.Pap.Hyphenator,
        lscpCurrent,
        lscchLim,
        isCurrentAtWordStart,
        out chunkStart,
        out chunkLength
        );

    // Record the end of the collected chunk, whether or not it yields a break.
    _lscpHyphenationLookAhead = chunkStart + chunkLength;

    if (chunk.IsNoBreak)
    {
        return false;
    }

    int ichStart = chunk.LSCPToCharacterIndex(lscpCurrent - chunkStart);
    int ichBound = chunk.LSCPToCharacterIndex(lscpCurrent + lscchLim - chunkStart);

    bool searchForward = lscchLim >= 0;

    int ichBreak = searchForward
        ? chunk.Breaks.GetNextBreak(ichStart)
        : chunk.Breaks.GetPreviousBreak(ichStart);

    if (ichBreak < 0)
    {
        return false;
    }

    // Forward: break must be strictly after the start and no further than the bound.
    // Backward: break must be at or before the start and strictly after the bound.
    bool withinRange = searchForward
        ? (ichBreak > ichStart && ichBreak <= ichBound)
        : (ichBreak <= ichStart && ichBreak > ichBound);

    if (!withinRange)
    {
        return false;
    }

    // -1 because ichBreak is the character index where break occurs in front of it,
    // while LSCP is the position where break occurs after it.
    lscpHyphen = chunk.CharacterIndexToLSCP(ichBreak - 1) + chunkStart;
    return true;
}
/// <summary>
/// Get the lexical chunk the specified current LSCP is within.
/// </summary>
/// <param name="lexicalService">lexical service that analyzes the collected word; a null service results in no analysis</param>
/// <param name="lscpCurrent">the current LSCP</param>
/// <param name="lscchLim">signed LSCP count from lscpCurrent delimiting the range of interest</param>
/// <param name="isCurrentAtWordStart">flag passed through to the store's raw word collection</param>
/// <param name="lscpChunk">chunk position filled in by the store's CollectRawWord</param>
/// <param name="lscchChunk">chunk length filled in by the store's CollectRawWord</param>
/// <returns>
/// The chunk built from the analysis result; a default-constructed chunk when the
/// word fails any eligibility check or the lexical service returns no breaks.
/// </returns>
private LexicalChunk GetChunk(
    TextLexicalService lexicalService,
    int lscpCurrent,
    int lscchLim,
    bool isCurrentAtWordStart,
    out int lscpChunk,
    out int lscchChunk
    )
{
    // Normalize the range so that the start never exceeds the limit,
    // regardless of the sign of lscchLim.
    int lscpStart = lscpCurrent;
    int lscpLim = lscpStart + lscchLim;

    if (lscpStart > lscpLim)
    {
        lscpStart = lscpLim;
        lscpLim = lscpCurrent;
    }

    LexicalChunk chunk = new LexicalChunk();

    int cchWordMax;
    CultureInfo textCulture;
    SpanVector<int> textVector;

    char[] rawText = _store.CollectRawWord(
        lscpStart,
        isCurrentAtWordStart,
        _isSideways,
        out lscpChunk,
        out lscchChunk,
        out textCulture,
        out cchWordMax,
        out textVector
        );

    // The word is analyzed only when text was collected, the word is long enough,
    // the limit falls before the end of the chunk, and the service supports the culture.
    bool canAnalyze =
           rawText != null
        && cchWordMax >= MinCchWordToHyphenate
        && lscpLim < lscpChunk + lscchChunk
        && textCulture != null
        && lexicalService != null
        && lexicalService.IsCultureSupported(textCulture);

    if (canAnalyze)
    {
        TextLexicalBreaks breaks = lexicalService.AnalyzeText(
            rawText,
            rawText.Length,
            textCulture
            );

        if (breaks != null)
        {
            chunk = new LexicalChunk(breaks, textVector);
        }
    }

    return chunk;
}