} // ParseTag

/// <summary>
/// Processes a single tag: keeps the encoding in sync with the font/charset tags,
/// consumes the substitute characters that follow a unicode tag, applies a changed
/// unicode skip count and reports every other tag through <c>NotifyTagFound</c>.
/// </summary>
/// <param name="reader">source of the characters that follow the tag</param>
/// <param name="tag">the tag to process</param>
/// <returns>true if characters were consumed from the reader while skipping after a unicode tag</returns>
/// <exception cref="RtfStructureException">the tag occurs while the group level is zero</exception>
/// <exception cref="RtfUnicodeEncodingException">the unicode skip count is below 0 or above 10</exception>
private bool HandleTag(TextReader reader, IRtfTag tag)
{
    if (_level == 0)
    {
        throw new RtfStructureException(Strings.TagOnRootLevel(tag.ToString()));
    }

    if (_tagCount < 4)
    {
        UpdateEncoding(tag);
    }

    var name = tag.Name;

    // font name detection stays active when the preceding tag announced a theme font
    var lookForFont = _expectingThemeFont;
    if (_tagCountAtLastGroupStart == _tagCount)
    {
        // this tag is the first one of its group
        var announcesThemeFont =
            name == RtfSpec.TagThemeFontLoMajor ||
            name == RtfSpec.TagThemeFontHiMajor ||
            name == RtfSpec.TagThemeFontDbMajor ||
            name == RtfSpec.TagThemeFontBiMajor ||
            name == RtfSpec.TagThemeFontLoMinor ||
            name == RtfSpec.TagThemeFontHiMinor ||
            name == RtfSpec.TagThemeFontDbMinor ||
            name == RtfSpec.TagThemeFontBiMinor;
        if (announcesThemeFont)
        {
            // the font tag itself only arrives as the second tag of the group,
            // so the condition has to be remembered until then
            _expectingThemeFont = true;
        }

        // the first tag of a group is always checked for a font name
        lookForFont = true;
    }

    if (lookForFont)
    {
        if (name == RtfSpec.TagFont)
        {
            if (_fontTableStartLevel > 0)
            {
                // inside the font table: following charset tags refer to this font
                _targetFont = tag.FullName;
                _expectingThemeFont = false; // the announced font has arrived
            }
        }
        else if (name == RtfSpec.TagFontTable)
        {
            _fontTableStartLevel = _level;
        }
    }

    if (_targetFont != null && RtfSpec.TagFontCharset.Equals(name))
    {
        // record the code page of the font being defined and switch to it
        var fontCodePage = RtfSpec.GetCodePage(tag.ValueAsNumber);
        _fontToCodePageMapping[_targetFont] = fontCodePage;
        UpdateEncoding(fontCodePage);
    }

    if (_fontToCodePageMapping.Count > 0 && RtfSpec.TagFont.Equals(name))
    {
        // a font with a recorded code page was selected
        var knownCodePage = (int?)_fontToCodePageMapping[tag.FullName];
        if (knownCodePage.HasValue)
        {
            UpdateEncoding(knownCodePage.Value);
        }
    }

    var consumedInput = false;
    if (name == RtfSpec.TagUnicodeCode)
    {
        _curText.Append((char)tag.ValueAsNumber);

        // skip over the indicated number of 'alternative representation' characters
        var counted = 0;
        while (counted < _unicodeSkipCount)
        {
            var upcoming = PeekNextChar(reader, true);
            if (upcoming == '{' || upcoming == '}')
            {
                // leave the brace unread and stop skipping
                break;
            }

            reader.Read(); // consume the peeked char
            consumedInput = true;

            if (upcoming == ' ' || upcoming == '\r' || upcoming == '\n')
            {
                if (counted == 0)
                {
                    // whitespace seen before anything was counted is consumed without being counted
                    continue;
                }
            }
            else if (upcoming == '\\')
            {
                var afterBackslash = ReadOneByte(reader); // mandatory
                if (afterBackslash == '\'')
                {
                    // a hex-encoded 'byte': both hex digits belong to it as well
                    ReadOneByte(reader); // high nibble
                    ReadOneByte(reader); // low nibble
                }
            }

            counted++;
        }
    }
    else if (name == RtfSpec.TagUnicodeSkipCount)
    {
        var requestedSkipCount = tag.ValueAsNumber;
        if (requestedSkipCount < 0 || requestedSkipCount > 10)
        {
            throw new RtfUnicodeEncodingException(Strings.InvalidUnicodeSkipCount(tag.ToString()));
        }
        _unicodeSkipCount = requestedSkipCount;
    }
    else
    {
        FlushText();
        NotifyTagFound(tag);
    }

    _tagCount++;
    return consumedInput;
} // HandleTag
} // ParseTag

// ----------------------------------------------------------------------
/// <summary>
/// Handles one tag: updates the encoding from header, font-table and font-selection
/// tags, skips the fallback characters that follow a unicode tag, stores a new unicode
/// skip count, and passes any other tag on via <c>NotifyTagFound</c>.
/// </summary>
/// <param name="reader">reader positioned directly behind the tag</param>
/// <param name="tag">the tag that was just parsed</param>
/// <returns>true when characters were read from the reader as part of unicode skipping</returns>
/// <exception cref="RtfStructureException">thrown when the current level is zero</exception>
/// <exception cref="RtfUnicodeEncodingException">thrown for a skip count outside 0..10</exception>
private bool HandleTag(TextReader reader, IRtfTag tag)
{
    if (level == 0)
    {
        throw new RtfStructureException(Strings.TagOnRootLevel(tag.ToString()));
    }

    if (tagCount < 4)
    {
        // only the initial encoding tag of the header section is handled here
        UpdateEncoding(tag);
    }

    string name = tag.Name;

    // a theme font introduced by the previous tag keeps the font name detection enabled
    bool checkForFont = expectingThemeFont;
    if (tagCountAtLastGroupStart == tagCount)
    {
        // first tag in a group
        switch (name)
        {
            case RtfSpec.TagThemeFontLoMajor:
            case RtfSpec.TagThemeFontHiMajor:
            case RtfSpec.TagThemeFontDbMajor:
            case RtfSpec.TagThemeFontBiMajor:
            case RtfSpec.TagThemeFontLoMinor:
            case RtfSpec.TagThemeFontHiMinor:
            case RtfSpec.TagThemeFontDbMinor:
            case RtfSpec.TagThemeFontBiMinor:
                // a new font is introduced, yet its font tag follows as the second
                // tag of the group -> keep that in mind for the next call
                expectingThemeFont = true;
                break;
        }

        // the first tag of a group always takes part in the font name detection
        checkForFont = true;
    }

    if (checkForFont)
    {
        if (name == RtfSpec.TagFont)
        {
            if (fontTableStartLevel > 0)
            {
                // within the font-table definition:
                // this font becomes the target of the charset mapping
                targetFont = tag.FullName;
                expectingThemeFont = false; // state is consumed now
            }
        }
        else if (name == RtfSpec.TagFontTable)
        {
            // from here on we are inside the font-table definition
            fontTableStartLevel = level;
        }
    }

    if (targetFont != null && RtfSpec.TagFontCharset.Equals(name))
    {
        // inside a font definition of the font table: map the charset to a code page
        int mappedCodePage = RtfSpec.GetCodePage(tag.ValueAsNumber);
        fontToCodePageMapping[targetFont] = mappedCodePage;
        UpdateEncoding(mappedCodePage);
    }

    if (fontToCodePageMapping.Count > 0 && RtfSpec.TagFont.Equals(name))
    {
        int selectedCodePage;
        if (fontToCodePageMapping.TryGetValue(tag.FullName, out selectedCodePage))
        {
            UpdateEncoding(selectedCodePage);
        }
    }

    bool readAhead = false;
    switch (name)
    {
        case RtfSpec.TagUnicodeCode:
            curText.Append((char)tag.ValueAsNumber);

            // skip over the indicated number of 'alternative representation' text
            int skipped = 0;
            while (skipped < unicodeSkipCount)
            {
                int peeked = PeekNextChar(reader, true);
                if (peeked == '{' || peeked == '}')
                {
                    // the brace stays in the reader; skipping ends here
                    break;
                }

                reader.Read(); // consume peeked char
                readAhead = true;

                bool isWhitespace = peeked == ' ' || peeked == '\r' || peeked == '\n';
                if (isWhitespace && skipped == 0)
                {
                    // the first whitespace after the tag is only a delimiter
                    // and does not count for skipping
                    continue;
                }

                if (peeked == '\\')
                {
                    int escaped = ReadOneByte(reader); // mandatory
                    if (escaped == '\'')
                    {
                        // hex-encoded 'byte' -> both hex digits must be consumed too
                        ReadOneByte(reader); // high nibble
                        ReadOneByte(reader); // low nibble
                    }
                }

                skipped++;
            }
            break;

        case RtfSpec.TagUnicodeSkipCount:
            int requested = tag.ValueAsNumber;
            if (requested < 0 || requested > 10)
            {
                throw new RtfUnicodeEncodingException(Strings.InvalidUnicodeSkipCount(tag.ToString()));
            }
            unicodeSkipCount = requested;
            break;

        default:
            FlushText();
            NotifyTagFound(tag);
            break;
    }

    tagCount++;
    return readAhead;
} // HandleTag
/// <summary>
/// Converts RTF markup into a minimal HTML page.
/// </summary>
/// <remarks>
/// The markup is split into control words (items starting with a backslash) and plain
/// text values. Output starts at the last item matching <c>\viewkind</c>, or at the first
/// item when there is none. Control words are translated with
/// <c>RtfSpec.GetHtmlFromRtfCode</c>; text values are written HTML-encoded.
/// </remarks>
/// <param name="rawText">
/// RTF markup to convert. When null or empty, the markup is obtained from
/// <c>ConverterHelper.GetFileFromAppAsync()</c> instead.
/// </param>
/// <returns>
/// <c>DefaultHeader</c> followed by an <c>html</c> document whose body holds the converted content.
/// </returns>
public static async Task<string> ParseRtfText(string rawText = "")
{
    // markers wrapped around an item to flag it as plain text rather than a control word
    const string valueOpen = "<itisdesignemval>";
    const string valueClose = "</itisdesignemval>";

    string test = string.IsNullOrEmpty(rawText)
        ? await ConverterHelper.GetFileFromAppAsync().ConfigureAwait(false)
        : rawText;

    // line breaks are removed up front, so the scan below never sees '\r' or '\n'
    string flattened = test.Replace("\r", "").Replace("\n", "");

    var attributeList = new List<string>();
    string attribute = string.Empty;

    // NOTE(review): whatever is still pending in 'attribute' when the input ends is not
    // added to the list - confirm whether that is intended.
    foreach (char item in flattened)
    {
        switch (item)
        {
            case '{':
            case '}':
                // group braces are ignored
                break;

            case ' ':
                // a space with nothing pending is meaningless
                if (!string.IsNullOrEmpty(attribute))
                {
                    if (attribute[0] == '\\')
                    {
                        // space between a control word and the following text: ends the control word
                        attributeList.Add(attribute);
                        attribute = string.Empty;
                    }
                    else if (!string.IsNullOrWhiteSpace(attribute))
                    {
                        // space between ordinary words: part of the text
                        attribute += item;
                    }
                }
                break;

            default:
                if (!string.IsNullOrEmpty(attribute))
                {
                    if (item == '\\')
                    {
                        if (!attribute.Contains("\\"))
                        {
                            // a backslash after ordinary text: the pending text is a value
                            attributeList.Add(valueOpen + attribute.Trim() + valueClose);
                        }
                        else
                        {
                            // a backslash after a control word: the pending control word is complete
                            attributeList.Add(attribute);
                        }
                        attribute = string.Empty;
                    }
                    else
                    {
                        // TODO: values that happen to spell a reserved word are not told apart yet
                        string reservedWord = attribute.Replace("\\", "");
                        bool isReserved = !string.IsNullOrEmpty(RtfSpec.GetHtmlFromRtfCode(reservedWord));

                        // a following '0' belongs to paired words such as \b / \b0, so keep collecting
                        if (isReserved && item != '0')
                        {
                            attributeList.Add(attribute);
                            attribute = string.Empty;
                        }
                    }
                }

                // continue (or start) collecting with the current character
                attribute += item;
                attribute = attribute.Trim();
                break;
        }
    }

    var html = new System.Text.StringBuilder();
    html.Append(DefaultHeader);
    html.Append("<html>");
    html.Append("<head>");
    html.Append("<title>RTF to Html Test page</title>");
    html.Append("</head>");
    html.Append("<body>");

    // find the start point: the last item that matches the viewkind pattern
    int startPoint = 0;
    for (int i = 0; i < attributeList.Count; i++)
    {
        if (Regex.IsMatch(attributeList[i], @"\\viewkind[0-9*]"))
        {
            startPoint = i;
        }
    }

    // write the body
    for (int i = startPoint; i < attributeList.Count; i++)
    {
        string item = attributeList[i];
        if (item.Contains(valueOpen))
        {
            // Plain text value. The preceding item is deliberately not looked up: it was never
            // used, and indexing i - 1 threw when a value was the very first item.
            string valueText = item.Replace(valueOpen, "").Replace(valueClose, "");

            // document text must not be interpreted as markup
            html.Append(System.Net.WebUtility.HtmlEncode(valueText));
        }
        else
        {
            html.Append(RtfSpec.GetHtmlFromRtfCode(item.Replace("\\", "")));
        }
    }

    html.Append("</body>");
    html.Append("</html>");
    return html.ToString();
}