コード例 #1
0
ファイル: TextMeasure.cs プロジェクト: TI-Russia/smart_parser
 /// <summary>
 /// Verifies the approximated "Times New Roman" 10 metrics: for every sample in
 /// CasesTimesNewRoman10 the measured width, truncated to an integer, must equal
 /// the expected value stored for that sample.
 /// </summary>
 public void TextMeasureApproximatedTest()
 {
     TStringMeasure.InitDefaultFontApproximated("Times New Roman", 10);

     foreach (var sample in CasesTimesNewRoman10)
     {
         // Key is the text to measure, Value is the expected truncated width.
         float measured = TStringMeasure.MeasureStringWidth(sample.Key);
         int truncated = (int)measured;
         Assert.AreEqual(sample.Value, truncated);
     }
 }
コード例 #2
0
 /// <summary>
 /// Returns the lines of a cell's text. When the cell reports a font size of 0
 /// the text is split on explicit '\n' characters only; otherwise the default
 /// font is initialised from the cell and the lines are computed by
 /// TStringMeasure.GetLinesBySoftBreaks for the cell width.
 /// </summary>
 /// <param name="cell">Cell providing Text, FontName, FontSize and CellWidth.</param>
 /// <returns>The list of lines for the cell text.</returns>
 public static List<string> GetLinesWithSoftBreaks(Cell cell)
 {
     if (cell.FontSize != 0)
     {
         TStringMeasure.InitDefaultFont(cell.FontName, cell.FontSize);
         return TStringMeasure.GetLinesBySoftBreaks(cell.Text, cell.CellWidth);
     }

     // No font size available: only hard line breaks can be honoured.
     string[] hardLines = cell.Text.Split('\n');
     return hardLines.ToList();
 }
コード例 #3
0
ファイル: TextMeasure.cs プロジェクト: TI-Russia/smart_parser
 /// <summary>
 /// Verifies the system "Times New Roman" 10 metrics against CasesTimesNewRoman10.
 /// The check is skipped entirely when TStringMeasure.IsLinux() reports Linux.
 /// </summary>
 public void TimesNewRomanCharWidthWindows()
 {
     // there is no "Times New Roman" under ubuntu
     if (TStringMeasure.IsLinux())
     {
         return;
     }

     TStringMeasure.InitDefaultFontSystem("Times New Roman", 10);
     foreach (var sample in CasesTimesNewRoman10)
     {
         // Key is the text to measure, Value is the expected truncated width.
         float measured = TStringMeasure.MeasureStringWidth(sample.Key);
         int truncated = (int)measured;
         Assert.AreEqual(sample.Value, truncated);
     }
 }
コード例 #4
0
        /// <summary>
        /// Writes C# source for a <c>Dictionary&lt;int, int&gt;</c> initializer that maps
        /// character codes to widths for the given font. The Latin and Cyrillic ranges
        /// are emitted via WriteCharPeriod, followed by an entry with key 0 whose value
        /// is the median of all collected widths.
        /// </summary>
        /// <param name="fontName">Font to measure; spaces become underscores in the emitted variable name.</param>
        /// <param name="outputFile">Writer that receives the generated source text.</param>
        public static void BuildCharToWidth(string fontName, StreamWriter outputFile)
        {
            TStringMeasure.InitDefaultFontSystem(fontName, 10);

            string dictionaryName = fontName.Replace(' ', '_');
            outputFile.WriteLine("        static Dictionary<int, int> {0} = new Dictionary<int, int> {{", dictionaryName);

            // Widths gathered from both ranges feed the median written under key 0.
            var collectedWidths = new List<int>();
            WriteCharPeriod(LatinStart, LatinEnd, collectedWidths, outputFile);
            outputFile.WriteLine(",");
            WriteCharPeriod(CyrillicStart, CyrillicEnd, collectedWidths, outputFile);

            var median = GetMedian(collectedWidths.ToArray());
            outputFile.WriteLine(",\n             {{0, {0}}}", median);
            outputFile.Write("        };\n");
        }
コード例 #5
0
 /// <summary>
 /// Emits comma-separated "{ code, width }" initializer entries for every character
 /// code in [start, end) and appends each computed width to <paramref name="widths"/>.
 /// The width is the measured string width scaled by 1000 / TStringMeasure.FontSize
 /// and truncated to an integer.
 /// </summary>
 /// <param name="start">First character code to emit (inclusive).</param>
 /// <param name="end">Upper bound of the range; this code itself is not emitted.</param>
 /// <param name="widths">Accumulator that receives one width per emitted character.</param>
 /// <param name="outputFile">Writer that receives the generated text.</param>
 static void WriteCharPeriod(int start, int end, List<int> widths, StreamWriter outputFile)
 {
     outputFile.WriteLine("             //chars from {0} to {1}", start, end);
     outputFile.Write("             ");

     int code = start;
     while (code < end)
     {
         string symbol = ((char)code).ToString();
         float measured = TStringMeasure.MeasureStringWidth(symbol);
         int scaledWidth = (int)(measured * 1000.0f / TStringMeasure.FontSize);
         widths.Add(scaledWidth);

         // Separator goes before every entry except the first one.
         if (code > start)
         {
             outputFile.Write(",");
         }
         outputFile.Write("{{ {0}, {1} }}", code, scaledWidth);

         code++;
     }
 }
コード例 #6
0
        /// <summary>
        /// Splits the text returned by GetText() into separate items by trying a fixed
        /// sequence of heuristics (numbered bullets, dash bullets, parenthesised groups,
        /// blank lines, semicolons, hard line breaks). Some heuristics return their result
        /// unconditionally once their pattern matches; the others are accepted only when
        /// the number of pieces equals <c>linesWithNumbers.Count</c>. If none applies, the
        /// text is laid out into soft-broken lines and those lines are grouped using the
        /// indices in <paramref name="linesWithNumbers"/>.
        /// </summary>
        /// <param name="linesWithNumbers">
        /// Its Count is the expected number of items; in the final fallback its elements
        /// are used as start indices into the soft-broken lines.
        /// </param>
        /// <returns>The pieces of the text produced by the first heuristic that applies.</returns>
        public string[] SplitJoinedLinesByFuzzySeparator(List <int> linesWithNumbers)
        {
            var value = GetText(); // no trim

            string[] lines;

            // Eg: "1. Квартира\n2. Квартира"
            // Eg: "1) Квартира\n2) Квартира"
            // NOTE(review): the second bullet in this pattern accepts only '.', so the
            // "1) ...\n2) ..." example above does not appear to match here - confirm intent.
            if (Regex.Matches(value, @"^\d[\.\)]\s+.+\n\d\.\s", RegexOptions.Singleline).Count > 0)
            {
                // Skip(1) drops the piece that precedes the first bullet.
                lines = (string[])Regex.Split(value, @"\d[\.\)]\s").Skip(1).ToArray();
                return(lines);
            }

            // a weaker regexp but the same count (bullet can contain two digits)
            // Eg: "1. Квартира\n10. Квартира"
            if (Regex.Matches(value, @"^\s*\d[\.\)]\s*.+\n\d[\.\)]\s*", RegexOptions.Singleline).Count > 0)
            {
                lines = (string[])Regex.Split(value, @"\d{1,2}\s*[\.\)]").Skip(1).ToArray();
                // Accepted only when the piece count matches the expected count;
                // otherwise execution falls through to the next heuristics.
                if (lines.Length == linesWithNumbers.Count && linesWithNumbers.Count > 0)
                {
                    return(lines);
                }
            }

            // Eg: "- Квартира\n- Квартира"
            // \p{Pd} is the Unicode dash-punctuation category.
            if (Regex.Matches(value, @"^\p{Pd}\s+.+\n\p{Pd}\s", RegexOptions.Singleline).Count > 0)
            {
                // Splits on newline + dash only, so the first piece keeps its leading dash.
                lines = (string[])Regex.Split(value, @"\n\p{Pd}");
                return(lines);
            }

            // Eg: "... собственность) - Жилой дом ..."
            if (Regex.Matches(value, @"^\p{Pd}.+\)[\s\n]+\p{Pd}\s", RegexOptions.Singleline).Count > 0)
            {
                lines = (string[])Regex.Split(value, @"[\s\n]\p{Pd}\s");
                return(lines);
            }

            // Eg: "Квартира \n(долевая собственность \n\n0,3) \n \n \n \nКвартира \n(индивидуальная собственность) \n"
            // Each match is a run of text followed by a parenthesised group and an optional ';'.
            var matches = Regex.Matches(value, @"[^\)]+\([^\)]+\)\;?", RegexOptions.Singleline);

            if (matches.Count == linesWithNumbers.Count && linesWithNumbers.Count > 0)
            {
                lines = matches.Select(m => m.Value).ToArray();
                return(lines);
            }

            // Eg: Квартира\n\nКвартира\n\nКвартира
            // First collapse runs of non-newline whitespace that precede a newline, so that
            // lines containing only spaces count as blank; then split on two or more newlines.
            var value1 = Regex.Replace(value, @"[\s-[\n]]+\n", "\n");
            var tokens = Regex.Split(value1, @"\n\n+", RegexOptions.Singleline);

            if (tokens.Length == linesWithNumbers.Count && linesWithNumbers.Count > 0)
            {
                return(tokens);
            }

            // Semicolon-separated items: leading/trailing spaces and ';' are trimmed first.
            lines = value.Trim(' ', ';').Split(';');
            if (lines.Length == linesWithNumbers.Count)
            {
                return(lines);
            }
            // Items separated by hard line breaks.
            lines = value.Split('\n');
            if (lines.Length == linesWithNumbers.Count)
            {
                return(lines);
            }

            // Same hard-break split, but ignoring lines that are empty or contain only spaces.
            var notEmptyLines = new List <string>();

            foreach (var l in lines)
            {
                if (l.Trim(' ').Length > 0)
                {
                    notEmptyLines.Add(l);
                }
            }
            if (notEmptyLines.Count == linesWithNumbers.Count)
            {
                return(notEmptyLines.ToArray());
            }

            // Final fallback: compute lines via soft breaks for this font and width, then
            // group them into items using the indices in linesWithNumbers.
            TStringMeasure.InitDefaultFont(FontName, FontSize);
            lines = TStringMeasure.GetLinesBySoftBreaks(value, CellWidth).ToArray();
            var items = new List <String>();

            for (int i = 0; i < linesWithNumbers.Count; i++)
            {
                // An item spans from its own start index up to the next item's start index
                // (or to the end of the lines for the last item).
                int start = linesWithNumbers[i];
                int end   = lines.Length;
                if (i + 1 < linesWithNumbers.Count)
                {
                    end = linesWithNumbers[i + 1];
                }

                // Math.Min clamps the end to the available lines; the joined text is then
                // passed through ReplaceEolnWithSpace (helper not visible here - presumably
                // it turns the line breaks into spaces).
                var item = String.Join("\n", lines.Skip(start).Take(Math.Min(end, lines.Length) - start)).ReplaceEolnWithSpace();
                items.Add(item);
                // Stop once the available lines are exhausted; the result may then hold
                // fewer items than linesWithNumbers.Count.
                if (end >= lines.Length)
                {
                    break;
                }
            }
            return(items.ToArray());
        }