Пример #1
0
        /// <summary>
        /// Searches the index for the given keywords and returns the matching standards.
        /// </summary>
        /// <param name="keywords">Search text; turned into a query by <c>GetQuery</c>.</param>
        /// <param name="progress">Accepted for interface compatibility; this method does not report to it.</param>
        /// <returns>
        /// At most 1000 hits, sorted with the sort returned by <c>GetSort</c>. Only the
        /// StandardName, FileName, Mkb and OrderNum stored fields are copied into each item.
        /// </returns>
        public IEnumerable<MedStandardInfo> Find(string keywords, IProgress<string> progress)
        {
            using (var indexDirectory = GetDirectory())
            using (var indexSearcher = new IndexSearcher(indexDirectory))
            {
                // No filter is applied (null); the hit count is capped at 1000.
                var hits = indexSearcher.Search(GetQuery(keywords), null, 1000, GetSort());

                var found = new List<MedStandardInfo>();
                foreach (var hit in hits.ScoreDocs)
                {
                    // Load the stored document for this hit and copy its fields across.
                    var stored = indexSearcher.Doc(hit.Doc);

                    var info = new MedStandardInfo();
                    info.StandardName = stored.Get("StandardName");
                    info.FileName = stored.Get("FileName");
                    info.Mkb = stored.Get("Mkb");
                    info.OrderNum = stored.Get("OrderNum");

                    found.Add(info);
                }

                // The list is fully built before the searcher and directory are disposed.
                return found;
            }
        }
Пример #2
0
        /// <summary>
        /// Extracts the text of a PDF file and parses the order number, the standard name
        /// and the ICD (МКБ) codes out of it.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to read.</param>
        /// <param name="cancellationToken">
        /// Checked before the extraction starts and before every page is read.
        /// </param>
        /// <param name="progress">Optional sink for status messages; may be null.</param>
        /// <returns>
        /// A <c>MedStandardInfo</c> with FileName and Text always set. OrderNum, StandardName
        /// and Mkb are set only when the corresponding section is found in the text.
        /// </returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
        public async Task<MedStandardInfo> ReadPdfAsync(string fileName, CancellationToken cancellationToken, IProgress<string> progress = null)
        {
            if (!File.Exists(fileName))
            {
                throw new FileNotFoundException($@"Файл {fileName} не найден", fileName);
            }

            progress = progress ?? new Progress<string>();
            progress.Report($@"Чтение текста файла {System.IO.Path.GetFileName(fileName)} ...");

            var text = new StringBuilder();

            await Task.Run(() =>
            {
                using (PdfReader reader = new PdfReader(fileName))
                {
                    // PDF pages are numbered from 1.
                    for (int i = 1; i <= reader.NumberOfPages; i++)
                    {
                        // Task.Run only observes the token before the delegate starts,
                        // so check it per page to make long documents cancellable.
                        cancellationToken.ThrowIfCancellationRequested();
                        text.Append(PdfTextExtractor.GetTextFromPage(reader, i));
                    }
                }
            }, cancellationToken);

            var result = new MedStandardInfo();
            result.FileName = System.IO.Path.GetFileName(fileName);
            result.Text = text.ToString();

            // All matching below is done on lower-cased, trimmed lines. The invariant
            // culture keeps Latin letters inside [a-z] regardless of the current locale.
            var lines = result.Text.ToLowerInvariant().Split('\n').Select(x => x.Trim()).ToList();

            // Order number: the first line containing "№ <digits>" with an optional "н" suffix.
            var regNum = new Regex(@"№\s*\d+\s*(н)?");
            var numMatch = lines.Select(x => regNum.Match(x)).FirstOrDefault(m => m.Success);
            if (numMatch != null)
            {
                result.OrderNum = numMatch.Value.Replace(@"№", "").Trim();
            }

            // The standard name is searched for only after the registration line.
            var lineIndex = lines.FindIndex(x => x.Contains(@"зарегистрировано"));
            if (lineIndex < 0)
            {
                return result;
            }

            lineIndex = lines.FindIndex(lineIndex, x => x.Contains(@"стандарт"));
            if (lineIndex < 0)
            {
                progress.Report(@"Не найдено слово СТАНДАРТ");
                return result;
            }

            // Collect name lines until one contains ":" or "мероприят". The bounds check
            // stops the loop at the end of the text when no such line exists.
            do
            {
                result.StandardName += " " + lines[lineIndex++];
            } while (lineIndex < lines.Count
                     && !lines[lineIndex].Contains(":")
                     && !lines[lineIndex].Contains("мероприят"));
            result.StandardName = result.StandardName.Trim().Replace("  ", " ");

            // The diagnosis section is searched for from the start of the document.
            lineIndex = lines.FindIndex(0, x => x.Contains("код по мкб") || x.Contains("нозолог"));
            if (lineIndex < 0)
            {
                progress.Report(@"Не найдена секция диагнозов");
                return result;
            }

            // A code is a letter followed by 1-2 digits and an optional ".d" / ".dd" part.
            // Cyrillic letters are accepted as well and handed to Translit afterwards.
            // The comma is deliberately not part of the character class: with it,
            // fragments such as ",5" were captured as codes.
            var reg = new Regex(@"[a-zа-я]\d{1,2}(\.\d{1,2})?");
            for (int i = lineIndex; i < lines.Count; i++)
            {
                foreach (var match in reg.Matches(lines[i]).OfType<Match>())
                {
                    result.Mkb += " " + match.Value;
                }

                // The terminating line itself is still scanned for codes before stopping.
                if (lines[i].Contains("мероприят") || lines[i].Contains("услуги"))
                {
                    break;
                }
            }

            if (!string.IsNullOrEmpty(result.Mkb))
            {
                result.Mkb = Translit(result.Mkb.Trim().Replace("  ", " ").ToUpperInvariant());

                // Keep only the (lower-cased) lines that precede the diagnosis section.
                result.Text = string.Join(" ", lines.GetRange(0, lineIndex));
            }

            return result;
        }
Пример #3
0
 /// <summary>
 /// Converts a <c>MedStandardInfo</c> into a Lucene document with one stored field
 /// per property.
 /// </summary>
 /// <param name="medStandard">The standard to index.</param>
 /// <returns>
 /// A document in which StandardName, Text and Mkb are analyzed, while OrderNum and
 /// FileName are indexed as single un-analyzed terms without norms.
 /// </returns>
 private Document MapMedStandard(MedStandardInfo medStandard)
 {
     var document = new Document();

     // The Lucene Field constructor rejects a null value, and a parsed standard may
     // legitimately lack a name, ICD codes or an order number; such properties are
     // indexed as empty strings so the document can still be added.
     document.Add(new Field("StandardName", medStandard.StandardName ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
     document.Add(new Field("Text", medStandard.Text ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
     document.Add(new Field("Mkb", medStandard.Mkb ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
     document.Add(new Field("OrderNum", medStandard.OrderNum ?? string.Empty, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
     document.Add(new Field("FileName", medStandard.FileName ?? string.Empty, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
     return document;
 }