/// <summary>
/// Extracts person entities from <paramref name="text"/>.
/// </summary>
/// <remarks>
/// The text is first passed through <c>CleanText</c>, then analysed by a processor
/// composed of the URI, phone, bank, geo, address, organization, person and
/// named-entity analyzers. Every <c>PersonReferent</c> found is wrapped in a
/// <c>PersonReferrent</c>; the resulting list is handed to <c>UpdateByStat</c>
/// before being returned.
/// </remarks>
/// <param name="text">Raw text to analyse.</param>
/// <returns>A new list with the wrapped persons, exposed as <c>IPersonReferrent</c>.</returns>
public ICollection<IPersonReferrent> Persons(string text)
{
    var found = new List<PersonReferrent>();

    // Analyzer names, in the order they are joined into the processor specification.
    var analyzerNames = new string[]
    {
        UriAnalyzer.ANALYZER_NAME,
        PhoneAnalyzer.ANALYZER_NAME,
        BankAnalyzer.ANALYZER_NAME,
        GeoAnalyzer.ANALYZER_NAME,
        AddressAnalyzer.ANALYZER_NAME,
        OrganizationAnalyzer.ANALYZER_NAME,
        PersonAnalyzer.ANALYZER_NAME,
        NamedEntityAnalyzer.ANALYZER_NAME
    };
    var specification = string.Join(",", analyzerNames);

    using (var processor = ProcessorService.CreateSpecificProcessor(specification))
    {
        // Analyse the cleaned text.
        var source = new SourceOfAnalysis(CleanText(text));
        var analysis = processor.Process(source);

        foreach (var referent in analysis.Entities.OfType<PersonReferent>())
        {
            found.Add(new PersonReferrent(referent));
        }
    }

    // UpdateByStat is defined elsewhere; it receives the mutable list, so the
    // interface projection below must happen only after it has run.
    UpdateByStat(found);

    var result = found.Cast<IPersonReferrent>().ToList();
    return result;
}
/// <summary>
/// Runs the mail analyzer over <paramref name="text"/> and returns the mail
/// blocks whose kind is <c>MailKind.Body</c>, each wrapped in
/// <c>Models.MailReferent</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the method name mentions headers, but the filter keeps only
/// body blocks — confirm this is the intended selection.
/// </remarks>
/// <param name="text">Text of the message to analyse.</param>
/// <returns>A new list of wrapped body blocks; empty when none are found.</returns>
public ICollection<IMailReferent> CommonHeaders(string text)
{
    using (var processor = ProcessorService.CreateSpecificProcessor(MailAnalyzer.ANALYZER_NAME))
    {
        // Analyse the text.
        var analysis = processor.Process(new SourceOfAnalysis(text));

        var bodies = new List<IMailReferent>();

        // Look only at mail blocks, and of those keep the body blocks.
        foreach (var block in analysis.Entities.OfType<MailReferent>())
        {
            if (block.Kind != MailKind.Body)
            {
                continue;
            }

            bodies.Add((IMailReferent) new Models.MailReferent(block));
        }

        return bodies;
    }
}
/// <summary>
/// Runs the geo analyzer over <paramref name="text"/> and merges every
/// <c>GeoReferent</c> found into <paramref name="entities"/>.
/// </summary>
/// <remarks>
/// For each geo referent a property string of the form <c>"name = value;"</c>
/// (one fragment per slot) is built. If an entity with the same value already
/// exists in the list it is removed, and — when its properties differ from the
/// newly built ones — its old properties are appended after the new ones.
/// A new <c>Entity</c> of type <c>EntitiesType.geo</c> is then added.
/// </remarks>
/// <param name="entities">List to update; it is modified in place.</param>
/// <param name="text">Text to analyse.</param>
/// <returns>The same <paramref name="entities"/> instance that was passed in.</returns>
private List<Entity> CreateGeoEntities(List<Entity> entities, string text)
{
    // Initialization is invoked on every call, as in the original code.
    // NOTE(review): presumably these calls are cheap when already initialized — confirm against the SDK.
    ProcessorService.Initialize(MorphLang.RU | MorphLang.EN);
    GeoAnalyzer.Initialize();

    using (Processor proc = ProcessorService.CreateSpecificProcessor(GeoAnalyzer.ANALYZER_NAME))
    {
        AnalysisResult analysis = proc.Process(new SourceOfAnalysis(text));

        foreach (var referent in analysis.Entities)
        {
            if (!(referent is GeoReferent))
            {
                continue;
            }

            var geoName = referent.ToString();

            // Build "name = value;" for every slot. A slot without a value contributes
            // an empty value instead of throwing a NullReferenceException.
            var builder = new System.Text.StringBuilder();
            foreach (var slot in referent.Slots)
            {
                builder.Append(slot.TypeName)
                       .Append(" = ")
                       .Append(slot.Value?.ToString())
                       .Append(";");
            }

            // Keep the original contract: a referent with no slots yields null properties.
            string properties = builder.Length > 0 ? builder.ToString() : null;

            var existing = entities.Find(m => m.Value.Equals(geoName));
            if (existing != null)
            {
                // Carry over the previous properties unless they are identical to the new ones.
                if (existing.Properties != properties)
                {
                    properties += existing.Properties;
                }

                entities.Remove(existing);
            }

            entities.Add(new Entity(geoName, properties, EntitiesType.geo));
        }

        return entities;
    }
}
/// <summary>
/// Console entry point. Initializes the text-processing SDK and
/// <c>MyAnalyzer</c>, then — depending on the first command-line argument —
/// processes documents listed in <c>Texts/*.csv</c> (<c>"csv"</c>) or plain
/// text files in <c>Texts/*.txt</c> (<c>"txt"</c>). Elapsed time and SDK
/// version are printed before and after processing.
/// </summary>
/// <param name="args">Command-line arguments; only <c>args[0]</c> is inspected.</param>
static void Main(string[] args)
{
    Stopwatch sw = Stopwatch.StartNew();

    // Initialization must be performed once, before any text is processed.
    Console.Write("Initializing ... ");
    ProcessorService.Initialize(MorphLang.RU | MorphLang.EN);

    // Every analyzer that is going to be used has to be initialized as well.
    MyAnalyzer.Initialize();
    Console.WriteLine("OK (by {0} ms), version {1}", (int)sw.ElapsedMilliseconds, ProcessorService.Version);

    if (args.Length > 0)
    {
        if (args[0] == "csv")
        {
            ProcessCsvFiles();
        }
        else if (args[0] == "txt")
        {
            ProcessTextFiles();
        }
    }

    sw.Stop();
    Console.WriteLine("Over!(by {0} ms), version {1}", (int)sw.ElapsedMilliseconds, ProcessorService.Version);
}

// Reads every Texts/*.csv file (Name;CaseId;DocumentId per line), downloads each
// document's text, stores the original under Results/ and analyses it.
private static void ProcessCsvFiles()
{
    ClientApiSettings settings = new ClientApiSettings();

    // File.WriteAllText does not create missing directories, so make sure the
    // output directory exists (no-op when it is already there).
    Directory.CreateDirectory("Results");

    foreach (var file in Directory.GetFiles("Texts", "*.csv"))
    {
        using (var sr = new StreamReader(file))
        {
            // Counter used as a file-name prefix; it advances only for lines that
            // were processed successfully and restarts for every csv file.
            var i = 1;
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // NOTE(review): a space is also a separator, so a name containing
                // spaces shifts the columns — confirm this is intended.
                var data = line.Split(';', ' ');
                if (data.Length < 3)
                {
                    // Malformed line: report it and move on instead of indexing past
                    // the end of the array and relying on the catch-all below.
                    Console.WriteLine("Ошибка формата csv. \r\n Формат\r\n Name;CaseId;DocumentId");
                    continue;
                }

                GlobalState.File = i + "_" + MakeValidFileName(data[0]) + ".txt";

                var client = new PravoRu.DataLake.Arbitr.CaseCard.Api.Client.v1.FileClient(settings, new HttpClientFactory());
                DocumentPlainText response = null;
                try
                {
                    // Guid parsing stays inside the try so that bad identifiers are
                    // reported per line and do not abort the whole run.
                    response = client.GetDocumentTextAsync(new DocumentFileRequest()
                    {
                        CaseId = Guid.Parse(data[1]),
                        IsBase64 = false,
                        DocumentId = Guid.Parse(data[2])
                    }).GetAwaiter().GetResult();
                }
                catch (Exception e)
                {
                    // Best effort: log the failure and continue with the next line.
                    Console.WriteLine(data[0] + "\t" + e.Message);
                }

                if (response == null)
                {
                    continue;
                }

                File.WriteAllText(Path.Combine("Results", "Original_" + GlobalState.File), response.HtmlText);

                AnalyzeAndPostProcess(response.HtmlText);

                Console.WriteLine("Обработан файл " + GlobalState.File);
                i++;
            }
        }
    }
}

// Analyses every Texts/*.txt file, using the file name as the current GlobalState.File.
private static void ProcessTextFiles()
{
    foreach (var file in Directory.GetFiles("Texts", "*.txt"))
    {
        Console.WriteLine($"{file}------------------------------------");

        string txt = File.ReadAllText(file);
        GlobalState.File = new FileInfo(file).Name;

        AnalyzeAndPostProcess(txt);
    }
}

// Runs a MyAnalyzer-only processor over the text and passes the result to
// PostExecute; a failure in PostExecute is printed and does not stop the run.
private static void AnalyzeAndPostProcess(string text)
{
    // Create a processor restricted to MyAnalyzer.
    using (Processor proc = ProcessorService.CreateSpecificProcessor(nameof(MyAnalyzer)))
    {
        // Analyse the text.
        AnalysisResult ar = proc.Process(new SourceOfAnalysis(text));
        try
        {
            PostExecute(ar);
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
        }
    }
}