/// <summary>
/// Initializes the EP SDK for Russian and English and then every analyzer the
/// application uses. Has to be performed once, before any text is processed.
/// </summary>
private static void InitializeEP()
{
    Logger.Info("Initializing EP... ");

    // Core SDK first.
    var languages = MorphLang.RU | MorphLang.EN;
    ProcessorService.Initialize(languages);

    // Then all analyzers in use; the original order is kept on purpose.
    EP.Ner.Money.MoneyAnalyzer.Initialize();
    EP.Ner.Uri.UriAnalyzer.Initialize();
    EP.Ner.Phone.PhoneAnalyzer.Initialize();
    EP.Ner.Definition.DefinitionAnalyzer.Initialize();
    EP.Ner.Date.DateAnalyzer.Initialize();
    EP.Ner.Bank.BankAnalyzer.Initialize();
    EP.Ner.Geo.GeoAnalyzer.Initialize();
    EP.Ner.Address.AddressAnalyzer.Initialize();
    EP.Ner.Org.OrganizationAnalyzer.Initialize();
    EP.Ner.Person.PersonAnalyzer.Initialize();
    EP.Ner.Mail.MailAnalyzer.Initialize();
    EP.Ner.Transport.TransportAnalyzer.Initialize();
    EP.Ner.Decree.DecreeAnalyzer.Initialize();
    EP.Ner.Titlepage.TitlePageAnalyzer.Initialize();
    EP.Ner.Booklink.BookLinkAnalyzer.Initialize();
    EP.Ner.Named.NamedEntityAnalyzer.Initialize();
    EP.Ner.Goods.GoodsAnalyzer.Initialize();
}
/// <summary>
/// Runs a processor restricted to a fixed set of analyzers over the cleaned
/// <paramref name="text"/> and returns the person entities it finds.
/// </summary>
/// <param name="text">Raw text; it is passed through CleanText before analysis.</param>
/// <returns>The recognized persons, after UpdateByStat has been applied to them.</returns>
public ICollection <IPersonReferrent> Persons(string text)
{
    var found = new List <PersonReferrent>();

    // Comma-separated analyzer names understood by CreateSpecificProcessor.
    var analyzerNames = string.Join(",", new[]
    {
        UriAnalyzer.ANALYZER_NAME,
        PhoneAnalyzer.ANALYZER_NAME,
        BankAnalyzer.ANALYZER_NAME,
        GeoAnalyzer.ANALYZER_NAME,
        AddressAnalyzer.ANALYZER_NAME,
        OrganizationAnalyzer.ANALYZER_NAME,
        PersonAnalyzer.ANALYZER_NAME,
        NamedEntityAnalyzer.ANALYZER_NAME
    });

    using (var processor = ProcessorService.CreateSpecificProcessor(analyzerNames))
    {
        // Analyze the text and wrap every person referent.
        var analysis = processor.Process(new SourceOfAnalysis(CleanText(text)));
        foreach (var referent in analysis.Entities.OfType <PersonReferent>())
        {
            found.Add(new PersonReferrent(referent));
        }
    }

    UpdateByStat(found);
    return found.Cast <IPersonReferrent>().ToList();
}
/// <summary>
/// Checks that GetById returns a processor for a stored key and null for an unknown one.
/// </summary>
public void GetByIdTest()
{
    this._context = this.InitContext().Result;
    ProcessorService ps = new ProcessorService(this._context);
    Processor p1 = new Processor()
    {
        id = "0",
        name = "test_processor1",
        version = "0.0.1",
        enabled = true,
        description = "test processor1",
        file_type = ".fake",
        process_found = 1
    };

    // Persist synchronously: the previous AddAsync/SaveChangesAsync calls were
    // never awaited, so the lookups below could run before (or concurrently
    // with) the save on the same context.
    this._context.Processors.Add(p1);
    this._context.SaveChanges();

    // NOTE(review): the lookup key is p1.name although the method is called GetById — confirm intended.
    var result = ps.GetById(p1.name).Result;
    Assert.NotNull(result);

    var badResult = ps.GetById("fakeID").Result;
    Assert.Null(badResult);
}
/// <summary>
/// Stores two processors and checks that GetAll returns <paramref name="expected"/> rows.
/// </summary>
/// <param name="expected">Expected number of processors returned by GetAll.</param>
public void GetAllTest(int expected)
{
    this._context = this.InitContext().Result;
    ProcessorService ps = new ProcessorService(this._context);
    Processor p1 = new Processor()
    {
        id = "0",
        name = "test_processor1",
        version = "0.0.1",
        enabled = true,
        description = "test processor1",
        file_type = ".fake",
        process_found = 1
    };
    Processor p2 = new Processor()
    {
        id = "1",
        name = "test_processor2",
        version = "0.0.1",
        enabled = true,
        description = "test processor2",
        file_type = ".fake",
        process_found = 1
    };

    // Persist synchronously: the previous AddAsync/SaveChangesAsync calls were
    // never awaited, so GetAll could observe a partially saved context.
    this._context.Processors.Add(p1);
    this._context.Processors.Add(p2);
    this._context.SaveChanges();

    var results = ps.GetAll().Result.ToList();
    Assert.Equal(expected, results.Count);
}
/// <summary>
/// Creates a URL import map. All four arguments are forwarded to the base
/// constructor; the import item is then stored and the processor and HTML
/// services are created with the supplied logger.
/// </summary>
/// <param name="db">Forwarded to the base constructor.</param>
/// <param name="ConnectionString">Forwarded to the base constructor.</param>
/// <param name="importItem">Import definition item; also stored in ImportItem.</param>
/// <param name="l">Logger handed to the base constructor and to both services.</param>
public UrlImportMap(Database db, string ConnectionString, Item importItem, ILogger l)
    : base(db, ConnectionString, importItem, l)
{
    ImportItem = importItem;

    // Services that work with the supplied logger.
    ProcessorService = new ProcessorService(l);
    HtmlService = new HtmlService(l);
}
/// <summary>
/// Fills every hardware list view (system, processor, motherboard, memory,
/// video adapter, audio, drives, temperature) while a "please wait" window is shown.
/// </summary>
private void InitializeInformations()
{
    PleaseWaitWindow pleaseWaitWindow = new PleaseWaitWindow();
    pleaseWaitWindow.Show();
    try
    {
        new SystemService().InsertInformationsToListView(ref systemView);
        new ProcessorService().InsertInformationsToListView(ref processorView);
        new MotherboardService().InsertInformationsToListView(ref motherboardView);
        new MemoryService().InsertInformationsToListView(ref memoryView);
        new VideoAdapterService().InsertInformationsToListView(ref videoAdapterView);
        new AudioService().InsertInformationsToListView(ref audioView);
        new DrivesService().InsertInformationsToListView(ref drivesView);
        new TemperatureService().InsertInformationsToListView(ref temperatureView);
    }
    finally
    {
        // Always dismiss the wait window, even when one of the services throws;
        // previously a failure left it on screen.
        pleaseWaitWindow.Close();
    }
}
/// <summary>
/// Runs every configured post processor for a freshly imported item.
/// </summary>
/// <param name="importRow">Source row the item was created from.</param>
/// <param name="newItem">Item that was created for the row.</param>
public virtual void PostProcessItem(object importRow, Item newItem)
{
    foreach (var postProcessor in PostProcessors)
    {
        // Execution is delegated to the processor service; this map is passed along as the first argument.
        ProcessorService.ExecuteProcessor(this, postProcessor, importRow, newItem);
    }
}
/// <summary>
/// Creates the analyzer and three dedicated processors, each holding exactly
/// one analyzer: organizations, URIs and dates.
/// </summary>
public MyAnalyzer()
{
    // Organizations
    var organizationProcessor = ProcessorService.CreateEmptyProcessor();
    organizationProcessor.AddAnalyzer(new OrganizationAnalyzer());
    _organizationProcessor = organizationProcessor;

    // URIs
    var uriProcessor = ProcessorService.CreateEmptyProcessor();
    uriProcessor.AddAnalyzer(new UriAnalyzer());
    _uriProcessor = uriProcessor;

    // Dates
    var dateProcessor = ProcessorService.CreateEmptyProcessor();
    dateProcessor.AddAnalyzer(new DateAnalyzer());
    _dateProcessor = dateProcessor;
}
/// <summary>
/// Builds the data map from the import definition item: reads the query,
/// target location, template, naming, language, foldering options, field
/// definitions and pre/post processors, and creates the helper services.
/// </summary>
/// <param name="db">Database the items are imported into.</param>
/// <param name="connectionString">Connection string of the import source.</param>
/// <param name="importItem">Item holding the import configuration fields.</param>
/// <param name="l">Logger; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="l"/> is null.</exception>
public BaseDataMap(Database db, string connectionString, Item importItem, ILogger l)
{
    if (l == null)
    {
        // ArgumentNullException instead of a bare Exception; it still derives
        // from Exception, so existing catch blocks keep working.
        throw new ArgumentNullException("l", "The provided Logger is null");
    }

    // instantiate log
    Logger = l;

    // setup import details
    ToDB = db;
    DatabaseConnectionString = connectionString;
    ImportItem = importItem;

    // determine the item name max length
    ItemNameMaxLength = GetNameLength();

    // get query
    Query = ImportItem.GetItemField("Query", Logger);

    // get parent item
    ImportToWhere = GetImportToWhereItem();

    // get new item template
    ImportToWhatTemplate = GetImportToTemplate();

    // get item name field(s): comma-separated list, empty entries dropped
    ItemNameFields = ImportItem.GetItemField("Pull Item Name from What Fields", Logger)
        .Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);

    // get import language
    ImportToLanguage = GetImportItemLanguage("Import To Language", ToDB);

    // foldering information
    FolderByDate = ImportItem.GetItemBool("Folder By Date");
    FolderByName = ImportItem.GetItemBool("Folder By Name");
    FolderByPath = ImportItem.GetItemBool("Folder By Path");
    DateField = ImportItem.GetItemField("Date Field", Logger);
    FolderTemplate = GetImportFolderTemplate();

    // populate field definitions and processors
    FieldDefinitions = GetFieldDefinitions(ImportItem);
    PreProcessors = GetProcessors("Item Pre Processors");
    PostProcessors = GetProcessors("Item Post Processors");

    // helper services
    StringService = new StringService();
    ProcessorService = new ProcessorService(l);
    FieldService = new FieldService(l);
}
/// <summary>
/// Stores the hardware information services supplied by the caller.
/// </summary>
/// <param name="motherBoardService">Motherboard information service.</param>
/// <param name="diskDriveService">Disk drive information service.</param>
/// <param name="physicalMemoryService">Physical memory information service.</param>
/// <param name="processorService">Processor information service.</param>
/// <param name="videoCardService">Video card information service.</param>
public HardwareSystemInformationController(
    MotherBoardService motherBoardService,
    DiskDriveService diskDriveService,
    PhysicalMemoryService physicalMemoryService,
    ProcessorService processorService,
    VideoCardService videoCardService)
{
    _motherBoardService    = motherBoardService;
    _diskDriveService      = diskDriveService;
    _physicalMemoryService = physicalMemoryService;
    _processorService      = processorService;
    _videoCardService      = videoCardService;
}
/// <summary>
/// Loads the requested morphology languages, initializes the processor
/// service with the loaded languages and then runs the initializer of each
/// requested analyzer.
/// </summary>
/// <param name="langs">Keys into the LANGS table.</param>
/// <param name="analyzers">Keys into the ANALYZERS table.</param>
public static void Init(string[] langs, string[] analyzers)
{
    Log.Info("Init Pullenti v{0} ...", ProcessorService.Version);

    for (int i = 0; i < langs.Length; i++)
    {
        string language = langs[i];
        Log.Info("Load lang: {0}", language);
        MorphologyService.LoadLanguages(LANGS[language]);
    }

    ProcessorService.Initialize(MorphologyService.LoadedLanguages);

    for (int i = 0; i < analyzers.Length; i++)
    {
        string analyzerName = analyzers[i];
        Log.Info("Load analyzer: {0}", analyzerName);
        var initializer = ANALYZERS[analyzerName];
        initializer();
    }
}
/// <summary>
/// Analyzes <paramref name="text"/> with the mail analyzer only and returns
/// the mail blocks whose kind is <c>MailKind.Body</c>, wrapped in the model type.
/// </summary>
/// <param name="text">Text to analyze.</param>
/// <returns>The body blocks found; empty when there are none.</returns>
public ICollection <IMailReferent> CommonHeaders(string text)
{
    var bodies = new List <IMailReferent>();

    using (var processor = ProcessorService.CreateSpecificProcessor(MailAnalyzer.ANALYZER_NAME))
    {
        // Analyze the text.
        var analysis = processor.Process(new SourceOfAnalysis(text));

        // Only mail blocks are of interest, and of those only the body blocks.
        foreach (var block in analysis.Entities.OfType <MailReferent>())
        {
            if (block.Kind == MailKind.Body)
            {
                bodies.Add((IMailReferent) new Models.MailReferent(block));
            }
        }
    }

    return bodies;
}
/// <summary>
/// Runs the base registration and then initializes the SDK (Russian and
/// English) together with the analyzers this module relies on.
/// </summary>
/// <param name="builder">Container builder, forwarded to the base implementation.</param>
protected override void Load(ContainerBuilder builder)
{
    base.Load(builder);

    var languages = MorphLang.RU | MorphLang.EN;
    ProcessorService.Initialize(languages);

    // All analyzers in use; the original order is kept.
    EP.Ner.Uri.UriAnalyzer.Initialize();
    EP.Ner.Phone.PhoneAnalyzer.Initialize();
    EP.Ner.Bank.BankAnalyzer.Initialize();
    EP.Ner.Geo.GeoAnalyzer.Initialize();
    EP.Ner.Address.AddressAnalyzer.Initialize();
    EP.Ner.Org.OrganizationAnalyzer.Initialize();
    EP.Ner.Person.PersonAnalyzer.Initialize();
    EP.Ner.Mail.MailAnalyzer.Initialize();
    EP.Ner.Named.NamedEntityAnalyzer.Initialize();
}
/// <summary>
/// Analyzes <paramref name="text"/> and logs its length, the elapsed time
/// and the number of entities found.
/// </summary>
/// <param name="text">Text to analyze.</param>
/// <returns>The analysis result produced by the processor.</returns>
public static AnalysisResult Process(string text)
{
    Stopwatch timer = Stopwatch.StartNew();

    // The original author notes that the processor returned here is cached.
    Processor processor = ProcessorService.CreateProcessor();
    var result = processor.Process(new SourceOfAnalysis(text));

    timer.Stop();
    TimeSpan span = timer.Elapsed;

    // Whole elapsed seconds: TimeSpan.Seconds is only the 0-59 component and
    // silently dropped full minutes for long-running analyses.
    string time = String.Format("{0}.{1:000}", (long)span.TotalSeconds, span.Milliseconds);

    int size = text.Length;
    int referents = result.Entities.Count;
    Log.Info("Process: {0} chars, {1}s, {2} refs", size, time, referents);
    return result;
}
/// <summary>
/// Analyzes the title found above the table and returns the first PERSON
/// entity with its first three words reordered as "third first second".
/// </summary>
/// <returns>
/// The reordered name; the entity text unchanged when it has fewer than
/// three words; <c>default</c> when no PERSON entity is found.
/// </returns>
private string GetPersonName()
{
    string txt = FindTitleAboveTheTable();

    // Create an ordinary processor instance.
    using (Processor proc = ProcessorService.CreateProcessor())
    {
        // Analyze the text.
        AnalysisResult ar = proc.Process(new SourceOfAnalysis(txt));
        var nameEntity = ar.Entities.ToList().Find(x => x.TypeName == "PERSON");
        if (nameEntity != default)
        {
            var entityText = nameEntity.ToString();
            var t = entityText.Split(" ");
            if (t.Length < 3)
            {
                // Fewer than three words cannot be reordered; indexing t[2]
                // used to throw IndexOutOfRangeException here.
                // NOTE(review): returning the raw text is an assumption — confirm desired fallback.
                return entityText;
            }
            return $"{t[2]} {t[0]} {t[1]}";
        }
    }
    return default;
/// <summary>
/// Stores two processors, changes their file types, passes both to
/// Update(list) and checks that the stored rows carry the new file types.
/// </summary>
public void UpdateListTest()
{
    this._context = this.InitContext().Result;
    ProcessorService ps = new ProcessorService(this._context);
    Processor p1 = new Processor()
    {
        id = "0",
        name = "test_processor1",
        version = "0.0.1",
        enabled = true,
        description = "test processor1",
        file_type = ".fake",
        process_found = 1
    };
    Processor p2 = new Processor()
    {
        id = "1",
        name = "test_processor2",
        version = "0.0.1",
        enabled = true,
        description = "test processor2",
        file_type = ".fake",
        process_found = 1
    };

    // Persist synchronously: the previous AddAsync/SaveChangesAsync calls were
    // never awaited, so the update below could race the initial save.
    this._context.Processors.Add(p1);
    this._context.Processors.Add(p2);
    this._context.SaveChanges();

    p1.file_type = ".very_fake1";
    p2.file_type = ".very_fake2";
    Processor[] pList = new Processor[] { p1, p2 };

    // NOTE(review): the value returned by Update is not observed; if it is a Task it should be waited on — confirm.
    var t = ps.Update(pList);

    var result1 = this._context.Processors.SingleAsync(p => p.id == p1.id).Result;
    Assert.Equal(p1.file_type, result1.file_type);

    var result2 = this._context.Processors.SingleAsync(p => p.id == p2.id).Result;
    Assert.Equal(p2.file_type, result2.file_type);
}
/// <summary>
/// Creates the mocks, builds the ProcessorService under test from them and
/// wires a mocked <c>DbSet&lt;Batch&gt;</c> to the in-memory BatchItems so that
/// LINQ queries against the mock enumerate those items.
/// </summary>
public MultiplierManagerTest()
{
    MockLoggingService = new Mock <ILogger <ProcessorService> >();
    MockGeneratorManager = new Mock <IGeneratorManager>();
    MockMultiplierManager = new Mock <IMultiplierManager>();
    MockBatchRepository = new Mock <IBatchRepository>();
    MockBatchContext = new Mock <BatchContext>();

    ProcessorServiceUnderTest = new ProcessorService(
        MockGeneratorManager.Object,
        MockMultiplierManager.Object,
        MockBatchRepository.Object,
        MockLoggingService.Object);

    MockSet = new Mock <DbSet <Batch> >();

    // Route the IQueryable surface of the mocked set to the in-memory data.
    var batches = BatchItems.AsQueryable();
    var queryableSet = MockSet.As <IQueryable <Batch> >();
    queryableSet.Setup(m => m.Provider).Returns(batches.Provider);
    queryableSet.Setup(m => m.Expression).Returns(batches.Expression);
    queryableSet.Setup(m => m.ElementType).Returns(batches.ElementType);
    queryableSet.Setup(m => m.GetEnumerator()).Returns(batches.GetEnumerator);
}
/// <summary>
/// Builds a message flow: creates and initializes one node per node
/// configuration, indexes the nodes by name and resolves the entry node.
/// </summary>
/// <param name="configuration">Flow configuration that lists the nodes and names the entry node.</param>
/// <param name="processor">Processor service handed to every node on initialization.</param>
/// <exception cref="InvalidOperationException">A node configuration is of an unknown type.</exception>
public MessageFlow(MessageFlowConfiguration configuration, ProcessorService processor)
{
    Configuration = configuration;
    Processor = processor;

    #region Create nodes
    nodesByName = new Dictionary<string, Node>();
    foreach (NodeConfiguration nodeCfg in configuration.Nodes)
    {
        Node createdNode = InstantiateNode(nodeCfg);
        createdNode.Initialize(nodeCfg, Processor);
        nodesByName.Add(createdNode.Name, createdNode);
    }
    #endregion

    entryNode = (EntryNode)nodesByName[configuration.GetEntryNode().Name];
}

/// <summary>
/// Creates the (still uninitialized) node object that matches the concrete
/// type of <paramref name="nodeCfg"/>.
/// </summary>
private static Node InstantiateNode(NodeConfiguration nodeCfg)
{
    // The checks run in the same order as before.
    if (nodeCfg is CbrNodeConfiguration)
    {
        return new CbrNode();
    }
    if (nodeCfg is ActionNodeConfiguration)
    {
        return new ActionNode();
    }
    if (nodeCfg is TerminatorNodeConfiguration)
    {
        return new TerminatorNode();
    }
    if (nodeCfg is EntryNodeConfiguration)
    {
        return new EntryNode();
    }

    throw new InvalidOperationException(string.Format(
        "Cannot create a node named '{0}' of unknown node type.", nodeCfg.Name));
}
/*
 * Disabled helper, kept for reference:
 *
 * private static void FillLists(List<Referent> entities)
 * {
 *     foreach(var e in entities)
 *     {
 *         listNamedEntities.Add(new NamedEntity(e, e.Slots
 *             .Select(t => new NamedEntityElement(t.TypeName, t.Value.ToString())).ToArray()));
 *     }
 * }
 */

/// <summary>
/// Initializes the SDK for Russian and returns a standard processor with the
/// measure analyzer added to it.
/// </summary>
private static Processor InitializeProcessor()
{
    Sdk.Initialize(MorphLang.RU);

    // NOTE(review): the original author tagged this spot as "buggy"; it is unclear
    // whether that refers to the standard processor or to the measure analyzer.
    var processor = ProcessorService.CreateProcessor();
    processor.AddAnalyzer(new Pullenti.Ner.Measure.MeasureAnalyzer());
    return processor;

    /*
     * For an empty processor:
     *
     * processor.AddAnalyzer(new Pullenti.Ner.Address.AddressAnalyzer());
     * processor.AddAnalyzer(new Pullenti.Ner.Date.DateAnalyzer());
     * processor.AddAnalyzer(new Pullenti.Ner.Money.MoneyAnalyzer());
     * processor.AddAnalyzer(new Pullenti.Ner.Named.NamedEntityAnalyzer());
     * processor.AddAnalyzer(new Pullenti.Ner.Person.PersonAnalyzer());
     * processor.AddAnalyzer(new Pullenti.Ner.Phone.PhoneAnalyzer());
     * processor.AddAnalyzer(new Pullenti.Ner.Measure.MeasureAnalyzer());
     */
}
/// <summary>
/// Runs the geo analyzer over <paramref name="text"/> and merges every geo
/// referent into <paramref name="entities"/>. An existing entity with the
/// same value is replaced; its properties are appended to the new ones when
/// the two property strings differ.
/// </summary>
/// <param name="entities">List to update; it is modified in place.</param>
/// <param name="text">Text to analyze.</param>
/// <returns>The same list instance that was passed in.</returns>
private List <Entity> CreateGeoEntities(List <Entity> entities, string text)
{
    ProcessorService.Initialize(MorphLang.RU | MorphLang.EN);
    GeoAnalyzer.Initialize();

    using (Processor processor = ProcessorService.CreateSpecificProcessor(GeoAnalyzer.ANALYZER_NAME))
    {
        AnalysisResult analysis = processor.Process(new SourceOfAnalysis(text));
        foreach (var referent in analysis.Entities)
        {
            if (!(referent is GeoReferent))
            {
                continue;
            }

            var geoName = referent.ToString();

            // "name = value;" for every slot; stays null when there are no slots.
            string properties = null;
            foreach (var slot in referent.Slots)
            {
                properties += slot.TypeName.ToString() + " = " + slot.Value.ToString() + ";";
            }

            var known = entities.Find(m => m.Value.Equals(geoName));
            if (known != null)
            {
                string knownProperties = known.Properties;
                if (knownProperties != properties)
                {
                    properties += knownProperties;
                }
                entities.Remove(known);
            }

            entities.Add(new Entity(geoName, properties, EntitiesType.geo));
        }
    }

    return entities;
}
/// <summary>
/// Test fixture setup: initializes the SDK for Russian and English and all
/// analyzers the tests use.
/// </summary>
public UnitTest1()
{
    var languages = MorphLang.RU | MorphLang.EN;
    ProcessorService.Initialize(languages);

    // All analyzers in use; the original order is kept.
    EP.Ner.Money.MoneyAnalyzer.Initialize();
    EP.Ner.Uri.UriAnalyzer.Initialize();
    EP.Ner.Phone.PhoneAnalyzer.Initialize();
    EP.Ner.Definition.DefinitionAnalyzer.Initialize();
    EP.Ner.Date.DateAnalyzer.Initialize();
    EP.Ner.Bank.BankAnalyzer.Initialize();
    EP.Ner.Geo.GeoAnalyzer.Initialize();
    EP.Ner.Address.AddressAnalyzer.Initialize();
    EP.Ner.Org.OrganizationAnalyzer.Initialize();
    EP.Ner.Person.PersonAnalyzer.Initialize();
    EP.Ner.Mail.MailAnalyzer.Initialize();
    EP.Ner.Transport.TransportAnalyzer.Initialize();
    EP.Ner.Decree.DecreeAnalyzer.Initialize();
    EP.Ner.Titlepage.TitlePageAnalyzer.Initialize();
    EP.Ner.Booklink.BookLinkAnalyzer.Initialize();
    EP.Ner.Named.NamedEntityAnalyzer.Initialize();
    EP.Ner.Goods.GoodsAnalyzer.Initialize();
}
/// <summary>
/// Checks that Update(id, processor) overwrites the stored processor and that
/// an update with an unknown id leaves the stored row unchanged.
/// </summary>
public void UpdateTest()
{
    this._context = this.InitContext().Result;
    ProcessorService ps = new ProcessorService(this._context);
    Processor p1 = new Processor()
    {
        id = "0",
        name = "test_processor1",
        version = "0.0.1",
        enabled = true,
        description = "test processor1",
        file_type = ".fake",
        process_found = 1
    };
    Processor p2 = new Processor()
    {
        id = "0",
        name = "test_processor2",
        version = "0.0.2",
        enabled = true,
        description = "test processor2",
        file_type = ".fake2",
        process_found = 1
    };

    // Persist synchronously: the previous AddAsync/SaveChangesAsync calls were
    // never awaited, so the update below could race the initial save.
    this._context.Processors.Add(p1);
    this._context.SaveChanges();

    // NOTE(review): the values returned by Update are not observed; if they are Tasks they should be waited on — confirm.
    var t = ps.Update(p1.id, p2);
    var result = this._context.Processors.SingleAsync(p => p.id == p1.id).Result;
    Assert.Equal(p2.name, result.name); // P1 processor updated with P2

    var t2 = ps.Update("invalid", p1);
    var result2 = this._context.Processors.SingleAsync(p => p.id == p1.id).Result;
    Assert.Equal(p2.name, result2.name); // No update expected.
}
/// <summary>
/// Builds the term collections used by the analyzer (the "contract" keyword
/// with its abbreviations, and legal-form names), initializes the analyzers
/// it depends on and registers a <c>MyAnalyzer</c> instance with the SDK.
/// </summary>
public static void Initialize()
{
    // "контракт" (contract) and its abbreviations.
    _terminCollection = new TerminCollection();
    Termin tContract = new Termin("контракт", MorphLang.RU, true);
    tContract.AddAbridge("гос.контракт");
    tContract.AddAbridge("г.контракт");
    tContract.AddAbridge("гос.к-т");
    _terminCollection.Add(tContract);

    // Legal forms. The canonical text was misspelled ("ОТВЕТСВЕННОСТЬЮ"), so
    // the correctly spelled full form could never match; fixed here.
    // NOTE(review): "ЗАО" is registered as a variant of the same term as "ООО" — confirm this is intended.
    _opfTerminCollection = new TerminCollection();
    Termin tOpf = new Termin("ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ");
    tOpf.AddAbridge("ООО");
    tOpf.AddVariant("ЗАКРЫТОЕ АКЦИОНЕРНОЕ ОБЩЕСТВО");
    tOpf.AddAbridge("ЗАО");
    _opfTerminCollection.Add(tOpf);

    // Analyzers whose results MyAnalyzer builds on.
    OrganizationAnalyzer.Initialize();
    UriAnalyzer.Initialize();
    DateAnalyzer.Initialize();

    ProcessorService.RegisterAnalyzer(new MyAnalyzer());
}
/// <summary>
/// Checks that Create stores a fully populated processor and that creating
/// an empty processor yields a result whose id is null.
/// </summary>
public void CreateTest()
{
    this._context = this.InitContext().Result;
    var service = new ProcessorService(this._context);

    // Fully populated processor: must be retrievable by id afterwards.
    var populated = new Processor
    {
        id = "0",
        name = "test_processor",
        version = "0.0.1",
        enabled = true,
        description = "test processor",
        file_type = ".fake",
        process_found = 1
    };
    var created = service.Create(populated).Result;
    var stored = this._context.Processors.SingleAsync(x => x.id == populated.id).Result;
    Assert.NotNull(stored);

    // Empty processor: the result carries no id.
    var blank = new Processor();
    var blankResult = service.Create(blank).Result;
    Assert.Null(blankResult.id);
}
/// <summary>
/// Seeds the database with test data. For every entity set that currently
/// holds at most one record, two new records are added through the matching
/// service.
/// </summary>
/// <param name="context">Data context to seed.</param>
private static void AddDataForTesting(DataContext context)
{
    #region Model
    var modelMinRequiredRecord = 1;
    var modelId = 0; // id of the first model seeded in this run; used by the assets below
    var modelService = new ModelService(context);
    if (context.Models.LongCount() <= modelMinRequiredRecord)
    {
        for (var i = 0; i <= modelMinRequiredRecord; i++)
        {
            var obj = new Model { Name = string.Format("Model {0}", (i + 1)) };
            modelService.Add(obj);
            if (modelId == 0)
            {
                modelId = obj.Id;
            }
        }
    }
    #endregion

    #region Category
    var categoryMinRequiredRecord = 1;
    if (context.Categories.LongCount() <= categoryMinRequiredRecord)
    {
        var categoryService = new CategoryService(context);
        for (var i = 0; i <= categoryMinRequiredRecord; i++)
        {
            var obj = new Category { Name = string.Format("Category {0}", (i + 1)) };
            categoryService.Add(obj);
        }
    }
    #endregion

    #region Manufacturer
    var manufacturerMinRequiredRecord = 1;
    if (context.Manufacturers.LongCount() <= manufacturerMinRequiredRecord)
    {
        var manufacturerService = new ManufacturerService(context);
        for (var i = 0; i <= manufacturerMinRequiredRecord; i++)
        {
            var obj = new Manufacturer { Name = string.Format("Manufacturer {0}", (i + 1)) };
            manufacturerService.Add(obj);
        }
    }
    #endregion

    #region Processor
    var processorMinRequiredRecord = 1;
    if (context.Processors.LongCount() <= processorMinRequiredRecord)
    {
        var processorService = new ProcessorService(context);
        for (var i = 0; i <= processorMinRequiredRecord; i++)
        {
            var obj = new Processor { Name = string.Format("Processor {0}", (i + 1)) };
            processorService.Add(obj);
        }
    }
    #endregion

    #region Size
    var hardDiskMinRequiredRecord = 1;
    if (context.HardDiskSizes.LongCount() <= hardDiskMinRequiredRecord)
    {
        var hdService = new HardDiskSizeService(context);
        for (var i = 0; i <= hardDiskMinRequiredRecord; i++)
        {
            var obj = new HardDisk { Size = string.Format("{0}", (i + 1)) };
            hdService.Add(obj);
        }
    }

    var memoryMinRequiredRecord = 1;
    if (context.MemorySizes.LongCount() <= memoryMinRequiredRecord)
    {
        var ramService = new MemorySizeService(context);
        for (var i = 0; i <= memoryMinRequiredRecord; i++)
        {
            var obj = new Memory { Size = string.Format("{0}", (i + 1)) };
            ramService.Add(obj);
        }
    }

    var videoCardMinRequiredRecord = 1;
    if (context.VideoCardSizes.LongCount() <= videoCardMinRequiredRecord)
    {
        var vcService = new VideoCardSizeService(context);
        for (var i = 0; i <= videoCardMinRequiredRecord; i++)
        {
            var obj = new VideoCard { Size = string.Format("{0}", (i + 1)) };
            vcService.Add(obj);
        }
    }
    #endregion

    #region Supplier
    var supplierMinRequiredRecord = 1;
    if (context.Suppliers.LongCount() <= supplierMinRequiredRecord)
    {
        var supplierService = new SupplierService(context);
        for (int i = 0; i <= supplierMinRequiredRecord; i++)
        {
            var obj = new Supplier { Name = "Supplier " + i };
            supplierService.Add(obj);
        }
    }
    #endregion

    #region User
    var userMinRequiredRecord = 1;
    if (context.Users.LongCount() <= userMinRequiredRecord)
    {
        var userService = new UserService(context);
        for (var i = 0; i <= userMinRequiredRecord; i++)
        {
            var obj = new User
            {
                FullName = "User " + (i + 1),
                UserName = "******" + (i + 1),
            };
            userService.Add(obj, "blotocol");
        }
    }
    #endregion

    #region Asset
    var assetMinRequiredRecord = 1;
    if (context.Assets.LongCount() <= assetMinRequiredRecord)
    {
        if (modelId == 0)
        {
            // No model was seeded in this run (enough models already existed),
            // so modelId was left at 0 and assets pointed at a non-existent
            // model. Reuse the lowest id of an existing model instead.
            modelId = context.Models.OrderBy(m => m.Id).Select(m => m.Id).FirstOrDefault();
        }

        var service = new AssetService(context);
        for (int i = 0; i <= assetMinRequiredRecord; i++)
        {
            var obj = new Asset
            {
                AssetTag = "Asset Tag" + i,
                Name = "Asset Name" + i,
                Status = StatusType.Available,
                ModelId = modelId
            };
            service.Add(obj);
        }
    }
    #endregion
}
//
// Summary:
//     Main extraction routine. Collects participating organizations (with
//     INN/OGRN where available), contract numbers found near dates and after
//     the "contract" keyword, registers the result as a MyReferent and writes
//     a report file into the "Results" folder.
//
// Parameters:
//   kit:
//     Analysis kit holding the source text and its token chain.
public override void Process(AnalysisKit kit)
{
    try
    {
        List <Participant> organizationReferents = new List <Participant>();
        var analysisResult = _organizationProcessor.Process(kit.Sofa);
        var analyzerData = kit.GetAnalyzerData(this);

        // Look for participants; organizations whose text contains " СУД " (court) are skipped.
        foreach (var organizationReferent in analysisResult.Entities.OfType <OrganizationReferent>().Where(x => !x.ToString().ToUpper().Contains(" СУД ")))
        {
            var participant = new Participant() { Name = organizationReferent.ToString() };
            if (String.IsNullOrWhiteSpace(organizationReferent.INN) || String.IsNullOrWhiteSpace(organizationReferent.OGRN))
            {
                // The referent lacks INN and/or OGRN: try to recognize them around each occurrence,
                // keeping the first non-empty value found for each of the two.
                foreach (var occurance in organizationReferent.Occurrence)
                {
                    var tempParticipant = RecognizeParticipant(kit.Sofa.Text, occurance.BeginChar, occurance.EndChar, organizationReferent);
                    if (!tempParticipant.Inn.NullOrEmpty() && participant.Inn.NullOrEmpty())
                    {
                        participant.Inn = tempParticipant.Inn;
                    }
                    if (!tempParticipant.Ogrn.NullOrEmpty() && participant.Ogrn.NullOrEmpty())
                    {
                        participant.Ogrn = tempParticipant.Ogrn;
                    }
                }
            }
            else
            {
                participant = new Participant
                {
                    Inn = organizationReferent.INN,
                    Ogrn = organizationReferent.OGRN,
                    Name = organizationReferent.ToString()
                };
            }

            // The participant may carry the same requisites as an already known one
            // but under a different name. In that case it is added without requisites.
            if (!organizationReferents.Any(x => x.Name.Equals(participant.Name)))
            {
                try
                {
                    if (!participant.Inn.NullOrEmpty() && organizationReferents.Any(x => participant.Inn.Equals(x.Inn)))
                    {
                        participant.Inn = null;
                    }
                    if (!participant.Ogrn.NullOrEmpty() && organizationReferents.Any(x => participant.Ogrn.Equals(x.Ogrn)))
                    {
                        participant.Ogrn = null;
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine(e);
                    throw;
                }
            }
            organizationReferents.Add(participant);
        }

        // Participants
        var participants = organizationReferents.Distinct(new OrganizationComparator()).ToList();

        // Look for contracts next to dates.
        List <Contract> contracts = new List <Contract>();
        analysisResult = _dateProcessor.Process(kit.Sofa);
        foreach (var dateReferent in analysisResult.Entities.OfType <DateReferent>())
        {
            if (dateReferent.Day > 0)
            {
                foreach (var occurance in dateReferent.Occurrence)
                {
                    // Window: up to 100 characters before the date, the date itself and
                    // up to 100 characters after it, clamped to the text bounds.
                    var start = occurance.BeginChar - 100 < 0 ? 0 : occurance.BeginChar - 100;
                    var length = occurance.EndChar - occurance.BeginChar + 200;
                    var str = "";
                    try
                    {
                        str = kit.Sofa.Text.Substring(start, kit.Sofa.Text.Length - start >= length ? length : kit.Sofa.Text.Length - start);
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(e);
                        throw;
                    }
                    AnalysisKit analyzisKit = new AnalysisKit(new SourceOfAnalysis(str), true, MorphLang.RU) { Processor = ProcessorService.CreateEmptyProcessor() };
                    var numbers = ExtractionNumbers(analyzisKit.FirstToken).Distinct().ToList();
                    if (numbers.Count > 0)
                    {
                        foreach (var number in numbers)
                        {
                            var contract = contracts.FirstOrDefault(x => String.Equals(x.Number, number, StringComparison.InvariantCultureIgnoreCase));
                            if (contract == null)
                            {
                                var c = new Contract() { Number = number };
                                c.Dates.Add(dateReferent.Dt);
                                contracts.Add(c);
                            }
                            else
                            {
                                contract.Dates.Add(dateReferent.Dt);
                            }
                        }
                        // The first occurrence that yields numbers is enough for this date.
                        break;
                    }
                }
            }
        }

        // Try to pick contracts by the "contract" keyword.
        List <string> contractNumbers = new List <string>();
        for (Token t = kit.FirstToken; t != null; t = t.Next)
        {
            // Look for the contract keyword.
            TerminToken token = _terminCollection.TryParse(t, TerminParseAttr.No);
            if (token != null)
            {
                // Up to 100 characters after the keyword, clamped to the end of the
                // text. The unclamped Substring(start, 100) used to throw
                // ArgumentOutOfRangeException for keywords near the end of the text.
                var sourceText = kit.Sofa.Text;
                var windowStart = token.EndChar + 1;
                if (windowStart >= sourceText.Length)
                {
                    continue;
                }
                var windowLength = sourceText.Length - windowStart >= 100 ? 100 : sourceText.Length - windowStart;
                var str = sourceText.Substring(windowStart, windowLength);

                AnalysisKit analyzisKit = new AnalysisKit(new SourceOfAnalysis(str), true, MorphLang.RU) { Processor = ProcessorService.CreateEmptyProcessor() };
                var numbers = ExtractionNumbers(analyzisKit.FirstToken);
                if (numbers?.Count > 0)
                {
                    contractNumbers.AddRange(numbers);
                }
            }
        }
        contractNumbers = contractNumbers.Distinct().ToList();
        contracts.AddRange(contractNumbers.Select(x => new Contract() { Number = x, Dates = new List <DateTime?>() }));

        // Simply extract all numbers.
        var resultNumbers = ExtractionNumbers(kit.FirstToken);

        // Only contracts whose number does not parse as a valid case number are registered.
        analyzerData.RegisterReferent(new MyReferent(nameof(MyReferent))
        {
            Contracts = contracts.Where(x => CaseNumber.Parse(x.Number).IsValid == false).ToList(),
            Participants = participants
        });

        // Output to file.
        using (var streamWriter = new StreamWriter(Path.Combine("Results", GlobalState.File)))
        {
            streamWriter.WriteLine("Организации");
            foreach (var participant in participants)
            {
                streamWriter.WriteLine(
                    $"INN:{participant.Inn}\t OGRN:{participant.Ogrn}\t {participant.Name}");
            }

            streamWriter.WriteLine();
            streamWriter.WriteLine("Контракты после слова 'контракт'");
            foreach (var contract in contractNumbers)
            {
                streamWriter.WriteLine($"{contract}");
            }

            streamWriter.WriteLine();
            streamWriter.WriteLine("Контракты рядом с датами ");
            foreach (var contract in contracts)
            {
                foreach (var date in contract.Dates)
                {
                    streamWriter.WriteLine($"{contract.Number} {date}");
                }
            }

            streamWriter.WriteLine();
            streamWriter.WriteLine("Скорее всего нужные нам контракты (номера с датами входящие в номера по слову 'Контракт')".ToUpper());
            var cs = contracts.Where(x => contractNumbers.Contains(x.Number)).ToList();
            foreach (var contract in cs)
            {
                foreach (var date in contract.Dates)
                {
                    streamWriter.WriteLine($"{contract.Number} {date}");
                }
            }

            streamWriter.WriteLine();
            streamWriter.WriteLine("Мусор");
            streamWriter.WriteLine("Номера");
            foreach (var contract in resultNumbers)
            {
                streamWriter.WriteLine($"{contract}");
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
        throw;
    }
}
/// <summary>
/// Initializes the node: takes its name from the configuration, wraps the
/// processor service in a node-specific service and then runs the
/// type-specific initialization.
/// </summary>
/// <remarks>
/// This method has to be called before the node can be used!
/// </remarks>
/// <param name="configuration">Node configuration; supplies the name and is passed on to InitializeCore.</param>
/// <param name="processor">Processor service that is wrapped for this node.</param>
public void Initialize(NodeConfiguration configuration, ProcessorService processor)
{
    // The name is assigned before the wrapper is built, because the wrapper receives this node.
    Name = configuration.Name;

    var nodeScopedService = new ProcessorServiceForNode(processor, this);
    ProcessorService = nodeScopedService;

    InitializeCore(configuration);
}
/// <summary>
/// Entry point. Initializes the SDK and MyAnalyzer, then processes either
/// the documents listed in the CSV files of the "Texts" folder (mode "csv",
/// document text is fetched through the file client) or the *.txt files of
/// that folder (mode "txt").
/// </summary>
/// <param name="args">args[0] selects the mode: "csv" or "txt".</param>
static void Main(string[] args)
{
    Stopwatch sw = Stopwatch.StartNew();

    // Initialization - has to be performed once before texts are processed.
    Console.Write("Initializing ... ");
    ProcessorService.Initialize(MorphLang.RU | MorphLang.EN);

    // Initialize all analyzers in use.
    MyAnalyzer.Initialize();
    Console.WriteLine("OK (by {0} ms), version {1}", (int)sw.ElapsedMilliseconds, ProcessorService.Version);

    AnalysisResult ar = null;
    if (args.Length > 0)
    {
        if (args[0] == "csv")
        {
            ClientApiSettings settings = new ClientApiSettings();
            foreach (var file in Directory.GetFiles("Texts", "*.csv"))
            {
                using (var sr = new StreamReader(file))
                {
                    var i = 1;
                    var line = sr.ReadLine();
                    while (line != null)
                    {
                        // Rows are split on ';' and on spaces.
                        var data = line.Split(';', ' ');
                        if (data.Length < 3)
                        {
                            Console.WriteLine("Ошибка формата csv. \r\n Формат\r\n Name;CaseId;DocumentId");

                            // Skip the malformed row explicitly. It used to fall through and
                            // was only rejected by an IndexOutOfRangeException in the request below.
                            line = sr.ReadLine();
                            continue;
                        }

                        GlobalState.File = i + "_" + MakeValidFileName(data[0]) + ".txt";
                        var client = new PravoRu.DataLake.Arbitr.CaseCard.Api.Client.v1.FileClient(settings, new HttpClientFactory());
                        DocumentPlainText response = null;
                        try
                        {
                            response = client.GetDocumentTextAsync(new DocumentFileRequest()
                            {
                                CaseId = Guid.Parse(data[1]),
                                IsBase64 = false,
                                DocumentId = Guid.Parse(data[2])
                            }).GetAwaiter().GetResult();
                        }
                        catch (Exception e)
                        {
                            Console.WriteLine(data[0] + "\t" + e.Message);
                        }

                        if (response == null)
                        {
                            // Nothing to analyze for this row; the file counter is not advanced.
                            line = sr.ReadLine();
                            continue;
                        }

                        File.WriteAllText(Path.Combine("Results", "Original_" + GlobalState.File), response.HtmlText);

                        // Create a processor that runs MyAnalyzer.
                        using (Processor proc = ProcessorService.CreateSpecificProcessor(nameof(MyAnalyzer)))
                        {
                            // Analyze the text.
                            ar = proc.Process(new SourceOfAnalysis(response.HtmlText));
                            try
                            {
                                PostExecute(ar);
                            }
                            catch (Exception e)
                            {
                                Console.WriteLine(e);
                            }
                        }

                        Console.WriteLine("Обработан файл " + GlobalState.File);
                        line = sr.ReadLine();
                        i++;
                    }
                }
            }
        }

        if (args[0] == "txt")
        {
            foreach (var file in Directory.GetFiles("Texts", "*.txt"))
            {
                Console.WriteLine($"{file}------------------------------------");
                string txt = File.ReadAllText(file);
                GlobalState.File = new FileInfo(file).Name;

                // Create a processor that runs MyAnalyzer.
                using (Processor proc = ProcessorService.CreateSpecificProcessor(nameof(MyAnalyzer)))
                {
                    // Analyze the text.
                    ar = proc.Process(new SourceOfAnalysis(txt));
                    try
                    {
                        PostExecute(ar);
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(e);
                    }
                }
            }
        }
    }

    sw.Stop();
    Console.WriteLine("Over!(by {0} ms), version {1}", (int)sw.ElapsedMilliseconds, ProcessorService.Version);
}
/// <summary>
/// Stores the services and the application context supplied by the caller.
/// </summary>
/// <param name="mainService">Main service.</param>
/// <param name="applicationContext">Application context.</param>
/// <param name="processorService">Processor service.</param>
public PanelController(
    MainService mainService,
    ApplicationContext applicationContext,
    ProcessorService processorService)
{
    _mainService        = mainService;
    _applicationContext = applicationContext;
    _processorService   = processorService;
}
/// <summary>
/// Crawl-completed handler: builds a status response for the page and hands
/// the parsed HTML document to <c>Extract</c>.
/// </summary>
void crawler_ProcessPageCrawlCompleted(object sender, PageCrawlCompletedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    string uri = crawledPage.Uri.AbsoluteUri;

    // NOTE(review): both responses are created and immediately discarded — confirm whether they
    // were meant to be logged or returned. The previous "} ; else" form was not valid C#.
    if (crawledPage.WebException != null || crawledPage.HttpWebResponse.StatusCode != HttpStatusCode.OK)
    {
        new HelloResponse { Result = $"Crawl of page failed {uri}!" };
    }
    else
    {
        new HelloResponse { Result = $"Crawl of page succeeded {uri}!" };
    }

    var htmlAgilityPackDocument = crawledPage.HtmlDocument; // Html Agility Pack parser
    Extract(e, htmlAgilityPackDocument);
}

/// <summary>Removes every double-quote character from <paramref name="articleText"/>.</summary>
string RemoveQuotes(string articleText)
{
    return Regex.Replace(articleText, "\"", String.Empty);
}

/// <summary>
/// Extracts header, text, publication date and named entities of a news
/// article from the page and stores them as an <c>Article</c> row. Nothing is
/// stored unless both header and text were found. Exceptions are caught and
/// their message is written to the console.
/// </summary>
/// <param name="page">Crawl result; supplies the URL and the full HTML.</param>
/// <param name="htmlAgilityPackDocument">Parsed HTML document of the page; it is modified in place.</param>
void Extract(PageCrawlCompletedArgs page, HtmlDocument htmlAgilityPackDocument)
{
    try
    {
        DateTime date = default(DateTime);
        string entityText = "";
        StringBuilder headerArticle = new StringBuilder();
        StringBuilder articleText = new StringBuilder();

        var t2 = htmlAgilityPackDocument.DocumentNode.SelectSingleNode("//div[@class='news-detail']");
        if (t2 != null)
        {
            // The "*" in the XPath selects any element; the leading "." makes the
            // search start at the current node.
            HtmlNode HeaderArticleNode = t2.SelectSingleNode(".//*[@class='name']");
            if (HeaderArticleNode != null)
            {
                headerArticle.Append(RemoveQuotes(HeaderArticleNode.InnerText));
            }

            HtmlNode TextNode = t2.SelectSingleNode(".//*[@id='detailText']");
            if (TextNode != null && TextNode.InnerText != "")
            {
                articleText.Append(RemoveQuotes(TextNode.InnerText));

                // NOTE(review): the SDK and analyzers are initialized for every page — confirm whether once per run is enough.
                ProcessorService.Initialize(MorphLang.RU | MorphLang.EN);
                EP.Ner.Geo.GeoAnalyzer.Initialize();
                EP.Ner.Org.OrganizationAnalyzer.Initialize();
                EP.Ner.Person.PersonAnalyzer.Initialize();

                // Create an ordinary processor instance.
                using (Processor proc = ProcessorService.CreateProcessor())
                {
                    // Analyze the text.
                    AnalysisResult ar = proc.Process(new SourceOfAnalysis(articleText.ToString().Trim()));

                    // Resulting entities, space-separated.
                    foreach (var e in ar.Entities)
                    {
                        entityText += e.ToString() + " ";
                    }
                }
            }
        }

        // SelectNodes returns null (not an empty collection) when nothing matches;
        // both node sets below used to be dereferenced unconditionally.
        var styleAndScriptNodes = htmlAgilityPackDocument.DocumentNode.SelectNodes("//style|//script");
        if (styleAndScriptNodes != null)
        {
            foreach (var n in styleAndScriptNodes.ToList())
            {
                n.Remove();
            }
        }

        var xpath = "//text()[not(normalize-space())]";
        var emptyNodes = htmlAgilityPackDocument.DocumentNode.SelectNodes(xpath);
        if (emptyNodes != null)
        {
            // Replace each empty text node with a single new-line text node.
            foreach (HtmlNode emptyNode in emptyNodes)
            {
                emptyNode.ParentNode.ReplaceChild(HtmlTextNode.CreateNode(Environment.NewLine), emptyNode);
            }
        }

        var bla = htmlAgilityPackDocument.DocumentNode.SelectSingleNode("//span[@class='news-date-time news_date']");
        if (bla != null)
        {
            date = DateTime.Parse(bla.InnerText);
        }

        if (headerArticle.Length != 0 && articleText.Length != 0)
        {
            using (var db = dbConnectionFactory.OpenDbConnection())
            {
                db.Insert(new Article()
                {
                    HeaderArticle = headerArticle.ToString().Trim(),
                    UrlArticle = page.CrawledPage.Uri.AbsoluteUri,
                    FullText = page.CrawledPage.HtmlDocument.DocumentNode.OuterHtml,
                    Text = articleText.ToString().Trim(),
                    LastUpdated = date,
                    EntityText = entityText,
                });
            }
        }
    }
    catch (Exception e)
    {
        // The message used to be passed as an unused format argument of an
        // empty format string, so only a blank line was printed.
        Console.WriteLine(e.Message);
    }
}
}
/// <summary>
/// Demo entry point: initializes the SDK with the geo, organization and
/// person analyzers, analyzes a hard-coded sample text and prints the
/// recognized entities to the console.
/// </summary>
static void Main(string[] args)
{
    var stopwatch = new Stopwatch();
    stopwatch.Start();

    // Initialization - has to be performed once before texts are processed.
    Console.Write("Initializing ... ");
    ProcessorService.Initialize(MorphLang.RU | MorphLang.EN);

    // Only the analyzers that are needed are initialized. Disabled in the
    // original: Money, Uri, Phone, Definition, Date, Bank, Address, Mail,
    // Transport, Decree, Titlepage, Booklink, Named, Goods.
    EP.Ner.Geo.GeoAnalyzer.Initialize();          // needed
    EP.Ner.Org.OrganizationAnalyzer.Initialize(); // needed
    EP.Ner.Person.PersonAnalyzer.Initialize();    // needed

    stopwatch.Stop();
    Console.WriteLine("OK (by {0} ms), version {1}", (int)stopwatch.ElapsedMilliseconds, ProcessorService.Version);

    // Text to analyze.
    string txt = " улица Куйбышева Единственным конкурентом «Трансмаша» на этом дебильном тендере фамилия Гуполович было ООО «Плассер Алека Рейл Сервис», основным владельцем которого является австрийская компания «СТЦ-Холдинг ГМБХ». До конца 2011 г. эта же фирма была совладельцем «Трансмаша» вместе с «Тако» Краснова. Зато совладельцем «Плассера», также до конца 2011 г., был тот самый Карл Контрус, который имеет четверть акций «Трансмаша». ";

    // Create an ordinary processor instance.
    using (Processor processor = ProcessorService.CreateProcessor())
    {
        // Analyze the text.
        AnalysisResult analysis = processor.Process(new SourceOfAnalysis(txt));

        // Resulting entities; geo referents get an extra marker appended.
        Console.WriteLine("Entities: ");
        foreach (var entity in analysis.Entities)
        {
            string typeName = entity.GetType().Name;
            string line = typeName + " " + entity;
            if (typeName.Equals("GeoReferent"))
            {
                line += " if прошел успешно";
            }
            Console.WriteLine(line);
        }

        // Disabled in the original: a noun-group extraction example that walked
        // the tokens without referents and called NounPhraseHelper.TryParse
        // with NounPhraseParseAttr.AdjectiveCanBeLast.
    }

    Console.WriteLine("Over!");
}
/// <summary>
/// Stores the application context and the processor service.
/// </summary>
/// <param name="mainService">Accepted but currently not stored (the assignment is disabled).</param>
/// <param name="applicationContext">Application context.</param>
/// <param name="processorService">Processor service.</param>
public QuestionController(
    RepositoryService mainService,
    ApplicationContext applicationContext,
    ProcessorService processorService)
{
    // _mainService = mainService;   (disabled in the original)
    _applicationContext = applicationContext;
    _processorService   = processorService;
}