/// <summary>
/// Initializes the shared state of an algorithm: stores the supplied saver,
/// wiki dump reader and triplet collection, and builds the positions reader
/// from <paramref name="positionsPath"/> via <c>PrepareCsvReader</c>.
/// </summary>
/// <param name="saver">Saver used to persist positions found by the algorithm.</param>
/// <param name="wikiReader">Reader giving access to the wiki dump.</param>
/// <param name="triplets">Mongo collection holding the triplets.</param>
/// <param name="positionsPath">Path handed to <c>PrepareCsvReader</c> to open the positions source.</param>
protected AlgoBase(
    AsyncSaver saver,
    IWikidumpReader wikiReader,
    IMongoCollection<Triplet> triplets,
    string positionsPath)
{
    // Plain collaborators are stored first, exactly as received.
    this._saver = saver;
    this._wikiReader = wikiReader;
    this._triplets = triplets;

    // The positions reader is created last, once the other fields are set.
    this._positions = PrepareCsvReader(positionsPath);
}
/// <summary>
/// Builds an <c>AnotherArticlePosition</c> for the given object/subject pair,
/// fills it with the text window returned by
/// <c>TextHelper.ExtractTextWithSentenceWindow</c> and hands it to the saver.
/// </summary>
/// <param name="id">Identifier passed through to <c>_saver.Save</c> together with the position.</param>
/// <param name="object_">Position line of the object; its page id selects the article text to load.</param>
/// <param name="subject">Position line of the subject.</param>
/// <param name="reader">Reader used to fetch the article text by page id.</param>
/// <returns>
/// <c>true</c> when the position was passed to the saver; <c>false</c> when the
/// article text is unavailable or the extracted text contains a line break.
/// </returns>
protected bool ProcessTriplet(ObjectId id, PositionLine object_, PositionLine subject, IWikidumpReader reader)
{
    var result = new AnotherArticlePosition();
    result.ArticleTitle = object_.WikiTitle;
    result.ArticleId = object_.PageId;
    result.ObjectPosition = object_.ToPosition();
    result.SubjectPosition = subject.ToPosition();

    var articleText = reader.ExtractArticleText(object_.PageId);
    if (articleText == null)
    {
        return false;
    }

    // The span of interest runs from the earlier start to the later end of the two mentions.
    var spanStart = subject.Start;
    if (object_.Start < subject.Start)
    {
        spanStart = object_.Start;
    }

    var spanEnd = subject.End;
    if (object_.End > subject.End)
    {
        spanEnd = object_.End;
    }

    // NOTE(review): presumably the helper widens the span to sentence boundaries
    // and reports the widened bounds through the out parameters - confirm in TextHelper.
    int windowStart;
    int windowEnd;
    result.Text = TextHelper.ExtractTextWithSentenceWindow(articleText, spanStart, spanEnd, out windowStart, out windowEnd);

    // Do not save text that contains a line break.
    bool isSingleLine = !result.Text.Contains('\n') && !result.Text.Contains('\r');
    if (!isSingleLine)
    {
        return false;
    }

    result.Start = windowStart;
    result.End = windowEnd;
    result.Distance = windowEnd - windowStart;

    _saver.Save(id, result);
    return true;
}
/// <summary>
/// Creates the in-memory variant: forwards every argument to the base
/// constructor and then fills the triplet cache from <c>GetTripletsCache</c>.
/// </summary>
/// <param name="saver">Saver forwarded to the base constructor.</param>
/// <param name="wikiReader">Wiki dump reader forwarded to the base constructor.</param>
/// <param name="triplets">Triplet collection forwarded to the base constructor.</param>
/// <param name="positionsPath">Positions path forwarded to the base constructor.</param>
public AlgoInMemory(
    AsyncSaver saver,
    IWikidumpReader wikiReader,
    IMongoCollection<Triplet> triplets,
    string positionsPath)
    : base(saver, wikiReader, triplets, positionsPath)
{
    // Runs after the base constructor has finished initializing its fields.
    this._tripletsInMemory = GetTripletsCache();
}
/// <summary>
/// Creates the database-backed variant. All arguments are forwarded to the
/// base constructor; this type adds no initialization of its own.
/// </summary>
/// <param name="saver">Saver forwarded to the base constructor.</param>
/// <param name="wikiReader">Wiki dump reader forwarded to the base constructor.</param>
/// <param name="triplets">Triplet collection forwarded to the base constructor.</param>
/// <param name="positionsPath">Positions path forwarded to the base constructor.</param>
public AlgoInDb(
    AsyncSaver saver,
    IWikidumpReader wikiReader,
    IMongoCollection<Triplet> triplets,
    string positionsPath)
    : base(saver, wikiReader, triplets, positionsPath)
{
}