private async Task <IReadOnlyList <IText> > CreateTextsAsync(IEnumerable <string> projects, TextCorpusType type) { StringTokenizer wordTokenizer = new LatinWordTokenizer(); IMongoDatabase sfDatabase = _mongoClient.GetDatabase("scriptureforge"); IMongoDatabase realtimeDatabase = _mongoClient.GetDatabase("realtime"); IMongoCollection <BsonDocument> projectsColl = sfDatabase.GetCollection <BsonDocument>("projects"); var texts = new List <IText>(); foreach (string projectId in projects) { Project project = await _projectRepo.GetAsync(projectId); if (project == null) { continue; } string segmentType = null; string suffix = null; switch (type) { case TextCorpusType.Source: suffix = "source"; segmentType = project.SourceSegmentType; break; case TextCorpusType.Target: suffix = "target"; segmentType = project.TargetSegmentType; break; } StringTokenizer segmentTokenizer = null; if (segmentType != null) { segmentTokenizer = WebApiUtils.CreateSegmentTokenizer(segmentType); } FilterDefinition <BsonDocument> filter = Builders <BsonDocument> .Filter.Eq("_id", ObjectId.Parse(projectId)); BsonDocument projectDoc = await projectsColl.Find(filter).FirstOrDefaultAsync(); if (projectDoc == null) { continue; } var code = "sf_" + (string)projectDoc["projectCode"]; var isScripture = (bool)projectDoc["config"]["isTranslationDataScripture"]; IMongoCollection <BsonDocument> projectColl = realtimeDatabase.GetCollection <BsonDocument>(code); IMongoDatabase projectDatabase = _mongoClient.GetDatabase(code); IMongoCollection <BsonDocument> translateColl = projectDatabase.GetCollection <BsonDocument>("translate"); filter = Builders <BsonDocument> .Filter.Eq("isDeleted", false); using (IAsyncCursor <BsonDocument> cursor = await translateColl.Find(filter).ToCursorAsync()) { while (await cursor.MoveNextAsync()) { foreach (BsonDocument docInfo in cursor.Current) { var id = (ObjectId)docInfo["_id"]; filter = Builders <BsonDocument> .Filter.Eq("_id", $"{id}:{suffix}"); BsonDocument doc = await projectColl.Find(filter).FirstAsync(); if (isScripture) { texts.Add(new XForgeScriptureText(wordTokenizer, project.Id, doc)); } else { texts.Add(new XForgeRichText(segmentTokenizer, wordTokenizer, project.Id, doc)); } } } } } return(texts); }
public SegmentTokenizer(string segmentType) { _tokenizer = WebApiUtils.CreateSegmentTokenizer(segmentType); }