/// <summary>
/// Applies the requested filters, in the order given, to the document's sentence clusters and
/// returns the reconstructed document together with the anomalies reported by the filters.
/// </summary>
/// <remarks>
/// The shared <c>anomaly</c> collection is cleared at the start of every call. Documents with
/// three sentences or fewer are not filtered at all: they are reconstructed as-is.
/// </remarks>
/// <param name="types">
/// Filters to run. Each filter receives the clusters produced by the previous one.
/// </param>
/// <returns>
/// A <see cref="DetectionResult"/> built from the reconstructed document and the collected anomalies.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="types"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="types"/> is empty.</exception>
public DetectionResult Detect(params FilterTypes[] types)
{
    // A params array can still be passed as an explicit null; fail with a descriptive
    // argument exception instead of a NullReferenceException on types.Length.
    if (types == null)
    {
        throw new ArgumentNullException(nameof(types));
    }

    if (types.Length == 0)
    {
        throw new ArgumentException("Value cannot be an empty collection.", nameof(types));
    }

    log.Debug("Detect");
    anomaly.Clear();

    if (document.Sentences.Length <= 3)
    {
        log.Debug("Detect - text too short");
        return new DetectionResult(reconstructor.Reconstruct(document.Sentences), anomaly.ToArray());
    }

    log.Info("Using sentence clustering");
    var sentenceClusters = GetSentencesBlock().ToArray();

    foreach (FilterTypes filterTypes in types)
    {
        // Filters are chained: the output clusters of one filter feed the next.
        DetectionResults result = factory.Create(filterTypes).Filter(new DocumentClusters(sentenceClusters));
        anomaly.AddRange(result.Anomaly);
        sentenceClusters = result.Result;
    }

    // Distinct() drops sentences that appear in more than one surviving cluster.
    var remainingSentences = sentenceClusters
        .SelectMany(item => item.Sentences)
        .Distinct()
        .ToArray();

    return new DetectionResult(reconstructor.Reconstruct(remainingSentences), anomaly.ToArray());
}
/// <summary>
/// Reconstructs a document from each supplied block and stores it in the positive or the
/// negative collection, depending on <paramref name="type"/>.
/// </summary>
/// <remarks>
/// A document is skipped, with a warning logged, when its text is null or empty or when a
/// document with the same text has already been registered in <c>duplicate</c>.
/// </remarks>
/// <param name="type">
/// <see cref="DataType.Positive"/> stores into <c>positive</c>; any other value stores into <c>negative</c>.
/// </param>
/// <param name="blocks">Text blocks whose sentences are reconstructed into documents.</param>
public void Add(DataType type, params IProcessingTextBlock[] blocks)
{
    logger.LogDebug("Add: {0}", type);

    // Select is lazy, so each block is reconstructed only when the loop reaches it.
    foreach (var candidate in blocks.Select(block => reconstructor.Reconstruct(block.Sentences)))
    {
        var text = candidate.Text;

        if (string.IsNullOrEmpty(text))
        {
            logger.LogWarning("Ignoring empty document");
            continue;
        }

        if (duplicate.ContainsKey(text))
        {
            logger.LogWarning("Duplicate document detected - ignoring");
            continue;
        }

        // Register the text first so later blocks with the same text are rejected.
        duplicate[text] = candidate;

        if (type != DataType.Positive)
        {
            negative.Add(candidate);
        }
        else
        {
            positive.Add(candidate);
        }
    }
}