public ActionResult Index(
    long[] matterIds,
    int characteristicTypeLinkId,
    int[] featureIds,
    string maxPercentageDifference,
    double characteristicValueFrom,
    double characteristicValueTo)
{
    // Compares the subsequences of every pair of selected matters by a single characteristic.
    //
    // matterIds                  - ids of the matters whose nucleotide sequences are compared.
    // characteristicTypeLinkId   - characteristic (type + link) calculated for every subsequence.
    // featureIds                 - features of the subsequences to extract.
    // maxPercentageDifference    - invariant-culture number, in percent; two values whose difference
    //                              (as returned by calculateAverageDifference) does not exceed it are "equal".
    // characteristicValueFrom/To - inclusive range filter on the characteristic value;
    //                              when both are 0 the filter is switched off.
    //
    // The lambda returns a dictionary with a single "data" entry holding the JSON-serialized result.
    return Action(() =>
    {
        // Parsed up front so that a malformed threshold fails before any database or calculation work starts.
        double decimalDifference = double.Parse(maxPercentageDifference, CultureInfo.InvariantCulture) / 100;

        Dictionary<int, string> features;
        var attributeValues = new List<AttributeValue>();
        string characteristicName;
        long[] parentSequenceIds;
        string[] matterNames;
        IFullCalculator calculator;
        Link link;

        using (var db = new LibiadaWebEntities())
        {
            var featureRepository = new FeatureRepository(db);
            features = featureRepository.Features.ToDictionary(f => f.Id, f => f.Name);

            var parentSequences = db.DnaSequence.Include(s => s.Matter)
                                    .Where(s => s.NotationId == Aliases.Notation.Nucleotide && matterIds.Contains(s.MatterId))
                                    .Select(s => new { s.Id, MatterName = s.Matter.Name })
                                    .ToDictionary(s => s.Id);
            parentSequenceIds = parentSequences.Keys.ToArray();

            // Every per-matter array is sized by the sequences that were actually loaded,
            // so that all of them share the same indexes even if the query returns
            // a different number of sequences than there are matter ids.
            matterNames = new string[parentSequenceIds.Length];
            for (int n = 0; n < parentSequenceIds.Length; n++)
            {
                matterNames[n] = parentSequences[parentSequenceIds[n]].MatterName;
            }

            var characteristicTypeLinkRepository = new CharacteristicTypeLinkRepository(db);
            characteristicName = characteristicTypeLinkRepository.GetCharacteristicName(characteristicTypeLinkId);
            string className = characteristicTypeLinkRepository.GetCharacteristicType(characteristicTypeLinkId).ClassName;
            calculator = CalculatorsFactory.CreateFullCalculator(className);
            link = characteristicTypeLinkRepository.GetLibiadaLink(characteristicTypeLinkId);
        }

        int mattersCount = parentSequenceIds.Length;
        var characteristics = new SubsequenceData[mattersCount][];
        var subsequencesCount = new int[mattersCount];
        var characteristicSums = new double[mattersCount];
        bool valueFilterDisabled = characteristicValueFrom == 0 && characteristicValueTo == 0;

        // cycle through matters; first level of characteristics array
        for (int i = 0; i < mattersCount; i++)
        {
            var subsequencesData = SubsequencesCharacteristicsCalculator.CalculateSubsequencesCharacteristics(
                new[] { characteristicTypeLinkId },
                featureIds,
                parentSequenceIds[i],
                new[] { calculator },
                new[] { link },
                attributeValues);

            // The count is taken before filtering: formula1 is normalized by all subsequences of the matter.
            subsequencesCount[i] = subsequencesData.Length;

            // Ascending order is required by the comparison below, which skips
            // the already passed beginning of the second array.
            characteristics[i] = subsequencesData
                .Where(c => valueFilterDisabled
                            || (c.CharacteristicsValues[0] >= characteristicValueFrom
                                && c.CharacteristicsValues[0] <= characteristicValueTo))
                .OrderBy(c => c.CharacteristicsValues[0])
                .ToArray();

            // Calculated once per matter instead of once per compared pair.
            characteristicSums[i] = characteristics[i].Sum(c => c.CharacteristicsValues[0]);
        }

        var similarities = new object[mattersCount, mattersCount];
        var equalElements = new List<SubsequenceComparisonData>();

        for (int i = 0; i < mattersCount; i++)
        {
            for (int j = 0; j < mattersCount; j++)
            {
                // Sum of the values of the first matter's subsequences that have at least one equal counterpart.
                double matchedFirstValuesSum = 0;

                // Index in the second array -> value, for every second matter's subsequence that was matched.
                var matchedSecondValues = new Dictionary<int, double>();

                int secondArrayStartPosition = 0;
                double differenceSum = 0;
                int matchedFirstCount = 0;
                int equalPairsCount = 0;

                for (int k = 0; k < characteristics[i].Length; k++)
                {
                    double first = characteristics[i][k].CharacteristicsValues[0];

                    // Must be reset for every element of the first array, otherwise
                    // all elements after the first match would be counted as matched.
                    bool equalFound = false;

                    for (int l = secondArrayStartPosition; l < characteristics[j].Length; l++)
                    {
                        double second = characteristics[j][l].CharacteristicsValues[0];
                        double difference = calculateAverageDifference(first, second);

                        if (difference <= decimalDifference)
                        {
                            equalFound = true;
                            equalPairsCount++;

                            // Each element of the second array contributes only once,
                            // with the difference of its first match.
                            if (!matchedSecondValues.ContainsKey(l))
                            {
                                matchedSecondValues.Add(l, second);
                                differenceSum += difference;
                            }

                            if (i != j)
                            {
                                equalElements.Add(new SubsequenceComparisonData
                                {
                                    Difference = difference,
                                    FirstMatterId = i,
                                    SecondMatterId = j,
                                    FirstSubsequenceId = k,
                                    SecondSubsequenceId = l,
                                });
                            }

                            // Scanning continues only while the next element of the second array
                            // is itself equal to the current one.
                            bool nextElementInSecondArrayIsEqual =
                                l < characteristics[j].Length - 1
                                && calculateAverageDifference(second, characteristics[j][l + 1].CharacteristicsValues[0]) <= decimalDifference;

                            if (!nextElementInSecondArrayIsEqual)
                            {
                                break;
                            }
                        }
                        else if (second < first)
                        {
                            // Arrays are sorted ascending, so this element is too small
                            // for all following elements of the first array as well.
                            secondArrayStartPosition++;
                        }
                    }

                    if (equalFound)
                    {
                        matchedFirstCount++;
                        matchedFirstValuesSum += first;
                    }
                }

                // formula1: doubled smaller count of matched subsequences divided by the total count of subsequences.
                // NOTE: the result is NaN when both matters have no subsequences (0 / 0).
                double differenceFinal = Math.Min(matchedFirstCount, matchedSecondValues.Count) * 2d;
                double formula1 = differenceFinal / (subsequencesCount[i] + subsequencesCount[j]);

                // formula2: average difference of equal pairs divided by formula1.
                double formula2 = 0;
                if (equalPairsCount != 0 && formula1 != 0)
                {
                    formula2 = (differenceSum / equalPairsCount) / formula1;
                }

                // formula3: doubled smaller sum of matched values, in percent of the sum of all values of both matters.
                double matchedSecondValuesSum = matchedSecondValues.Sum(s => s.Value);
                double similarSequencesCharacteristicValue = matchedFirstValuesSum < matchedSecondValuesSum
                    ? matchedFirstValuesSum * 2d
                    : matchedSecondValuesSum * 2d;
                double formula3 = similarSequencesCharacteristicValue * 100d / (characteristicSums[i] + characteristicSums[j]);

                similarities[i, j] = new
                {
                    formula1 = Math.Round(formula1 * 100d, 3),
                    formula2 = Math.Round(formula2, 3),
                    formula3 = Math.Round(formula3, 3)
                };
            }
        }

        var result = new Dictionary<string, object>
        {
            { "mattersNames", matterNames },
            { "characteristicName", characteristicName },
            { "similarities", similarities },
            { "characteristics", characteristics },
            { "equalElements", equalElements.OrderBy(e => e.Difference).ToList() },
            { "features", features },
            { "attributeValues", attributeValues.Select(sa => new { attribute = sa.AttributeId, value = sa.Value }) },
            { "attributes", EnumExtensions.ToArray<LibiadaWeb.Attribute>().ToDictionary(a => (byte)a, a => a.GetDisplayValue()) }
        };

        return new Dictionary<string, object>
        {
            { "data", JsonConvert.SerializeObject(result) }
        };
    });
}
/// <summary>
/// Calculates the given characteristics for all subsequences of a parent sequence.
/// Values already stored in the database are reused; missing ones are calculated
/// and an attempt is made to save them to the database.
/// </summary>
/// <param name="characteristicTypeLinkIds">
/// The characteristic type link ids.
/// </param>
/// <param name="featureIds">
/// The features ids.
/// </param>
/// <param name="parentSequenceId">
/// The parent sequence id.
/// </param>
/// <param name="calculators">
/// The calculators; the element at index j is used for <paramref name="characteristicTypeLinkIds"/>[j].
/// </param>
/// <param name="links">
/// The links; the element at index j is used for <paramref name="characteristicTypeLinkIds"/>[j].
/// </param>
/// <param name="attributeValues">
/// Nonredundant list of all attributes. Attribute values of the subsequences that are
/// not yet present are appended to it; subsequences refer to its elements by index.
/// </param>
/// <param name="filters">
/// Textual search filters for subsequences products.
/// When <c>null</c>, subsequences are extracted without filtering.
/// </param>
/// <returns>
/// The <see cref="T:SubsequenceData[]"/>, one element per extracted subsequence.
/// </returns>
public static SubsequenceData[] CalculateSubsequencesCharacteristics(
    int[] characteristicTypeLinkIds,
    int[] featureIds,
    long parentSequenceId,
    IFullCalculator[] calculators,
    Link[] links,
    List<AttributeValue> attributeValues,
    string[] filters = null)
{
    // creating local context to avoid memory overflow due to possibly big cache of characteristics
    using (var context = new LibiadaWebEntities())
    {
        var subsequenceExtractor = new SubsequenceExtractor(context);
        var sequenceAttributeRepository = new SequenceAttributeRepository(context);
        var newCharacteristics = new List<Characteristic>();

        // shared fallback for subsequences without attributes
        var noAttributes = new AttributeValue[0];

        // extracting data from database
        var dbSubsequences = filters == null
            ? subsequenceExtractor.GetSubsequences(parentSequenceId, featureIds)
            : subsequenceExtractor.GetSubsequences(parentSequenceId, featureIds, filters);
        var subsequenceIds = dbSubsequences.Select(s => s.Id).ToArray();
        var dbSubsequencesAttributes = sequenceAttributeRepository.GetAttributes(subsequenceIds);

        // already calculated values: sequence id -> (characteristic type link id -> value)
        var dbCharacteristics = context.Characteristic
            .Where(c => characteristicTypeLinkIds.Contains(c.CharacteristicTypeLinkId) && subsequenceIds.Contains(c.SequenceId))
            .ToArray()
            .GroupBy(c => c.SequenceId)
            .ToDictionary(c => c.Key, c => c.ToDictionary(ct => ct.CharacteristicTypeLinkId, ct => ct.Value));

        // converting to libiada sequences
        var sequences = subsequenceExtractor.ExtractChains(dbSubsequences, parentSequenceId);
        var subsequenceData = new SubsequenceData[sequences.Length];

        // cycle through subsequences
        for (int i = 0; i < sequences.Length; i++)
        {
            var values = new double[characteristicTypeLinkIds.Length];
            Dictionary<int, double> sequenceDbCharacteristics;
            if (!dbCharacteristics.TryGetValue(dbSubsequences[i].Id, out sequenceDbCharacteristics))
            {
                sequenceDbCharacteristics = new Dictionary<int, double>();
            }

            // cycle through characteristics and notations
            for (int j = 0; j < characteristicTypeLinkIds.Length; j++)
            {
                int characteristicTypeLinkId = characteristicTypeLinkIds[j];
                double value;
                if (!sequenceDbCharacteristics.TryGetValue(characteristicTypeLinkId, out value))
                {
                    // not in the database yet: calculate and remember for saving
                    value = calculators[j].Calculate(sequences[i], links[j]);
                    newCharacteristics.Add(new Characteristic
                    {
                        SequenceId = dbSubsequences[i].Id,
                        CharacteristicTypeLinkId = characteristicTypeLinkId,
                        Value = value
                    });
                }

                values[j] = value;
            }

            AttributeValue[] attributes;
            if (!dbSubsequencesAttributes.TryGetValue(dbSubsequences[i].Id, out attributes))
            {
                attributes = noAttributes;
            }

            var attributeIndexes = new int[attributes.Length];
            for (int j = 0; j < attributes.Length; j++)
            {
                // single scan of the list: find the existing entry or append a new one
                int attributeIndex = attributeValues.IndexOf(attributes[j]);
                if (attributeIndex < 0)
                {
                    attributeIndex = attributeValues.Count;
                    attributeValues.Add(attributes[j]);
                }

                attributeIndexes[j] = attributeIndex;
            }

            subsequenceData[i] = new SubsequenceData(dbSubsequences[i], values, attributeIndexes);
        }

        // trying to save calculated characteristics to database
        var characteristicRepository = new CharacteristicRepository(context);
        characteristicRepository.TrySaveCharacteristicsToDatabase(newCharacteristics);

        return subsequenceData;
    }
}