public void axc_is_not_subsequence_of_abhdc()
        {
            // 'x' does not occur anywhere in the target, so the check is expected to report false.
            const string candidate = "axc";
            const string text = "abhdc";

            var isSubsequence = Subsequence.IsSubsequence(candidate, text);

            Assert.IsFalse(isSubsequence);
        }
        public void abc_is_subsequence_of_abhdc()
        {
            // 'a', 'b' and 'c' all occur in the target in this order, so the check is expected to report true.
            const string candidate = "abc";
            const string text = "abhdc";

            var isSubsequence = Subsequence.IsSubsequence(candidate, text);

            Assert.IsTrue(isSubsequence);
        }
        /// <summary>
        /// Raises the "Add Event" request and, when it is confirmed, appends the
        /// resulting event to the sequence selected by <paramref name="sequence"/>.
        /// </summary>
        /// <param name="sequence">
        /// Which sequence (primary or abort) receives the new event.
        /// </param>
        private void AddEvent(Subsequence sequence)
        {
            var request = new Confirmation { Title = "Add Event" };

            CreateEventRequest.Raise(request, confirmation =>
            {
                // Nothing to add when the request was not confirmed.
                if (!confirmation.Confirmed)
                {
                    return;
                }

                var evt = (Event)confirmation.Content;

                if (sequence == Subsequence.Primary)
                {
                    Sequence.PrimarySequence.Add(evt);
                }
                else if (sequence == Subsequence.Abort)
                {
                    Sequence.AbortSequence.Add(evt);
                }

                // Any other value is ignored on purpose, as in a switch with an empty default.
            });
        }
Beispiel #4
0
        /// <summary>
        /// Builds the chain of a joined subsequence that carries the complement flag.
        /// With the complement-join attribute all parts are concatenated first and the
        /// whole string is reverse-complemented; otherwise every part is
        /// reverse-complemented on its own and the results are concatenated.
        /// </summary>
        /// <param name="sourceSequence">
        /// The complete sequence the parts are cut from.
        /// </param>
        /// <param name="subsequence">
        /// The subsequence with its additional positions and attributes.
        /// </param>
        /// <returns>
        /// The resulting <see cref="Chain"/>.
        /// </returns>
        private Chain GetJoinedSubsequenceWithComplement(Sequence sourceSequence, Subsequence subsequence)
        {
            ISequence firstPart = sourceSequence.GetSubSequence(subsequence.Start, subsequence.Length);
            Position[] otherParts = subsequence.Position.ToArray();

            bool complementJoin = subsequence.SequenceAttribute.Any(sa => sa.Attribute == Attribute.ComplementJoin);

            if (complementJoin)
            {
                // Join everything in stored order, then reverse-complement the joined string as a whole.
                string joined = firstPart.ConvertToString()
                                + string.Concat(otherParts.Select(p => sourceSequence.GetSubSequence(p.Start, p.Length).ConvertToString()));

                return new Chain(new Sequence(Alphabets.DNA, joined).GetReverseComplementedSequence().ConvertToString());
            }

            // Reverse-complement each part separately, keeping the stored order of the parts.
            string complemented = firstPart.GetReverseComplementedSequence().ConvertToString()
                                  + string.Concat(otherParts.Select(p => sourceSequence.GetSubSequence(p.Start, p.Length).GetReverseComplementedSequence().ConvertToString()));

            return new Chain(complemented);
        }
Beispiel #5
0
        public void CanAddEventToSubsequence(Subsequence subsequence)
        {
            // Arrange: an event to hand back and a view model around an empty sequence.
            var expectedEvent = new OutputEvent
            {
                ChannelName = "Channel",
                StartTime = TimeSpan.Zero,
                EndTime = TimeSpan.FromSeconds(1)
            };
            var sut = new SequenceViewModel(new Sequence());

            // Confirm every raised request and supply the prepared event as its content.
            sut.CreateEventRequest.Raised += (sender, args) =>
            {
                var confirmation = (IConfirmation)args.Context;
                confirmation.Confirmed = true;
                confirmation.Content = expectedEvent;
                args.Callback();
            };

            // Act
            sut.AddEventCommand.Execute(subsequence.ToString());

            // Assert: the selected sequence holds exactly the prepared event.
            bool isPrimary = subsequence == Subsequence.Primary;
            var targetSequence = isPrimary ? sut.Sequence.PrimarySequence : sut.Sequence.AbortSequence;

            Assert.Single(targetSequence);
            Assert.Same(expectedEvent, targetSequence.First());
        }
Beispiel #6
0
        /// <summary>
        /// Checks whether the value of the given attribute of the subsequence
        /// contains at least one of the filter strings.
        /// </summary>
        /// <param name="subsequence">
        /// The subsequence whose attributes are inspected.
        /// </param>
        /// <param name="attribute">
        /// The attribute to look up.
        /// </param>
        /// <param name="filters">
        /// Substrings searched for in the lower-cased attribute value.
        /// </param>
        /// <returns>
        /// <c>true</c> if the attribute is present and its lower-cased value contains
        /// any of the filters; <c>false</c> if the attribute is absent or nothing matches.
        /// </returns>
        private bool IsSubsequenceAttributePassesFilters(Subsequence subsequence, Attribute attribute, string[] filters)
        {
            // Still throws when the attribute occurs more than once, exactly like Single.
            var matchingAttribute = subsequence.SequenceAttribute.SingleOrDefault(sa => sa.Attribute == attribute);

            if (matchingAttribute == null)
            {
                return false;
            }

            // Only the attribute value is lower-cased; the filters are compared as given.
            string loweredValue = matchingAttribute.Value.ToLowerInvariant();

            return filters.Any(filter => loweredValue.Contains(filter));
        }
Beispiel #7
0
        public void TestMethodSequence()
        {
            // A single-element list: the first element of the result is expected to be that element.
            var singleElementList = new List<int> { 5 };

            List<int> sequence = Subsequence.FindSubsequence(singleElementList);

            Assert.AreEqual(5, sequence[0]);
        }
Beispiel #8
0
 /// <summary>
 /// Extracts a joined subsequence, dispatching on whether the subsequence
 /// carries the complement attribute.
 /// </summary>
 /// <param name="sourceSequence">
 /// The complete sequence.
 /// </param>
 /// <param name="subsequence">
 /// The subsequence to extract.
 /// </param>
 /// <returns>
 /// The extracted <see cref="Chain"/>.
 /// </returns>
 private Chain GetJoinedSubsequence(Sequence sourceSequence, Subsequence subsequence)
 {
     bool isComplement = subsequence.SequenceAttribute.Any(sa => sa.Attribute == Attribute.Complement);

     return isComplement
         ? GetJoinedSubsequenceWithComplement(sourceSequence, subsequence)
         : GetJoinedSubsequenceWithoutComplement(sourceSequence, subsequence);
 }
        /// <summary>
        /// Calculates the requested characteristic for every fragment that a
        /// <see cref="SimpleCutRule"/> cuts from the subsequence and returns the
        /// values serialized as JSON.
        /// </summary>
        /// <param name="subsequenceId">
        /// Id of the subsequence to load from the database.
        /// </param>
        /// <param name="characteristicLinkId">
        /// Id used to resolve both the characteristic and its link.
        /// </param>
        /// <param name="windowSize">
        /// The window size passed to the cut rule.
        /// </param>
        /// <param name="step">
        /// The step passed to the cut rule.
        /// </param>
        /// <returns>
        /// JSON array with one characteristic value per fragment.
        /// </returns>
        public string GetSubsequenceCharacteristic(
            long subsequenceId,
            short characteristicLinkId,
            int windowSize,
            int step)
        {
            Chain chain;
            IFullCalculator calculator;
            Link link;

            // The database context is needed only to load the subsequence and extract its chain.
            using (var db = new LibiadaWebEntities())
            {
                var characteristicRepository = FullCharacteristicRepository.Instance;
                FullCharacteristic characteristic = characteristicRepository.GetCharacteristic(characteristicLinkId);

                calculator = FullCalculatorsFactory.CreateCalculator(characteristic);
                link = characteristicRepository.GetLinkForCharacteristic(characteristicLinkId);

                var extractor = new SubsequenceExtractor(db);
                Subsequence subsequence = db.Subsequence.Single(s => s.Id == subsequenceId);
                chain = extractor.GetSubsequenceSequence(subsequence);
            }

            CutRule rule = new SimpleCutRule(chain.Length, step, windowSize);
            CutRuleIterator iterator = rule.GetIterator();

            // Copy each [start, end) range reported by the iterator into its own chain.
            var fragments = new List<Chain>();
            while (iterator.Next())
            {
                int fragmentStart = iterator.GetStartPosition();
                int fragmentEnd = iterator.GetEndPosition();

                var elements = new List<IBaseObject>();
                for (int position = fragmentStart; position < fragmentEnd; position++)
                {
                    elements.Add(chain[position]);
                }

                fragments.Add(new Chain(elements));
            }

            var values = new double[fragments.Count];
            int index = 0;
            foreach (Chain fragment in fragments)
            {
                values[index] = calculator.Calculate(fragment, link);
                index++;
            }

            return JsonConvert.SerializeObject(values);
        }
Beispiel #10
0
        /// <summary>
        /// Extracts a subsequence that has no additional positions (no joins),
        /// reverse-complementing it when the complement attribute is present.
        /// </summary>
        /// <param name="sourceSequence">
        /// The complete sequence.
        /// </param>
        /// <param name="subsequence">
        /// The subsequence to extract.
        /// </param>
        /// <returns>
        /// The extracted <see cref="Chain"/>.
        /// </returns>
        private Chain GetSimpleSubsequence(Sequence sourceSequence, Subsequence subsequence)
        {
            ISequence fragment = sourceSequence.GetSubSequence(subsequence.Start, subsequence.Length);
            bool isComplement = subsequence.SequenceAttribute.Any(sa => sa.Attribute == Attribute.Complement);

            ISequence oriented = isComplement ? fragment.GetReverseComplementedSequence() : fragment;

            return new Chain(oriented.ConvertToString());
        }
Beispiel #11
0
 /// <summary>
 /// Extracts a subsequence from the given parent sequence, choosing the simple
 /// or the joined extraction depending on whether additional positions exist.
 /// </summary>
 /// <param name="source">
 /// Parent sequence for extraction.
 /// </param>
 /// <param name="subsequence">
 /// Subsequence to be extracted from the parent sequence.
 /// </param>
 /// <returns>
 /// The extracted sequence as <see cref="Chain"/>.
 /// </returns>
 private Chain GetSequence(Sequence source, Subsequence subsequence)
 {
     bool hasNoAdditionalPositions = subsequence.Position.Count == 0;

     return hasNoAdditionalPositions
         ? GetSimpleSubsequence(source, subsequence)
         : GetJoinedSubsequence(source, subsequence);
 }
Beispiel #12
0
        /// <summary>
        /// Extracts a joined subsequence that has no complement flag by concatenating
        /// its first part with all additional positions in stored order.
        /// </summary>
        /// <param name="sourceSequence">
        /// The complete sequence.
        /// </param>
        /// <param name="subsequence">
        /// The subsequence to extract.
        /// </param>
        /// <returns>
        /// The joined <see cref="Chain"/>.
        /// </returns>
        private Chain GetJoinedSubsequenceWithoutComplement(Sequence sourceSequence, Subsequence subsequence)
        {
            string firstPart = sourceSequence.GetSubSequence(subsequence.Start, subsequence.Length).ConvertToString();

            Position[] otherParts = subsequence.Position.ToArray();

            // Fold the remaining parts onto the first one, left to right.
            string joined = otherParts.Aggregate(
                firstPart,
                (accumulated, part) => accumulated + sourceSequence.GetSubSequence(part.Start, part.Length).ConvertToString());

            return new Chain(joined);
        }
Beispiel #13
0
        public void TestMethodSequence2()
        {
            var numbers = new List<int> { 1, 2, 2, 2, 3, 2, 4, 5, 5, 5, 5, 7, 8 };

            List<int> sequence = Subsequence.FindSubsequence(numbers);

            // Every returned element must be 5; an empty result passes trivially.
            foreach (int element in sequence)
            {
                Assert.AreEqual(5, element);
            }
        }
        /// <summary>
        /// Extracts the chains of the given subsequences from their parent sequence,
        /// which is loaded by id and treated as DNA.
        /// </summary>
        /// <param name="subsequences">
        /// The subsequences to extract.
        /// </param>
        /// <param name="chainId">
        /// Id of the parent sequence.
        /// </param>
        /// <returns>
        /// Array of <see cref="Chain"/>, one per subsequence, in input order.
        /// </returns>
        public Chain[] ExtractChains(Subsequence[] subsequences, long chainId)
        {
            string parentSequenceText = commonSequenceRepository.ToLibiadaBaseChain(chainId).ToString();
            var parentSequence = new Sequence(Alphabets.DNA, parentSequenceText);
            var chains = new Chain[subsequences.Length];

            int index = 0;
            foreach (Subsequence subsequence in subsequences)
            {
                // Subsequences without additional positions take the simple path, all others are joined.
                if (subsequence.Position.Count == 0)
                {
                    chains[index] = ExtractSimpleSubsequence(parentSequence, subsequence);
                }
                else
                {
                    chains[index] = ExtractJoinedSubsequence(parentSequence, subsequence);
                }

                index++;
            }

            return chains;
        }
        public void サブシーケンスを検索することができる()
        {
            // The method name translates to "can search for a subsequence".
            var haystack = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };

            // Pattern at the very beginning: found at 0, null when the search starts at offset 1.
            var headFinder = new Subsequence(new byte[] { 1 });
            Assert.AreEqual(0, headFinder.FindIn(haystack, 0));
            Assert.IsNull(headFinder.FindIn(haystack, 1));

            // Pattern in the middle: found at 2, null when the search starts at offset 3.
            var middleFinder = new Subsequence(new byte[] { 3, 4, 5 });
            Assert.AreEqual(2, middleFinder.FindIn(haystack, 0));
            Assert.IsNull(middleFinder.FindIn(haystack, 3));

            // Pattern that ends exactly at the end of the source.
            var tailFinder = new Subsequence(new byte[] { 8, 9, 10 });
            Assert.AreEqual(7, tailFinder.FindIn(haystack, 0));
        }
Beispiel #16
0
        public void サブシーケンスを検索することができる()
        {
            // The method name translates to "can search for a subsequence".
            var data = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };

            // Single-byte pattern located at index 0.
            var leadingPattern = new Subsequence(new byte[] { 1 });
            Assert.AreEqual(0, leadingPattern.FindIn(data, 0));
            Assert.IsNull(leadingPattern.FindIn(data, 1));

            // Three-byte pattern located at index 2.
            var innerPattern = new Subsequence(new byte[] { 3, 4, 5 });
            Assert.AreEqual(2, innerPattern.FindIn(data, 0));
            Assert.IsNull(innerPattern.FindIn(data, 3));

            // Three-byte pattern occupying the last three bytes of the data.
            var trailingPattern = new Subsequence(new byte[] { 8, 9, 10 });
            Assert.AreEqual(7, trailingPattern.FindIn(data, 0));
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="SubsequenceData"/> class
        /// from a subsequence together with its characteristics and attributes.
        /// </summary>
        /// <param name="subsequence">
        /// The subsequence whose feature, remote id, partial flag and positions are copied.
        /// </param>
        /// <param name="characteristics">
        /// The characteristics values, stored as given.
        /// </param>
        /// <param name="attributes">
        /// Attributes of the given subsequence; <c>null</c> is replaced with an empty array.
        /// </param>
        public SubsequenceData(Subsequence subsequence, double[] characteristics, int[] attributes)
        {
            CharacteristicsValues = characteristics;
            Attributes = attributes != null ? attributes : new int[0];
            FeatureId = subsequence.FeatureId;
            RemoteId = subsequence.RemoteId;
            Partial = subsequence.Partial;

            var additionalPositions = subsequence.Position.ToArray();
            int partsCount = additionalPositions.Length + 1;

            // Slot 0 holds the subsequence's own start/length; the additional positions follow.
            Starts = new int[partsCount];
            Lengths = new int[partsCount];
            Starts[0] = subsequence.Start;
            Lengths[0] = subsequence.Length;

            for (int part = 1; part < partsCount; part++)
            {
                var position = additionalPositions[part - 1];
                Starts[part] = position.Start;
                Lengths[part] = position.Length;
            }
        }
Beispiel #18
0
        /// <summary>
        /// Initializes a new instance of the <see cref="SubsequenceData"/> struct
        /// from a subsequence, with empty attributes and characteristics.
        /// </summary>
        /// <param name="subsequence">
        /// The subsequence whose id, feature, remote id, partial flag and positions are copied.
        /// </param>
        public SubsequenceData(Subsequence subsequence)
        {
            Attributes = new int[0];
            CharacteristicsValues = new double[0];
            Id = subsequence.Id;
            FeatureId = (byte)subsequence.Feature;
            RemoteId = subsequence.RemoteId;
            Partial = subsequence.Partial;

            Position[] joinedPositions = subsequence.Position.ToArray();

            // Index 0 describes the subsequence itself; additional positions follow in stored order.
            var starts = new int[joinedPositions.Length + 1];
            var lengths = new int[joinedPositions.Length + 1];
            starts[0] = subsequence.Start;
            lengths[0] = subsequence.Length;

            int slot = 1;
            foreach (Position position in joinedPositions)
            {
                starts[slot] = position.Start;
                lengths[slot] = position.Length;
                slot++;
            }

            Starts = starts;
            Lengths = lengths;
        }
        /// <summary>
        /// Builds a lookup table, indexed by character code, of one-character
        /// subsequences for the space, the digits 0-9 and the letters A-Z and a-z.
        /// All other slots up to 'z' keep their default value.
        /// </summary>
        /// <returns>
        /// The table of single-character subsequences.
        /// </returns>
        static Subsequence[] InitializeSingleSubsequences()
        {
            var table = new Subsequence['z' + 1];

            // Inclusive character ranges that receive an entry.
            char[][] ranges =
            {
                new[] { ' ', ' ' },
                new[] { '0', '9' },
                new[] { 'A', 'Z' },
                new[] { 'a', 'z' }
            };

            foreach (char[] range in ranges)
            {
                for (char symbol = range[0]; symbol <= range[1]; ++symbol)
                {
                    table[symbol] = new Subsequence
                    {
                        Value = new[] { symbol },
                        Length = 1
                    };
                }
            }

            return table;
        }
Beispiel #20
0
        /// <summary>
        /// Reads a row count from standard input and prints that many rows, where
        /// each row starts and ends with 1 and every inner value is the sum of the
        /// two adjacent values of the previous row.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments (unused).
        /// </param>
        static void Main(string[] args)
        {
            int rowCount = int.Parse(Console.ReadLine());

            // Sums of adjacent pairs of the previously built row; they become the next row's interior.
            long[] adjacentSums = new long[0];

            for (int size = 1; size <= rowCount; size++)
            {
                long[] row = new long[size];
                row[0] = 1;
                row[size - 1] = 1;

                for (int k = 0; k < adjacentSums.Length; k++)
                {
                    row[k + 1] = adjacentSums[k];
                }

                // Prepare the sums for the following row before printing the current one.
                adjacentSums = new long[size - 1];
                for (int k = 0; k < adjacentSums.Length; k++)
                {
                    adjacentSums[k] = row[k] + row[k + 1];
                }

                Console.WriteLine(string.Join(" ", row));
            }
        }
        /// <summary>
        /// Creates subsequences from features and non-coding subsequences from gaps,
        /// together with their additional positions and sequence attributes,
        /// and saves all of them to the database in a single <c>SaveChanges</c> call.
        /// </summary>
        private void CreateFeatureSubsequences()
        {
            var newSubsequences = new List<Subsequence>();
            var newPositions = new List<Position>();
            var newSequenceAttributes = new List<SequenceAttribute>();

            // Iteration starts at index 1, so features[0] is never processed here
            // (presumably the feature describing the whole record - TODO confirm).
            for (int i = 1; i < features.Count; i++)
            {
                var feature = features[i];
                var location = feature.Location;
                var leafLocations = location.GetLeafLocations();
                int featureId;

                if (feature.Key == gene)
                {
                    // Skip a gene whose leaf locations equal those of some non-gene feature;
                    // candidates are pre-filtered by the start of their first leaf location.
                    if (allNonGenesLeafLocations.Where(l => leafLocations[0].LocationStart == l[0].LocationStart).Any(l => LocationsEqual(leafLocations, l)))
                    {
                        continue;
                    }

                    featureId = Aliases.Feature.Gene;
                }
                else
                {
                    featureId = featureRepository.GetFeatureIdByName(feature.Key);
                }

                // A pseudo / pseudogene qualifier overrides whatever feature id was chosen above.
                if (feature.Qualifiers.ContainsKey(LibiadaWeb.Attribute.Pseudo.GetDisplayValue()) ||
                    feature.Qualifiers.ContainsKey(LibiadaWeb.Attribute.Pseudogene.GetDisplayValue()))
                {
                    featureId = Aliases.Feature.PseudoGen;
                }

                bool partial = CheckPartial(leafLocations);
                bool complement = location.Operator == LocationOperator.Complement;
                bool join = leafLocations.Count > 1;
                // complementJoin uses only the top-level operator: it is computed before
                // complement is widened by the first sublocation's operator below.
                bool complementJoin = join && complement;

                if (location.SubLocations.Count > 0)
                {
                    complement = complement || location.SubLocations[0].Operator == LocationOperator.Complement;
                }

                // Location bounds are shifted down by one; length is inclusive of both ends.
                int start = leafLocations[0].LocationStart - 1;
                int end = leafLocations[0].LocationEnd - 1;
                int length = end - start + 1;

                var subsequence = new Subsequence
                {
                    Id = DbHelper.GetNewElementId(db),
                    FeatureId = featureId,
                    Partial = partial,
                    SequenceId = sequenceId,
                    Start = start,
                    Length = length,
                    RemoteId = location.Accession
                };

                newSubsequences.Add(subsequence);

                AddPositionToMap(start, end);

                // The first leaf location lives on the subsequence itself;
                // every further leaf location becomes an additional Position.
                for (int k = 1; k < leafLocations.Count; k++)
                {
                    var leafLocation = leafLocations[k];
                    var leafStart = leafLocation.LocationStart - 1;
                    var leafEnd = leafLocation.LocationEnd - 1;
                    var leafLength = leafEnd - leafStart + 1;

                    var position = new Position
                    {
                        SubsequenceId = subsequence.Id,
                        Start = leafStart,
                        Length = leafLength
                    };

                    newPositions.Add(position);

                    AddPositionToMap(leafStart, leafEnd);
                }

                newSequenceAttributes.AddRange(sequenceAttributeRepository.CreateSubsequenceAttributes(feature.Qualifiers, complement, complementJoin, subsequence));
            }

            // Non-coding subsequences are created after all feature positions were added to the map.
            newSubsequences.AddRange(CreateNonCodingSubsequences());

            db.Subsequence.AddRange(newSubsequences);
            db.Position.AddRange(newPositions);
            db.SequenceAttribute.AddRange(newSequenceAttributes);

            db.SaveChanges();
        }
Beispiel #22
0
 /// <summary>
 /// Initializes a new instance of the <see cref="SubsequenceData"/> struct from a
 /// subsequence (via the single-argument constructor) and then stores the
 /// supplied characteristics and attributes.
 /// </summary>
 /// <param name="subsequence">
 /// The subsequence.
 /// </param>
 /// <param name="characteristics">
 /// The characteristics values, stored as given.
 /// </param>
 /// <param name="attributes">
 /// Attributes of the given subsequence; <c>null</c> is replaced with an empty array.
 /// </param>
 public SubsequenceData(Subsequence subsequence, double[] characteristics, int[] attributes) : this(subsequence)
 {
     CharacteristicsValues = characteristics;

     if (attributes == null)
     {
         attributes = new int[0];
     }

     Attributes = attributes;
 }
        /// <summary>
        /// Calculates and stores the characteristic for every subsequence that does not
        /// have it in the database yet, then reads the values of all subsequences back.
        /// </summary>
        /// <param name="characteristicTypeLinkId">
        /// The characteristic type and link id.
        /// </param>
        /// <param name="sequences">
        /// The chains to calculate on; element <c>i</c> belongs to <c>subsequences[i]</c>.
        /// </param>
        /// <param name="subsequences">
        /// The subsequences whose ids identify the stored characteristics.
        /// </param>
        /// <returns>
        /// The <see cref="List{Double}"/> of characteristic values, in input order.
        /// </returns>
        private List<double> CalculateCharacteristic(int characteristicTypeLinkId, Chain[] sequences, Subsequence[] subsequences)
        {
            var values = new List<double>();
            var missingCharacteristics = new List<Characteristic>();
            string calculatorName = characteristicTypeLinkRepository.GetCharacteristicType(characteristicTypeLinkId).ClassName;
            IFullCalculator calculator = CalculatorsFactory.CreateFullCalculator(calculatorName);
            var link = characteristicTypeLinkRepository.GetLibiadaLink(characteristicTypeLinkId);

            // First pass: calculate only what is not stored yet.
            for (int i = 0; i < sequences.Length; i++)
            {
                // The id is copied into a local that the query lambda can capture.
                long id = subsequences[i].Id;
                bool alreadyStored = db.Characteristic.Any(c => c.SequenceId == id && c.CharacteristicTypeLinkId == characteristicTypeLinkId);

                if (alreadyStored)
                {
                    continue;
                }

                double calculated = calculator.Calculate(sequences[i], link);
                missingCharacteristics.Add(new Characteristic
                {
                    SequenceId = id,
                    CharacteristicTypeLinkId = characteristicTypeLinkId,
                    Value = calculated
                });
            }

            db.Characteristic.AddRange(missingCharacteristics);
            db.SaveChanges();

            // Second pass: read every value back from the database.
            for (int i = 0; i < sequences.Length; i++)
            {
                long id = subsequences[i].Id;
                double stored = db.Characteristic.Single(c => c.SequenceId == id && c.CharacteristicTypeLinkId == characteristicTypeLinkId).Value;

                values.Add(stored);
            }

            return values;
        }
        /// <summary>
        /// Creates the complement, complement-join and partial attributes that apply
        /// to the subsequence. Complement-join is only added together with complement.
        /// </summary>
        /// <param name="complement">
        /// Whether the complement attribute should be created.
        /// </param>
        /// <param name="complementJoin">
        /// Whether the complement-join attribute should be created (requires <paramref name="complement"/>).
        /// </param>
        /// <param name="subsequence">
        /// The subsequence the attributes belong to; its <c>Partial</c> flag decides the partial attribute.
        /// </param>
        /// <returns>
        /// The <see cref="List{SequenceAttribute}"/> of created attributes.
        /// </returns>
        private List<SequenceAttribute> CreateComplementJoinPartialAttributes(bool complement, bool complementJoin, Subsequence subsequence)
        {
            var attributes = new List<SequenceAttribute>();

            if (complement)
            {
                attributes.Add(CreateSequenceAttribute(LibiadaWeb.Attribute.Complement, subsequence.Id));
            }

            if (complement && complementJoin)
            {
                attributes.Add(CreateSequenceAttribute(LibiadaWeb.Attribute.ComplementJoin, subsequence.Id));
            }

            if (subsequence.Partial)
            {
                attributes.Add(CreateSequenceAttribute(LibiadaWeb.Attribute.Partial, subsequence.Id));
            }

            return attributes;
        }
Beispiel #25
0
        /// <summary>
        /// Loads the parent sequence of the given subsequence and extracts the
        /// subsequence's own sequence from it.
        /// </summary>
        /// <param name="subsequence">
        /// Subsequence to be extracted; its <c>SequenceId</c> identifies the parent sequence.
        /// </param>
        /// <returns>
        /// The extracted sequence as <see cref="Chain"/>.
        /// </returns>
        public Chain GetSubsequenceSequence(Subsequence subsequence)
        {
            return GetSequence(GetDotNetBioSequence(subsequence.SequenceId), subsequence);
        }
        /// <summary>
        /// Extracts a joined subsequence that carries the complement flag.
        /// With the complement-join attribute the parts are joined first and the
        /// result is reverse-complemented as a whole; otherwise every part is
        /// reverse-complemented separately before being appended.
        /// </summary>
        /// <param name="sourceSequence">
        /// The complete sequence.
        /// </param>
        /// <param name="subsequence">
        /// The subsequence to extract.
        /// </param>
        /// <returns>
        /// The resulting <see cref="Chain"/>.
        /// </returns>
        private Chain ExtractJoinedSubsequenceWithComplement(Sequence sourceSequence, Subsequence subsequence)
        {
            var firstPart = sourceSequence.GetSubSequence(subsequence.Start, subsequence.Length);
            var otherParts = subsequence.Position.ToArray();

            bool complementJoin = subsequence.SequenceAttribute.Any(sa => sa.Attribute == Attribute.ComplementJoin);

            if (complementJoin)
            {
                var joined = firstPart.ConvertToString();

                foreach (var part in otherParts)
                {
                    joined += sourceSequence.GetSubSequence(part.Start, part.Length).ConvertToString();
                }

                // Complement the joined string as one DNA sequence.
                return new Chain(new Sequence(Alphabets.DNA, joined).GetReverseComplementedSequence().ConvertToString());
            }

            var complemented = firstPart.GetReverseComplementedSequence().ConvertToString();

            foreach (var part in otherParts)
            {
                complemented += sourceSequence.GetSubSequence(part.Start, part.Length).GetReverseComplementedSequence().ConvertToString();
            }

            return new Chain(complemented);
        }
Beispiel #27
0
 /// <summary>
 /// Initializes a new instance of the form: runs <c>InitializeComponent</c>
 /// and then creates the <c>Subsequence</c> instance kept in <c>subsequence</c>.
 /// </summary>
 public Form1()
 {
     InitializeComponent();
     // Created after the components are initialized; the statement order is kept as is.
     subsequence = new Subsequence();
 }
Beispiel #28
0
        /// <summary>
        /// Creates subsequences from features and non-coding subsequences from gaps,
        /// together with their additional positions and sequence attributes,
        /// and saves all of them to the database in a single <c>SaveChanges</c> call.
        /// </summary>
        /// <returns>
        /// Tuple of the number of created coding subsequences and the number of
        /// created non-coding subsequences, in that order.
        /// </returns>
        private (int, int) CreateFeatureSubsequences()
        {
            var codingSubsequences    = new List <Subsequence>(features.Count);
            var newPositions          = new List <Position>();
            var newSequenceAttributes = new List <SequenceAttribute>();

            // Iteration starts at index 1, so features[0] is never processed here
            // (presumably the feature describing the whole record - TODO confirm).
            for (int i = 1; i < features.Count; i++)
            {
                FeatureItem      feature       = features[i];
                ILocation        location      = feature.Location;
                List <ILocation> leafLocations = location.GetLeafLocations();
                Feature          subsequenceFeature;

                if (feature.Key == gene)
                {
                    // Skip a gene whose leaf locations equal those of some non-gene feature.
                    if (allNonGenesLeafLocations.Any(l => LocationsEqual(leafLocations, l)))
                    {
                        continue;
                    }

                    subsequenceFeature = Feature.Gene;
                }
                else
                {
                    subsequenceFeature = FeatureRepository.GetFeatureByName(feature.Key);
                }

                // A pseudo / pseudogene qualifier overrides whatever feature was chosen above.
                if (feature.Qualifiers.ContainsKey(LibiadaWeb.Attribute.Pseudo.GetDisplayValue()) ||
                    feature.Qualifiers.ContainsKey(LibiadaWeb.Attribute.Pseudogene.GetDisplayValue()))
                {
                    subsequenceFeature = Feature.PseudoGen;
                }

                bool partial        = CheckPartial(leafLocations);
                bool complement     = location.Operator == LocationOperator.Complement;
                bool join           = leafLocations.Count > 1;
                // complementJoin uses only the top-level operator: it is computed before
                // complement is widened by the first sublocation's operator below.
                bool complementJoin = join && complement;

                if (location.SubLocations.Count > 0)
                {
                    complement = complement || location.SubLocations[0].Operator == LocationOperator.Complement;
                }

                // Location bounds are shifted down by one; length is inclusive of both ends.
                int start  = leafLocations[0].LocationStart - 1;
                int end    = leafLocations[0].LocationEnd - 1;
                int length = end - start + 1;

                var subsequence = new Subsequence
                {
                    Id         = db.GetNewElementId(),
                    Feature    = subsequenceFeature,
                    Partial    = partial,
                    SequenceId = sequenceId,
                    Start      = start,
                    Length     = length,
                    RemoteId   = location.Accession
                };

                codingSubsequences.Add(subsequence);
                AddPositionToMap(start, end);
                // Additional positions are created by the helper from the leaf locations and the new id.
                newPositions.AddRange(CreateAdditionalPositions(leafLocations, subsequence.Id));
                var sequenceAttributes = sequenceAttributeRepository.Create(feature.Qualifiers, complement, complementJoin, subsequence);
                newSequenceAttributes.AddRange(sequenceAttributes);
            }

            // Non-coding subsequences are created after all coding features were processed.
            var nonCodingSubsequences = CreateNonCodingSubsequences();

            db.Subsequence.AddRange(codingSubsequences);
            db.Subsequence.AddRange(nonCodingSubsequences);
            db.Position.AddRange(newPositions);
            db.SequenceAttribute.AddRange(newSequenceAttributes);

            db.SaveChanges();

            return(codingSubsequences.Count, nonCodingSubsequences.Count);
        }
        /// <summary>
        /// Extracts a subsequence that has no additional positions (no joins),
        /// reverse-complementing it when the complement attribute is present.
        /// </summary>
        /// <param name="sourceSequence">
        /// The complete sequence.
        /// </param>
        /// <param name="subsequence">
        /// The subsequence to extract.
        /// </param>
        /// <returns>
        /// The extracted <see cref="Chain"/>.
        /// </returns>
        private Chain ExtractSimpleSubsequence(Sequence sourceSequence, Subsequence subsequence)
        {
            ISequence fragment = sourceSequence.GetSubSequence(subsequence.Start, subsequence.Length);
            bool isComplement = subsequence.SequenceAttribute.Any(sa => sa.Attribute == LibiadaWeb.Attribute.Complement);

            ISequence oriented = isComplement ? fragment.GetReverseComplementedSequence() : fragment;

            return new Chain(oriented.ConvertToString());
        }
 /// <summary>
 /// Extracts a joined subsequence, delegating to the complement or the
 /// non-complement extractor depending on the subsequence's attributes.
 /// </summary>
 /// <param name="sourceSequence">
 /// The complete sequence.
 /// </param>
 /// <param name="subsequence">
 /// The subsequence to extract.
 /// </param>
 /// <returns>
 /// The <see cref="Chain"/> produced by the selected extractor.
 /// </returns>
 private Chain ExtractJoinedSubsequence(Sequence sourceSequence, Subsequence subsequence)
 {
     bool isComplement = subsequence.SequenceAttribute.Any(attribute => attribute.Attribute == Attribute.Complement);

     return isComplement
         ? ExtractJoinedSubsequenceWithComplement(sourceSequence, subsequence)
         : ExtractJoinedSubsequenceWithoutComplement(sourceSequence, subsequence);
 }
Beispiel #31
0
        /// <summary>
        /// Builds the attributes of a subsequence from its feature qualifiers and flags.
        /// Every qualifier value becomes one attribute, except values of the "translation"
        /// qualifier, which are skipped. A "protein_id" value is additionally stored as the
        /// subsequence's remote id. Complement / complement-join / partial attributes are appended last.
        /// </summary>
        /// <param name="qualifiers">
        /// The qualifiers (name to list of values) to convert into attributes.
        /// </param>
        /// <param name="complement">
        /// Complement flag.
        /// </param>
        /// <param name="complementJoin">
        /// Complement join flag.
        /// </param>
        /// <param name="subsequence">
        /// The subsequence the attributes belong to; its <c>RemoteId</c> may be set by this method.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown if a "protein_id" value differs from a non-empty remote id already set on the subsequence.
        /// </exception>
        /// <returns>
        /// The <see cref="List{SequenceAttribute}"/>.
        /// </returns>
        public List<SequenceAttribute> Create(Dictionary<string, List<string>> qualifiers, bool complement, bool complementJoin, Subsequence subsequence)
        {
            var attributes = new List<SequenceAttribute>(qualifiers.Count);

            foreach (KeyValuePair<string, List<string>> qualifier in qualifiers)
            {
                string name = qualifier.Key;

                foreach (string rawValue in qualifier.Value)
                {
                    // Translations are not stored as attributes: abandon this qualifier's values.
                    if (name == "translation")
                    {
                        break;
                    }

                    if (name == "protein_id")
                    {
                        string remoteId = CleanAttributeValue(rawValue);
                        bool conflictsWithExistingId = !string.IsNullOrEmpty(subsequence.RemoteId) && subsequence.RemoteId != remoteId;

                        if (conflictsWithExistingId)
                        {
                            throw new Exception($"Several remote ids in one subsequence. First {subsequence.RemoteId} Second {remoteId}");
                        }

                        subsequence.RemoteId = remoteId;
                    }

                    attributes.Add(Create(name, CleanAttributeValue(rawValue), subsequence.Id));
                }
            }

            attributes.AddRange(CreateComplementJoinPartialAttributes(complement, complementJoin, subsequence));

            return attributes;
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="SubsequenceData"/> struct
 /// from a subsequence, its characteristics values and its attributes.
 /// </summary>
 /// <param name="subsequence">
 /// The subsequence; forwarded to the single-argument constructor.
 /// </param>
 /// <param name="characteristics">
 /// The characteristics values, stored as given.
 /// </param>
 /// <param name="attributes">
 /// Attributes of the given subsequence as an array; <c>null</c> is replaced by an empty array.
 /// </param>
 public SubsequenceData(Subsequence subsequence, double[] characteristics, int[] attributes) : this(subsequence)
 {
     CharacteristicsValues = characteristics;

     if (attributes == null)
     {
         Attributes = Array.Empty<int>();
     }
     else
     {
         Attributes = attributes;
     }
 }
Beispiel #33
0
        /// <summary>
        /// Creates the flag-like attributes of a subsequence, in this order:
        /// complement, complement join (only together with complement) and partial.
        /// </summary>
        /// <param name="complement">
        /// Whether the complement attribute should be created.
        /// </param>
        /// <param name="complementJoin">
        /// Whether the complement join attribute should be created; ignored unless <paramref name="complement"/> is set.
        /// </param>
        /// <param name="subsequence">
        /// The subsequence whose id the attributes reference and whose <c>Partial</c> flag is read.
        /// </param>
        /// <returns>
        /// The <see cref="List{SequenceAttribute}"/>.
        /// </returns>
        private List<SequenceAttribute> CreateComplementJoinPartialAttributes(bool complement, bool complementJoin, Subsequence subsequence)
        {
            var attributes = new List<SequenceAttribute>();

            if (complement)
            {
                attributes.Add(CreateSequenceAttribute(Attribute.Complement, subsequence.Id));
            }

            if (complement && complementJoin)
            {
                attributes.Add(CreateSequenceAttribute(Attribute.ComplementJoin, subsequence.Id));
            }

            if (subsequence.Partial)
            {
                attributes.Add(CreateSequenceAttribute(Attribute.Partial, subsequence.Id));
            }

            return attributes;
        }
        /// <summary>
        /// Extracts joined subsequence without complement flag: the main fragment
        /// followed by the fragment of every additional position, concatenated.
        /// </summary>
        /// <param name="sourceSequence">
        /// The complete sequence.
        /// </param>
        /// <param name="subsequence">
        /// The subsequence; its start/length give the first fragment and its
        /// <c>Position</c> collection gives the fragments appended after it.
        /// </param>
        /// <returns>
        /// The <see cref="Chain"/> built from the concatenated fragments.
        /// </returns>
        private Chain ExtractJoinedSubsequenceWithoutComplement(Sequence sourceSequence, Subsequence subsequence)
        {
            // A builder avoids re-copying the whole (potentially very long) string on every appended fragment.
            var joinedSequence = new System.Text.StringBuilder(
                sourceSequence.GetSubSequence(subsequence.Start, subsequence.Length).ConvertToString());

            // Fragments are appended in the enumeration order of the Position collection.
            foreach (var position in subsequence.Position)
            {
                joinedSequence.Append(sourceSequence.GetSubSequence(position.Start, position.Length).ConvertToString());
            }

            return new Chain(joinedSequence.ToString());
        }
Beispiel #35
0
 /// <summary>
 /// Initializes a rhetorical figure from its tokens, its type and a window id.
 /// </summary>
 /// <param name="subsequence">The subsequence stored as the figure's <c>Tokens</c>.</param>
 /// <param name="type">The kind of rhetorical figure.</param>
 /// <param name="windowId">The value stored as the figure's <c>WindowId</c>.</param>
 public RhetoricalFigure(Subsequence subsequence, RhetoricalFigures type, int windowId)
 {
     this.Tokens = subsequence;
     this.Type = type;
     this.WindowId = windowId;
 }
        /// <summary>
        /// Parses a run of regex elements from <paramref name="s"/> beginning at <paramref name="pos"/>.
        /// Parsing stops at a ")" symbol or when <c>PeekSym</c> yields <c>null</c>; a directly following
        /// "*", "+" or "?" selects the kind of sequence that is returned.
        /// </summary>
        /// <param name="s">The pattern text.</param>
        /// <param name="pos">Current position in <paramref name="s"/>; advanced past everything consumed.</param>
        /// <returns>
        /// A <c>ZeroOrMore</c>, <c>OneOrMore</c> or <c>Optional</c> when a quantifier follows,
        /// otherwise a plain <c>Subsequence</c>.
        /// </returns>
        /// <exception cref="RegexValidationException">Thrown when an unexpected symbol is encountered.</exception>
        private Sequence ParseSequence(string s, ref int pos)
        {
            int startPos = pos;
            var elements = new List<RegexElement>();
            bool closed = false;

            while (!closed)
            {
                switch (PeekSym(s, pos))
                {
                    case null:
                    case ")":
                        AdvancePos(s, ref pos);
                        closed = true;
                        break;

                    case ".":
                        elements.Add(new ItemMatchAndMore(pos));
                        AdvancePos(s, ref pos);
                        break;

                    case ":":
                        elements.Add(new AnyDependency(pos));
                        AdvancePos(s, ref pos);
                        break;

                    case "[":
                        AdvancePos(s, ref pos);
                        elements.Add(ParseSet(s, ref pos));
                        AdvancePos(s, ref pos);
                        break;

                    case "(":
                        AdvancePos(s, ref pos);
                        Sequence nested = ParseSequence(s, ref pos);
                        var plainGroup = nested as Subsequence;

                        // An unquantified group contributes its elements directly instead of nesting.
                        if (plainGroup == null)
                        {
                            elements.Add(nested);
                        }
                        else
                        {
                            elements.AddRange(plainGroup.Elements);
                        }

                        AdvancePos(s, ref pos);
                        break;

                    default:
                        throw new RegexValidationException(null, null, $"Invalid symbol at {s.Substring(pos)}");
                }
            }

            // Pick the sequence kind from the symbol that follows; only a quantifier is consumed.
            Sequence result;
            bool quantified = true;

            switch (PeekSym(s, pos))
            {
                case "*":
                    result = new ZeroOrMore(startPos, elements);
                    break;

                case "+":
                    result = new OneOrMore(startPos, elements);
                    break;

                case "?":
                    result = new Optional(startPos, elements);
                    break;

                default:
                    result = new Subsequence(startPos, elements);
                    quantified = false;
                    break;
            }

            if (quantified)
            {
                AdvancePos(s, ref pos);
            }

            return result;
        }
Beispiel #37
0
        /// <summary>
        /// Antimetabole: Repetition of words in reverse grammatical order.
        /// Finds pairs of equivalent subsequences inside a sliding sentence window, merges them
        /// into repetitions, and combines two repetitions of the same window into a figure when
        /// their halves appear in crossed (A B ... B A) order. Figures are appended to <c>a.Figures</c>.
        /// </summary>
        /// <param name="a">Analyzer supplying the document sentences and receiving the found figures.</param>
        /// <param name="windowSize">Number of consecutive sentences per search window; 1 when <c>null</c>.</param>
        /// <param name="minLength">Converted to an integer (2 when <c>null</c>) but not otherwise used by this search.</param>
        public static void FindAntimetabole(Analyzer a, int? windowSize, object minLength)
        {
            int ws = windowSize ?? 1; // Use default window size of 1.
            int ml = Convert.ToInt32(minLength ?? 2); // Unused by the search; kept so the argument is still converted as before.

            var allSubsequences = new List<List<Subsequence>>();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                var window = new List<Subsequence>(); // Search window
                for (int j = 0; j < ws; ++j) {
                    if (i + j < a.Document.Sentences.Count) {
                        var phrases = a.Document.Sentences[i + j].Phrases;
                        if (phrases.Count > 0) {
                            window.AddRange(phrases[0].SubsequencesKeepNounsVerbsAdjectivesAdverbsTag);
                        }
                    }
                }

                // Search.
                for (int j = 0; j < window.Count; ++j) {
                    var list = new List<Subsequence> { new Subsequence(window[j], i) };

                    for (int k = j + 1; k < window.Count; ++k) {
                        // Always compare against the most recently accepted repetition.
                        var comparer = list.Last();
                        var current = window[k];
                        if (comparer.Equivalent(current)) {
                            list.Add(new Subsequence(current, i));
                        }
                    }

                    // Only exact pairs are kept.
                    if (list.Count == 2) {
                        allSubsequences.Add(list);
                    }
                }
            }

            var repetitions = MergeFigures(allSubsequences, RhetoricalFigures.Antimetabole, multiWindow: true, demarcation: null);

            var figures = new List<RhetoricalFigure>();

            for (int i = 0; i < repetitions.Count - 1; ++i) {
                var al = repetitions[i].Tokens.Split();
                for (int j = i + 1; j < repetitions.Count; ++j) {
                    if (repetitions[i].WindowId != repetitions[j].WindowId) {
                        continue;
                    }

                    var bl = repetitions[j].Tokens.Split();

                    if ((al[0].Last().Right <= bl[0].First().Left || al[0].Last().SentenceId < bl[0].First().SentenceId) &&
                        (al[1].First().Left >= bl[1].Last().Right || al[1].First().SentenceId > bl[1].Last().SentenceId)) {
                        // Separators are appended to the assembled figure rather than to 'al'/'bl':
                        // 'al' is reused for every 'j', so mutating it would pile up one extra
                        // separator per additional match.
                        var subsequence = new Subsequence();
                        subsequence.AddRange(al[0]);
                        subsequence.AddRange(bl[0]);
                        subsequence.Add(new SubsequenceToken(new Token(FigureComponentsSeparator, "", 0)));
                        subsequence.AddRange(bl[1]);
                        subsequence.AddRange(al[1]);
                        subsequence.Add(new SubsequenceToken(new Token(FigureComponentsSeparator, "", 0)));

                        figures.Add(new RhetoricalFigure(subsequence, RhetoricalFigures.Antimetabole, repetitions[i].WindowId));
                    }
                }
            }

            a.Figures.AddRange(figures);
        }
Beispiel #38
0
        // Add WordNet search paths to this as the 'object' parameter?
        /// <summary>
        /// Oxymoron: A terse paradox; the yoking of two contradictory terms.
        /// For every sentence, builds candidate token groups from selected typed dependencies,
        /// tests the first and last token of each group against the analyzer's WordNet
        /// search-path tree, and appends the merged results to <c>a.Figures</c>.
        /// </summary>
        /// <param name="a">Analyzer supplying the document sentences and the WordNet search path; receives the found figures.</param>
        /// <param name="windowSize">Not used; the window size is one sentence.</param>
        /// <param name="greedy">Boxed <c>bool</c> (<c>null</c> means <c>false</c>); forwarded as <c>useAllForms</c> to the derivational-form lookups and to <c>OxymoronData</c>.</param>
        public static void FindOxymoron(Analyzer a, int? windowSize, object greedy)
        {
            int ws = windowSize ?? 1; // Not used. The window size is one sentence.
              bool greedySearch = (bool?)greedy ?? false;

              // Reads the number that follows the hyphen at the end of the node's string form
              // (optionally followed by apostrophes) and returns it minus one.
              GetDependencyIndexDelegate GetDependencyIndex = delegate(TreeGraphNode t)
              {
            return Convert.ToInt32(Regex.Match(t.toString(), "^.*?-(\\d+)\\'*$").Result("$1")) - 1;
              };

              // Visitor applied to every node of the WordNet search-path tree. It fills the node's
              // word list from its parent's words according to the node's relation and, once an
              // antonym step lies on the path, records how many words overlap with the derived forms of W2.
              Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationVisitor =
            (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
              {
            if (n.IsRoot())
              return;

            var oxymoronData = (OxymoronData)o;

            // An overlap has already been recorded; skip the remaining nodes.
            if (oxymoronData.Overlap.Value != 0)
              return;

            var w1 = oxymoronData.W1;
            var derivedFormsW2 = oxymoronData.GetDerivedFormsW2();

            // Look for an Antonym relation among this node's ancestors (the root excluded).
            bool checkedAntonyms = false;
            var currentNode = n;
            while (!currentNode.Parent.IsRoot()) {
              currentNode = currentNode.Parent;
              if (currentNode.Value.Relation == WordNetEngine.SynSetRelation.Antonym) {
            checkedAntonyms = true;
            break;
              }
            }

            var p = n.Parent;

            // Start from W1 alone directly below the root, otherwise from the parent's words.
            var candidates = new List<string> { w1 };
            if (!p.IsRoot())
              candidates = p.Value.Words;

            var relation = n.Value.Relation;

            // NOTE(review): there is no default case; for any other relation n.Value.Words keeps
            // whatever value it had before this visit — confirm the tree only uses these three relations.
            switch(relation) {
              case WordNetEngine.SynSetRelation.SimilarTo:
            n.Value.Words = Token.FindSynonyms(candidates);
            break;

              case WordNetEngine.SynSetRelation.Antonym:
            n.Value.Words = Token.FindAntonyms(candidates);
            if (!checkedAntonyms)
              checkedAntonyms = true;
            break;

              case WordNetEngine.SynSetRelation.DerivationallyRelated:
            n.Value.Words = Token.FindDerivationalForms(candidates, Analyzer.SimilarityPrefixes, Analyzer.MostCommonSimilaritySuffixes, useAllForms: greedySearch ? true : false);
            if (checkedAntonyms) {
              // After an antonym step, also consider W1 prefixed with each negation prefix.
              var negations = new List<string>(Analyzer.NegationPrefixes.Select(x => (string)(x.Clone()) + w1));

              n.Value.Words.AddRange(Token.FindDerivationalForms(negations, null, null, useAllForms: greedySearch ? true : false));
            }
            break;
            }

            // Before any antonym step the incoming candidates are carried along.
            if (!checkedAntonyms)
              n.Value.Words.AddRange(candidates);

            n.Value.Words = n.Value.Words.Distinct().ToList(); // Remove duplicates.

            if (oxymoronData.Debug) {
              Console.WriteLine("===================================================");
              Console.WriteLine("Relation: " + relation.ToString());
              //Console.WriteLine("Parent relation: " + p.Value.Relation.ToString());
              Console.WriteLine("Child count: " + n.Children.Count());
              Console.WriteLine("Node candidates:");
              // n.IsRoot() cannot be true here: root nodes return at the top of the visitor.
              if (n.IsRoot() || n.Value.Words.Count == 0) Console.WriteLine("  None");
              else {
            foreach (var w in n.Value.Words)
              Console.WriteLine("  " + w.ToString());
              }
              if (n.IsLeaf()) Console.WriteLine("LEAF NODE");
              Console.WriteLine("===================================================");
            }

            // The overlap with W2's derived forms is only measured once an antonym step was taken.
            if (checkedAntonyms)
              oxymoronData.Overlap.Value = n.Value.Words.Intersect(derivedFormsW2).Count();
              };

              // Visitor that resets every node's word list to null.
              Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationNullVisitor =
            (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
              {
            //Console.WriteLine(n.Value.Relation.ToString());
            n.Value.Words = null;
              };

              // Only typed dependencies whose short name matches one of these are considered.
              string dependencySymbols = @"^(amod|advmod|acomp|dobj|nsubj|prep)$";

              var allSubsequences = new List<List<Subsequence>>();

              TreebankLanguagePack tlp = new PennTreebankLanguagePack();
              GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();

              for (int i = 0; i < a.Document.Sentences.Count; ++i) {
            var sentence = a.Document.Sentences[i];
            var subsequenceTokens = new List<SubsequenceToken>();
            foreach (var token in sentence.Tokens)
              subsequenceTokens.Add(new SubsequenceToken(token, sentence));
            var phrases = sentence.Phrases;
            if (phrases.Count > 0) {
              // Subsequence over all tokens of the sentence, indexed by the dependency indices below.
              var subsequence = new Subsequence(subsequenceTokens, sentence, phrases[0].Subsequences[0].ContainingSubsequence, i);

              var tree = sentence.Tree;
              GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
              java.util.Collection tdc = gs.typedDependenciesCollapsed();

              var candidates = new List<Subsequence>();
              for (java.util.Iterator j = tdc.iterator(); j.hasNext(); ) {
            var td = (TypedDependency)j.next();
            var relation = td.reln().getShortName();
            if (Regex.IsMatch(relation, dependencySymbols)) {
              var governorIndex = GetDependencyIndex(td.gov());
              var dependentIndex = GetDependencyIndex(td.dep());

              // For "prep" the whole span between the two tokens is taken; otherwise just the two tokens.
              var index = Math.Min(governorIndex, dependentIndex);
              var count = Math.Abs(dependentIndex - governorIndex) + 1;
              var ss = relation == "prep" ? subsequence.GetRange(index, count) : subsequence.Where((n, k) => k == governorIndex | k == dependentIndex).ToList();

              // Remove any leftover punctuation from the candidate subsequences.
              ss.RemoveAll(n => Regex.IsMatch(n.Tag, Analyzer.PunctuationPatterns));

              candidates.Add(new Subsequence(ss, sentence, subsequence.ContainingSubsequence, i));
            }
              }

              // Determine whether the candidate pairs are oxymorons.
              for (int k = 0; k < candidates.Count; ++k) {
            var list = new List<Subsequence>();

            // The pair under test is the first and the last token of the candidate.
            // NOTE(review): if the punctuation filter above removed every token, indexing here would throw — confirm this cannot happen.
            Token[] pair = { candidates[k][0], candidates[k][candidates[k].Count - 1] };

            // Clear (i.e. null) all the word lists in the WordNet search-path tree.
            a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);

            var overlap = new OxymoronData.IntClass(0);
            a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair, overlap, greedy: greedySearch, debug: false));
            // No overlap found: reset the tree and retry with the pair reversed.
            if (overlap.Value == 0) {
              a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);
              a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair.Reverse().ToArray(), overlap, greedy: greedySearch, debug: false));
            }

            if (overlap.Value != 0) {
              list.Add(candidates[k]);
              allSubsequences.Add(list);
            }
              }
            }
              }

              // Remove duplicate instances and merge those contained in others.
              var figures = MergeFigures(allSubsequences, RhetoricalFigures.Oxymoron, multiWindow: true);

              a.Figures.AddRange(figures);
        }
Beispiel #39
0
        /// <summary>
        /// Anadiplosis: Repetition of the ending word or phrase from the previous clause at the beginning of the next.
        /// Candidate pairs are collected per sliding sentence window, merged, and appended to <c>a.Figures</c>.
        /// </summary>
        /// <param name="a">Analyzer supplying the document sentences and receiving the found figures.</param>
        /// <param name="windowSize">Number of consecutive sentences per search window; 2 when <c>null</c>.</param>
        /// <param name="minLength">Converted to an integer (2 when <c>null</c>); the value is not used by the search.</param>
        public static void FindAnadiplosis(Analyzer a, int? windowSize, object minLength)
        {
            int ws = windowSize ?? 2; // Use default window size of 2.
            int ml = Convert.ToInt32(minLength ?? 2); // V. Gawryjolek, p. 23

            var allSubsequences = new List<List<Subsequence>>();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                // Build the search window from the clauses of the next 'ws' sentences.
                var window = new List<Subsequence>();
                for (int offset = 0; offset < ws; ++offset) {
                    if (i + offset >= a.Document.Sentences.Count) {
                        continue;
                    }

                    var sentence = a.Document.Sentences[i + offset];

                    // The first clause provides the containing subsequence used for the contiguity checks.
                    var containingSubsequence = new Subsequence();
                    if (sentence.Clauses.Count > 0) {
                        var withoutDeterminers = sentence.Clauses[0].SubsequencesNoDeterminersEtc;
                        containingSubsequence = withoutDeterminers.Count > 0
                            ? sentence.Clauses[0].SubsequencesNoDeterminersEtc[0] // No determiners etc. needed here.
                            : sentence.Clauses[0].Subsequences[0];
                    }

                    for (int k = 0; k < sentence.Clauses.Count; ++k) {
                        var clauseSubsequences = sentence.Clauses[k].SubsequencesNoStartDeterminersEtc;
                        foreach (var clauseSubsequence in clauseSubsequences) {
                            clauseSubsequence.ContainingSubsequence = containingSubsequence; // To check for contiguity.
                        }
                        window.AddRange(clauseSubsequences);
                    }
                }

                // Forward search: a clause-final subsequence followed by an equivalent,
                // right-contiguous clause-initial one.
                for (int j = 0; j < window.Count; ++j) {
                    if (!window[j].Last().IsEnd) {
                        continue;
                    }

                    var seed = new Subsequence(window[j], i);
                    var pair = new List<Subsequence> { seed };

                    for (int k = j + 1; k < window.Count; ++k) {
                        var current = window[k];
                        if (seed.Equivalent(current) && current.First().IsStart && seed.IsRightContiguous(current)) {
                            pair.Add(new Subsequence(current, i));
                            break;
                        }
                    }

                    if (pair.Count > 1) {
                        allSubsequences.Add(pair);
                    }
                }

                // Mirror search: a clause-initial subsequence followed by an equivalent,
                // left-contiguous clause-final one; the pair is then put into text order.
                for (int j = 0; j < window.Count; ++j) {
                    if (!window[j].First().IsStart) {
                        continue;
                    }

                    var seed = new Subsequence(window[j], i);
                    var pair = new List<Subsequence> { seed };

                    for (int k = j + 1; k < window.Count; ++k) {
                        var current = window[k];
                        if (seed.Equivalent(current) && current.Last().IsEnd && seed.IsLeftContiguous(current)) {
                            pair.Add(new Subsequence(current, i));
                            break;
                        }
                    }

                    if (pair.Count > 1) {
                        allSubsequences.Add(pair.OrderBy(s => s.SentenceId).ThenBy(s => s[0].Left).ToList());
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            var figures = MergeFigures(allSubsequences, RhetoricalFigures.Anadiplosis, multiWindow: true);

            a.Figures.AddRange(figures);
        }
Beispiel #40
0
        // Methods:
        /// <summary>
        /// Turns candidate figures (each a list of subsequences) into <see cref="RhetoricalFigure"/> instances:
        /// sorts them by position in the text, optionally merges overlapping multi-window candidates,
        /// drops candidates contained in others, and flattens every remaining candidate into one figure.
        /// </summary>
        /// <param name="subsequences">Candidate figures; every inner list and its first subsequence must be non-empty.</param>
        /// <param name="type">The figure type assigned to every result.</param>
        /// <param name="multiWindow">When <c>true</c>, candidates that chain into each other are merged first.</param>
        /// <param name="demarcation">Separator token text appended after every component; <c>null</c> appends nothing.</param>
        /// <returns>The merged figures.</returns>
        public static List<RhetoricalFigure> MergeFigures(List<List<Subsequence>> subsequences, RhetoricalFigures type, bool multiWindow = true, string demarcation = FigureComponentsSeparator)
        {
            // Some figures may be out of order WRT the start of the text; reorder them here.
            var deepSubsequences = subsequences.OrderBy(s => s[0].SentenceId).ThenBy(s => s[0][0].Left).ToList();

            // Merge multi-window figures.
            if (multiWindow) {
                for (int i = 0; i < deepSubsequences.Count - 1; ++i) {
                    for (int j = i + 1; j < deepSubsequences.Count; ++j) {
                        var intersectionList = deepSubsequences[i].Intersect(deepSubsequences[j]).ToList();
                        if (intersectionList.Count == 0) {
                            continue;
                        }

                        // Merge only when figure i ends where figure j begins.
                        if (deepSubsequences[i].Last() == intersectionList.Last() && deepSubsequences[j].First() == intersectionList.First()) {
                            deepSubsequences[i] = deepSubsequences[i].Union(deepSubsequences[j]).ToList();
                            deepSubsequences.RemoveAt(j);
                            i -= 1; // Re-examine the merged figure against the remaining ones.
                            break;
                        }
                    }
                }
            }

            // At this point, no subsequence component of any figure should contain part of any other subsequence component.
            for (int i = 0; i < deepSubsequences.Count; ++i) {
                deepSubsequences[i] = deepSubsequences[i].Distinct().ToList();
            }

            // N.B. V. the following for a discussion of how 'Distinct()' results are ordered:
            // http://stackoverflow.com/questions/4734852/does-c-sharp-distinct-method-keep-original-ordering-of-sequence-intact

            // Flatten out subsequence lists.
            var flatSubsequences = new List<Subsequence>();
            foreach (var s in deepSubsequences) {
                flatSubsequences.Add(new Subsequence(s.SelectMany(x => x), s[0].WindowId));
            }

            // Remove duplicate list instances and merge those contained in others.
            for (int i = 0; i < flatSubsequences.Count - 1; ++i) {
                for (int j = i + 1; j < flatSubsequences.Count; ++j) {
                    if (flatSubsequences[i].IsSupersetOf(flatSubsequences[j])) {
                        flatSubsequences.RemoveAt(j);
                        deepSubsequences.RemoveAt(j);
                        i -= 1;
                        break;
                    }
                    else if (flatSubsequences[j].IsSupersetOf(flatSubsequences[i])) {
                        // Entry j is copied over entry i; the now-duplicated entry j is removed
                        // by the first branch when index i is examined again.
                        flatSubsequences[i] = flatSubsequences[j];
                        deepSubsequences[i] = new List<Subsequence>(deepSubsequences[j]);
                        i -= 1;
                        break;
                    }
                }
            }

            // Remove any duplicate subsequences within each figure.
            for (int i = 0; i < deepSubsequences.Count; ++i) {
                deepSubsequences[i] = deepSubsequences[i].Distinct().ToList();
            }

            // Make sure figure constituents are properly ordered. (A former extra pass here built
            // a sorted copy of every constituent and discarded it; it had no effect and was removed.)
            for (int i = 0; i < deepSubsequences.Count; ++i) {
                deepSubsequences[i] = deepSubsequences[i].OrderBy(s => s[0].SentenceId).ToList();
                for (int j = 0; j < deepSubsequences[i].Count; ++j) {
                    var dsij = deepSubsequences[i][j];
                    deepSubsequences[i][j] = new Subsequence(dsij.OrderBy(s => s.Left), dsij.ContainingSentence, dsij.ContainingSubsequence, dsij.WindowId);
                    if (demarcation != null) {
                        deepSubsequences[i][j].Add(new SubsequenceToken(new Token(demarcation, "", 0)));
                    }
                }
            }

            var figures = new List<RhetoricalFigure>();
            foreach (var deepSubsequence in deepSubsequences) {
                // Sort figure constituents so leftmost in text appears first, etc.
                var d = deepSubsequence.OrderBy(x => x.SentenceId).ThenBy(x => x[0].Left).ToList();
                figures.Add(new RhetoricalFigure(new Subsequence(d.SelectMany(x => x)), type, d[0].WindowId));
            }

            return figures;
        }
Beispiel #41
0
        /// <summary>
        /// Symploce: Repetition of a word or phrase at the beginning, and of another at the end, of successive clauses; the combination of Anaphora and Epistrophe.
        /// For every sliding sentence window, compares the first subsequence of each clause pairwise and
        /// records every combination of an equivalent start and an equivalent end. Results are merged and appended to <c>a.Figures</c>.
        /// </summary>
        /// <param name="a">Analyzer supplying the document sentences and receiving the found figures.</param>
        /// <param name="windowSize">Number of consecutive sentences per search window; 3 when <c>null</c>.</param>
        /// <param name="minLength">Minimum token count of a clause subsequence to be used as the comparison base; 2 when <c>null</c>.</param>
        public static void FindSymploce(Analyzer a, int? windowSize, object minLength)
        {
            int ws = windowSize ?? 3; // Use default window size of 3.
            int ml = Convert.ToInt32(minLength ?? 2); // V. Gawryjolek, p. 23

            var allSubsequences = new List<List<Subsequence>>();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                var window = new List<Subsequence>(); // Search window
                for (int j = 0; j < ws; ++j) {
                    if (i + j >= a.Document.Sentences.Count) {
                        break; // Past the last sentence; larger offsets cannot be valid either.
                    }

                    var clauses = a.Document.Sentences[i + j].Clauses; // Or 'Phrases', but the clauses may be more apt.
                    for (int k = 0; k < clauses.Count; ++k) {
                        var subsequences = clauses[k].SubsequencesNoBoundaryConjunctions; // Added 29 Mar. 2015.
                        if (subsequences.Count > 0) {
                            window.Add(subsequences[0]);
                        }
                    }
                }

                // Search.
                for (int j = 0; j < window.Count; ++j) {
                    // Some (complete?) redundancy here with the 'IsStart' and 'IsEnd' tests.
                    if (window[j].Count < ml || !window[j].First().IsStart || !window[j].Last().IsEnd) {
                        continue;
                    }

                    var comparer = new Subsequence(window[j], i);
                    for (int k = j + 1; k < window.Count; ++k) {
                        var current = new Subsequence(window[k], i);
                        var shorter = Math.Min(comparer.Count, current.Count);

                        for (int l = 1; l < shorter; ++l) {
                            var comparerStart = new Subsequence(comparer.GetRange(0, l), comparer.ContainingSentence, comparer.ContainingSubsequence, comparer.WindowId);
                            var currentStart = new Subsequence(current.GetRange(0, l), current.ContainingSentence, current.ContainingSubsequence, current.WindowId);

                            // The start comparison does not depend on 'm'; decide it once per start length.
                            if (!comparerStart.Equivalent(currentStart)) {
                                continue;
                            }

                            for (int m = 1; m < shorter; ++m) {
                                var comparerEnd = new Subsequence(comparer.GetRange(comparer.Count - m, m), comparer.ContainingSentence, comparer.ContainingSubsequence, comparer.WindowId);
                                var currentEnd = new Subsequence(current.GetRange(current.Count - m, m), current.ContainingSentence, current.ContainingSubsequence, current.WindowId);
                                if (!comparerEnd.Equivalent(currentEnd)) {
                                    continue;
                                }

                                // Each component of the figure is "start + end" of one clause.
                                comparerEnd.InsertRange(0, comparerStart);
                                currentEnd.InsertRange(0, currentStart);
                                allSubsequences.Add(new List<Subsequence> { comparerEnd, currentEnd });
                            }
                        }
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            var figures = MergeFigures(allSubsequences, RhetoricalFigures.Symploce, multiWindow: true);

            a.Figures.AddRange(figures);
        }
        /// <summary>
        /// For every DNA sequence belonging to a fixed list of matters, creates a copy
        /// of the sequence (under a new matter named "... Cleaned of IS110") and copies
        /// its subsequences, skipping those that have an attribute value containing "IS110".
        /// </summary>
        /// <returns>
        /// The <see cref="ActionResult"/> produced by <c>View()</c>.
        /// </returns>
        public ActionResult Index()
        {
            using (var db = new LibiadaWebEntities())
            {
                var matterRepository = new MatterRepository(db);
                var dnaSequenceRepository = new GeneticSequenceRepository(db);
                var commonSequenceRepository = new CommonSequenceRepository(db);
                var elementRepository = new ElementRepository(db);

                // Hard-coded ids of the matters whose sequences are processed.
                var matterIds = new long[] { 1332, 1333, 1339, 1330, 1337, 1342, 1331, 1338, 1340, 1943, 1945, 1334 };
                DnaSequence[] sequences = db.DnaSequence
                                            .Include(d => d.Matter)
                                            .Where(d => matterIds.Contains(d.MatterId))
                                            .ToArray();

                foreach (DnaSequence source in sequences)
                {
                    var sourceMatter = source.Matter;

                    var cleanedMatter = new Matter
                    {
                        Name = $"{sourceMatter.Name} Cleaned of IS110",
                        Description = sourceMatter.Description,
                        Nature = sourceMatter.Nature,
                        Group = sourceMatter.Group,
                        SequenceType = sourceMatter.SequenceType
                    };

                    var cleanedSequence = new CommonSequence
                    {
                        Notation = source.Notation,
                        Matter = cleanedMatter,
                        Description = source.Description,
                        RemoteDb = source.RemoteDb,
                        RemoteId = source.RemoteId
                    };

                    var chain = commonSequenceRepository.GetLibiadaChain(source.Id);

                    matterRepository.CreateOrExtractExistingMatterForSequence(cleanedSequence);

                    var alphabet = elementRepository.ToDbElements(chain.Alphabet, Notation.Nucleotides, false);
                    dnaSequenceRepository.Create(cleanedSequence, false, alphabet, chain.Building);

                    // Copied into a local so the query below compares against a plain value.
                    var sourceSequenceId = source.Id;
                    var subsequences = db.Subsequence
                                         .Include(s => s.Position)
                                         .Include(s => s.SequenceAttribute)
                                         .Where(s => s.SequenceId == sourceSequenceId)
                                         .ToList();

                    // Ids of the loaded subsequences that have at least one attribute value containing "IS110".
                    var subsequenceIds = subsequences.Select(s => s.Id);
                    var idsToDrop = db.SequenceAttribute
                                      .Where(sa => subsequenceIds.Contains(sa.SequenceId) && sa.Value.Contains("IS110"))
                                      .Select(sa => sa.SequenceId)
                                      .Distinct()
                                      .ToArray();

                    subsequences.RemoveAll(s => idsToDrop.Contains(s.Id));

                    var copies = new List<Subsequence>(subsequences.Count);
                    var copiedAttributes = new List<SequenceAttribute>();
                    var copiedPositions = new List<Position>();

                    foreach (Subsequence existing in subsequences)
                    {
                        var copy = new Subsequence
                        {
                            Id = db.GetNewElementId(),
                            Feature = existing.Feature,
                            SequenceId = cleanedSequence.Id,
                            Start = existing.Start,
                            Length = existing.Length,
                            RemoteId = existing.RemoteId,
                            Partial = existing.Partial
                        };
                        copies.Add(copy);

                        // Attributes and positions are re-created pointing at the new subsequence id.
                        copiedAttributes.AddRange(existing.SequenceAttribute.Select(attr => new SequenceAttribute
                        {
                            SequenceId = copy.Id,
                            Attribute = attr.Attribute,
                            Value = attr.Value
                        }));

                        copiedPositions.AddRange(existing.Position.Select(pos => new Position
                        {
                            SubsequenceId = copy.Id,
                            Length = pos.Length,
                            Start = pos.Start
                        }));
                    }

                    db.Subsequence.AddRange(copies);
                    db.SequenceAttribute.AddRange(copiedAttributes);
                    db.Position.AddRange(copiedPositions);

                    // Changes are saved once per processed sequence.
                    db.SaveChanges();
                }
            }

            return View();
        }
        /// <summary>
        /// Builds the list of sequence attributes for a subsequence from its qualifiers.
        /// </summary>
        /// <remarks>
        /// Qualifiers with the key "translation" produce no attributes.
        /// Each "protein_id" value, stripped of double quotes, is also stored as the
        /// subsequence's <c>RemoteId</c>. The attributes returned by
        /// <c>CreateComplementJoinPartialAttributes</c> are appended after the qualifier attributes.
        /// NOTE(review): this method itself only builds the list; whether the helper
        /// methods persist anything to the db is not visible here.
        /// </remarks>
        /// <param name="qualifiers">
        /// The qualifiers to convert: qualifier name mapped to its values.
        /// </param>
        /// <param name="complement">
        /// Complement flag.
        /// </param>
        /// <param name="complementJoin">
        /// Complement join flag.
        /// </param>
        /// <param name="subsequence">
        /// The subsequence. Its <c>RemoteId</c> is set when a "protein_id" qualifier is present.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown if a "protein_id" value differs from the remote id already set on the subsequence.
        /// </exception>
        /// <returns>
        /// The <see cref="List{SequenceAttribute}"/>.
        /// </returns>
        public List<SequenceAttribute> CreateSubsequenceAttributes(Dictionary<string, List<string>> qualifiers, bool complement, bool complementJoin, Subsequence subsequence)
        {
            var result = new List<SequenceAttribute>();

            foreach (var qualifier in qualifiers)
            {
                // The key does not change between values, so the whole qualifier is skipped up front.
                if (qualifier.Key == "translation")
                {
                    continue;
                }

                bool isProteinId = qualifier.Key == "protein_id";

                foreach (var value in qualifier.Value)
                {
                    if (isProteinId)
                    {
                        var remoteId = value.Replace("\"", string.Empty);

                        // A subsequence may carry only one remote id; a repeated identical id is tolerated.
                        if (!string.IsNullOrEmpty(subsequence.RemoteId) && subsequence.RemoteId != remoteId)
                        {
                            throw new Exception($"Several remote ids in one subsequence. First {subsequence.RemoteId} Second {value}");
                        }

                        subsequence.RemoteId = remoteId;
                    }

                    result.Add(CreateSequenceAttribute(qualifier.Key, CleanAttributeValue(value), subsequence.Id));
                }
            }

            result.AddRange(CreateComplementJoinPartialAttributes(complement, complementJoin, subsequence));

            return result;
        }