Пример #1
0
        /// <summary>
        /// Decides whether two targets count as mapped to each other in their respective contexts.
        /// </summary>
        /// <remarks>
        /// Every string representation of <paramref name="target1"/> is paired with every string
        /// representation of <paramref name="target2"/>. Identical representations are mapped
        /// immediately. The first pair that is present in the lookup table decides the outcome:
        /// the result is whether any of that pair's environment tuples accepts both contexts, and
        /// later pairs are not examined.
        /// </remarks>
        /// <returns><c>true</c> when a mapping applies; <c>false</c> otherwise, including when no mappings exist.</returns>
        public bool IsMapped(ShapeNode leftNode1, Ngram <Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram <Segment> target2, ShapeNode rightNode2)
        {
            if (_mappings.Count == 0)
            {
                return false;
            }

            foreach (string first in GetStrReps(target1))
            {
                foreach (string second in GetStrReps(target2))
                {
                    if (first == second)
                    {
                        return true;
                    }

                    Dictionary<string, List<Tuple<Environment, Environment>>> bySecond;
                    if (!_mappingLookup.TryGetValue(first, out bySecond))
                    {
                        continue;
                    }

                    List<Tuple<Environment, Environment>> environmentPairs;
                    if (!bySecond.TryGetValue(second, out environmentPairs))
                    {
                        continue;
                    }

                    // A lookup hit is final: report whether any environment pair accepts both contexts.
                    foreach (Tuple<Environment, Environment> pair in environmentPairs)
                    {
                        if (CheckEnvironment(pair.Item1, leftNode1, rightNode1) && CheckEnvironment(pair.Item2, leftNode2, rightNode2))
                        {
                            return true;
                        }
                    }
                    return false;
                }
            }

            return false;
        }
Пример #2
0
        /// <summary>
        /// Looks for the first sound class that matches <paramref name="node"/> together with its
        /// left and right neighbours, relative to the stem annotation of the node's shape.
        /// </summary>
        /// <remarks>
        /// A neighbour outside the stem range is replaced by the list's Begin/End node. A node
        /// outside the stem range is matched as <c>Segment.Anchor</c> instead of its own segment.
        /// </remarks>
        /// <param name="soundClasses">Candidate sound classes, tried in order.</param>
        /// <param name="segmentPool">Pool used to obtain the segment for <paramref name="node"/>.</param>
        /// <param name="node">The node to classify.</param>
        /// <param name="soundClass">The first matching class, or <c>null</c> if none matches.</param>
        /// <returns><c>true</c> if a matching sound class was found.</returns>
        public static bool TryGetMatchingSoundClass(this IEnumerable <SoundClass> soundClasses, SegmentPool segmentPool, ShapeNode node, out SoundClass soundClass)
        {
            Annotation<ShapeNode> stem = ((Shape)node.List).Annotations.First(ann => ann.Type() == CogFeatureSystem.StemType);
            bool nodeInStem = stem.Range.Contains(node);

            // Left context: only looked up when the node is inside the stem or after it.
            ShapeNode left = null;
            if (nodeInStem || node.Annotation.CompareTo(stem) > 0)
            {
                ShapeNode previous = node.GetPrev(NodeFilter);
                if (previous != null)
                {
                    left = stem.Range.Contains(previous) ? previous : node.List.Begin;
                }
            }

            Ngram<Segment> target = nodeInStem ? segmentPool.Get(node) : Segment.Anchor;

            // Right context: only looked up when the node is inside the stem or before it.
            ShapeNode right = null;
            if (nodeInStem || node.Annotation.CompareTo(stem) < 0)
            {
                ShapeNode following = node.GetNext(NodeFilter);
                if (following != null)
                {
                    right = stem.Range.Contains(following) ? following : node.List.End;
                }
            }

            SoundClass match = soundClasses.FirstOrDefault(sc => sc.Matches(left, target, right));
            soundClass = match;
            return soundClass != null;
        }
Пример #3
0
        /// <summary>
        /// Computes the lexical features of <paramref name="domain"/>: character-class counts,
        /// sequence lengths, ratios, repeat frequency and n-gram frequencies for n = 2..5.
        /// </summary>
        /// <remarks>
        /// When <paramref name="domain"/> is null or empty nothing is computed and all members
        /// keep whatever values they had before the constructor body ran.
        /// </remarks>
        /// <param name="domain">The domain name to analyse.</param>
        public ProcessingMulti(string domain)
        {
            if (string.IsNullOrEmpty(domain))
            {
                return;
            }

            RegexOptions matchOptions = RegexOptions.IgnoreCase;

            CurrentDomain = domain;
            DomainLength = domain.Length;

            // Character-class counts.
            NumberDigits = Regex.Matches(domain, @"[0123456789]", matchOptions).Count;
            NumberVowels = Regex.Matches(domain, @"[eyuioa]", matchOptions).Count;
            NumberConsonants = Regex.Matches(domain, @"[qwrtpsdfghjklzxcvbnm]", matchOptions).Count;
            NumberSpecialCharacters = Regex.Matches(domain, @"[\|!#$%&/()=?»«*@£§€{};'<>_,]", matchOptions).Count;
            NumberUniqueCharacter = domain.Distinct().Count();

            // NOTE(review): this subtracts the consonant count, not the unique-character count — confirm this is intended.
            NumberRepetitions = DomainLength - NumberConsonants;

            // Sequence lengths per character class.
            LengthDigitsSequence = CalculateSequence(Digits);
            LengthVowelsSequence = CalculateSequence(Vowels);
            LengthConsonantSequence = CalculateSequence(Consonants);
            LengthSpecialCharactersSequence = CalculateSequence(SpecialCharacters);

            // NOTE(review): if the count members are integers these divisions truncate, and a domain
            // without consonants makes the vowel/consonant divisor zero — confirm against the member declarations.
            RatioDigitToLength = NumberDigits / DomainLength;
            RatioVowelsToLength = NumberVowels / DomainLength;
            RatioConsonantsToLength = NumberConsonants / DomainLength;
            RatioSpecialCharactersToLength = NumberSpecialCharacters / DomainLength;
            RatioVowelsToConsonants = NumberVowels / NumberConsonants;
            RatioUniqueCharacterToLength = NumberUniqueCharacter / DomainLength;

            FrequencyRepeat = CalculateFrequencyRepeat();

            // N-gram frequencies all come from the same Ngram instance.
            Ngram ngramModel = Ngram.getInstance();
            FrequencyNgram2 = ngramModel.CalculateFrequencyNgram(domain, 2);
            FrequencyNgram3 = ngramModel.CalculateFrequencyNgram(domain, 3);
            FrequencyNgram4 = ngramModel.CalculateFrequencyNgram(domain, 4);
            FrequencyNgram5 = ngramModel.CalculateFrequencyNgram(domain, 5);
        }
Пример #4
0
        /// <summary>
        /// Re-evaluates which aligned nodes correspond to the currently selected sound change and
        /// rebuilds the list of word pairs that contain at least one such node.
        /// </summary>
        /// <param name="wordPairs">View model whose nodes and selected-pairs collection are updated.</param>
        private void UpdateSelectedChangeWordPairs(WordPairsViewModel wordPairs)
        {
            IWordAligner aligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];

            wordPairs.SelectedCorrespondenceWordPairs.Clear();
            foreach (WordPairViewModel pair in wordPairs.WordPairs)
            {
                bool anySelected = false;
                foreach (AlignedNodeViewModel alignedNode in pair.AlignedNodes)
                {
                    // With no sound change selected, every node is simply deselected.
                    if (_selectedSoundChange == null)
                    {
                        alignedNode.IsSelected = false;
                        continue;
                    }

                    SoundContext lhs = pair.DomainAlignment.ToSoundContext(_segmentPool, 0, alignedNode.Column, aligner.ContextualSoundClasses);
                    Ngram<Segment> correspondence = pair.DomainAlignment[1, alignedNode.Column].ToNgram(_segmentPool);

                    alignedNode.IsSelected = lhs.Equals(_selectedSoundChange.DomainSoundChangeLhs)
                                             && correspondence.Equals(_selectedSoundChange.DomainCorrespondence);
                    anySelected |= alignedNode.IsSelected;
                }

                if (anySelected)
                {
                    wordPairs.SelectedCorrespondenceWordPairs.Add(pair);
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Computes the highest sound-change score for <paramref name="node"/> of
        /// <paramref name="word"/> against the variety of <paramref name="otherWord"/>.
        /// </summary>
        /// <returns>
        /// <c>MaxSoundChangeScore</c> scaled by the largest applicable probability and truncated to
        /// an integer; 0 when both words belong to the same variety or no probability distribution exists.
        /// </returns>
        private int GetMaxSoundChangeScore(Word word, ShapeNode node, Word otherWord)
        {
            if (word.Variety == otherWord.Variety)
            {
                return 0;
            }

            VarietyPair pair = word.Variety.VarietyPairs[otherWord.Variety];
            if (pair.SoundChangeProbabilityDistribution == null)
            {
                return 0;
            }

            double best;
            if (pair.Variety1 == word.Variety)
            {
                // The word is on the left-hand side: take the most probable correspondence for its context.
                SoundContext lhs = node.ToSoundContext(_segmentPool, _contextualSoundClasses);
                best = pair.DefaultCorrespondenceProbability;

                IProbabilityDistribution<Ngram<Segment>> distribution;
                if (pair.SoundChangeProbabilityDistribution.TryGetProbabilityDistribution(lhs, out distribution) && distribution.Samples.Count > 0)
                {
                    best = distribution.Samples.Max(nseg => distribution[nseg]);
                }
            }
            else
            {
                // The word is on the correspondence side: take the best probability over all contexts.
                Ngram <Segment> corr = _segmentPool.GetExisting(node);
                if (pair.SoundChangeProbabilityDistribution.Conditions.Count == 0)
                {
                    best = 0;
                }
                else
                {
                    best = pair.SoundChangeProbabilityDistribution.Conditions.Max(lhs => pair.SoundChangeProbabilityDistribution[lhs][corr]);
                }
            }

            return (int)(MaxSoundChangeScore * best);
        }
Пример #6
0
        /// <summary>
        /// Tests whether correspondence <paramref name="v"/> in the given alignment column occurs
        /// at least as often as the regular-correspondence threshold.
        /// </summary>
        /// <remarks>
        /// With automatic thresholds enabled the threshold is looked up in the threshold table;
        /// the default threshold is used when automatic mode is off or the lookup fails.
        /// </remarks>
        private bool IsRegular(WordPair wordPair, IWordAlignerResult alignerResult, Alignment <Word, ShapeNode> alignment, int column,
                               Ngram <Segment> v)
        {
            VarietyPair pair = wordPair.VarietyPair;
            SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
            FrequencyDistribution<Ngram<Segment>> observed = pair.CognateSoundCorrespondenceFrequencyDistribution[context];

            int threshold;
            if (!AutomaticRegularCorrespondenceThreshold)
            {
                threshold = DefaultRegularCorrespondenceThreshold;
            }
            else
            {
                // Total count of v across every context sharing this left and right environment.
                int seg2Count = pair.CognateSoundCorrespondenceFrequencyDistribution.Conditions
                                .Where(sc => sc.LeftEnvironment == context.LeftEnvironment && sc.RightEnvironment == context.RightEnvironment)
                                .Sum(sc => pair.CognateSoundCorrespondenceFrequencyDistribution[sc][v]);

                bool found = _regularCorrespondenceThresholdTable.TryGetThreshold(pair.CognateCount, observed.SampleOutcomeCount, seg2Count,
                                                                                  out threshold);
                if (!found)
                {
                    threshold = DefaultRegularCorrespondenceThreshold;
                }
            }

            return observed[v] >= threshold;
        }
Пример #7
0
        /// <summary>
        /// Enumerates the known affixes of the requested type that <paramref name="word"/> starts
        /// (prefix) or ends (suffix) with, each paired with what remains of the word.
        /// </summary>
        /// <remarks>
        /// The empty affix is always yielded first, paired with the whole word; its entry is read
        /// with the dictionary indexer. At most <c>min(MaxAffixLength, word.Length - 1)</c> items
        /// are consumed after that.
        /// </remarks>
        private IEnumerable <Tuple <AffixInfo, Ngram <TItem> > > GetAffixes(Ngram <TItem> word, AffixType type)
        {
            bool isPrefix = type == AffixType.Prefix;
            Direction dir = isPrefix ? Direction.LeftToRight : Direction.RightToLeft;
            Dictionary<Ngram<TItem>, AffixInfo> known = isPrefix ? _prefixes : _suffixes;

            Ngram<TItem> candidate = new Ngram<TItem>();
            yield return Tuple.Create(known[candidate], word);

            Ngram<TItem> remainder = word;
            int limit = Math.Min(MaxAffixLength, word.Length - 1);
            foreach (TItem item in word.GetItems(dir).Take(limit))
            {
                // Grow the candidate by one item and shrink the remainder by the same item.
                candidate = candidate.Concat(item, dir);
                remainder = remainder.SkipFirst(dir);

                AffixInfo info;
                if (known.TryGetValue(candidate, out info))
                {
                    yield return Tuple.Create(info, remainder);
                }
            }
        }
Пример #8
0
 /// <summary>
 /// Creates a view model for one sound change.
 /// </summary>
 /// <param name="lhs">Sound context on the left-hand side; also wrapped in a <c>SoundChangeLhsViewModel</c>.</param>
 /// <param name="correspondence">The corresponding segment n-gram.</param>
 /// <param name="probability">Probability stored for this change.</param>
 /// <param name="frequency">Frequency stored for this change.</param>
 public SoundChangeViewModel(SoundContext lhs, Ngram<Segment> correspondence, double probability, int frequency)
 {
     // Values stored exactly as supplied.
     _domainLhs = lhs;
     _correspondence = correspondence;
     _prob = probability;
     _frequency = frequency;

     // Presentation wrapper around the same left-hand side.
     _lhs = new SoundChangeLhsViewModel(lhs);
 }
Пример #9
0
 /// <summary>
 /// Initializes the view model from a domain sound change: its left-hand context, the
 /// correspondence, and the probability and frequency figures.
 /// </summary>
 public SoundChangeViewModel(SoundContext lhs, Ngram <Segment> correspondence, double probability, int frequency)
 {
     // Numeric figures.
     _frequency = frequency;
     _prob = probability;

     // Domain objects, plus the view-model wrapper for the left-hand side.
     _correspondence = correspondence;
     _domainLhs = lhs;
     _lhs = new SoundChangeLhsViewModel(lhs);
 }
Пример #10
0
 /// <summary>
 /// Prepares the shared MySQL command with an INSERT statement for <paramref name="table"/>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): this method appears unfinished. It only sets the command text; it never
 /// replaces the "{}" placeholders with values from <paramref name="ngram"/>, never executes
 /// the command, and always returns <c>false</c>.
 /// The table name is concatenated into the SQL text, so only trusted table names may be passed.
 /// </remarks>
 /// <param name="table">Name of the destination table (inserted verbatim into the SQL text).</param>
 /// <param name="ngram">The n-gram to insert; currently unused.</param>
 /// <returns>Always <c>false</c>.</returns>
 public static bool Add(string table, Ngram ngram)
 {
     MysqlHelper.Open();
     MysqlHelper.cmd.Parameters.Clear();
     MysqlHelper.cmd.Connection = MysqlHelper.conn;

     // The keyword was misspelled "INRO", which is not valid SQL.
     MysqlHelper.cmd.CommandText = "INSERT INTO " + table + " VALUES ({},{},{})";
     return false;
 }
Пример #11
0
 /// <summary>
 /// Dispatches the mapping check to the vowel or consonant mappings depending on the type of
 /// the first segment of each target. An empty target is compatible with either kind.
 /// </summary>
 /// <returns>
 /// The result of the vowel mappings when both targets are vowel-compatible, otherwise the
 /// result of the consonant mappings when both are consonant-compatible, otherwise <c>false</c>.
 /// </returns>
 public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
 {
     // Vowel check comes first, so two empty targets are handled by the vowel mappings.
     if (target1.Length == 0 || target1.First.Type == CogFeatureSystem.VowelType)
     {
         if (target2.Length == 0 || target2.First.Type == CogFeatureSystem.VowelType)
         {
             return _vowelMappings.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2);
         }
     }

     if (target1.Length == 0 || target1.First.Type == CogFeatureSystem.ConsonantType)
     {
         if (target2.Length == 0 || target2.First.Type == CogFeatureSystem.ConsonantType)
         {
             return _consMappings.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2);
         }
     }

     return false;
 }
Пример #12
0
    /// <summary>
    /// Demo entry point: builds an <c>Ngram</c> of order 3 from a fixed sentence and calls
    /// <c>readNgram</c> on it.
    /// </summary>
    public static void Main(string[] arg)
    {
        int order = 3;
        string sentence = "Hello world my age is 23.5";

        Ngram model = new Ngram(sentence, order);

        // Disabled alternative call kept from the original: test.ngram_n2("Hello world my name is xd", 3);
        model.readNgram();
    }
Пример #13
0
        /// <summary>
        /// Demo entry point: builds order-4 <c>Ngram</c> objects from two fixed sentences and
        /// passes them to <c>bleu</c>.
        /// </summary>
        static void Main(string[] args)
        {
            Ngram first = new Ngram("Going to play basketball this afternoon ?", 4);
            Ngram second = new Ngram("Going to play basketball in the afternoon ?", 4);

            bleu(first, second);
        }
Пример #14
0
 /// <summary>
 /// Returns <c>true</c> when at least one segment of <paramref name="target"/> has a feature
 /// structure unifiable with this class's feature structure. The neighbouring nodes are not consulted.
 /// </summary>
 public override bool Matches(ShapeNode leftNode, Ngram<Segment> target, ShapeNode rightNode)
 {
     bool matched = false;
     foreach (Segment segment in target)
     {
         if (_fs.IsUnifiable(segment.FeatureStruct))
         {
             matched = true;
             break;
         }
     }
     return matched;
 }
Пример #15
0
        /// <summary>
        /// Verifies that <c>Ngram.Create</c> with size 3 returns the three-word windows of the
        /// sentence in order.
        /// </summary>
        public void CreateNGram()
        {
            var expected = new List<string> {
                "This a sentence", "a sentence of", "sentence of my", "of my test"
            };
            var ngram = new Ngram();

            var actual = ngram.Create("This a sentence of my test", 3);

            // Expected comes first so a failure message labels the two collections correctly.
            CollectionAssert.AreEqual(expected, actual);
        }
Пример #16
0
        /// <summary>
        /// Looks for the first sound class matching the alignment cell at
        /// (<paramref name="seq"/>, <paramref name="col"/>) together with its left and right nodes.
        /// </summary>
        /// <param name="soundClass">The first matching class, or <c>null</c> if none matches.</param>
        /// <returns><c>true</c> if a matching sound class was found.</returns>
        public static bool TryGetMatchingSoundClass(this IEnumerable <SoundClass> soundClasses, SegmentPool segmentPool, Alignment <Word, ShapeNode> alignment, int seq, int col, out SoundClass soundClass)
        {
            ShapeNode left = alignment.GetLeftNode(seq, col);
            Ngram<Segment> cell = alignment[seq, col].ToNgram(segmentPool);
            ShapeNode right = alignment.GetRightNode(seq, col);

            soundClass = null;
            foreach (SoundClass candidate in soundClasses)
            {
                if (candidate.Matches(left, cell, right))
                {
                    soundClass = candidate;
                    break;
                }
            }
            return soundClass != null;
        }
Пример #17
0
        /// <summary>
        /// Checks the search-text form of n-grams: "test" with size 2 yields "te es st", and an
        /// input shorter than the n-gram size ("t") yields "t*".
        /// </summary>
        /// <remarks>
        /// The expected strings are compared with ordinal equality; the previous
        /// <c>string.Compare(...) == 0</c> was culture-sensitive and could treat non-identical
        /// strings as equal. NOTE(review): <c>Debug.Assert</c> is compiled out of release builds,
        /// so these checks only run in debug builds.
        /// </remarks>
        public void getNgramTextForSearchTest()
        {
            string bigramText = Ngram.getNgramTextForSearch("test", 2);

            Debug.Assert(bigramText == "te es st");

            string shortInputText = Ngram.getNgramTextForSearch("t", 2);

            Debug.Assert(shortInputText == "t*");
        }
Пример #18
0
        /// <summary>
        /// Verifies that <c>ToString</c> of an n-gram parsed from a "frequency word\tword..." line
        /// returns the words separated by single spaces.
        /// </summary>
        public void ToString_MultipleWords_ReturnsWhiteSpaceSeparatedWords()
        {
            string line             = " 2301 a\tja\tsom\tsa";
            string expectedToString = "a ja som sa";
            Ngram  ngram            = new Ngram(line);

            var result = ngram.ToString();

            // Expected value first, actual second, so failure messages are labelled correctly.
            Assert.AreEqual(expectedToString, result);
        }
Пример #19
0
        /// <summary>
        /// Verifies that <c>Frequency</c> of an n-gram parsed from a "frequency word\tword..." line
        /// equals the leading number of that line.
        /// </summary>
        public void Frequency_MultipleWords_ReturnsIntFrequency()
        {
            string line      = " 2301 a\tja\tsom\tsa";
            int    frequency = 2301;
            Ngram  ngram     = new Ngram(line);

            var result = ngram.Frequency;

            // Expected value first, actual second, so failure messages are labelled correctly.
            Assert.AreEqual(frequency, result);
        }
Пример #20
0
 /// <summary>
 /// Adds one observation per alignment column to <paramref name="cognateCorrCounts"/>: the
 /// sound context of sequence 0 is the condition and the n-gram of sequence 1 is the sample.
 /// </summary>
 private void UpdateCognateCorrespondenceCounts(IWordAligner aligner, ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > cognateCorrCounts,
                                                Alignment <Word, ShapeNode> alignment)
 {
     int column = 0;
     while (column < alignment.ColumnCount)
     {
         SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, aligner.ContextualSoundClasses);
         Ngram<Segment> correspondence = alignment[1, column].ToNgram(_segmentPool);

         cognateCorrCounts[context].Increment(correspondence);
         column++;
     }
 }
Пример #21
0
        /// <summary>
        /// Rebuilds a <c>VarietyPair</c> from the data held by this surrogate: similarity scores,
        /// word pairs, sound-change frequency and probability distributions, and the sound
        /// correspondence collections.
        /// </summary>
        /// <param name="segmentPool">Pool used to resolve stored segment strings to segments.</param>
        /// <param name="project">Project supplying the varieties and the primary word aligner.</param>
        /// <returns>The reconstructed variety pair.</returns>
        public VarietyPair ToVarietyPair(SegmentPool segmentPool, CogProject project)
        {
            var vp = new VarietyPair(project.Varieties[Variety1], project.Varieties[Variety2])
            {
                PhoneticSimilarityScore          = PhoneticSimilarityScore,
                LexicalSimilarityScore           = LexicalSimilarityScore,
                DefaultCorrespondenceProbability = DefaultCorrespondenceProbability
            };
            // Maps each surrogate to its WordPair; it is filled while the word pairs are added
            // below and is passed to ToSoundCorrespondence further down.
            // NOTE(review): GetValue presumably creates and caches the value on a miss — confirm.
            var wordPairs = new Dictionary <WordPairSurrogate, WordPair>();

            vp.WordPairs.AddRange(_wordPairs.Select(surrogate => wordPairs.GetValue(surrogate, () => surrogate.ToWordPair(project, vp))));
            var soundChanges = new ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> >();

            // Rebuild the per-context correspondence counts from the stored samples.
            foreach (KeyValuePair <SoundContextSurrogate, Tuple <string[], int>[]> fd in _soundChanges)
            {
                SoundContext ctxt = fd.Key.ToSoundContext(project, segmentPool);
                FrequencyDistribution <Ngram <Segment> > freqDist = soundChanges[ctxt];
                foreach (Tuple <string[], int> sample in fd.Value)
                {
                    // A null segment array stands for the empty n-gram.
                    Ngram <Segment> corr = sample.Item1 == null ? new Ngram <Segment>() : new Ngram <Segment>(sample.Item1.Select(segmentPool.GetExisting));
                    freqDist.Increment(corr, sample.Item2);
                }
            }
            vp.SoundChangeFrequencyDistribution = soundChanges;
            IWordAligner aligner       = project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
            int          segmentCount  = vp.Variety2.SegmentFrequencyDistribution.ObservedSamples.Count;
            // Number of possible correspondences handed to the Witten-Bell distribution; it is
            // larger when the aligner has expansion/compression enabled.
            int          possCorrCount = aligner.ExpansionCompressionEnabled ? (segmentCount * segmentCount) + segmentCount + 1 : segmentCount + 1;

            vp.SoundChangeProbabilityDistribution = new ConditionalProbabilityDistribution <SoundContext, Ngram <Segment> >(soundChanges,
                                                                                                                            (sc, freqDist) => new WittenBellProbabilityDistribution <Ngram <Segment> >(freqDist, possCorrCount));

            // Restore the correspondence collections, keyed by syllable position name.
            foreach (KeyValuePair <string, List <SoundCorrespondenceSurrogate> > kvp in _soundCorrespondenceCollections)
            {
                if (kvp.Value != null)
                {
                    // NOTE(review): a key other than "onset", "nucleus" or "coda" leaves pos null,
                    // and that null is then used as the indexer key below — confirm other keys cannot occur.
                    FeatureSymbol pos = null;
                    switch (kvp.Key)
                    {
                    case "onset":
                        pos = CogFeatureSystem.Onset;
                        break;

                    case "nucleus":
                        pos = CogFeatureSystem.Nucleus;
                        break;

                    case "coda":
                        pos = CogFeatureSystem.Coda;
                        break;
                    }
                    vp.SoundCorrespondenceCollections[pos].AddRange(kvp.Value.Select(surrogate => surrogate.ToSoundCorrespondence(segmentPool, wordPairs)));
                }
            }
            return(vp);
        }
Пример #22
0
 /// <summary>
 /// Reports whether any segment in <paramref name="target"/> carries a feature structure that
 /// is unifiable with this class's feature structure. The left and right nodes are ignored.
 /// </summary>
 public override bool Matches(ShapeNode leftNode, Ngram <Segment> target, ShapeNode rightNode)
 {
     foreach (Segment candidate in target)
     {
         if (!_fs.IsUnifiable(candidate.FeatureStruct))
         {
             continue;
         }
         return true;
     }

     return false;
 }
Пример #23
0
        /// <summary>
        /// Verifies that <c>Words</c> of an n-gram parsed from a "frequency word\tword..." line
        /// contains the tab-separated words in order.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this relies on the test framework's <c>Assert.AreEqual</c> comparing
        /// arrays element by element — confirm for the framework in use.
        /// </remarks>
        public void Words_MultipleWords_ReturnsArrayOfStrings()
        {
            string line = " 2301 a\tja\tsom\tsa";

            string[] words = { "a", "ja", "som", "sa" };
            Ngram    ngram = new Ngram(line);

            var result = ngram.Words;

            // Expected value first, actual second, so failure messages are labelled correctly.
            Assert.AreEqual(words, result);
        }
Пример #24
0
 /// <summary>
 /// Routes the mapping check to the vowel mappings when both targets are empty or start with a
 /// vowel, to the consonant mappings when both are empty or start with a consonant, and
 /// otherwise reports no mapping.
 /// </summary>
 public bool IsMapped(ShapeNode leftNode1, Ngram <Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram <Segment> target2, ShapeNode rightNode2)
 {
     bool vowelPair = (target1.Length == 0 || target1.First.Type == CogFeatureSystem.VowelType)
                      && (target2.Length == 0 || target2.First.Type == CogFeatureSystem.VowelType);
     if (vowelPair)
     {
         return _vowelMappings.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2);
     }

     bool consonantPair = (target1.Length == 0 || target1.First.Type == CogFeatureSystem.ConsonantType)
                          && (target2.Length == 0 || target2.First.Type == CogFeatureSystem.ConsonantType);

     // Mixed vowel/consonant targets are never mapped.
     return consonantPair && _consMappings.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2);
 }
Пример #25
0
        /// <summary>
        /// Collects the sound correspondences of all cognate word pairs in <paramref name="data"/>,
        /// grouped by syllable position (onset, nucleus, coda), and replaces the pair's stored
        /// collections with the result.
        /// </summary>
        /// <remarks>
        /// Only columns where both cells hold exactly one node, both nodes carry the same syllable
        /// position, and the two segments differ are counted. The first alignment returned by the
        /// aligner is used for each word pair.
        /// </remarks>
        public void Process(VarietyPair data)
        {
            IWordAligner aligner = _project.WordAligners[_alignerId];

            var byPosition = new Dictionary<FeatureSymbol, SoundCorrespondenceCollection>();
            byPosition.Add(CogFeatureSystem.Onset, new SoundCorrespondenceCollection());
            byPosition.Add(CogFeatureSystem.Nucleus, new SoundCorrespondenceCollection());
            byPosition.Add(CogFeatureSystem.Coda, new SoundCorrespondenceCollection());

            foreach (WordPair wordPair in data.WordPairs.Where(wp => wp.Cognacy))
            {
                Alignment<Word, ShapeNode> alignment = aligner.Compute(wordPair).GetAlignments().First();
                for (int column = 0; column < alignment.ColumnCount; column++)
                {
                    AlignmentCell<ShapeNode> cell1 = alignment[0, column];
                    AlignmentCell<ShapeNode> cell2 = alignment[1, column];

                    // Skip gaps and multi-node cells.
                    if (cell1.IsNull || cell2.IsNull || cell1.Count != 1 || cell2.Count != 1)
                    {
                        continue;
                    }

                    // Both nodes must carry a syllable position, and it must be the same one.
                    SymbolicFeatureValue pos1, pos2;
                    if (!cell1.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out pos1) ||
                        !cell2.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out pos2) ||
                        !((FeatureSymbol)pos1 == (FeatureSymbol)pos2))
                    {
                        continue;
                    }

                    Ngram<Segment> ngram1 = cell1.ToNgram(_segmentPool);
                    Ngram<Segment> ngram2 = cell2.ToNgram(_segmentPool);
                    Segment seg1 = ngram1.First;
                    Segment seg2 = ngram2.First;
                    if (seg1.Equals(seg2))
                    {
                        continue;
                    }

                    SoundCorrespondenceCollection bucket = byPosition[(FeatureSymbol)pos1];
                    SoundCorrespondence correspondence;
                    if (!bucket.TryGet(seg1, seg2, out correspondence))
                    {
                        correspondence = new SoundCorrespondence(seg1, seg2);
                        bucket.Add(correspondence);
                    }
                    correspondence.Frequency++;
                    correspondence.WordPairs.Add(wordPair);
                }
            }

            foreach (KeyValuePair<FeatureSymbol, SoundCorrespondenceCollection> entry in byPosition)
            {
                data.CognateSoundCorrespondencesByPosition[entry.Key].ReplaceAll(entry.Value);
            }
        }
Пример #26
0
    /// <summary>
    /// Loads the saved n-gram data from "ngram.dat" in the persistent data path, if the file
    /// exists, and stores its <c>gram</c> value in <c>nGram</c>.
    /// </summary>
    /// <remarks>
    /// The file stream is wrapped in a <c>using</c> block so it is closed even when
    /// deserialization throws. WARNING: <c>BinaryFormatter</c> is unsafe for untrusted data;
    /// only files written by this application should be loaded this way.
    /// </remarks>
    public void Load()
    {
        string path = Application.persistentDataPath + "/ngram.dat";
        if (!File.Exists(path))
        {
            return;
        }

        Debug.Log("Loaded");
        BinaryFormatter bf = new BinaryFormatter();
        Ngram g;
        using (FileStream file = File.Open(path, FileMode.Open))
        {
            g = (Ngram)bf.Deserialize(file);
        }

        nGram = g.gram;
    }
Пример #27
0
    /// <summary>
    /// Serializes the current <c>nGram</c> value, wrapped in an <c>Ngram</c> object, to
    /// "ngram.dat" in the persistent data path, creating or overwriting the file.
    /// </summary>
    /// <remarks>
    /// The file stream is wrapped in a <c>using</c> block so it is closed even when
    /// serialization throws. NOTE(review): <c>BinaryFormatter</c> is considered unsafe and is
    /// obsolete in current .NET; consider another serializer.
    /// </remarks>
    public void Save()
    {
        Debug.Log("Saved");
        BinaryFormatter bf = new BinaryFormatter();

        Ngram g = new Ngram();
        g.gram = nGram;

        using (FileStream file = File.Create(Application.persistentDataPath + "/ngram.dat"))
        {
            bf.Serialize(file, g);
        }
    }
Пример #28
0
        /// <summary>
        /// Adds one observation per alignment column to <paramref name="counts"/>, unless the
        /// alignment's normalized score is below the initial alignment threshold.
        /// </summary>
        /// <remarks>
        /// The sound context of sequence 0 is the condition; the n-gram of sequence 1 is the sample.
        /// </remarks>
        private void UpdateCounts(IWordAligner aligner, ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > counts, Alignment <Word, ShapeNode> alignment)
        {
            bool goodEnough = !(alignment.NormalizedScore < _initialAlignmentThreshold);
            if (goodEnough)
            {
                for (int col = 0; col < alignment.ColumnCount; col++)
                {
                    SoundContext context = alignment.ToSoundContext(_segmentPool, 0, col, aligner.ContextualSoundClasses);
                    Ngram<Segment> correspondence = alignment[1, col].ToNgram(_segmentPool);
                    counts[context].Increment(correspondence);
                }
            }
        }
Пример #29
0
        /// <summary>
        /// Reports whether the string form of <paramref name="target"/> is one of this class's
        /// normalized segments, either on its own or in a word-boundary form.
        /// </summary>
        /// <remarks>
        /// Modifiers are stripped first when the class ignores modifiers. "#x" is tried when the
        /// left node is an anchor and "x#" when the right node is an anchor, before plain "x".
        /// </remarks>
        public override bool Matches(ShapeNode leftNode, Ngram<Segment> target, ShapeNode rightNode)
        {
            string text = target.ToString();
            if (_ignoreModifiers)
            {
                text = StripModifiers(text);
            }

            return (leftNode != null && leftNode.Type() == CogFeatureSystem.AnchorType && _normalizedSegments.Contains(string.Format("#{0}", text)))
                   || (rightNode != null && rightNode.Type() == CogFeatureSystem.AnchorType && _normalizedSegments.Contains(string.Format("{0}#", text)))
                   || _normalizedSegments.Contains(text);
        }
Пример #30
0
        /// <summary>
        /// Counts all n-grams of order 1..<paramref name="ngramSize"/> in the target side of the
        /// parallel corpus (excluding tuning segments and empty segments) and writes them to the
        /// file <paramref name="lmPrefix"/>.
        /// </summary>
        /// <remarks>
        /// Each sentence is wrapped in "&lt;s&gt;" and "&lt;/s&gt;". The first occurrence of every
        /// word is replaced by "&lt;unk&gt;". Each output line is "ngram contextCount count",
        /// where the context count is the total token count for unigrams and the count of the
        /// n-gram without its last item otherwise. Lines are ordered by n-gram length, then text.
        /// </remarks>
        private void WriteNgramCountsFile(string lmPrefix, int ngramSize)
        {
            var counts = new Dictionary<Ngram<string>, int>();
            var seenWords = new HashSet<string>();
            int totalTokens = 0;

            foreach (TextSegment segment in _parallelCorpus.TargetSegments
                     .Where((s, i) => !_tuneCorpusIndices.Contains(i) && !s.IsEmpty))
            {
                var tokens = new List<string>();
                tokens.Add("<s>");
                foreach (string word in segment.Segment.Preprocess(_targetPreprocessor))
                {
                    // HashSet.Add returns true only for a word not seen before; that first
                    // occurrence is recorded as the unknown token.
                    tokens.Add(seenWords.Add(word) ? "<unk>" : word);
                }
                tokens.Add("</s>");

                // Only the two sentence markers: nothing to count.
                if (tokens.Count == 2)
                {
                    continue;
                }

                totalTokens += tokens.Count;
                for (int order = 1; order <= ngramSize; order++)
                {
                    for (int start = 0; start <= tokens.Count - order; start++)
                    {
                        var ngram = new Ngram<string>(Enumerable.Range(start, order).Select(index => tokens[index]));
                        counts.UpdateValue(ngram, () => 0, c => c + 1);
                    }
                }
            }

            using (var writer = new StreamWriter(lmPrefix))
            {
                var ordered = counts.OrderBy(kvp => kvp.Key.Length)
                              .ThenBy(kvp => string.Join(" ", kvp.Key));
                foreach (KeyValuePair<Ngram<string>, int> entry in ordered)
                {
                    writer.Write("{0} {1} {2}\n", string.Join(" ", entry.Key),
                                 entry.Key.Length == 1 ? totalTokens : counts[entry.Key.TakeAllExceptLast()], entry.Value);
                }
            }
        }
Пример #31
0
        /// <summary>
        /// Returns the sonority of the first level in the sonority scale whose sound class matches
        /// <paramref name="node"/> together with its immediate neighbours, or 0 if none matches.
        /// </summary>
        private int GetSonority(ShapeNode node)
        {
            ShapeNode before = node.Prev;
            Ngram<Segment> segment = _segmentPool.Get(node);
            ShapeNode after = node.Next;

            int sonority = 0;
            foreach (SonorityClass level in _sonorityScale)
            {
                if (!level.SoundClass.Matches(before, segment, after))
                {
                    continue;
                }
                sonority = level.Sonority;
                break;
            }
            return sonority;
        }
Пример #32
0
        /// <summary>
        /// Identifies prefixes and then suffixes in <paramref name="words"/> and converts each to
        /// an affix of the given <paramref name="category"/>.
        /// </summary>
        /// <remarks>
        /// Prefix n-grams get <c>Segment.Anchor</c> prepended. Suffix n-grams have leading
        /// tone-letter segments dropped and <c>Segment.Anchor</c> appended; a suffix left with
        /// only the anchor is skipped.
        /// </remarks>
        private IEnumerable <Affix> IdentifyAffixes(Word[] words, string category)
        {
            foreach (Affix<Segment> prefix in _affixIdentifier.IdentifyAffixes(words, AffixType.Prefix))
            {
                var anchored = new Ngram<Segment>(Segment.Anchor.ToEnumerable().Concat(prefix.Ngram));
                yield return CreateAffix(anchored, category, prefix.Score);
            }

            foreach (Affix<Segment> suffix in _affixIdentifier.IdentifyAffixes(words, AffixType.Suffix))
            {
                var anchored = new Ngram<Segment>(suffix.Ngram.SkipWhile(seg => seg.Type == CogFeatureSystem.ToneLetterType).Concat(Segment.Anchor));
                if (anchored.Length <= 1)
                {
                    continue;
                }
                yield return CreateAffix(anchored, category, suffix.Score);
            }
        }
Пример #33
0
    /// <summary>
    /// Builds the n-gram dictionaries of orders 1 to 4 for both inputs, compares them with
    /// <c>compareTwoGram</c>, and prints each resulting entry as "p_{key} is {value}".
    /// </summary>
    public static void bleu(Ngram N1, Ngram N2)
    {
        var firstByOrder = new List<Dictionary<string, int>>();
        var secondByOrder = new List<Dictionary<string, int>>();

        int order = 1;
        while (order <= 4)
        {
            firstByOrder.Add(N1.NgramToDict(order));
            secondByOrder.Add(N2.NgramToDict(order));
            order++;
        }

        Dictionary<string, double> scores = compareTwoGram(firstByOrder, secondByOrder);
        foreach (KeyValuePair<string, double> score in scores)
        {
            Console.WriteLine("p_{0} is {1}", score.Key, score.Value);
        }
    }
Пример #34
0
        /// <summary>
        /// Fills a test database with 20 "hoge" task lists and one "ogem" task list, indexes
        /// their bigram text in the FTS table, and asserts that selecting "oge" succeeds.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the database path is built with a hard-coded backslash appended to the
        /// temp path; confirm this matches how the helper methods locate the file.
        /// </remarks>
        public void ExecuteMoreSelectTableFTSTest()
        {
            string databaseName = "taskaludb11.sqlite";
            string databasePath = Path.GetTempPath() + "\\" + databaseName;

            TouchTestDB(databaseName);

            SQLiteClass.moreSize = 10;

            // Rows 1..20 share the same name.
            int id = 1;
            while (id <= 20)
            {
                InsertTableTaskList(databaseName, "hoge", id);
                SQLiteClass.ExecuteInsertTableFTSString(databasePath, id, "tasklist_name", Ngram.getNgramText("hoge", 2));
                id++;
            }

            // Row 21 has a different name that also contains "oge".
            InsertTableTaskList(databaseName, "ogem", 21);
            SQLiteClass.ExecuteInsertTableFTSString(databasePath, 21, "tasklist_name", Ngram.getNgramText("ogem", 2));

            Debug.Assert(SQLiteClass.ExecuteFirstSelectTable(databasePath, "oge"));
        }
Пример #35
0
        /// <summary>
        /// Reports whether any segment of <paramref name="target1"/> is within the configured
        /// threshold of any segment of <paramref name="target2"/>, using the aligner's delta
        /// between their feature structures.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the threshold is 0 or either target is empty; otherwise whether a
        /// pair of segments with delta less than or equal to the threshold exists.
        /// </returns>
        public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
        {
            if (_threshold == 0 || target1.Length == 0 || target2.Length == 0)
            {
                return false;
            }

            IWordAligner aligner = _project.WordAligners[_alignerID];

            foreach (Segment first in target1)
            {
                foreach (Segment second in target2)
                {
                    var delta = aligner.Delta(first.FeatureStruct, second.FeatureStruct);
                    if (delta <= _threshold)
                    {
                        return true;
                    }
                }
            }

            return false;
        }
Пример #36
0
 /// <summary>
 /// Determines whether this instance matches <paramref name="target"/>, given the nodes on
 /// either side of it.
 /// </summary>
 /// <param name="leftNode">Node to the left of the target. NOTE(review): overrides seen elsewhere either ignore it or tolerate null — confirm the contract.</param>
 /// <param name="target">The segment n-gram being tested.</param>
 /// <param name="rightNode">Node to the right of the target; same caveat as <paramref name="leftNode"/>.</param>
 /// <returns><c>true</c> if the target matches; otherwise <c>false</c>.</returns>
 public abstract bool Matches(ShapeNode leftNode, Ngram<Segment> target, ShapeNode rightNode);
Пример #37
0
 /// <summary>
 /// Tests whether correspondence <paramref name="v"/> in the given alignment column is
 /// frequent enough to count as regular.
 /// </summary>
 /// <remarks>
 /// The default threshold applies unless automatic thresholds are enabled and the threshold
 /// table supplies a value for this cognate count, sample outcome count and correspondence count.
 /// </remarks>
 private bool IsRegular(WordPair wordPair, IWordAlignerResult alignerResult, Alignment<Word, ShapeNode> alignment, int column, Ngram<Segment> v)
 {
     VarietyPair pair = wordPair.VarietyPair;
     SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
     FrequencyDistribution<Ngram<Segment>> observed = pair.CognateSoundCorrespondenceFrequencyDistribution[context];

     int threshold = _defaultRegularCorrepondenceThreshold;
     if (_automaticRegularCorrespondenceThreshold)
     {
         // Total count of v across every context that shares this left and right environment.
         int seg2Count = pair.CognateSoundCorrespondenceFrequencyDistribution.Conditions
             .Where(sc => sc.LeftEnvironment == context.LeftEnvironment && sc.RightEnvironment == context.RightEnvironment)
             .Sum(sc => pair.CognateSoundCorrespondenceFrequencyDistribution[sc][v]);

         int tableThreshold;
         if (_regularCorrespondenceThresholdTable.TryGetThreshold(pair.CognateCount, observed.SampleOutcomeCount, seg2Count, out tableThreshold))
         {
             threshold = tableThreshold;
         }
     }

     return observed[v] >= threshold;
 }
Пример #38
0
        /// <summary>
        /// Checks whether the two targets are mapped to each other, taking the surrounding nodes
        /// into account through the environments stored with each mapping.
        /// </summary>
        /// <remarks>
        /// String representations of both targets are compared pairwise. Equal representations
        /// are mapped outright. The first pair found in the lookup table settles the result by
        /// testing its environment tuples; later pairs are not considered.
        /// </remarks>
        public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
        {
            if (_mappings.Count == 0)
            {
                return false;
            }

            foreach (string rep1 in GetStrReps(target1))
            {
                foreach (string rep2 in GetStrReps(target2))
                {
                    if (rep1 == rep2)
                    {
                        return true;
                    }

                    Dictionary<string, List<Tuple<Environment, Environment>>> partners;
                    List<Tuple<Environment, Environment>> environments;
                    if (!(_mappingLookup.TryGetValue(rep1, out partners) && partners.TryGetValue(rep2, out environments)))
                    {
                        continue;
                    }

                    // An explicit mapping exists for this pair; it applies only if both contexts fit.
                    return environments.Any(env => CheckEnvironment(env.Item1, leftNode1, rightNode1) && CheckEnvironment(env.Item2, leftNode2, rightNode2));
                }
            }

            return false;
        }
Пример #39
0
 /// <summary>
 /// Reports whether any of the component segment mappings considers the two targets mapped.
 /// Components are asked in order and evaluation stops at the first that answers <c>true</c>.
 /// </summary>
 public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
 {
     foreach (var component in _segmentMappingsComponents)
     {
         if (component.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2))
         {
             return true;
         }
     }

     return false;
 }
Пример #40
0
 /// <summary>
 /// Enumerates the string representations under which <paramref name="target"/> may appear
 /// in the mappings.
 /// </summary>
 /// <remarks>
 /// An empty target yields only "-". Otherwise each segment yields its own representation
 /// and, when implicit complex segments are enabled and the segment is complex, the
 /// representation of each node obtained by segmenting that representation.
 /// </remarks>
 private IEnumerable<string> GetStrReps(Ngram<Segment> target)
 {
     if (target.Length == 0)
     {
         yield return "-";
         yield break;
     }

     foreach (Segment segment in target)
     {
         yield return segment.StrRep;

         if (!_implicitComplexSegments || !segment.IsComplex)
         {
             continue;
         }

         Shape parts = _segmenter.Segment(segment.StrRep);
         foreach (ShapeNode part in parts)
         {
             yield return part.StrRep();
         }
     }
 }