/// <summary>
/// Determines whether <paramref name="target1"/> and <paramref name="target2"/> are mapped to each other.
/// </summary>
/// <remarks>
/// Every string representation of the first target is paired with every string representation of the
/// second. Identical representations count as mapped. Otherwise the first pair present in the lookup
/// table decides the result: it is mapped when at least one of its environment pairs accepts the
/// neighbouring nodes of both targets.
/// </remarks>
/// <returns><c>true</c> when a mapping applies; <c>false</c> otherwise, or when there are no mappings at all.</returns>
public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
{
    if (_mappings.Count == 0)
    {
        return false;
    }

    foreach (string first in GetStrReps(target1))
    {
        foreach (string second in GetStrReps(target2))
        {
            if (first == second)
            {
                return true;
            }

            Dictionary<string, List<Tuple<Environment, Environment>>> bySecond;
            if (!_mappingLookup.TryGetValue(first, out bySecond))
            {
                continue;
            }

            List<Tuple<Environment, Environment>> environments;
            if (!bySecond.TryGetValue(second, out environments))
            {
                continue;
            }

            // The first lookup hit is decisive: remaining combinations are not examined.
            foreach (Tuple<Environment, Environment> pair in environments)
            {
                if (CheckEnvironment(pair.Item1, leftNode1, rightNode1) && CheckEnvironment(pair.Item2, leftNode2, rightNode2))
                {
                    return true;
                }
            }
            return false;
        }
    }

    return false;
}
/// <summary>
/// Finds the first sound class that matches <paramref name="node"/> together with its neighbours.
/// </summary>
/// <remarks>
/// The stem annotation of the node's shape determines what is passed to the sound classes:
/// a node outside the stem is represented by <c>Segment.Anchor</c>, and a neighbour outside the stem
/// is replaced by the beginning or end of the node list. The left neighbour is only looked up when
/// the node is in or after the stem, the right neighbour only when it is in or before the stem.
/// </remarks>
/// <param name="soundClass">The first matching sound class, or <c>null</c> when none matches.</param>
/// <returns><c>true</c> when a matching sound class was found.</returns>
public static bool TryGetMatchingSoundClass(this IEnumerable<SoundClass> soundClasses, SegmentPool segmentPool, ShapeNode node, out SoundClass soundClass)
{
    Annotation<ShapeNode> stem = ((Shape)node.List).Annotations.First(a => a.Type() == CogFeatureSystem.StemType);
    bool nodeInStem = stem.Range.Contains(node);

    ShapeNode left = null;
    if (nodeInStem || node.Annotation.CompareTo(stem) > 0)
    {
        ShapeNode previous = node.GetPrev(NodeFilter);
        if (previous != null)
        {
            left = stem.Range.Contains(previous) ? previous : node.List.Begin;
        }
    }

    Ngram<Segment> target = nodeInStem ? segmentPool.Get(node) : Segment.Anchor;

    ShapeNode right = null;
    if (nodeInStem || node.Annotation.CompareTo(stem) < 0)
    {
        ShapeNode following = node.GetNext(NodeFilter);
        if (following != null)
        {
            right = stem.Range.Contains(following) ? following : node.List.End;
        }
    }

    SoundClass match = soundClasses.FirstOrDefault(sc => sc.Matches(left, target, right));
    soundClass = match;
    return match != null;
}
/// <summary>
/// Computes the character counts, sequence lengths, ratios and n-gram frequencies of
/// <paramref name="domain"/> and stores them in this instance.
/// </summary>
/// <remarks>
/// When <paramref name="domain"/> is null or empty nothing is computed and every member keeps its
/// default value.
/// </remarks>
/// <param name="domain">The domain string to analyse.</param>
public ProcessingMulti(string domain)
{
    if (string.IsNullOrEmpty(domain))
    {
        return;
    }

    CurrentDomain = domain;
    DomainLength = domain.Length;

    // Character-class counts (all patterns are matched case-insensitively).
    NumberDigits = Regex.Matches(domain, @"[0123456789]", RegexOptions.IgnoreCase).Count;
    NumberVowels = Regex.Matches(domain, @"[eyuioa]", RegexOptions.IgnoreCase).Count;
    NumberConsonants = Regex.Matches(domain, @"[qwrtpsdfghjklzxcvbnm]", RegexOptions.IgnoreCase).Count;
    NumberSpecialCharacters = Regex.Matches(domain, @"[\|!#$%&/()=?»«*@£§€{};'<>_,]", RegexOptions.IgnoreCase).Count;
    NumberUniqueCharacter = domain.Distinct().Count();

    // NOTE(review): subtracting the consonant count looks like a copy/paste slip; the name suggests
    // DomainLength - NumberUniqueCharacter. Left unchanged because consumers may rely on the current value.
    NumberRepetitions = DomainLength - NumberConsonants;

    LengthDigitsSequence = CalculateSequence(Digits);
    LengthVowelsSequence = CalculateSequence(Vowels);
    LengthConsonantSequence = CalculateSequence(Consonants);
    LengthSpecialCharactersSequence = CalculateSequence(SpecialCharacters);

    // NOTE(review): if the Number*/DomainLength members are integers, these ratios use integer
    // division and truncate towards zero — confirm the member types.
    RatioDigitToLength = NumberDigits / DomainLength;
    RatioVowelsToLength = NumberVowels / DomainLength;
    RatioConsonantsToLength = NumberConsonants / DomainLength;
    RatioSpecialCharactersToLength = NumberSpecialCharacters / DomainLength;

    // This is the only ratio whose denominator can be zero (DomainLength is non-zero past the guard
    // above). A domain without consonants would otherwise divide by zero; the ratio is left at its
    // default value in that case.
    if (NumberConsonants != 0)
    {
        RatioVowelsToConsonants = NumberVowels / NumberConsonants;
    }

    RatioUniqueCharacterToLength = NumberUniqueCharacter / DomainLength;
    FrequencyRepeat = CalculateFrequencyRepeat();

    Ngram ngram = Ngram.getInstance();
    FrequencyNgram2 = ngram.CalculateFrequencyNgram(domain, 2);
    FrequencyNgram3 = ngram.CalculateFrequencyNgram(domain, 3);
    FrequencyNgram4 = ngram.CalculateFrequencyNgram(domain, 4);
    FrequencyNgram5 = ngram.CalculateFrequencyNgram(domain, 5);
}
/// <summary>
/// Recomputes the selection state of every aligned node in <paramref name="wordPairs"/> against the
/// currently selected sound change and rebuilds the list of word pairs containing a selected node.
/// </summary>
/// <remarks>
/// When no sound change is selected every node is deselected. Otherwise a node is selected when both
/// its sound context (alignment row 0) and its correspondence (alignment row 1) equal those of the
/// selected sound change.
/// </remarks>
private void UpdateSelectedChangeWordPairs(WordPairsViewModel wordPairs)
{
    IWordAligner aligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
    wordPairs.SelectedCorrespondenceWordPairs.Clear();

    foreach (WordPairViewModel pair in wordPairs.WordPairs)
    {
        bool anySelected = false;

        foreach (AlignedNodeViewModel alignedNode in pair.AlignedNodes)
        {
            if (_selectedSoundChange == null)
            {
                alignedNode.IsSelected = false;
                continue;
            }

            SoundContext context = pair.DomainAlignment.ToSoundContext(_segmentPool, 0, alignedNode.Column, aligner.ContextualSoundClasses);
            Ngram<Segment> correspondence = pair.DomainAlignment[1, alignedNode.Column].ToNgram(_segmentPool);
            alignedNode.IsSelected = context.Equals(_selectedSoundChange.DomainSoundChangeLhs)
                                     && correspondence.Equals(_selectedSoundChange.DomainCorrespondence);
            anySelected |= alignedNode.IsSelected;
        }

        if (anySelected)
        {
            wordPairs.SelectedCorrespondenceWordPairs.Add(pair);
        }
    }
}
/// <summary>
/// Computes the highest sound-change score obtainable for <paramref name="node"/> of
/// <paramref name="word"/> with respect to the variety of <paramref name="otherWord"/>.
/// </summary>
/// <remarks>
/// Returns 0 when both words belong to the same variety or when the variety pair has no sound-change
/// probability distribution. Otherwise the largest applicable probability is scaled by
/// <c>MaxSoundChangeScore</c> and truncated to an integer.
/// </remarks>
private int GetMaxSoundChangeScore(Word word, ShapeNode node, Word otherWord)
{
    if (word.Variety == otherWord.Variety)
    {
        return 0;
    }

    VarietyPair pair = word.Variety.VarietyPairs[otherWord.Variety];
    var distribution = pair.SoundChangeProbabilityDistribution;
    if (distribution == null)
    {
        return 0;
    }

    double best;
    if (pair.Variety1 == word.Variety)
    {
        // The word is on the first side of the pair: take the most probable correspondence for the
        // node's context, falling back to the pair's default probability.
        SoundContext context = node.ToSoundContext(_segmentPool, _contextualSoundClasses);
        best = pair.DefaultCorrespondenceProbability;

        IProbabilityDistribution<Ngram<Segment>> candidates;
        if (distribution.TryGetProbabilityDistribution(context, out candidates) && candidates.Samples.Count > 0)
        {
            best = candidates.Samples.Max(sample => candidates[sample]);
        }
    }
    else
    {
        // The word is on the other side: take the highest probability of this segment over all conditions.
        Ngram<Segment> corr = _segmentPool.GetExisting(node);
        if (distribution.Conditions.Count == 0)
        {
            best = 0;
        }
        else
        {
            best = distribution.Conditions.Max(condition => distribution[condition][corr]);
        }
    }

    return (int)(MaxSoundChangeScore * best);
}
/// <summary>
/// Decides whether the correspondence <paramref name="v"/> in the given alignment column occurs often
/// enough among cognates to be considered regular.
/// </summary>
/// <remarks>
/// The frequency of <paramref name="v"/> in the column's sound context is compared with a threshold.
/// With automatic thresholds enabled the threshold comes from the threshold table; the default
/// threshold is used when automatic thresholds are disabled or the table has no entry.
/// </remarks>
/// <returns><c>true</c> when the observed frequency reaches the threshold.</returns>
private bool IsRegular(WordPair wordPair, IWordAlignerResult alignerResult, Alignment<Word, ShapeNode> alignment, int column, Ngram<Segment> v)
{
    VarietyPair vp = wordPair.VarietyPair;
    SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
    var distribution = vp.CognateSoundCorrespondenceFrequencyDistribution;
    FrequencyDistribution<Ngram<Segment>> freqDist = distribution[context];

    int threshold = DefaultRegularCorrespondenceThreshold;
    if (AutomaticRegularCorrespondenceThreshold)
    {
        // Total count of v over all contexts sharing this column's left and right environments.
        int seg2Count = distribution.Conditions
            .Where(sc => sc.LeftEnvironment == context.LeftEnvironment && sc.RightEnvironment == context.RightEnvironment)
            .Sum(sc => distribution[sc][v]);

        int tableThreshold;
        if (_regularCorrespondenceThresholdTable.TryGetThreshold(vp.CognateCount, freqDist.SampleOutcomeCount, seg2Count, out tableThreshold))
        {
            threshold = tableThreshold;
        }
    }

    return freqDist[v] >= threshold;
}
/// <summary>
/// Lazily enumerates the known affixes of <paramref name="word"/> together with the remainder of the
/// word once the affix has been removed.
/// </summary>
/// <remarks>
/// The empty affix is always yielded first, paired with the whole word. Candidate affixes are then
/// grown one item at a time (from the left for prefixes, from the right otherwise), up to
/// <c>MaxAffixLength</c> items and always leaving at least one item of the word; only candidates
/// present in the affix table are yielded.
/// </remarks>
private IEnumerable<Tuple<AffixInfo, Ngram<TItem>>> GetAffixes(Ngram<TItem> word, AffixType type)
{
    Direction dir;
    Dictionary<Ngram<TItem>, AffixInfo> table;
    if (type != AffixType.Prefix)
    {
        dir = Direction.RightToLeft;
        table = _suffixes;
    }
    else
    {
        dir = Direction.LeftToRight;
        table = _prefixes;
    }

    var candidate = new Ngram<TItem>();
    yield return Tuple.Create(table[candidate], word);

    int limit = Math.Min(MaxAffixLength, word.Length - 1);
    foreach (TItem item in word.GetItems(dir).Take(limit))
    {
        candidate = candidate.Concat(item, dir);
        word = word.SkipFirst(dir);

        AffixInfo info;
        if (table.TryGetValue(candidate, out info))
        {
            yield return Tuple.Create(info, word);
        }
    }
}
/// <summary>
/// Creates a view model for one sound change.
/// </summary>
/// <param name="lhs">The sound context of the change; also wrapped in a <c>SoundChangeLhsViewModel</c>.</param>
/// <param name="correspondence">The correspondence stored for the change.</param>
/// <param name="probability">The probability stored for the change.</param>
/// <param name="frequency">The frequency stored for the change.</param>
public SoundChangeViewModel(SoundContext lhs, Ngram<Segment> correspondence, double probability, int frequency)
{
    _frequency = frequency;
    _prob = probability;
    _correspondence = correspondence;
    _domainLhs = lhs;
    _lhs = new SoundChangeLhsViewModel(lhs);
}
/// <summary>
/// Initializes the view model from the domain values of a sound change.
/// </summary>
/// <param name="lhs">Sound context kept as the domain left-hand side and exposed through a nested view model.</param>
/// <param name="correspondence">Correspondence kept by this view model.</param>
/// <param name="probability">Probability kept by this view model.</param>
/// <param name="frequency">Frequency kept by this view model.</param>
public SoundChangeViewModel(SoundContext lhs, Ngram<Segment> correspondence, double probability, int frequency)
{
    // Domain objects first, then the derived view model, then the plain numbers.
    this._domainLhs = lhs;
    this._correspondence = correspondence;

    this._lhs = new SoundChangeLhsViewModel(lhs);

    this._prob = probability;
    this._frequency = frequency;
}
/// <summary>
/// Prepares the shared MySQL command with an INSERT statement for <paramref name="table"/>.
/// </summary>
/// <remarks>
/// NOTE(review): this method looks unfinished. The command text is only assigned and never executed,
/// the <c>{}</c> placeholders are never filled, <paramref name="ngram"/> is unused, and the method
/// always returns <c>false</c>.
/// </remarks>
/// <param name="table">Name of the target table. It is concatenated into the SQL text, so it must never come from untrusted input.</param>
/// <param name="ngram">Currently unused.</param>
/// <returns>Always <c>false</c>.</returns>
public static bool Add(string table, Ngram ngram)
{
    MysqlHelper.Open();
    MysqlHelper.cmd.Parameters.Clear();
    MysqlHelper.cmd.Connection = MysqlHelper.conn;

    // Fixed keyword typo ("INSERT INRO"), which made the statement invalid SQL.
    MysqlHelper.cmd.CommandText = "INSERT INTO " + table + " VALUES ({},{},{})";
    return false;
}
/// <summary>
/// Delegates the mapping check to the vowel or the consonant mappings, depending on the type of the
/// first segment of each target.
/// </summary>
/// <remarks>
/// An empty target is compatible with either kind. The vowel mappings are tried first, so two empty
/// targets are handled by the vowel mappings. Targets of different kinds are never mapped.
/// </remarks>
public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
{
    if (target1.Length == 0 || target1.First.Type == CogFeatureSystem.VowelType)
    {
        if (target2.Length == 0 || target2.First.Type == CogFeatureSystem.VowelType)
        {
            return _vowelMappings.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2);
        }
    }

    if (target1.Length == 0 || target1.First.Type == CogFeatureSystem.ConsonantType)
    {
        if (target2.Length == 0 || target2.First.Type == CogFeatureSystem.ConsonantType)
        {
            return _consMappings.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2);
        }
    }

    return false;
}
/// <summary>
/// Entry point: builds an <c>Ngram</c> from a fixed sample sentence with N = 3 and calls
/// <c>readNgram</c> on it.
/// </summary>
public static void Main(string[] arg)
{
    const int order = 3;
    const string sentence = "Hello world my age is 23.5";

    Ngram sample = new Ngram(sentence, order);
    sample.readNgram();
}
/// <summary>
/// Entry point: builds two 4-gram models from two fixed sentences and passes them to <c>bleu</c>.
/// </summary>
static void Main(string[] args)
{
    const int order = 4;

    Ngram first = new Ngram("Going to play basketball this afternoon ?", order);
    Ngram second = new Ngram("Going to play basketball in the afternoon ?", order);

    bleu(first, second);
}
/// <summary>
/// Reports whether any segment of <paramref name="target"/> has a feature structure that is
/// unifiable with this instance's feature structure.
/// </summary>
/// <remarks>The neighbouring nodes are not consulted. An empty target never matches.</remarks>
public override bool Matches(ShapeNode leftNode, Ngram<Segment> target, ShapeNode rightNode)
{
    bool matched = false;
    foreach (Segment segment in target)
    {
        if (_fs.IsUnifiable(segment.FeatureStruct))
        {
            matched = true;
            break;
        }
    }
    return matched;
}
/// <summary>
/// Verifies that <c>Ngram.Create</c> splits a sentence into consecutive three-word n-grams.
/// </summary>
public void CreateNGram()
{
    var ngram = new Ngram();
    var expected = new List<string>
    {
        "This a sentence",
        "a sentence of",
        "sentence of my",
        "of my test"
    };

    var words = ngram.Create("This a sentence of my test", 3);

    // The expected value goes first so that a failure message labels the two sides correctly.
    CollectionAssert.AreEqual(expected, words);
}
/// <summary>
/// Finds the first sound class that matches the alignment cell at (<paramref name="seq"/>,
/// <paramref name="col"/>) together with the nodes to its left and right.
/// </summary>
/// <param name="soundClass">The first matching sound class, or <c>null</c> when none matches.</param>
/// <returns><c>true</c> when a matching sound class was found.</returns>
public static bool TryGetMatchingSoundClass(this IEnumerable<SoundClass> soundClasses, SegmentPool segmentPool, Alignment<Word, ShapeNode> alignment, int seq, int col, out SoundClass soundClass)
{
    ShapeNode before = alignment.GetLeftNode(seq, col);
    Ngram<Segment> cell = alignment[seq, col].ToNgram(segmentPool);
    ShapeNode after = alignment.GetRightNode(seq, col);

    foreach (SoundClass candidate in soundClasses)
    {
        if (candidate.Matches(before, cell, after))
        {
            soundClass = candidate;
            return true;
        }
    }

    soundClass = null;
    return false;
}
/// <summary>
/// Checks <c>Ngram.getNgramTextForSearch</c> with N = 2 for an input longer than N and for an input
/// shorter than N.
/// </summary>
public void getNgramTextForSearchTest()
{
    const int size = 2;

    // "test" is expected to produce space-separated bigrams.
    string bigrams = Ngram.getNgramTextForSearch("test", size);
    Debug.Assert(string.Compare(bigrams, "te es st") == 0);

    // A single character is expected to come back with a trailing '*'.
    string shortInput = Ngram.getNgramTextForSearch("t", size);
    Debug.Assert(string.Compare(shortInput, "t*") == 0);
}
/// <summary>
/// Verifies that <c>ToString</c> on an n-gram parsed from a frequency line with tab-separated words
/// returns the words separated by single spaces.
/// </summary>
public void ToString_MultipleWords_ReturnsWhiteSpaceSeparatedWords()
{
    string line = " 2301 a\tja\tsom\tsa";
    string expectedToString = "a ja som sa";
    Ngram ngram = new Ngram(line);

    var result = ngram.ToString();

    // Expected first, actual second, so that failure messages label the values correctly.
    Assert.AreEqual(expectedToString, result);
}
/// <summary>
/// Verifies that the leading number of a frequency line is exposed through <c>Frequency</c>.
/// </summary>
public void Frequency_MultipleWords_ReturnsIntFrequency()
{
    string line = " 2301 a\tja\tsom\tsa";
    int frequency = 2301;
    Ngram ngram = new Ngram(line);

    var result = ngram.Frequency;

    // Expected first, actual second, so that failure messages label the values correctly.
    Assert.AreEqual(frequency, result);
}
/// <summary>
/// Adds one observation per alignment column to <paramref name="cognateCorrCounts"/>.
/// </summary>
/// <remarks>
/// For every column, the sound context is built from row 0 of the alignment and the correspondence
/// n-gram from row 1; the counter for that (context, correspondence) pair is incremented.
/// </remarks>
private void UpdateCognateCorrespondenceCounts(IWordAligner aligner, ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>> cognateCorrCounts, Alignment<Word, ShapeNode> alignment)
{
    int columnCount = alignment.ColumnCount;
    for (int col = 0; col < columnCount; col++)
    {
        SoundContext context = alignment.ToSoundContext(_segmentPool, 0, col, aligner.ContextualSoundClasses);
        Ngram<Segment> correspondence = alignment[1, col].ToNgram(_segmentPool);
        cognateCorrCounts[context].Increment(correspondence);
    }
}
/// <summary>
/// Rebuilds a <c>VarietyPair</c> domain object from the data held by this surrogate.
/// </summary>
/// <remarks>
/// Restores, in order: the similarity scores and default probability, the word pairs, the
/// sound-change frequency distribution, a Witten-Bell probability distribution derived from it, and
/// the sound correspondence collections stored under the keys "onset", "nucleus" and "coda".
/// </remarks>
/// <param name="segmentPool">Pool used to resolve segment string representations.</param>
/// <param name="project">Project providing the varieties and the primary word aligner.</param>
/// <returns>The reconstructed variety pair.</returns>
public VarietyPair ToVarietyPair(SegmentPool segmentPool, CogProject project)
{
    var pair = new VarietyPair(project.Varieties[Variety1], project.Varieties[Variety2]);
    pair.PhoneticSimilarityScore = PhoneticSimilarityScore;
    pair.LexicalSimilarityScore = LexicalSimilarityScore;
    pair.DefaultCorrespondenceProbability = DefaultCorrespondenceProbability;

    // Cache so that each surrogate is converted once and can be shared with the correspondences below.
    var wordPairCache = new Dictionary<WordPairSurrogate, WordPair>();
    pair.WordPairs.AddRange(_wordPairs.Select(wps => wordPairCache.GetValue(wps, () => wps.ToWordPair(project, pair))));

    var counts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
    foreach (KeyValuePair<SoundContextSurrogate, Tuple<string[], int>[]> entry in _soundChanges)
    {
        SoundContext context = entry.Key.ToSoundContext(project, segmentPool);
        FrequencyDistribution<Ngram<Segment>> contextCounts = counts[context];
        foreach (Tuple<string[], int> sample in entry.Value)
        {
            // A null segment array stands for the empty n-gram.
            Ngram<Segment> corr;
            if (sample.Item1 == null)
            {
                corr = new Ngram<Segment>();
            }
            else
            {
                corr = new Ngram<Segment>(sample.Item1.Select(segmentPool.GetExisting));
            }
            contextCounts.Increment(corr, sample.Item2);
        }
    }
    pair.SoundChangeFrequencyDistribution = counts;

    IWordAligner aligner = project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
    int segmentCount = pair.Variety2.SegmentFrequencyDistribution.ObservedSamples.Count;
    int possCorrCount;
    if (aligner.ExpansionCompressionEnabled)
    {
        possCorrCount = (segmentCount * segmentCount) + segmentCount + 1;
    }
    else
    {
        possCorrCount = segmentCount + 1;
    }
    pair.SoundChangeProbabilityDistribution = new ConditionalProbabilityDistribution<SoundContext, Ngram<Segment>>(
        counts,
        (sc, fd) => new WittenBellProbabilityDistribution<Ngram<Segment>>(fd, possCorrCount));

    foreach (KeyValuePair<string, List<SoundCorrespondenceSurrogate>> kvp in _soundCorrespondenceCollections)
    {
        if (kvp.Value == null)
        {
            continue;
        }

        // Unknown keys leave the position null, exactly as an unmatched switch would.
        FeatureSymbol position = null;
        if (kvp.Key == "onset")
        {
            position = CogFeatureSystem.Onset;
        }
        else if (kvp.Key == "nucleus")
        {
            position = CogFeatureSystem.Nucleus;
        }
        else if (kvp.Key == "coda")
        {
            position = CogFeatureSystem.Coda;
        }

        pair.SoundCorrespondenceCollections[position].AddRange(kvp.Value.Select(scs => scs.ToSoundCorrespondence(segmentPool, wordPairCache)));
    }

    return pair;
}
/// <summary>
/// Returns <c>true</c> as soon as one segment of <paramref name="target"/> carries a feature
/// structure that this instance's feature structure can unify with.
/// </summary>
/// <remarks><paramref name="leftNode"/> and <paramref name="rightNode"/> are ignored.</remarks>
public override bool Matches(ShapeNode leftNode, Ngram<Segment> target, ShapeNode rightNode)
{
    foreach (Segment segment in target)
    {
        if (!_fs.IsUnifiable(segment.FeatureStruct))
        {
            continue;
        }
        return true;
    }

    return false;
}
/// <summary>
/// Verifies that <c>Words</c> on an n-gram parsed from a frequency line returns the tab-separated
/// words in their original order.
/// </summary>
public void Words_MultipleWords_ReturnsArrayOfStrings()
{
    string line = " 2301 a\tja\tsom\tsa";
    string[] words = { "a", "ja", "som", "sa" };
    Ngram ngram = new Ngram(line);

    var result = ngram.Words;

    // Compare element by element: Assert.AreEqual on two arrays can fall back to reference equality
    // (it does under MSTest). Expected goes first, actual second.
    CollectionAssert.AreEqual(words, result);
}
/// <summary>
/// Routes the mapping check to the vowel mappings when both targets are vowel-initial (or empty),
/// to the consonant mappings when both are consonant-initial (or empty), and reports "not mapped"
/// for any other combination.
/// </summary>
/// <remarks>The vowel test comes first, so two empty targets are checked against the vowel mappings.</remarks>
public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
{
    bool mapped = false;

    if ((target1.Length == 0 || target1.First.Type == CogFeatureSystem.VowelType)
        && (target2.Length == 0 || target2.First.Type == CogFeatureSystem.VowelType))
    {
        mapped = _vowelMappings.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2);
    }
    else if ((target1.Length == 0 || target1.First.Type == CogFeatureSystem.ConsonantType)
             && (target2.Length == 0 || target2.First.Type == CogFeatureSystem.ConsonantType))
    {
        mapped = _consMappings.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2);
    }

    return mapped;
}
/// <summary>
/// Collects the segment-to-segment sound correspondences of all cognate word pairs in
/// <paramref name="data"/>, grouped by syllable position, and stores them on the variety pair.
/// </summary>
/// <remarks>
/// Only alignment columns where both cells hold exactly one node, both nodes carry the same syllable
/// position, and the two segments differ are counted. Each counted column increments the frequency
/// of its correspondence and records the word pair on it.
/// </remarks>
public void Process(VarietyPair data)
{
    IWordAligner aligner = _project.WordAligners[_alignerId];

    var byPosition = new Dictionary<FeatureSymbol, SoundCorrespondenceCollection>();
    byPosition.Add(CogFeatureSystem.Onset, new SoundCorrespondenceCollection());
    byPosition.Add(CogFeatureSystem.Nucleus, new SoundCorrespondenceCollection());
    byPosition.Add(CogFeatureSystem.Coda, new SoundCorrespondenceCollection());

    foreach (WordPair wordPair in data.WordPairs.Where(wp => wp.Cognacy))
    {
        Alignment<Word, ShapeNode> alignment = aligner.Compute(wordPair).GetAlignments().First();

        for (int col = 0; col < alignment.ColumnCount; col++)
        {
            AlignmentCell<ShapeNode> cellA = alignment[0, col];
            AlignmentCell<ShapeNode> cellB = alignment[1, col];

            // Only one-to-one columns are considered.
            if (cellA.IsNull || cellB.IsNull || cellA.Count != 1 || cellB.Count != 1)
            {
                continue;
            }

            // Both nodes must have a syllable position, and it must be the same one.
            SymbolicFeatureValue pos1, pos2;
            if (!cellA.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out pos1)
                || !cellB.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out pos2)
                || !((FeatureSymbol)pos1 == (FeatureSymbol)pos2))
            {
                continue;
            }

            Ngram<Segment> ngram1 = cellA.ToNgram(_segmentPool);
            Ngram<Segment> ngram2 = cellB.ToNgram(_segmentPool);
            Segment seg1 = ngram1.First;
            Segment seg2 = ngram2.First;

            // Identical segments are not recorded as a correspondence.
            if (seg1.Equals(seg2))
            {
                continue;
            }

            SoundCorrespondenceCollection correspondences = byPosition[(FeatureSymbol)pos1];
            SoundCorrespondence corr;
            if (!correspondences.TryGet(seg1, seg2, out corr))
            {
                corr = new SoundCorrespondence(seg1, seg2);
                correspondences.Add(corr);
            }

            corr.Frequency++;
            corr.WordPairs.Add(wordPair);
        }
    }

    foreach (KeyValuePair<FeatureSymbol, SoundCorrespondenceCollection> entry in byPosition)
    {
        data.CognateSoundCorrespondencesByPosition[entry.Key].ReplaceAll(entry.Value);
    }
}
/// <summary>
/// Loads the n-gram data from "ngram.dat" in the application's persistent data folder into
/// <c>nGram</c>. Does nothing when the file does not exist.
/// </summary>
/// <remarks>
/// NOTE(review): <c>BinaryFormatter</c> is unsafe on data that could have been tampered with and is
/// obsolete in current .NET; consider migrating to a safer serializer.
/// </remarks>
public void Load()
{
    string path = Application.persistentDataPath + "/ngram.dat";
    if (!File.Exists(path))
    {
        return;
    }

    Debug.Log("Loaded");
    BinaryFormatter bf = new BinaryFormatter();

    Ngram g;
    // 'using' guarantees the file handle is released even when deserialization throws.
    using (FileStream file = File.Open(path, FileMode.Open))
    {
        g = (Ngram)bf.Deserialize(file);
    }

    nGram = g.gram;
}
/// <summary>
/// Serializes the current <c>nGram</c> data to "ngram.dat" in the application's persistent data
/// folder, replacing any existing file.
/// </summary>
/// <remarks>
/// NOTE(review): <c>BinaryFormatter</c> is obsolete in current .NET; consider migrating to a safer
/// serializer together with <c>Load</c>.
/// </remarks>
public void Save()
{
    Debug.Log("Saved");
    BinaryFormatter bf = new BinaryFormatter();

    Ngram g = new Ngram();
    g.gram = nGram;

    // 'using' guarantees the file handle is released even when serialization throws.
    using (FileStream file = File.Create(Application.persistentDataPath + "/ngram.dat"))
    {
        bf.Serialize(file, g);
    }
}
/// <summary>
/// Adds one observation per alignment column to <paramref name="counts"/>, unless the alignment's
/// normalized score is below the initial alignment threshold.
/// </summary>
/// <remarks>
/// For every column the sound context comes from row 0 of the alignment and the correspondence
/// n-gram from row 1.
/// </remarks>
private void UpdateCounts(IWordAligner aligner, ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>> counts, Alignment<Word, ShapeNode> alignment)
{
    // Alignments scoring below the threshold contribute nothing.
    if (alignment.NormalizedScore < _initialAlignmentThreshold)
    {
        return;
    }

    int col = 0;
    while (col < alignment.ColumnCount)
    {
        SoundContext context = alignment.ToSoundContext(_segmentPool, 0, col, aligner.ContextualSoundClasses);
        Ngram<Segment> correspondence = alignment[1, col].ToNgram(_segmentPool);
        counts[context].Increment(correspondence);
        col++;
    }
}
/// <summary>
/// Reports whether the string form of <paramref name="target"/> is one of this instance's
/// normalized segments.
/// </summary>
/// <remarks>
/// Modifiers are stripped first when the instance ignores modifiers. Next to an anchor node the
/// boundary-marked forms are also accepted: "#x" when the left node is an anchor and "x#" when the
/// right node is an anchor. The unmarked form is checked last.
/// </remarks>
public override bool Matches(ShapeNode leftNode, Ngram<Segment> target, ShapeNode rightNode)
{
    string text = target.ToString();
    if (_ignoreModifiers)
    {
        text = StripModifiers(text);
    }

    bool anchorOnLeft = leftNode != null && leftNode.Type() == CogFeatureSystem.AnchorType;
    if (anchorOnLeft && _normalizedSegments.Contains(string.Format("#{0}", text)))
    {
        return true;
    }

    bool anchorOnRight = rightNode != null && rightNode.Type() == CogFeatureSystem.AnchorType;
    if (anchorOnRight && _normalizedSegments.Contains(string.Format("{0}#", text)))
    {
        return true;
    }

    return _normalizedSegments.Contains(text);
}
/// <summary>
/// Counts all n-grams up to <paramref name="ngramSize"/> in the target side of the parallel corpus
/// and writes them to the file <paramref name="lmPrefix"/>.
/// </summary>
/// <remarks>
/// Segments that are empty or belong to the tuning set are skipped. Every segment is wrapped in
/// "&lt;s&gt;" and "&lt;/s&gt;", and the first occurrence of each word is replaced by "&lt;unk&gt;".
/// Each output line is "ngram contextCount count", where the context count of a unigram is the total
/// number of tokens and that of a longer n-gram is the count of the n-gram without its last item.
/// Lines are ordered by n-gram length and then by text.
/// </remarks>
private void WriteNgramCountsFile(string lmPrefix, int ngramSize)
{
    var counts = new Dictionary<Ngram<string>, int>();
    var seenWords = new HashSet<string>();
    int totalTokens = 0;

    var trainingSegments = _parallelCorpus.TargetSegments
        .Where((s, i) => !_tuneCorpusIndices.Contains(i) && !s.IsEmpty);

    foreach (TextSegment segment in trainingSegments)
    {
        var tokens = new List<string>();
        tokens.Add("<s>");
        foreach (string word in segment.Segment.Preprocess(_targetPreprocessor))
        {
            // HashSet.Add returns true only the first time a word is seen; that occurrence becomes <unk>.
            tokens.Add(seenWords.Add(word) ? "<unk>" : word);
        }
        tokens.Add("</s>");

        // Only the two boundary markers: the segment produced no words.
        if (tokens.Count == 2)
        {
            continue;
        }

        totalTokens += tokens.Count;

        for (int order = 1; order <= ngramSize; order++)
        {
            for (int start = 0; start + order <= tokens.Count; start++)
            {
                var ngram = new Ngram<string>(Enumerable.Range(start, order).Select(j => tokens[j]));
                counts.UpdateValue(ngram, () => 0, c => c + 1);
            }
        }
    }

    using (var writer = new StreamWriter(lmPrefix))
    {
        var ordered = counts
            .OrderBy(e => e.Key.Length)
            .ThenBy(e => string.Join(" ", e.Key));

        foreach (KeyValuePair<Ngram<string>, int> entry in ordered)
        {
            string text = string.Join(" ", entry.Key);
            int contextCount = entry.Key.Length == 1 ? totalTokens : counts[entry.Key.TakeAllExceptLast()];
            writer.Write("{0} {1} {2}\n", text, contextCount, entry.Value);
        }
    }
}
/// <summary>
/// Returns the sonority of the first level in the sonority scale whose sound class matches
/// <paramref name="node"/> with its immediate neighbours, or 0 when no level matches.
/// </summary>
private int GetSonority(ShapeNode node)
{
    ShapeNode before = node.Prev;
    Ngram<Segment> target = _segmentPool.Get(node);
    ShapeNode after = node.Next;

    int sonority = 0;
    foreach (SonorityClass level in _sonorityScale)
    {
        if (level.SoundClass.Matches(before, target, after))
        {
            sonority = level.Sonority;
            break;
        }
    }
    return sonority;
}
/// <summary>
/// Lazily produces the affixes identified in <paramref name="words"/>: first all prefixes, then all
/// suffixes.
/// </summary>
/// <remarks>
/// A prefix n-gram is preceded by <c>Segment.Anchor</c>. A suffix n-gram has its leading tone-letter
/// segments removed and is followed by <c>Segment.Anchor</c>; a suffix reduced to the anchor alone is
/// skipped.
/// </remarks>
private IEnumerable<Affix> IdentifyAffixes(Word[] words, string category)
{
    foreach (Affix<Segment> prefix in _affixIdentifier.IdentifyAffixes(words, AffixType.Prefix))
    {
        var anchoredSegments = Segment.Anchor.ToEnumerable().Concat(prefix.Ngram);
        var prefixNgram = new Ngram<Segment>(anchoredSegments);
        yield return CreateAffix(prefixNgram, category, prefix.Score);
    }

    foreach (Affix<Segment> suffix in _affixIdentifier.IdentifyAffixes(words, AffixType.Suffix))
    {
        var withoutLeadingTones = suffix.Ngram.SkipWhile(seg => seg.Type == CogFeatureSystem.ToneLetterType);
        var suffixNgram = new Ngram<Segment>(withoutLeadingTones.Concat(Segment.Anchor));
        if (suffixNgram.Length <= 1)
        {
            // Nothing but the anchor is left.
            continue;
        }
        yield return CreateAffix(suffixNgram, category, suffix.Score);
    }
}
/// <summary>
/// Builds the 1- to 4-gram dictionaries of both inputs, compares them with <c>compareTwoGram</c> and
/// prints one "p_{key} is {value}" line per entry of the result.
/// </summary>
public static void bleu(Ngram N1, Ngram N2)
{
    var firstGrams = new List<Dictionary<string, int>>();
    var secondGrams = new List<Dictionary<string, int>>();

    int order = 1;
    while (order <= 4)
    {
        firstGrams.Add(N1.NgramToDict(order));
        secondGrams.Add(N2.NgramToDict(order));
        order++;
    }

    Dictionary<string, double> scores = compareTwoGram(firstGrams, secondGrams);
    foreach (KeyValuePair<string, double> score in scores)
    {
        Console.WriteLine("p_{0} is {1}", score.Key, score.Value);
    }
}
/// <summary>
/// Fills a temporary test database with 20 "hoge" rows and one "ogem" row (each also inserted into
/// the FTS table as bigram text), then asserts that the first select for "oge" succeeds with
/// <c>SQLiteClass.moreSize</c> set to 10.
/// </summary>
public void ExecuteMoreSelectTableFTSTest()
{
    const string column = "tasklist_name";
    const string repeatedName = "hoge";
    const string otherName = "ogem";

    string dbfile = "taskaludb11.sqlite";
    string path = Path.GetTempPath() + "\\" + dbfile;
    TouchTestDB(dbfile);
    SQLiteClass.moreSize = 10;

    int id = 1;
    while (id <= 20)
    {
        InsertTableTaskList(dbfile, repeatedName, id);
        SQLiteClass.ExecuteInsertTableFTSString(path, id, column, Ngram.getNgramText(repeatedName, 2));
        id++;
    }

    InsertTableTaskList(dbfile, otherName, 21);
    SQLiteClass.ExecuteInsertTableFTSString(path, 21, column, Ngram.getNgramText(otherName, 2));

    Debug.Assert(SQLiteClass.ExecuteFirstSelectTable(path, "oge"));
}
/// <summary>
/// Reports whether any segment of <paramref name="target1"/> is within the configured threshold of
/// any segment of <paramref name="target2"/>, as measured by the aligner's <c>Delta</c> on their
/// feature structures.
/// </summary>
/// <remarks>
/// Nothing is mapped when the threshold is 0 or when either target is empty. The neighbouring nodes
/// are not consulted.
/// </remarks>
public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
{
    if (_threshold == 0)
    {
        return false;
    }
    if (target1.Length == 0 || target2.Length == 0)
    {
        return false;
    }

    IWordAligner aligner = _project.WordAligners[_alignerID];
    foreach (Segment first in target1)
    {
        foreach (Segment second in target2)
        {
            if (aligner.Delta(first.FeatureStruct, second.FeatureStruct) <= _threshold)
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Determines whether this instance matches the given target, optionally taking its neighbouring
/// nodes into account. The matching rule is supplied by each derived class.
/// </summary>
/// <param name="leftNode">The node to the left of the target.</param>
/// <param name="target">The n-gram of segments to test.</param>
/// <param name="rightNode">The node to the right of the target.</param>
/// <returns><c>true</c> when the target matches; otherwise <c>false</c>.</returns>
public abstract bool Matches(ShapeNode leftNode, Ngram<Segment> target, ShapeNode rightNode);
/// <summary>
/// Decides whether the correspondence <paramref name="v"/> in the given alignment column is frequent
/// enough among cognates to count as regular.
/// </summary>
/// <remarks>
/// The observed frequency of <paramref name="v"/> in the column's sound context is compared with a
/// threshold: the table value when automatic thresholds are enabled and the table has an entry,
/// otherwise the default threshold.
/// </remarks>
/// <returns><c>true</c> when the observed frequency reaches the threshold.</returns>
private bool IsRegular(WordPair wordPair, IWordAlignerResult alignerResult, Alignment<Word, ShapeNode> alignment, int column, Ngram<Segment> v)
{
    VarietyPair vp = wordPair.VarietyPair;
    SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
    var distribution = vp.CognateSoundCorrespondenceFrequencyDistribution;
    FrequencyDistribution<Ngram<Segment>> freqDist = distribution[context];

    int threshold;
    if (!_automaticRegularCorrespondenceThreshold)
    {
        threshold = _defaultRegularCorrepondenceThreshold;
    }
    else
    {
        // Total count of v over all contexts sharing this column's left and right environments.
        int seg2Count = (from sc in distribution.Conditions
                         where sc.LeftEnvironment == context.LeftEnvironment && sc.RightEnvironment == context.RightEnvironment
                         select distribution[sc][v]).Sum();

        bool found = _regularCorrespondenceThresholdTable.TryGetThreshold(vp.CognateCount, freqDist.SampleOutcomeCount, seg2Count, out threshold);
        if (!found)
        {
            threshold = _defaultRegularCorrepondenceThreshold;
        }
    }

    return freqDist[v] >= threshold;
}
/// <summary>
/// Checks whether the two targets are mapped to each other by the configured mappings.
/// </summary>
/// <remarks>
/// All combinations of the targets' string representations are examined in order. Equal
/// representations are mapped immediately. The first combination present in the lookup table ends
/// the search: the result is whether any of its environment pairs accepts the neighbours of both
/// targets.
/// </remarks>
/// <returns><c>true</c> when mapped; <c>false</c> otherwise, including when no mappings are defined.</returns>
public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
{
    if (_mappings.Count == 0)
    {
        return false;
    }

    foreach (string rep1 in GetStrReps(target1))
    {
        foreach (string rep2 in GetStrReps(target2))
        {
            if (rep1 == rep2)
            {
                return true;
            }

            Dictionary<string, List<Tuple<Environment, Environment>>> partners;
            if (_mappingLookup.TryGetValue(rep1, out partners))
            {
                List<Tuple<Environment, Environment>> envPairs;
                if (partners.TryGetValue(rep2, out envPairs))
                {
                    // First lookup hit decides the outcome.
                    return envPairs.Any(envPair =>
                        CheckEnvironment(envPair.Item1, leftNode1, rightNode1)
                        && CheckEnvironment(envPair.Item2, leftNode2, rightNode2));
                }
            }
        }
    }

    return false;
}
/// <summary>
/// Reports whether at least one of the component segment mappings considers the two targets mapped.
/// </summary>
/// <remarks>Components are asked in order and the search stops at the first one that answers <c>true</c>.</remarks>
public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
{
    foreach (var component in _segmentMappingsComponents)
    {
        if (component.IsMapped(leftNode1, target1, rightNode1, leftNode2, target2, rightNode2))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Lazily enumerates the string representations under which <paramref name="target"/> can be looked up.
/// </summary>
/// <remarks>
/// An empty target is represented by "-". Otherwise each segment yields its own representation and,
/// when implicit complex segments are enabled and the segment is complex, the representation of
/// every node obtained by re-segmenting it.
/// </remarks>
private IEnumerable<string> GetStrReps(Ngram<Segment> target)
{
    if (target.Length == 0)
    {
        yield return "-";
        yield break;
    }

    foreach (Segment segment in target)
    {
        yield return segment.StrRep;

        if (!_implicitComplexSegments || !segment.IsComplex)
        {
            continue;
        }

        Shape parts = _segmenter.Segment(segment.StrRep);
        foreach (ShapeNode part in parts)
        {
            yield return part.StrRep();
        }
    }
}