/// <summary>
/// Gathers, for a single sentence, the tokens eligible for replacement together with their
/// replacement candidates. Candidates are the words returned by <c>VectorSpace.GetMostSimilar</c>
/// for the token's lower-cased surface form and, when a lemma is present, for its lower-cased lemma.
/// </summary>
/// <param name="sentence">Sentence to be simplified.</param>
/// <param name="substCandidates">Output list; one (token, candidates sorted by descending score) entry is appended per eligible token.</param>
/// <param name="icTreshold">Information content threshold; only tokens whose information content exceeds it are considered.</param>
/// <param name="word">Target word to be replaced; when <c>null</c>, every eligible content word is considered.</param>
public void CollectCandidates(SentenceAnnotation sentence, List<Tuple<TokenAnnotation, List<Tuple<string, double>>>> substCandidates, double icTreshold, string word = null)
{
    // Tokens containing a compass direction are never offered for replacement.
    var compassWords = new List<string> { "north", "west", "south", "east" };

    var eligibleTokens = sentence.Tokens
        .Where(t => t.IsContent()
                    && !t.POSTag.StartsWith("C")
                    && string.IsNullOrEmpty(t.NamedEntity)
                    && !compassWords.Any(si => t.Text.ToLower().Contains(si)))
        .ToList();

    foreach (var token in eligibleTokens)
    {
        // Information content is looked up by lemma when available, otherwise by surface form.
        var ic = InformationContent.GetRelativeInformationContent(
            string.IsNullOrEmpty(token.Lemma) ? token.Text.ToLower() : token.Lemma.ToLower());

        if (!(ic > icTreshold))
        {
            continue;
        }

        // A capitalised token is only accepted when it is the first token of the sentence.
        if (char.IsUpper(token.Text[0]) && sentence.Tokens.IndexOf(token) != 0)
        {
            continue;
        }

        // When a specific target word was requested, skip everything else.
        if (word != null && token.Text.ToLower() != word.ToLower())
        {
            continue;
        }

        var pooled = new List<Tuple<string, double>>();

        var bySurfaceForm = VectorSpace.GetMostSimilar(token.Text.ToLower(), 30);
        if (bySurfaceForm != null)
        {
            pooled.AddRange(bySurfaceForm);
        }

        if (!string.IsNullOrEmpty(token.Lemma))
        {
            var byLemma = VectorSpace.GetMostSimilar(token.Lemma.ToLower(), 30);
            if (byLemma != null)
            {
                pooled.AddRange(byLemma);
            }
        }

        var ranked = pooled.OrderByDescending(x => x.Item2).ToList();
        substCandidates.Add(new Tuple<TokenAnnotation, List<Tuple<string, double>>>(token, ranked));
    }
}
/// <summary>
/// Initializes a Manipulator: stores its collaborators, resets selection/mode state,
/// hooks the graphics device lifecycle events and registers the drawing and manipulation callbacks.
/// </summary>
/// <param name="graphics">The IGraphicsDeviceService with which drawing will be performed</param>
/// <param name="camera">A provider for camera view and projection data</param>
/// <param name="input">A provider for input data</param>
public Manipulator(IGraphicsDeviceService graphics, XICamera camera, XIInputProvider input)
{
    // Collaborators.
    mGraphics = graphics;
    mCamera = camera;
    mInput = input;

    // Initial state: nothing selected, no mode active or enabled, world-space vectors.
    mSelectedAxes = AxisFlags.None;
    mActiveMode = TransformationMode.None;
    mEnabledModes = TransformationMode.None;
    mVectorSpace = VectorSpace.World;

    mPickBuffer = new XPickBuffer(graphics);

    mSettings = new ManipulatorSettings();
    mSettings.RestoreDefaults();

    // Track the graphics device lifecycle.
    mGraphics.DeviceCreated += OnDeviceCreated;
    mGraphics.DeviceReset += CreateDepthBuffer;
    mGraphics.DeviceResetting += DisposeDepthBuffer;
    mGraphics.DeviceDisposing += OnDeviceDisposing;

    // If a usable device already exists, run the creation handlers by hand.
    if ((mGraphics.GraphicsDevice != null) && !mGraphics.GraphicsDevice.IsDisposed)
    {
        OnDeviceCreated(this, null);
        CreateDepthBuffer(this, null);
    }

    mUndoStack = new Stack<TransformState>();
    mRedoStack = new Stack<TransformState>();

    mDrawFunctions = new DrawFunctions();
    mManipFunctions = new ManipFunctions();

    // With no transformation mode, each single axis is drawn as a line from the origin along its unit axis.
    mDrawFunctions[TransformationMode.None][AxisFlags.X]
        = mDrawFunctions[TransformationMode.None][AxisFlags.Y]
        = mDrawFunctions[TransformationMode.None][AxisFlags.Z]
        = (AxisFlags axis) =>
        {
            Vector3 unit = GetUnitAxis(axis);
            XPrimitives.DrawLine(mGraphics.GraphicsDevice, Vector3.Zero, unit);
        };

    InitTranslation();
    InitRotation();
    InitScale();
}
/// <summary>
/// Writes one byte to <paramref name="address"/>, routing the write to the memory region
/// that the address falls in.
/// </summary>
/// <param name="address">16-bit address to write to.</param>
/// <param name="value">Byte to store.</param>
public void Poke(ushort address, byte value)
{
    // Regular RAM
    if (address < 0xFC00)
    {
        Ram.Poke(address, value);
        return;
    }

    // "These overlays are controlled by the bits in the hardware register at FFF9."
    if (address == 0xFFF9) // Special case for memory map control register
    {
        ConfigureMemoryMapControl(value);
        return;
    }

    // "FFFE, FFFF CPU Interrupt Vector (RAM or ROM)
    //  FFFC, FFFD CPU Reset Vector (RAM or ROM)
    //  FFFA, FFFB CPU NMI Vector (RAM or ROM)"
    // The range is inclusive of FFFA (low byte of the NMI vector), matching Peek().
    // The previous "> 0xFFFA" test sent writes to FFFA into ROM space instead.
    if (address >= 0xFFFA)
    {
        VectorSpace.Poke(address, value);
        return;
    }

    // "FE00 thru FFF7 ROM Space"
    // Note: FFF8 is outside the quoted range but is also routed here by this check.
    if (address >= 0xFE00)
    {
        RomSpace.Poke(address, value);
        return;
    }

    // "FD00 thru FDFF Mikey Space"
    if (address >= 0xFD00)
    {
        MikeySpace.Poke(address, value);
        return;
    }

    // "FC00 thru FCFF Suzy Space"
    if (address >= 0xFC00)
    {
        SuzySpace.Poke(address, value);
        return;
    }
}
/// <summary>
/// Builds the similarity dimensions: one <c>FeatureSpaceDimensionSimilarity</c> is created for every
/// combination of a label dimension specification and a label of the vector space, and appended
/// to <c>dimensionFunctionSet</c> (the set is not cleared here).
/// </summary>
/// <param name="settings">Settings whose <c>labelDimensions</c> name the similarity functions to instantiate.</param>
/// <param name="vectorSpace">Vector space whose labels are used as class vectors.</param>
public void Deploy(FeatureVectorConstructorSettings settings, VectorSpace vectorSpace)
{
    foreach (dimensionSpecification spec in settings.labelDimensions)
    {
        foreach (VectorLabel currentLabel in vectorSpace.labels)
        {
            FeatureSpaceDimensionSimilarity dimension = new FeatureSpaceDimensionSimilarity();

            // A separate function instance is requested for every dimension.
            IVectorSimilarityFunction similarity =
                (IVectorSimilarityFunction)TypeProviders.similarityFunctions.GetInstance(spec.functionName);

            // The class vector is the first label in the space carrying the current label's name.
            IVector matchingVector = vectorSpace.labels.First(l => l.name == currentLabel.name);

            dimension.similarityFunction = similarity;
            dimension.classVector = matchingVector;

            dimensionFunctionSet.Add(dimension);
        }
    }
}
/// <summary>
/// Reads one byte from <paramref name="address"/>, taking it from the memory region
/// that the address falls in.
/// </summary>
/// <param name="address">16-bit address to read from.</param>
/// <returns>The byte supplied by the selected region.</returns>
public byte Peek(ushort address)
{
    // Everything below FC00 is regular RAM.
    if (address < 0xFC00)
    {
        return Ram.Peek(address);
    }

    // "These overlays are controlled by the bits in the hardware register at FFF9."
    // "Both Mikey and Suzy accept a write at those addresses but only Mikey responds to a read."
    // Since regular RAM memory is passed to Suzy, it is OK to always return the value here,
    // because only Mikey will be going through this MMU.
    if (address == 0xFFF9)
    {
        return MAPCTL.ByteData;
    }

    // Region boundaries from the top down; see Poke() for the address-range details.
    if (address >= 0xFFFA)
    {
        return VectorSpace.Peek(address);
    }
    else if (address >= 0xFE00)
    {
        return RomSpace.Peek(address);
    }
    else if (address >= 0xFD00)
    {
        return MikeySpace.Peek(address);
    }

    // Only FC00..FCFF can reach this point (lower addresses returned at the top).
    return SuzySpace.Peek(address);
}
/// <summary>
/// Orders a given list of substitution candidates for the ranking task. Every candidate is scored on
/// several features; the final order is the average of the candidate's ranks over the features
/// listed in <c>metrics</c> (currently "context-sim" and "ic-diff").
/// </summary>
/// <param name="document">The document whose text is to be simplified</param>
/// <param name="substitutionCandidates">Substitution candidates. A comma-separated entry is reduced to its part with the highest information content, and a multi-word entry to its last content token (or its first token when it has no content token). Repeated entries are scored once.</param>
/// <param name="target">Target word; the last token of the document with exactly this text is used.</param>
/// <param name="contextSize">The size of the context of the target word to be compared semantically with candidate replacements</param>
/// <returns>The distinct candidates (as given, not the reduced forms), best average rank first.</returns>
/// <exception cref="InvalidOperationException">No token of the document has the text <paramref name="target"/>.</exception>
public List<string> OrderGivenSubstitutionCandidates(Document document, List<string> substitutionCandidates, string target, int contextSize)
{
    // NOTE(review): morph is not used in this method; the instance is kept only in case the
    // EngMorphology constructor has side effects - confirm and remove.
    EngMorphology morph = new EngMorphology();

    // Features that take part in the final ranking. The other scores computed below
    // ("sim", "length", "lm-*") are stored but currently not ranked on.
    List<string> metrics = new List<string> { "context-sim", "ic-diff" };

    var allTokens = document.AllTokens;
    var targetToken = allTokens.Where(t => t.Text == target).Last();
    int tokenIndex = allTokens.IndexOf(targetToken);

    // Content tokens within contextSize positions of the target (the target itself excluded).
    // NOTE(review): this compares a document-wide token index with targetToken.SentenceIndex;
    // presumably the two coincide for the single-sentence documents of the ranking task - confirm.
    var targetContextTokens = allTokens.Where(t =>
    {
        int distance = Math.Abs(allTokens.IndexOf(t) - targetToken.SentenceIndex);
        return distance > 0 && distance <= contextSize && t.IsContent();
    }).ToList();

    Dictionary<string, Dictionary<string, double>> scores = new Dictionary<string, Dictionary<string, double>>();

    foreach (var candidate in substitutionCandidates)
    {
        // A repeated candidate would make scores.Add throw; score each distinct candidate once.
        if (scores.ContainsKey(candidate))
        {
            continue;
        }

        var candidateText = candidate;

        // "a, b, c": keep the alternative with the highest information content.
        if (candidateText.Contains(","))
        {
            string selected = string.Empty;
            double maxIC = double.MinValue;
            foreach (var part in candidateText.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
            {
                var trimmedPart = part.Trim();
                var partIC = InformationContent.GetRelativeInformationContent(trimmedPart.ToLower());
                if (partIC > maxIC)
                {
                    selected = trimmedPart;
                    maxIC = partIC;
                }
            }
            candidateText = selected;
        }

        // Multi-word candidate: represent it by its last content token, or by its first token
        // when the tagger finds no content token.
        if (candidateText.Trim().Contains(" "))
        {
            var tokens = (new EngPOSTagger()).Annotate(candidateText.Trim()).ToList();
            var contentTokens = tokens.Where(x => ((TokenAnnotation)x).IsContent()).ToList();
            var representative = contentTokens.Count > 0
                ? (TokenAnnotation)contentTokens.Last()
                : (TokenAnnotation)tokens.First();
            candidateText = representative.Text.Trim();
        }

        string candidateLower = candidateText.ToLower();

        // The original computed a "lemma" and a "word" information content from the very same
        // lower-cased text and then picked one of them; a single lookup yields the same value.
        var candidateIC = InformationContent.GetRelativeInformationContent(candidateLower);

        var candidateContextSimilarities = targetContextTokens
            .Select(x => VectorSpace.Similarity(x.Lemma.ToLower(), candidateLower))
            .Where(x => x >= -1)
            .ToList();
        var candidateContextSimilarity = candidateContextSimilarities.Count > 0 ? candidateContextSimilarities.Average() : 0;

        var features = new Dictionary<string, double>();

        var sim = VectorSpace.Similarity(targetToken.Text.ToLower().Trim(), candidateLower.Trim());
        if (sim < 1)
        {
            features.Add("sim", sim);
        }

        // Stored under "ic-diff" although it is the candidate's own information content.
        features.Add("ic-diff", candidateIC);
        features.Add("context-sim", candidateContextSimilarity);
        features.Add("length", candidateText.Length);

        // bigram LM
        if (tokenIndex > 0)
        {
            var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(allTokens[tokenIndex - 1].Text.ToLower(), candidateText);
            features.Add("lm-bigram-pre", lmScore.HasValue ? lmScore.Value : -100);
        }
        else
        {
            features.Add("lm-bigram-pre", 0);
        }

        if (tokenIndex < allTokens.Count - 1)
        {
            var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(candidateText, allTokens[tokenIndex + 1].Text.ToLower());
            features.Add("lm-bigram-post", lmScore.HasValue ? lmScore.Value : -100);
        }
        else
        {
            // Fixed: this branch used to add "lm-bigram-pre" a second time, which throws
            // ArgumentException (duplicate key) whenever the target is the last token.
            features.Add("lm-bigram-post", 0);
        }

        // trigram LM
        if (tokenIndex > 1)
        {
            var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(allTokens[tokenIndex - 2].Text.ToLower(), allTokens[tokenIndex - 1].Text.ToLower(), candidateText);
            features.Add("lm-trigram-pre", lmScore.HasValue ? lmScore.Value : -100);
        }
        else
        {
            features.Add("lm-trigram-pre", 0);
        }

        if (tokenIndex < allTokens.Count - 2)
        {
            var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(candidateText, allTokens[tokenIndex + 1].Text.ToLower(), allTokens[tokenIndex + 2].Text.ToLower());
            features.Add("lm-trigram-post", lmScore.HasValue ? lmScore.Value : -100);
        }
        else
        {
            features.Add("lm-trigram-post", 0);
        }

        scores.Add(candidate, features);
    }

    LastSubstitutionCandidates = new List<Tuple<TokenAnnotation, List<string>>>();
    LastSubstitutionCandidates.Add(new Tuple<TokenAnnotation, List<string>>(targetToken, scores.Keys.ToList()));

    // One rank table per metric, built only from the candidates that have that feature.
    var allRanks = new List<Dictionary<string, int>>();
    foreach (var m in metrics)
    {
        var featDict = scores.Where(x => x.Value.ContainsKey(m)).ToDictionary(x => x.Key, x => x.Value[m]);
        allRanks.Add(TrainingExample.RankExamplesByNumericFeature(featDict, m == "length" || m == "ic-diff"));
    }

    // Final order: ascending average rank over the metrics in which the candidate was ranked.
    Dictionary<string, double> averageRankings = scores.Keys.ToDictionary(
        x => x,
        x => allRanks.Where(y => y.ContainsKey(x)).Select(r => r[x]).Average());

    return averageRankings.OrderBy(r => r.Value).Select(x => x.Key).ToList();
}
/// <summary>
/// Chooses the substitutions for target words. The surviving candidates of each target are ranked on
/// every feature in <c>metrics</c> (similarity with the target word, information content reduction,
/// similarity with context words, language-model scores) and the candidate with the best average rank
/// is chosen; ties are resolved in favour of the highest "sim" score.
/// </summary>
/// <param name="document">The document whose text is to be simplified</param>
/// <param name="substCandidates">All the pairs of target words and collected candidate replacements</param>
/// <param name="contextSize">The size of the context of the target word to be considered for measuring the similarity between candidate words and target word context</param>
/// <param name="noSubstitutionWords">Stopwords, never to be considered for simplification (compared against the lower-cased token text)</param>
/// <param name="similarityTreshold">The threshold for semantic similarity between target word and candidate replacement</param>
/// <param name="icReplacementCandidateTreshold">Information content threshold for replacing the target word</param>
/// <param name="word">Optional target word; when given, only tokens whose text equals it (ignoring case) are processed.</param>
/// <returns>The list of substitutions (tuple of target token and candidate replacement word)</returns>
public List<Tuple<TokenAnnotation, string>> GetSubstitutions(Document document, List<Tuple<TokenAnnotation, List<Tuple<string, double>>>> substCandidates, int contextSize, List<string> noSubstitutionWords, double similarityTreshold, double icReplacementCandidateTreshold, string word = null)
{
    EngMorphology morph = new EngMorphology();
    List<Tuple<TokenAnnotation, string>> substitutions = new List<Tuple<TokenAnnotation, string>>();
    List<string> metrics = new List<string> { "sim", "ic-diff", "context-sim", "lm-bigram-pre", "lm-bigram-post", "lm-trigram-pre", "lm-trigram-post" };

    // Fixed: the token text is lower-cased before the comparison, so the requested word has to be
    // lower-cased too (CollectCandidates already does this); otherwise a capitalised word never matches.
    string wantedWord = word == null ? null : word.ToLower();

    foreach (var sc in substCandidates)
    {
        var targetToken = sc.Item1;
        string targetTextLower = targetToken.Text.ToLower();

        if (noSubstitutionWords.Contains(targetTextLower))
        {
            continue;
        }

        if (wantedWord != null && targetTextLower != wantedWord)
        {
            continue;
        }

        Dictionary<string, Dictionary<string, double>> scores = new Dictionary<string, Dictionary<string, double>>();

        // Locate the sentence holding the target and the sentence's own token object for it.
        var sentence = document.Sentences.Where(s => s.Tokens.Any(t => t.StartPosition == targetToken.StartPosition && t.Text == targetToken.Text)).Single();
        var targetTokenCopy = sentence.Tokens.Where(t => t.StartPosition == targetToken.StartPosition && t.Text == targetToken.Text).Single();
        var tokenIndex = sentence.Tokens.IndexOf(targetTokenCopy);

        // Sentence text before and after the target; a candidate is spliced in between them below.
        var preceedingSentencePart = sentence.Text.Substring(0, targetTokenCopy.StartPositionSentence);
        var followingSentencePart = sentence.Text.Substring(targetTokenCopy.StartPositionSentence + targetTokenCopy.Text.Length);

        var targetLemmaIC = InformationContent.GetRelativeInformationContent(targetToken.Lemma.ToLower());
        var targetWordIC = InformationContent.GetRelativeInformationContent(targetToken.Text.ToLower());
        // When the word-form lookup yields exactly 1, the lemma's value is used instead.
        var targetIC = targetWordIC == 1 ? targetLemmaIC : targetWordIC;

        // Content tokens within contextSize positions of the target (the target itself excluded).
        var targetContextTokens = sentence.Tokens.Where(t =>
        {
            int distance = Math.Abs(sentence.Tokens.IndexOf(t) - targetTokenCopy.SentenceIndex);
            return distance > 0 && distance <= contextSize && t.IsContent();
        }).ToList();

        var targetCtxtSimilarities = targetContextTokens.Select(x => VectorSpace.Similarity(x.Lemma.ToLower(), targetToken.Lemma.ToLower())).Where(x => x >= -1).ToList();
        var targetContextSimilarity = targetCtxtSimilarities.Count > 0 ? targetCtxtSimilarities.Average() : 0;

        if (sc.Item2 != null)
        {
            foreach (var candidate in sc.Item2)
            {
                try
                {
                    var candidateLemmaIC = InformationContent.GetRelativeInformationContent(candidate.Item1.ToLower());
                    string key = candidate.Item1 + "<->" + targetToken.POSTag;
                    // Use the cached inflected form for the target's POS tag when there is one,
                    // otherwise the candidate as given.
                    var candidateInPOS = CandidateInPoSLookup.ContainsKey(key) ? CandidateInPoSLookup[key] : candidate.Item1;
                    var candidateWordIC = !string.IsNullOrEmpty(candidateInPOS) ? InformationContent.GetRelativeInformationContent(candidateInPOS.ToLower()) : 1;
                    var candidateIC = candidateWordIC == 1 ? candidateLemmaIC : candidateWordIC;

                    // First prerequisite: the target is informative enough and the candidate is simpler.
                    if (string.IsNullOrEmpty(candidateInPOS) || !(targetLemmaIC > icReplacementCandidateTreshold) || !(candidateIC < targetIC))
                    {
                        continue;
                    }

                    // Cheap filters run before the POS tagging below; a candidate failing any of
                    // them is dropped regardless of the tagging result.
                    if (!(candidate.Item2 >= similarityTreshold) || scores.ContainsKey(candidateInPOS))
                    {
                        continue;
                    }

                    // The candidate must not overlap (substring-wise) with the target itself...
                    bool sameWord = candidate.Item1.Contains(targetToken.Text) || targetToken.Text.Contains(candidate.Item1)
                        || candidate.Item1.Contains(targetToken.Lemma) || targetToken.Lemma.Contains(candidate.Item1)
                        || candidateInPOS.Contains(targetToken.Text) || targetToken.Text.Contains(candidateInPOS)
                        || candidateInPOS.Contains(targetToken.Lemma) || targetToken.Lemma.Contains(candidateInPOS);
                    if (sameWord)
                    {
                        continue;
                    }

                    // ...nor with any of its context words.
                    bool sameAsContext = targetContextTokens.Any(ct =>
                        candidate.Item1.Contains(ct.Text) || ct.Text.Contains(candidate.Item1)
                        || candidate.Item1.Contains(ct.Lemma) || ct.Lemma.Contains(candidate.Item1)
                        || candidateInPOS.Contains(ct.Text) || ct.Text.Contains(candidateInPOS)
                        || candidateInPOS.Contains(ct.Lemma) || ct.Lemma.Contains(candidateInPOS));
                    if (sameAsContext)
                    {
                        continue;
                    }

                    // POS-tag compatibility is a second prerequisite: tag the sentence with the
                    // candidate spliced in and require the same tag at the target's position.
                    var artificialSentence = preceedingSentencePart + candidateInPOS + followingSentencePart;
                    var artTokens = (new EngPOSTagger()).Annotate(artificialSentence).Select(x => (TokenAnnotation)x).ToList();
                    morph.AnnotateMorphology(artTokens);
                    var candidateToken = artTokens.Where(x => x.StartPositionSentence == targetTokenCopy.StartPositionSentence).Single();
                    if (!(candidateToken.POSTag == targetToken.POSTag))
                    {
                        continue;
                    }

                    var candidateContextSimilarities = targetContextTokens.Select(x => VectorSpace.Similarity(x.Lemma.ToLower(), candidateToken.Lemma.ToLower())).Where(x => x >= -1).ToList();
                    var candidateContextSimilarity = candidateContextSimilarities.Count > 0 ? candidateContextSimilarities.Average() : targetContextSimilarity;

                    // Build the feature set locally and publish it only when complete, so that a
                    // failure half-way cannot leave a partial entry that breaks the ranking below.
                    var features = new Dictionary<string, double>();
                    features.Add("sim", candidate.Item2);
                    features.Add("ic-diff", targetIC - candidateIC);
                    features.Add("context-sim", candidateContextSimilarity);
                    features.Add("length", candidateInPOS.Length);

                    // bigram LM
                    if (tokenIndex > 0)
                    {
                        var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(sentence.Tokens[tokenIndex - 1].Text.ToLower(), candidateInPOS);
                        features.Add("lm-bigram-pre", lmScore.HasValue ? lmScore.Value : -100);
                    }
                    else
                    {
                        features.Add("lm-bigram-pre", 0);
                    }

                    if (tokenIndex < sentence.Tokens.Count - 1)
                    {
                        var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(candidateInPOS, sentence.Tokens[tokenIndex + 1].Text.ToLower());
                        features.Add("lm-bigram-post", lmScore.HasValue ? lmScore.Value : -100);
                    }
                    else
                    {
                        features.Add("lm-bigram-post", 0);
                    }

                    // trigram LM
                    if (tokenIndex > 1)
                    {
                        var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(sentence.Tokens[tokenIndex - 2].Text.ToLower(), sentence.Tokens[tokenIndex - 1].Text.ToLower(), candidateInPOS);
                        features.Add("lm-trigram-pre", lmScore.HasValue ? lmScore.Value : -100);
                    }
                    else
                    {
                        features.Add("lm-trigram-pre", 0);
                    }

                    if (tokenIndex < sentence.Tokens.Count - 2)
                    {
                        var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(candidateInPOS, sentence.Tokens[tokenIndex + 1].Text.ToLower(), sentence.Tokens[tokenIndex + 2].Text.ToLower());
                        features.Add("lm-trigram-post", lmScore.HasValue ? lmScore.Value : -100);
                    }
                    else
                    {
                        features.Add("lm-trigram-post", 0);
                    }

                    scores.Add(candidateInPOS, features);
                }
                catch
                {
                    // Deliberate best-effort: any failure while evaluating one candidate (for
                    // example Single() not finding exactly one token at the target position)
                    // only drops that candidate.
                }
            }
        }

        // Overwritten for every processed target, so after the call it describes the last one.
        LastSubstitutionCandidates = new List<Tuple<TokenAnnotation, List<string>>>();
        LastSubstitutionCandidates.Add(new Tuple<TokenAnnotation, List<string>>(targetToken, scores.Keys.ToList()));

        if (scores.Count > 0)
        {
            var allRanks = new List<Dictionary<string, int>>();
            foreach (var m in metrics)
            {
                var featDict = scores.ToDictionary(x => x.Key, x => x.Value[m]);
                allRanks.Add(TrainingExample.RankExamplesByNumericFeature(featDict, m == "length"));
            }

            Dictionary<string, double> averageRankings = scores.Keys.ToDictionary(x => x, x => allRanks.Select(r => r[x]).Average());
            var finalRanking = averageRankings.OrderBy(r => r.Value).ToList();

            // Collect every candidate sharing the best average rank...
            double topScore = finalRanking[0].Value;
            var equal = new List<string>();
            foreach (var fr in finalRanking)
            {
                if (fr.Value == topScore)
                {
                    equal.Add(fr.Key);
                }
            }

            // ...and among them take the first whose "sim" is not exceeded by any other.
            var finalChoice = equal.Where(eq => equal.Where(eq2 => eq2 != eq).All(eq2 => scores[eq]["sim"] >= scores[eq2]["sim"])).First();
            substitutions.Add(new Tuple<TokenAnnotation, string>(targetToken, finalChoice));
        }
    }

    return substitutions;
}