/// <summary>
/// Measures how far apart two entities are, comparing their normalized parameters
/// component by component with the requested metric.
/// </summary>
/// <param name="entity1">First entity of the pair.</param>
/// <param name="entity2">Second entity of the pair.</param>
/// <param name="distanceFunctionType">Metric to apply (Manhattan, Euclidean or Chebyshev).</param>
/// <returns>
/// The distance over the first <c>ParametersCount</c> components, or 0 when
/// <paramref name="distanceFunctionType"/> is none of the handled metrics.
/// </returns>
private double CalculateDistance(Entity entity1, Entity entity2, DistanceFunctionType distanceFunctionType)
{
    var total = 0d;

    if (distanceFunctionType == DistanceFunctionType.Manhattan)
    {
        // Sum of absolute component differences.
        for (int index = 0; index < ParametersCount; index++)
        {
            var difference = entity1.NormalizedParameters[index] - entity2.NormalizedParameters[index];
            total += Math.Abs(difference);
        }
    }
    else if (distanceFunctionType == DistanceFunctionType.Euclidean)
    {
        // Square root of the sum of squared component differences.
        for (int index = 0; index < ParametersCount; index++)
        {
            var difference = entity1.NormalizedParameters[index] - entity2.NormalizedParameters[index];
            total += Math.Pow(difference, 2);
        }

        total = Math.Sqrt(total);
    }
    else if (distanceFunctionType == DistanceFunctionType.Chebyshev)
    {
        // Largest absolute component difference.
        for (int index = 0; index < ParametersCount; index++)
        {
            var magnitude = Math.Abs(entity1.NormalizedParameters[index] - entity2.NormalizedParameters[index]);
            if (magnitude > total)
            {
                total = magnitude;
            }
        }
    }

    // Any other metric value falls through with the initial 0.
    return total;
}
/// <summary>
/// Copies the Smith-Waterman settings (and the global e-mail recipients) from the
/// configuration manager into the static fields of this class.
/// </summary>
/// <param name="mgr">Configuration manager to read from.</param>
static void ReadConfiguration(ConfigurationMgr mgr)
{
    var section = mgr.SmithWatermanSection;

    // Input / output file locations.
    _fastaFile = section.FastaFile;
    _distanceFile = section.DistanceMatrixFile;
    _indexFile = section.IndexFile;
    _timingFile = section.TimingFile;
    _summaryFile = section.SummaryFile;

    // Output switches.
    _writeFullResults = section.WriteFullMatrix;
    _writePartialResults = section.WritePartialMatrix;
    _writeAlignments = section.WriteAlignments;
    _writeAlignmentFile = section.WriteAlignmentsFile;

    // Job layout; the sequence count is reset here rather than read from configuration.
    _nodeCount = section.NodeCount;
    _processPerNodeCount = section.ProcessPerNodeCount;
    _sequenceCount = 0;

    // Alignment parameters.
    _gapOpen = section.GapOpenPenalty;
    _gapExtension = section.GapExtensionPenalty;
    _alignmentType = section.AlignmentType;
    _distanceFunction = section.DistanceFunctionType;
    _scoringMatrixName = section.ScoringMatrixName;

    // Recipients are stored as a single comma-separated string.
    _emailResultsTo = string.Join(",", mgr.GlobalSection.EmailAddresses);
}
/// <summary>Selects the multi-class k-nearest neighbours classifier</summary>
/// <remarks>
/// <para>
/// Overwrites the classifier type, distance function and k value stored in the
/// classifier settings, so any previously configured classifier is replaced by k-NN.
/// </para>
/// </remarks>
/// <param name="distance">Distance function the k-NN classifier should use</param>
/// <param name="k">Number of neighbours that vote on class membership</param>
/// <seealso cref="aceOperationSetExecutorBase"/>
public void aceOperation_setkNN(
    [Description("Distance function to be used with k-NN classifier")]
    DistanceFunctionType distance = DistanceFunctionType.SquareEuclidean,
    [Description("K parameter - number of neighbours to vote for class membership")]
    Int32 k = 5)
{
    // Switch the classifier type first, then apply the k-NN specific options.
    data.classifierSettings.type = imbNLP.Toolkit.Classifiers.ClassifierType.kNearestNeighbors;

    data.classifierSettings.distanceFunction = distance;

    data.classifierSettings.kNN_k = k;
}
/// <summary>
/// Creates a t-SNE instance and stores its settings.
/// </summary>
/// <param name="dimensions">Value stored as the number of dimensions.</param>
/// <param name="perplexity">Perplexity setting; defaults to 30.</param>
/// <param name="distanceFunctionType">Distance metric selector; defaults to Euclidean.</param>
/// <param name="iterations">Iteration count setting; defaults to 1000.</param>
public TSNE(
    int dimensions,
    double perplexity = 30,
    DistanceFunctionType distanceFunctionType = DistanceFunctionType.Euclidean,
    int iterations = 1000)
    => (_dimensions, _perplexity, _distanceFunctionType, _iterations)
        = (dimensions, perplexity, distanceFunctionType, iterations);
/// <summary>
/// Builds a spline from the given control points.
/// </summary>
/// <param name="list">Control points; they are copied into an internal list.</param>
/// <param name="closed">Value stored in <c>Closed</c>; defaults to <c>false</c>.</param>
public Spline(T[] list, bool closed = false)
{
    Closed = closed;
    points = new List <T>(list);
    PointsArray = new T[4];
    ArcLengthDivisions = 200;
    ArcLengthsCache = null;
    Invalid = false;

    // Use T's own "Distance" method when reflection finds one; otherwise fall back to
    // subtracting the two points dynamically and casting the result to float.
    DistanceFunctionType resolvedDistance;
    var distanceMethod = typeof(T).GetMethod("Distance");
    if (distanceMethod != null)
    {
        resolvedDistance = (DistanceFunctionType)Delegate.CreateDelegate(typeof(DistanceFunctionType), distanceMethod);
    }
    else
    {
        resolvedDistance = (a, b) => (float)((dynamic)a - (dynamic)b);
    }

    DistanceFunction = resolvedDistance;
}
/// <summary>
/// Calculates a matrix which organises probabilities proportional to the similarity of each vector pair.
/// </summary>
/// <param name="embeddings">Embeddings to compare; their labels become the keys of the result.</param>
/// <param name="distanceFunctionType">Metric passed on to <c>CalculateDistanceMatrix</c>.</param>
/// <param name="perplexity">Perplexity passed (together with its natural logarithm) to <c>CalculateProbabilities</c>.</param>
/// <returns>
/// A dictionary keyed by embedding label whose values are dictionaries keyed by the other
/// embedding's label, holding the resulting probability (never below <c>MinPij</c>).
/// </returns>
private static Dictionary <string, Dictionary <string, double> > CalculateProbabilityMatrix( IReadOnlyCollection <IEmbedding> embeddings, DistanceFunctionType distanceFunctionType, double perplexity)
{
    var distanceMatrix = CalculateDistanceMatrix(embeddings, distanceFunctionType);
    var logPerplexity = Math.Log(perplexity);
    var probabilityMatrix = new Dictionary <string, Dictionary <string, double> >();

    // One row per embedding: start every entry at 0 and hand the row to CalculateProbabilities.
    // presumably CalculateProbabilities fills `probabilities` in place - verify against its definition.
    foreach (var embedding in embeddings)
    {
        var probabilities = embeddings.ToDictionary( otherIEmbedding => otherIEmbedding.Label, otherIEmbedding => 0d);
        CalculateProbabilities( embedding.Label, distanceMatrix[embedding.Label], probabilities, perplexity, logPerplexity);
        probabilityMatrix.Add(embedding.Label, probabilities);
    }

    // Symmetrise in place: P[I][J] += P[J][I], accumulating each updated entry into `sum`.
    // NOTE(review): every ordered pair is visited, so when (J, I) is reached later it reads the
    // P[I][J] value already updated at (I, J). The result therefore depends on iteration order
    // and the two mirrored entries end up different - confirm this is intended.
    var sum = 0d;
    foreach (var labelI in embeddings.Select(w => w.Label))
    {
        var probabilitiesGivenI = probabilityMatrix[labelI];
        foreach (var labelJ in embeddings.Select(w => w.Label))
        {
            sum += probabilitiesGivenI[labelJ] += probabilityMatrix[labelJ][labelI];
        }
    }

    // Scale by 0.5 / sum and clamp to MinPij, writing the same value to both mirrored entries.
    // NOTE(review): for I != J the pair is written at (I, J) and again at (J, I); the second
    // visit multiplies the already-scaled value by `scale` once more - confirm this is intended.
    var scale = 0.5 / sum;
    foreach (var labelI in embeddings.Select(w => w.Label))
    {
        var probabilitiesGivenI = probabilityMatrix[labelI];
        foreach (var labelJ in embeddings.Select(w => w.Label))
        {
            probabilitiesGivenI[labelJ] = probabilityMatrix[labelJ][labelI] = Math.Max(probabilityMatrix[labelJ][labelI] * scale, MinPij);
        }
    }

    return(probabilityMatrix);
}
/// <summary>
/// Bundles the inputs and settings into a single parameter object.
/// </summary>
/// <param name="entities">Value stored in <c>Entities</c>.</param>
/// <param name="distancesForEachElement">Value stored in <c>DistancesForEachElement</c>.</param>
/// <param name="distanceFunctionType">Value stored in <c>DistanceFunctionType</c>.</param>
/// <param name="windowType">Value stored in <c>WindowType</c>.</param>
/// <param name="kernelFunctionType">Value stored in <c>KernelFunctionType</c>.</param>
/// <param name="windowWidth">Value stored in <c>WindowWidth</c>.</param>
/// <param name="neighborsCount">Value stored in <c>NeighborsCount</c>.</param>
public Parameters(
    List <Entity> entities,
    List <List <Distance> > distancesForEachElement,
    DistanceFunctionType distanceFunctionType,
    WindowType windowType,
    KernelFunctionType kernelFunctionType,
    double windowWidth,
    int neighborsCount)
{
    // Data.
    this.Entities = entities;
    this.DistancesForEachElement = distancesForEachElement;

    // Function selections.
    this.DistanceFunctionType = distanceFunctionType;
    this.WindowType = windowType;
    this.KernelFunctionType = kernelFunctionType;

    // Numeric settings.
    this.WindowWidth = windowWidth;
    this.NeighborsCount = neighborsCount;
}
/// <summary>
/// Builds the full pairwise distance table for the given embeddings.
/// </summary>
/// <param name="embeddings">Embeddings to compare; labels are used as dictionary keys.</param>
/// <param name="distanceFunctionType">Selects the distance function via <c>DistanceFunctionResolver</c>.</param>
/// <returns>
/// A dictionary keyed by label whose value maps every label (including its own) to the
/// distance between the two embeddings' vectors.
/// </returns>
private static Dictionary <string, Dictionary <string, double> > CalculateDistanceMatrix(
    IReadOnlyCollection <IEmbedding> embeddings,
    DistanceFunctionType distanceFunctionType)
{
    var measure = DistanceFunctionResolver.ResolveDistanceFunction(distanceFunctionType);
    var table = new Dictionary <string, Dictionary <string, double> >();

    foreach (var source in embeddings)
    {
        // Row of distances from `source` to every embedding in the collection.
        var row = new Dictionary <string, double>();
        foreach (var target in embeddings)
        {
            row.Add(target.Label, measure(source.Vector, target.Vector));
        }

        table.Add(source.Label, row);
    }

    return table;
}
/// <summary>
/// Assigns a cluster index to every embedding label using a density-based scan that is
/// split across several parallel workers.
/// </summary>
/// <remarks>
/// Each worker scans its own slice of the embeddings. An embedding with fewer than
/// <paramref name="minimumSamples"/> neighbours is labelled -1; otherwise it starts a new
/// cluster index that is propagated to its neighbours. Whenever a label moves from one
/// cluster index to another, the two indices are recorded as related so that
/// <c>GetClusterIndexMap</c> can translate them afterwards.
/// </remarks>
/// <param name="embeddings">Embeddings to cluster.</param>
/// <param name="epsilon">Neighbourhood radius passed to <c>GetNeighborsAndWeight</c>.</param>
/// <param name="minimumSamples">Minimum neighbour count for an embedding to seed or expand a cluster.</param>
/// <param name="distanceFunctionType">Selects the distance function via <c>DistanceFunctionResolver</c>.</param>
/// <param name="concurrentThreads">Number of slices processed by <c>Parallel.For</c>.</param>
/// <returns>A dictionary from embedding label to the cluster index produced by <c>GetClusterIndexMap</c>.</returns>
public static Dictionary <string, int> GetLabelClusterMap(
    IEnumerable <IEmbedding> embeddings,
    double epsilon = 0.5,
    int minimumSamples = 5,
    DistanceFunctionType distanceFunctionType = DistanceFunctionType.Euclidean,
    int concurrentThreads = 4)
{
    const int noiseLabel = -1;

    var embeddingsList = embeddings.ToList();
    var distanceFunction = DistanceFunctionResolver.ResolveDistanceFunction(distanceFunctionType);
    var clusterLabels = new ConcurrentDictionary <string, int>();
    var clusterRelationships = new ConcurrentBag <ConcurrentBag <int> >();
    var clusterIndex = 0;
    var sampleSize = (int)Math.Ceiling((double)embeddingsList.Count / concurrentThreads);

    // Starts a new relationship group containing only the given cluster index.
    void RegisterCluster(int newClusterIndex)
    {
        clusterRelationships.Add(new ConcurrentBag <int> { newClusterIndex });
    }

    // Records that two cluster indices belong together. Noise (-1) is never part of a
    // relationship group, so it must not be looked up; the same guard is applied on every path.
    void LinkClusters(int existingClusterIndex, int newClusterIndex)
    {
        if (existingClusterIndex == noiseLabel || existingClusterIndex == newClusterIndex)
        {
            return;
        }

        var relationship = clusterRelationships.FirstOrDefault(r => r.Contains(existingClusterIndex));
        if (relationship != null)
        {
            relationship.Add(newClusterIndex);
        }
        else
        {
            // Defensive: no group is known yet for the existing index, so create one for the pair.
            clusterRelationships.Add(new ConcurrentBag <int> { existingClusterIndex, newClusterIndex });
        }
    }

    Parallel.For(0, concurrentThreads, threadIndex =>
    {
        foreach (var embedding in embeddingsList.Skip(threadIndex * sampleSize).Take(sampleSize))
        {
            // Already labelled (by this or another worker) - nothing to do.
            if (clusterLabels.ContainsKey(embedding.Label))
            {
                continue;
            }

            var neighbors = GetNeighborsAndWeight(
                embedding,
                embeddingsList,
                distanceFunction,
                epsilon);

            if (neighbors.Count < minimumSamples)
            {
                // Not dense enough: mark as noise unless another worker labelled it meanwhile.
                clusterLabels.AddOrUpdate(
                    embedding.Label,
                    noiseLabel,
                    (key, existingClusterIndex) => existingClusterIndex);
                continue;
            }

            // Atomic increment: a plain clusterIndex++ can hand the same index to two workers.
            var localClusterIndex = System.Threading.Interlocked.Increment(ref clusterIndex) - 1;

            // Note: ConcurrentDictionary may invoke these factories more than once under contention.
            clusterLabels.AddOrUpdate(
                embedding.Label,
                (key) =>
                {
                    RegisterCluster(localClusterIndex);
                    return localClusterIndex;
                },
                (key, existingClusterIndex) =>
                {
                    if (existingClusterIndex == noiseLabel)
                    {
                        RegisterCluster(localClusterIndex);
                    }
                    else
                    {
                        LinkClusters(existingClusterIndex, localClusterIndex);
                    }

                    return localClusterIndex;
                });

            // `neighbors` may grow while it is being walked, hence the index-based loop.
            for (var i = 0; i < neighbors.Count; i++)
            {
                var currentNeighbor = neighbors[i];

                if (clusterLabels.TryGetValue(currentNeighbor.Label, out var existingClusterId))
                {
                    // Already labelled: relate the clusters, take the neighbour over, do not expand.
                    LinkClusters(existingClusterId, localClusterIndex);
                    clusterLabels[currentNeighbor.Label] = localClusterIndex;
                    continue;
                }

                clusterLabels.AddOrUpdate(
                    currentNeighbor.Label,
                    localClusterIndex,
                    (key, existingClusterIndex) =>
                    {
                        // Another worker labelled the neighbour after the TryGetValue above;
                        // it may have been marked as noise, which LinkClusters ignores.
                        LinkClusters(existingClusterIndex, localClusterIndex);
                        return localClusterIndex;
                    });

                var currentNeighborsNeighbors = GetNeighborsAndWeight(
                    currentNeighbor,
                    embeddingsList,
                    distanceFunction,
                    epsilon);

                // Dense neighbours extend the frontier of the current cluster.
                if (currentNeighborsNeighbors.Count >= minimumSamples)
                {
                    neighbors = neighbors.Union(currentNeighborsNeighbors).ToList();
                }
            }
        }
    });

    var clusterIndexMap = GetClusterIndexMap(clusterRelationships);

    return clusterLabels.ToDictionary(
        x => x.Key,
        x => clusterIndexMap[x.Value]);
}
public static Func <double[], double[], double> ResolveDistanceFunction( DistanceFunctionType distanceFunctionType) => distanceFunctionType switch {