Пример #1
0
        /// <summary>
        /// Asks the user for a PNG file name and, when the dialog result is valid, lays out the
        /// network graph for the given metric and hands it to <c>SaveElement</c>.
        /// </summary>
        /// <param name="ownerViewModel">View model passed to the dialog service as the owner of the save dialog.</param>
        /// <param name="similarityMetric">Metric passed to the graph service when generating the graph.</param>
        /// <param name="scoreFilter">Value assigned to the layout's <c>WeightFilter</c>.</param>
        /// <returns><c>true</c> when the dialog result was valid and <c>SaveElement</c> was called; otherwise <c>false</c>.</returns>
        public bool ExportNetworkGraph(object ownerViewModel, SimilarityMetric similarityMetric, double scoreFilter)
        {
            // Fix: the dialog owner is the caller-supplied view model (as in ExportHierarchicalGraph),
            // not this service instance; previously the ownerViewModel parameter was ignored.
            FileDialogResult result = _dialogService.ShowSaveFileDialog("Export Network Graph", ownerViewModel, new FileType("PNG image", ".png"));

            if (!result.IsValid)
            {
                return false;
            }

            IBidirectionalGraph<NetworkGraphVertex, NetworkGraphEdge> graph = _graphService.GenerateNetworkGraph(similarityMetric);

            // Animation and transitions are switched off on the layout that is handed to SaveElement.
            var graphLayout = new NetworkGraphLayout
            {
                IsAnimationEnabled    = false,
                CreationTransition    = null,
                DestructionTransition = null,
                LayoutAlgorithmType   = "StressMajorization",
                LayoutParameters      = new StressMajorizationLayoutParameters
                {
                    WeightAdjustment = 1.0
                },
                OverlapRemovalAlgorithmType = "FSA",
                OverlapRemovalParameters    = new OverlapRemovalParameters
                {
                    HorizontalGap = 2,
                    VerticalGap   = 2
                },
                Graph        = graph,
                Background   = Brushes.White,
                WeightFilter = scoreFilter
            };

            // No scale-update callback is supplied for the network graph.
            SaveElement(graphLayout, result.FileName, null);
            return true;
        }
Пример #2
0
        /// <summary>
        /// Produces the textual form of this factor, chosen by the value of <c>Type</c>.
        /// </summary>
        /// <returns>
        /// The constant's string form, the similarity metric's type name, the nested expression's
        /// printed form wrapped in parentheses, or the function type's name. An empty string is
        /// returned when <c>Type</c> matches none of these cases.
        /// </returns>
        public string Print()
        {
            var text = new StringBuilder();
            var kind = Type;

            if (kind == FactorType.Constant)
            {
                text.Append(ConstantValue.ToString());
            }
            else if (kind == FactorType.SimilarityMetric)
            {
                text.Append(SimilarityMetric.GetType().Name);
            }
            else if (kind == FactorType.Expression)
            {
                // Nested expressions are parenthesized.
                text.Append("(").Append(Expression.Print()).Append(")");
            }
            else if (kind == FactorType.Function)
            {
                text.Append(FunctionType.ToString());
            }

            return text.ToString();
        }
Пример #3
0
        /// <summary>
        /// Builds a similarity matrix by applying a metric to every pairing of an element of
        /// <paramref name="lhs"/> with an element of <paramref name="rhs"/>.
        /// </summary>
        /// <param name="lhs">Items that index the rows of the result</param>
        /// <param name="rhs">Items that index the columns of the result</param>
        /// <param name="metric">Delegate invoked once for each (row item, column item) pair</param>
        /// <returns>
        /// A matrix of <c>lhs.Length</c> by <c>rhs.Length</c> values, or a 1x1 matrix holding the
        /// default value when either array is empty
        /// </returns>
        public static SimilarityMatrix<T> Compare(T[] lhs, T[] rhs, SimilarityMetric<T> metric)
        {
            if (lhs.Length == 0 || rhs.Length == 0)
            {
                return new SimilarityMatrix<T>(new float[1, 1]);
            }

            UpdateManager.Clear();
            UpdateManager.WriteLine("Comparing items...");

            int rowCount    = lhs.Length;
            int columnCount = rhs.Length;
            int totalPairs  = rowCount * columnCount;

            var scores = new float[rowCount, columnCount];
            for (int row = 0; row < rowCount; row++)
            {
                for (int column = 0; column < columnCount; column++)
                {
                    scores[row, column] = metric(lhs[row], rhs[column]);

                    // Number of pairs finished so far, counted in row-major order.
                    UpdateManager.RaiseProgress(row * columnCount + column + 1, totalPairs);
                }
            }

            UpdateManager.WriteLine("Done");
            return new SimilarityMatrix<T>(scores);
        }
Пример #4
0
 /// <summary>
 /// Shows a save dialog offering the registered similarity-matrix file types and, when the
 /// dialog result is valid, exports the project's matrix with the exporter for the chosen type.
 /// </summary>
 /// <param name="ownerViewModel">View model passed to the dialog service and to <c>Export</c>.</param>
 /// <param name="similarityMetric">Metric forwarded to the selected exporter.</param>
 /// <returns>The result of <c>Export</c>, or <c>false</c> when the dialog result is not valid.</returns>
 public bool ExportSimilarityMatrix(object ownerViewModel, SimilarityMetric similarityMetric)
 {
     FileDialogResult dialogResult = _dialogService.ShowSaveFileDialog(ownerViewModel, "Export Similarity Matrix", SimilarityMatrixExporters.Keys);
     if (!dialogResult.IsValid)
     {
         return false;
     }

     return Export(
         ownerViewModel,
         dialogResult.FileName,
         stream => SimilarityMatrixExporters[dialogResult.SelectedFileType].Export(stream, _projectService.Project, similarityMetric));
 }
Пример #5
0
 /// <summary>
 /// Creates the view model for one cell of the similarity matrix, backed by an existing variety pair.
 /// </summary>
 /// <param name="similarityMetric">Metric stored on this view model.</param>
 /// <param name="thisVariety">Variety stored as "this" side of the pair.</param>
 /// <param name="varietyPair">Pair from which the other variety is obtained.</param>
 public SimilarityMatrixVarietyPairViewModel(SimilarityMetric similarityMetric, Variety thisVariety, VarietyPair varietyPair)
 {
     _similarityMetric = similarityMetric;
     _thisVariety = thisVariety;
     _varietyPair = varietyPair;

     // The counterpart is whatever the pair reports as "other" relative to thisVariety.
     _otherVariety = varietyPair.GetOtherVariety(thisVariety);

     _switchToVarietyPairCommand = new RelayCommand(SwitchToVarietyPair);
 }
 /// <summary>
 /// Initializes a matrix-cell view model from a variety, the pair it belongs to, and the metric to use.
 /// </summary>
 /// <param name="similarityMetric">Similarity metric kept by the view model.</param>
 /// <param name="thisVariety">The variety this cell is viewed from.</param>
 /// <param name="varietyPair">The pair that supplies the opposite variety.</param>
 public SimilarityMatrixVarietyPairViewModel(SimilarityMetric similarityMetric, Variety thisVariety, VarietyPair varietyPair)
 {
     this._thisVariety = thisVariety;
     this._varietyPair = varietyPair;
     this._similarityMetric = similarityMetric;
     this._otherVariety = varietyPair.GetOtherVariety(thisVariety);
     this._switchToVarietyPairCommand = new RelayCommand(this.SwitchToVarietyPair);
 }
Пример #7
0
        /// <summary>
        /// Lets the user choose a target file among the registered similarity-matrix exporters and,
        /// if the dialog result is valid, runs the matching exporter on the current project.
        /// </summary>
        /// <param name="ownerViewModel">View model given to the dialog service and to <c>Export</c>.</param>
        /// <param name="similarityMetric">Metric handed to the chosen exporter.</param>
        /// <returns><c>false</c> when the dialog result is not valid; otherwise whatever <c>Export</c> returns.</returns>
        public bool ExportSimilarityMatrix(object ownerViewModel, SimilarityMetric similarityMetric)
        {
            FileDialogResult chosen = _dialogService.ShowSaveFileDialog(ownerViewModel, "Export Similarity Matrix", SimilarityMatrixExporters.Keys);

            // Short-circuit: Export is only attempted for a valid dialog result.
            return chosen.IsValid
                && Export(ownerViewModel, chosen.FileName, stream => SimilarityMatrixExporters[chosen.SelectedFileType].Export(stream, _projectService.Project, similarityMetric));
        }
Пример #8
0
 /// <summary>
 /// Creates the row view model for <paramref name="variety"/>, with one pair view model per
 /// entry of <paramref name="varieties"/>.
 /// </summary>
 /// <param name="similarityMetric">Metric passed to pair view models that are backed by an existing pair.</param>
 /// <param name="varieties">Varieties to build pair view models for, in enumeration order.</param>
 /// <param name="variety">The variety this row represents; also passed to the base constructor.</param>
 public SimilarityMatrixVarietyViewModel(SimilarityMetric similarityMetric, IEnumerable<Variety> varieties, Variety variety)
     : base(variety)
 {
     var pairViewModels = new List<SimilarityMatrixVarietyPairViewModel>();
     foreach (Variety other in varieties)
     {
         VarietyPair pair;
         if (variety.VarietyPairs.TryGetValue(other, out pair))
         {
             pairViewModels.Add(new SimilarityMatrixVarietyPairViewModel(similarityMetric, variety, pair));
         }
         else
         {
             // No pair is registered for this variety: use the two-variety constructor instead.
             pairViewModels.Add(new SimilarityMatrixVarietyPairViewModel(variety, other));
         }
     }

     _varietyPairs = new ReadOnlyList<SimilarityMatrixVarietyPairViewModel>(pairViewModels);
 }
        /// <summary>
        /// Builds the similarity-matrix row for <paramref name="variety"/> by creating a pair view
        /// model for every variety in <paramref name="varieties"/>.
        /// </summary>
        /// <param name="similarityMetric">Metric given to pair view models created from an existing variety pair.</param>
        /// <param name="varieties">The varieties that become the cells of this row.</param>
        /// <param name="variety">The row's own variety, forwarded to the base constructor.</param>
        public SimilarityMatrixVarietyViewModel(SimilarityMetric similarityMetric, IEnumerable<Variety> varieties, Variety variety)
            : base(variety)
        {
            var cells = new List<SimilarityMatrixVarietyPairViewModel>();

            foreach (Variety candidate in varieties)
            {
                VarietyPair existingPair;
                bool hasPair = variety.VarietyPairs.TryGetValue(candidate, out existingPair);

                // Fall back to the (variety, variety) constructor when no pair was found.
                SimilarityMatrixVarietyPairViewModel cell = hasPair
                    ? new SimilarityMatrixVarietyPairViewModel(similarityMetric, variety, existingPair)
                    : new SimilarityMatrixVarietyPairViewModel(variety, candidate);

                cells.Add(cell);
            }

            _varietyPairs = new ReadOnlyList<SimilarityMatrixVarietyPairViewModel>(cells);
        }
Пример #10
0
        /// <summary>
        /// Computes this factor's numeric value for a pair of words, according to <c>Type</c>.
        /// </summary>
        /// <param name="firstWord">First word of the pair.</param>
        /// <param name="secondWord">Second word of the pair.</param>
        /// <returns>
        /// The constant value, the similarity metric's result, the nested expression's result, or a
        /// length-based value selected by <c>FunctionType</c>. Returns 0 when <c>Type</c> (or, for
        /// functions, <c>FunctionType</c>) matches none of the handled cases.
        /// </returns>
        public double Evaluate(string firstWord, string secondWord)
        {
            switch (Type)
            {
            case FactorType.Constant:
                return ConstantValue;

            case FactorType.SimilarityMetric:
                return SimilarityMetric.GetSimilarity(firstWord, secondWord);

            case FactorType.Expression:
                return Expression.Evaluate(firstWord, secondWord);

            case FactorType.Function:
                // Length-based functions of the two words.
                switch (FunctionType)
                {
                case FactorFunctionType.MaxLength:
                    return Math.Max(firstWord.Length, secondWord.Length);

                case FactorFunctionType.MinLength:
                    return Math.Min(firstWord.Length, secondWord.Length);

                case FactorFunctionType.SumLength:
                    return firstWord.Length + secondWord.Length;

                case FactorFunctionType.FirstLength:
                    return firstWord.Length;

                case FactorFunctionType.SecondLength:
                    return secondWord.Length;
                }
                break;
            }

            // Unhandled factor or function type.
            return 0d;
        }
Пример #11
0
        /// <summary>
        /// Writes the project's similarity scores to <paramref name="stream"/> as a tab-separated
        /// matrix whose rows and columns follow the cluster order produced by <c>Optics</c>.
        /// </summary>
        /// <param name="stream">Destination stream; it is wrapped in a <c>NonClosingStreamWrapper</c> before the writer is created.</param>
        /// <param name="project">Project whose varieties are exported.</param>
        /// <param name="similarityMetric">Selects the lexical or phonetic score of each variety pair.</param>
        public void Export(Stream stream, CogProject project, SimilarityMetric similarityMetric)
        {
            // Distance used for ordering is 1 - similarity; a metric other than Lexical/Phonetic
            // contributes a similarity of 0 here.
            var optics = new Optics<Variety>(variety => variety.VarietyPairs.Select(pair =>
            {
                double similarity;
                if (similarityMetric == SimilarityMetric.Lexical)
                {
                    similarity = pair.LexicalSimilarityScore;
                }
                else if (similarityMetric == SimilarityMetric.Phonetic)
                {
                    similarity = pair.PhoneticSimilarityScore;
                }
                else
                {
                    similarity = 0;
                }
                return Tuple.Create(pair.GetOtherVariety(variety), 1.0 - similarity);
            }).Concat(Tuple.Create(variety, 0.0)), 2);

            Variety[] ordered = optics.ClusterOrder(project.Varieties).Select(entry => entry.DataObject).ToArray();
            using (var writer = new StreamWriter(new NonClosingStreamWrapper(stream)))
            {
                // Header row: an empty first cell followed by the variety names.
                foreach (Variety column in ordered)
                {
                    writer.Write("\t");
                    writer.Write(column.Name);
                }
                writer.WriteLine();

                for (int row = 0; row < ordered.Length; row++)
                {
                    Variety rowVariety = ordered[row];
                    writer.Write(rowVariety.Name);

                    for (int col = 0; col < ordered.Length; col++)
                    {
                        writer.Write("\t");
                        if (row == col)
                        {
                            // Diagonal cells are left empty.
                            continue;
                        }

                        VarietyPair cellPair = rowVariety.VarietyPairs[ordered[col]];
                        double score;
                        if (similarityMetric == SimilarityMetric.Lexical)
                        {
                            score = cellPair.LexicalSimilarityScore;
                        }
                        else
                        {
                            score = cellPair.PhoneticSimilarityScore;
                        }
                        writer.Write("{0:0.00}", score);
                    }
                    writer.WriteLine();
                }
            }
        }
Пример #12
0
        /// <summary>
        /// Exports a tab-separated similarity matrix for the project's varieties. Varieties appear
        /// in the order returned by <c>Optics.ClusterOrder</c>.
        /// </summary>
        /// <param name="stream">Stream that receives the text; wrapped in a <c>NonClosingStreamWrapper</c> for the writer.</param>
        /// <param name="project">Source of the varieties to export.</param>
        /// <param name="similarityMetric">Chooses between each pair's lexical and phonetic similarity score.</param>
        public void Export(Stream stream, CogProject project, SimilarityMetric similarityMetric)
        {
            var optics = new Optics<Variety>(variety => variety.VarietyPairs.Select(pair =>
                {
                    // Ordering distance is the complement of the similarity score.
                    double distance;
                    switch (similarityMetric)
                    {
                        case SimilarityMetric.Lexical:
                            distance = 1.0 - pair.LexicalSimilarityScore;
                            break;
                        case SimilarityMetric.Phonetic:
                            distance = 1.0 - pair.PhoneticSimilarityScore;
                            break;
                        default:
                            // Any other metric is treated as similarity 0.
                            distance = 1.0;
                            break;
                    }
                    return Tuple.Create(pair.GetOtherVariety(variety), distance);
                }).Concat(Tuple.Create(variety, 0.0)), 2);

            Variety[] clusterOrdered = optics.ClusterOrder(project.Varieties).Select(oe => oe.DataObject).ToArray();
            int size = clusterOrdered.Length;
            bool useLexical = similarityMetric == SimilarityMetric.Lexical;

            using (var output = new StreamWriter(new NonClosingStreamWrapper(stream)))
            {
                // Column headings, each preceded by a tab so the first column stays blank.
                for (int h = 0; h < size; h++)
                {
                    output.Write("\t");
                    output.Write(clusterOrdered[h].Name);
                }
                output.WriteLine();

                for (int i = 0; i < size; i++)
                {
                    output.Write(clusterOrdered[i].Name);
                    for (int j = 0; j < size; j++)
                    {
                        output.Write("\t");
                        if (i != j)
                        {
                            VarietyPair varietyPair = clusterOrdered[i].VarietyPairs[clusterOrdered[j]];
                            double score = useLexical ? varietyPair.LexicalSimilarityScore : varietyPair.PhoneticSimilarityScore;
                            output.Write("{0:0.00}", score);
                        }
                    }
                    output.WriteLine();
                }
            }
        }
Пример #13
0
        /// <summary>
        /// Compares an array of items to itself. The metric is evaluated only for pairs
        /// (r, c) with c &gt;= r; each result is stored at both [r, c] and [c, r].
        /// </summary>
        /// <param name="items">Item array</param>
        /// <param name="metric">Metric to use</param>
        /// <returns>
        /// Similarity matrix of <c>items.Length</c> by <c>items.Length</c> values, or a 1x1 matrix
        /// holding the default value when <paramref name="items"/> is empty
        /// </returns>
        public static SimilarityMatrix<T> Compare(T[] items, SimilarityMetric<T> metric)
        {
            if (items.Length == 0)
            {
                return new SimilarityMatrix<T>(new float[1, 1]);
            }

            UpdateManager.Clear();
            UpdateManager.WriteLine("Comparing items...");

            int count = items.Length;

            // Number of metric evaluations: count + (count - 1) + ... + 1 = count * (count + 1) / 2.
            // Computed in long so the intermediate product cannot overflow.
            int maximum = (int)((long)count * (count + 1) / 2);

            float[,] similarities = new float[count, count];
            int index = 0;

            for (int r = 0; r < count; r++)
            {
                T x = items[r];
                for (int c = r; c < count; c++)
                {
                    float similarity = metric(x, items[c]);

                    // Fill both halves at once instead of mirroring in a second pass.
                    similarities[r, c] = similarity;
                    similarities[c, r] = similarity;

                    UpdateManager.RaiseProgress(++index, maximum);
                }

                // The forced GC.Collect() that used to run after every row was removed: it triggered
                // a blocking full collection per row and is not needed for correctness.
            }

            UpdateManager.WriteLine("Done");
            return new SimilarityMatrix<T>(similarities);
        }
Пример #14
0
 /// <summary>
 /// Builds a similarity matrix between two lists of item ids. Each id is resolved to its value
 /// through <paramref name="itemLookup"/> before the array-based overload is called.
 /// </summary>
 /// <param name="itemLookup">Maps an id to the value that is compared</param>
 /// <param name="lhs">Ids of the first list of items</param>
 /// <param name="rhs">Ids of the second list of items</param>
 /// <param name="metric">The metric to use</param>
 /// <returns>
 /// A similarity matrix; a 1x1 matrix holding the default value when either list or the lookup is empty
 /// </returns>
 public static SimilarityMatrix<T> Compare(Dictionary<string, T> itemLookup, List<string> lhs, List<string> rhs, SimilarityMetric<T> metric)
 {
     bool nothingToCompare = lhs.Count == 0 || rhs.Count == 0 || itemLookup.Count == 0;
     if (nothingToCompare)
     {
         return new SimilarityMatrix<T>(new float[1, 1]);
     }

     // Resolve ids to values, keeping list order.
     T[] leftItems  = lhs.Select(id => itemLookup[id]).ToArray();
     T[] rightItems = rhs.Select(id => itemLookup[id]).ToArray();

     return Compare(leftItems, rightItems, metric);
 }
Пример #15
0
        /// <summary>
        /// Writes the project's varieties and their pairwise distances (1 - similarity) to
        /// <paramref name="stream"/> as a NEXUS document with a Taxa block and a lower-triangular
        /// Distances block.
        /// </summary>
        /// <param name="stream">Destination stream; wrapped in a <c>NonClosingStreamWrapper</c> before the writer is created.</param>
        /// <param name="project">Project whose varieties and variety pairs are exported.</param>
        /// <param name="similarityMetric">Selects the lexical or phonetic similarity score.</param>
        /// <exception cref="InvalidEnumArgumentException">
        /// <paramref name="similarityMetric"/> is neither <c>Lexical</c> nor <c>Phonetic</c>.
        /// </exception>
        public void Export(Stream stream, CogProject project, SimilarityMetric similarityMetric)
        {
            using (var writer = new StreamWriter(new NonClosingStreamWrapper(stream)))
            {
                writer.WriteLine("#NEXUS");

                writer.WriteLine("BEGIN Taxa;");
                writer.WriteLine("\tDIMENSIONS NTax={0};", project.Varieties.Count);
                writer.Write("\tTAXLABELS");

                // Longest sanitized name; used below to pad the row labels to a common width.
                int maxNameLen = 0;
                foreach (Variety variety in project.Varieties)
                {
                    string name = variety.Name.RemoveNexusSpecialChars();
                    maxNameLen = Math.Max(maxNameLen, name.Length);
                    writer.WriteLine();
                    writer.Write("\t\t{0}", name);
                }

                writer.WriteLine(";");
                writer.WriteLine("END;");

                writer.WriteLine("BEGIN Distances;");
                writer.WriteLine("\tDIMENSIONS NTax={0};", project.Varieties.Count);
                writer.WriteLine("\tFORMAT Triangle=LOWER Diagonal Labels Missing=?;");
                writer.Write("\tMATRIX");

                for (int i = 0; i < project.Varieties.Count; i++)
                {
                    Variety variety1 = project.Varieties[i];
                    string name = variety1.Name.RemoveNexusSpecialChars();
                    writer.WriteLine();
                    writer.Write("\t\t{0}{1} ", name, new string(' ', maxNameLen - name.Length));

                    // Lower triangle including the diagonal: columns 0..i.
                    for (int j = 0; j <= i; j++)
                    {
                        if (i == j)
                        {
                            writer.Write("0.00");
                        }
                        else
                        {
                            Variety variety2 = project.Varieties[j];
                            VarietyPair vp = variety1.VarietyPairs[variety2];
                            double sim;
                            switch (similarityMetric)
                            {
                                case SimilarityMetric.Lexical:
                                    sim = vp.LexicalSimilarityScore;
                                    break;
                                case SimilarityMetric.Phonetic:
                                    sim = vp.PhoneticSimilarityScore;
                                    break;
                                default:
                                    throw new InvalidEnumArgumentException();
                            }

                            // Fix: format with the invariant culture so the decimal separator is
                            // always '.', matching the hard-coded "0.00" diagonal. The writer's
                            // default (current culture) could emit "0,50" on some systems.
                            writer.Write(string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:0.00} ", 1.0 - sim));
                        }
                    }
                }

                writer.WriteLine(";");
                writer.WriteLine("END;");
            }
        }
Пример #16
0
        /// <summary>
        /// Builds a graph with one vertex per project variety and one edge per project variety pair.
        /// </summary>
        /// <param name="similarityMetric">Metric passed to every <c>NetworkGraphEdge</c> that is created.</param>
        /// <returns>The populated bidirectional graph.</returns>
        public IBidirectionalGraph<NetworkGraphVertex, NetworkGraphEdge> GenerateNetworkGraph(SimilarityMetric similarityMetric)
        {
            var networkGraph      = new BidirectionalGraph<NetworkGraphVertex, NetworkGraphEdge>();
            var verticesByVariety = new Dictionary<Variety, NetworkGraphVertex>();

            // Vertices first, remembering which vertex belongs to which variety.
            foreach (Variety variety in _projectService.Project.Varieties)
            {
                NetworkGraphVertex vertex = new NetworkGraphVertex(variety);
                networkGraph.AddVertex(vertex);
                verticesByVariety[variety] = vertex;
            }

            // Then connect the two vertices of every variety pair.
            foreach (VarietyPair pair in _projectService.Project.VarietyPairs)
            {
                NetworkGraphVertex source = verticesByVariety[pair.Variety1];
                NetworkGraphVertex target = verticesByVariety[pair.Variety2];
                networkGraph.AddEdge(new NetworkGraphEdge(source, target, pair, similarityMetric));
            }

            return networkGraph;
        }
Пример #17
0
        /// <summary>
        /// Asks the user for a PNG file name and, when the dialog result is valid, builds the layout
        /// element for the requested hierarchical graph and passes it to <c>SaveElement</c>.
        /// </summary>
        /// <param name="ownerViewModel">View model passed to the dialog service as the dialog owner.</param>
        /// <param name="graphType">Chooses between the dendrogram layout and the radial tree layout.</param>
        /// <param name="clusteringMethod">Clustering method forwarded to the graph service.</param>
        /// <param name="similarityMetric">Similarity metric forwarded to the graph service.</param>
        /// <returns><c>true</c> when the dialog result was valid and <c>SaveElement</c> was called; otherwise <c>false</c>.</returns>
        public bool ExportHierarchicalGraph(object ownerViewModel, HierarchicalGraphType graphType, ClusteringMethod clusteringMethod, SimilarityMetric similarityMetric)
        {
            FileDialogResult dialogResult = _dialogService.ShowSaveFileDialog("Export Hierarchical Graph", ownerViewModel, new FileType("PNG image", ".png"));
            if (!dialogResult.IsValid)
            {
                return false;
            }

            IBidirectionalGraph<HierarchicalGraphVertex, HierarchicalGraphEdge> graph = _graphService.GenerateHierarchicalGraph(graphType, clusteringMethod, similarityMetric);

            FrameworkElement layoutElement = null;
            Action<double>   onScaleChange = null;

            if (graphType == HierarchicalGraphType.Dendrogram)
            {
                layoutElement = new DendrogramLayout
                {
                    Graph      = graph,
                    Background = Brushes.White
                };
            }
            else if (graphType == HierarchicalGraphType.Tree)
            {
                var treeLayout = new HierarchicalGraphLayout
                {
                    IsAnimationEnabled    = false,
                    CreationTransition    = null,
                    DestructionTransition = null,
                    LayoutAlgorithmType   = "RadialTree",
                    LayoutParameters      = new RadialTreeLayoutParameters
                    {
                        BranchLengthScaling = BranchLengthScaling.MinimizeLabelOverlapMinimum
                    },
                    Graph             = graph,
                    Background        = Brushes.White,
                    ScaleLabelsToZoom = 1.0
                };

                // Copy the application-level vertex/edge styles into the layout's own resources.
                var appResources = System.Windows.Application.Current.Resources;
                treeLayout.Resources[typeof(VertexControl)] = appResources["HierarchicalVertexControlStyle"];
                treeLayout.Resources[typeof(EdgeControl)]   = appResources["HierarchicalEdgeControlStyle"];

                layoutElement = treeLayout;

                // Only the tree layout supplies a scale callback to SaveElement.
                onScaleChange = scale => treeLayout.ScaleLabelsToZoom = scale;
            }

            Debug.Assert(layoutElement != null);
            SaveElement(layoutElement, dialogResult.FileName, onScaleChange);
            return true;
        }
Пример #18
0
        /// <summary>
        /// Console entry point. Parses the command line into <c>opts</c> (optionally replacing it with
        /// options deserialized from a JSON config file), then runs training or testing depending on
        /// <c>opts.Task</c>. Any exception is caught and its message and stack trace are logged.
        /// </summary>
        /// <param name="args">Command-line arguments handed to <c>ArgParser</c>.</param>
        static void Main(string[] args)
        {
            try
            {
                //Parse command line
                // NOTE(review): opts is not declared in this method; presumably a static field of the enclosing class - confirm.
                ArgParser argParser = new ArgParser(args, opts);

                if (!opts.ConfigFilePath.IsNullOrEmpty())
                {
                    Logger.WriteLine($"Loading config file from '{opts.ConfigFilePath}'");
                    opts = JsonConvert.DeserializeObject<SeqSimilarityOptions>(File.ReadAllText(opts.ConfigFilePath));
                }

                Logger.LogFile = $"{nameof(SeqSimilarityConsole)}_{opts.Task}_{Utils.GetTimeStamp(DateTime.Now)}.log";
                ShowOptions(args, opts);

                DecodingOptions decodingOptions = opts.CreateDecodingOptions();
                SeqSimilarity   ss = null;
                if (opts.Task == ModeEnums.Train)
                {
                    // Load train corpus
                    SeqClassificationMultiTasksCorpus trainCorpus = new SeqClassificationMultiTasksCorpus(corpusFilePath: opts.TrainCorpusPath, srcLangName: opts.SrcLang, tgtLangName: opts.TgtLang, batchSize: opts.BatchSize, shuffleBlockSize: opts.ShuffleBlockSize,
                                                                                                          maxSentLength: opts.MaxTrainSentLength, shuffleEnums: opts.ShuffleType);

                    // Load valid corpus
                    List<SeqClassificationMultiTasksCorpus> validCorpusList = new List<SeqClassificationMultiTasksCorpus>();
                    if (!opts.ValidCorpusPaths.IsNullOrEmpty())
                    {
                        string[] validCorpusPathList = opts.ValidCorpusPaths.Split(';');
                        foreach (var validCorpusPath in validCorpusPathList)
                        {
                            // Fix: load each individual path. The original passed the whole ';'-separated
                            // opts.ValidCorpusPaths string to every corpus, ignoring the loop variable.
                            validCorpusList.Add(new SeqClassificationMultiTasksCorpus(validCorpusPath, srcLangName: opts.SrcLang, tgtLangName: opts.TgtLang, opts.ValBatchSize, opts.ShuffleBlockSize, opts.MaxTestSentLength, shuffleEnums: opts.ShuffleType));
                        }
                    }

                    // Create learning rate
                    ILearningRate learningRate = new DecayLearningRate(opts.StartLearningRate, opts.WarmUpSteps, opts.WeightsUpdateCount);

                    // Create metrics: "Continuous" similarity uses SimilarityMetric; otherwise the metric
                    // is created below, once the label vocabulary is available.
                    IMetric metric = null;

                    if (opts.SimilarityType == "Continuous")
                    {
                        metric = new SimilarityMetric();
                    }

                    // Create optimizer
                    IOptimizer optimizer = Misc.CreateOptimizer(opts);

                    if (!opts.ModelFilePath.IsNullOrEmpty() && File.Exists(opts.ModelFilePath))
                    {
                        //Incremental training
                        Logger.WriteLine($"Loading model from '{opts.ModelFilePath}'...");
                        ss = new SeqSimilarity(opts);

                        if (metric == null)
                        {
                            metric = new MultiLabelsFscoreMetric("", ss.ClsVocab.GetAllTokens(keepBuildInTokens: false));
                        }
                    }
                    else
                    {
                        // Load or build vocabulary
                        Vocab       srcVocab  = null;
                        List<Vocab> tgtVocabs = null;
                        if (!opts.SrcVocab.IsNullOrEmpty() && !opts.TgtVocab.IsNullOrEmpty())
                        {
                            Logger.WriteLine($"Loading source vocabulary from '{opts.SrcVocab}' and target vocabulary from '{opts.TgtVocab}'.");
                            // Vocabulary files are specified, so we load them
                            srcVocab = new Vocab(opts.SrcVocab);

                            tgtVocabs = new List<Vocab>
                            {
                                new Vocab(opts.TgtVocab)
                            };
                        }
                        else
                        {
                            Logger.WriteLine($"Building vocabulary from training corpus.");
                            // We don't specify vocabulary, so we build it from train corpus
                            (srcVocab, tgtVocabs) = trainCorpus.BuildVocabs(opts.SrcVocabSize, opts.TgtVocabSize);
                        }

                        if (metric == null)
                        {
                            metric = new MultiLabelsFscoreMetric("", tgtVocabs[0].GetAllTokens(keepBuildInTokens: false));
                        }

                        //New training
                        ss = new SeqSimilarity(opts, srcVocab, tgtVocabs[0]);
                    }

                    // Add event handler for monitoring
                    ss.StatusUpdateWatcher += Misc.Ss_StatusUpdateWatcher;
                    ss.EvaluationWatcher   += Ss_EvaluationWatcher;

                    // Kick off training
                    ss.Train(maxTrainingEpoch: opts.MaxEpochNum, trainCorpus: trainCorpus, validCorpusList: validCorpusList.ToArray(), learningRate: learningRate, optimizer: optimizer, metrics: new List<IMetric>()
                    {
                        metric
                    }, decodingOptions: decodingOptions);
                }
                //else if (opts.Task == ModeEnums.Valid)
                //{
                //    Logger.WriteLine($"Evaluate model '{opts.ModelFilePath}' by valid corpus '{opts.ValidCorpusPath}'");

                //    // Create metrics
                //    List<IMetric> metrics = new List<IMetric>
                //{
                //    new BleuMetric(),
                //    new LengthRatioMetric()
                //};

                //    // Load valid corpus
                //    ParallelCorpus validCorpus = new ParallelCorpus(opts.ValidCorpusPath, opts.SrcLang, opts.TgtLang, opts.ValBatchSize, opts.ShuffleBlockSize, opts.MaxSrcTestSentLength, opts.MaxTgtTestSentLength, shuffleEnums: shuffleType);

                //    ss = new Seq2Seq(opts);
                //    ss.EvaluationWatcher += ss_EvaluationWatcher;
                //    ss.Valid(validCorpus: validCorpus, metrics: metrics);
                //}
                else if (opts.Task == ModeEnums.Test)
                {
                    // An existing output file is deleted before the test run.
                    if (File.Exists(opts.OutputFile))
                    {
                        Logger.WriteLine(Logger.Level.err, ConsoleColor.Yellow, $"Output file '{opts.OutputFile}' exist. Delete it.");
                        File.Delete(opts.OutputFile);
                    }

                    //Test trained model
                    ss = new SeqSimilarity(opts);
                    Stopwatch stopwatch = Stopwatch.StartNew();

                    ss.Test<SeqClassificationMultiTasksCorpusBatch>(opts.InputTestFile, opts.OutputFile, opts.BatchSize, decodingOptions, opts.SrcSentencePieceModelPath, opts.TgtSentencePieceModelPath);

                    stopwatch.Stop();

                    Logger.WriteLine($"Test mode execution time elapsed: '{stopwatch.Elapsed}'");
                }
                //else if (opts.Task == ModeEnums.DumpVocab)
                //{
                //    ss = new Seq2Seq(opts);
                //    ss.DumpVocabToFiles(opts.SrcVocab, opts.TgtVocab);
                //}
                else
                {
                    Logger.WriteLine(Logger.Level.err, ConsoleColor.Red, $"Task '{opts.Task}' is not supported.");
                    argParser.Usage();
                }
            }
            catch (Exception err)
            {
                // Top-level handler: the failure is logged and the method returns normally.
                Logger.WriteLine($"Exception: '{err.Message}'");
                Logger.WriteLine($"Call stack: '{err.StackTrace}'");
            }
        }
Пример #19
0
        /// <summary>
        /// Clusters the project's varieties with the requested method and converts the resulting
        /// cluster tree into a hierarchical graph.
        /// </summary>
        /// <param name="graphType">For neighbor joining, selects whether the tree is rooted (dendrogram) or used as is (tree).</param>
        /// <param name="clusteringMethod">UPGMA or neighbor joining.</param>
        /// <param name="similarityMetric">Selects the lexical or phonetic score; distance is 1 minus that score.</param>
        /// <returns>
        /// The hierarchical graph, or <c>null</c> when the clustering method (or, for neighbor
        /// joining, the graph type) is not one of the handled values.
        /// </returns>
        public IBidirectionalGraph<HierarchicalGraphVertex, HierarchicalGraphEdge> GenerateHierarchicalGraph(HierarchicalGraphType graphType,
                                                                                                             ClusteringMethod clusteringMethod, SimilarityMetric similarityMetric)
        {
            // Distance between two varieties; stays null for a metric other than Lexical/Phonetic.
            Func<Variety, Variety, double> getDistance = null;
            if (similarityMetric == SimilarityMetric.Lexical)
            {
                getDistance = (v1, v2) => 1.0 - v1.VarietyPairs[v2].LexicalSimilarityScore;
            }
            else if (similarityMetric == SimilarityMetric.Phonetic)
            {
                getDistance = (v1, v2) => 1.0 - v1.VarietyPairs[v2].PhoneticSimilarityScore;
            }

            if (clusteringMethod == ClusteringMethod.Upgma)
            {
                var upgmaClusterer = new UpgmaClusterer<Variety>(getDistance);
                IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> upgmaTree = upgmaClusterer.GenerateClusters(_projectService.Project.Varieties);
                return BuildHierarchicalGraph(upgmaTree);
            }

            if (clusteringMethod == ClusteringMethod.NeighborJoining)
            {
                var njClusterer = new NeighborJoiningClusterer<Variety>(getDistance);
                IUndirectedGraph<Cluster<Variety>, ClusterEdge<Variety>> unrootedTree = njClusterer.GenerateClusters(_projectService.Project.Varieties);

                if (graphType == HierarchicalGraphType.Dendrogram)
                {
                    // A dendrogram is built from the rooted form of the neighbor-joining tree.
                    IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> rootedTree = unrootedTree.ToRootedTree();
                    return BuildHierarchicalGraph(rootedTree);
                }

                if (graphType == HierarchicalGraphType.Tree)
                {
                    return BuildHierarchicalGraph(unrootedTree);
                }
            }

            return null;
        }
Пример #20
0
 /// <summary>
 /// Creates a searcher that keeps the given similarity metric.
 /// </summary>
 /// <param name="similarityMetric">Metric stored in the searcher's private field.</param>
 public Searcher(SimilarityMetric similarityMetric)
 {
     this._similarityMetric = similarityMetric;
 }
Пример #21
0
        /// <summary>
        /// Produces a hierarchical graph of the project's varieties by running the selected
        /// clustering algorithm over distances derived from the selected similarity metric.
        /// </summary>
        /// <param name="graphType">Only consulted for neighbor joining: dendrogram (rooted tree) or tree (unrooted).</param>
        /// <param name="clusteringMethod">The clustering algorithm to run.</param>
        /// <param name="similarityMetric">Lexical or phonetic; the distance is 1 minus the corresponding score.</param>
        /// <returns>The generated graph, or <c>null</c> if no handled combination of method and graph type applies.</returns>
        public IBidirectionalGraph<HierarchicalGraphVertex, HierarchicalGraphEdge> GenerateHierarchicalGraph(HierarchicalGraphType graphType,
            ClusteringMethod clusteringMethod, SimilarityMetric similarityMetric)
        {
            switch (clusteringMethod)
            {
                case ClusteringMethod.Upgma:
                {
                    // Left null when the metric is neither Lexical nor Phonetic.
                    Func<Variety, Variety, double> distance = null;
                    if (similarityMetric == SimilarityMetric.Lexical)
                        distance = (a, b) => 1.0 - a.VarietyPairs[b].LexicalSimilarityScore;
                    else if (similarityMetric == SimilarityMetric.Phonetic)
                        distance = (a, b) => 1.0 - a.VarietyPairs[b].PhoneticSimilarityScore;

                    var clusterer = new UpgmaClusterer<Variety>(distance);
                    IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> tree = clusterer.GenerateClusters(_projectService.Project.Varieties);
                    return BuildHierarchicalGraph(tree);
                }

                case ClusteringMethod.NeighborJoining:
                {
                    // Left null when the metric is neither Lexical nor Phonetic.
                    Func<Variety, Variety, double> distance = null;
                    if (similarityMetric == SimilarityMetric.Lexical)
                        distance = (a, b) => 1.0 - a.VarietyPairs[b].LexicalSimilarityScore;
                    else if (similarityMetric == SimilarityMetric.Phonetic)
                        distance = (a, b) => 1.0 - a.VarietyPairs[b].PhoneticSimilarityScore;

                    var clusterer = new NeighborJoiningClusterer<Variety>(distance);
                    IUndirectedGraph<Cluster<Variety>, ClusterEdge<Variety>> unrooted = clusterer.GenerateClusters(_projectService.Project.Varieties);

                    if (graphType == HierarchicalGraphType.Dendrogram)
                    {
                        // Dendrograms are built from the rooted version of the tree.
                        IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> rooted = unrooted.ToRootedTree();
                        return BuildHierarchicalGraph(rooted);
                    }
                    if (graphType == HierarchicalGraphType.Tree)
                    {
                        return BuildHierarchicalGraph(unrooted);
                    }
                    break;
                }
            }

            return null;
        }
Пример #22
0
	    /// <summary>
	    /// Creates an ANC searcher that keeps the supplied similarity metric for later use.
	    /// </summary>
	    /// <param name="similarityMetric">The metric stored in the private <c>_similarityMetric</c> field.</param>
	    public ANCSearcher(SimilarityMetric similarityMetric)
	    {
	        this._similarityMetric = similarityMetric;
	    }
Пример #23
0
 /// <summary>
 /// Builds a similarity matrix for the items named by <paramref name="members"/>, comparing that
 /// list against itself. Each name is resolved to its item through <paramref name="itemLookup"/>
 /// and the resolved items are handed to the array-based <c>Compare</c> overload.
 /// </summary>
 /// <param name="itemLookup">Dictionary mapping a member name to the item it stands for.</param>
 /// <param name="members">Names of the items to compare, in matrix order.</param>
 /// <param name="metric">The metric used to score each pair of items.</param>
 /// <returns>
 /// The similarity matrix for the resolved items, or a matrix wrapping a single zero-initialized
 /// 1x1 array when either the lookup or the member list is empty.
 /// </returns>
 public static SimilarityMatrix <T> Compare(Dictionary <string, T> itemLookup, List <string> members, SimilarityMetric <T> metric)
 {
     bool nothingToCompare = itemLookup.Count == 0 || members.Count == 0;
     if (nothingToCompare)
     {
         float[,] placeholder = new float[1, 1];
         return new SimilarityMatrix <T>(placeholder);
     }

     // Resolve every member name, in order; a name missing from the lookup fails in the indexer.
     T[] resolved = members.ConvertAll(key => itemLookup[key]).ToArray();
     return Compare(resolved, metric);
 }
Пример #24
0
        /// <summary>
        /// Writes the project's varieties and their pairwise distances to <paramref name="stream"/>
        /// as a NEXUS document: a Taxa block listing the variety names followed by a Distances block
        /// holding a lower-triangular matrix (diagonal included) of <c>1 - similarity</c> values.
        /// </summary>
        /// <param name="stream">
        /// Destination stream. It is wrapped in a <c>NonClosingStreamWrapper</c> before being given to
        /// the writer (presumably so disposing the writer leaves the caller's stream open).
        /// </param>
        /// <param name="project">Project whose varieties and variety pairs are exported.</param>
        /// <param name="similarityMetric">Selects which similarity score the distances are derived from.</param>
        /// <exception cref="InvalidEnumArgumentException">
        /// <paramref name="similarityMetric"/> is not a supported metric. This is raised while writing
        /// the first off-diagonal matrix entry, so output written before that point remains in the stream.
        /// </exception>
        public void Export(Stream stream, CogProject project, SimilarityMetric similarityMetric)
        {
            using (var writer = new StreamWriter(new NonClosingStreamWrapper(stream)))
            {
                writer.WriteLine("#NEXUS");

                writer.WriteLine("BEGIN Taxa;");
                writer.WriteLine("\tDIMENSIONS NTax={0};", project.Varieties.Count);
                writer.Write("\tTAXLABELS");

                // Track the longest label so the matrix rows below can be padded into aligned columns.
                int maxNameLen = 0;
                foreach (Variety variety in project.Varieties)
                {
                    string name = variety.Name.RemoveNexusSpecialChars();
                    maxNameLen = Math.Max(maxNameLen, name.Length);
                    writer.WriteLine();
                    writer.Write("\t\t{0}", name);
                }

                writer.WriteLine(";");
                writer.WriteLine("END;");

                writer.WriteLine("BEGIN Distances;");
                writer.WriteLine("\tDIMENSIONS NTax={0};", project.Varieties.Count);
                writer.WriteLine("\tFORMAT Triangle=LOWER Diagonal Labels Missing=?;");
                writer.Write("\tMATRIX");

                for (int i = 0; i < project.Varieties.Count; i++)
                {
                    Variety variety1 = project.Varieties[i];
                    string  name     = variety1.Name.RemoveNexusSpecialChars();
                    writer.WriteLine();
                    writer.Write("\t\t{0}{1} ", name, new string(' ', maxNameLen - name.Length));
                    for (int j = 0; j <= i; j++)
                    {
                        if (i == j)
                        {
                            // A variety is at distance zero from itself.
                            writer.Write("0.00");
                        }
                        else
                        {
                            Variety     variety2 = project.Varieties[j];
                            VarietyPair vp       = variety1.VarietyPairs[variety2];
                            double      sim;
                            switch (similarityMetric)
                            {
                            case SimilarityMetric.Lexical:
                                sim = vp.LexicalSimilarityScore;
                                break;

                            case SimilarityMetric.Phonetic:
                                sim = vp.PhoneticSimilarityScore;
                                break;

                            default:
                                throw new InvalidEnumArgumentException("similarityMetric", (int) similarityMetric, typeof(SimilarityMetric));
                            }

                            // Format with the invariant culture: the writer's default provider is the
                            // current culture, which may emit a decimal comma ("0,50") and produce a
                            // matrix that NEXUS readers cannot parse.
                            writer.Write(string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:0.00} ", 1.0 - sim));
                        }
                    }
                }

                writer.WriteLine(";");
                writer.WriteLine("END;");
            }
        }
Пример #25
0
 /// <summary>
 /// Creates an edge between two network graph vertices whose weight is taken from the given
 /// variety pair.
 /// </summary>
 /// <param name="source">Source vertex, passed to the base edge.</param>
 /// <param name="target">Target vertex, passed to the base edge.</param>
 /// <param name="varietyPair">Pair that supplies the similarity score used as the weight.</param>
 /// <param name="similarityMetric">
 /// <c>Lexical</c> selects the lexical similarity score; any other value selects the phonetic score.
 /// </param>
 public NetworkGraphEdge(NetworkGraphVertex source, NetworkGraphVertex target, VarietyPair varietyPair, SimilarityMetric similarityMetric)
     : base(source, target)
 {
     if (similarityMetric == SimilarityMetric.Lexical)
     {
         _weight = varietyPair.LexicalSimilarityScore;
     }
     else
     {
         _weight = varietyPair.PhoneticSimilarityScore;
     }
 }
Пример #26
0
        /// <summary>
        /// Builds a network graph with one vertex per variety in the current project and one edge
        /// per variety pair.
        /// </summary>
        /// <param name="similarityMetric">Metric handed to every <c>NetworkGraphEdge</c> that is created.</param>
        /// <returns>The populated graph.</returns>
        public IBidirectionalGraph<NetworkGraphVertex, NetworkGraphEdge> GenerateNetworkGraph(SimilarityMetric similarityMetric)
        {
            var network = new BidirectionalGraph<NetworkGraphVertex, NetworkGraphEdge>();

            // Remember which vertex stands for which variety so edges can be wired up afterwards.
            var vertexByVariety = new Dictionary<Variety, NetworkGraphVertex>();
            foreach (Variety variety in _projectService.Project.Varieties)
            {
                NetworkGraphVertex node = new NetworkGraphVertex(variety);
                network.AddVertex(node);
                vertexByVariety[variety] = node;
            }

            foreach (VarietyPair pair in _projectService.Project.VarietyPairs)
            {
                NetworkGraphVertex sourceVertex = vertexByVariety[pair.Variety1];
                NetworkGraphVertex targetVertex = vertexByVariety[pair.Variety2];
                var link = new NetworkGraphEdge(sourceVertex, targetVertex, pair, similarityMetric);
                network.AddEdge(link);
            }

            return network;
        }