コード例 #1
0
ファイル: Analyzer.cs プロジェクト: Naht/FileAnalyzer
        /// <summary>
        /// Tokenizes the names of the files in <paramref name="path"/> and rebuilds
        /// <c>FileList</c>, <c>Vocab</c>, <c>ParsedNames</c> and <c>Input</c> from them.
        /// </summary>
        /// <remarks>
        /// Every non-letter character that occurs in any file name is treated as a separator.
        /// Each name is split on those separators, the tokens are lower-cased, and every pair of
        /// adjacent tokens becomes a vocabulary entry (single tokens on their own are not added).
        /// <c>Vocab</c> lists the distinct pairs in first-seen order, <c>ParsedNames</c> holds, per
        /// file, the vocabulary index of each of its pairs, and <c>Input</c> is the row-normalized
        /// matrix built from the per-file presence vectors.
        /// </remarks>
        /// <param name="path">Directory whose files are enumerated.</param>
        public void ParseDirectory(String path)
        {
            // Value stored for every vocabulary entry that occurs in a file name.
            const double presentWeight = 15.0;

            List<string> fileNames = Directory.EnumerateFiles(path).Select(x => Path.GetFileName(x)).ToList();
            FileList = fileNames;

            // Collected once, straight from the characters of each name. This avoids concatenating
            // all names into one string and does not throw when the directory has no files.
            // If no non-letter character exists the array is empty, and String.Split then falls
            // back to white-space separators (of which there are none in that case).
            char[] separators = fileNames
                                .SelectMany<string, char>(name => name)
                                .Where(c => !Char.IsLetter(c))
                                .Distinct()
                                .ToArray();

            List<Tuple<string, string>> vocab = new List<Tuple<string, string>>();
            // Maps a pair to its position in vocab so each lookup is a hash probe instead of a list scan.
            // Tuple equality is structural, so equal pairs from different files share one entry.
            Dictionary<Tuple<string, string>, int> indexByPair = new Dictionary<Tuple<string, string>, int>();
            List<List<int>> parsedNames = new List<List<int>>(fileNames.Count);

            foreach (string name in fileNames)
            {
                string[] tokens = name.Split(separators, StringSplitOptions.RemoveEmptyEntries)
                                      .Select(t => t.ToLower())
                                      .ToArray();

                List<int> indices = new List<int>();

                // Adjacent pairs (token[i], token[i + 1]); a name with fewer than two tokens yields none.
                for (int i = 0; i + 1 < tokens.Length; i++)
                {
                    Tuple<string, string> pair = Tuple.Create(tokens[i], tokens[i + 1]);
                    int index;

                    if (!indexByPair.TryGetValue(pair, out index))
                    {
                        index = vocab.Count;
                        vocab.Add(pair);
                        indexByPair.Add(pair, index);
                    }

                    indices.Add(index);
                }

                parsedNames.Add(indices);
            }

            Vocab       = vocab;
            ParsedNames = parsedNames;

            // One contiguous run of vocab.Count values per file, in file order. The array starts
            // zeroed, so only the entries for pairs that actually occur need to be written.
            // NOTE(review): presumably CreateMatrix.Dense reads this as column-major storage
            // (one column per file before the transpose) - confirm against the matrix library.
            double[] v = new double[vocab.Count * parsedNames.Count];

            for (int file = 0; file < parsedNames.Count; file++)
            {
                int offset = file * vocab.Count;

                foreach (int index in parsedNames[file])
                {
                    v[offset + index] = presentWeight;
                }
            }

            // NOTE(review): when there are no files or no token pairs, one or both dimensions are
            // zero; whether CreateMatrix.Dense accepts that depends on the library - confirm.
            Input = CreateMatrix.Dense(Vocab.Count, ParsedNames.Count, v).Transpose().NormalizeRows(2);
        }