コード例 #1
0
ファイル: ScoredDocument.cs プロジェクト: mpvyard/resin
 /// <summary>
 /// Creates a scored document from a document table row and its score.
 /// </summary>
 /// <param name="tableRow">The row to store in <c>TableRow</c>; must not be null.</param>
 /// <param name="score">The value to store in <c>Score</c>.</param>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="tableRow"/> is null.
 /// </exception>
 public ScoredDocument(DocumentTableRow tableRow, double score)
 {
     if (tableRow == null)
     {
         // Report the real parameter name so ParamName points at the offending argument.
         throw new ArgumentNullException("tableRow");
     }

     TableRow = tableRow;
     Score    = score;
 }
コード例 #2
0
        /// <summary>
        /// Builds a lookup from each field key of <paramref name="document"/> to the
        /// zero-based position of that key in the enumeration order of <c>Fields.Keys</c>.
        /// </summary>
        /// <param name="document">The row whose field keys are indexed.</param>
        /// <returns>A dictionary mapping every key to its position, stored as a <c>short</c>.</returns>
        public static IDictionary <string, short> ToKeyIndex(this DocumentTableRow document)
        {
            // Snapshot the keys first so positions follow one fixed enumeration order.
            var fieldNames = document.Fields.Keys.ToList();
            var positions  = new Dictionary <string, short>();
            var position   = 0;

            foreach (var fieldName in fieldNames)
            {
                positions.Add(fieldName, (short)position);
                position++;
            }

            return positions;
        }
コード例 #3
0
        /// <summary>
        /// Analyzes <paramref name="document"/>, adds every resulting term to the tree builder,
        /// then writes the document through <paramref name="session"/> and logs its table ID.
        /// </summary>
        /// <param name="document">The document to analyze and write.</param>
        /// <param name="session">The write session that receives the document.</param>
        public void Write(DocumentTableRow document, IWriteSession session)
        {
            // Each analyzed term is added to the tree builder under its own field and value.
            foreach (var analyzedTerm in _analyzer.AnalyzeDocument(document))
            {
                _treeBuilder.Add(analyzedTerm.Field, analyzedTerm.Value, analyzedTerm);
            }

            // The document itself is written only after all of its terms have been added.
            session.Write(document);

            Log.DebugFormat("analyzed doc ID {0}", document.TableId);
        }
コード例 #4
0
        /// <summary>
        /// Lazily reads documents from the "*.zip" files found under <c>_directory</c>,
        /// searching all subdirectories.
        /// </summary>
        /// <remarks>
        /// The first <c>_skip</c> zip files are passed over, and enumeration stops once
        /// <c>_take</c> documents have been yielded. Files that throw while being read are
        /// logged and skipped; files that yield no document do not count towards <c>_take</c>.
        /// </remarks>
        /// <returns>One <see cref="DocumentTableRow"/> per successfully parsed zip file.</returns>
        private IEnumerable <DocumentTableRow> ReadInternal()
        {
            var files   = Directory.GetFiles(_directory, "*.zip", SearchOption.AllDirectories);
            var skipped = 0;
            var took    = 0;

            foreach (var zipFileName in files)
            {
                // Skipping is counted per zip file, whether or not it would have produced a document.
                if (_skip > 0 && skipped++ < _skip)
                {
                    continue;
                }

                if (took == _take)
                {
                    break;
                }

                DocumentTableRow document;

                // "yield return" cannot appear inside a try block that has a catch clause,
                // so the file is parsed here and the result is yielded afterwards.
                try
                {
                    document = ReadZippedDocument(zipFileName);
                }
                catch (Exception ex)
                {
                    // Best effort: one unreadable archive must not abort the whole enumeration.
                    Log.InfoFormat("unreadable file: {0} {1}", zipFileName, ex.Message);
                    continue;
                }

                if (document != null)
                {
                    yield return(document);

                    took++;
                }
            }
        }

        /// <summary>
        /// Parses the first ".txt" entry of one zip archive into a document.
        /// </summary>
        /// <param name="zipFileName">Path of the zip archive to read.</param>
        /// <returns>
        /// The parsed document, or null when the archive has no ".txt" entry, the entry ends
        /// before a line containing "*** ", or no line containing "encoding: ASCII" precedes it.
        /// </returns>
        private DocumentTableRow ReadZippedDocument(string zipFileName)
        {
            // Open read-only. FileStream(path, FileMode.Open) alone requests read/write access,
            // which fails for read-only files even though the archive is only ever read.
            using (var fileStream = new FileStream(zipFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (var zip = new ZipArchive(fileStream, ZipArchiveMode.Read))
            {
                ZipArchiveEntry txtFile = null;

                foreach (var entry in zip.Entries)
                {
                    // Ordinal comparison: the extension check must not depend on the current culture.
                    if (entry.Name.EndsWith(".txt", StringComparison.Ordinal))
                    {
                        txtFile = entry;
                        break;
                    }
                }

                if (txtFile == null)
                {
                    return null;
                }

                using (var txtStream = txtFile.Open())
                using (var reader = new StreamReader(txtStream))
                {
                    // The title is the first two lines of the entry joined by a single space.
                    var    title    = reader.ReadLine() + " " + reader.ReadLine();
                    var    head     = new StringBuilder();
                    string encoding = null;

                    // Collect header lines until the line containing "*** " that ends the header.
                    while (true)
                    {
                        var line = reader.ReadLine();

                        if (line == null)
                        {
                            // End of entry reached before the header terminator: no document.
                            return null;
                        }

                        if (line.Contains("*** "))
                        {
                            break;
                        }

                        if (line.Contains("encoding: ASCII"))
                        {
                            encoding = line;
                        }
                        else
                        {
                            head.Append(" ");
                            head.Append(line);
                        }
                    }

                    // Only entries whose header has an "encoding: ASCII" line are accepted.
                    if (encoding == null)
                    {
                        return null;
                    }

                    // Everything after the header terminator is the body.
                    var body = reader.ReadToEnd();

                    // NOTE(review): "head" is passed as the StringBuilder itself, exactly as before;
                    // confirm that the Field constructor is meant to receive it rather than a string.
                    return new DocumentTableRow(
                        new List <Field>
                        {
                            new Field("title", title),
                            new Field("head", head),
                            new Field("body", body),
                            new Field("uri", zipFileName.Replace(_directory, ""))
                        });
                }
            }
        }