コード例 #1
0
        /// <summary>
        /// Creates an index document for a file, choosing the document type from the file extension.
        /// </summary>
        /// <param name="file">Path of the file to wrap. The extension is matched case-insensitively.</param>
        /// <param name="parent">Data source that owns the created document.</param>
        /// <returns>
        /// A document for <c>.pdf</c>, <c>.txt</c>, <c>.html</c> and <c>.htm</c> files;
        /// <c>null</c> for any other extension.
        /// </returns>
        public static IIndexDocument FromFile(string file, IIndexDataSource parent)
        {
            // Ordinal, case-insensitive matching: file extensions are identifiers, not linguistic text,
            // so the result must not depend on the current culture.
            const System.StringComparison ignoreCase = System.StringComparison.OrdinalIgnoreCase;

            if (file.EndsWith(".pdf", ignoreCase))
            {
                return new IndexPDFDocument(file, parent);
            }

            if (file.EndsWith(".txt", ignoreCase))
            {
                return new IndexedTextFile(file, parent);
            }

            // The original condition tested ".html" twice; the second test was meant to cover ".htm".
            if (file.EndsWith(".html", ignoreCase) || file.EndsWith(".htm", ignoreCase))
            {
                using (FileStream fs = File.OpenRead(file))
                {
                    return WebDataSource.FromHtml(fs, file, parent.Name);
                }
            }

            // Unsupported extension.
            return null;
        }
コード例 #2
0
ファイル: XmlDataSource.cs プロジェクト: alexarchen/Docodo
 /// <summary>
 /// Builds the index document referenced by the "file" entry of <paramref name="item"/>.
 /// </summary>
 /// <param name="item">Item to resolve; only its "file" entry is consulted here.</param>
 /// <returns>
 /// <c>null</c> when the item has no "file" entry. Otherwise the entry is appended to <c>Path</c>;
 /// a result containing "://" is loaded through <c>WebDataSource.FromUrl</c>, anything else
 /// through <c>DocumentsDataSource.FromFile</c>.
 /// </returns>
 protected override IIndexDocument DocumentFromItem(Document item)
 {
     // Nothing to load without a file reference.
     if (!item.ContainsKey("file"))
     {
         return null;
     }

     string location = Path + item["file"];
     bool hasScheme = location.Contains("://");

     if (hasScheme)
     {
         return WebDataSource.FromUrl(location, this);
     }

     return DocumentsDataSource.FromFile(location, this);
 }
コード例 #3
0
ファイル: DBDataSource.cs プロジェクト: alexarchen/Docodo
        /// <summary>
        /// Adds a document whose content is supplied as a BLOB stream.
        /// </summary>
        /// <remarks>
        /// The content type is sniffed from the first bytes of the stream: a "%PDF" signature is
        /// handled as PDF, content containing an "&lt;html" tag as HTML, and everything else as text
        /// decoded with the detected charset. When no charset can be detected nothing is queued.
        /// </remarks>
        /// <param name="name">Name of the record.</param>
        /// <param name="stream">
        /// Document content. Must be seekable: it is rewound to the start after sniffing.
        /// In the plain-text branch the stream is closed together with the reader that consumes it.
        /// </param>
        /// <param name="fields">Header fields to attach to the document; may be <c>null</c>.</param>
        /// <param name="queue">Queue that receives the created document.</param>
        /// <exception cref="InvalidDataException">The index type of this source is not <c>IndexType.Blob</c>.</exception>
        public virtual void AddRecord(string name, Stream stream, string fields, ConcurrentQueue <IIndexDocument> queue)
        {
            const int SniffLength = 4000;
            IIndexDocument doc = null;

            AddRecordBase(name, fields);

            // The former "== File || != Blob" test reduces to this single comparison.
            if (indexType != IndexType.Blob)
            {
                throw new InvalidDataException("Adding record of wrong IndexType");
            }

            // Read the sniff prefix straight from the stream. Wrapping it in a BinaryReader and
            // disposing that reader closed the stream before the branches below could use it.
            byte[] buff = new byte[SniffLength];
            int read = 0;
            while (read < buff.Length)
            {
                int chunk = stream.Read(buff, read, buff.Length - read);
                if (chunk <= 0)
                {
                    break; // end of stream
                }
                read += chunk;
            }

            // Decode only the bytes actually read so padding zeros do not end up in the text.
            string det = Encoding.UTF8.GetString(buff, 0, read);

            stream.Seek(0, SeekOrigin.Begin);

            // detect type
            if ((read >= 4) && (buff[0] == '%') && (buff[1] == 'P') && (buff[2] == 'D') && (buff[3] == 'F'))
            {
                DocumentsDataSource.IndexPDFDocument pdf = new DocumentsDataSource.IndexPDFDocument(name, stream, this);
                if (fields != null)
                {
                    pdf.headers = () => fields;
                }
                doc = pdf;
            }
            else if (det.IndexOf("<html", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                IndexPagedTextFile file = WebDataSource.FromHtml(stream, name, Name);
                if (fields != null)
                {
                    file.SetHeaders(fields);
                }
                // Previously the HTML document was built but never assigned, so it was never queued.
                doc = file;
            }
            else
            {
                // detect charset from the bytes that were really read
                Ude.CharsetDetector detector = new Ude.CharsetDetector();
                detector.Feed(buff, 0, read);
                detector.DataEnd();
                if (detector.Charset != null)
                {
                    Encoding enc = Portable.Text.Encoding.GetEncoding(detector.Charset);
                    using (StreamReader sreader = new StreamReader(stream, enc, false))
                    {
                        doc = new IndexPagedTextFile("", sreader.ReadToEnd(), fields != null ? fields : "");
                    }
                }
            }

            if (doc != null)
            {
                Enqueue(queue, doc);
            }
        }
コード例 #4
0
        /// <summary>
        /// Console entry point: loads vocabularies, opens the index, registers data sources from the
        /// command line and then runs an interactive key-driven loop (index, search, info, vocs, exit).
        /// </summary>
        /// <param name="args">
        /// Command line arguments. Recognized values: <c>server</c>, <c>-p:PORT</c> (default 9001),
        /// <c>-i:PATH</c> (default "."), <c>-cv:NAME</c>, <c>-stops:FILE</c> and
        /// <c>-source:TYPE,LOCATION[,EXTRA]</c> where TYPE is <c>doc</c>, <c>web</c>, <c>xml</c> or <c>mysql</c>.
        /// When <c>-p:</c> or <c>-i:</c> is given several times the last occurrence wins.
        /// </param>
        static void Main(string[] args)
        {
            Console.Write("DOCODO Search Engine\nCopyrigt (c) 2018 Alexey Zakharchenko \n");
            int nPort = 9001;

            // Keep the default port when the option is missing or is not a valid integer.
            string portArg = args.LastOrDefault(a => a.StartsWith("-p:"));
            int parsedPort;
            if ((portArg != null) && Int32.TryParse(portArg.Substring(3), out parsedPort))
            {
                nPort = parsedPort;
            }

            if (args.Contains("server"))
            {
                new DocodoServer(nPort);
            }

            List <Vocab> vocs = new List <Vocab>();

            Console.Write("Loaded vocs: ");
            foreach (string file in Directory.GetFiles("Dict\\", "*.voc"))
            {
                vocs.Add(new Vocab(file));
                Console.Write(file.Substring(file.LastIndexOf("\\") + 1).Split('.')[0] + " ");
            }
            if (vocs.Count == 0)
            {
                Console.Write("No!");
            }
            Console.Write("\n");

            // Create vocabularies requested on the command line, e.g. -cv:en
            foreach (string crvoc in (from a in args where a.StartsWith("-cv:") select a.Substring(4)))
            {
                CreateVoc(crvoc);
            }

            // Keep the default base path when no -i: option is present.
            String basepath = ".";
            string baseArg = args.LastOrDefault(a => a.StartsWith("-i:"));
            if (baseArg != null)
            {
                basepath = baseArg.Substring(3);
            }
            ind = new Index(basepath, false, vocs.ToArray());

            foreach (string source in (from a in args where a.StartsWith("-source:") select a.Substring(8)))
            {
                var spl = source.Split(',');

                // Every source type reads spl[1]; reject malformed arguments instead of crashing.
                if (spl.Length < 2)
                {
                    Console.WriteLine("Invalid -source argument (expected TYPE,LOCATION): " + source);
                    continue;
                }

                if (spl[0].Equals("doc"))
                {
                    ind.AddDataSource(new DocumentsDataSource("doc", spl[1]));
                }
                else if (spl[0].Equals("web"))
                {
                    WebDataSource websource = new WebDataSource("web", spl[1], spl.Length > 2 ? spl[2] : "");
                    ind.AddDataSource(websource);
                }
                else if (spl[0].Equals("xml"))
                {
                    XmlDataSource xmlsource = new XmlDataSource("xml", spl[1]);
                    ind.AddDataSource(xmlsource);
                }
                else if (spl[0].Equals("mysql"))
                {
                    // spl[1] is a key=value settings file describing the database source.
                    string Connect   = null;
                    string Query     = null;
                    string FieldName = null;
                    string BasePath  = null;
                    try
                    {
                        foreach (string line in File.ReadAllLines(spl[1]))
                        {
                            string[] name = line.Split("=");

                            // Connect/Query/BasePath values may contain '=' themselves, so they are
                            // taken as the rest of the line after "Key=" rather than from the split.
                            if (name[0].Equals("Connect"))
                            {
                                Connect = line.Substring(8);
                            }
                            if (name[0].Equals("Query"))
                            {
                                Query = line.Substring(6);
                            }
                            if (name[0].Equals("BasePath"))
                            {
                                BasePath = line.Substring(9);
                            }
                            if (name[0].Equals("IndexType"))
                            {
                                FieldName = name[1];
                            }
                        }

                        if (Connect == null)
                        {
                            throw new InvalidDataException("No Connect key");
                        }
                        if (Query == null)
                        {
                            throw new InvalidDataException("No Query key");
                        }
                        if (FieldName == null)
                        {
                            throw new InvalidDataException("No IndexType key");
                        }
                        if (BasePath == null)
                        {
                            throw new InvalidDataException("No BasePath key");
                        }

                        // NOTE(review): the index type is hard-coded to File and the value of the
                        // IndexType key is passed on as the last argument - presumably parsing a
                        // "type:field" value was intended; confirm before changing.
                        ind.AddDataSource(new MySqlDBDocSource("mysql_" + spl[1], BasePath, Connect, Query, DBDataSourceBase.IndexType.File, FieldName));
                    }
                    catch (Exception e)
                    {
                        // Best effort: a broken source definition must not stop the other sources.
                        Console.WriteLine("Error adding mysql source: " + e.Message);
                    }
                }
            }

            ind.LoadStopWords("Dict\\stop.txt");
            foreach (string sf in (from a in args
                                   where a.StartsWith("-stops:")
                                   select a.Substring(7)))
            {
                ind.LoadStopWords(sf);
            }

            cancelationToken = ind.cancelationToken; // token to cancel something

            if (ind.CanSearch)
            {
                Console.WriteLine("Index loaded, contains {0} words", ind.Count);
            }

            ConsoleKey c;

            do
            {
                Console.WriteLine("Press " + (ind.CanIndex?"I to index, ":"") + (ind.CanSearch ? " S to search, O for info, " : "") + "V to manage vocs, E to exit...");
                c = Console.ReadKey(false).Key;

                if (c == ConsoleKey.V)
                {
                    while (true)
                    {
                        Console.WriteLine("-----------\nCreate vocabs\nType voc name from list below or e to exit:");
                        foreach (string f in Directory.GetDirectories("Dict\\").Select((s) => s.Substring(s.LastIndexOf('\\') + 1)))
                        {
                            Console.Write(f + ",");
                        }
                        Console.WriteLine("");

                        // ReadLine returns null when the input stream is closed; treat that as exit.
                        string vocName = Console.ReadLine();
                        if ((vocName == null) || vocName.Equals("e"))
                        {
                            break;
                        }
                        CreateVoc(vocName);
                    }
                }
                else if (c == ConsoleKey.O)
                {
                    ShowInfo();
                }
                else if (c == ConsoleKey.S)
                {
                    Console.WriteLine("Type text to search, e - exit");
                    Console.Write("req:");
                    Console.InputEncoding = Encoding.Unicode;
                    string req;
                    while (((req = ReadSearchRequest()) != null) && !req.Equals("e"))
                    {
                        Index.SearchResult result = ind.Search(req);

                        Console.WriteLine("Found {0} pages in {1} docs:", result.foundPages.Count, result.foundDocs.Count);
                        foreach (var d in result.foundDocs)
                        {
                            Console.WriteLine($"Doc: {d.Name}, Found {d.pages.Count} pages");
                            foreach (var p in d.pages)
                            {
                                Console.WriteLine($"  Page {p.id} ({p.pos.Count} times)");
                                Console.WriteLine("    Text: " + p.text);
                            }
                        }
                        Console.Write("req:");
                    }
                }
                else if (c == ConsoleKey.I)
                {
                    Console.WriteLine($"Start Indexing ...");

                    // NOTE: the console listener that cancelled indexing on the C key was constructed
                    // but never started, so it has been removed; indexing runs until it finishes.
                    try
                    {
                        // GetAwaiter().GetResult() rethrows the original exception. Wait() would wrap it
                        // in an AggregateException and the cancellation handler below would never run.
                        ind.CreateAsync().GetAwaiter().GetResult();
                    }
                    catch (OperationCanceledException)
                    {
                        // Cancellation is an expected outcome; nothing to report.
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine("Error creaing index");
                        Console.WriteLine(e.Message);
                    }
                    cancelationToken = null;

                    Console.WriteLine("Indexing complited.");
                }
            }while (c != ConsoleKey.E);
        }