Exemple #1
0
        /// <summary>
        /// Picks one random record from the 'files' collection that is not scanned yet
        /// (Scanned == false and ClassName == "File").
        /// </summary>
        /// <returns>
        /// A <see cref="CrawlDocument"/> filled from the selected record, or <c>null</c>
        /// when no record matches the filter.
        /// </returns>
        public CrawlDocument GetNewRandomUnscannedDocument()
        {
            QueryDocument filter = new QueryDocument();

            filter.Add("Scanned", false);
            filter.Add("ClassName", "File");

            // Counting first and then skipping to a random offset is not the most efficient
            // approach, but the 'files' collection is not expected to be very large.
            long num = files.Find(filter).Count();

            if (num <= 0)
            {
                // Nothing left to scan: skip the second round-trip entirely.
                return(null);
            }

            // Seed from a fresh Guid rather than the clock: two calls made within the same
            // timer tick would otherwise share a seed and select the same record.
            Random r = new Random(Guid.NewGuid().GetHashCode());

            // Clamp before narrowing so an oversized count cannot wrap to a negative bound.
            int x        = r.Next((int)Math.Min(num, int.MaxValue));
            var allFiles = files.Find(filter).SetSkip(x).SetLimit(1);

            // The cursor yields at most one record; it can still be empty if the matching
            // records changed between the count and this query.
            foreach (var file in allFiles)
            {
                CrawlDocument result = new CrawlDocument();
                result.ClassName = "File";
                result.FileId    = file["FileId"].ToString();
                result.Hash      = file["Hash"].ToString();
                result.Path      = file["Path"].ToString();
                result.Scanned   = file["Scanned"].ToBoolean();

                // A guard that returned null once the database file exceeded
                // 2000*1024*1024 bytes used to sit here; it is currently disabled.

                return(result);
            }

            return(null);
        }
Exemple #2
0
        /// <summary>
        /// Looks up every record in the "files" collection whose FileId equals
        /// <paramref name="fileId"/> and whose ClassName is "File".
        /// </summary>
        /// <param name="fileId">Value matched against the record's FileId field.</param>
        /// <returns>
        /// One <see cref="CrawlDocument"/> per matching record (FileId, Hash, Path and Scanned
        /// are copied; ClassName is not set here). Empty list when nothing matches.
        /// </returns>
        public List <CrawlDocument> GetFile(string fileId)
        {
            var collection = DatabaseMongo.GetCollection <BsonDocument>("files");

            QueryDocument query = new QueryDocument();
            query.Add("FileId", fileId);
            query.Add("ClassName", "File");

            List <CrawlDocument> matches = new List <CrawlDocument>();

            foreach (BsonDocument record in collection.Find(query))
            {
                matches.Add(new CrawlDocument
                {
                    FileId  = record["FileId"].ToString(),
                    Hash    = record["Hash"].ToString(),
                    Path    = record["Path"].ToString(),
                    Scanned = record["Scanned"].ToBoolean()
                });
            }

            return matches;
        }
Exemple #3
0
        /// <summary>
        /// Inserts 1000 unscanned records, draws two random unscanned documents and checks
        /// that they differ and that the whole run stays within the time budget.
        /// </summary>
        public void TestGetNewRandomUnscannedDocument()
        {
            db.Clear();
            int numRecords = 1000;

            // StartNew() actually starts the clock; a Stopwatch that is only constructed
            // never runs, which would make the timing assertion below trivially true.
            Stopwatch timer = Stopwatch.StartNew();

            for (int i = 0; i < numRecords; i++)
            {
                CrawlDocument cd = new CrawlDocument();
                cd.Hash      = Guid.NewGuid().ToString();
                cd.Path      = i.ToString();
                cd.ClassName = "File";
                db.InsertIntoFiles(cd);
            }

            CrawlDocument cd1 = db.GetNewRandomUnscannedDocument();

            CrawlDocument cd2 = db.GetNewRandomUnscannedDocument();

            timer.Stop();

            // Fail with a clear message instead of a NullReferenceException below.
            Assert.IsNotNull(cd1);
            Assert.IsNotNull(cd2);

            // Random-selected files from 1000 records should differ
            // (a collision is still possible by chance, roughly 1 in numRecords).
            Assert.AreNotEqual(cd1.FileId, cd2.FileId);

            // Assume 3ms for each record should be enough
            Assert.IsTrue(timer.ElapsedMilliseconds < 3 * numRecords);
        }
Exemple #4
0
        /// <summary>
        /// Finds every *.dwg file under <paramref name="dir"/> (recursively), copies each one
        /// into the data folder under the name "&lt;FileId&gt;.dwg" and registers it in the database.
        /// </summary>
        /// <param name="dir">Root folder that is searched for drawings.</param>
        /// <param name="dbName">Name passed to the <c>DbMongo</c> constructor.</param>
        static void Scan(string dir, string dbName)
        {
            // Open the folder and pick up all dwg files from it
            string targetFolder = @"c:\Data\";

            string[] drawings = Directory.GetFiles(dir, "*.dwg", SearchOption.AllDirectories);
            DbMongo  database = new DbMongo(dbName);

            for (int index = 0; index < drawings.Length; index++)
            {
                string        sourcePath = drawings[index];
                CrawlDocument document   = new CrawlDocument(sourcePath);
                string        copyPath   = Path.Combine(targetFolder, document.FileId + ".dwg");

                FileCopy(sourcePath, copyPath);
                database.InsertIntoFiles(document);
            }

            /*
             * // Launch one process per processor core, each on its own core
             * int numCores = 4;
             * for (int i = 0; i < numCores; i++)
             * {
             *  //crawlinNano();
             *  //http://cplus.about.com/od/learnc/a/multi-threading-using-task-parallel-library.htm
             * Task.Factory.StartNew(() => crawlinNano());
             * // The process picks a random unscanned file from the database and scans it into Json
             * // For now this is done by launching nanoCAD instances manually
             * // If the file has changed, its new hash is recorded
             * }
             */
        }
Exemple #5
0
        /// <summary>
        /// Inserts <paramref name="crawlDocument"/> into the files collection unless a record
        /// with the same Hash is already stored.
        /// </summary>
        /// <param name="crawlDocument">Document to store; its Hash is used for the duplicate check.</param>
        public void InsertIntoFiles(CrawlDocument crawlDocument)
        {
            BsonDocument serialized = crawlDocument.ToBsonDocument();
            var          existing   = files.FindOne(new QueryDocument("Hash", crawlDocument.Hash));

            // A record with this hash is already present: nothing to insert.
            if (existing != null)
            {
                return;
            }

            files.Insert(serialized);
        }
Exemple #6
0
        /// <summary>
        /// Repeatedly fetches a random unscanned document from the "SingleFile" database and
        /// scans it, until the database returns no more documents.
        /// </summary>
        /// <param name="closeAfterComplete">
        /// When true, the host application is asked to quit after the loop ends.
        /// </param>
        public static void Crawl(bool closeAfterComplete = true)
        {
            DbMongo database = new DbMongo("SingleFile");

            // Keep pulling unscanned drawings until the database hands back null.
            for (CrawlDocument pending = database.GetNewRandomUnscannedDocument();
                 pending != null;
                 pending = database.GetNewRandomUnscannedDocument())
            {
                crawlAcDbDocument scanner = new crawlAcDbDocument(pending);
                scanner.sqlDB = database;
                scanner.ScanDocument();
            }

            if (!closeAfterComplete)
            {
                return;
            }

            HostMgd.ApplicationServices.Application.Quit();
        }
Exemple #7
0
        /// <summary>
        /// Checks that records inserted from raw JSON and from a <c>CrawlDocument</c> built from
        /// a file path can afterwards be found by hash and by file id.
        /// </summary>
        public void TestInsertIntoFiles()
        {
            db.Clear();

            // Two records supplied as JSON text.
            string firstRecordJson = @"
            {
	            'ClassName': 'File',
	            'FileId': 'bc6a1669-51ce-444c-94c6-cfec71c0f44d',
	            'Hash': 'd520b80512f226e81dd72294037657fd',
	            'Path': '\\\\FILESERVER\\home\\#АРХИВ 2014\\Объекты\\МНОГОТОПЛИВНАЯ АЗС №15\\задание на фундаменты.dwg',
	            'Scanned': false,
	            '_id': {
		            '$oid': '55a49dfff80dc7180c8228d3'
	            }
            }";
            string secondRecordJson = @"
            {
	            'ClassName': 'File',
	            'FileId': '9e2769ff-678f-401b-8d10-e0581aa6bf98',
	            'Hash': '253ffb6063333c5bfc1109c5d7db1945',
	            'Path': '\\\\FILESERVER\\home\\#АРХИВ 2014\\Объекты\\МНОГОТОПЛИВНАЯ АЗС №15\\образец исх данные.dwg',
	            'Scanned': false,
	            '_id': {
		            '$oid': '55a49dfff80dc7180c8228d4'
	            }
            }
            ";

            db.InsertIntoFiles(firstRecordJson);
            db.InsertIntoFiles(secondRecordJson);

            // First record is reachable by hash and by id.
            Assert.IsTrue(db.HasFileHash("d520b80512f226e81dd72294037657fd"));
            Assert.IsTrue(db.HasFileId("bc6a1669-51ce-444c-94c6-cfec71c0f44d"));

            // Second record is reachable by hash and by id.
            Assert.IsTrue(db.HasFileHash("253ffb6063333c5bfc1109c5d7db1945"));
            Assert.IsTrue(db.HasFileId("9e2769ff-678f-401b-8d10-e0581aa6bf98"));

            db.Clear();

            // Same check for a document constructed from a file on disk.
            Crawl.CrawlDocument fileDocument = new CrawlDocument(@"D:\Documents\Desktop\SingleFile\+b3826065-07d1-4d4a-8af4-35ebc3630117.dwg");

            db.InsertIntoFiles(fileDocument);

            Assert.IsTrue(db.HasFileHash(fileDocument.Hash));
            Assert.IsTrue(db.HasFileId(fileDocument.FileId));
        }
Exemple #8
0
        /// <summary>
        /// Collects a <see cref="CrawlDocument"/> for every resolved node in the xref graph of
        /// <paramref name="aDoc"/>'s database.
        /// </summary>
        /// <param name="aDoc">Document whose database xref graph is walked.</param>
        /// <returns>One entry per node whose status is <c>XrefStatus.Resolved</c>; may be empty.</returns>
        private List <CrawlDocument> GetXrefs(Document aDoc)
        {
            // Approach taken from:
            // http://adndevblog.typepad.com/autocad/2012/06/finding-all-xrefs-in-the-current-database-using-cnet.html
            XrefGraph graph     = aDoc.Database.GetHostDwgXrefGraph(false);
            int       nodeCount = graph.NumNodes;

            List <CrawlDocument> resolved = new List <CrawlDocument>();

            // NOTE(review): the walk starts at node 0; if that node is the host drawing itself
            // it is included in the result - confirm this is intended.
            for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
            {
                XrefGraphNode node = graph.GetXrefNode(nodeIndex);

                if (node.XrefStatus != XrefStatus.Resolved)
                {
                    continue;
                }

                resolved.Add(new CrawlDocument(node.Database.Filename));
            }

            return resolved;
        }
Exemple #9
0
 /// <summary>
 /// Copies the path and id from <paramref name="crawlDoc"/> and opens the drawing named
 /// "&lt;FileId&gt;.dwg" located in the data directory.
 /// </summary>
 /// <param name="crawlDoc">Database record describing the drawing to open.</param>
 public crawlAcDbDocument(CrawlDocument crawlDoc)
 {
     FullPath = crawlDoc.Path;
     FileId   = crawlDoc.FileId;

     // The drawing is opened from the data directory by id, not from its original Path.
     string drawingPath = Path.Combine(_dataDir, crawlDoc.FileId + ".dwg");
     teighaDocument = TeighaApp.DocumentManager.Open(drawingPath);
 }