From 6824f5f93285af40c692a089cc14f8e8dce7d158 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=A6=8F=E6=9D=A5Lee?=
Date: Tue, 9 Mar 2021 07:21:56 +0800
Subject: [PATCH] =?UTF-8?q?add=20=E7=AC=AC=E4=BA=8C=E6=9C=9F=E8=AE=AD?=
 =?UTF-8?q?=E7=BB=83=E8=90=A5/3=E7=8F=AD/3=E7=8F=AD=5F=E6=9D=8E=E6=B3=8A/?=
 =?UTF-8?q?=E7=AC=AC=E5=8D=81=E5=91=A8/=E7=AC=AC=E4=B8=89=E8=8A=82/setting?=
 =?UTF-8?q?s.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../settings.py" | 111 ++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\346\235\216\346\263\212/\347\254\254\345\215\201\345\221\250/\347\254\254\344\270\211\350\212\202/settings.py"

diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\346\235\216\346\263\212/\347\254\254\345\215\201\345\221\250/\347\254\254\344\270\211\350\212\202/settings.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\346\235\216\346\263\212/\347\254\254\345\215\201\345\221\250/\347\254\254\344\270\211\350\212\202/settings.py"
new file mode 100644
index 00000000..875dba2b
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\346\235\216\346\263\212/\347\254\254\345\215\201\345\221\250/\347\254\254\344\270\211\350\212\202/settings.py"
@@ -0,0 +1,111 @@
+# Scrapy settings for jd_crawler_scrapy project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+#     https://docs.scrapy.org/en/latest/topics/settings.html
+#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+import os
+
+BOT_NAME = 'jd_crawler_scrapy'
+
+SPIDER_MODULES = ['jd_crawler_scrapy.spiders']
+NEWSPIDER_MODULE = 'jd_crawler_scrapy.spiders'
+
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+#USER_AGENT = 'jd_crawler_scrapy (+http://www.yourdomain.com)'
+
+# Obey robots.txt rules
+# ROBOTSTXT_OBEY = False
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+#CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+#DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
+#CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+#   'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+#    'jd_crawler_scrapy.middlewares.JdCrawlerScrapySpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+DOWNLOADER_MIDDLEWARES = {
+    # 'jd_crawler_scrapy.middlewares.JdCrawlerScrapyDownloaderMiddleware': 543,
+    'jd_crawler_scrapy.middlewares.UAMiddleware': 100,
+    'jd_crawler_scrapy.middlewares.ProxyMiddleware': 200,
+    'jd_crawler_scrapy.middlewares.ProcessException': 300,
+}
+
+# Enable or disable extensions
+# See https://docs.scrapy.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+#    'scrapy.extensions.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+ITEM_PIPELINES = {
+    'jd_crawler_scrapy.pipelines.JdCrawlerScrapyPipeline': 300,
+}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
+#AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED = True
+#HTTPCACHE_EXPIRATION_SECS = 0
+#HTTPCACHE_DIR = 'httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES = []
+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+
+# MySQL connection parameters (a project-specific setting).
+# Each value can be overridden through an environment variable so that real
+# credentials do not have to be committed; the fallbacks below are the
+# original local-development values.
+MYSQL_CONF = {
+    "host": os.environ.get("MYSQL_HOST", "127.0.0.1"),
+    "user": os.environ.get("MYSQL_USER", "root"),
+    "password": os.environ.get("MYSQL_PASSWORD", "root"),
+    "db": os.environ.get("MYSQL_DB", "tunan_class"),
+}
+
+# Logging: destination file and minimum severity written to it.
+# NOTE(review): LOG_FILE is a relative path, so it presumably resolves against
+# the directory the crawl is started from -- confirm that the log/ directory
+# exists there before running.
+LOG_FILE = "../jd_crawler_scrapy/log/jd_search.log"
+LOG_LEVEL = "DEBUG"
-- 
Gitee