From e11901673e79fc6de74db81c8eb4e00940978bd8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8C=97=E8=8C=97=E6=98=AF=E5=8F=AA=E5=81=87=E9=B8=BD?=
 =?UTF-8?q?=E5=AD=90?= <471167673@qq.com>
Date: Fri, 5 Mar 2021 02:24:55 +0800
Subject: [PATCH 1/6] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20week10?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../week10/.keep"                                                 | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/.keep"

diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/.keep" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/.keep"
new file mode 100644
index 00000000..e69de29b
-- 
Gitee


From af71f90eaf5b834c5cdd5e4a491f5d5d4a5af0ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8C=97=E8=8C=97=E6=98=AF=E5=8F=AA=E5=81=87=E9=B8=BD?=
 =?UTF-8?q?=E5=AD=90?= <471167673@qq.com>
Date: Fri, 5 Mar 2021 02:25:41 +0800
Subject: [PATCH 2/6] =?UTF-8?q?=E7=AC=AC=E5=8D=81=E5=91=A8-=E7=AC=AC?=
 =?UTF-8?q?=E4=B8=80=E8=8A=82-=E4=BD=9C=E4=B8=9A=E6=8F=90=E4=BA=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../week10/parser_test.py"                    | 11 +++++++
 ...34\344\270\232\346\217\220\344\272\244.md" | 33 +++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/parser_test.py"
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/\347\254\254\345\215\201\345\221\250-\347\254\254\344\270\200\350\212\202-\344\275\234\344\270\232\346\217\220\344\272\244.md"

diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/parser_test.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/parser_test.py"
new file mode 100644
index 00000000..70e562ce
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/parser_test.py"
@@ -0,0 +1,11 @@
+import sys
+import os
+sys.path.append(os.getcwd())
+print(sys.path)
+from jd_parser.search import parse_jd_item
+
+
+with open(r"test\search.html", "r", encoding="utf-8") as f:
+    html = f.read()
+    result = parse_jd_item(html)
+    print(result)
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/\347\254\254\345\215\201\345\221\250-\347\254\254\344\270\200\350\212\202-\344\275\234\344\270\232\346\217\220\344\272\244.md" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/\347\254\254\345\215\201\345\221\250-\347\254\254\344\270\200\350\212\202-\344\275\234\344\270\232\346\217\220\344\272\244.md"
new file mode 100644
index 00000000..94d2f17c
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/\347\254\254\345\215\201\345\221\250-\347\254\254\344\270\200\350\212\202-\344\275\234\344\270\232\346\217\220\344\272\244.md"
@@ -0,0 +1,33 @@
+
+
+# 第十周-第一节-作业提交
+
+- 用命令行启动`jd_crawler`
+
+  1. ```
+     import sys
+     print(sys.path)
+     sys.path.append(r"D:\pycharm\PycharmProjects\homework")
+     from jd_crawler.parser.search import parse_jd_item
+     from jd_crawler.settings import MYSQL_CONF, HEADERS
+     ```
+
+  2. ```
+     import sys
+     print(sys.path)
+     from jd_parser.search import parse_jd_item
+     from settings import MYSQL_CONF, HEADERS
+     ```
+
+- 执行结果
+
+  ```
+  (venv) D:\pycharm\PycharmProjects\homework\jd_crawler>python main.py
+  ['D:\\pycharm\\PycharmProjects\\homework\\jd_crawler', 'D:\\Python\\python38.zip
+  ', 'D:\\Python\\DLLs', 'D:\\Python\\lib', 'D:\\Python', 'D:\\pycharm\\PycharmPro
+  jects\\homework\\venv', 'D:\\pycharm\\PycharmProjects\\homework\\venv\\lib\\site
+  -packages']
+  done
+  
+  (venv) D:\pycharm\PycharmProjects\homework\jd_crawler>
+  ```
\ No newline at end of file
-- 
Gitee


From c68e9a271ffa14fab09695d5a2e39ba4ccd3a46f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8C=97=E8=8C=97=E6=98=AF=E5=8F=AA=E5=81=87=E9=B8=BD?=
 =?UTF-8?q?=E5=AD=90?= <471167673@qq.com>
Date: Sun, 7 Mar 2021 20:01:01 +0800
Subject: [PATCH 3/6] =?UTF-8?q?=E7=AC=AC=E5=8D=81=E5=91=A8-=E7=AC=AC?=
 =?UTF-8?q?=E4=BA=8C=E8=8A=82-=E4=BD=9C=E4=B8=9A=E6=8F=90=E4=BA=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../week10/jd_search.py"                      | 17 ++++++
 ...34\344\270\232\346\217\220\344\272\244.md" | 53 +++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/jd_search.py"
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/\347\254\254\345\205\253\345\221\250-\347\254\254\344\272\214\350\212\202-\344\275\234\344\270\232\346\217\220\344\272\244.md"

diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/jd_search.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/jd_search.py"
new file mode 100644
index 00000000..4d7ecb09
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/jd_search.py"
@@ -0,0 +1,17 @@
+import scrapy
+
+class JDSearch(scrapy.Spider):  # spider that requests JD search result pages for a fixed keyword list
+    name = "jd_search"
+    # Emits one GET request per (keyword, page) pair: 4 keywords x pages 1-10 = 40 requests.
+    def start_requests(self):
+        for keyword in ["鼠标", "键盘", "显卡", "耳机"]:
+            for page_num in range(1, 11):
+                url = f"https://search.jd.com/Search?keyword={keyword}&page={page_num}"
+                yield scrapy.FormRequest(
+                    url=url,
+                    method='GET',
+                    callback=self.parse_search
+                )
+    # Callback for every request above; currently it only prints the response object.
+    def parse_search(self, response):
+        print(response)
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/\347\254\254\345\205\253\345\221\250-\347\254\254\344\272\214\350\212\202-\344\275\234\344\270\232\346\217\220\344\272\244.md" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/\347\254\254\345\205\253\345\221\250-\347\254\254\344\272\214\350\212\202-\344\275\234\344\270\232\346\217\220\344\272\244.md"
new file mode 100644
index 00000000..a36536c9
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/\347\254\254\345\205\253\345\221\250-\347\254\254\344\272\214\350\212\202-\344\275\234\344\270\232\346\217\220\344\272\244.md"
@@ -0,0 +1,53 @@
+# 第八周-第二节-作业提交
+
+- 用户变量
+
+  ```
+  SET @yesterday = DATE_FORMAT(SUBDATE(CURRENT_DATE,1),"%Y/%m/%d");
+  SELECT @yesterday;
+  ```
+
+  ![](./images/8-2-1.png)
+
+- 存储过程
+
+  ```
+  CREATE PROCEDURE employees(days INT)
+  BEGIN
+  	DECLARE t_date VARCHAR(255);
+  	SET t_date = DATE_FORMAT(SUBDATE(CURRENT_DATE,1),"%Y/%m/%d");
+  	SELECT department AS 部门,
+  	COUNT(*) AS 部门人数,
+  	FORMAT(AVG(salary),2) AS 平均薪资,
+  	FORMAT(AVG(working_year),2) AS 平均工作年限,
+  	SUM(salary) AS 整体薪资水平,
+  	MAX(salary) AS 最高薪资,
+  	MIN(salary) AS 最低薪资
+  	FROM employees 
+  	WHERE e_day = t_date
+  	GROUP BY department;
+  END
+  ```
+
+  ![](./images/8-2-2.png)
+
+- 触发器
+
+  ```
+  CREATE TRIGGER check_dep
+  BEFORE INSERT
+  ON employees
+  FOR EACH ROW
+  BEGIN
+  	IF new.department NOT IN ('finance', 'service', 'engineering') THEN
+  	SET new.department = 'unknow';
+  	END IF;
+  END
+  ```
+
+  ```
+  INSERT INTO employees(id, name, salary, department, working_year,e_day) VALUES (9,'I',15000,'digital',5.0,'2021/02/05')
+  ```
+
+  ![](./images/8-2-3.png)
+
-- 
Gitee


From 80b31c20bbccec6e3f484923555aa8888548f1d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8C=97=E8=8C=97=E6=98=AF=E5=8F=AA=E5=81=87=E9=B8=BD?=
 =?UTF-8?q?=E5=AD=90?= <471167673@qq.com>
Date: Sun, 7 Mar 2021 23:45:05 +0800
Subject: [PATCH 4/6] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20lesson3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../week10/lesson3/.keep"                                         | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/.keep"

diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/.keep" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/.keep"
new file mode 100644
index 00000000..e69de29b
-- 
Gitee


From c87e8ce10b90e4886778dfe0f8e788bbffc29814 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8C=97=E8=8C=97=E6=98=AF=E5=8F=AA=E5=81=87=E9=B8=BD?=
 =?UTF-8?q?=E5=AD=90?= <471167673@qq.com>
Date: Sun, 7 Mar 2021 23:49:14 +0800
Subject: [PATCH 5/6] =?UTF-8?q?=E7=AC=AC=E5=8D=81=E5=91=A8-=E7=AC=AC?=
 =?UTF-8?q?=E4=B8=89=E8=8A=82-=E4=BD=9C=E4=B8=9A=E6=8F=90=E4=BA=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../week10/lesson3/items.py"                  |  17 +++
 .../week10/lesson3/jd_search.py"              |  51 +++++++++
 .../week10/lesson3/middlewares.py"            | 108 ++++++++++++++++++
 .../week10/lesson3/pipelines.py"              |  30 +++++
 .../week10/lesson3/run.py"                    |   4 +
 .../week10/lesson3/settings.py"               | 101 ++++++++++++++++
 6 files changed, 311 insertions(+)
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/items.py"
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/jd_search.py"
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/middlewares.py"
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/pipelines.py"
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/run.py"
 create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/settings.py"

diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/items.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/items.py"
new file mode 100644
index 00000000..80a1c44d
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/items.py"
@@ -0,0 +1,17 @@
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+class JdCrawlerScrapyItem(scrapy.Item):
+    # One product entry from a JD search page; JdSearch.parse_search fills every field
+    # and JdCrawlerScrapyPipeline inserts them into the jd_search table.
+    sku_id = scrapy.Field()  # value of the entry's data-sku attribute
+    img = scrapy.Field()     # data-lazy-img attribute of the image, or ""
+    price = scrapy.Field()   # text inside the p-price block, or ""
+    title = scrapy.Field()   # stripped text of the p-name block, or ""
+    shop = scrapy.Field()    # title attribute of the shop link, or ""
+    icons = scrapy.Field()   # JSON-encoded list of the p-icons tag texts, or ""
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/jd_search.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/jd_search.py"
new file mode 100644
index 00000000..9fba74e7
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/jd_search.py"
@@ -0,0 +1,51 @@
+import scrapy
+import json
+from bs4 import BeautifulSoup
+from jd_crawler_scrapy.items import JdCrawlerScrapyItem
+
+
+class JdSearch(scrapy.Spider):
+    name = "jd_search"
+
+    def start_requests(self):
+        for keyword in ["鼠标", "键盘", "显卡", "耳机"]:
+            for page_num in range(1, 11):
+                url = f"https://search.jd.com/Search?keyword={keyword}&page={page_num}"
+                yield scrapy.FormRequest(
+                    url=url,
+                    method='GET',
+                    callback=self.parse_search
+                )
+            break
+
+    def parse_search(self, response):
+        print(response)
+        soup = BeautifulSoup(response.text, "lxml")
+        item_array = soup.select("ul[class = 'gl-warp clearfix'] li[class='gl-item']")
+
+        for item in item_array:
+            try:
+                sku_id = item.attrs["data-sku"]
+                img = item.select("img[data-img='1']")
+                price = item.select("div[class='p-price']")
+                title = item.select("div[class='p-name p-name-type-2']")
+                shop = item.select("div[class='p-shop']")
+                icons = item.select("div[class='p-icons']")
+
+                img = img[0].attrs['data-lazy-img'] if img else ""
+                price = price[0].strong.i.text if price else ""
+                title = title[0].text.strip() if title else ""
+                shop = shop[0].span.a.attrs['title'] if shop[0].text.strip() else ""
+                icons = json.dumps([tag_ele.text for tag_ele in icons[0].select("i")]) if icons else ""
+
+                item = JdCrawlerScrapyItem()
+                item["sku_id"] = sku_id
+                item["price"] = price
+                item["shop"] = shop
+                item["img"] = img
+                item["title"] = title
+                item["icons"] = icons
+                yield item
+
+            except Exception as e:
+                print(e.args)
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/middlewares.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/middlewares.py"
new file mode 100644
index 00000000..83d842ea
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/middlewares.py"
@@ -0,0 +1,108 @@
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+from scrapy import signals
+
+# useful for handling different item types with a single interface
+from itemadapter import is_item, ItemAdapter
+
+
+class JdCrawlerScrapySpiderMiddleware:
+    # Pass-through spider middleware: every hook below hands its input on unchanged.
+    # Not all methods need to be defined. If a method is not defined, scrapy acts
+    # as if the spider middleware does not modify the passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # Builds the middleware instance and subscribes its spider_opened handler to the signal.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_spider_input(self, response, spider):
+        # Called for each response that goes through the spider
+        # middleware and into the spider.
+
+        # Should return None or raise an exception. Here: always returns None.
+        return None
+
+    def process_spider_output(self, response, result, spider):
+        # Called with the results returned from the Spider, after
+        # it has processed the response.
+        # Here: re-yields every element of result unchanged.
+        # Must return an iterable of Request, or item objects.
+        for i in result:
+            yield i
+
+    def process_spider_exception(self, response, exception, spider):
+        # Called when a spider or process_spider_input() method
+        # (from other spider middleware) raises an exception.
+        # Here: does nothing, so None is returned.
+        # Should return either None or an iterable of Request or item objects.
+        pass
+
+    def process_start_requests(self, start_requests, spider):
+        # Called with the start requests of the spider, and works
+        # similarly to the process_spider_output() method, except
+        # that it doesn’t have a response associated.
+        # Here: re-yields each start request unchanged.
+        # Must return only requests (not items).
+        for r in start_requests:
+            yield r
+
+    def spider_opened(self, spider):  # signal handler: logs the spider's name at INFO level
+        spider.logger.info('Spider opened: %s' % spider.name)
+
+
+class JdCrawlerScrapyDownloaderMiddleware:
+    # Pass-through downloader middleware: requests and responses are left untouched.
+    # Not all methods need to be defined. If a method is not defined, scrapy acts
+    # as if the downloader middleware does not modify the passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # Builds the middleware instance and subscribes its spider_opened handler to the signal.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_request(self, request, spider):
+        # Called for each request that goes through the downloader
+        # middleware. Here: always returns None.
+
+        # Must either:
+        # - return None: continue processing this request
+        # - or return a Response object
+        # - or return a Request object
+        # - or raise IgnoreRequest: process_exception() methods of
+        #   installed downloader middleware will be called
+        return None
+
+    def process_response(self, request, response, spider):
+        # Called with the response returned from the downloader.
+        # Here: returns the same response object it was given.
+        # Must either:
+        # - return a Response object
+        # - return a Request object
+        # - or raise IgnoreRequest
+        return response
+
+    def process_exception(self, request, exception, spider):
+        # Called when a download handler or a process_request()
+        # (from other downloader middleware) raises an exception.
+        # Here: does nothing, so None is returned.
+        # Must either:
+        # - return None: continue processing this exception
+        # - return a Response object: stops process_exception() chain
+        # - return a Request object: stops process_exception() chain
+        pass
+
+    def spider_opened(self, spider):  # signal handler: logs the spider's name at INFO level
+        spider.logger.info('Spider opened: %s' % spider.name)
+
+class UAMiddleware:  # middleware that stamps one fixed browser User-Agent string on every request
+    def process_request(self, request, spider):  # mutates the request headers in place; implicitly returns None
+        request.headers["user-agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36 Edg/88.0.705.81"
+
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/pipelines.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/pipelines.py"
new file mode 100644
index 00000000..053613b9
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/pipelines.py"
@@ -0,0 +1,30 @@
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+
+
+# useful for handling different item types with a single interface
+from itemadapter import ItemAdapter
+from jd_crawler_scrapy.items import JdCrawlerScrapyItem
+import pymysql
+
+
+class JdCrawlerScrapyPipeline:
+    def __init__(self):
+        self.mysql_con = None
+
+    def process_item(self, item, spider):
+        if not self.mysql_con:
+            self.mysql_con = pymysql.connect(**spider.settings["MYSQL_CONF"])
+
+        if isinstance(item, JdCrawlerScrapyItem):
+            cursor = self.mysql_con.cursor()
+            SQL = """insert into jd_search(sku_id, img, price, title, shop, icons) 
+             values ('{}', '{}', '{}' , '{}', '{}', '{}')""".format(
+                item['sku_id'], item['img'], item['price'], item['title'], item['shop'], item['icons']
+            )
+            cursor.execute(SQL)
+            self.mysql_con.commit()
+            cursor.close()
+        return item
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/run.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/run.py"
new file mode 100644
index 00000000..81ab4335
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/run.py"
@@ -0,0 +1,4 @@
+from scrapy import cmdline
+
+command = "scrapy crawl jd_search".split()  # argv-style list: ["scrapy", "crawl", "jd_search"]
+cmdline.execute(command)  # hands that argv list to scrapy.cmdline, starting the jd_search spider
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/settings.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/settings.py"
new file mode 100644
index 00000000..67f27b4b
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/settings.py"
@@ -0,0 +1,101 @@
+# Scrapy settings for jd_crawler_scrapy project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+#     https://docs.scrapy.org/en/latest/topics/settings.html
+#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'jd_crawler_scrapy'
+
+SPIDER_MODULES = ['jd_crawler_scrapy.spiders']
+NEWSPIDER_MODULE = 'jd_crawler_scrapy.spiders'
+
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+#USER_AGENT = 'jd_crawler_scrapy (+http://www.yourdomain.com)'
+
+# Obey robots.txt rules
+ROBOTSTXT_OBEY = True
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+#CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+#DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
+#CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+#   'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+#    'jd_crawler_scrapy.middlewares.JdCrawlerScrapySpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+DOWNLOADER_MIDDLEWARES = {
+   # 'jd_crawler_scrapy.middlewares.JdCrawlerScrapyDownloaderMiddleware': 543,
+    'jd_crawler_scrapy.middlewares.JdCrawlerScrapyDownloaderMiddleware': 100,
+
+}
+
+# Enable or disable extensions
+# See https://docs.scrapy.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+#    'scrapy.extensions.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+ITEM_PIPELINES = {
+   'jd_crawler_scrapy.pipelines.JdCrawlerScrapyPipeline': 300,
+}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
+#AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED = True
+#HTTPCACHE_EXPIRATION_SECS = 0
+#HTTPCACHE_DIR = 'httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES = []
+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+
+
+MYSQL_CONF = {  # unpacked as keyword arguments into pymysql.connect() by JdCrawlerScrapyPipeline
+    "host": "127.0.0.1",
+    "user": "root",
+    "password": "xmt66237029",  # NOTE(review): plaintext credential in version control - move to env/untracked config and rotate
+    "db": "week9"
+}
+# Scrapy logging settings: write log output to a file, at DEBUG verbosity.
+LOG_FILE = "D:/log/jd_search.log"  # NOTE(review): absolute Windows path; presumably D:/log must already exist - confirm
+LOG_LEVEL = "DEBUG"
\ No newline at end of file
-- 
Gitee


From b3c964b53a8cd7c721a1c92be1be0d7230828546 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8C=97=E8=8C=97=E6=98=AF=E5=8F=AA=E5=81=87=E9=B8=BD?=
 =?UTF-8?q?=E5=AD=90?= <471167673@qq.com>
Date: Mon, 8 Mar 2021 23:12:05 +0800
Subject: [PATCH 6/6] =?UTF-8?q?update=20=E7=AC=AC=E4=BA=8C=E6=9C=9F?=
 =?UTF-8?q?=E8=AE=AD=E7=BB=83=E8=90=A5/3=E7=8F=AD/3=E7=8F=AD=5F=E8=8C=97?=
 =?UTF-8?q?=E8=8C=97=E6=98=AF=E5=8F=AA=E5=81=87=E9=B8=BD=E5=AD=90/week10/l?=
 =?UTF-8?q?esson3/settings.py.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../week10/lesson3/settings.py"                                 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/settings.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/settings.py"
index 67f27b4b..fe25e2cb 100644
--- "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/settings.py"
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\350\214\227\350\214\227\346\230\257\345\217\252\345\201\207\351\270\275\345\255\220/week10/lesson3/settings.py"
@@ -52,7 +52,7 @@ ROBOTSTXT_OBEY = True
 # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
 DOWNLOADER_MIDDLEWARES = {
    # 'jd_crawler_scrapy.middlewares.JdCrawlerScrapyDownloaderMiddleware': 543,
-    'jd_crawler_scrapy.middlewares.JdCrawlerScrapyDownloaderMiddleware': 100,
+    'jd_crawler_scrapy.middlewares.UAMiddleware': 100,
 
 }
 
-- 
Gitee