From 3695009ea9d06aa3d0ba0b90e752dc8589a0ef53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A6=8F=E6=9D=A5Lee?= Date: Mon, 22 Mar 2021 23:21:45 +0800 Subject: [PATCH] =?UTF-8?q?add=20=E7=AC=AC=E4=BA=8C=E6=9C=9F=E8=AE=AD?= =?UTF-8?q?=E7=BB=83=E8=90=A5/3=E7=8F=AD/3=E7=8F=AD=5F=E6=9D=8E=E6=B3=8A/?= =?UTF-8?q?=E7=AC=AC=E5=8D=81=E4=BA=8C=E5=91=A8/=E7=AC=AC=E4=BA=8C?= =?UTF-8?q?=E8=8A=82/jd=5Fsearch.py.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../jd_search.py" | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 "\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\346\235\216\346\263\212/\347\254\254\345\215\201\344\272\214\345\221\250/\347\254\254\344\272\214\350\212\202/jd_search.py" diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\346\235\216\346\263\212/\347\254\254\345\215\201\344\272\214\345\221\250/\347\254\254\344\272\214\350\212\202/jd_search.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\346\235\216\346\263\212/\347\254\254\345\215\201\344\272\214\345\221\250/\347\254\254\344\272\214\350\212\202/jd_search.py" new file mode 100644 index 00000000..b289f41e --- /dev/null +++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/3\347\217\255/3\347\217\255_\346\235\216\346\263\212/\347\254\254\345\215\201\344\272\214\345\221\250/\347\254\254\344\272\214\350\212\202/jd_search.py" @@ -0,0 +1,53 @@ +from selenium import webdriver +import time +from bs4 import BeautifulSoup +import json +driver_path = "/Users/booboomx/Documents/chromedriver" + +chrome_options = webdriver.ChromeOptions() +chrome_options.add_argument("disable-blink-features=AutomationControlled") +#browser = webdriver.Chrome(executable_path='/Users/booboomx/Documents/chromedrive', options=chrome_options) 
# Shared WebDriver session used by every function below.
# NOTE(review): `chrome_options` is built earlier in this file but is not
# passed to the driver here, so its flags are not applied -- confirm intent.
browser = webdriver.Chrome()


def parse_jd_item(html):
    """Extract product records from a JD search-result page.

    Args:
        html: Markup of the result page as a string.

    Returns:
        A list of ``(sku_id, img, price, title, shop, icons)`` tuples, one per
        ``li.gl-item`` element.  Any field whose markup is missing is returned
        as ``""``; ``icons`` is a JSON-encoded list of the icon label texts
        (``'[]'`` when the item has no icon container).
    """
    soup = BeautifulSoup(html, "lxml")
    result = []

    for item in soup.select("ul[class='gl-warp clearfix'] li[class='gl-item']"):
        sku_id = item.attrs.get("data-sku", "")

        img_tag = item.select_one("img[data-img='1']")
        img = img_tag.attrs.get("data-lazy-img", "") if img_tag is not None else ""

        # Walk div -> strong -> i one step at a time so that a card with an
        # incomplete price block yields "" instead of raising AttributeError.
        price_tag = item.select_one("div[class='p-price']")
        for child_name in ("strong", "i"):
            if price_tag is None:
                break
            price_tag = getattr(price_tag, child_name)
        price = price_tag.text if price_tag is not None else ""

        title_tag = item.select_one("div[class='p-name p-name-type-2']")
        title = title_tag.text.strip() if title_tag is not None else ""

        # The original indexed shop[0] unconditionally, which raised
        # IndexError for items without a shop container.
        shop = ""
        shop_tag = item.select_one("div[class='p-shop']")
        if shop_tag is not None and shop_tag.text.strip():
            shop_link = shop_tag.span.a if shop_tag.span is not None else None
            if shop_link is not None:
                shop = shop_link.attrs.get("title", "")

        icons_tag = item.select_one("div[class='p-icons']")
        if icons_tag is not None:
            icons = json.dumps([tag_ele.text for tag_ele in icons_tag.select("i")])
        else:
            icons = '[]'

        result.append((sku_id, img, price, title, shop, icons))

    return result


class whole:
    """Namespace grouping the browser-driven search steps."""

    @staticmethod
    def sim_search(keyword, url):
        """Open *url* and submit *keyword* through the page's search box.

        Args:
            keyword: Text typed into the search input.
            url: Page to load before searching.
        """
        # Local import keeps this block self-contained; the By-based lookup
        # works on Selenium 3 and 4, unlike find_element_by_css_selector.
        from selenium.webdriver.common.by import By

        browser.get(url)
        search_input = browser.find_element(By.CSS_SELECTOR, 'input[aria-label="搜索"]')
        search_input.send_keys(keyword)
        search_button = browser.find_element(By.CSS_SELECTOR, 'button[aria-label="搜索"]')
        search_button.click()

    @staticmethod
    def main(keyword, url):
        """Search for *keyword* on *url*, print the parsed items, then quit.

        Args:
            keyword: Search term forwarded to ``sim_search``.
            url: Start page forwarded to ``sim_search``.
        """
        try:
            whole.sim_search(keyword, url)
            # Fixed pause to give the result page time to load before its
            # source is read.
            time.sleep(3)
            item_array = parse_jd_item(browser.page_source)
            print(item_array)
        finally:
            # quit() ends the whole driver session (not just the window), and
            # the finally clause makes sure it also happens on failure.
            browser.quit()


if __name__ == "__main__":
    jd_url = "https://www.jd.com/"
    whole.main("鼠标", jd_url)