"""Search JD.com for a keyword with Selenium and print the parsed result items."""

import json
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# Local chromedriver location. Not used by the driver construction below,
# which relies on chromedriver being discoverable on PATH.
driver_path = "/Users/booboomx/Documents/chromedriver"

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("disable-blink-features=AutomationControlled")
# Pass the options to the driver; otherwise the flag above has no effect.
browser = webdriver.Chrome(options=chrome_options)


def parse_jd_item(html):
    """Extract product records from a JD search-result page.

    Args:
        html: HTML source of the search-result page.

    Returns:
        A list of ``(sku_id, img, price, title, shop, icons)`` tuples, one per
        ``li.gl-item`` found in the result list. Every field is a string;
        ``icons`` is a JSON-encoded list of the icon tag texts. A field whose
        markup is missing is returned as ``""`` (``"[]"`` for ``icons``)
        instead of raising.
    """
    result = []

    soup = BeautifulSoup(html, "lxml")
    item_array = soup.select("ul[class='gl-warp clearfix'] li[class='gl-item']")
    for item in item_array:
        sku_id = item.attrs.get("data-sku", "")

        img_tag = item.select_one("img[data-img='1']")
        price_div = item.select_one("div[class='p-price']")
        title_div = item.select_one("div[class='p-name p-name-type-2']")
        shop_div = item.select_one("div[class='p-shop']")
        icons_div = item.select_one("div[class='p-icons']")

        img = img_tag.attrs.get("data-lazy-img", "") if img_tag else ""

        # The price text lives in <strong><i>…</i></strong>; either level may
        # be absent, so resolve the nested node before reading its text.
        price_tag = price_div.select_one("strong i") if price_div else None
        price = price_tag.text if price_tag else ""

        title = title_div.text.strip() if title_div else ""

        # Some items have no shop block at all, or an empty one; both yield "".
        shop = ""
        if shop_div is not None and shop_div.text.strip():
            shop_link = shop_div.select_one("span a")
            if shop_link is not None:
                shop = shop_link.attrs.get("title", "")

        if icons_div is not None:
            icons = json.dumps([tag_ele.text for tag_ele in icons_div.select("i")])
        else:
            icons = "[]"

        result.append((sku_id, img, price, title, shop, icons))

    return result


class whole:
    """Namespace for the search workflow; uses the module-level ``browser``."""

    @staticmethod
    def sim_search(keyword, url):
        """Open *url*, type *keyword* into the search box and submit it.

        Args:
            keyword: Text to enter into the search input.
            url: Page to load before searching.
        """
        browser.get(url)
        search_input = browser.find_element(By.CSS_SELECTOR, 'input[aria-label="搜索"]')
        search_input.send_keys(keyword)
        search_button = browser.find_element(By.CSS_SELECTOR, 'button[aria-label="搜索"]')
        search_button.click()

    @staticmethod
    def main(keyword, url):
        """Run a search for *keyword* on *url*, print the parsed items, then quit.

        Args:
            keyword: Search term passed to ``sim_search``.
            url: Start page passed to ``sim_search``.
        """
        try:
            whole.sim_search(keyword, url)
            # Give the result page time to load before reading its source.
            time.sleep(3)
            item_array = parse_jd_item(browser.page_source)
            print(item_array)
        finally:
            # Always end the session so the browser and driver process do not
            # linger when the search or parsing raises.
            browser.quit()


if __name__ == "__main__":
    jd_url = "https://www.jd.com/"
    whole.main("鼠标", jd_url)