From 2b51fa0e2904f7b853ef89d4f8013538b6f25b51 Mon Sep 17 00:00:00 2001 From: lihuanjun <1157725063@qq.com> Date: Wed, 14 Dec 2022 15:22:46 +0800 Subject: [PATCH 1/6] get ubuntu distribution Closes #I662IE Signed-off-by: lihuanjun <1157725063@qq.com> --- getUbuntuDistribution.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 getUbuntuDistribution.py diff --git a/getUbuntuDistribution.py b/getUbuntuDistribution.py new file mode 100644 index 0000000..9f1bb28 --- /dev/null +++ b/getUbuntuDistribution.py @@ -0,0 +1,28 @@ +import urllib +from lxml import etree + +# 定义一个url +url = "http://old-releases.ubuntu.com/releases/" + +# 定义一个请求头的User-Agent字段,User-Agent的内容可以通过fiddle抓取浏览器访问的url的header中的信息,模拟浏览器访问 +head = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.46' +} + +# 自定义请求头信息 +request = urllib.request.Request(url, headers=head) + +# 通过urlopen访问url,服务器返回response对象 +response = urllib.request.urlopen(request) + +# 读取返回结果 +result = response.read().decode("utf-8") + +# 初始化生成一个XPath解析对象 +html = etree.HTML(result) + +# 获取ul/li/a节点下的内容 +data = html.xpath('//ul/li/a/text()') + +distribution = {"Ubuntu" : data} +print(distribution) -- Gitee From b621dfd62caf6217f6d5b3d805944b03b0ef8e07 Mon Sep 17 00:00:00 2001 From: lihuanjun <1157725063@qq.com> Date: Wed, 14 Dec 2022 20:28:18 +0800 Subject: [PATCH 2/6] update --- getUbuntuDistribution.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/getUbuntuDistribution.py b/getUbuntuDistribution.py index 9f1bb28..0f6611f 100644 --- a/getUbuntuDistribution.py +++ b/getUbuntuDistribution.py @@ -2,7 +2,7 @@ import urllib from lxml import etree # 定义一个url -url = "http://old-releases.ubuntu.com/releases/" +url = "https://releases.ubuntu.com/" # 定义一个请求头的User-Agent字段,User-Agent的内容可以通过fiddle抓取浏览器访问的url的header中的信息,模拟浏览器访问 head = { @@ -21,8 +21,11 @@ result = response.read().decode("utf-8") # 初始化生成一个XPath解析对象 html = etree.HTML(result) -# 获取ul/li/a节点下的内容 -data = html.xpath('//ul/li/a/text()') +# 获取/a节点下的内容 +content = html.xpath('//a[@class="p-link--inverted p-heading--four"]/text()') +data = [] +for i in content: + data.append(i[:-2]) distribution = {"Ubuntu" : data} print(distribution) -- Gitee From 599060a55562c8176692317c0d95162ada7367fa Mon Sep 17 00:00:00 2001 From: Conke Hu Date: Wed, 14 Dec 2022 20:40:21 +0800 Subject: [PATCH 3/6] rename get_linux_distro.py Signed-off-by: Conke Hu --- getUbuntuDistribution.py => get_linux_distro.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename getUbuntuDistribution.py => get_linux_distro.py (100%) diff --git a/getUbuntuDistribution.py b/get_linux_distro.py similarity index 100% rename from getUbuntuDistribution.py rename to get_linux_distro.py -- Gitee From 76e76960f704f8559ca4a1748b1ca28af6cd6268 Mon Sep 17 00:00:00 2001 From: Conke Hu Date: Wed, 14 Dec 2022 20:40:51 +0800 Subject: [PATCH 4/6] remove chinese comments Signed-off-by: Conke Hu --- get_linux_distro.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/get_linux_distro.py b/get_linux_distro.py index 0f6611f..cf1e340 100644 --- a/get_linux_distro.py +++ b/get_linux_distro.py @@ -1,27 +1,20 @@ import urllib from lxml import etree -# 定义一个url url = "https://releases.ubuntu.com/" -# 定义一个请求头的User-Agent字段,User-Agent的内容可以通过fiddle抓取浏览器访问的url的header中的信息,模拟浏览器访问 head = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.46' } -# 自定义请求头信息 request = urllib.request.Request(url, headers=head) -# 通过urlopen访问url,服务器返回response对象 response = urllib.request.urlopen(request) -# 读取返回结果 result = response.read().decode("utf-8") -# 初始化生成一个XPath解析对象 html = etree.HTML(result) -# 获取/a节点下的内容 content = html.xpath('//a[@class="p-link--inverted p-heading--four"]/text()') data = [] for i in content: -- Gitee From b39475103b58c0106ee7f6f444dbe39a1169fd61 Mon Sep 17 00:00:00 2001 From: lihuanjun <1157725063@qq.com> Date: Wed, 14 Dec 2022 21:35:25 +0800 Subject: [PATCH 5/6] position elements based on text --- get_linux_distro.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/get_linux_distro.py b/get_linux_distro.py index cf1e340..47119df 100644 --- a/get_linux_distro.py +++ b/get_linux_distro.py @@ -15,7 +15,7 @@ result = response.read().decode("utf-8") html = etree.HTML(result) -content = html.xpath('//a[@class="p-link--inverted p-heading--four"]/text()') +content = html.xpath('//h4[text()="LTS Releases"]/../../div/ul/li/a/text()') data = [] for i in content: data.append(i[:-2]) -- Gitee From 96a2389ea81772912f7256eca2eec06fb3c9bf5e Mon Sep 17 00:00:00 2001 From: Conke Hu Date: Thu, 15 Dec 2022 00:27:32 +0800 Subject: [PATCH 6/6] fix urllib import issue Signed-off-by: Conke Hu --- get_linux_distro.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/get_linux_distro.py b/get_linux_distro.py index 47119df..97f40ef 100644 --- a/get_linux_distro.py +++ b/get_linux_distro.py @@ -1,4 +1,4 @@ -import urllib +import urllib.request from lxml import etree url = "https://releases.ubuntu.com/" -- Gitee