From 190cf68428886b45e202bcb7124722475ce25dfa Mon Sep 17 00:00:00 2001 From: maomaodawang88 <15470932+maomaodawang88@user.noreply.gitee.com> Date: Wed, 26 Mar 2025 08:47:08 +0800 Subject: [PATCH 1/4] finish code --- mao.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 mao.py diff --git a/mao.py b/mao.py new file mode 100644 index 0000000..933d3b7 --- /dev/null +++ b/mao.py @@ -0,0 +1,50 @@ +import numpy as np + + +def load_data(file_path): + dtype = [('stars', int), ('forks', int), ('created_at', 'U10'), ('last_commit', 'U10')] + data = np.genfromtxt(file_path, delimiter=',', dtype=dtype, skip_header=1, + usecols=(2, 3, 5, 6), encoding='utf-8') + + created_dt = data['created_at'].astype('datetime64[D]') + last_dt = data['last_commit'].astype('datetime64[D]') + active_days = (last_dt - created_dt).astype('timedelta64[D]').astype(float) + + stars = data['stars'].astype(float) + forks = data['forks'].astype(float) + + return np.column_stack((stars, forks, active_days)) + + +def calculate_statistics(data): + means = np.mean(data, axis=0) + medians = np.median(data, axis=0) + variances = np.var(data, axis=0, ddof=1) + stds = np.std(data, axis=0, ddof=1) + + return { + 'means': [means[0], means[1], means[2]], + 'medians': [medians[0], medians[1], medians[2]], + 'variances': [variances[0], variances[1], variances[2]], + 'stds': [stds[0], stds[1], stds[2]] + } + + +def print_results(stats): + metrics = ['Stars', 'Forks', 'Active Days'] + for metric, mean, med, var, std in zip(metrics, + stats['means'], + stats['medians'], + stats['variances'], + stats['stds']): + print(f"{metric}:") + print(f" Average: {mean:.1f}") + print(f" Median: {med:.1f}") + print(f" Variance: {var:.1f}") + print(f" Standard Deviation: {std:.1f}") + + +# 使用示例 +repo_data = load_data('china-repos.csv') +stats = calculate_statistics(repo_data) +print_results(stats) \ No newline at end of file -- Gitee From 1acf24f01290b16ba93792fd3457372877936695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E6=B1=9F=E7=9A=93?= <15470878+guo-jiangh@user.noreply.gitee.com> Date: Wed, 26 Mar 2025 09:26:43 +0800 Subject: [PATCH 2/4] Adding Guojianghao's Work --- pakistan.py | 55 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/pakistan.py b/pakistan.py index f2720fc..46f80c9 100644 --- a/pakistan.py +++ b/pakistan.py @@ -1,17 +1,38 @@ import numpy as np import csv +from datetime import datetime + def load_data(file_path): """ Load repository data and calculate activity metrics Columns: repo_name,owner,stars,forks,language,created_at,last_commit,description Return: 2D NumPy array of shape (repos, 3) containing [stars, forks, active_days] - + 加载仓库数据并计算活跃天数 列:仓库名称,所有者,星标,分支,语言,创建时间,最后提交,描述 返回:形状为(仓库数, 3)的数组,包含[星标数, 分支数, 活跃天数] """ - pass + data = [] + + with open(file_path, 'r', encoding='utf-8') as file: + reader = csv.reader(file) + next(reader) # Skip header + + for row in reader: + # Extract relevant columns + stars = int(row[2]) + forks = int(row[3]) + + # Calculate active days (last_commit - created_at) + created_at = datetime.strptime(row[5], '%Y-%m-%d') + last_commit = datetime.strptime(row[6], '%Y-%m-%d') + active_days = (last_commit - created_at).days + + data.append([stars, forks, active_days]) + + return np.array(data) + def calculate_statistics(data): """ @@ -22,30 +43,42 @@ def calculate_statistics(data): 'variances': [stars_var, forks_var, days_var], 'stds': [stars_std, forks_std, days_std] } - + 计算仓库指标统计量 返回:包含平均值、中位数、方差、标准差的字典 """ - pass + means = np.mean(data, axis=0) + medians = np.median(data, axis=0) + variances = np.var(data, axis=0) + stds = np.std(data, axis=0) + + return { + 'means': means, + 'medians': medians, + 'variances': variances, + 'stds': stds + } + def print_results(stats): """ Print formatted results with proper indentation - + 按严格格式打印结果,保持正确缩进 """ metrics = ['Stars', 'Forks', 'Active Days'] - for metric, mean, med, var, std in zip(metrics, - stats['means'], - stats['medians'], - stats['variances'], - stats['stds']): + for metric, mean, med, var, std in zip(metrics, + stats['means'], + stats['medians'], + stats['variances'], + stats['stds']): print(f"{metric}:") print(f" Average: {mean:.1f}") print(f" Median: {med:.1f}") print(f" Variance: {var:.1f}") print(f" Standard Deviation: {std:.1f}") + repo_data = load_data('pakistan-repos.csv') stats = calculate_statistics(repo_data) -print_results(stats) +print_results(stats) \ No newline at end of file -- Gitee From d917ea5362bc8e40a69fbe71b6f26d58b546d921 Mon Sep 17 00:00:00 2001 From: maomaodawang88 <15470932+maomaodawang88@user.noreply.gitee.com> Date: Wed, 26 Mar 2025 09:30:48 +0800 Subject: [PATCH 3/4] Add maohelin work --- china.py | 65 ++++++++++++++++++++++++++++---------------------------- mao.py | 50 ------------------------------------------- 2 files changed, 32 insertions(+), 83 deletions(-) delete mode 100644 mao.py diff --git a/china.py b/china.py index 59e5547..50b97c7 100644 --- a/china.py +++ b/china.py @@ -1,51 +1,50 @@ import numpy as np -import csv + def load_data(file_path): - """ - Load repository data and calculate activity metrics - Columns: repo_name,owner,stars,forks,language,created_at,last_commit,description - Return: 2D NumPy array of shape (repos, 3) containing [stars, forks, active_days] - - 加载仓库数据并计算活跃天数 - 列:仓库名称,所有者,星标,分支,语言,创建时间,最后提交,描述 - 返回:形状为(仓库数, 3)的数组,包含[星标数, 分支数, 活跃天数] - """ - pass + dtype = [('stars', int), ('forks', int), ('created_at', 'U10'), ('last_commit', 'U10')] + data = np.genfromtxt(file_path, delimiter=',', dtype=dtype, skip_header=1, + usecols=(2, 3, 5, 6), encoding='utf-8') + + created_dt = data['created_at'].astype('datetime64[D]') + last_dt = data['last_commit'].astype('datetime64[D]') + active_days = (last_dt - created_dt).astype('timedelta64[D]').astype(float) + + stars = data['stars'].astype(float) + forks = data['forks'].astype(float) + + return np.column_stack((stars, forks, active_days)) + def calculate_statistics(data): - """ - Calculate repository metrics statistics - Return: Dictionary containing { - 'means': [stars_mean, forks_mean, days_mean], - 'medians': [stars_median, forks_median, days_median], - 'variances': [stars_var, forks_var, days_var], - 'stds': [stars_std, forks_std, days_std] + means = np.mean(data, axis=0) + medians = np.median(data, axis=0) + variances = np.var(data, axis=0, ddof=1) + stds = np.std(data, axis=0, ddof=1) + + return { + 'means': [means[0], means[1], means[2]], + 'medians': [medians[0], medians[1], medians[2]], + 'variances': [variances[0], variances[1], variances[2]], + 'stds': [stds[0], stds[1], stds[2]] } - - 计算仓库指标统计量 - 返回:包含平均值、中位数、方差、标准差的字典 - """ - pass + def print_results(stats): - """ - Print formatted results with proper indentation - - 按严格格式打印结果,保持正确缩进 - """ metrics = ['Stars', 'Forks', 'Active Days'] - for metric, mean, med, var, std in zip(metrics, - stats['means'], - stats['medians'], - stats['variances'], - stats['stds']): + for metric, mean, med, var, std in zip(metrics, + stats['means'], + stats['medians'], + stats['variances'], + stats['stds']): print(f"{metric}:") print(f" Average: {mean:.1f}") print(f" Median: {med:.1f}") print(f" Variance: {var:.1f}") print(f" Standard Deviation: {std:.1f}") + +# 使用示例 repo_data = load_data('china-repos.csv') stats = calculate_statistics(repo_data) print_results(stats) \ No newline at end of file diff --git a/mao.py b/mao.py deleted file mode 100644 index 933d3b7..0000000 --- a/mao.py +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np - - -def load_data(file_path): - dtype = [('stars', int), ('forks', int), ('created_at', 'U10'), ('last_commit', 'U10')] - data = np.genfromtxt(file_path, delimiter=',', dtype=dtype, skip_header=1, - usecols=(2, 3, 5, 6), encoding='utf-8') - - created_dt = data['created_at'].astype('datetime64[D]') - last_dt = data['last_commit'].astype('datetime64[D]') - active_days = (last_dt - created_dt).astype('timedelta64[D]').astype(float) - - stars = data['stars'].astype(float) - forks = data['forks'].astype(float) - - return np.column_stack((stars, forks, active_days)) - - -def calculate_statistics(data): - means = np.mean(data, axis=0) - medians = np.median(data, axis=0) - variances = np.var(data, axis=0, ddof=1) - stds = np.std(data, axis=0, ddof=1) - - return { - 'means': [means[0], means[1], means[2]], - 'medians': [medians[0], medians[1], medians[2]], - 'variances': [variances[0], variances[1], variances[2]], - 'stds': [stds[0], stds[1], stds[2]] - } - - -def print_results(stats): - metrics = ['Stars', 'Forks', 'Active Days'] - for metric, mean, med, var, std in zip(metrics, - stats['means'], - stats['medians'], - stats['variances'], - stats['stds']): - print(f"{metric}:") - print(f" Average: {mean:.1f}") - print(f" Median: {med:.1f}") - print(f" Variance: {var:.1f}") - print(f" Standard Deviation: {std:.1f}") - - -# 使用示例 -repo_data = load_data('china-repos.csv') -stats = calculate_statistics(repo_data) -print_results(stats) \ No newline at end of file -- Gitee From 801e5bc0029116d1dbda8422f9fee590937eb0c7 Mon Sep 17 00:00:00 2001 From: maomaodawang88 <15470932+maomaodawang88@user.noreply.gitee.com> Date: Thu, 3 Apr 2025 15:34:20 +0800 Subject: [PATCH 4/4] Added folder with name maohelin --- main.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 main.py diff --git a/main.py b/main.py new file mode 100644 index 0000000..e69de29 -- Gitee