diff --git a/china.py b/china.py index 59e5547aa25f59befa5041c50c32ec1edfd84428..6df590bc6bfc635e959984646d775a812d3269ba 100644 --- a/china.py +++ b/china.py @@ -1,6 +1,6 @@ import numpy as np import csv - +from datetime import datetime def load_data(file_path): """ Load repository data and calculate activity metrics @@ -11,7 +11,16 @@ def load_data(file_path): 列:仓库名称,所有者,星标,分支,语言,创建时间,最后提交,描述 返回:形状为(仓库数, 3)的数组,包含[星标数, 分支数, 活跃天数] """ - pass + with open(file_path, 'r', encoding='utf-8') as file: + reader = csv.reader(file) + next(reader) # Skip header + data = [] + for row in reader: + repo_name, owner, stars, forks, language, created_at, last_commit, description = row + stars, forks = int(stars), int(forks) + active_days = (datetime.strptime(last_commit, '%Y-%m-%d') - datetime.strptime(created_at, '%Y-%m-%d')).days + data.append([stars, forks, active_days]) + return np.array(data) def calculate_statistics(data): """ @@ -26,7 +35,16 @@ def calculate_statistics(data): 计算仓库指标统计量 返回:包含平均值、中位数、方差、标准差的字典 """ - pass + means = [np.mean(data[:, i]) for i in range(3)] + medians = [np.median(data[:, i]) for i in range(3)] + variances = [np.var(data[:, i]) for i in range(3)] + stds = [np.std(data[:, i]) for i in range(3)] + return { + 'means': means, + 'medians': medians, + 'variances': variances, + 'stds': stds + } def print_results(stats): """ @@ -34,6 +52,7 @@ def print_results(stats): 按严格格式打印结果,保持正确缩进 """ + metrics = ['Stars', 'Forks', 'Active Days'] for metric, mean, med, var, std in zip(metrics, stats['means'], diff --git a/liujiahui/main.py b/liujiahui/main.py new file mode 100644 index 0000000000000000000000000000000000000000..ab4f27be2705b155422f9876949a05a4c5e5507e --- /dev/null +++ b/liujiahui/main.py @@ -0,0 +1 @@ +print("DA402") \ No newline at end of file diff --git a/pakistan.py b/pakistan.py index f2720fce6bdb14803f8a317c60347ad58f19d43d..be8d0620efe56ab806a0277527f8b8bb14bf2218 100644 --- a/pakistan.py +++ b/pakistan.py @@ -1,17 +1,33 @@ import numpy as np import csv + def load_data(file_path): """ Load repository data and calculate activity metrics Columns: repo_name,owner,stars,forks,language,created_at,last_commit,description Return: 2D NumPy array of shape (repos, 3) containing [stars, forks, active_days] - + 加载仓库数据并计算活跃天数 列:仓库名称,所有者,星标,分支,语言,创建时间,最后提交,描述 返回:形状为(仓库数, 3)的数组,包含[星标数, 分支数, 活跃天数] """ - pass + # 读取CSV文件 + with open(file_path, mode='r', encoding='utf-8') as file: + reader = csv.DictReader(file) + data = [] + for row in reader: + # 提取星标数、分支数、创建时间和最后提交时间 + stars = int(row['stars']) + forks = int(row['forks']) + created_at = np.datetime64(row['created_at']) + last_commit = np.datetime64(row['last_commit']) + # 计算活跃天数 + active_days = (last_commit - created_at).astype('timedelta64[D]').astype(int) + data.append([stars, forks, active_days]) + # 转换为NumPy数组 + return np.array(data) + def calculate_statistics(data): """ @@ -22,30 +38,44 @@ def calculate_statistics(data): 'variances': [stars_var, forks_var, days_var], 'stds': [stars_std, forks_std, days_std] } - + 计算仓库指标统计量 返回:包含平均值、中位数、方差、标准差的字典 """ - pass + # 使用NumPy函数计算统计量 + means = np.mean(data, axis=0).round(1) + medians = np.median(data, axis=0).round(1) + variances = np.var(data, axis=0).round(1) + stds = np.std(data, axis=0).round(1) + + return { + 'means': means, + 'medians': medians, + 'variances': variances, + 'stds': stds + } + def print_results(stats): """ Print formatted results with proper indentation - + 按严格格式打印结果,保持正确缩进 """ metrics = ['Stars', 'Forks', 'Active Days'] - for metric, mean, med, var, std in zip(metrics, - stats['means'], - stats['medians'], - stats['variances'], - stats['stds']): + for metric, mean, med, var, std in zip(metrics, + stats['means'], + stats['medians'], + stats['variances'], + stats['stds']): print(f"{metric}:") print(f" Average: {mean:.1f}") print(f" Median: {med:.1f}") print(f" Variance: {var:.1f}") print(f" Standard Deviation: {std:.1f}") + +# 主程序 repo_data = load_data('pakistan-repos.csv') stats = calculate_statistics(repo_data) -print_results(stats) +print_results(stats) \ No newline at end of file