# Reconstructed from a patch that rewrites china.py (below) and also adds two
# one-line text files: Tang-Yijia.md ("# Tang Yijia") and tls ("Tong Longsen").
import csv

import numpy as np


def load_data(file_path):
    """Load repository rows from a CSV file and derive activity metrics.

    Expected columns (by position):
    repo_name, owner, stars, forks, language, created_at, last_commit,
    description. The first row is treated as a header and skipped.

    Args:
        file_path: Path to a UTF-8 encoded CSV file.

    Returns:
        A float NumPy array of shape (repos, 3) whose columns are
        [stars, forks, active_days]. An input with no data rows yields an
        array of shape (0, 3).

    Raises:
        ValueError: If stars/forks are not integers or a date cannot be
            parsed by ``np.datetime64``.
        IndexError: If a non-blank row has fewer than seven columns.
    """
    one_day = np.timedelta64(1, 'D')
    records = []
    # newline='' lets the csv module handle embedded line breaks itself.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row; tolerate an empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. trailing newline)
                continue
            stars = int(row[2])
            forks = int(row[3])
            created_at = np.datetime64(row[5])
            last_commit = np.datetime64(row[6])
            # Dividing by one day gives days whatever unit the strings were
            # parsed into (a timestamp would otherwise yield seconds).
            active_days = (last_commit - created_at) / one_day
            records.append([stars, forks, active_days])
    # reshape keeps the documented 2D shape even when there are no rows
    return np.array(records, dtype=float).reshape(-1, 3)


def calculate_statistics(data):
    """Compute per-column summary statistics, rounded to one decimal place.

    Args:
        data: 2D array whose columns are [stars, forks, active_days].

    Returns:
        Dictionary with the keys 'means', 'medians', 'variances' and 'stds'.
        Each value is a NumPy array holding one entry per column of ``data``
        (population variance / standard deviation, NumPy's default ddof=0).
    """
    reducers = {
        'means': np.mean,
        'medians': np.median,
        'variances': np.var,
        'stds': np.std,
    }
    return {
        key: np.round(reduce(data, axis=0), 1)
        for key, reduce in reducers.items()
    }


def print_results(stats):
    """Print the statistics for each metric in a fixed text layout.

    Args:
        stats: Dictionary as returned by ``calculate_statistics``; the
            first three entries of each value are reported as Stars, Forks
            and Active Days respectively.
    """
    metrics = ['Stars', 'Forks', 'Active Days']
    columns = zip(
        metrics,
        stats['means'],
        stats['medians'],
        stats['variances'],
        stats['stds'],
    )
    for metric, mean, med, var, std in columns:
        print(f"{metric}:")
        print(f" Average: {mean:.1f}")
        print(f" Median: {med:.1f}")
        print(f" Variance: {var:.1f}")
        print(f" Standard Deviation: {std:.1f}")


# Run the report only when executed as a script, so importing this module
# does not require china-repos.csv to exist.
if __name__ == "__main__":
    repo_data = load_data('china-repos.csv')
    stats = calculate_statistics(repo_data)
    print_results(stats)