"""Descriptive statistics for GitHub repository CSV exports.

Each input file is a CSV with a header row that names at least the columns
``stars``, ``forks``, ``created_at`` and ``last_commit``.  For every file the
script prints the mean, median, population variance and population standard
deviation of the star count, the fork count and the number of days between
repository creation and last commit.
"""

import csv
import sys

import numpy as np

# Labels for the three columns produced by ``load_data``, in column order.
METRIC_LABELS = ('Stars', 'Forks', 'Active Days')

# Datasets processed when no path is supplied to ``main``.
DEFAULT_DATASETS = ('china-repos.csv', 'pakistan-repos.csv')


def load_data(file_path):
    """Read a repository CSV and return ``[stars, forks, active_days]`` rows.

    Columns are looked up by header name, so their position in the file and
    any additional columns do not matter.  Completely blank lines are skipped
    by ``csv.DictReader``.

    Args:
        file_path: Path of a UTF-8 encoded CSV file with a header row.

    Returns:
        An ``int64`` array of shape ``(repos, 3)``.  ``active_days`` is the
        number of whole days between ``created_at`` and ``last_commit``.
        A file with no data rows yields an array of shape ``(0, 3)``.

    Raises:
        KeyError: If one of the required columns is missing from the header.
        ValueError: If a count is not an integer literal or a timestamp
            cannot be parsed by ``numpy.datetime64``.
    """
    stars, forks, created, committed = [], [], [], []
    # newline='' is what the csv module expects so that quoted fields
    # containing line breaks are parsed correctly.
    with open(file_path, newline='', encoding='utf-8') as csv_file:
        for record in csv.DictReader(csv_file):
            stars.append(int(record['stars']))
            forks.append(int(record['forks']))
            created.append(np.datetime64(record['created_at']))
            committed.append(np.datetime64(record['last_commit']))

    if not stars:
        # Without rows the date arrays would be plain float arrays and the
        # timedelta conversion below would not apply; return the empty shape.
        return np.empty((0, 3), dtype=np.int64)

    lifetime = np.array(committed) - np.array(created)
    # Timestamps finer than a day are reduced to whole days here.
    active_days = lifetime.astype('timedelta64[D]').astype(np.int64)
    return np.column_stack((
        np.array(stars, dtype=np.int64),
        np.array(forks, dtype=np.int64),
        active_days,
    ))


def calculate_statistics(data):
    """Compute per-column summary statistics rounded to one decimal place.

    Args:
        data: 2-D array-like with one row per repository and one column per
            metric, e.g. the result of ``load_data``.

    Returns:
        A dict with the keys ``'means'``, ``'medians'``, ``'variances'`` and
        ``'stds'``.  Each value is a 1-D float array holding one entry per
        input column.  Variance and standard deviation use NumPy's default
        ``ddof=0`` (population form).
    """
    values = np.asarray(data, dtype=float)
    reducers = {
        'means': np.mean,
        'medians': np.median,
        'variances': np.var,
        'stds': np.std,
    }
    return {
        key: np.round(reducer(values, axis=0), 1)
        for key, reducer in reducers.items()
    }


def print_results(stats):
    """Print the statistics of each metric as an indented text block.

    Values are read from ``stats`` by key, so the order in which the keys
    were inserted into the dict has no effect on the output.

    Args:
        stats: Mapping with the keys ``'means'``, ``'medians'``,
            ``'variances'`` and ``'stds'``, as returned by
            ``calculate_statistics``.
    """
    columns = zip(
        METRIC_LABELS,
        stats['means'],
        stats['medians'],
        stats['variances'],
        stats['stds'],
    )
    for label, mean, median, variance, std in columns:
        print(f"{label}:")
        print(f" Average: {mean:.1f}")
        print(f" Median: {median:.1f}")
        print(f" Variance: {variance:.1f}")
        print(f" Standard Deviation: {std:.1f}")


def main(argv=None):
    """Print the statistics report for each dataset path.

    Args:
        argv: Optional sequence of CSV paths.  When omitted, the paths are
            taken from the command line; when no path is available at all,
            ``DEFAULT_DATASETS`` is used.
    """
    paths = list(sys.argv[1:] if argv is None else argv)
    if not paths:
        paths = list(DEFAULT_DATASETS)
    for path in paths:
        print_results(calculate_statistics(load_data(path)))


if __name__ == "__main__":
    main()