Python 数据分析工具链
Python 拥有丰富的数据分析生态系统。本文介绍最核心的库和常见用法。
NumPy
# NumPy basics: build a few arrays and print simple descriptive statistics.
import numpy as np

# Array creation: from a Python list, zero-filled with an explicit shape,
# and 100 samples drawn from the standard normal distribution.
arr = np.array([1, 2, 3, 4, 5])
zeros = np.zeros((3, 4))
random_arr = np.random.randn(100)

# Descriptive statistics of `arr`.
print(arr.mean())
print(arr.std())  # population standard deviation (NumPy's default ddof=0)
print(arr.sum())
Pandas
# Pandas basics: load a CSV, inspect it, filter/aggregate, handle missing data.
import pandas as pd

df = pd.read_csv('data.csv')

# Quick inspection of the loaded frame.
print(df.head())
df.info()  # info() prints its report itself and returns None; do not wrap in print()
print(df.describe())

# Row filtering and per-group aggregation.
filtered = df[df['age'] > 25]
# numeric_only=True: since pandas 2.0 mean() no longer silently skips
# non-numeric columns and raises TypeError if any are present.
grouped = df.groupby('category').mean(numeric_only=True)

# Missing-value handling. These are two alternative strategies: once dropna()
# has removed every row containing NaN, the fillna() call has nothing left to
# fill. In real code pick one of them.
df.dropna(inplace=True)
df.fillna(0, inplace=True)
Matplotlib
# Matplotlib basics: a line chart and a bar chart.
# Relies on an existing DataFrame `df` that has 'date', 'value' and
# 'category' columns.
import matplotlib.pyplot as plt

# Line chart of value against date.
plt.plot(df['date'], df['value'])
plt.title('Trend Over Time')
plt.xlabel('Date')
plt.ylabel('Value')
plt.show()

# Bar chart of how often each category occurs.
df['category'].value_counts().plot(kind='bar')
plt.show()
简单分析流程
# Minimal end-to-end analysis: load sales data, clean it, aggregate revenue
# by month, and plot the result.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('sales.csv')

# Parse the date column so the .dt accessor is available, then drop every
# row that contains a missing value in any column.
df['date'] = pd.to_datetime(df['date'])
df = df.dropna()

# Total revenue per calendar month.
# NOTE: dt.month yields the month number (1-12), so the same month from
# different years is summed together. If the data spans several years and
# that is not wanted, group by df['date'].dt.to_period('M') instead.
monthly = df.groupby(df['date'].dt.month)['revenue'].sum()

monthly.plot(kind='bar', title='Monthly Revenue')
plt.show()