python图⽚分类源代码_10种常⽤Matplotlib图的Python代码前⾔
本文的文字及图片来源于网络,仅供学习、交流使用,不具有任何商业用途,版权归原作者所有,如有问题请及时联系我们以作处理。
喜欢的朋友欢迎关注⼩编,除了分享技术⽂章之外还有很多福利,私信“资料”可以领取包括不限于Python实战演练、PDF电⼦⽂档、⾯试集锦、学习资料等。
柱状图
柱状图能有效地传达各项目的排名顺序。此外,在每个柱形上方标注度量值后,用户可以直接从图表本身获取精确数值。
# Bar chart of the mean 'cty' value per manufacturer, with each bar annotated
# with its value.
# NOTE(review): `pd` and `plt` are not imported in the visible part of this
# file; presumably pandas and matplotlib.pyplot — confirm against the setup code.
import matplotlib.patches as patches

# Prepare Data
df_raw = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")
# Average only the numeric 'cty' column (a frame-wide mean would also try to
# average the 'manufacturer' strings), sort ascending, and renumber the rows so
# the index doubles as the x position of each bar.
df = (
    df_raw.groupby('manufacturer', as_index=False)['cty']
    .mean()
    .sort_values('cty')
    .reset_index(drop=True)
)

# Draw plot
fig, ax = plt.subplots(figsize=(16, 10), facecolor='white', dpi=80)
ax.vlines(x=df.index, ymin=0, ymax=df.cty, color='firebrick', alpha=0.7, linewidth=20)

# Annotate Text: print the rounded value slightly above the top of each bar
for i, cty in enumerate(df.cty):
    ax.text(i, cty + 0.5, round(cty, 1), horizontalalignment='center')

# Title, Label, Ticks and Ylim
ax.set_title('Bar Chart for Highway Mileage', fontdict={'size':22})
ax.set(ylabel='Miles Per Gallon', ylim=(0, 30))
ax.set_xticks(df.index)
ax.set_xticklabels(df.manufacturer.str.upper(), rotation=60,
                   fontdict={'horizontalalignment': 'right', 'size': 12})

# Add patches to color the X axis labels.
# The rectangles are positioned in figure coordinates (0..1), hence the
# figure transform rather than the default data transform.
p1 = patches.Rectangle((.57, -0.005), width=.33, height=.13, alpha=.1,
                       facecolor='green', transform=fig.transFigure)
p2 = patches.Rectangle((.124, -0.005), width=.446, height=.13, alpha=.1,
                       facecolor='red', transform=fig.transFigure)
fig.add_artist(p1)
fig.add_artist(p2)
plt.show()
棒棒糖图
棒棒糖图以一种视觉上更美观的方式,实现与有序条形图类似的目的。
# Lollipop chart of the mean 'cty' value per manufacturer: a thin stem plus a
# dot at the value, annotated with the number.
# NOTE(review): `pd` and `plt` are not imported in the visible part of this
# file; presumably pandas and matplotlib.pyplot — confirm against the setup code.

# Prepare Data
df_raw = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")
# Average only the numeric 'cty' column, sort ascending, and renumber the rows.
# Keeping 'manufacturer' as a regular column is required by the tick labels below.
df = (
    df_raw.groupby('manufacturer', as_index=False)['cty']
    .mean()
    .sort_values('cty')
    .reset_index(drop=True)
)

# Draw plot
fig, ax = plt.subplots(figsize=(16, 10), dpi=80)
ax.vlines(x=df.index, ymin=0, ymax=df.cty, color='firebrick', alpha=0.7, linewidth=2)
ax.scatter(x=df.index, y=df.cty, s=75, color='firebrick', alpha=0.7)

# Title, Label, Ticks and Ylim
ax.set_title('Lollipop Chart for Highway Mileage', fontdict={'size':22})
ax.set_ylabel('Miles Per Gallon')
ax.set_xticks(df.index)
ax.set_xticklabels(df.manufacturer.str.upper(), rotation=60,
                   fontdict={'horizontalalignment': 'right', 'size':12})
ax.set_ylim(0, 30)

# Annotate: write the rounded value just above each dot
for row in df.itertuples():
    ax.text(row.Index, row.cty + .5, s=round(row.cty, 2),
            horizontalalignment='center', verticalalignment='bottom', fontsize=14)

plt.show()
连续变量的直⽅图
直方图显示给定变量的频率分布。下面的图基于分类变量对频率条进行分组(堆叠),从而更好地同时了解连续变量和分类变量。
# Stacked histogram of a continuous column ('displ'), one colored layer per
# value of a categorical column ('class').
# NOTE(review): `pd` and `plt` are not imported in the visible part of this
# file; presumably pandas and matplotlib.pyplot — confirm against the setup code.

# Import Data
df = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")

# Prepare data
x_var = 'displ'
groupby_var = 'class'
df_agg = df.loc[:, [x_var, groupby_var]].groupby(groupby_var)
# Collect the group names alongside the values so the legend labels are
# guaranteed to be in the same order as the stacked layers.
group_names = []
vals = []
for group_name, group_df in df_agg:
    group_names.append(group_name)
    vals.append(group_df[x_var].tolist())

# Draw
plt.figure(figsize=(16, 9), dpi=80)
# Spread the groups evenly over the colormap; the max() guards against a
# division by zero when there is only one group.
color_steps = max(len(vals) - 1, 1)
colors = [plt.cm.Spectral(i / float(color_steps)) for i in range(len(vals))]
# The third result is named `bar_containers` (not `patches`) so it does not
# shadow the `matplotlib.patches` module imported elsewhere in this file.
n, bins, bar_containers = plt.hist(vals, 30, stacked=True, density=False,
                                   color=colors, label=group_names)

# Decoration
plt.legend()
plt.title(f"Stacked Histogram of ${x_var}$ colored by ${groupby_var}$", fontsize=22)
plt.xlabel(x_var)
plt.ylabel("Frequency")
plt.ylim(0, 25)
plt.show()
分类变量的直⽅图
分类变量的直⽅图显⽰该变量的频率分布。通过对条形图进⾏着⾊,您可以将分布与表⽰颜⾊的另⼀个分类变量相关联。
# Stacked histogram of a categorical column ('manufacturer'), one colored
# layer per value of another categorical column ('class').
# NOTE(review): `pd` and `plt` are not imported in the visible part of this
# file; presumably pandas and matplotlib.pyplot — confirm against the setup code.

# Import Data
df = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")

# Prepare data
x_var = 'manufacturer'
groupby_var = 'class'
df_agg = df.loc[:, [x_var, groupby_var]].groupby(groupby_var)
# Collect the group names alongside the values so the legend labels are
# guaranteed to be in the same order as the stacked layers.
group_names = []
vals = []
for group_name, group_df in df_agg:
    group_names.append(group_name)
    vals.append(group_df[x_var].tolist())

# Draw
plt.figure(figsize=(16, 9), dpi=80)
# Spread the groups evenly over the colormap; the max() guards against a
# division by zero when there is only one group.
color_steps = max(len(vals) - 1, 1)
colors = [plt.cm.Spectral(i / float(color_steps)) for i in range(len(vals))]
# One bin per distinct x value. The third result is named `bar_containers`
# (not `patches`) so it does not shadow the `matplotlib.patches` module.
n, bins, bar_containers = plt.hist(vals, len(df[x_var].unique()), stacked=True,
                                   density=False, color=colors, label=group_names)

# Decoration
plt.legend()
plt.title(f"Stacked Histogram of ${x_var}$ colored by ${groupby_var}$", fontsize=22)
plt.xlabel(x_var)
plt.ylabel("Frequency")
plt.ylim(0, 40)
plt.show()
散点图
散点图(Scatterplot)是用于研究两个变量之间关系的经典和基本图。如果数据中有多个组,则可能需要以不同颜色可视化每个组。在 Matplotlib 中,你可以方便地使用 plt.scatter() 来实现。
# Scatter plot of 'area' against 'poptotal', drawn once per category so each
# category gets its own color and legend entry.
# NOTE(review): `pd`, `np` and `plt` are not imported in the visible part of
# this file; presumably pandas, numpy and matplotlib.pyplot — confirm.

# Import dataset
midwest = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/midwest_filter.csv")

# Prepare Data
# Create as many colors as there are unique midwest['category'];
# the max() guards against a division by zero with a single category.
categories = np.unique(midwest['category'])
color_steps = max(len(categories) - 1, 1)
colors = [plt.cm.tab10(i / float(color_steps)) for i in range(len(categories))]

# Draw Plot for Each Category
plt.figure(figsize=(16, 10), dpi=80, facecolor='w', edgecolor='k')
for i, category in enumerate(categories):
    # `color=` (not `c=`) is used for a single RGBA tuple: with `c=` a
    # 4-element tuple is treated as per-point values when a category happens
    # to contain exactly four rows.
    plt.scatter('area', 'poptotal',
                data=midwest.loc[midwest.category == category, :],
                s=20, color=colors[i], label=str(category))

# Decorations
# NOTE(review): the head of this call was lost in extraction; only the
# surviving label arguments are restored here.
plt.gca().set(xlabel='Area', ylabel='Population')
plt.title("Scatterplot of Midwest Area vs Population", fontsize=22)
plt.legend(fontsize=12)
plt.show()
树状图
树状图根据给定的距离度量将相似的点组合在⼀起,并根据该点的相似性将它们组织成树状链接。
# Dendrogram built from four numeric columns of the USArrests data, with the
# leaves labelled by state.
# NOTE(review): `pd` and `plt` are not imported in the visible part of this
# file; presumably pandas and matplotlib.pyplot — confirm against the setup code.
import scipy.cluster.hierarchy as shc

# Import Data
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/USArrests.csv')

# Plot
plt.figure(figsize=(16, 10), dpi=80)
plt.title("USArrests Dendograms", fontsize=22)
# Ward linkage over the four feature columns
linkage_matrix = shc.linkage(df[['Murder', 'Assault', 'UrbanPop', 'Rape']], method='ward')
dend = shc.dendrogram(linkage_matrix, labels=df.State.values, color_threshold=100)
plt.show()
⼈⼝⾦字塔
人口金字塔可用于显示按数量排序的各组的分布。或者,它也可以用来显示人群的逐步过滤(漏斗),例如下面用它来显示有多少人通过了营销渠道的每个阶段。
# Horizontal bar chart of 'Users' per 'Stage', drawn once per 'Gender' value so
# each group gets its own color and legend entry.
# NOTE(review): `pd`, `plt` and `sns` are not imported in the visible part of
# this file; presumably pandas, matplotlib.pyplot and seaborn — confirm.

# Read data
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/email_campaign_funnel.csv")

# Draw Plot
plt.figure(figsize=(13, 10), dpi=80)
group_col = 'Gender'
order_of_bars = df.Stage.unique()[::-1]  # stages in reverse order of first appearance
groups = df[group_col].unique()
# Spread the groups evenly over the colormap; the max() guards against a
# division by zero when there is only one group.
color_steps = max(len(groups) - 1, 1)
colors = [plt.cm.Spectral(i / float(color_steps)) for i in range(len(groups))]

for c, group in zip(colors, groups):
    sns.barplot(x='Users', y='Stage', data=df.loc[df[group_col] == group, :],
                order=order_of_bars, color=c, label=group)

# Decorations
plt.xlabel("$Users$")
plt.ylabel("Stage of Purchase")
plt.title("Population Pyramid of the Marketing Funnel", fontsize=22)
plt.legend()
plt.show()
饼图
饼图是显示各组构成的经典方法。但是,如今一般不建议使用它,因为扇区的面积有时可能会引起误解。因此,如果要使用饼图,强烈建议明确写出饼图各部分的百分比或数字。
# Pie chart of the number of rows per vehicle 'class'.
# NOTE(review): `pd` and `plt` are not imported in the visible part of this
# file; presumably pandas and matplotlib.pyplot — confirm against the setup code.

# Import
df_raw = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")

# Prepare Data
df = df_raw.groupby('class').size()

# Make the plot with pandas.
# The figure size and dpi are set on the figure itself: extra keywords given to
# `Series.plot(kind='pie')` are forwarded to the pie call, which has no `dpi`.
fig, ax = plt.subplots(figsize=(8, 8), dpi=80)
df.plot(kind='pie', ax=ax)
plt.title("Pie Chart of Vehicle Class - Bad")
plt.ylabel("")
plt.show()
时间序列图
时间序列图用于可视化给定指标如何随时间变化。在这里,您可以了解1949年至1960年之间的航空客运流量如何变化。
# Line plot of the 'traffic' column against 'date' from the AirPassengers data.
# NOTE(review): `pd` and `plt` are not imported in the visible part of this
# file; presumably pandas and matplotlib.pyplot — confirm against the setup code.

# Import Data
df = pd.read_csv('https://github.com/selva86/datasets/raw/master/AirPassengers.csv')

# Draw Plot
plt.figure(figsize=(16, 10), dpi=80)
plt.plot('date', 'traffic', data=df, color='tab:red')

# Decoration
plt.ylim(50, 750)
# Every 12th row position, and the last four characters of the matching date.
# NOTE(review): presumably one tick per year on monthly data — confirm the
# row frequency and the date format in the CSV.
xtick_location = df.index.tolist()[::12]
xtick_labels = [x[-4:] for x in df.date.tolist()[::12]]