# Prepare Data
# NOTE(review): `midwest` is not created in this snippet; it is presumably a
# DataFrame loaded earlier with 'category', 'area' and 'poptotal' columns.
# Create as many colors as there are unique midwest['category']
categories = np.unique(midwest['category'])
# Spread the categories evenly over the colormap's [0, 1] range. max(..., 1)
# avoids a ZeroDivisionError when there is only one category.
color_steps = max(len(categories) - 1, 1)
colors = [plt.cm.tab10(i / color_steps) for i in range(len(categories))]

# Draw Plot for Each Category
plt.figure(figsize=(16, 10), dpi=80, facecolor='w', edgecolor='k')

# One scatter call per category so each gets its own colour and legend entry.
for category, color in zip(categories, colors):
    plt.scatter('area', 'poptotal',
                data=midwest.loc[midwest.category == category, :],
                s=20, c=color, label=str(category))

# Decorations
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.title('Scatterplot of Midwest Area vs Population', fontsize=22)
plt.legend(fontsize=12)
plt.show()
# Step 1: Prepare Data
# The host part of the URL was truncated ("https://raw./..."); restored to the
# raw.githubusercontent.com host that serves this repository's files.
midwest = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/midwest_filter.csv')

# As many colors as there are unique midwest['category']
categories = np.unique(midwest['category'])
# max(..., 1) avoids a ZeroDivisionError when there is only one category.
color_steps = max(len(categories) - 1, 1)
colors = [plt.cm.tab10(i / color_steps) for i in range(len(categories))]

# Step 2: Draw Scatterplot with unique color for each category
fig = plt.figure(figsize=(16, 10), dpi=80, facecolor='w', edgecolor='k')

for category, color in zip(categories, colors):
    # s='dot_size' is a string key looked up in `data`, so marker sizes are
    # presumably taken per row from a 'dot_size' column -- verify the dataset.
    plt.scatter('area', 'poptotal',
                data=midwest.loc[midwest.category == category, :],
                s='dot_size', c=color, label=str(category),
                edgecolors='black', linewidths=.5)
# Import Data
# The host part of the URL was truncated ("https://raw./..."); restored to the
# raw.githubusercontent.com host that serves this repository's files.
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/mpg_ggplot2.csv')
# Keep only the rows whose cyl value is 4 or 8.
df_select = df.loc[df.cyl.isin([4, 8]), :]

# Each line in its own column
sns.set_style('white')
gridobj = sns.lmplot(
    x='displ',
    y='hwy',
    data=df_select,
    height=7,
    robust=True,
    palette='Set1',
    col='cyl',  # one facet (column) per cyl value
    scatter_kws=dict(s=60, linewidths=.7, edgecolors='black'),
)
# NOTE(review): ax_main, ax_right, ax_bottom and df are created outside this
# snippet; they are used here as already-existing axes and DataFrame.

# Scatterplot on main ax
# Marker size scales with cty; colour is the integer code of each manufacturer.
ax_main.scatter('displ', 'hwy',
                s=df.cty * 5,
                c=df.manufacturer.astype('category').cat.codes,
                alpha=.9, data=df, cmap='Set1',
                edgecolors='black', linewidths=.5)

# Add a graph in each part
# Pass the vectors by keyword: a bare positional vector is interpreted
# differently across seaborn versions, which can flip the box orientation.
sns.boxplot(y=df.hwy, ax=ax_right, orient='v')
sns.boxplot(x=df.displ, ax=ax_bottom, orient='h')

# Decorations ------------------
# Remove x axis name for the boxplot
ax_bottom.set(xlabel='')
ax_right.set(ylabel='')

# Main Title, Xlabel and YLabel
# NOTE(review): the title says "Histograms" but the marginal plots drawn above
# are boxplots -- confirm the intended wording.
ax_main.set(title='Scatterplot with Histograms displ vs hwy', xlabel='displ', ylabel='hwy')

# Set font size of different components
ax_main.title.set_fontsize(20)
labelled_items = (
    [ax_main.xaxis.label, ax_main.yaxis.label]
    + ax_main.get_xticklabels()
    + ax_main.get_yticklabels()
)
for item in labelled_items:
    item.set_fontsize(14)
# Decorations
# NOTE(review): `df` comes from the preceding (not shown) data-preparation
# step; it is used here for its index and its `cars` column.
plt.gca().set(ylabel='$Model$', xlabel='$Mileage$')
# Label each y position (row index) with the corresponding car name.
plt.yticks(df.index, df.cars, fontsize=12)
plt.title('Diverging Bars of Car Mileage', fontdict={'size':20})
plt.grid(linestyle='--', alpha=0.5)
plt.show()
11. 发散型文本
发散型文本图与发散型条形图类似;如果你想以美观、清晰的方式在图表中显示每个项目的数值,这种图更为合适。
# Prepare Data
df = pd.read_csv('https://github.com/selva86/datasets/raw/master/mtcars.csv')
x = df.loc[:, ['mpg']]
# Standardise mpg (subtract the mean, divide by the standard deviation) so the
# values diverge around 0.
df['mpg_z'] = (x - x.mean()) / x.std()
df['colors'] = ['red' if z < 0 else 'green' for z in df['mpg_z']]
# Sort by z-score, then rebuild a 0..n-1 index to use as the y position.
df.sort_values('mpg_z', inplace=True)
df.reset_index(inplace=True)

# Draw plot
plt.figure(figsize=(14, 14), dpi=80)
plt.hlines(y=df.index, xmin=0, xmax=df.mpg_z)
for z_score, row in zip(df.mpg_z, df.index):
    # Put the label on the outer side of the bar end: to the left of negative
    # values (right-aligned) and to the right of positive ones (left-aligned).
    plt.text(z_score, row, round(z_score, 2),
             horizontalalignment='right' if z_score < 0 else 'left',
             verticalalignment='center',
             fontdict={'color': 'red' if z_score < 0 else 'green', 'size': 14})

# Decorations
plt.yticks(df.index, df.cars, fontsize=12)
plt.title('Diverging Text Bars of Car Mileage', fontdict={'size':20})
plt.grid(linestyle='--', alpha=0.5)
plt.xlim(-2.5, 2.5)
plt.show()
# Prepare Data
df = pd.read_csv('https://github.com/selva86/datasets/raw/master/mtcars.csv')
x = df.loc[:, ['mpg']]
# Standardise mpg (subtract the mean, divide by the standard deviation) so the
# values diverge around 0.
df['mpg_z'] = (x - x.mean()) / x.std()
df['colors'] = ['red' if z < 0 else 'darkgreen' for z in df['mpg_z']]
# Sort by z-score, then rebuild a 0..n-1 index to use as the y position.
df.sort_values('mpg_z', inplace=True)
df.reset_index(inplace=True)

# Draw plot
plt.figure(figsize=(14, 16), dpi=80)
plt.scatter(df.mpg_z, df.index, s=450, alpha=.6, color=df.colors)
# Write the rounded z-score in white, centred on each dot.
for z_score, row in zip(df.mpg_z, df.index):
    plt.text(z_score, row, round(z_score, 1),
             horizontalalignment='center',
             verticalalignment='center',
             fontdict={'color': 'white'})
# Decoration
# NOTE(review): `ax` is created outside this snippet.
ax.set_title('Slopechart: Comparing GDP Per Capita between 1952 vs 1957', fontdict={'size':22})
ax.set(xlim=(0, 4), ylim=(0, 14000), ylabel='Mean GDP Per Capita')
# Ticks at x=1 and x=3 carry the two year labels.
ax.set_xticks([1, 3])
ax.set_xticklabels(['1952', '1957'])
plt.yticks(np.arange(500, 13000, 2000), fontsize=12)
# Import Data
# The host part of the URL was truncated ("https://raw./..."); restored to the
# raw.githubusercontent.com host that serves this repository's files.
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/health.csv')
# Order rows by pct_2014, then rebuild a 0..n-1 index.
df.sort_values('pct_2014', inplace=True)
df.reset_index(inplace=True)
# Func to draw line segment
def newline(p1, p2, color='black'):
    """Draw a straight segment from p1 to p2 on the current axes.

    Args:
        p1: Start point; p1[0] is used as x and p1[1] as y.
        p2: End point, indexed the same way as p1.
        color: Accepted but currently not used; the segment is always drawn
            in 'skyblue'.

    Returns:
        The Line2D object that was added to the current axes.
    """
    ax = plt.gca()
    # NOTE(review): `color` is not forwarded here. The hard-coded 'skyblue' is
    # kept so existing callers render exactly as before -- confirm whether the
    # parameter was meant to be honoured.
    segment = mlines.Line2D([p1[0], p2[0]], [p1[1], p2[1]], color='skyblue')
    ax.add_line(segment)
    return segment
# Figure and Axes
# A single axes on a light-grey ('#f7f7f7') figure background.
fig, ax = plt.subplots(1, 1, figsize=(14, 14), facecolor='#f7f7f7', dpi=80)
# Import Data
df = pd.read_csv('https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv')

# Prepare data
x_var = 'displ'
groupby_var = 'class'
df_agg = df.loc[:, [x_var, groupby_var]].groupby(groupby_var)
# One list of x_var values per group, in the order groupby yields the groups.
vals = [group[x_var].values.tolist() for _, group in df_agg]

# Draw
plt.figure(figsize=(16, 9), dpi=80)
# One Spectral colour per group; max(..., 1) avoids a ZeroDivisionError when
# there is only a single group.
color_steps = max(len(vals) - 1, 1)
colors = [plt.cm.Spectral(i / color_steps) for i in range(len(vals))]
n, bins, patches = plt.hist(vals, 30, stacked=True, density=False, color=colors)

# Decoration
# The legend only needs the label sequence (sorted unique group names); the
# colours come from the histogram artists themselves.
plt.legend(np.unique(df[groupby_var]).tolist())
plt.title(f'Stacked Histogram of ${x_var}$ colored by ${groupby_var}$', fontsize=22)
plt.xlabel(x_var)
plt.ylabel('Frequency')
plt.ylim(0, 25)
# Label every third bin edge, rounded to one decimal.
plt.xticks(ticks=bins[::3], labels=[round(b, 1) for b in bins[::3]])
plt.show()
# Import Data
df = pd.read_csv('https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv')

# Prepare data
x_var = 'manufacturer'
groupby_var = 'class'
df_agg = df.loc[:, [x_var, groupby_var]].groupby(groupby_var)
# One list of x_var values per group, in the order groupby yields the groups.
vals = [group[x_var].values.tolist() for _, group in df_agg]

# Draw
plt.figure(figsize=(16, 9), dpi=80)
# One Spectral colour per group; max(..., 1) avoids a ZeroDivisionError when
# there is only a single group.
color_steps = max(len(vals) - 1, 1)
colors = [plt.cm.Spectral(i / color_steps) for i in range(len(vals))]
# One bin per distinct x_var value.
n, bins, patches = plt.hist(vals, len(df[x_var].unique()), stacked=True,
                            density=False, color=colors)

# Decoration
# The legend only needs the label sequence (sorted unique group names); the
# colours come from the histogram artists themselves.
plt.legend(np.unique(df[groupby_var]).tolist())
plt.title(f'Stacked Histogram of ${x_var}$ colored by ${groupby_var}$', fontsize=22)
plt.xlabel(x_var)
plt.ylabel('Frequency')
plt.ylim(0, 40)
# `bins` holds one more edge than there are categories; drop the last edge so
# the number of tick positions matches the number of labels (a mismatch raises
# ValueError in current matplotlib). Labels stay anchored at left bin edges.
plt.xticks(ticks=bins[:-1], labels=np.unique(df[x_var]).tolist(),
           rotation=90, horizontalalignment='left')
plt.show()
# Mean and Median city mileage by make
# NOTE(review): `df_raw` is loaded outside this snippet.
# Aggregate with the groupby's own mean()/median() rather than
# apply(lambda x: x.mean()): the lambda also receives the string
# 'manufacturer' column, which newer pandas refuses to average.
cty_by_make = df_raw[['cty', 'manufacturer']].groupby('manufacturer')
df = cty_by_make.mean()
# Highest mean city mileage first; then turn 'manufacturer' back into a column.
df.sort_values('cty', ascending=False, inplace=True)
df.reset_index(inplace=True)
df_median = cty_by_make.median()