# D19 Python 資料視覺化工具與常見統計圖表介紹

## Matplotlib 操作方式

`import matplotlib.pyplot as plt`

## Matplotlib：常用參數說明

`matplotlib.pyplot.plot(*args,scalex=True,scaley=True,data=None,**kwargs)`
```python
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(0, 5, 0.1)
y = np.sin(x)
plt.plot(x, y,)
```
```python
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(10)
plt.plot(x,x*1.5,'go:',x,x*2.5,'rx',x,x*3.5,'^',x,x*4.5,'bd-.')
```

## Matplotlib：figure 參數說明

`matplotlib.pyplot.figure(num=None, figsize=None, dpi=None, facecolor=None, edgecolor=None, frameon=True, FigureClass=<class 'matplotlib.figure.Figure'>, clear=False, **kwargs)`
```python
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(0, 5, 0.1)
y = np.sin(x)
plt.figure(figsize=(8,6),
           facecolor='c')
plt.plot(x,y)
plt.xlabel("X")
plt.ylabel("Y")
plt.title("Plot with figsize (8,6)")
plt.show()
```

## Matplotlib：subplot 參數說明

`matplotlib.pyplot.subplot(nrows, ncols, index, **kwargs)`
```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2) # 建立 2*2 多維視窗
data = pd.Series(np.random.rand(5), index=list('12345'))
#ax為選擇畫圖視窗，color為顏色，alpha為透明度設定
data.plot.bar(ax=axes[0,1], color='b', alpha = 1)
data.plot.barh(ax=axes[1,1], color='b', alpha=0.5)
data.plot.bar(ax=axes[1,0], color='c', alpha = 0.8)
data.plot.barh(ax=axes[0,0], color='r', alpha=0.5)
plt.show()
```

## 範例說明：散點圖(Scatter Plots)

`matplotlib.pyplot.scatter(x, y, s=None, c=None, marker=None, cmap=None, norm=None, vmin=None,             vmax=None, alpha=None, linewidths=None, verts=<deprecated parameter>,             edgecolors=None, *, plotnonfinite=False, data=None, **kwargs)`
```python
X = np.random.normal(0, 1, 100)
Y = np.random.normal(0, 1, 100)
plt.scatter(X, Y, color='b', alpha=0.5,s=100, edgecolors='red')
plt.title("Scatter plot ")
```

## 範例說明：長條圖(Bar Plots)

```python
x = np.arange(0., 10., 0.7)
y = np.arange(0., 10., 0.7)
plt.bar(x, y)
```

# D20 使用 Matplotlib 繪製各種常用圖表

```python
# 載入需要的...
import matplotlib.pyplot as plt
import numpy as np

# 準備數據 ... 假設我要畫一個sin波 從0~180度
x = np.arange(0,180)
y = np.sin(x * np.pi / 180.0)

# 開始畫圖
# 設定要畫的x,y數據list....
plt.plot(x,y)

# 設定圖的範圍, 不設的話，系統會自行決定
plt.xlim(-30,390)
plt.ylim(-1.5,1.5)

# 照需要寫入x 軸和y軸的 label 以及title
plt.xlabel("x-axis")
plt.ylabel("y-axis")
plt.title("The Title")

# 在這個指令之前，都還在做畫圖的動作
# 這個指令算是 "秀圖"
plt.show()
```
```python
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(10,6)) #設定 figure 的尺寸
ax1 = fig.add_subplot(3,1,1) #分別畫出三格圖象，都可以針對特定圖象編輯與繪圖
ax2 = fig.add_subplot(3,1,2)
ax3 = fig.add_subplot(3,1,3)
plt.show()
```
```python
import numpy as np
import matplotlib.pyplot as plt

#設定 figure 的尺寸
fig = plt.figure(figsize=(10,6))

#設定 x , y  數值
x = np.arange(0, 3 * np.pi, 0.1)
y_sin = np.sin(x)
y_cos = np.cos(x)

#設定雙格畫板以及選特定畫板畫圖
plt.subplot(2, 1, 1)
plt.plot(x, y_sin)
#命名圖象
plt.title("Sine")

plt.subplot(2, 1, 2)
plt.plot(x, y_cos)
#命名圖象
plt.title("Cosine")

plt.show()
```

## 除了 PLT 之外的繪圖：figure 參數說明

`matplotlib.pyplot.figure(num=None, figsize=None, dpi=None, facecolor=None, edgecolor=None, frameon=True, FigureClass=<class 'matplotlib.figure.Figure'>, clear=False, **kwargs)`

## 製作繪圖板 Axes

```python
#import matplotlib.pyplot as plt
#決定最外框
plt.axes([0.1,0.1,.8,.8])
plt.xticks([]), plt.yticks([])
plt.text(0.6,0.6, 'axes([0.1,0.1,.8,.8])',ha='center',va='center',size=20,alpha=.5)

#決定內框
plt.axes([0.2,0.2,.3,.3])
plt.xticks([]), plt.yticks([])
plt.text(0.5,0.5, 'axes([0.2,0.2,.3,.3])',ha='center',va='center',size=16,alpha=.5)
plt.show()
```

## 製作 3D 繪圖板

```python
#導入必要的模組
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# 創建一個3d坐標系
fig = plt.figure()
ax = Axes3D(fig)

#直接查詢參數與設定
#help(plt.plot)
#help(np.random.sample)

# 利用x軸和y軸繪製cos曲線
x = np.linspace(0, 1, 100) # linspace創建等差陣列
y = np.cos(x * 2 * np.pi) / 2 + 0.5
# 通過zdir = 'z' 將資料繪製在z軸，zs = 0.5 則是將資料繪製在z = 0.5的地方
ax.plot(x, y, zs = 0.5, zdir = 'z', color = 'black', label = 'curve in (x, y)')
```

# D21 使用 Seaborn 進行資料視覺化

```python
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# data 設置
data = np.array([3, 12, 5, 18, 45])
bars = np.array(['A', 'B', 'C', 'D', 'E'])
y_pos = np.arange(len(bars))
plt.yticks([3,5,12,18,45]) # y 軸的刻度值
sns.barplot(y_pos, data, color=(0.2, 0.8, 0.6, 0.6))
#color 為 RGB 及 Alpha 調整
plt.legend(['b'])  # 左上圖例添加

# Custom Axis title 需要調整 matplotlib 設置 x , y座標
plt.xlabel('title of the xlabel', fontweight='bold', color = 'orange', fontsize='17', horizontalalignment='center');
plt.ylabel('title of the ylabel ',fontweight='bold', color = 'r', fontsize='10', horizontalalignment='center')
```
`sns.set_style('darkgrid') # 設定圖形樣式`
```python
def sinplot(flip=1):
    x = np.linspace(0, 14, 100)
    for i in range(1, 7):
        plt.plot(x, np.sin(x + i * .5) * (7 - i) * flip)

# sns.set()
sinplot()
```
```python
def sinplot(flip=1):
    x = np.linspace(0, 14, 100)
    for i in range(1, 7):
        plt.plot(x, np.sin(x + i * .5) * (7 - i) * flip)

sns.set_style("whitegrid")
#加入sns.set_style()，並於括弧內加入欲選擇的主題，如：sns.set_style("whitegrid")，
#則可得範例二的圖像。範例三、四、五則以此類推。
sinplot()
```

## seaborn 視覺化線性關係

```python
sns.set(style="whitegrid")
tips = sns.load_dataset("tips")
sns.lmplot(x="total_bill", y="tip", data= tips)
```
```python
sns.set(style="whitegrid")
tips = sns.load_dataset("tips")
sns.lmplot(x="total_bill", y="tip", col='time', hue='smoker', data= tips)
```

## seaborn 使用語義映射繪製數據子集

```python
sns.set(style="whitegrid")
fmri = sns.load_dataset("fmri")
sns.relplot(x="timepoint", y="signal", kind='line', hue="event" , data=fmri)
```
```python
sns.set(style="whitegrid")
fmri = sns.load_dataset("fmri")
sns.relplot(x="timepoint", y="signal", kind='line', hue="region" , style="event", data=fmri)
```
```python
sns.set(style="whitegrid")
fmri = sns.load_dataset("fmri")
sns.relplot(x="timepoint", y="signal", hue="event", style="event",
        kind="line", data=fmri)
```

## seaborn 聚合和表示不確定性

```python
sns.set(style="dark")
fmri = sns.load_dataset("fmri") #載入數據集
fmri.sample(50)  #可檢視五十筆隨機資料
```
```python
sns.set(style="whitegrid")
fmri = sns.load_dataset("fmri")
sns.relplot(x="timepoint", y="signal", kind='line',data=fmri)
```
```python
sns.set(style="whitegrid")
fmri = sns.load_dataset("fmri")
sns.relplot(x="timepoint", y="signal", kind='line', ci="sd",
            data=fmri)
```
```python
sns.set(style="whitegrid")
fmri = sns.load_dataset("fmri")
sns.relplot(x="timepoint", y="signal", kind='line', ci= None,
            data=fmri)
```

## seaborn 視覺化統計關係

```python
#定義主題風格
sns.set(style="darkgrid")
#載入tips
tips = sns.load_dataset("tips")
#繪製圖形，根據不同類型的三點設定圖
sns.relplot(x="total_bill", y="tip", hue="smoker", style="time", data=tips)
plt.show()
```
```python
#可繪製有漸變效果的散點圖
sns.relplot(x="total_bill", y="tip", hue="size", palette="ch:r=-.5,l=.75", data=tips);
plt.show()
```
```python
sns.set(color_codes=True)
mean, cov = [0, 1], [(1, .5), (.5, 1)] #設定兩組參數
x, y = np.random.multivariate_normal(mean, cov, 1000).T
with sns.axes_style("ticks"):
    sns.jointplot(x=x, y=y, kind="hex", color="r")
plt.show()
```
```python
flights = sns.load_dataset("flights") # 載入航班數據
flights = flights.pivot("month", "year", "passengers") #修改數據排列
# 劃分每格單元的行寬度，使用不同的colormap，取消顏色條
sns.heatmap(flights, linewidths=1 , cmap="YlGnBu", cbar=False)
plt.show()
```

## seaborn 視覺化數據集的分佈

```python
import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(color_codes=True)
np.random.seed(sum(map(ord, "distributions")))
```
```python
x = np.random.normal(size = 100)
sns.distplot(x)
```
```python
x = np.random.normal(size=100)
sns.distplot(x, kde=False, rug=True)
```
```python
x = np.random.normal(size=100)
sns.distplot(x, bins=20 , kde=False, rug=True)
```
```python
x = np.random.normal(size=100)
sns.distplot(x, hist=False, rug=True)
```
```python
x = np.random.normal(0, 1, size=30)
bandwidth = 1.06 * x.std() * x.size ** (-1 / 5.)
support = np.linspace(-4, 4, 200)

kernels = []
for x_i in x:
    kernel = stats.norm(x_i, bandwidth).pdf(support)
    kernels.append(kernel)
    plt.plot(support, kernel, color="r")

sns.rugplot(x, color=".2", linewidth=3)
```
```python
from scipy.integrate import trapz
density = np.sum(kernels, axis=0)
density /= trapz(density, support)
plt.plot(support, density)
```
`sns.kdeplot(x, shade=True)`
```python
import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib.pyplot as plt
import seaborn as sns

mean, cov = [0, 1], [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])
```
`sns.jointplot(x="x", y="y", data=df)`
```python
x, y = np.random.multivariate_normal(mean, cov, 1000).T
with sns.axes_style("white"):
    sns.jointplot(x=x, y=y, kind="hex", color="k");
```

# D22 運用實際資料集進行資料視覺化練習

```python
# 導入必要的程式庫
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# 取得鳶尾花資料集
df = sns.load_dataset('iris')
df.info()
```
`sns.boxplot(data = df, orient = "h")`
`sns.stripplot(x = "species", y = "petal_length", data = df)`
`sns.swarmplot(x = "species", y = "petal_length", data = df)`
```python
sns.boxplot(x = "species", y = "petal_length", data = df)
sns.swarmplot(x = "species", y = "petal_length", data = df)
```

## 核密度估計(Kernel Density Estimates, KDE)

```python
sns.set_style("ticks")
sns.pairplot(df,hue = 'species',diag_kind = "kde",kind = "scatter",palette = "husl")
```
```python
g = sns.pairplot(df,hue = 'species',diag_kind = "kde",kind = "scatter",palette = "husl")
g.map_upper(plt.scatter)
g.map_lower(sns.kdeplot, cmap = "Blues_d")
g.map_diag(sns.kdeplot, lw = 3, legend = False)
```

# D23 結合 Pandas 與 Matplotlib 進行進階資料視覺化練習

`df_white.duplicated().sum(),df_red.duplicated().sum()`
```python
df_white.drop_duplicates(inplace = True)
df_red.drop_duplicates(inplace = True)
```
`df_red.isnull().sum().sum(), df_white.isnull().sum().sum()`

## 直方圖

```python
df_wine.hist(bins=10, color='lightblue',edgecolor='blue',xlabelsize=8, ylabelsize=8, grid=False)
plt.tight_layout(rect=(1,1,3,3))
```

## 熱力圖：熱圖中的梯度根據屬性之間的相關性強度而變化。

```python
f, ax = plt.subplots(figsize=(10, 6))
b = sns.heatmap(df_all.corr(), annot=True, linewidths=.05, ax=ax)
f.subplots_adjust(top=0.93)
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + 0.5, top - 0.5)
title= f.suptitle('Correlation Heatmap for wine attributes', fontsize=12)
```

## 聯合圖

`sns.jointplot(data=df_wine, x="fixed acidity", y="volatile acidity", kind = 'reg')`
`sns.jointplot(data=df_wine, x="alcohol", y="chlorides", kind = 'reg')`
`sns.jointplot(data=df_wine, x="volatile acidity", y="citric acid", kind = 'reg')`

# D24 BOKEH 輕鬆以網頁呈現視覺化圖表

## Bokeh 程式的基本運作

```python
from bokeh.resources import INLINE
import bokeh.io
bokeh.io.output_notebook(INLINE)
```
```python
from bokeh.plotting import figure, show
from bokeh.models import widgets
from bokeh.io import output_notebook, output_file
```
`output_notebook()`
`output_file("out.html")`
```python
p = figure()
p.line([1,2,3,4,5], [5,4,3,2,1])
```
`show(p)`
```python
p = figure(width=800,height=300)
p.circle([1,2,3],[2,5,3], size=[10,20,30], color=["pink","olive","gold"])
show(p)
```

## 網頁元件與互動圖表

```python
from IPython.display import IFrame
IFrame('https://demo.bokeh.org/sliders', width=900, height=500)
```

## 製作互動圖表 — CustomJS 回調

```python
#定義互動過程 (code="""JavaScript代碼""")
callback = CustomJS(args=dict(source=source), code="""
   var data = source.data;
   var f = cb_obj.value
   var x = data['x']
   var y = data['y']
   for (var i = 0; i < x.length; i++) {
       y[i] = Math.pow(x[i], f)
   }
   source.change.emit();
""")
```
```python
#建立並給定部件名稱
slider = Slider(start=0.1, end=4, value=1, step=.1, title="power")
slider.js_on_change('value', callback)
```
```python
# 建立頁面框架
layout = column(slider, plot)
# 結果呈現
show(layout)
```

## CustomJS 選擇

```python
# customjs for selections
from random import random

from bokeh.layouts import row
from bokeh.models import ColumnDataSource, CustomJS
from bokeh.plotting import figure, output_file, show

# 儲存 HTML 檔案
output_file("callback.html")

x = [random() for x in range(500)]
y = [random() for y in range(500)]

s1 = ColumnDataSource(data=dict(x=x, y=y))
p1 = figure(plot_width=400, plot_height=400, tools="lasso_select", title="Select Here")
p1.circle('x', 'y', source=s1, alpha=0.6)

s2 = ColumnDataSource(data=dict(x=[], y=[]))
p2 = figure(plot_width=400, plot_height=400, x_range=(0, 1), y_range=(0, 1),
            tools="", title="Watch Here")
p2.circle('x', 'y', source=s2, alpha=0.6)

s1.selected.js_on_change('indices', CustomJS(args=dict(s1=s1, s2=s2), code="""
        var inds = cb_obj.indices;
        var d1 = s1.data;
        var d2 = s2.data;
        d2['x'] = []
        d2['y'] = []
        for (var i = 0; i < inds.length; i++) {
            d2['x'].push(d1['x'][inds[i]])
            d2['y'].push(d1['y'][inds[i]])
        }
        s2.change.emit();
    """))

layout = row(p1, p2)
show(layout)
```

## 邊緣和節點渲染器 GraphRenderer

```python
import networkx as nx

#建立互動網路圖
G=nx.karate_club_graph()

#載入相關的套件
from bokeh.io import output_file, show
from bokeh.models import (BoxSelectTool, Circle, EdgesAndLinkedNodes, HoverTool,
             MultiLine, NodesAndLinkedEdges, Plot, Range1d, TapTool,)
from bokeh.palettes import Spectral4
from bokeh.plotting import from_networkx

#建立互動網路圖
G=nx.karate_club_graph()
plot = Plot(plot_width=400, plot_height=400,
            x_range=Range1d(-1.1,1.1), y_range=Range1d(-1.1,1.1))
plot.title.text = "Graph Interaction Demonstration"
plot.add_tools(HoverTool(tooltips=None), TapTool(), BoxSelectTool())
graph_renderer = from_networkx(G, nx.circular_layout, scale=1, center=(0,0))

#建立節點交互
graph_renderer.node_renderer.glyph
#建立邊緣回饋交互
graph_renderer.edge_renderer.glyph

#選擇策略
graph_renderer.selection_policy = NodesAndLinkedEdges()
graph_renderer.inspection_policy = EdgesAndLinkedNodes()

#繪製GRAPH
plot.renderers.append(graph_renderer)

#輸出
output_file("interactive_graphs.html")
```