def J(w1, w2):
    """Evaluate the quadratic objective at ``(w1, w2)``.

    The function is ``0.5*w1**2 + 20*w2**2 - 5*w1 + 10*w2 - 7``; the terms
    are accumulated left to right in that order.
    """
    total = 0.5 * w1**2
    total = total + 20 * w2**2
    total = total - 5 * w1
    total = total + 10 * w2
    return total - 7
def dJ(w1, w2):
    """Return the partial derivatives of ``J`` as ``(dJ/dw1, dJ/dw2)``."""
    grad_w1 = w1 - 5
    grad_w2 = 40 * w2 + 10
    return grad_w1, grad_w2
def sgd(w1, w2, n, eta=0.01):
    """Take ``n`` plain gradient-descent steps on ``J`` from ``(w1, w2)``.

    ``eta`` is the step size.  Returns two lists of length ``n + 1`` with the
    successive values of ``w1`` and ``w2``; the first entry of each is the
    starting value.
    """
    path1 = [w1]
    path2 = [w2]
    for _ in range(n):
        grad1, grad2 = dJ(w1, w2)
        # Move against the gradient, scaled by the step size.
        w1 = w1 + (-eta * grad1)
        w2 = w2 + (-eta * grad2)
        path1.append(w1)
        path2.append(w2)
    return path1, path2
# Starting point shared by the runs below, then 50 gradient-descent steps
# with step size 0.04.
w1,w2 = -7.,2.
w1_,w2_ = sgd(w1,w2,n=50,eta=0.04)
# วาดกราฟทั้งสองและสามมิติ
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def plot(x, y):
    """Draw an optimisation path over ``J`` as a 2-D contour and a 3-D surface.

    ``x`` and ``y`` are the successive ``w1`` and ``w2`` values of the path.
    Points are coloured from first to last with the ``summer`` colormap.
    Two figures are created and then shown with ``plt.show()``.
    """
    z = J(x, y)
    # Dense grid on which the objective is evaluated for the background.
    grid_w1 = np.arange(-8, 12, 0.01)
    grid_w2 = np.arange(-3, 3, 0.01)
    mx, my = np.meshgrid(grid_w1, grid_w2)
    mz = J(mx, my)

    # Figure 1: path on top of filled contours.
    plt.figure(figsize=[8, 6])
    plt.plot(x, y, 'g')
    plt.scatter(x, y, c=np.linspace(0, 1, len(x)), cmap='summer',
                edgecolor='k', zorder=2)
    plt.contourf(mx, my, mz, 40, cmap='plasma')

    # Figure 2: path on top of the 3-D surface.
    plt.figure(figsize=[8, 8])
    ax = plt.axes([0, 0, 1, 1], projection='3d', xlabel='$w_1$',
                  ylabel='$w_2$', xlim=[-8, 12], ylim=[-10, 10])
    ax.plot(x, y, z, c='g')
    ax.scatter(x, y, z, c=np.linspace(0, 1, len(x)), cmap='summer',
               edgecolor='k')
    ax.plot_surface(mx, my, mz, cstride=50, rstride=50, alpha=0.2,
                    cmap='plasma', edgecolor='k')
    plt.show()
# Plot the path stored in w1_, w2_ (converted from lists to arrays).
plot(np.array(w1_),np.array(w2_))
def mmtsgd(w1, w2, n, eta=0.01, mmt=0.9):
    """Take ``n`` gradient-descent steps with momentum on ``J``.

    Each update is ``mmt`` times the previous update minus ``eta`` times the
    current gradient; the first update starts from zero.  Returns two lists
    of length ``n + 1`` with the successive ``w1`` and ``w2`` values,
    starting with the initial point.
    """
    step1 = 0
    step2 = 0
    path1 = [w1]
    path2 = [w2]
    for _ in range(n):
        grad1, grad2 = dJ(w1, w2)
        # Blend the previous step with the new gradient step.
        step1 = mmt * step1 - eta * grad1
        step2 = mmt * step2 - eta * grad2
        w1 = w1 + step1
        w2 = w2 + step2
        path1.append(w1)
        path2.append(w2)
    return path1, path2
# Momentum run with step size 0.04, plotted, ...
w1_,w2_ = mmtsgd(w1,w2,n=100,eta=0.04)
plot(np.array(w1_),np.array(w2_))
# ... then the same run with a ten times smaller step size (0.004).
w1_,w2_ = mmtsgd(w1,w2,n=100,eta=0.004)
plot(np.array(w1_),np.array(w2_))
# ส่วนฟังก์ชันให้รับตัวแปรเป็นอาเรย์เดียวที่รวมค่าทั้งหมดที่ต้องการคำนวณรวดเดียว การคำนวณใช้ด็อตเป็นหลัก
def J(w):
    """Evaluate the quadratic objective for weights stored in one array.

    The last axis of ``w`` holds ``(w1, w2)``; because the terms are computed
    with ``dot`` over that axis, any leading axes are carried through to the
    result.
    """
    linear_coef = np.array([-5, 10])
    square_coef = np.array([0.5, 20])
    linear_part = w.dot(linear_coef)
    square_part = (w**2).dot(square_coef)
    return -7 + linear_part + square_part
def dJ(w):
    """Return the gradient of ``J`` at ``w`` as an array.

    Computed as a constant vector ``[-5, 10]`` plus ``w`` multiplied by the
    diagonal matrix ``[[1, 0], [0, 40]]``.
    """
    offset = np.array([-5, 10])
    scale = np.array([[1, 0],
                      [0, 40]])
    return offset + w.dot(scale)
# ส่วนนิยามฟังก์ชันของวิธีการทั้ง ๒
def sgd(w, n, eta=0.01):
    """Take ``n`` plain gradient-descent steps from the weight array ``w``.

    Returns the visited points stacked along a new first axis: ``n + 1``
    rows, the first being the starting point.
    """
    history = [w]
    for _ in range(n):
        gradient = dJ(w)
        w = w - eta * gradient
        history.append(w)
    return np.stack(history)
def mmtsgd(w, n, eta=0.01, mmt=0.9):
    """Take ``n`` momentum gradient-descent steps from the weight array ``w``.

    Each update is ``mmt`` times the previous update minus ``eta`` times the
    current gradient.  Returns the visited points stacked along a new first
    axis (``n + 1`` rows, starting point first).
    """
    # Zero update with the same shape as w.
    step = w * 0
    history = [w]
    for _ in range(n):
        gradient = dJ(w)
        step = mmt * step - eta * gradient
        w = w + step
        history.append(w)
    return np.stack(history)
# ส่วนวาดกราฟ
def plot(X):
    """Draw an optimisation path over ``J`` as a 2-D contour and a 3-D surface.

    ``X`` is indexed as ``X[:, 0]`` (``w1``) and ``X[:, 1]`` (``w2``), one
    row per visited point.  Points are coloured from first to last with the
    ``summer`` colormap.  Two figures are created and then shown with
    ``plt.show()``.
    """
    z = J(X)
    # Grid of (w1, w2) pairs stacked on a trailing axis so J can take it whole.
    mX = np.stack(np.meshgrid(np.arange(-8, 12, 0.01),
                              np.arange(-3, 3, 0.01)), 2)
    mz = J(mX)
    path_w1, path_w2 = X[:, 0], X[:, 1]
    grid_w1, grid_w2 = mX[:, :, 0], mX[:, :, 1]

    # Figure 1: path on top of filled contours, equal axis scaling.
    plt.figure(figsize=[8, 4])
    plt.axes(aspect=1)
    plt.plot(path_w1, path_w2, 'g')
    plt.scatter(path_w1, path_w2, c=np.linspace(0, 1, len(X)),
                cmap='summer', edgecolor='k', zorder=2)
    plt.contourf(grid_w1, grid_w2, mz, 40, cmap='plasma')

    # Figure 2: path on top of the 3-D surface.
    plt.figure(figsize=[8, 8])
    ax = plt.axes([0, 0, 1, 1], projection='3d', xlabel='$w_1$',
                  ylabel='$w_2$', xlim=[-8, 12], ylim=[-10, 10])
    ax.plot(path_w1, path_w2, z, c='g')
    ax.scatter(path_w1, path_w2, z, c=np.linspace(0, 1, len(X)),
               cmap='summer', edgecolor='k')
    ax.plot_surface(grid_w1, grid_w2, mz, cstride=50, rstride=50,
                    alpha=0.2, cmap='plasma', edgecolor='k')
    plt.show()
# Same starting point as before, now as a single array.
w = np.array([-7.,2.])
# Alternative run kept for experimentation: plain gradient descent.
#w_ = sgd(w,n=100,eta=0.04)
w_ = mmtsgd(w,n=100,eta=0.004)
plot(w_)
def nag(w, n, eta=0.01, mmt=0.9):
    """Nesterov accelerated gradient, look-ahead form.

    The gradient is evaluated at ``w + mmt*dw`` (the point the momentum alone
    would reach) instead of at ``w``.  Returns the visited points stacked
    along a new first axis (``n + 1`` rows, starting point first).

    NOTE: a second function named ``nag`` is defined right after this one in
    the file and replaces it at import time.
    """
    step = w * 0
    history = [w]
    for _ in range(n):
        lookahead = w + mmt * step
        gradient = dJ(lookahead)
        step = mmt * step - eta * gradient
        w = w + step
        history.append(w)
    return np.stack(history)
def nag(w, n, eta=0.01, mmt=0.9):
    """Nesterov-style momentum using only gradients at the current point.

    Instead of a look-ahead evaluation, the gradient is corrected by
    ``mmt`` times its change since the previous step.  Returns the visited
    points stacked along a new first axis (``n + 1`` rows, starting point
    first).
    """
    step = w * 0
    # Seed the "previous gradient" so the first correction term is zero.
    prev_gradient = dJ(w)
    history = [w]
    for _ in range(n):
        gradient = dJ(w)
        corrected = gradient + mmt * (gradient - prev_gradient)
        step = mmt * step - eta * corrected
        w = w + step
        prev_gradient = gradient
        history.append(w)
    return np.stack(history)
# 100 steps of nag from (-7, 2) with step size 0.004, plotted.
plot(nag(np.array([-7.,2.]),n=100,eta=0.004))
def adagrad(w, n, eta=0.01):
    """AdaGrad: scale each step by the root of the summed squared gradients.

    The accumulator starts at ``1e-7`` so the first division is never by
    zero.  Returns the visited points stacked along a new first axis
    (``n + 1`` rows, starting point first).
    """
    accum = 1e-7
    history = [w]
    for _ in range(n):
        gradient = dJ(w)
        accum = accum + gradient**2
        step = -eta * gradient / np.sqrt(accum)
        w = w + step
        history.append(w)
    return np.stack(history)
# 100 steps of adagrad from (-7, 2) with eta=2, plotted.
plot(adagrad(np.array([-7.,2.]),n=100,eta=2))
def adadelta(w, n, eta=1., rho=0.95):
    """Scale each step by the root of a decaying average of squared gradients.

    The average is ``rho`` times its previous value plus ``1 - rho`` times
    the new squared gradient, starting from ``1e-7``.  Returns the visited
    points stacked along a new first axis (``n + 1`` rows, starting point
    first).

    NOTE(review): only squared gradients are averaged here (no running
    average of past updates), which is the update usually called RMSprop;
    confirm the intended naming.
    """
    avg_sq = 1e-7
    history = [w]
    for _ in range(n):
        gradient = dJ(w)
        avg_sq = rho * avg_sq + (1 - rho) * gradient**2
        step = -eta * gradient / np.sqrt(avg_sq)
        w = w + step
        history.append(w)
    return np.stack(history)
# 100 steps of adadelta from (-7, 2) with eta=1, plotted.
plot(adadelta(np.array([-7.,2.]),n=100,eta=1))
def adam(w, n, eta=0.001, beta1=0.9, beta2=0.999):
    """Adam: decaying averages of the gradient and its square, bias-corrected.

    ``m`` averages the gradient with decay ``beta1``; ``v`` averages the
    squared gradient with decay ``beta2`` and starts at ``1e-7``.  The step
    counter starts at 1 so the bias-correction denominators are non-zero.
    Returns the visited points stacked along a new first axis (``n + 1``
    rows, starting point first).
    """
    m = w * 0.
    v = m + 1e-7
    history = [w]
    for t in range(1, n + 1):
        gradient = dJ(w)
        m = beta1 * m + (1 - beta1) * gradient
        v = beta2 * v + (1 - beta2) * gradient**2
        # Step size with both bias corrections folded in.
        scale = -eta * np.sqrt(1 - beta2**t)
        scale = scale / (1 - beta1**t)
        step = scale * m / np.sqrt(v)
        w = w + step
        history.append(w)
    return np.stack(history)
# 100 steps of adam from (-7, 2) with eta=1, plotted.
plot(adam(np.array([-7.,2.]),n=100,eta=1))
class Sgd:
    """Plain gradient-descent optimizer that updates weights in place."""

    def __init__(self, eta=0.01):
        # eta: step size applied to every gradient.
        self.eta = eta

    def __call__(self, w, g):
        """Subtract ``eta * g`` from ``w`` in place; returns nothing."""
        step = -self.eta * g
        w += step
class Mmtsgd:
    """Momentum gradient-descent optimizer that updates weights in place.

    The previous update is kept in ``self.dw`` between calls.
    """

    def __init__(self, eta=0.01, mmt=0.9):
        self.eta = eta    # step size
        self.mmt = mmt    # fraction of the previous update carried over
        self.dw = 0       # previous update; zero before the first call

    def __call__(self, w, gw):
        """Apply one momentum step for gradient ``gw`` to ``w`` in place."""
        carried = self.mmt * self.dw
        self.dw = carried - self.eta * gw
        w += self.dw
class Nag:
    """Nesterov-style momentum optimizer that updates weights in place.

    Uses the gradient-difference form: the current gradient is corrected by
    ``mmt`` times its change since the previous call, so only gradients at
    the current weights are needed.  The previous update (``dw``) and the
    previous gradient (``gw0``) are kept between calls.
    """

    def __init__(self, eta=0.01, mmt=0.9):
        self.eta = eta    # step size
        self.mmt = mmt    # momentum coefficient
        self.dw = 0       # previous update; zero before the first call
        self.gw0 = None   # previous gradient; None until the first call

    def __call__(self, w, gw):
        """Apply one step for gradient ``gw`` to ``w`` in place."""
        if self.gw0 is None:
            # First call: no previous gradient, so the correction is zero.
            self.gw0 = gw
        corrected = gw + self.mmt * (gw - self.gw0)
        self.dw = self.mmt * self.dw - self.eta * corrected
        # Keep a private copy: callers may refill the same gradient buffer
        # in place on the next step, which would otherwise make
        # ``gw - self.gw0`` identically zero.
        self.gw0 = np.array(gw, copy=True)
        w += self.dw
# Drive an optimizer object by hand: 100 in-place updates from (-7, 2),
# recording a copy of the weights after every step.
w = np.array([-7, 2.])
opt = Nag(eta=0.004)
# Alternative optimizers kept for experimentation:
#opt = Mmtsgd(eta=0.004)
#opt = Sgd(eta=0.04)
w_ = [w.copy()]
for i in range(100):
    gw = dJ(w)
    opt(w, gw)
    # The optimizer mutates w, so a snapshot is stored each time.
    w_.append(w.copy())
w_ = np.stack(w_)
plot(w_)
class Adagrad:
    """AdaGrad optimizer that updates weights in place.

    ``self.G`` accumulates the squared gradients over all calls.
    """

    def __init__(self, eta=0.01):
        self.eta = eta
        # Small positive start so the first division is never by zero.
        self.G = 1e-7

    def __call__(self, w, gw):
        """Apply one AdaGrad step for gradient ``gw`` to ``w`` in place."""
        squared = gw**2
        self.G += squared
        step = -self.eta * gw / np.sqrt(self.G)
        w += step
class Adadelta:
    """Optimizer scaling steps by a decaying average of squared gradients.

    ``self.G`` is ``rho`` times its previous value plus ``1 - rho`` times the
    new squared gradient.  Weights are updated in place.

    NOTE(review): only squared gradients are averaged (no running average of
    past updates), which is the update usually called RMSprop; confirm the
    intended naming.
    """

    def __init__(self, eta=0.01, rho=0.95):
        self.eta = eta
        self.rho = rho
        # Small positive start so the first division is never by zero.
        self.G = 1e-7

    def __call__(self, w, gw):
        """Apply one step for gradient ``gw`` to ``w`` in place."""
        decayed = self.rho * self.G
        self.G = decayed + (1 - self.rho) * gw**2
        step = -self.eta * gw / np.sqrt(self.G)
        w += step
class Adam:
    """Adam optimizer that updates weights in place.

    Keeps decaying averages of the gradient (``m``) and of its square
    (``v``), plus a call counter ``i`` used for bias correction.
    """

    def __init__(self, eta=0.001, beta1=0.9, beta2=0.999):
        self.eta = eta
        self.beta1 = beta1
        self.beta2 = beta2
        self.i = 1       # 1-based call counter
        self.m = 0       # average of gradients
        self.v = 1e-7    # average of squared gradients; starts just above zero

    def __call__(self, w, gw):
        """Apply one Adam step for gradient ``gw`` to ``w`` in place."""
        self.m = self.beta1 * self.m + (1 - self.beta1) * gw
        self.v = self.beta2 * self.v + (1 - self.beta2) * gw**2
        # Step size with both bias corrections folded in.
        scale = -self.eta * np.sqrt(1 - self.beta2**self.i)
        scale = scale / (1 - self.beta1**self.i)
        w += scale * self.m / np.sqrt(self.v)
        self.i += 1
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
class ThotthoiLogistic:
    """Binary logistic regression trained with a pluggable optimizer.

    The optimizer is any callable ``opt(w, gw)`` that updates the weight
    array ``w`` in place from the gradient ``gw``.  After ``rianru`` the
    model exposes:

    * ``w``        -- weights; ``w[0]`` is the bias, ``w[1:]`` the per-feature
                      coefficients, expressed for un-normalised inputs.
    * ``entropy``  -- list of cross-entropy values recorded during training.
    * ``thuktong`` -- list of accuracy values (percent) recorded during
                      training.
    """

    def __init__(self, opt):
        self.opt = opt  # store the optimizer rather than a learning rate (eta)

    def rianru(self, X, z, n_thamsam, n_batch=0):
        """Fit the weights to features ``X`` and 0/1 labels ``z``.

        ``n_thamsam`` is the number of passes over the data.  ``n_batch`` is
        the mini-batch size; ``0`` or a value larger than the data set means
        the whole set is used as one batch.  Features are standardised per
        column for training and the weights are converted back to the
        original feature scale at the end.
        """
        n = len(z)
        if n_batch == 0 or n < n_batch:
            n_batch = n
        # Per-feature statistics (axis 0).  Without the axis argument these
        # are scalars, and the masked assignment below fails.
        X_std = X.std(0)
        X_std[X_std == 0] = 1  # constant columns: avoid dividing by zero
        X_mean = X.mean(0)
        X = (X - X_mean) / X_std
        self.w = np.zeros(X.shape[1] + 1)
        self.entropy = []
        self.thuktong = []
        for j in range(n_thamsam):
            # New random sample order for every pass.
            lueak = np.random.permutation(n)
            for i in range(0, n, n_batch):
                chosen = lueak[i:i + n_batch]
                Xn = X[chosen]
                zn = z[chosen]
                phi = self.ha_sigmoid(Xn)
                eee = (phi - zn) / len(zn)
                # Fresh gradient array per batch so an optimizer that keeps a
                # reference to a previous gradient never sees it overwritten.
                gw = np.empty_like(self.w)
                gw[1:] = np.dot(eee, Xn)
                gw[0] = eee.sum()
                self.opt(self.w, gw)  # let the optimizer update the weights
            # Record accuracy and cross-entropy on the full set for this pass.
            thukmai = self.thamnai(X) == z
            self.thuktong += [thukmai.mean() * 100]
            self.entropy += [self.ha_entropy(X, z)]
        # Undo the standardisation so the weights apply to raw features.
        self.w[1:] /= X_std
        self.w[0] -= (self.w[1:] * X_mean).sum()

    def thamnai(self, X):
        """Return a boolean array: True where the linear score is positive."""
        return np.dot(X, self.w[1:]) + self.w[0] > 0

    def ha_sigmoid(self, X):
        """Return the sigmoid of the linear score for each row of ``X``."""
        return sigmoid(np.dot(X, self.w[1:]) + self.w[0])

    def ha_entropy(self, X, z):
        """Return the mean binary cross-entropy of the model on ``(X, z)``.

        ``1e-7`` is added inside each logarithm to avoid ``log(0)``.
        """
        phi = self.ha_sigmoid(X)
        return -(z * np.log(phi + 1e-7) + (1 - z) * np.log(1 - phi + 1e-7)).mean()
from sklearn import datasets
# Seed NumPy's global RNG; rianru shuffles with np.random.permutation.
np.random.seed(4)
# Synthetic data: 12000 samples, 2 features, 2 clusters.
X,z = datasets.make_blobs(n_samples=12000,n_features=2,centers=2,cluster_std=2,random_state=2)
tl = ThotthoiLogistic(Adagrad(eta=1)) # pass an optimizer object instead of a bare eta
tl.rianru(X,z,n_thamsam=50,n_batch=150)
plt.figure(figsize=[6,8])
# Decision line w[0] + w[1]*x + w[2]*y = 0, solved for y at the two x extremes.
x_sen = np.array([X[:,0].min(),X[:,0].max()])
y_sen = -(tl.w[0]+tl.w[1]*x_sen)/tl.w[2]
# Mask of samples whose prediction equals the label.
tm = tl.thamnai(X)==z
plt.axes(aspect=1,xlim=[X[:,0].min(),X[:,0].max()],ylim=[X[:,1].min(),X[:,1].max()])
plt.plot(x_sen,y_sen,'y',lw=3,zorder=0)
# Matching predictions get a black edge, mismatches a red edge.
plt.scatter(X[tm,0],X[tm,1],c=z[tm],alpha=0.5,s=20,edgecolor='k',lw=0.5,cmap='winter')
plt.scatter(X[~tm,0],X[~tm,1],c=z[~tm],alpha=0.5,s=20,edgecolor='r',cmap='winter')
plt.show()
# Train one model per optimizer on the same data and overlay the recorded
# cross-entropy (top axes) and accuracy in percent (bottom axes).
plt.figure(figsize=[8,8])
ax1 = plt.subplot(211)
ax1.set_title(u'เอนโทรปี',fontname='Tahoma')
# labelbottom takes a bool; the string 'off' is truthy and would leave the
# tick labels visible on current matplotlib.
ax1.tick_params(labelbottom=False)
ax2 = plt.subplot(212)
ax2.set_title(u'% ถูก',fontname='Tahoma')
# Order here must match the legend labels below.
opt = [Sgd(0.2),
       Mmtsgd(0.2),
       Nag(0.2),
       Adagrad(0.2),
       Adadelta(0.2),
       Adam(0.2)]
for o in opt:
    tl = ThotthoiLogistic(o)
    tl.rianru(X,z,n_thamsam=40,n_batch=150)
    # One random RGB colour per optimizer, shared by both curves.
    si = np.random.random(3)
    ax1.plot(tl.entropy,color=si)
    ax2.plot(tl.thuktong,color=si)
ax2.legend(['SGD','Momentum','NAG','AdaGrad','AdaDelta','Adam'],ncol=2)
plt.show()
# ติดตามอัปเดตของบล็อกได้ที่แฟนเพจ (Follow the blog's updates on its fan page.)