def J(w1,w2):
    """Evaluate the quadratic objective at (w1, w2).

    J = 0.5*w1**2 + 20*w2**2 - 5*w1 + 10*w2 - 7. Only arithmetic operators
    are applied, so the arguments may be scalars or NumPy arrays.
    """
    # Accumulate the terms left to right, in the same order as the formula.
    total = 0.5*w1**2
    total = total + 20*w2**2
    total = total - 5*w1
    total = total + 10*w2
    return total - 7
def dJ(w1,w2):
    """Return the gradient of J as the tuple (dJ/dw1, dJ/dw2)."""
    slope_w1 = w1 - 5
    slope_w2 = 40*w2 + 10
    return slope_w1, slope_w2
def sgd(w1,w2,n,eta=0.01):
    """Run n plain gradient-descent steps on J starting from (w1, w2).

    Returns two lists of length n+1 holding every visited value of w1 and
    of w2, starting point included.
    """
    path_w1 = [w1]
    path_w2 = [w2]
    for _ in range(n):
        grad_w1,grad_w2 = dJ(w1,w2)
        # Move against the gradient, scaled by the learning rate.
        w1 = w1 - eta*grad_w1
        w2 = w2 - eta*grad_w2
        path_w1.append(w1)
        path_w2.append(w2)
    return path_w1,path_w2
# Starting point of the descent: (w1, w2) = (-7, 2).
w1,w2 = -7.,2.
# Run 50 plain gradient-descent steps with eta=0.04 and keep both coordinate histories.
w1_,w2_ = sgd(w1,w2,n=50,eta=0.04)
# Draw the path in both two and three dimensions
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def plot(x,y):
    """Draw a descent path over J as a 2-D contour plot and a 3-D surface.

    x, y: arrays holding the visited w1 and w2 values, in order.
    Opens two figures and then calls plt.show().
    """
    # One colour value per visited point, running from 0 (first) to 1 (last).
    shade = np.linspace(0,1,len(x))
    height = J(x,y)
    # Regular grid on which the objective itself is evaluated.
    grid_x,grid_y = np.meshgrid(np.arange(-8,12,0.01),np.arange(-3,3,0.01))
    grid_z = J(grid_x,grid_y)

    # Figure 1: path on top of filled contours.
    plt.figure(figsize=[8,6])
    plt.plot(x,y,'g')
    plt.scatter(x,y,c=shade,cmap='summer',edgecolor='k',zorder=2)
    plt.contourf(grid_x,grid_y,grid_z,40,cmap='plasma')

    # Figure 2: path on the 3-D surface.
    plt.figure(figsize=[8,8])
    ax = plt.axes(
        [0,0,1,1],
        projection='3d',
        xlabel='$w_1$',
        ylabel='$w_2$',
        xlim=[-8,12],
        ylim=[-10,10],
    )
    ax.plot(x,y,height,c='g')
    ax.scatter(x,y,height,c=shade,cmap='summer',edgecolor='k')
    ax.plot_surface(grid_x,grid_y,grid_z,cstride=50,rstride=50,alpha=0.2,cmap='plasma',edgecolor='k')
    plt.show()
# Convert the recorded coordinate lists to arrays and draw the path.
plot(np.array(w1_),np.array(w2_))

 def mmtsgd(w1,w2,n,eta=0.01,mmt=0.9):
    dw1,dw2 = 0,0
    w1_,w2_ = [w1],[w2]
    for i in range(n):
        gw1,gw2 = dJ(w1,w2)
        dw1 = mmt*dw1-eta*gw1
        dw2 = mmt*dw2-eta*gw2
        w1 = w1+dw1
        w2 = w2+dw2
        w1_.append(w1)
        w2_.append(w2)
    return w1_,w2_
# Momentum descent from the starting point stored in w1, w2: 100 steps with eta=0.04.
w1_,w2_ = mmtsgd(w1,w2,n=100,eta=0.04)
plot(np.array(w1_),np.array(w2_))


# Same run with a ten-times smaller learning rate (eta=0.004).
w1_,w2_ = mmtsgd(w1,w2,n=100,eta=0.004)
plot(np.array(w1_),np.array(w2_))


# Function section: each function takes a single array that holds all the values to be computed in one go; the computation relies mainly on dot products
def J(w):
    """Evaluate the quadratic objective for weights stored along the last axis.

    w[..., 0] plays the role of w1 and w[..., 1] of w2; the result has the
    shape of w without its last axis.
    """
    linear_coef = np.array([-5,10])
    square_coef = np.array([0.5,20])
    linear_part = w.dot(linear_coef)
    square_part = (w**2).dot(square_coef)
    return -7+linear_part+square_part
def dJ(w):
    """Return the gradient of J at w as an array with the same last axis as w."""
    constant_term = np.array([-5,10])
    # Diagonal matrix of the second-order coefficients (1 for w1, 40 for w2).
    curvature = np.array([[1,0],[0,40]])
    return constant_term+w.dot(curvature)
# Function definitions for the two methods
def sgd(w,n,eta=0.01):
    """Run n plain gradient-descent steps from w.

    Returns the visited weights stacked along axis 0 (n+1 rows, the first
    one being the starting point).
    """
    history = [w]
    for _ in range(n):
        step = eta*dJ(w)
        w = w-step
        history.append(w)
    return np.stack(history)
def mmtsgd(w,n,eta=0.01,mmt=0.9):
    """Run n gradient-descent steps with momentum from w.

    Returns the visited weights stacked along axis 0 (n+1 rows).
    """
    # Previous step, initialised to zeros with the shape of w.
    velocity = w*0
    history = [w]
    for _ in range(n):
        grad = dJ(w)
        velocity = mmt*velocity-eta*grad
        w = w+velocity
        history.append(w)
    return np.stack(history)
# Plotting section
def plot(X):
    """Draw a descent path over J as a 2-D contour plot and a 3-D surface.

    X: array whose rows are the visited points (column 0 = w1, column 1 = w2).
    Opens two figures and then calls plt.show().
    """
    path_x,path_y = X[:,0],X[:,1]
    path_z = J(X)
    # One colour value per visited point, running from 0 (first) to 1 (last).
    shade = np.linspace(0,1,len(X))
    # Grid of (w1, w2) pairs stacked on a trailing axis so J can take it whole.
    grid = np.stack(np.meshgrid(np.arange(-8,12,0.01),np.arange(-3,3,0.01)),2)
    grid_x,grid_y = grid[:,:,0],grid[:,:,1]
    grid_z = J(grid)

    # Figure 1: path on top of filled contours, equal axis scaling.
    plt.figure(figsize=[8,4])
    plt.axes(aspect=1)
    plt.plot(path_x,path_y,'g')
    plt.scatter(path_x,path_y,c=shade,cmap='summer',edgecolor='k',zorder=2)
    plt.contourf(grid_x,grid_y,grid_z,40,cmap='plasma')

    # Figure 2: path on the 3-D surface.
    plt.figure(figsize=[8,8])
    ax = plt.axes(
        [0,0,1,1],
        projection='3d',
        xlabel='$w_1$',
        ylabel='$w_2$',
        xlim=[-8,12],
        ylim=[-10,10],
    )
    ax.plot(path_x,path_y,path_z,c='g')
    ax.scatter(path_x,path_y,path_z,c=shade,cmap='summer',edgecolor='k')
    ax.plot_surface(grid_x,grid_y,grid_z,cstride=50,rstride=50,alpha=0.2,cmap='plasma',edgecolor='k')
    plt.show()
# Starting point as a single array [w1, w2].
w = np.array([-7.,2.])
# Alternative kept for reference: plain gradient descent instead of momentum.
#w_ = sgd(w,n=100,eta=0.04)
w_ = mmtsgd(w,n=100,eta=0.004)
# Draw the recorded path.
plot(w_)
def nag(w,n,eta=0.01,mmt=0.9):
    """Run n momentum steps whose gradient is taken at a look-ahead point.

    The gradient is evaluated at w + mmt*(previous step) rather than at w.
    Returns the visited weights stacked along axis 0 (n+1 rows).

    Note: a later definition with the same name in this file replaces this one.
    """
    velocity = w*0
    history = [w]
    for _ in range(n):
        lookahead = w+mmt*velocity
        grad_ahead = dJ(lookahead)
        velocity = mmt*velocity-eta*grad_ahead
        w = w+velocity
        history.append(w)
    return np.stack(history)
def nag(w,n,eta=0.01,mmt=0.9):
    """Run n momentum steps with a gradient-difference correction.

    Each step uses the gradient at the current weights plus mmt times the
    change in gradient since the previous step.
    Returns the visited weights stacked along axis 0 (n+1 rows).
    """
    velocity = w*0
    # Seeded with the starting gradient so the first correction is zero.
    prev_grad = dJ(w)
    history = [w]
    for _ in range(n):
        grad = dJ(w)
        corrected = grad+mmt*(grad-prev_grad)
        velocity = mmt*velocity-eta*corrected
        w = w+velocity
        prev_grad = grad
        history.append(w)
    return np.stack(history)
# Run nag for 100 steps from (-7, 2) with eta=0.004 and draw the path.
plot(nag(np.array([-7.,2.]),n=100,eta=0.004))


def adagrad(w,n,eta=0.01):
    """Run n AdaGrad steps from w.

    Every coordinate's step is divided by the square root of the running sum
    of its squared gradients.
    Returns the visited weights stacked along axis 0 (n+1 rows).
    """
    # Small positive start value keeps the first division away from zero.
    sq_sum = 1e-7
    history = [w]
    for _ in range(n):
        grad = dJ(w)
        sq_sum = sq_sum+grad**2
        w = w-eta*grad/np.sqrt(sq_sum)
        history.append(w)
    return np.stack(history)
# Run adagrad for 100 steps from (-7, 2) with eta=2 and draw the path.
plot(adagrad(np.array([-7.,2.]),n=100,eta=2))


def adadelta(w,n,eta=1.,rho=0.95):
    """Run n steps scaled by a decaying average of squared gradients.

    rho is the decay factor of that average. Only the squared-gradient
    average is tracked here; no average of past updates is kept.
    Returns the visited weights stacked along axis 0 (n+1 rows).
    """
    # Small positive start value keeps the first division away from zero.
    sq_avg = 1e-7
    history = [w]
    for _ in range(n):
        grad = dJ(w)
        sq_avg = rho*sq_avg+(1-rho)*grad**2
        w = w-eta*grad/np.sqrt(sq_avg)
        history.append(w)
    return np.stack(history)
# Run adadelta for 100 steps from (-7, 2) with eta=1 and draw the path.
plot(adadelta(np.array([-7.,2.]),n=100,eta=1))


def adam(w,n,eta=0.001,beta1=0.9,beta2=0.999):
    """Run n Adam steps from w.

    beta1 and beta2 are the decay factors of the running averages of the
    gradient and of the squared gradient.
    Returns the visited weights stacked along axis 0 (n+1 rows).
    """
    grad_avg = w*0.
    # Offset by a small positive value so the first division is well defined.
    sq_avg = grad_avg+1e-7
    history = [w]
    for step in range(1,n+1):
        grad = dJ(w)
        grad_avg = beta1*grad_avg+(1-beta1)*grad
        sq_avg = beta2*sq_avg+(1-beta2)*grad**2
        # Learning rate including the bias-correction factors for this step.
        scale = -eta*np.sqrt(1-beta2**step)/(1-beta1**step)
        w = w+scale*grad_avg/np.sqrt(sq_avg)
        history.append(w)
    return np.stack(history)
# Run adam for 100 steps from (-7, 2) with eta=1 and draw the path.
plot(adam(np.array([-7.,2.]),n=100,eta=1))




class Sgd:
    """Plain gradient-descent update rule, applied to the weights in place."""

    def __init__(self,eta=0.01):
        # eta: learning rate.
        self.eta = eta

    def __call__(self,w,g):
        """Subtract eta*g from w in place; returns None."""
        step = -self.eta*g
        w += step
class Mmtsgd:
    """Gradient descent with momentum, applied to the weights in place."""

    def __init__(self,eta=0.01,mmt=0.9):
        # eta: learning rate; mmt: fraction of the previous step carried over.
        self.eta = eta
        self.mmt = mmt
        # Previous step; starts at zero because no step has been taken yet.
        self.dw = 0

    def __call__(self,w,gw):
        """Update the stored step from gradient gw and add it to w in place."""
        carried = self.mmt*self.dw
        self.dw = carried-self.eta*gw
        w += self.dw
class Nag:
    """Momentum update with a gradient-difference correction, applied in place.

    Each call uses the gradient it is given plus mmt times the change in
    gradient since the previous call.
    """

    def __init__(self,eta=0.01,mmt=0.9):
        # eta: learning rate; mmt: fraction of the previous step carried over.
        self.eta = eta
        self.mmt = mmt
        # Previous step; starts at zero because no step has been taken yet.
        self.dw = 0
        # Gradient seen on the previous call; None until the first call.
        self.gw0 = None

    def __call__(self,w,gw):
        """Update the stored step from gradient gw and add it to w in place."""
        if self.gw0 is None:
            # First call: no earlier gradient, so the correction term is zero.
            self.gw0 = gw
        self.dw = self.mmt*self.dw-self.eta*(gw+self.mmt*(gw-self.gw0))
        # Keep a private copy: callers may refill the same gradient array in
        # place, and an alias would make the next gradient difference zero.
        self.gw0 = np.copy(gw)
        w += self.dw
# Demo of the optimizer objects: each call updates w in place, so every
# snapshot of the path is stored as a copy.
w = np.array([-7,2.])
opt = Nag(eta=0.004)
# Alternative optimizers to try instead:
#opt = Mmtsgd(eta=0.004)
#opt = Sgd(eta=0.04)
w_ = [w.copy()]
for i in range(100):
    gw = dJ(w)
    opt(w,gw)
    w_.append(w.copy())
# Stack the snapshots into one array (one row per step) and draw the path.
w_ = np.stack(w_)
plot(w_)
class Adagrad:
    """AdaGrad update rule, applied to the weights in place."""

    def __init__(self,eta=0.01):
        # eta: learning rate.
        self.eta = eta
        # Running sum of squared gradients; the small positive start value
        # keeps the first division away from zero.
        self.G = 1e-7

    def __call__(self,w,gw):
        """Accumulate gw**2 and move w by -eta*gw/sqrt(accumulated sum)."""
        self.G += gw**2
        step = -self.eta*gw/np.sqrt(self.G)
        w += step
class Adadelta:
    """Update rule scaled by a decaying average of squared gradients.

    Only the squared-gradient average is tracked; no average of past
    updates is kept. The weights are modified in place.
    """

    def __init__(self,eta=0.01,rho=0.95):
        # eta: learning rate; rho: decay factor of the running average.
        self.eta = eta
        self.rho = rho
        # Running average of squared gradients; the small positive start
        # value keeps the first division away from zero.
        self.G = 1e-7

    def __call__(self,w,gw):
        """Refresh the running average and move w by -eta*gw/sqrt(average)."""
        decayed = self.rho*self.G
        self.G = decayed+(1-self.rho)*gw**2
        step = -self.eta*gw/np.sqrt(self.G)
        w += step
class Adam:
    """Adam update rule, applied to the weights in place."""

    def __init__(self,eta=0.001,beta1=0.9,beta2=0.999):
        # eta: learning rate; beta1/beta2: decay factors of the running
        # averages of the gradient and of the squared gradient.
        self.eta = eta
        self.beta1 = beta1
        self.beta2 = beta2
        # Step counter used by the bias-correction factors; starts at 1.
        self.i = 1
        self.m = 0
        # Small positive start value keeps the first division away from zero.
        self.v = 1e-7

    def __call__(self,w,gw):
        """Refresh both running averages, move w in place, advance the counter."""
        self.m = self.beta1*self.m+(1-self.beta1)*gw
        self.v = self.beta2*self.v+(1-self.beta2)*gw**2
        # Learning rate including the bias-correction factors for this step.
        scale = -self.eta*np.sqrt(1-self.beta2**self.i)/(1-self.beta1**self.i)
        w += scale*self.m/np.sqrt(self.v)
        self.i += 1
def sigmoid(x):
    """Return the logistic function 1/(1+exp(-x)), element-wise for arrays."""
    denominator = 1+np.exp(-x)
    return 1/denominator
class ThotthoiLogistic:
    """Two-class logistic regression trained with mini-batch gradient steps.

    The weight update itself is delegated to an optimizer object that is
    called as opt(w, gradient) and modifies w in place.
    """

    def __init__(self,opt):
        self.opt = opt # keep the optimizer instead of a bare learning rate (eta)

    def rianru(self,X,z,n_thamsam,n_batch=0):
        """Fit the weights to X (2-D, one row per sample) and 0/1 labels z.

        n_thamsam: number of passes over the data.
        n_batch:   mini-batch size; 0, or anything larger than the number of
                   samples, means one batch containing every sample.

        Sets self.w (bias first, then one weight per feature, expressed for
        the unscaled inputs) and fills self.entropy / self.thuktong with the
        cross-entropy and the percentage of correct predictions per pass.
        """
        n = len(z)
        if(n_batch==0 or n<n_batch):
            n_batch = n
        # Standardise every feature column separately; the statistics must be
        # per-column arrays because they are indexed here and reused below to
        # map the weights back to the original scale.
        X_std = X.std(0)
        X_std[X_std==0] = 1 # constant columns: avoid dividing by zero
        X_mean = X.mean(0)
        X = (X-X_mean)/X_std
        self.w = np.zeros(X.shape[1]+1)
        self.entropy = []
        self.thuktong = []
        for j in range(n_thamsam):
            # New random sample order for every pass.
            lueak = np.random.permutation(n)
            for i in range(0,n,n_batch):
                Xn = X[lueak[i:i+n_batch]]
                zn = z[lueak[i:i+n_batch]]
                phi = self.ha_sigmoid(Xn)
                eee = (phi-zn)/len(zn)
                # Fresh gradient array per batch, so an optimizer that keeps a
                # reference to it never sees it overwritten afterwards.
                gw = np.empty_like(self.w)
                gw[1:] = np.dot(eee,Xn)
                gw[0] = eee.sum()
                self.opt(self.w,gw) # let the optimizer adjust the weights
            thukmai = self.thamnai(X)==z
            self.thuktong += [thukmai.mean()*100]
            self.entropy += [self.ha_entropy(X,z)]
        # Convert the weights so that they apply to the unscaled inputs.
        self.w[1:] /= X_std
        self.w[0] -= (self.w[1:]*X_mean).sum()

    def thamnai(self,X):
        """Return a boolean array: True where the linear score is positive."""
        return np.dot(X,self.w[1:])+self.w[0]>0

    def ha_sigmoid(self,X):
        """Return the sigmoid of the linear score for each row of X."""
        return sigmoid(np.dot(X,self.w[1:])+self.w[0])

    def ha_entropy(self,X,z):
        """Return the mean cross-entropy of the predictions against labels z."""
        phi = self.ha_sigmoid(X)
        # 1e-7 keeps log() away from zero.
        return -(z*np.log(phi+1e-7)+(1-z)*np.log(1-phi+1e-7)).mean()
from sklearn import datasets
# Fix NumPy's random seed so the run is repeatable.
np.random.seed(4)
# Two-class, two-feature synthetic data set.
X,z = datasets.make_blobs(n_samples=12000,n_features=2,centers=2,cluster_std=2,random_state=2)
tl = ThotthoiLogistic(Adagrad(eta=1)) # pass an optimizer object instead of a bare eta
tl.rianru(X,z,n_thamsam=50,n_batch=150)
plt.figure(figsize=[6,8])
# Decision boundary w0 + w1*x + w2*y = 0, solved for y at both ends of the x range.
x_sen = np.array([X[:,0].min(),X[:,0].max()])
y_sen = -(tl.w[0]+tl.w[1]*x_sen)/tl.w[2]
# Mask of the samples whose prediction matches the label.
tm = tl.thamnai(X)==z
plt.axes(aspect=1,xlim=[X[:,0].min(),X[:,0].max()],ylim=[X[:,1].min(),X[:,1].max()])
plt.plot(x_sen,y_sen,'y',lw=3,zorder=0)
# Matching predictions get a black outline, mismatches a red one.
plt.scatter(X[tm,0],X[tm,1],c=z[tm],alpha=0.5,s=20,edgecolor='k',lw=0.5,cmap='winter')
plt.scatter(X[~tm,0],X[~tm,1],c=z[~tm],alpha=0.5,s=20,edgecolor='r',cmap='winter')
plt.show()
 plt.figure(figsize=[8,8])
ax1 = plt.subplot(211)
ax1.set_title(u'เอนโทรปี',fontname='Tahoma')
ax1.tick_params(labelbottom='off')
ax2 = plt.subplot(212)
ax2.set_title(u'% ถูก',fontname='Tahoma')
opt = [Sgd(0.2),
       Mmtsgd(0.2),
       Nag(0.2),
       Adagrad(0.2),
       Adadelta(0.2),
       Adam(0.2)]
for o in opt:
    tl = ThotthoiLogistic(o)
    tl.rianru(X,z,n_thamsam=40,n_batch=150)
    si = np.random.random(3)
    ax1.plot(tl.entropy,color=si)
    ax2.plot(tl.thuktong,color=si)
ax2.legend(['SGD','Momentum','NAG','AdaGrad','AdaDelta','Adam'],ncol=2)
plt.show()

# Follow the blog's updates on its fan page.