当前位置: 移动技术网 > IT编程>脚本编程>Python > python实现逻辑回归

python实现逻辑回归

2020年07月22日  | 移动技术网IT编程  | 我要评论

1.自定义代码实现

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split


def sigmoid(z):
    """Apply the logistic function element-wise and return a column vector.

    Args:
        z: Scalar, sequence, 1-D array of length n, or an (n, 1) array.

    Returns:
        Array of shape (n, 1) holding 1 / (1 + exp(-z)); a scalar input
        yields shape (1, 1).

    Raises:
        ValueError: If ``z`` has more than one column and therefore cannot
            be laid out as (n, 1).
    """
    # Promote scalars/sequences so that shape[0] below is always defined.
    z = np.atleast_1d(z)
    # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))).  logaddexp evaluates the
    # inner log without overflowing when z is a large negative number.
    s = np.exp(-np.logaddexp(0, -z))
    return s.reshape(s.shape[0], 1)


def draw_sigmoid():
    """Plot the sigmoid curve on [-6, 6) as a red line and show the figure."""
    # np.arange is half-open: the grid starts at -6 and stops short of 6,
    # advancing in steps of 0.01.
    grid = np.arange(-6, 6, .01)
    curve = sigmoid(grid)

    plt.plot(grid, curve, color='red', lw=2)
    plt.show()


def model(theta, X):
    """Return the logistic-regression prediction for every row of X.

    Args:
        theta: Parameter vector; the script in this file passes a column
            of shape (d, 1).
        X: Design matrix of shape (n, d), one sample per row.

    Returns:
        The value of ``sigmoid`` applied to the per-row linear score
        ``X . theta``.
    """
    # A matrix product yields the same per-row score as
    # np.sum(theta.T * X, axis=1) without materialising the (n, d)
    # element-wise product first.
    z = np.dot(X, theta)
    return sigmoid(z)


# Loss function: binary cross-entropy between the labels and the predictions.
def cross_entropy(y, y_hat):
    """Return the mean binary cross-entropy of predictions against labels.

    Args:
        y: Array of 0/1 labels.
        y_hat: Array of predicted probabilities, same shape as ``y``.

    Returns:
        The loss averaged over the first axis: shape (1,) for (n, 1)
        inputs, a NumPy scalar for 1-D inputs.
    """
    n_samples = y.shape[0]
    # Keep probabilities strictly inside (0, 1): a prediction that saturates
    # to exactly 0 or 1 would otherwise feed log(0) and poison the loss with
    # inf or nan (0 * -inf).
    eps = 1e-15
    p = np.clip(y_hat, eps, 1 - eps)
    losses = -y * np.log(p) - (1 - y) * np.log(1 - p)
    # Reduce over samples with NumPy rather than the builtin sum, which
    # would iterate the rows in Python; axis=0 keeps the trailing axis.
    return np.sum(losses, axis=0) / n_samples


def cost_function(theta, X, y):
    """Return the cross-entropy of the model's predictions on X against y."""
    return cross_entropy(y, model(theta, X))


# Gradient descent
def optimize(theta, X, y, alpha=1e-1):
    """Perform one gradient-descent step and return the updated parameters.

    Args:
        theta: Current parameter array; the script in this file uses a
            column of shape (d, 1).
        X: Design matrix of shape (n, d).
        y: Labels laid out like the output of ``model`` (an (n, 1) column
            in the script in this file).
        alpha: Learning rate. Defaults to 0.1.

    Returns:
        ``theta - alpha * gradient``, with the gradient given the same
        shape as ``theta``.
    """
    n = X.shape[0]
    y_hat = model(theta, X)
    # Per-sample gradient contributions, shape (n, d).
    dtheta = (1.0 / n) * ((y_hat - y) * X)
    # Collapse the sample axis to get one gradient entry per feature.
    dtheta = np.sum(dtheta, axis=0)
    # Follow theta's own shape instead of a fixed feature count so the step
    # works for any number of features.
    dtheta = dtheta.reshape(theta.shape)
    return theta - alpha * dtheta


# Run repeated optimisation steps over the data
def iterate(theta, X, y, times):
    """Run ``times`` optimisation steps, recording cost and accuracy.

    Args:
        theta: Initial parameters.
        X: Design matrix passed to every step.
        y: Labels passed to every step.
        times: Number of steps to run.

    Returns:
        A tuple ``(theta, costs, accs)``: the final parameters plus the
        cost and accuracy measured on (X, y) after each step.
    """
    cost_history = []
    acc_history = []
    for _ in range(times):
        theta = optimize(theta, X, y)
        # Both metrics are evaluated with the freshly updated parameters.
        cost_history.append(cost_function(theta, X, y))
        acc_history.append(accuracy(theta, X, y))

    return theta, cost_history, acc_history


# Evaluation helpers
def predict_proba(theta, X):
    """Return the model's predicted probabilities for the rows of X."""
    return model(theta, X)


def predict(X, theta):
    """Return hard 0/1 labels by thresholding predicted probabilities at 0.5.

    Note that the argument order here is (X, theta), the reverse of the
    other helpers in this file.
    """
    probabilities = predict_proba(theta, X)
    # Multiplying the boolean mask by 1 turns True/False into 1/0.
    return (probabilities > 0.5) * 1


def accuracy(theta, X, y):
    """Return the fraction of rows of X whose predicted label equals y.

    Args:
        theta: Model parameters.
        X: Design matrix.
        y: True 0/1 labels, either 1-D of length n or an (n, 1) column.

    Returns:
        The share of correct predictions; when ``predict`` returns an
        (n, 1) column this is an array of shape (1,).
    """
    y_hard = predict(X, theta)
    # Compare against a column so that a 1-D y cannot broadcast with the
    # column of predictions into an (n, n) comparison matrix.
    y_col = np.reshape(y, (-1, 1))
    # Count matches with NumPy instead of the builtin sum, which would loop
    # over the rows in Python; axis=0 keeps the trailing axis.
    count_right = np.sum(y_hard == y_col, axis=0)
    return count_right * 1.0 / len(y)


# Load the data
dataset = load_breast_cancer()
# Tabular view of the features with a human-readable label column.
data = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
data['cancer'] = [dataset.target_names[t] for t in dataset.target]

# Feature matrix and label vector; shape[0] / shape[1] are the sizes of the
# first / second dimension of the data.
X = dataset.data
y = dataset.target
n_features = X.shape[1]

std = X.std(axis=0)  # per-feature standard deviation (axis=0 reduces over the rows)
mean = X.mean(axis=0)  # per-feature mean (axis=0 reduces over the rows)
X_norm = (X - mean) / std  # z-score standardisation: subtract each feature's mean, divide by its std


def add_ones(X):
    """Return X with a column of ones prepended.

    Args:
        X: 2-D array of shape (n, d).

    Returns:
        Array of shape (n, d + 1) whose first column is all ones.
    """
    bias_column = np.ones((X.shape[0], 1))
    return np.concatenate((bias_column, X), axis=1)


# Prepend the column of ones to the standardised features.
X_with_ones = add_ones(X_norm)

# Hold out 30% of the samples for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X_with_ones, y, test_size=0.3, random_state=12345)
# Turn the label vectors into (n, 1) columns.
y_train = y_train.reshape((y_train.shape[0], 1))
y_test = y_test.reshape((y_test.shape[0], 1))

# Apply the algorithm
# Start from all-ones parameters: one per feature plus one for the added column of ones.
theta = np.ones((n_features+1,1))
theta, costs, accs = iterate(theta, X_train, y_train, 1500)
plt.plot(costs)    # plot the cost recorded after each step
plt.plot(accs)     # plot the accuracy recorded after each step
plt.show()
print(accuracy(theta, X_test, y_test))

2.库函数调用

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Load the breast-cancer data set and hold out 30% of it for testing.
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12345)

# The features are used unscaled here.  With scikit-learn's default
# iteration limit (max_iter=100) the solver commonly stops before converging
# on such data and emits a ConvergenceWarning, so allow more iterations.
lr = LogisticRegression(max_iter=10000)
lr.fit(X_train, y_train)
print(lr.score(X_train, y_train))  # mean accuracy on the training split
print(lr.score(X_test, y_test))    # mean accuracy on the held-out split

本文地址:https://blog.csdn.net/qq_40690199/article/details/107466555

如对本文有疑问, 点击进行留言回复!!

相关文章:

验证码:
移动技术网