分享

基于机器学习的临床决策支持

 DrugAI 2022-04-19

声明:本文示例来自于GitHub用户vkasojhaa的项目,一切权利归其所有,此处仅是自己学习分享。


本文实现了基于机器学习的乳腺癌良恶性预测,并比较了不同机器学习算法的性能。主要目的是从准确性和效率两个方面,评估各算法对数据进行分类的效果。

基于机器学习的乳腺癌预测

代码示例

  1. #导入依赖库

  2. #!/usr/bin/python3

  3. import numpy as np

  4. import pandas as pd

  5. import matplotlib.pyplot as plt

  6. import matplotlib.gridspec as gridspec

  7. import seaborn as sns

  8. import time

  9. %matplotlib inline

  10. #Import models from scikit learn module:

  11. from sklearn.model_selection import train_test_split

  12. from sklearn.linear_model import LogisticRegression

  13. from sklearn.model_selection import KFold

  14. from sklearn.model_selection import cross_val_score

  15. from sklearn.cross_validation import KFold  

  16. from sklearn.neighbors import KNeighborsClassifier

  17. from sklearn.tree import DecisionTreeClassifier

  18. from sklearn.model_selection import GridSearchCV

  19. from sklearn.preprocessing import StandardScaler

  20. from sklearn.model_selection import GridSearchCV

  21. from sklearn.pipeline import Pipeline

  22. from sklearn.svm import SVC

  23. from sklearn import metrics

  # Load the data set.
  data = pd.read_csv("data.csv")
  data.head()

  # Preprocessing: remove the 'id' and 'Unnamed: 32' columns in place.
  data.drop(columns=['id', 'Unnamed: 32'], inplace=True)
  print("Row, Col", data.shape)  # (row, col)

  # Label encoding: M (malignant) -> 1, B (benign) -> 0.
  diagnosis_codes = {'M': 1, 'B': 0}
  data['diagnosis'] = data['diagnosis'].map(diagnosis_codes)
  data.head()

  # Summary statistics of the data.
  data.describe()

探索数据

  # Density plot of the DataFrame's columns, one subplot each on a 5x7 grid.
  data.plot(kind='density', subplots=True, layout=(5, 7), sharex=False,
            legend=False, fontsize=1)
  plt.show()

  # Class balance: number of rows for each diagnosis value.
  print(data.groupby('diagnosis').size())
  # Pass the column explicitly as `x`: in current seaborn the first positional
  # parameter of countplot is `data`, not `x`.
  sns.countplot(x=data['diagnosis'], label="Count")
  plt.show()


  # Split the data into a training set and a test set (30% held out).
  # random_state matches the seed used by the other train_test_split calls in
  # this script so that the split, and the scores below, are reproducible.
  traindf, testdf = train_test_split(data, test_size=0.3, random_state=21)

  # Draw the pie chart from the actual split sizes instead of hard-coded
  # percentages, so the chart cannot drift from test_size.
  labels = 'Train', 'Test'
  plt.pie([len(traindf), len(testdf)], labels=labels, autopct='%1.1f%%', shadow=True)
  plt.show()

  print("Train set", traindf.shape)
  print("Test set", testdf.shape)


  # Correlation matrix of the columns at positions 1..10, drawn as an
  # annotated, square-celled heatmap.
  features_mean = data.columns[1:11].tolist()
  corr = data[features_mean].corr()

  plt.figure(figsize=(14, 14))
  sns.heatmap(
      corr,
      cbar=True,
      square=True,
      annot=True,
      fmt='.2f',
      annot_kws={'size': 15},
      xticklabels=features_mean,
      yticklabels=features_mean,
      cmap='coolwarm',
  )
  plt.show()

模型分类

  # Feature matrix (every column except 'diagnosis') and label vector as
  # NumPy arrays, followed by a seeded 70/30 hold-out split.
  X = data.drop(columns=['diagnosis']).to_numpy()
  Y = data['diagnosis'].to_numpy()
  X_train, X_test, Y_train, Y_test = train_test_split(
      X, Y, test_size=0.30, random_state=21
  )

  def classification_model(model, data, predictors, outcome):
      """Fit ``model``, print its training accuracy and a 5-fold CV score.

      Args:
          model: Estimator exposing ``fit``, ``predict`` and ``score``.
          data: DataFrame holding both the predictor and the outcome columns.
          predictors: List of column names used as features.
          outcome: Name of the target column.

      Returns:
          None. The two scores are printed, and ``model`` is left fitted on
          all rows of ``data`` so it can be used after the call.
      """
      # Imported locally from model_selection: the legacy
      # sklearn.cross_validation module (whose KFold took the sample count
      # and ``n_folds``) was removed in scikit-learn 0.20.
      from sklearn.model_selection import KFold

      features = data[predictors]
      target = data[outcome]

      # Fit on all rows and report accuracy on those same rows.
      model.fit(features, target)
      predictions = model.predict(features)
      accuracy = metrics.accuracy_score(target, predictions)
      print("Accuracy : %s" % "{0:.3%}".format(accuracy))

      # 5-fold cross-validation: refit on each training fold and score on
      # the fold that was held out.
      kfold = KFold(n_splits=5)
      fold_scores = []
      for train_idx, test_idx in kfold.split(features):
          model.fit(features.iloc[train_idx], target.iloc[train_idx])
          fold_scores.append(
              model.score(features.iloc[test_idx], target.iloc[test_idx])
          )
      print("Cross-Validation Score : %s" % "{0:.3%}".format(np.mean(fold_scores)))

      # Fit the model again on all rows so it can be referred to outside
      # the function.
      model.fit(features, target)


逻辑回归模型

  # Logistic regression on five of the `*_mean` columns.
  outcome_var = 'diagnosis'
  predictor_var = [
      'texture_mean',
      'perimeter_mean',
      'smoothness_mean',
      'compactness_mean',
      'symmetry_mean',
  ]
  model = LogisticRegression()
  classification_model(model, traindf, predictor_var, outcome_var)

Accuracy : 91.206%
Cross-Validation Score : 90.206%

决策树模型

  # Decision tree on the same five `*_mean` columns.
  predictor_var = [
      'texture_mean',
      'perimeter_mean',
      'smoothness_mean',
      'compactness_mean',
      'symmetry_mean',
  ]
  model = DecisionTreeClassifier()
  classification_model(model, traindf, predictor_var, outcome_var)

Accuracy : 100.000%
Cross-Validation Score : 87.446%
  # Decision tree again, this time with 'texture_mean' as the only predictor.
  predictor_var = ['texture_mean']
  model = DecisionTreeClassifier()
  classification_model(
      model,
      traindf,
      predictor_var,
      outcome_var,
  )

Accuracy : 96.231%
Cross-Validation Score : 66.329%


k-近邻模型

  # k-nearest-neighbours classifier on the five `*_mean` columns.
  predictor_var = [
      'texture_mean',
      'perimeter_mean',
      'smoothness_mean',
      'compactness_mean',
      'symmetry_mean',
  ]
  model = KNeighborsClassifier()
  classification_model(model, traindf, predictor_var, outcome_var)

Accuracy : 92.462%
Cross-Validation Score : 89.456%

支持向量机模型

  # Support vector classifier on the five `*_mean` columns.
  predictor_var = [
      'texture_mean',
      'perimeter_mean',
      'smoothness_mean',
      'compactness_mean',
      'symmetry_mean',
  ]
  model = SVC()
  classification_model(model, traindf, predictor_var, outcome_var)

Accuracy : 94.472%
Cross-Validation Score : 87.937%
  # Performance comparison of several machine-learning models.
  Y = data['diagnosis'].values
  X = data.drop('diagnosis', axis=1).values
  X_train, X_test, Y_train, Y_test = train_test_split(
      X, Y, test_size=0.30, random_state=21
  )

  # (short name, unfitted estimator) pairs to compare.
  models_list = [
      ('LR', LogisticRegression()),
      ('DT', DecisionTreeClassifier()),
      ('SVM', SVC()),
      ('KNN', KNeighborsClassifier()),
  ]

  num_folds = 10
  results = []  # per-model arrays of cross-validation accuracies
  names = []    # model short names, in the same order as `results`
  for name, model in models_list:
      # perf_counter is the clock intended for measuring short intervals;
      # time.time() is wall-clock time and can jump if the system clock is
      # adjusted.
      start = time.perf_counter()
      cv_results = cross_val_score(
          model, X_train, Y_train, cv=num_folds, scoring='accuracy'
      )
      end = time.perf_counter()
      results.append(cv_results)
      names.append(name)
      print("%s:(run time: %f)" % (name, end - start))

LR:(run time: 0.069959)
DT:(run time: 0.047665)
SVM:(run time: 0.156240)
KNN:(run time: 0.029838)

Connecting artificial intelligence (AI) with pharmaceutical sciences.


参考资料:

https://github.com/vkasojhaa/Clinical-Decision-Support-using-Machine-Learning

    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多