import pandas as pd
import numpy as np
from skrebate import ReliefF
from sklearn import preprocessing
import time
# Directory that holds the ReliefF inputs and receives the score file.
_DATA_DIR = "F:\\PaperCode\\Mypaper_python_code\\data\\feature_select\\"
path_input_feature = _DATA_DIR + "input.tsv"
path_input_label = _DATA_DIR + "output.tsv"


def _current_timestamp():
    """Return the current local time formatted as ``YYYY:MM:DD HH:MM:SS``."""
    # strftime() without an explicit time tuple formats the current local time.
    return time.strftime('%Y:%m:%d %H:%M:%S')


features = pd.read_csv(path_input_feature, sep="\t")
# NOTE(review): the label file is named .tsv but is read with the default
# comma separator; that is only equivalent for a single-column file - confirm.
labels = pd.read_csv(path_input_label)

# Replace missing feature values with 0, then switch to a plain NumPy array.
features = features.fillna(0).values

# Flatten the label table into a 1-D integer vector and shift every label
# down by 1 (presumably converting 1-based labels to 0-based - confirm).
labels = np.asarray(labels.values.ravel() - 1, dtype=int)

# Rescale the features with MinMaxScaler's default settings (range 0-1).
min_max_scaler = preprocessing.MinMaxScaler()
features = min_max_scaler.fit_transform(features)

# Print the start time, e.g. 2021:09:09 19:17:29
dt = _current_timestamp()
print(dt)

####################################
# ReliefF feature scoring.
# NOTE(review): ReliefF is run with its default settings; the run recorded at
# the bottom of this script took roughly 13 hours. skrebate documents an
# `n_jobs` parameter that may be worth trying if runtime matters (not changed
# here).
fs = ReliefF()
fs.fit(features, labels)
###################################

# Print the end time, e.g. 2021:09:09 19:17:29
dt = _current_timestamp()
print(dt)

# Write the fitted feature importances to a text file.
np.savetxt(_DATA_DIR + 'counts_matrix_mean_relieff.txt', fs.feature_importances_)
# Timestamps recorded from a previous run:
# start:2024:03:13 22:01:28
# end:2024:03:14 11:20:39
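# input.tsv: a 2477 x 59427 matrix; rows are samples, columns are genes, and each value is the expression level of that gene in the corresponding sample.
# output.tsv: a 2477 x 1 vector; rows are samples, the single column is the label, and each value is the label of the corresponding sample.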