特征工程之特征抽取
安装 sklearn
使用 anaconda
conda install scikit-learn
字典特征抽取
from sklearn.feature_extraction import DictVectorizer
def dictvec():
    """
    Extract features from a list of dicts and print the result.

    Prints the learned feature names followed by the transformed data.
    In the sample below the string-valued ``city`` field becomes one
    column per distinct value, while the numeric ``temperature`` field
    stays a single column.

    :return: None
    """
    # sparse=False asks for a dense array rather than a sparse matrix,
    # which makes the printed result readable.
    vectorizer = DictVectorizer(sparse=False)

    # Learn the feature names and convert the samples in one call.
    data = vectorizer.fit_transform(
        [
            {'city': '北京', 'temperature': 100},
            {'city': '上海', 'temperature': 80},
            {'city': '深圳', 'temperature': 60},
        ]
    )

    # get_feature_names() was removed in scikit-learn 1.2; use
    # get_feature_names_out() instead. tolist() turns the returned array
    # into a plain list so it prints with commas between the names.
    print(vectorizer.get_feature_names_out().tolist())
    print(data)
    return None


if __name__ == "__main__":
    dictvec()
打印结果
['city=上海', 'city=北京', 'city=深圳', 'temperature']
[[ 0. 1. 0. 100.]
[ 1. 0. 0. 80.]
[ 0. 0. 1. 60.]]
文本特征抽取
# 特征抽取
#
# 导入包
from sklearn.feature_extraction.text import CountVectorizer
# Instantiate CountVectorizer with its default settings.
vectorizer = CountVectorizer()

# Sample documents to count words in.
corpus = [
    "left is short,i like python",
    "lift is too long,i dislike python",
    "lift is three,i not python",
]

# Learn the vocabulary and build the document-term count matrix.
res = vectorizer.fit_transform(corpus)

# get_feature_names() was removed in scikit-learn 1.2; use
# get_feature_names_out() instead. tolist() turns the returned array
# into a plain list so it prints with commas between the names.
print(vectorizer.get_feature_names_out().tolist())
# fit_transform returns a sparse matrix; toarray() densifies it for display.
print(res.toarray())
打印结果
['dislike', 'is', 'left', 'lift', 'like', 'long', 'not', 'python', 'short', 'three', 'too']
[[0 1 1 0 1 0 0 1 1 0 0]
[1 1 0 1 0 1 0 1 0 0 1]
[0 1 0 1 0 0 1 1 0 1 0]]
one-hot 编码
视频
<video style="width:100%;height:100%;" src="http://pg7op1zfx.bkt.clouddn.com/02_%E7%89%B9%E5%BE%81%E5%B7%A5%E7%A8%8B%E4%B9%8B%E5%AD%97%E5%85%B8%E7%89%B9%E5%BE%81%E6%8A%BD%E5%8F%96.mp4" controls="controls"></video>
评论已关闭