课程结束之后提交到仓库保存
This commit is contained in:
parent
02485f32ee
commit
71f31ea634
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,383 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"FILE = \"T2-fisher.txt\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0 1 2 3\n",
|
||||
"0 9 8 7 1\n",
|
||||
"1 7 6 6 1\n",
|
||||
"2 10 7 8 1\n",
|
||||
"3 8 4 5 1\n",
|
||||
"4 9 9 3 1\n",
|
||||
"5 8 6 7 1\n",
|
||||
"6 7 5 6 1\n",
|
||||
"7 8 4 4 0\n",
|
||||
"8 3 6 6 0\n",
|
||||
"9 6 3 3 0\n",
|
||||
"10 6 4 5 0\n",
|
||||
"11 8 2 2 0\n",
|
||||
"(12, 4)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 最后一维是标签 y\n",
|
||||
"df = pd.read_csv(FILE, sep=',', header=None)\n",
|
||||
"print(df)\n",
|
||||
"print(df.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(12, 3) (12,)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"E:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n",
|
||||
" \"\"\"Entry point for launching an IPython kernel.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data = df.values\n",
"X, Y = data[:, :-1], data[:, -1]\n",
|
||||
"print(X.shape, Y.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"x 0: (5, 3)\n",
|
||||
"x 1: (7, 3)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 分开两类\n",
|
||||
"x0, x1 = [], []\n",
|
||||
"\n",
|
||||
"for i in range(X.shape[0]):\n",
|
||||
" if Y[i] == 0:\n",
|
||||
" x0.append(X[i:i+1])\n",
|
||||
" else:\n",
|
||||
" x1.append(X[i:i+1])\n",
|
||||
"\n",
|
||||
"x0 = np.vstack(x0)\n",
|
||||
"x1 = np.vstack(x1)\n",
|
||||
"\n",
|
||||
"# [n_sample, n_dim]\n",
|
||||
"print(\"x 0:\", x0.shape)\n",
|
||||
"print(\"x 1:\", x1.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"#x0: 5 \n",
|
||||
"#x1: 7\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"num_0 = x0.shape[0]\n",
|
||||
"num_1 = x1.shape[0]\n",
|
||||
"\n",
|
||||
"print(\"#x0:\", num_0, \"\\n#x1:\", num_1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"---x0_bar:\n",
|
||||
" [6.2 3.8 4. ]\n",
|
||||
"---x1_bar:\n",
|
||||
" [8.28571429 6.42857143 6. ]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 各维度沿样本求平均\n",
|
||||
"x0_bar = np.mean(x0, axis=0)\n",
|
||||
"x1_bar = np.mean(x1, axis=0)\n",
|
||||
"\n",
|
||||
"print(\"---x0_bar:\\n\", x0_bar)\n",
|
||||
"print(\"---x1_bar:\\n\", x1_bar)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--- A:\n",
|
||||
" [[ 0.71428571 1.57142857 1. ]\n",
|
||||
" [-1.28571429 -0.42857143 0. ]\n",
|
||||
" [ 1.71428571 0.57142857 2. ]\n",
|
||||
" [-0.28571429 -2.42857143 -1. ]\n",
|
||||
" [ 0.71428571 2.57142857 -3. ]\n",
|
||||
" [-0.28571429 -0.42857143 1. ]\n",
|
||||
" [-1.28571429 -1.42857143 0. ]] \n",
|
||||
"--- B:\n",
|
||||
" [[ 1.8 0.2 0. ]\n",
|
||||
" [-3.2 2.2 2. ]\n",
|
||||
" [-0.2 -0.8 -1. ]\n",
|
||||
" [-0.2 0.2 1. ]\n",
|
||||
" [ 1.8 -1.8 -2. ]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 求 A、B\n",
|
||||
"A = x1 - x1_bar\n",
|
||||
"B = x0 - x0_bar\n",
|
||||
"\n",
|
||||
"print(\"--- A:\\n\", A, \"\\n--- B:\\n\", B)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--- S1:\n",
|
||||
" [[ 7.42857143 7.14285714 2. ]\n",
|
||||
" [ 7.14285714 17.71428571 -3. ]\n",
|
||||
" [ 2. -3. 16. ]] \n",
|
||||
"--- S2:\n",
|
||||
" [[ 16.8 -9.8 -10. ]\n",
|
||||
" [ -9.8 8.8 9. ]\n",
|
||||
" [-10. 9. 10. ]]\n",
|
||||
"--- S:\n",
|
||||
" [[24.22857143 -2.65714286 -8. ]\n",
|
||||
" [-2.65714286 26.51428571 6. ]\n",
|
||||
" [-8. 6. 26. ]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 离差矩阵 S\n",
|
||||
"S1 = np.dot(A.T, A) # S1 = A.T x A\n",
|
||||
"S2 = np.dot(B.T, B) # S2 = B.T x B\n",
|
||||
"S = S1 + S2 # S = S1 + S2\n",
|
||||
"\n",
|
||||
"print(\"--- S1:\\n\", S1, \"\\n--- S2:\\n\", S2)\n",
|
||||
"print(\"--- S:\\n\", S)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.12745741 0.09034737 0.09529135]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 解 c\n",
|
||||
"# Sc = (x1_bar - x0_bar)\n",
|
||||
"c = np.linalg.solve(S, x1_bar - x0_bar)\n",
|
||||
"print(c)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--- ya:\n",
|
||||
" 2.208628264548775\n",
|
||||
"--- yb:\n",
|
||||
" 1.5147213226864054\n",
|
||||
"--- y0:\n",
|
||||
" 1.919500372106121\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 判别临界值\n",
|
||||
"ya = np.dot(x1_bar, c)\n",
|
||||
"yb = np.dot(x0_bar, c)\n",
|
||||
"\n",
|
||||
"y0 = (ya * num_1 + yb * num_0) / (num_1 + num_0)\n",
|
||||
"\n",
|
||||
"print(\"--- ya:\\n\", ya)\n",
|
||||
"print(\"--- yb:\\n\", yb)\n",
|
||||
"print(\"--- y0:\\n\", y0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 预测新数据\n",
|
||||
"x_new = np.array([\n",
|
||||
" [9, 5, 4]\n",
|
||||
"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--- y_new:\n",
|
||||
" [1.98001891]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 新数据判别值\n",
|
||||
"y_new = np.dot(x_new, c)\n",
|
||||
"print(\"--- y_new:\\n\", y_new)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"label: 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 判断类别\n",
|
||||
"# 比较同 y0 的大小关系\n",
|
||||
"# 如果同 ya 一样,就跟 ya 同类\n",
|
||||
"# 否则同 yb 同类\n",
|
||||
"\n",
|
||||
"label = None\n",
|
||||
"if ya > y0:\n",
|
||||
" label = 1 if y_new > y0 else 0\n",
|
||||
"else: # ya < y0\n",
|
||||
" label = 1 if y_new < y0 else 0\n",
|
||||
"print(\"label: \", label)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.5"
|
||||
},
|
||||
"latex_envs": {
|
||||
"LaTeX_envs_menu_present": true,
|
||||
"autoclose": false,
|
||||
"autocomplete": true,
|
||||
"bibliofile": "biblio.bib",
|
||||
"cite_by": "apalike",
|
||||
"current_citInitial": 1,
|
||||
"eqLabelWithNumbers": true,
|
||||
"eqNumInitial": 1,
|
||||
"hotkeys": {
|
||||
"equation": "Ctrl-E",
|
||||
"itemize": "Ctrl-I"
|
||||
},
|
||||
"labels_anchors": false,
|
||||
"latex_user_defs": false,
|
||||
"report_style_numbering": false,
|
||||
"user_envs_cfg": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,473 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(array([[ 9, 8, 7],\n",
|
||||
" [ 7, 6, 6],\n",
|
||||
" [10, 7, 8],\n",
|
||||
" [ 8, 4, 5],\n",
|
||||
" [ 9, 9, 3],\n",
|
||||
" [ 8, 6, 7],\n",
|
||||
" [ 7, 5, 6]]), array([[8, 4, 4],\n",
|
||||
" [3, 6, 6],\n",
|
||||
" [6, 3, 3],\n",
|
||||
" [6, 4, 5],\n",
|
||||
" [8, 2, 2]]))"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a = np.asarray([[9,8,7],[7,6,6],[10,7,8],[8,4,5],[9,9,3],[8,6,7],[7,5,6]])\n",
|
||||
"b = np.asarray([[8,4,4],[3,6,6],[6,3,3],[6,4,5],[8,2,2]])\n",
|
||||
"a,b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([8.28571429, 6.42857143, 6. ])"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a_mean = np.mean(a,axis=0)\n",
|
||||
"a_mean"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([6.2, 3.8, 4. ])"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"b_mean = np.mean(b,axis=0)\n",
|
||||
"b_mean"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 0.71428571, 1.57142857, 1. ],\n",
|
||||
" [-1.28571429, -0.42857143, 0. ],\n",
|
||||
" [ 1.71428571, 0.57142857, 2. ],\n",
|
||||
" [-0.28571429, -2.42857143, -1. ],\n",
|
||||
" [ 0.71428571, 2.57142857, -3. ],\n",
|
||||
" [-0.28571429, -0.42857143, 1. ],\n",
|
||||
" [-1.28571429, -1.42857143, 0. ]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 计算离差矩\n",
|
||||
"A = a-a_mean\n",
|
||||
"A"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 1.8, 0.2, 0. ],\n",
|
||||
" [-3.2, 2.2, 2. ],\n",
|
||||
" [-0.2, -0.8, -1. ],\n",
|
||||
" [-0.2, 0.2, 1. ],\n",
|
||||
" [ 1.8, -1.8, -2. ]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"B = b-b_mean\n",
|
||||
"B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 7.42857143, 7.14285714, 2. ],\n",
|
||||
" [ 7.14285714, 17.71428571, -3. ],\n",
|
||||
" [ 2. , -3. , 16. ]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"S_a = A.T.dot(A)\n",
|
||||
"S_a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 16.8, -9.8, -10. ],\n",
|
||||
" [ -9.8, 8.8, 9. ],\n",
|
||||
" [-10. , 9. , 10. ]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"S_b = B.T.dot(B)\n",
|
||||
"S_b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[24.22857143, -2.65714286, -8. ],\n",
|
||||
" [-2.65714286, 26.51428571, 6. ],\n",
|
||||
" [-8. , 6. , 26. ]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"S=S_a+S_b\n",
|
||||
"S"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([0.12745741, 0.09034737, 0.09529135])"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"c = np.linalg.inv(S).dot(a_mean-b_mean)\n",
|
||||
"c # 判别系数,最优解由拉格朗日方法得出"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 得出判别函数y = cx"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2.2086282645487754"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 求出判别临界值\n",
|
||||
"# 购买组的平均值对应的判别值:\n",
|
||||
"y_a = np.sum(c*a_mean)\n",
|
||||
"y_a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1.5147213226864054"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 非购买组的平均值对应的判别值为:\n",
|
||||
"y_b = np.sum(c*b_mean)\n",
|
||||
"y_b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y0 = (len(a)*y_a+len(b)*y_b )/ (len(a)+len(b))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1.9195003721061212"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"y0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"7"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(a)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 9, 8, 7],\n",
|
||||
" [ 7, 6, 6],\n",
|
||||
" [10, 7, 8],\n",
|
||||
" [ 8, 4, 5],\n",
|
||||
" [ 9, 9, 3],\n",
|
||||
" [ 8, 6, 7],\n",
|
||||
" [ 7, 5, 6]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[8, 4, 4],\n",
|
||||
" [3, 6, 6],\n",
|
||||
" [6, 3, 3],\n",
|
||||
" [6, 4, 5],\n",
|
||||
" [8, 2, 2]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([9, 5, 4])"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"x = np.asarray([9,5,4])\n",
|
||||
"x"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1.9800189070136982"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"y = np.sum(c*x)\n",
|
||||
"y"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"y > y0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "tensorflow",
|
||||
"language": "python",
|
||||
"name": "tensorflow"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Binary file not shown.
32
README.md
32
README.md
|
@ -2,35 +2,3 @@
|
|||
|
||||
#### Description
|
||||
广东工业大学高级人工智能课程
|
||||
|
||||
#### Software Architecture
|
||||
Software architecture description
|
||||
|
||||
#### Installation
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### Instructions
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### Contribution
|
||||
|
||||
1. Fork the repository
|
||||
2. Create Feat_xxx branch
|
||||
3. Commit your code
|
||||
4. Create Pull Request
|
||||
|
||||
|
||||
#### Gitee Feature
|
||||
|
||||
1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md
|
||||
2. Gitee blog [blog.gitee.com](https://blog.gitee.com)
|
||||
3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore)
|
||||
4. The most valuable open source project [GVP](https://gitee.com/gvp)
|
||||
5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help)
|
||||
6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,355 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"FILE = \"T2-fisher.txt\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0 1 2 3\n",
|
||||
"0 9 8 7 1\n",
|
||||
"1 7 6 6 1\n",
|
||||
"2 10 7 8 1\n",
|
||||
"3 8 4 5 1\n",
|
||||
"4 9 9 3 1\n",
|
||||
"5 8 6 7 1\n",
|
||||
"6 7 5 6 1\n",
|
||||
"7 8 4 4 0\n",
|
||||
"8 3 6 6 0\n",
|
||||
"9 6 3 3 0\n",
|
||||
"10 6 4 5 0\n",
|
||||
"11 8 2 2 0\n",
|
||||
"(12, 4)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 最后一维是标签 y\n",
|
||||
"df = pd.read_csv(FILE, sep=',', header=None)\n",
|
||||
"print(df)\n",
|
||||
"print(df.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(12, 3) (12,)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"E:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n",
|
||||
" \"\"\"Entry point for launching an IPython kernel.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data = df.values\n",
"X, Y = data[:, :-1], data[:, -1]\n",
|
||||
"print(X.shape, Y.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"x0 = np.asarray([[0,0],[0,-1],[1,1]])\n",
|
||||
"x1 = np.asarray([[-1,0],[0,1]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"#x0: 3 \n",
|
||||
"#x1: 2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"num_0 = x0.shape[0]\n",
|
||||
"num_1 = x1.shape[0]\n",
|
||||
"\n",
|
||||
"print(\"#x0:\", num_0, \"\\n#x1:\", num_1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"---x0_bar:\n",
|
||||
" [0.33333333 0. ]\n",
|
||||
"---x1_bar:\n",
|
||||
" [-0.5 0.5]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 各维度沿样本求平均\n",
|
||||
"x0_bar = np.mean(x0, axis=0)\n",
|
||||
"x1_bar = np.mean(x1, axis=0)\n",
|
||||
"\n",
|
||||
"print(\"---x0_bar:\\n\", x0_bar)\n",
|
||||
"print(\"---x1_bar:\\n\", x1_bar)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--- A:\n",
|
||||
" [[-0.5 -0.5]\n",
|
||||
" [ 0.5 0.5]] \n",
|
||||
"--- B:\n",
|
||||
" [[-0.33333333 0. ]\n",
|
||||
" [-0.33333333 -1. ]\n",
|
||||
" [ 0.66666667 1. ]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 求 A、B\n",
|
||||
"A = x1 - x1_bar\n",
|
||||
"B = x0 - x0_bar\n",
|
||||
"\n",
|
||||
"print(\"--- A:\\n\", A, \"\\n--- B:\\n\", B)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--- S1:\n",
|
||||
" [[0.5 0.5]\n",
|
||||
" [0.5 0.5]] \n",
|
||||
"--- S2:\n",
|
||||
" [[0.66666667 1. ]\n",
|
||||
" [1. 2. ]]\n",
|
||||
"--- S:\n",
|
||||
" [[1.16666667 1.5 ]\n",
|
||||
" [1.5 2.5 ]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 离差矩阵 S\n",
|
||||
"S1 = np.dot(A.T, A) # S1 = A.T x A\n",
|
||||
"S2 = np.dot(B.T, B) # S2 = B.T x B\n",
|
||||
"S = S1 + S2 # S = S1 + S2\n",
|
||||
"\n",
|
||||
"print(\"--- S1:\\n\", S1, \"\\n--- S2:\\n\", S2)\n",
|
||||
"print(\"--- S:\\n\", S)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-4.25 2.75]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 解 c\n",
|
||||
"# Sc = (x1_bar - x0_bar)\n",
|
||||
"c = np.linalg.solve(S, x1_bar - x0_bar)\n",
|
||||
"print(c)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--- ya:\n",
|
||||
" 3.499999999999999\n",
|
||||
"--- yb:\n",
|
||||
" -1.4166666666666663\n",
|
||||
"--- y0:\n",
|
||||
" 0.5499999999999998\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 判别临界值\n",
|
||||
"ya = np.dot(x1_bar, c)\n",
|
||||
"yb = np.dot(x0_bar, c)\n",
|
||||
"\n",
|
||||
"y0 = (ya * num_1 + yb * num_0) / (num_1 + num_0)\n",
|
||||
"\n",
|
||||
"print(\"--- ya:\\n\", ya)\n",
|
||||
"print(\"--- yb:\\n\", yb)\n",
|
||||
"print(\"--- y0:\\n\", y0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 预测新数据\n",
|
||||
"x_new = np.array([\n",
|
||||
" [9, 5, 4]\n",
|
||||
"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-10-d5f4676ab255>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# 新数据判别值\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0my_new\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_new\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"--- y_new:\\n\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_new\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mValueError\u001b[0m: shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 新数据判别值\n",
|
||||
"y_new = np.dot(x_new, c)\n",
|
||||
"print(\"--- y_new:\\n\", y_new)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"label: 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 判断类别\n",
|
||||
"# 比较同 y0 的大小关系\n",
|
||||
"# 如果同 ya 一样,就跟 ya 同类\n",
|
||||
"# 否则同 yb 同类\n",
|
||||
"\n",
|
||||
"label = None\n",
|
||||
"if ya > y0:\n",
|
||||
" label = 1 if y_new > y0 else 0\n",
|
||||
"else: # ya < y0\n",
|
||||
" label = 1 if y_new < y0 else 0\n",
|
||||
"print(\"label: \", label)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.5"
|
||||
},
|
||||
"latex_envs": {
|
||||
"LaTeX_envs_menu_present": true,
|
||||
"autoclose": false,
|
||||
"autocomplete": true,
|
||||
"bibliofile": "biblio.bib",
|
||||
"cite_by": "apalike",
|
||||
"current_citInitial": 1,
|
||||
"eqLabelWithNumbers": true,
|
||||
"eqNumInitial": 1,
|
||||
"hotkeys": {
|
||||
"equation": "Ctrl-E",
|
||||
"itemize": "Ctrl-I"
|
||||
},
|
||||
"labels_anchors": false,
|
||||
"latex_user_defs": false,
|
||||
"report_style_numbering": false,
|
||||
"user_envs_cfg": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,456 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(array([[ 0, 0],\n",
|
||||
" [ 0, -1],\n",
|
||||
" [ 1, 1]]), array([[-1, 0],\n",
|
||||
" [ 0, 1]]))"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a = np.asarray([[0,0],[0,-1],[1,1]])\n",
|
||||
"b = np.asarray([[-1,0],[0,1]])\n",
|
||||
"a,b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([0.33333333, 0. ])"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a_mean = np.mean(a,axis=0)\n",
|
||||
"a_mean"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([-0.5, 0.5])"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"b_mean = np.mean(b,axis=0)\n",
|
||||
"b_mean"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[-0.33333333, 0. ],\n",
|
||||
" [-0.33333333, -1. ],\n",
|
||||
" [ 0.66666667, 1. ]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 计算离差(各样本减去本组均值);离差矩阵由 A.T.dot(A) 得到\n",
|
||||
"A = a-a_mean\n",
|
||||
"A"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[-0.5, -0.5],\n",
|
||||
" [ 0.5, 0.5]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"B = b-b_mean\n",
|
||||
"B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[0.66666667, 1. ],\n",
|
||||
" [1. , 2. ]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"S_a = A.T.dot(A)\n",
|
||||
"S_a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[0.5, 0.5],\n",
|
||||
" [0.5, 0.5]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"S_b = B.T.dot(B)\n",
|
||||
"S_b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[1.16666667, 1.5 ],\n",
|
||||
" [1.5 , 2.5 ]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"S=S_a+S_b\n",
|
||||
"S"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([ 4.25, -2.75])"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"c = np.linalg.inv(S).dot(a_mean-b_mean)\n",
|
||||
"c # 判别系数,最优解由拉格朗日方法得出"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 得出判别函数y = cx"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1.4166666666666665"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 求出判别临界值\n",
|
||||
"# 购买组的平均值对应的判别值:\n",
|
||||
"y_a = np.sum(c*a_mean)\n",
|
||||
"y_a "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"-3.5"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 非购买组的平均值对应的判别值为:\n",
|
||||
"y_b = np.sum(c*b_mean)\n",
|
||||
"y_b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y0 = (len(a)*y_a+len(b)*y_b )/ (len(a)+len(b))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"-0.55"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"y0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(a)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 9, 8, 7],\n",
|
||||
" [ 7, 6, 6],\n",
|
||||
" [10, 7, 8],\n",
|
||||
" [ 8, 4, 5],\n",
|
||||
" [ 9, 9, 3],\n",
|
||||
" [ 8, 6, 7],\n",
|
||||
" [ 7, 5, 6]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[8, 4, 4],\n",
|
||||
" [3, 6, 6],\n",
|
||||
" [6, 3, 3],\n",
|
||||
" [6, 4, 5],\n",
|
||||
" [8, 2, 2]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([9, 5, 4])"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"x = np.asarray([9,5,4])\n",
|
||||
"x"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1.9800189070136982"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"y = np.sum(c*x)\n",
|
||||
"y"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"y > y0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "tensorflow",
|
||||
"language": "python",
|
||||
"name": "tensorflow"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,657 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 题目\n",
|
||||
"1.现有两台服务器(S1, S2) ,都会单向向用户 U 传送数据。服务器 S1 和 S2 之间也会有数据通讯,但无法确定它们之间的数据流向。数据包的传送只取两种可能值:T=1 ( 成功 ) 或 F=2 ( 失败 )。假设贝叶斯网络由S1、S2和U这三个节点构成,现采集了100条该网络的数据传送样本,如文件 server_data.txt 所给出。该文件中,每行代表一个三节点网络的样本, 试利用贝叶斯算法学习得到该网络的结构和参数。( 30分 )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 参考资料\n",
|
||||
"参考学习网址 https://blog.csdn.net/leida_wt/article/details/88743323\n",
|
||||
"\n",
|
||||
"自动学习网络结构的核心问题有两个:一个是评价网络好坏的指标,另一个是搜索网络结构的方法。穷举是不可取的,因为组合数太大,只能利用各种启发式方法或限定搜索条件来减少搜索空间。由此产生两大类方法:基于评分的结构学习(score-based structure learning)与基于约束的结构学习(constraint-based structure learning),以及它们的结合——混合结构学习(hybrid structure learning)。"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.433710Z",
|
||||
"start_time": "2019-12-29T02:45:08.854096Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from pgmpy.models import BayesianModel\n",
|
||||
"from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator\n",
|
||||
"from pgmpy.estimators import BdeuScore, K2Score, BicScore\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.452516Z",
|
||||
"start_time": "2019-12-29T02:45:11.433710Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>S1</th>\n",
|
||||
" <th>S2</th>\n",
|
||||
" <th>U</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>95</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>96</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>97</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>98</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>99</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>100 rows × 3 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" S1 S2 U\n",
|
||||
"0 1 2 1\n",
|
||||
"1 2 2 2\n",
|
||||
"2 2 1 1\n",
|
||||
"3 2 1 1\n",
|
||||
"4 2 1 1\n",
|
||||
".. .. .. ..\n",
|
||||
"95 2 1 1\n",
|
||||
"96 2 1 1\n",
|
||||
"97 2 1 1\n",
|
||||
"98 2 1 1\n",
|
||||
"99 2 1 1\n",
|
||||
"\n",
|
||||
"[100 rows x 3 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data_list = []\n",
|
||||
"with open('server_data.txt') as f:\n",
|
||||
" lines = f.readlines()\n",
|
||||
" for line in lines:\n",
|
||||
" data_list.append(line.strip().split())\n",
|
||||
"data_list = np.array(data_list, dtype=np.int32)\n",
|
||||
"data = pd.DataFrame(data_list, columns=['S1', 'S2', 'U'])\n",
|
||||
"data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.459497Z",
|
||||
"start_time": "2019-12-29T02:45:11.454480Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def showBN(model, save=False):\n",
|
||||
" '''传入BayesianModel对象,调用graphviz绘制结构图,jupyter中可直接显示'''\n",
|
||||
" from graphviz import Digraph\n",
|
||||
" node_attr = dict(\n",
|
||||
" style='filled',\n",
|
||||
" shape='box',\n",
|
||||
" align='left',\n",
|
||||
" fontsize='12',\n",
|
||||
" ranksep='0.1',\n",
|
||||
" height='0.2'\n",
|
||||
" )\n",
|
||||
" dot = Digraph(node_attr=node_attr, graph_attr=dict(size=\"12,12\"))\n",
|
||||
" seen = set()\n",
|
||||
" edges = model.edges()\n",
|
||||
" for a, b in edges:\n",
|
||||
" dot.edge(a, b)\n",
|
||||
" if save:\n",
|
||||
" dot.view(cleanup=True)\n",
|
||||
" return dot"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 根据题目要求分别定义出两种可能的网络"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.608070Z",
|
||||
"start_time": "2019-12-29T02:45:11.461462Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"image/svg+xml": [
|
||||
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
|
||||
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
|
||||
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
|
||||
"<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
|
||||
" -->\r\n",
|
||||
"<!-- Title: %3 Pages: 1 -->\r\n",
|
||||
"<svg width=\"90pt\" height=\"143pt\"\r\n",
|
||||
" viewBox=\"0.00 0.00 90.00 143.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
|
||||
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 139)\">\r\n",
|
||||
"<title>%3</title>\r\n",
|
||||
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-139 86,-139 86,4 -4,4\"/>\r\n",
|
||||
"<!-- S1 -->\r\n",
|
||||
"<g id=\"node1\" class=\"node\"><title>S1</title>\r\n",
|
||||
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"54,-135 0,-135 0,-114 54,-114 54,-135\"/>\r\n",
|
||||
"<text text-anchor=\"middle\" x=\"27\" y=\"-121.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">S1</text>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- U -->\r\n",
|
||||
"<g id=\"node2\" class=\"node\"><title>U</title>\r\n",
|
||||
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"54,-21 0,-21 0,-0 54,-0 54,-21\"/>\r\n",
|
||||
"<text text-anchor=\"middle\" x=\"27\" y=\"-7.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">U</text>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- S1->U -->\r\n",
|
||||
"<g id=\"edge1\" class=\"edge\"><title>S1->U</title>\r\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M24.7894,-113.802C22.8203,-104.634 20.0874,-90.4646 19,-78 18.1888,-68.702 18.1888,-66.298 19,-57 19.7476,-48.4306 21.2729,-39.0555 22.7931,-31.0538\"/>\r\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"26.2344,-31.6942 24.7894,-21.1984 19.3737,-30.3045 26.2344,-31.6942\"/>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- S2 -->\r\n",
|
||||
"<g id=\"node3\" class=\"node\"><title>S2</title>\r\n",
|
||||
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"82,-78 28,-78 28,-57 82,-57 82,-78\"/>\r\n",
|
||||
"<text text-anchor=\"middle\" x=\"55\" y=\"-64.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">S2</text>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- S1->S2 -->\r\n",
|
||||
"<g id=\"edge2\" class=\"edge\"><title>S1->S2</title>\r\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M31.8772,-113.92C35.6053,-106.597 40.8621,-96.2709 45.4216,-87.3147\"/>\r\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"48.5723,-88.8404 49.9901,-78.3408 42.3342,-85.6646 48.5723,-88.8404\"/>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- S2->U -->\r\n",
|
||||
"<g id=\"edge3\" class=\"edge\"><title>S2->U</title>\r\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M50.1228,-56.9197C46.3947,-49.5967 41.1379,-39.2709 36.5784,-30.3147\"/>\r\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"39.6658,-28.6646 32.0099,-21.3408 33.4277,-31.8404 39.6658,-28.6646\"/>\r\n",
|
||||
"</g>\r\n",
|
||||
"</g>\r\n",
|
||||
"</svg>\r\n"
|
||||
],
|
||||
"text/plain": [
|
||||
"<graphviz.dot.Digraph at 0x17515213780>"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_1 = BayesianModel([('S1', 'U'), ('S2', 'U'), ('S1', 'S2')])\n",
|
||||
"model_1.fit(data)\n",
|
||||
"showBN(model_1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.709798Z",
|
||||
"start_time": "2019-12-29T02:45:11.609066Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"image/svg+xml": [
|
||||
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
|
||||
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
|
||||
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
|
||||
"<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
|
||||
" -->\r\n",
|
||||
"<!-- Title: %3 Pages: 1 -->\r\n",
|
||||
"<svg width=\"89pt\" height=\"143pt\"\r\n",
|
||||
" viewBox=\"0.00 0.00 89.00 143.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
|
||||
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 139)\">\r\n",
|
||||
"<title>%3</title>\r\n",
|
||||
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-139 85,-139 85,4 -4,4\"/>\r\n",
|
||||
"<!-- S1 -->\r\n",
|
||||
"<g id=\"node1\" class=\"node\"><title>S1</title>\r\n",
|
||||
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"54,-78 0,-78 0,-57 54,-57 54,-78\"/>\r\n",
|
||||
"<text text-anchor=\"middle\" x=\"27\" y=\"-64.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">S1</text>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- U -->\r\n",
|
||||
"<g id=\"node2\" class=\"node\"><title>U</title>\r\n",
|
||||
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"81,-21 27,-21 27,-0 81,-0 81,-21\"/>\r\n",
|
||||
"<text text-anchor=\"middle\" x=\"54\" y=\"-7.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">U</text>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- S1->U -->\r\n",
|
||||
"<g id=\"edge1\" class=\"edge\"><title>S1->U</title>\r\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M31.703,-56.9197C35.2597,-49.6746 40.2593,-39.4903 44.6231,-30.601\"/>\r\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"47.9041,-31.8599 49.169,-21.3408 41.6204,-28.7751 47.9041,-31.8599\"/>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- S2 -->\r\n",
|
||||
"<g id=\"node3\" class=\"node\"><title>S2</title>\r\n",
|
||||
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"81,-135 27,-135 27,-114 81,-114 81,-135\"/>\r\n",
|
||||
"<text text-anchor=\"middle\" x=\"54\" y=\"-121.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">S2</text>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- S2->S1 -->\r\n",
|
||||
"<g id=\"edge3\" class=\"edge\"><title>S2->S1</title>\r\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M49.297,-113.92C45.7403,-106.675 40.7407,-96.4903 36.3769,-87.601\"/>\r\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"39.3796,-85.7751 31.831,-78.3408 33.0959,-88.8599 39.3796,-85.7751\"/>\r\n",
|
||||
"</g>\r\n",
|
||||
"<!-- S2->U -->\r\n",
|
||||
"<g id=\"edge2\" class=\"edge\"><title>S2->U</title>\r\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M56.4864,-113.819C58.7013,-104.663 61.7754,-90.5018 63,-78 63.9099,-68.7111 63.9099,-66.2889 63,-57 62.1581,-48.405 60.442,-39.0257 58.7319,-31.0279\"/>\r\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"62.1223,-30.1528 56.4864,-21.1813 55.2975,-31.7092 62.1223,-30.1528\"/>\r\n",
|
||||
"</g>\r\n",
|
||||
"</g>\r\n",
|
||||
"</svg>\r\n"
|
||||
],
|
||||
"text/plain": [
|
||||
"<graphviz.dot.Digraph at 0x175152c2128>"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_2 = BayesianModel([('S1', 'U'), ('S2', 'U'), ('S2', 'S1')])\n",
|
||||
"model_2.fit(data)\n",
|
||||
"showBN(model_2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 评分函数,使用k2,bdeu,bic进行评分"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.715781Z",
|
||||
"start_time": "2019-12-29T02:45:11.710794Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bdeu = BdeuScore(data, equivalent_sample_size=5)\n",
|
||||
"k2 = K2Score(data)\n",
|
||||
"bic = BicScore(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.775621Z",
|
||||
"start_time": "2019-12-29T02:45:11.716779Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-127.81019191674014\n",
|
||||
"-130.82411202574002\n",
|
||||
"-129.03972756462477\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(bdeu.score(model_1))\n",
|
||||
"print(k2.score(model_1))\n",
|
||||
"print(bic.score(model_1))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.811526Z",
|
||||
"start_time": "2019-12-29T02:45:11.777617Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-127.81019191674014\n",
|
||||
"-130.99837093511061\n",
|
||||
"-129.0397275646248\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(bdeu.score(model_2))\n",
|
||||
"print(k2.score(model_2))\n",
|
||||
"print(bic.score(model_2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.818531Z",
|
||||
"start_time": "2019-12-29T02:45:11.812523Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"False"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"bdeu.score(model_1)>bdeu.score(model_2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.826510Z",
|
||||
"start_time": "2019-12-29T02:45:11.819504Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"k2.score(model_1)>k2.score(model_2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.837484Z",
|
||||
"start_time": "2019-12-29T02:45:11.827483Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"bic.score(model_1)>bic.score(model_2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 查看模型的条件概率表(CPD)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.845461Z",
|
||||
"start_time": "2019-12-29T02:45:11.838453Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"+-------+------+\n",
|
||||
"| S1(1) | 0.28 |\n",
|
||||
"+-------+------+\n",
|
||||
"| S1(2) | 0.72 |\n",
|
||||
"+-------+------+\n",
|
||||
"+-------+---------------------+-------+\n",
|
||||
"| S1 | S1(1) | S1(2) |\n",
|
||||
"+-------+---------------------+-------+\n",
|
||||
"| S2(1) | 0.17857142857142858 | 0.75 |\n",
|
||||
"+-------+---------------------+-------+\n",
|
||||
"| S2(2) | 0.8214285714285714 | 0.25 |\n",
|
||||
"+-------+---------------------+-------+\n",
|
||||
"+------+-------+-------+-------+-------+\n",
|
||||
"| S1 | S1(1) | S1(1) | S1(2) | S1(2) |\n",
|
||||
"+------+-------+-------+-------+-------+\n",
|
||||
"| S2 | S2(1) | S2(2) | S2(1) | S2(2) |\n",
|
||||
"+------+-------+-------+-------+-------+\n",
|
||||
"| U(1) | 0.0 | 1.0 | 1.0 | 0.0 |\n",
|
||||
"+------+-------+-------+-------+-------+\n",
|
||||
"| U(2) | 1.0 | 0.0 | 0.0 | 1.0 |\n",
|
||||
"+------+-------+-------+-------+-------+\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(model_1.get_cpds('S1'))\n",
|
||||
"print(model_1.get_cpds('S2'))\n",
|
||||
"print(model_1.get_cpds('U'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2019-12-29T02:45:11.854410Z",
|
||||
"start_time": "2019-12-29T02:45:11.846432Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"+-------+--------------------+---------------------+\n",
|
||||
"| S2 | S2(1) | S2(2) |\n",
|
||||
"+-------+--------------------+---------------------+\n",
|
||||
"| S1(1) | 0.0847457627118644 | 0.5609756097560976 |\n",
|
||||
"+-------+--------------------+---------------------+\n",
|
||||
"| S1(2) | 0.9152542372881356 | 0.43902439024390244 |\n",
|
||||
"+-------+--------------------+---------------------+\n",
|
||||
"+-------+------+\n",
|
||||
"| S2(1) | 0.59 |\n",
|
||||
"+-------+------+\n",
|
||||
"| S2(2) | 0.41 |\n",
|
||||
"+-------+------+\n",
|
||||
"+------+-------+-------+-------+-------+\n",
|
||||
"| S1 | S1(1) | S1(1) | S1(2) | S1(2) |\n",
|
||||
"+------+-------+-------+-------+-------+\n",
|
||||
"| S2 | S2(1) | S2(2) | S2(1) | S2(2) |\n",
|
||||
"+------+-------+-------+-------+-------+\n",
|
||||
"| U(1) | 0.0 | 1.0 | 1.0 | 0.0 |\n",
|
||||
"+------+-------+-------+-------+-------+\n",
|
||||
"| U(2) | 1.0 | 0.0 | 0.0 | 1.0 |\n",
|
||||
"+------+-------+-------+-------+-------+\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(model_2.get_cpds('S1'))\n",
|
||||
"print(model_2.get_cpds('S2'))\n",
|
||||
"print(model_2.get_cpds('U'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 结论\n",
|
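||||
"两种网络的 BDeu 评分完全相同,BIC 评分只在浮点数末位有差别(上面 BIC 比较得到的 True 只是舍入误差造成的),K2 评分也仅相差约 0.17。分数差距不是很大,说明对这组数据来说,题目假定的两种网络的区分度不够高:两者只有 S1 与 S2 之间的边方向不同,且都是三个节点上的完全有向无环图,属于同一个马尔可夫等价类,仅凭观测数据无法判断这条边的方向。因此这两种网络结构的可能性都很大。"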
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "tensorflow",
|
||||
"language": "python",
|
||||
"name": "tensorflow"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
},
|
||||
"latex_envs": {
|
||||
"LaTeX_envs_menu_present": true,
|
||||
"autoclose": false,
|
||||
"autocomplete": true,
|
||||
"bibliofile": "biblio.bib",
|
||||
"cite_by": "apalike",
|
||||
"current_citInitial": 1,
|
||||
"eqLabelWithNumbers": true,
|
||||
"eqNumInitial": 1,
|
||||
"hotkeys": {
|
||||
"equation": "Ctrl-E",
|
||||
"itemize": "Ctrl-I"
|
||||
},
|
||||
"labels_anchors": false,
|
||||
"latex_user_defs": false,
|
||||
"report_style_numbering": false,
|
||||
"user_envs_cfg": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue