810 lines
23 KiB
Plaintext
810 lines
23 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# -- 创建DataFrame\n",
|
||
"# -- 将上述的DataFrame分别命名为data1, data2, data3\n",
|
||
"# -- 将data1和data2两个数据框按照行的维度进行合并,命名为all_data\n",
|
||
"# -- 将data1和data2两个数据框按照列的维度进行合并,命名为all_data_col\n",
|
||
"# -- 打印data3\n",
|
||
"# -- 按照subject_id的值对all_data和data3作合并\n",
|
||
"# -- 对data1和data2按照subject_id作连接\n",
|
||
"# -- 找到 data1 和 data2 合并之后的所有匹配结果"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"raw_data_1 = {\n",
|
||
" 'subject_id': ['1', '2', '3', '4', '5'],\n",
|
||
" 'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'], \n",
|
||
" 'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches']}\n",
|
||
"\n",
|
||
"raw_data_2 = {\n",
|
||
" 'subject_id': ['4', '5', '6', '7', '8'],\n",
|
||
" 'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'], \n",
|
||
" 'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}\n",
|
||
"\n",
|
||
"raw_data_3 = {\n",
|
||
" 'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],\n",
|
||
" 'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>subject_id</th>\n",
|
||
" <th>first_name</th>\n",
|
||
" <th>last_name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Alex</td>\n",
|
||
" <td>Anderson</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>Amy</td>\n",
|
||
" <td>Ackerman</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Allen</td>\n",
|
||
" <td>Ali</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Alice</td>\n",
|
||
" <td>Aoni</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Ayoung</td>\n",
|
||
" <td>Atiches</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" subject_id first_name last_name\n",
|
||
"0 1 Alex Anderson\n",
|
||
"1 2 Amy Ackerman\n",
|
||
"2 3 Allen Ali\n",
|
||
"3 4 Alice Aoni\n",
|
||
"4 5 Ayoung Atiches"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#将上述的DataFrame分别命名为data1, data2, data3\n",
|
||
"data1 = pd.DataFrame(raw_data_1)\n",
|
||
"data2 = pd.DataFrame(raw_data_2)\n",
|
||
"data3 = pd.DataFrame(raw_data_3)\n",
|
||
"data1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>subject_id</th>\n",
|
||
" <th>first_name</th>\n",
|
||
" <th>last_name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Alex</td>\n",
|
||
" <td>Anderson</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>Amy</td>\n",
|
||
" <td>Ackerman</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Allen</td>\n",
|
||
" <td>Ali</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Alice</td>\n",
|
||
" <td>Aoni</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Ayoung</td>\n",
|
||
" <td>Atiches</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Billy</td>\n",
|
||
" <td>Bonder</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Brian</td>\n",
|
||
" <td>Black</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Bran</td>\n",
|
||
" <td>Balwner</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>Bryce</td>\n",
|
||
" <td>Brice</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>Betty</td>\n",
|
||
" <td>Btisan</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" subject_id first_name last_name\n",
|
||
"0 1 Alex Anderson\n",
|
||
"1 2 Amy Ackerman\n",
|
||
"2 3 Allen Ali\n",
|
||
"3 4 Alice Aoni\n",
|
||
"4 5 Ayoung Atiches\n",
|
||
"0 4 Billy Bonder\n",
|
||
"1 5 Brian Black\n",
|
||
"2 6 Bran Balwner\n",
|
||
"3 7 Bryce Brice\n",
|
||
"4 8 Betty Btisan"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#将data1和data2两个数据框按照行的维度进行合并,命名为all_data\n",
|
||
"all_data = pd.concat([data1,data2],axis=0)\n",
|
||
"all_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>subject_id</th>\n",
|
||
" <th>first_name</th>\n",
|
||
" <th>last_name</th>\n",
|
||
" <th>subject_id</th>\n",
|
||
" <th>first_name</th>\n",
|
||
" <th>last_name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Alex</td>\n",
|
||
" <td>Anderson</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Billy</td>\n",
|
||
" <td>Bonder</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>Amy</td>\n",
|
||
" <td>Ackerman</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Brian</td>\n",
|
||
" <td>Black</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Allen</td>\n",
|
||
" <td>Ali</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Bran</td>\n",
|
||
" <td>Balwner</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Alice</td>\n",
|
||
" <td>Aoni</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>Bryce</td>\n",
|
||
" <td>Brice</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Ayoung</td>\n",
|
||
" <td>Atiches</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>Betty</td>\n",
|
||
" <td>Btisan</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" subject_id first_name last_name subject_id first_name last_name\n",
|
||
"0 1 Alex Anderson 4 Billy Bonder\n",
|
||
"1 2 Amy Ackerman 5 Brian Black\n",
|
||
"2 3 Allen Ali 6 Bran Balwner\n",
|
||
"3 4 Alice Aoni 7 Bryce Brice\n",
|
||
"4 5 Ayoung Atiches 8 Betty Btisan"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#将data1和data2两个数据框按照列的维度进行合并,命名为all_data_col\n",
|
||
"all_data_col = pd.concat([data1,data2],axis=1)\n",
|
||
"all_data_col"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>subject_id</th>\n",
|
||
" <th>test_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>51</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>61</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>16</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>61</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>16</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" subject_id test_id\n",
|
||
"0 1 51\n",
|
||
"1 2 15\n",
|
||
"2 3 15\n",
|
||
"3 4 61\n",
|
||
"4 5 16\n",
|
||
"5 7 14\n",
|
||
"6 8 15\n",
|
||
"7 9 1\n",
|
||
"8 10 61\n",
|
||
"9 11 16"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data3"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>subject_id</th>\n",
|
||
" <th>first_name</th>\n",
|
||
" <th>last_name</th>\n",
|
||
" <th>test_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Alex</td>\n",
|
||
" <td>Anderson</td>\n",
|
||
" <td>51</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>Amy</td>\n",
|
||
" <td>Ackerman</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Allen</td>\n",
|
||
" <td>Ali</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Alice</td>\n",
|
||
" <td>Aoni</td>\n",
|
||
" <td>61</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Billy</td>\n",
|
||
" <td>Bonder</td>\n",
|
||
" <td>61</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Ayoung</td>\n",
|
||
" <td>Atiches</td>\n",
|
||
" <td>16</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Brian</td>\n",
|
||
" <td>Black</td>\n",
|
||
" <td>16</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>Bryce</td>\n",
|
||
" <td>Brice</td>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>Betty</td>\n",
|
||
" <td>Btisan</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" subject_id first_name last_name test_id\n",
|
||
"0 1 Alex Anderson 51\n",
|
||
"1 2 Amy Ackerman 15\n",
|
||
"2 3 Allen Ali 15\n",
|
||
"3 4 Alice Aoni 61\n",
|
||
"4 4 Billy Bonder 61\n",
|
||
"5 5 Ayoung Atiches 16\n",
|
||
"6 5 Brian Black 16\n",
|
||
"7 7 Bryce Brice 14\n",
|
||
"8 8 Betty Btisan 15"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#按照subject_id的值对all_data和data3作合并\n",
|
||
"pd.merge(all_data,data3,on='subject_id')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>subject_id</th>\n",
|
||
" <th>first_name_x</th>\n",
|
||
" <th>last_name_x</th>\n",
|
||
" <th>first_name_y</th>\n",
|
||
" <th>last_name_y</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Alice</td>\n",
|
||
" <td>Aoni</td>\n",
|
||
" <td>Billy</td>\n",
|
||
" <td>Bonder</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Ayoung</td>\n",
|
||
" <td>Atiches</td>\n",
|
||
" <td>Brian</td>\n",
|
||
" <td>Black</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" subject_id first_name_x last_name_x first_name_y last_name_y\n",
|
||
"0 4 Alice Aoni Billy Bonder\n",
|
||
"1 5 Ayoung Atiches Brian Black"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#对data1和data2按照subject_id作内连接\n",
|
||
"pd.merge(data1,data2,on='subject_id',how='inner')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>subject_id</th>\n",
|
||
" <th>first_name_data1</th>\n",
|
||
" <th>last_name_data1</th>\n",
|
||
" <th>first_name_data2</th>\n",
|
||
" <th>last_name_data2</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Alex</td>\n",
|
||
" <td>Anderson</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>Amy</td>\n",
|
||
" <td>Ackerman</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Allen</td>\n",
|
||
" <td>Ali</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Alice</td>\n",
|
||
" <td>Aoni</td>\n",
|
||
" <td>Billy</td>\n",
|
||
" <td>Bonder</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Ayoung</td>\n",
|
||
" <td>Atiches</td>\n",
|
||
" <td>Brian</td>\n",
|
||
" <td>Black</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Bran</td>\n",
|
||
" <td>Balwner</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Bryce</td>\n",
|
||
" <td>Brice</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Betty</td>\n",
|
||
" <td>Btisan</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" subject_id first_name_data1 last_name_data1 first_name_data2 last_name_data2\n",
|
||
"0 1 Alex Anderson NaN NaN\n",
|
||
"1 2 Amy Ackerman NaN NaN\n",
|
||
"2 3 Allen Ali NaN NaN\n",
|
||
"3 4 Alice Aoni Billy Bonder\n",
|
||
"4 5 Ayoung Atiches Brian Black\n",
|
||
"5 6 NaN NaN Bran Balwner\n",
|
||
"6 7 NaN NaN Bryce Brice\n",
|
||
"7 8 NaN NaN Betty Btisan"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#找到 data1 和 data2 合并之后的所有匹配结果\n",
|
||
"pd.merge(data1,data2,on='subject_id',how='outer',suffixes=('_data1', '_data2'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.1"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|