{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Exercise: merging DataFrames\n", "\n", "1. Create the DataFrames.\n", "2. Name the DataFrames `data1`, `data2` and `data3`.\n", "3. Concatenate `data1` and `data2` along the rows and name the result `all_data`.\n", "4. Concatenate `data1` and `data2` along the columns and name the result `all_data_col`.\n", "5. Print `data3`.\n", "6. Merge `all_data` and `data3` on the value of `subject_id`.\n", "7. Join `data1` and `data2` on `subject_id`.\n", "8. Find all matching results after merging `data1` and `data2`." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "# Raw records for the two name tables. subject_id is stored as a string,\n", "# and the two tables share the ids '4' and '5'.\n", "raw_data_1 = {\n", "    'subject_id': ['1', '2', '3', '4', '5'],\n", "    'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],\n", "    'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches'],\n", "}\n", "\n", "raw_data_2 = {\n", "    'subject_id': ['4', '5', '6', '7', '8'],\n", "    'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],\n", "    'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan'],\n", "}\n", "\n", "# Test ids keyed by subject_id (also strings). Id '6' has no entry here,\n", "# while '9', '10' and '11' appear in neither name table.\n", "raw_data_3 = {\n", "    'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],\n", "    'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16],\n", "}" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subject_idfirst_namelast_name
01AlexAnderson
12AmyAckerman
23AllenAli
34AliceAoni
45AyoungAtiches
\n", "
" ], "text/plain": [ " subject_id first_name last_name\n", "0 1 Alex Anderson\n", "1 2 Amy Ackerman\n", "2 3 Allen Ali\n", "3 4 Alice Aoni\n", "4 5 Ayoung Atiches" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build one DataFrame per raw dictionary and name them data1, data2, data3.\n", "data1, data2, data3 = map(pd.DataFrame, (raw_data_1, raw_data_2, raw_data_3))\n", "\n", "# Show the first table as a quick check.\n", "data1" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subject_idfirst_namelast_name
01AlexAnderson
12AmyAckerman
23AllenAli
34AliceAoni
45AyoungAtiches
04BillyBonder
15BrianBlack
26BranBalwner
37BryceBrice
48BettyBtisan
\n", "
" ], "text/plain": [ " subject_id first_name last_name\n", "0 1 Alex Anderson\n", "1 2 Amy Ackerman\n", "2 3 Allen Ali\n", "3 4 Alice Aoni\n", "4 5 Ayoung Atiches\n", "0 4 Billy Bonder\n", "1 5 Brian Black\n", "2 6 Bran Balwner\n", "3 7 Bryce Brice\n", "4 8 Betty Btisan" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Stack data1 and data2 along the rows and name the result all_data.\n", "# Each input keeps its own row labels, so labels 0-4 appear twice in the result.\n", "all_data = pd.concat([data1, data2], axis='index')\n", "all_data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subject_idfirst_namelast_namesubject_idfirst_namelast_name
01AlexAnderson4BillyBonder
12AmyAckerman5BrianBlack
23AllenAli6BranBalwner
34AliceAoni7BryceBrice
45AyoungAtiches8BettyBtisan
\n", "
" ], "text/plain": [ " subject_id first_name last_name subject_id first_name last_name\n", "0 1 Alex Anderson 4 Billy Bonder\n", "1 2 Amy Ackerman 5 Brian Black\n", "2 3 Allen Ali 6 Bran Balwner\n", "3 4 Alice Aoni 7 Bryce Brice\n", "4 5 Ayoung Atiches 8 Betty Btisan" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Place data1 and data2 side by side (column-wise) and name the result all_data_col.\n", "# Rows are aligned on the row label, so the column names of both inputs repeat.\n", "all_data_col = pd.concat([data1, data2], axis='columns')\n", "all_data_col" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subject_idtest_id
0151
1215
2315
3461
4516
5714
6815
791
81061
91116
\n", "
" ], "text/plain": [ " subject_id test_id\n", "0 1 51\n", "1 2 15\n", "2 3 15\n", "3 4 61\n", "4 5 16\n", "5 7 14\n", "6 8 15\n", "7 9 1\n", "8 10 61\n", "9 11 16" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display data3, the table that pairs each subject_id with a test_id.\n", "data3" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subject_idfirst_namelast_nametest_id
01AlexAnderson51
12AmyAckerman15
23AllenAli15
34AliceAoni61
44BillyBonder61
55AyoungAtiches16
65BrianBlack16
77BryceBrice14
88BettyBtisan15
\n", "
" ], "text/plain": [ " subject_id first_name last_name test_id\n", "0 1 Alex Anderson 51\n", "1 2 Amy Ackerman 15\n", "2 3 Allen Ali 15\n", "3 4 Alice Aoni 61\n", "4 4 Billy Bonder 61\n", "5 5 Ayoung Atiches 16\n", "6 5 Brian Black 16\n", "7 7 Bryce Brice 14\n", "8 8 Betty Btisan 15" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Merge all_data with data3 on the value of subject_id.\n", "# An inner join (the default) keeps only the ids present in both tables.\n", "all_data.merge(data3, how='inner', on='subject_id')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subject_idfirst_name_xlast_name_xfirst_name_ylast_name_y
04AliceAoniBillyBonder
15AyoungAtichesBrianBlack
\n", "
" ], "text/plain": [ " subject_id first_name_x last_name_x first_name_y last_name_y\n", "0 4 Alice Aoni Billy Bonder\n", "1 5 Ayoung Atiches Brian Black" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inner-join data1 and data2 on subject_id: only ids found in both tables remain.\n", "# Overlapping column names receive the default _x / _y suffixes.\n", "data1.merge(data2, how='inner', on='subject_id')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subject_idfirst_name_data1last_name_data1first_name_data2last_name_data2
01AlexAndersonNaNNaN
12AmyAckermanNaNNaN
23AllenAliNaNNaN
34AliceAoniBillyBonder
45AyoungAtichesBrianBlack
56NaNNaNBranBalwner
67NaNNaNBryceBrice
78NaNNaNBettyBtisan
\n", "
" ], "text/plain": [ " subject_id first_name_data1 last_name_data1 first_name_data2 last_name_data2\n", "0 1 Alex Anderson NaN NaN\n", "1 2 Amy Ackerman NaN NaN\n", "2 3 Allen Ali NaN NaN\n", "3 4 Alice Aoni Billy Bonder\n", "4 5 Ayoung Atiches Brian Black\n", "5 6 NaN NaN Bran Balwner\n", "6 7 NaN NaN Bryce Brice\n", "7 8 NaN NaN Betty Btisan" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Find every record after merging data1 and data2: an outer join on subject_id\n", "# keeps ids from either table and fills the side without a match with NaN.\n", "# The suffixes tell apart the name columns coming from each table.\n", "data1.merge(data2, how='outer', on='subject_id', suffixes=('_data1', '_data2'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 }