Numpy에서는 배열에 적용되는 다양한 함수가 있다.
다만,
단일 배열과 다중 배열에 사용되는 함수가 각각 다르므로 따로 정리해보자.
단일 배열에 적용되는 함수
우선 이차원 배열을 생성하자.
single_arr=np.random.randn(11,4)
single_arr
array([[-0.85420188, 0.42498577, -1.21781935, -2.469909 ],
[-0.95437149, 0.84229281, -0.58423631, 0.16811405],
[ 0.41291648, -0.60213293, 0.44054662, 0.00754592],
[-0.49192662, -1.22210946, 1.82552474, 1.00134016],
[-1.06566936, 0.74219896, -0.47628828, -0.5175951 ],
[-0.0755429 , 0.79078645, 0.95086558, -1.86214404],
[ 1.50346299, -0.52695526, -0.28902625, -0.82038604],
[ 0.81427752, -0.74730953, 0.64814457, -0.94949558],
[ 0.70329552, 0.37535518, -0.43012094, 1.0470845 ],
[-0.2881095 , -0.98918133, -1.76115028, 1.28901781],
[ 1.37016394, 0.88699756, 0.43978861, 0.65941833]])
np.abs()
각 성분(요소)마다 절댓값 처리
np.abs(single_arr)
array([[0.85420188, 0.42498577, 1.21781935, 2.469909 ],
[0.95437149, 0.84229281, 0.58423631, 0.16811405],
[0.41291648, 0.60213293, 0.44054662, 0.00754592],
[0.49192662, 1.22210946, 1.82552474, 1.00134016],
[1.06566936, 0.74219896, 0.47628828, 0.5175951 ],
[0.0755429 , 0.79078645, 0.95086558, 1.86214404],
[1.50346299, 0.52695526, 0.28902625, 0.82038604],
[0.81427752, 0.74730953, 0.64814457, 0.94949558],
[0.70329552, 0.37535518, 0.43012094, 1.0470845 ],
[0.2881095 , 0.98918133, 1.76115028, 1.28901781],
[1.37016394, 0.88699756, 0.43978861, 0.65941833]])
np.sqrt()
제곱근 처리 (음수인 성분은 제곱근을 구할 수 없으므로 nan이 된다.)
np.sqrt(single_arr)
array([[ nan, 0.65190932, nan, nan],
[ nan, 0.91776512, nan, 0.41001713],
[0.64258577, nan, 0.66373686, 0.08686725],
[ nan, nan, 1.35111981, 1.00066986],
[ nan, 0.8615097 , nan, nan],
[ nan, 0.88926174, 0.97512337, nan],
[1.22615782, nan, nan, nan],
[0.90237327, nan, 0.80507426, nan],
[0.83862717, 0.61266237, nan, 1.02327147],
[ nan, nan, nan, 1.1353492 ],
[1.17054002, 0.94180548, 0.66316559, 0.81204577]])
np.square()
제곱 처리
np.square(single_arr)
array([[7.29660856e-01, 1.80612901e-01, 1.48308397e+00, 6.10045049e+00],
[9.10824947e-01, 7.09457185e-01, 3.41332063e-01, 2.82623330e-02],
[1.70500017e-01, 3.62564070e-01, 1.94081323e-01, 5.69408884e-05],
[2.41991798e-01, 1.49355153e+00, 3.33254059e+00, 1.00268211e+00],
[1.13565117e+00, 5.50859302e-01, 2.26850526e-01, 2.67904688e-01],
[5.70672981e-03, 6.25343204e-01, 9.04145352e-01, 3.46758044e+00],
[2.26040097e+00, 2.77681849e-01, 8.35361732e-02, 6.73033254e-01],
[6.63047882e-01, 5.58471536e-01, 4.20091384e-01, 9.01541857e-01],
[4.94624593e-01, 1.40891513e-01, 1.85004022e-01, 1.09638595e+00],
[8.30070859e-02, 9.78479713e-01, 3.10165031e+00, 1.66156692e+00],
[1.87734923e+00, 7.86764671e-01, 1.93414018e-01, 4.34832532e-01]])
np.exp()
각 성분을 지수로 하는 자연상수 e의 거듭제곱(e^x) 값을 계산한다.
np.exp(single_arr)
array([[0.42562275, 1.52956865, 0.29587466, 0.08459256],
[0.38505408, 2.32168407, 0.55753148, 1.18307153],
[1.5112188 , 0.5476423 , 1.55355619, 1.00757446],
[0.61144723, 0.29460805, 6.20605075, 2.7219272 ],
[0.34449719, 2.10054947, 0.62108441, 0.59595203],
[0.92723995, 2.20512996, 2.58794877, 0.15533922],
[4.49723603, 0.59039985, 0.74899254, 0.44026166],
[2.25754406, 0.47363915, 1.91198997, 0.38693615],
[2.02040002, 1.45550829, 0.65043043, 2.84933177],
[0.7496795 , 0.37188101, 0.17184708, 3.62922023],
[3.93599591, 2.42782929, 1.55237902, 1.93366725]])
np.log(), np.log10(), np.log2()
각 성분에 대해 밑이 정해진 로그 값을 계산한다.
단, 밑은 e(자연로그, np.log), 10(상용로그, np.log10), 2(np.log2)만 가능하다. 음수인 성분은 nan이 된다.
np.log(single_arr)
array([[ nan, -8.55699603e-01, nan,
nan],
[ nan, -1.71627565e-01, nan,
-1.78311268e+00],
[-8.84509941e-01, nan, -8.19739009e-01,
-4.88674844e+00],
[ nan, nan, 6.01867476e-01,
1.33926199e-03],
[ nan, -2.98137927e-01, nan,
nan],
[ nan, -2.34727327e-01, -5.03825721e-02,
nan],
[ 4.07771109e-01, nan, nan,
nan],
[-2.05454035e-01, nan, -4.33641505e-01,
nan],
[-3.51978101e-01, -9.79882548e-01, nan,
4.60096339e-02],
[ nan, nan, nan,
2.53880542e-01],
[ 3.14930398e-01, -1.19913048e-01, -8.21461109e-01,
-4.16397153e-01]])
np.log10(single_arr)
array([[ nan, -3.71625616e-01, nan,
nan],
[ nan, -7.45369044e-02, nan,
-7.74395996e-01],
[-3.84137787e-01, nan, -3.56008128e-01,
-2.12228788e+00],
[ nan, nan, 2.61387724e-01,
5.81634093e-04],
[ nan, -1.29479656e-01, nan,
nan],
[ nan, -1.01940783e-01, -2.18808730e-02,
nan],
[ 1.77092743e-01, nan, nan,
nan],
[-8.92275539e-02, nan, -1.88328113e-01,
nan],
[-1.52862147e-01, -4.25557584e-01, nan,
1.99817301e-02],
[ nan, nan, nan,
1.10258919e-01],
[ 1.36772534e-01, -5.20775749e-02, -3.56756027e-01,
-1.80838986e-01]])
np.log2(single_arr)
array([[ nan, -1.23451357e+00, nan,
nan],
[ nan, -2.47606237e-01, nan,
-2.57248781e+00],
[-1.27607811e+00, nan, -1.18263340e+00,
-7.05008774e+00],
[ nan, nan, 8.68311223e-01,
1.93214663e-03],
[ nan, -4.30122108e-01, nan,
nan],
[ nan, -3.38639951e-01, -7.26866869e-02,
nan],
[ 5.88289357e-01, nan, nan,
nan],
[-2.96407518e-01, nan, -6.25612449e-01,
nan],
[-5.07797062e-01, -1.41367169e+00, nan,
6.63778706e-02],
[ nan, nan, nan,
3.66272199e-01],
[ 4.54348523e-01, -1.72997959e-01, -1.18511787e+00,
-6.00734107e-01]])
np.sign()
각 성분(요소)의 부호 계산한다.
양수인 경우 1, 음수인 경우 -1, 0인 경우 0을 반환한다.
np.sign(single_arr)
array([[-1., 1., -1., -1.],
[-1., 1., -1., 1.],
[ 1., -1., 1., 1.],
[-1., -1., 1., 1.],
[-1., 1., -1., -1.],
[-1., 1., 1., -1.],
[ 1., -1., -1., -1.],
[ 1., -1., 1., -1.],
[ 1., 1., -1., 1.],
[-1., -1., -1., 1.],
[ 1., 1., 1., 1.]])
np.ceil()
각 성분을 소수점 아래에서 올림한 값(해당 성분보다 크거나 같은 가장 작은 정수)을 계산한다.
np.ceil(single_arr)
array([[-0., 1., -1., -2.],
[-0., 1., -0., 1.],
[ 1., -0., 1., 1.],
[-0., -1., 2., 2.],
[-1., 1., -0., -0.],
[-0., 1., 1., -1.],
[ 2., -0., -0., -0.],
[ 1., -0., 1., -0.],
[ 1., 1., -0., 2.],
[-0., -0., -1., 2.],
[ 2., 1., 1., 1.]])
np.floor()
각 성분을 소수점 아래에서 내림한 값(해당 성분보다 작거나 같은 가장 큰 정수)을 계산한다.
np.floor(single_arr)
array([[-1., 0., -2., -3.],
[-1., 0., -1., 0.],
[ 0., -1., 0., 0.],
[-1., -2., 1., 1.],
[-2., 0., -1., -1.],
[-1., 0., 0., -2.],
[ 1., -1., -1., -1.],
[ 0., -1., 0., -1.],
[ 0., 0., -1., 1.],
[-1., -1., -2., 1.],
[ 1., 0., 0., 0.]])
np.isnan()
각 성분이 결측치(NaN)인 경우 True, 아닌 경우 False를 반환한다.
np.isnan(np.log(single_arr))
array([[ True, False, True, True],
[ True, False, True, False],
[False, True, False, False],
[ True, True, False, False],
[ True, False, True, True],
[ True, False, False, True],
[False, True, True, True],
[False, True, False, True],
[False, False, True, False],
[ True, True, True, False],
[False, False, False, False]])
np.isinf()
각 성분이 무한대인 경우 True를, 아닌 경우 False를 반환한다.
np.isinf(np.square(single_arr))
array([[False, False, False, False],
[False, False, False, False],
[False, False, False, False],
[False, False, False, False],
[False, False, False, False],
[False, False, False, False],
[False, False, False, False],
[False, False, False, False],
[False, False, False, False],
[False, False, False, False],
[False, False, False, False]])
np.cos(),np.tan(),np.sin()
각 성분에 대해 삼각함수 값을 계산해준다.
cos, cosh, sin, sinh, tan, tanh
np.sin(single_arr)
array([[-0.75404694, 0.41230781, -0.93834775, -0.62230479],
[-0.81595055, 0.74617153, -0.55156252, 0.16732328],
[ 0.40128239, -0.56640157, 0.42643396, 0.00754585],
[-0.47232493, -0.93982217, 0.96773177, 0.84219432],
[-0.87511304, 0.67591015, -0.45848373, -0.49479168],
[-0.07547107, 0.71090659, 0.81391869, -0.95785762],
[ 0.99773397, -0.50290398, -0.285019 , -0.73140914],
[ 0.72722988, -0.67966771, 0.60370829, -0.81312199],
[ 0.64673474, 0.36660301, -0.41698073, 0.86596887],
[-0.28414016, -0.83557651, -0.98193733, 0.96056242],
[ 0.97994075, 0.77517848, 0.4257482 , 0.61265723]])
np.cos(single_arr)
array([[ 0.65682054, 0.9110446 , 0.34569277, -0.78277503],
[ 0.5781217 , 0.66575374, 0.83413355, 0.98590208],
[ 0.91595439, 0.82412939, 0.9045187 , 0.99997153],
[ 0.88142451, 0.34166399, -0.2519826 , 0.53917412],
[ 0.48391856, 0.73698404, 0.8887028 , 0.86901162],
[ 0.99714799, 0.70328644, 0.5809788 , -0.28724341],
[ 0.06728247, 0.86434228, 0.95852187, 0.68193891],
[ 0.68639398, 0.73352015, 0.79720531, 0.58209332],
[ 0.762715 , 0.93037747, 0.90891533, 0.5000979 ],
[ 0.95878275, 0.5493741 , -0.18920647, 0.27806446],
[ 0.19928907, 0.6317423 , 0.90484168, 0.79034873]])
np.tan(single_arr)
array([[-1.14802582e+00, 4.52566003e-01, -2.71439794e+00,
7.94998259e-01],
[-1.41138197e+00, 1.12079209e+00, -6.61240061e-01,
1.69715924e-01],
[ 4.38103027e-01, -6.87272628e-01, 4.71448467e-01,
7.54606189e-03],
[-5.35865442e-01, -2.75072059e+00, -3.84047063e+00,
1.56200807e+00],
[-1.80838908e+00, 9.17129963e-01, -5.15902193e-01,
-5.69372924e-01],
[-7.56869302e-02, 1.01083505e+00, 1.40094389e+00,
3.33465487e+00],
[ 1.48290337e+01, -5.81834292e-01, -2.97352632e-01,
-1.07254350e+00],
[ 1.05949339e+00, -9.26583562e-01, 7.57280817e-01,
-1.39689284e+00],
[ 8.47937613e-01, 3.94036848e-01, -4.58767408e-01,
1.73159868e+00],
[-2.96355099e-01, -1.52096086e+00, 5.18976617e+00,
3.45445948e+00],
[ 4.91718257e+00, 1.22704856e+00, 4.70522307e-01,
7.75173299e-01]])
두 개의 배열에 적용되는 함수
우선 두개의 배열을 생성하자.
single_arr
array([[-0.85420188, 0.42498577, -1.21781935, -2.469909 ],
[-0.95437149, 0.84229281, -0.58423631, 0.16811405],
[ 0.41291648, -0.60213293, 0.44054662, 0.00754592],
[-0.49192662, -1.22210946, 1.82552474, 1.00134016],
[-1.06566936, 0.74219896, -0.47628828, -0.5175951 ],
[-0.0755429 , 0.79078645, 0.95086558, -1.86214404],
[ 1.50346299, -0.52695526, -0.28902625, -0.82038604],
[ 0.81427752, -0.74730953, 0.64814457, -0.94949558],
[ 0.70329552, 0.37535518, -0.43012094, 1.0470845 ],
[-0.2881095 , -0.98918133, -1.76115028, 1.28901781],
[ 1.37016394, 0.88699756, 0.43978861, 0.65941833]])
double_arr=np.random.rand(11,4)
double_arr
array([[0.48705458, 0.9637593 , 0.6738098 , 0.6541084 ],
[0.61583841, 0.39743118, 0.55485998, 0.93706706],
[0.0448264 , 0.88860695, 0.38479569, 0.45587065],
[0.65356226, 0.73741626, 0.28991024, 0.59693486],
[0.81154215, 0.91903639, 0.98248847, 0.08662736],
[0.53407128, 0.55608402, 0.71851739, 0.29082239],
[0.35009501, 0.99887153, 0.4568631 , 0.95589646],
[0.30591951, 0.3999088 , 0.61395523, 0.38426602],
[0.20310461, 0.53358661, 0.81670187, 0.7676948 ],
[0.07961264, 0.57528379, 0.92982623, 0.99493861],
[0.6184389 , 0.14453175, 0.68750759, 0.66940273]])
np.add(),np.subtract(),np.multiply(),np.divide()
위 함수들은 동일한 shape일때 사칙연산을 해주는 함수들이다.
간단하게 multiply만 예로하고 넘어가겠다.
np.multiply(single_arr,double_arr)
array([[-0.41604294, 0.40958398, -0.82057861, -1.61558823],
[-0.58773863, 0.33475342, -0.32416935, 0.15753414],
[ 0.01850956, -0.53505951, 0.16952044, 0.00343996],
[-0.32150467, -0.90120339, 0.52923832, 0.59773485],
[-0.8648356 , 0.68210786, -0.46794774, -0.0448379 ],
[-0.04034529, 0.43974371, 0.68321346, -0.54155318],
[ 0.52635489, -0.52636061, -0.13204543, -0.78420411],
[ 0.24910338, -0.29885566, 0.39793175, -0.36485889],
[ 0.14284257, 0.2002845 , -0.35128057, 0.80384132],
[-0.02293716, -0.56905999, -1.63756373, 1.28249359],
[ 0.84736269, 0.12819931, 0.30235801, 0.44141643]])
np.maximum(),np.minimum()
두 배열에서 같은 위치의 성분끼리 비교하여, 더 큰 값(maximum) 또는 더 작은 값(minimum)으로 이루어진 하나의 배열을 만들어준다.
np.maximum(single_arr,double_arr)
array([[0.48705458, 0.9637593 , 0.6738098 , 0.6541084 ],
[0.61583841, 0.84229281, 0.55485998, 0.93706706],
[0.41291648, 0.88860695, 0.44054662, 0.45587065],
[0.65356226, 0.73741626, 1.82552474, 1.00134016],
[0.81154215, 0.91903639, 0.98248847, 0.08662736],
[0.53407128, 0.79078645, 0.95086558, 0.29082239],
[1.50346299, 0.99887153, 0.4568631 , 0.95589646],
[0.81427752, 0.3999088 , 0.64814457, 0.38426602],
[0.70329552, 0.53358661, 0.81670187, 1.0470845 ],
[0.07961264, 0.57528379, 0.92982623, 1.28901781],
[1.37016394, 0.88699756, 0.68750759, 0.66940273]])
Statistic Function
통계 함수를 통해 array의 합이나 평균 등을 구할 때, axis라는 인자에 값을 지정해주면 열 또는 행 단위의 합이나 평균 등을 구할 수 있다.
single_arr
array([[-0.85420188, 0.42498577, -1.21781935, -2.469909 ],
[-0.95437149, 0.84229281, -0.58423631, 0.16811405],
[ 0.41291648, -0.60213293, 0.44054662, 0.00754592],
[-0.49192662, -1.22210946, 1.82552474, 1.00134016],
[-1.06566936, 0.74219896, -0.47628828, -0.5175951 ],
[-0.0755429 , 0.79078645, 0.95086558, -1.86214404],
[ 1.50346299, -0.52695526, -0.28902625, -0.82038604],
[ 0.81427752, -0.74730953, 0.64814457, -0.94949558],
[ 0.70329552, 0.37535518, -0.43012094, 1.0470845 ],
[-0.2881095 , -0.98918133, -1.76115028, 1.28901781],
[ 1.37016394, 0.88699756, 0.43978861, 0.65941833]])
np.sum()
전체 성분의 합을 계산해준다.
np.sum(single_arr)
-1.8515573809143309
axis=0 : 행 방향(위에서 아래)으로 더하여 각 열의 합을 계산해준다. (결과의 길이는 열의 개수인 4이다.)
np.sum(single_arr,axis=0)
array([ 1.0742947 , -0.02507179, -0.45377129, -2.447009 ])
axis=1 : 열 방향(왼쪽에서 오른쪽)으로 더하여 각 행의 합을 계산해준다. (결과의 길이는 행의 개수인 11이다.)
np.sum(single_arr,axis=1)
array([-4.11694447, -0.52820094, 0.25887608, 1.11282882, -1.31735377,
-0.19603492, -0.13290456, -0.23438302, 1.69561427, -1.74942331,
3.35636844])
np.mean()
평균을 계산해준다.
np.mean(single_arr,axis=0)
array([ 0.09766315, -0.00227925, -0.04125194, -0.22245536])
np.std(),np.var(),np.min(),np.max()
표준편차, 분산, 최소값, 최대값을 구해준다.
np.std(single_arr)
0.9683579692373014
np.min(single_arr,axis=1)
array([-2.469909 , -0.95437149, -0.60213293, -1.22210946, -1.06566936,
-1.86214404, -0.82038604, -0.94949558, -0.43012094, -1.76115028,
0.43978861])
np.argmin(),np.argmax()
전체 성분의 최소값, 최대값이 위치한 인덱스를 반환한다. axis를 지정하지 않으면 배열을 1차원으로 펼쳤을 때의 인덱스를 반환한다.
np.argmin(single_arr)
3
np.argmax(single_arr,axis=1)
array([1, 1, 2, 2, 1, 2, 0, 0, 3, 3, 0], dtype=int64)
np.cumsum(), np.cumprod()
처음 성분부터 각 성분까지의 누적합 또는 누적곱을 계산해준다.
np.cumsum(single_arr)
array([-0.85420188, -0.42921612, -1.64703547, -4.11694447, -5.07131596,
-4.22902315, -4.81325946, -4.64514541, -4.23222893, -4.83436187,
-4.39381525, -4.38626933, -4.87819595, -6.10030541, -4.27478067,
-3.27344051, -4.33910986, -3.5969109 , -4.07319918, -4.59079428,
-4.66633718, -3.87555073, -2.92468515, -4.78682919, -3.2833662 ,
-3.81032147, -4.09934772, -4.91973375, -4.10545623, -4.85276576,
-4.20462119, -5.15411677, -4.45082125, -4.07546607, -4.50558701,
-3.45850251, -3.74661201, -4.73579335, -6.49694363, -5.20792582,
-3.83776188, -2.95076432, -2.51097571, -1.85155738])
np.cumprod(single_arr)
array([-8.54201882e-01, -3.63023641e-01, 4.42097215e-01, -1.09193989e+00,
1.04211631e+00, 8.77767075e-01, -5.12823395e-01, -8.62128166e-02,
-3.55986925e-02, 2.14351452e-02, 9.44318071e-03, 7.12574735e-05,
-3.50534480e-05, 4.28391504e-05, 7.82039290e-05, 7.83087347e-05,
-8.34512188e-05, -6.19374081e-05, 2.95000616e-05, -1.52690873e-05,
1.15347114e-06, 9.12149347e-07, 8.67331419e-07, -1.61509603e-06,
-2.42823712e-06, 1.27957233e-06, -3.69829992e-07, 3.03403362e-07,
2.47054538e-07, -1.84626211e-07, -1.19664476e-07, 1.13620891e-07,
7.99090642e-08, 2.99942814e-08, -1.29011685e-08, -1.35086135e-08,
3.89195992e-09, -3.84985411e-09, 6.78017165e-09, 8.73976203e-09,
1.19749068e-08, 1.06217131e-08, 4.67130840e-09, 3.08034638e-09])
np.sort()
성분을 정렬해준다. axis를 지정하지 않으면 마지막 축(axis=-1)을 기준으로, 즉 각 행 안에서 정렬한다.
오름차순
np.sort(single_arr)
array([[-2.469909 , -1.21781935, -0.85420188, 0.42498577],
[-0.95437149, -0.58423631, 0.16811405, 0.84229281],
[-0.60213293, 0.00754592, 0.41291648, 0.44054662],
[-1.22210946, -0.49192662, 1.00134016, 1.82552474],
[-1.06566936, -0.5175951 , -0.47628828, 0.74219896],
[-1.86214404, -0.0755429 , 0.79078645, 0.95086558],
[-0.82038604, -0.52695526, -0.28902625, 1.50346299],
[-0.94949558, -0.74730953, 0.64814457, 0.81427752],
[-0.43012094, 0.37535518, 0.70329552, 1.0470845 ],
[-1.76115028, -0.98918133, -0.2881095 , 1.28901781],
[ 0.43978861, 0.65941833, 0.88699756, 1.37016394]])
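내림차순 (주의: 아래의 [::-1]은 행의 순서만 뒤집으며, 각 행 안의 성분은 여전히 오름차순이다. 각 행을 내림차순으로 정렬하려면 np.sort(single_arr)[:, ::-1]을 사용한다.)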
np.sort(single_arr)[::-1]
array([[ 0.43978861, 0.65941833, 0.88699756, 1.37016394],
[-1.76115028, -0.98918133, -0.2881095 , 1.28901781],
[-0.43012094, 0.37535518, 0.70329552, 1.0470845 ],
[-0.94949558, -0.74730953, 0.64814457, 0.81427752],
[-0.82038604, -0.52695526, -0.28902625, 1.50346299],
[-1.86214404, -0.0755429 , 0.79078645, 0.95086558],
[-1.06566936, -0.5175951 , -0.47628828, 0.74219896],
[-1.22210946, -0.49192662, 1.00134016, 1.82552474],
[-0.60213293, 0.00754592, 0.41291648, 0.44054662],
[-0.95437149, -0.58423631, 0.16811405, 0.84229281],
[-2.469909 , -1.21781935, -0.85420188, 0.42498577]])
행 방향으로 오름차순 (axis=0 : 각 열 안에서 위에서 아래로 정렬)
np.sort(single_arr,axis=0)
array([[-1.06566936, -1.22210946, -1.76115028, -2.469909 ],
[-0.95437149, -0.98918133, -1.21781935, -1.86214404],
[-0.85420188, -0.74730953, -0.58423631, -0.94949558],
[-0.49192662, -0.60213293, -0.47628828, -0.82038604],
[-0.2881095 , -0.52695526, -0.43012094, -0.5175951 ],
[-0.0755429 , 0.37535518, -0.28902625, 0.00754592],
[ 0.41291648, 0.42498577, 0.43978861, 0.16811405],
[ 0.70329552, 0.74219896, 0.44054662, 0.65941833],
[ 0.81427752, 0.79078645, 0.64814457, 1.00134016],
[ 1.37016394, 0.84229281, 0.95086558, 1.0470845 ],
[ 1.50346299, 0.88699756, 1.82552474, 1.28901781]])
'Data analysis > Numpy' 카테고리의 다른 글
5. Array boolean 인덱싱(마스크) (0) | 2021.03.19 |
---|---|
4. Array 인덱싱 (0) | 2021.03.19 |
3. Array 연산 (0) | 2021.03.19 |
2. zeros(),ones(),arange() 함수 (0) | 2021.03.19 |
1.Numpy란? (0) | 2021.03.19 |