科学计算库numpy
老唐数据分析机器学习
numpy1
import numpy

# Read the whole CSV as text: with dtype=str every cell, including the header
# row, is kept as a string.  help(numpy.genfromtxt) shows the full documentation.
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype=str)
print(type(world_alcohol))
print(world_alcohol)
# Output:
#   <class 'numpy.ndarray'>
#   [['Year' 'WHO region' 'Country' 'Beverage Types' 'Display Value']
#    ['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
#    ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
#    ...
#    ['1987' 'Africa' 'Malawi' 'Other' '0.75']
#    ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
#    ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]

# numpy.array() accepts a list or a list of lists.
# A flat list produces a one-dimensional array (a vector):
vector = numpy.array([5, 10, 15, 20])
# A list of lists produces a two-dimensional array (a matrix):
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
print(vector)
print(matrix)
# Output:
#   [ 5 10 15 20]
#   [[ 5 10 15]
#    [20 25 30]
#    [35 40 45]]

# ndarray.shape reports the number of elements along each dimension.
vector = numpy.array([1, 2, 3, 4])
print(vector.shape)
# For a matrix the shape is a tuple with two entries: (rows, columns).
matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
print(matrix.shape)
# Output:
#   (4,)
#   (2, 3)

# Every value in a NumPy array must have the same data type.
# NumPy picks an appropriate data type automatically when reading data or
# converting lists to arrays.
# The dtype property tells you which data type an array uses.
numbers = numpy.array([1, 2, 3, 4])
numbers.dtype
# Value (notebook cell result):
#   dtype('int32')
# NOTE: the default integer width is platform-dependent; other machines may
# report int64 here.

# When NumPy cannot convert a value to a numeric type such as float or
# integer, it stores the special value nan ("Not a Number").
# nan therefore marks missing data.
# A value shown as 1.98600000e+03 is simply 1.986 * 10 ^ 3.
world_alcohol
# Value (notebook cell result):
#   array([['Year', 'WHO region', 'Country', 'Beverage Types',
#           'Display Value'],
#          ['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
#          ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
#          ...,
#          ['1987', 'Africa', 'Malawi', 'Other', '0.75'],
#          ['1989', 'Americas', 'Bahamas', 'Wine', '1.5'],
#          ['1985', 'Africa', 'Malawi', 'Spirits', '0.31']], dtype='<U52')

# Reload the file as text, this time dropping the header row.
world_alcohol = numpy.genfromtxt(
    "world_alcohol.txt", delimiter=",", dtype="U75", skip_header=1
)
print(world_alcohol)
# Output:
#   [['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
#    ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
#    ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
#    ...
#    ['1987' 'Africa' 'Malawi' 'Other' '0.75']
#    ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
#    ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]

# Index a single cell with [row, column].
uruguay_other_1986 = world_alcohol[1, 4]
third_country = world_alcohol[2, 2]
print(uruguay_other_1986)
print(third_country)
# Output:
#   0.5
#   Cte d'Ivoire

# Slice a vector: elements 0, 1 and 2 (the stop index is excluded).
vector = numpy.array([5, 10, 15, 20])
print(vector[0:3])
# Output:
#   [ 5 10 15]

# ":" selects every row; 1 selects the second column.
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
print(matrix[:, 1])
# Output:
#   [10 25 40]

# Every row, first two columns.
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
print(matrix[:, 0:2])
# Output:
#   [[ 5 10]
#    [20 25]
#    [35 40]]

# Rows 1-2, columns 0-1.
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
print(matrix[1:3, 0:2])
# Output:
#   [[20 25]
#    [35 40]]
# --- numpy2 ---
import numpy

# A comparison against a scalar is applied to every element of the array:
# each position becomes True where the values are equal and False otherwise.
vector = numpy.array([5, 10, 15, 20])
vector == 10
# Value (notebook cell result):
#   array([False,  True, False, False])

matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
matrix == 25
# Value (notebook cell result):
#   array([[False, False, False],
#          [False,  True, False],
#          [False, False, False]])

# Compare vector with 10, store the Boolean vector in equal_to_ten, then use
# it as a mask to pick the matching elements.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = (vector == 10)
print(equal_to_ten)
print(vector[equal_to_ten])
# Output:
#   [False  True False False]
#   [10]

# Select whole rows whose second column equals 25.
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
second_column_25 = (matrix[:, 1] == 25)
print(second_column_25)
print(matrix[second_column_25, :])
# Output:
#   [False  True False]
#   [[20 25 30]]

# Comparisons can be combined: & is element-wise "and", | is element-wise "or".
vector = numpy.array([5, 10, 15, 20])
equal_to_ten_and_five = (vector == 10) & (vector == 5)
print(equal_to_ten_and_five)
# Output:
#   [False False False False]

vector = numpy.array([5, 10, 15, 20])
equal_to_ten_or_five = (vector == 10) | (vector == 5)
print(equal_to_ten_or_five)
# Output:
#   [ True  True False False]

# A Boolean mask can also be used on the left-hand side of an assignment.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten_or_five = (vector == 10) | (vector == 5)
vector[equal_to_ten_or_five] = 50
print(vector)
# Output:
#   [50 50 15 20]

matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
second_column_25 = matrix[:, 1] == 25
print(second_column_25)
matrix[second_column_25, 1] = 10
print(matrix)
# Output:
#   [False  True False]
#   [[ 5 10 15]
#    [20 10 30]
#    [35 40 45]]

# ndarray.astype() converts an array to another data type.
vector = numpy.array(["1", "2", "3"])
print(vector.dtype)
print(vector)
vector = vector.astype(float)
print(vector.dtype)
print(vector)
# Output:
#   <U1
#   ['1' '2' '3']
#   float64
#   [1. 2. 3.]

vector = numpy.array([5, 10, 15, 20])
vector.sum()
# Value (notebook cell result):
#   50

# The axis argument selects the dimension the operation runs along:
# axis=1 gives one result per row, axis=0 gives one result per column.
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
matrix.sum(axis=1)
# Value (notebook cell result):
#   array([ 30,  75, 120])

matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
matrix.sum(axis=0)
# Value (notebook cell result):
#   array([60, 75, 90])

# Replace nan values with 0, then aggregate.
# Without a dtype argument genfromtxt returns floats and turns every
# unparseable cell into nan.  skip_header=1 keeps the header line out of the
# data; otherwise it would be read as a row of nan, zero-filled below, and
# counted as an extra row when averaging.
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", skip_header=1)
is_value_empty = numpy.isnan(world_alcohol[:, 4])
# The array is already float, so assign a numeric zero rather than the string '0'.
world_alcohol[is_value_empty, 4] = 0
alcohol_consumption = world_alcohol[:, 4]
total_alcohol = alcohol_consumption.sum()
average_alcohol = alcohol_consumption.mean()
print(total_alcohol)
print(average_alcohol)
# Output recorded by the earlier version of this cell, which did NOT skip the
# header row:
#   1137.78
#   1.140060120240481
# The total is unaffected by the header fix; the mean printed now is taken
# over the data rows only and is therefore slightly higher than the recorded
# value.
numpy3
import numpy as np

print(np.arange(15))
a = np.arange(15).reshape(3, 5)
a
# Output / cell value:
#   [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
#   array([[ 0,  1,  2,  3,  4],
#          [ 5,  6,  7,  8,  9],
#          [10, 11, 12, 13, 14]])

a.shape
# Value:
#   (3, 5)

# Number of axes (dimensions) of the array.
a.ndim
# Value:
#   2

a.dtype.name
# Value:
#   'int32'
# NOTE: the default integer width is platform-dependent; other machines may
# report 'int64' here.

# Total number of elements of the array.
a.size
# Value:
#   15

np.zeros((3, 4))
# Value:
#   array([[0., 0., 0., 0.],
#          [0., 0., 0., 0.],
#          [0., 0., 0., 0.]])

np.ones((2, 3, 4), dtype=np.int32)
# Value:
#   array([[[1, 1, 1, 1],
#           [1, 1, 1, 1],
#           [1, 1, 1, 1]],
#
#          [[1, 1, 1, 1],
#           [1, 1, 1, 1],
#           [1, 1, 1, 1]]])

# Create sequences of numbers: arange(start, stop, step), stop excluded.
np.arange(10, 30, 5)
# Value:
#   array([10, 15, 20, 25])

np.arange(0, 2, 0.3)
# Value:
#   array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8])

np.arange(12).reshape(4, 3)
# Value:
#   array([[ 0,  1,  2],
#          [ 3,  4,  5],
#          [ 6,  7,  8],
#          [ 9, 10, 11]])

# random() draws uniformly distributed floats from the half-open interval
# [0.0, 1.0); the values differ on every run.
np.random.random((2, 3))
# Value (one sample run):
#   array([[0.06665873, 0.92526157, 0.42866618],
#          [0.19151176, 0.79870056, 0.32145198]])

from numpy import pi
# linspace(start, stop, num) returns num evenly spaced values covering the
# closed interval [start, stop]; here 100 values from 0 to 2*pi.
np.linspace(0, 2 * pi, 100)
# Value:
#   array([0.        , 0.06346652, 0.12693304, 0.19039955, 0.25386607,
#          0.31733259, 0.38079911, 0.44426563, 0.50773215, 0.57119866,
#          0.63466518, 0.6981317 , 0.76159822, 0.82506474, 0.88853126,
#          0.95199777, 1.01546429, 1.07893081, 1.14239733, 1.20586385,
#          1.26933037, 1.33279688, 1.3962634 , 1.45972992, 1.52319644,
#          1.58666296, 1.65012947, 1.71359599, 1.77706251, 1.84052903,
#          1.90399555, 1.96746207, 2.03092858, 2.0943951 , 2.15786162,
#          2.22132814, 2.28479466, 2.34826118, 2.41172769, 2.47519421,
#          2.53866073, 2.60212725, 2.66559377, 2.72906028, 2.7925268 ,
#          2.85599332, 2.91945984, 2.98292636, 3.04639288, 3.10985939,
#          3.17332591, 3.23679243, 3.30025895, 3.36372547, 3.42719199,
#          3.4906585 , 3.55412502, 3.61759154, 3.68105806, 3.74452458,
#          3.8079911 , 3.87145761, 3.93492413, 3.99839065, 4.06185717,
#          4.12532369, 4.1887902 , 4.25225672, 4.31572324, 4.37918976,
#          4.44265628, 4.5061228 , 4.56958931, 4.63305583, 4.69652235,
#          4.75998887, 4.82345539, 4.88692191, 4.95038842, 5.01385494,
#          5.07732146, 5.14078798, 5.2042545 , 5.26772102, 5.33118753,
#          5.39465405, 5.45812057, 5.52158709, 5.58505361, 5.64852012,
#          5.71198664, 5.77545316, 5.83891968, 5.9023862 , 5.96585272,
#          6.02931923, 6.09278575, 6.15625227, 6.21971879, 6.28318531])

# Universal functions such as sin are applied element by element.
np.sin(np.linspace(0, 2 * pi, 100))
# Value:
#   array([ 0.00000000e+00,  6.34239197e-02,  1.26592454e-01,  1.89251244e-01,
#           2.51147987e-01,  3.12033446e-01,  3.71662456e-01,  4.29794912e-01,
#           4.86196736e-01,  5.40640817e-01,  5.92907929e-01,  6.42787610e-01,
#           6.90079011e-01,  7.34591709e-01,  7.76146464e-01,  8.14575952e-01,
#           8.49725430e-01,  8.81453363e-01,  9.09631995e-01,  9.34147860e-01,
#           9.54902241e-01,  9.71811568e-01,  9.84807753e-01,  9.93838464e-01,
#           9.98867339e-01,  9.99874128e-01,  9.96854776e-01,  9.89821442e-01,
#           9.78802446e-01,  9.63842159e-01,  9.45000819e-01,  9.22354294e-01,
#           8.95993774e-01,  8.66025404e-01,  8.32569855e-01,  7.95761841e-01,
#           7.55749574e-01,  7.12694171e-01,  6.66769001e-01,  6.18158986e-01,
#           5.67059864e-01,  5.13677392e-01,  4.58226522e-01,  4.00930535e-01,
#           3.42020143e-01,  2.81732557e-01,  2.20310533e-01,  1.58001396e-01,
#           9.50560433e-02,  3.17279335e-02, -3.17279335e-02, -9.50560433e-02,
#          -1.58001396e-01, -2.20310533e-01, -2.81732557e-01, -3.42020143e-01,
#          -4.00930535e-01, -4.58226522e-01, -5.13677392e-01, -5.67059864e-01,
#          -6.18158986e-01, -6.66769001e-01, -7.12694171e-01, -7.55749574e-01,
#          -7.95761841e-01, -8.32569855e-01, -8.66025404e-01, -8.95993774e-01,
#          -9.22354294e-01, -9.45000819e-01, -9.63842159e-01, -9.78802446e-01,
#          -9.89821442e-01, -9.96854776e-01, -9.99874128e-01, -9.98867339e-01,
#          -9.93838464e-01, -9.84807753e-01, -9.71811568e-01, -9.54902241e-01,
#          -9.34147860e-01, -9.09631995e-01, -8.81453363e-01, -8.49725430e-01,
#          -8.14575952e-01, -7.76146464e-01, -7.34591709e-01, -6.90079011e-01,
#          -6.42787610e-01, -5.92907929e-01, -5.40640817e-01, -4.86196736e-01,
#          -4.29794912e-01, -3.71662456e-01, -3.12033446e-01, -2.51147987e-01,
#          -1.89251244e-01, -1.26592454e-01, -6.34239197e-02, -2.44929360e-16])

# Arithmetic operators, including the product operator *, work element-wise
# on NumPy arrays.
a = np.array([20, 30, 40, 50])
b = np.arange(4)
# Add print(a), print(b), print(c) or print(b**2) to inspect the intermediates.
c = a - b
b**2
print(a < 35)
# Output:
#   [ True  True False False]

# The matrix product is computed with the dot function or method.
A = np.array([[1, 1],
              [0, 1]])
B = np.array([[2, 0],
              [3, 4]])
print(A)
print(B)
# A * B would instead multiply the values at matching positions (element-wise).
print(A.dot(B))     # matrix product in the linear-algebra sense
print(np.dot(A, B))  # same matrix product, function form
# Output:
#   [[1 1]
#    [0 1]]
#   [[2 0]
#    [3 4]]
#   [[5 4]
#    [3 4]]
#   [[5 4]
#    [3 4]]
# --- numpy4 ---
import numpy as np

B = np.arange(3)
print(B)
print(np.exp(B))
print(np.sqrt(B))
# Output:
#   [0 1 2]
#   [1.         2.71828183 7.3890561 ]
#   [0.         1.         1.41421356]

# floor() rounds every element down; the values below are random, so the
# recorded output is just one sample run.
a = np.floor(10 * np.random.random((3, 4)))
print(a)
print('--------------')
print(a.shape)
print('--------------')
# Flatten the array into one dimension.
print(a.ravel())
print('--------------')
# Assigning to .shape reshapes the array in place.
a.shape = (6, 2)
print(a)
print('--------------')
print(a.T)  # transpose
# resize() also reshapes in place and returns None, which is why "None" is
# printed here; the following print shows the resized array.
print(a.resize((2, 6)))
print(a)
# If a dimension is given as -1 in a reshaping operation, it is calculated
# automatically from the other dimensions, e.g. a.reshape(3, -1).
# Output:
#   [[1. 6. 7. 4.]
#    [5. 4. 1. 0.]
#    [2. 3. 9. 7.]]
#   --------------
#   (3, 4)
#   --------------
#   [1. 6. 7. 4. 5. 4. 1. 0. 2. 3. 9. 7.]
#   --------------
#   [[1. 6.]
#    [7. 4.]
#    [5. 4.]
#    [1. 0.]
#    [2. 3.]
#    [9. 7.]]
#   --------------
#   [[1. 7. 5. 1. 2. 9.]
#    [6. 4. 4. 0. 3. 7.]]
#   None
#   [[1. 6. 7. 4. 5. 4.]
#    [1. 0. 2. 3. 9. 7.]]

a = np.floor(10 * np.random.random((2, 2)))
b = np.floor(10 * np.random.random((2, 2)))
print(a)
print('---')
print(b)
print('---')
print(np.hstack((a, b)))  # join horizontally (side by side)
print(np.vstack((a, b)))  # join vertically (one above the other)
# Output:
#   [[7. 5.]
#    [9. 1.]]
#   ---
#   [[6. 2.]
#    [4. 7.]]
#   ---
#   [[7. 5. 6. 2.]
#    [9. 1. 4. 7.]]
#   [[7. 5.]
#    [9. 1.]
#    [6. 2.]
#    [4. 7.]]

a = np.floor(10 * np.random.random((2, 12)))
print(a)
print('-------------')
print(np.hsplit(a, 3))  # split column-wise into three equal parts
print('-------------')
print(np.hsplit(a, (3, 4)))  # split a after the third and the fourth column
a = np.floor(10 * np.random.random((12, 2)))
print('-------------')
print(a)
np.vsplit(a, 3)  # split row-wise into three equal parts
# Output (the last list is the notebook cell value of np.vsplit):
#   [[0. 8. 1. 3. 4. 7. 7. 1. 9. 8. 7. 2.]
#    [4. 2. 7. 3. 9. 6. 9. 1. 7. 8. 3. 8.]]
#   -------------
#   [array([[0., 8., 1., 3.],
#          [4., 2., 7., 3.]]), array([[4., 7., 7., 1.],
#          [9., 6., 9., 1.]]), array([[9., 8., 7., 2.],
#          [7., 8., 3., 8.]])]
#   -------------
#   [array([[0., 8., 1.],
#          [4., 2., 7.]]), array([[3.],
#          [3.]]), array([[4., 7., 7., 1., 9., 8., 7., 2.],
#          [9., 6., 9., 1., 7., 8., 3., 8.]])]
#   -------------
#   [[9. 3.]
#    [3. 5.]
#    [1. 1.]
#    [0. 3.]
#    [6. 4.]
#    [5. 6.]
#    [9. 4.]
#    [1. 7.]
#    [6. 2.]
#    [1. 6.]
#    [1. 1.]
#    [8. 9.]]
#   [array([[9., 3.],
#          [3., 5.],
#          [1., 1.],
#          [0., 3.]]), array([[6., 4.],
#          [5., 6.],
#          [9., 4.],
#          [1., 7.]]), array([[6., 2.],
#          [1., 6.],
#          [1., 1.],
#          [8., 9.]])]

# --- Three kinds of "copy" in Python: assignment, view, copy ---

# 1. Simple assignment makes no copy of the array object or of its data.
a = np.arange(12)
b = a  # a and b are two names for the same ndarray object
print(b is a)
b.shape = (3, 4)
print(a.shape)
print(id(a))
print(id(b))
# Output (the id values differ from run to run):
#   True
#   (3, 4)
#   1229965715056
#   1229965715056

# 2. view() creates a new array object that looks at the same data: reshaping
#    the view leaves a's shape alone, but writing through it changes a.
c = a.view()
print(c is a)
c.shape = 2, 6
print(a.shape)
c[0, 4] = 1234
print(a)
print(id(a))
print(id(c))
# Output (the id values differ from run to run):
#   False
#   (3, 4)
#   [[   0    1    2    3]
#    [1234    5    6    7]
#    [   8    9   10   11]]
#   1229965715056
#   1229965716336

# 3. copy() makes a complete copy of the array and its data, so changes to
#    the copy do not reach the original.
d = a.copy()
print(d is a)
d[0, 0] = 9999
print(d)
print(a)
# Output:
#   False
#   [[9999    1    2    3]
#    [1234    5    6    7]
#    [   8    9   10   11]]
#   [[   0    1    2    3]
#    [1234    5    6    7]
#    [   8    9   10   11]]
# --- numpy5 ---
import numpy as np

data = np.sin(np.arange(20)).reshape(5, 4)
print(data)
# Row index of the largest value in each column.
ind = data.argmax(axis=0)
print(ind)
print(data.shape)
print(data.shape[1])
# Pair each row index in ind with its column number to pull out the maxima.
data_max = data[ind, range(data.shape[1])]
print(data_max)
# Cross-check against max(); the notebook shows the result as the cell value.
all(data_max == data.max(axis=0))
# Output (the final True is the cell value of the all(...) check):
#   [[ 0.          0.84147098  0.90929743  0.14112001]
#    [-0.7568025  -0.95892427 -0.2794155   0.6569866 ]
#    [ 0.98935825  0.41211849 -0.54402111 -0.99999021]
#    [-0.53657292  0.42016704  0.99060736  0.65028784]
#    [-0.28790332 -0.96139749 -0.75098725  0.14987721]]
#   [2 0 3 1]
#   (5, 4)
#   4
#   [0.98935825 0.84147098 0.99060736 0.6569866 ]
#   True

a = np.arange(0, 40, 10)
print(a)
# tile() repeats the array: 3 times along the rows, 5 times along the columns.
b = np.tile(a, (3, 5))
print(b)
# Output:
#   [ 0 10 20 30]
#   [[ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]
#    [ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]
#    [ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]]

a = np.array([[4, 3, 5], [1, 2, 1]])
print(a)
print('------------')
# np.sort returns a sorted copy; a itself is unchanged at this point.
b = np.sort(a, axis=1)
print(b)
# ndarray.sort sorts the array in place.
a.sort(axis=1)
print('------------')
print(a)
a = np.array([4, 3, 1, 2])
# argsort returns the indices that would sort the array.
j = np.argsort(a)
print('------------')
print(j)
print('------------')
print(a[j])
# Output:
#   [[4 3 5]
#    [1 2 1]]
#   ------------
#   [[3 4 5]
#    [1 1 2]]
#   ------------
#   [[3 4 5]
#    [1 1 2]]
#   ------------
#   [2 3 1 0]
#   ------------
#   [1 2 3 4]
请你一定不要停下来 成为你想成为的人
感谢您的阅读,我是LXL