Python基础算法库Numpy及可视化库使用实践-大数据ML样本集案例实战

栏目: Python · 发布时间: 6年前

内容简介：版权声明：本套技术专栏是作者（秦凯新）平时工作的总结和升华，通过从真实商业环境抽取案例进行总结和分享，并给出商业应用的调优建议和集群环境容量规划等内容，请持续关注本套博客。QQ邮箱地址：1120746959@qq.com，如有任何学术交流，可随时联系。

1 Numpy详细使用

读取txt文件

import numpy
  world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",")
  print(type(world_alcohol))

  world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype="U75", skip_header=1)
  print(world_alcohol)
  
  [[u'1986' u'Western Pacific' u'Viet Nam' u'Wine' u'0']
   [u'1986' u'Americas' u'Uruguay' u'Other' u'0.5']
   [u'1985' u'Africa' u"Cte d'Ivoire" u'Wine' u'1.62']
   ..., 
   [u'1987' u'Africa' u'Malawi' u'Other' u'0.75']
   [u'1989' u'Americas' u'Bahamas' u'Wine' u'1.5']
   [u'1985' u'Africa' u'Malawi' u'Spirits' u'0.31']]
复制代码

创建一维和二维的Array数组

#The numpy.array() function can take a list or list of lists as input. When we input a list, we get a one-dimensional array as a result:
  
  #一维的Array数组[]
  vector = numpy.array([5, 10, 15, 20])
  
  #二维的Array数组[[],[],[]]
  matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
  print vector
  print matrix
复制代码

shape用法

#We can use the ndarray.shape property to figure out how many elements are in the array
  vector = numpy.array([1, 2, 3, 4])
  print(vector.shape)
  
  #For matrices, the shape property contains a tuple with 2 elements.
  matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
  print(matrix.shape)
  
  (4,)
  (2, 3)
复制代码

dtype用法（numpy要求numpy.array内部所有元素的数据类型相同）

numbers = numpy.array([1, 2, 3, 4])
  numbers.dtype
  
  dtype('int32')
  
  #数组中只要有一个元素是字符串，其余元素都会被自动转换为同一类型（这里全部变成字符串）
  numbers = numpy.array([1, 2, 3, '4'])
  print(numbers)
  numbers.dtype
  
 
  ['1' '2' '3' '4']
   dtype('<U11')
复制代码

索引定位

[[u'1986' u'Western Pacific' u'Viet Nam' u'Wine' u'0']
   [u'1986' u'Americas' u'Uruguay' u'Other' u'0.5']
   [u'1985' u'Africa' u"Cte d'Ivoire" u'Wine' u'1.62']
   ..., 
   [u'1987' u'Africa' u'Malawi' u'Other' u'0.75']
   [u'1989' u'Americas' u'Bahamas' u'Wine' u'1.5']
   [u'1985' u'Africa' u'Malawi' u'Spirits' u'0.31']]
   
  uruguay_other_1986 = world_alcohol[1,4]
  third_country = world_alcohol[2,2]
  print uruguay_other_1986
  print third_country
  
  0.5
  Cte d'Ivoire
复制代码

索引切片

vector = numpy.array([5, 10, 15, 20])
  print(vector[0:3])  
  [ 5 10 15]
复制代码

取某一列（：表示所有行）

matrix = numpy.array([
                      [5, 10, 15], 
                      [20, 25, 30],
                      [35, 40, 45]
                   ])
  print(matrix[:,1])
  
  [10 25 40]

  matrix = numpy.array([
                  [5, 10, 15], 
                  [20, 25, 30],
                  [35, 40, 45]
               ])
  print(matrix[:,0:2])
  
  [[ 5 10]
   [20 25]
   [35 40]]
   
  matrix = numpy.array([
              [5, 10, 15], 
              [20, 25, 30],
              [35, 40, 45]
           ])
  print(matrix[1:3,0:2])
  
  [[20 25]
  [35 40]]
复制代码

对Array操作表示对内部所有元素进行操作

import numpy
  #it will compare the second value to each element in the vector
  # If the values are equal, the Python interpreter returns True; otherwise, it returns False
  vector = numpy.array([5, 10, 15, 20])
  vector == 10
  
  array([False,  True, False, False], dtype=bool)
  
  matrix = numpy.array([
              [5, 10, 15], 
              [20, 25, 30],
              [35, 40, 45]
           ])
  matrix == 25
  
  array([[False, False, False],
 [False,  True, False],
 [False, False, False]], dtype=bool)
复制代码

布尔值当索引（[False True False False]）

vector = numpy.array([5, 10, 15, 20])
  equal_to_ten = (vector == 10)
  print equal_to_ten
  print(vector[equal_to_ten])
  
  [False  True False False]
  [10]


  #矩阵表示索引
  matrix = numpy.array([
                  [5, 10, 15], 
                  [20, 25, 30],
                  [35, 40, 45]
               ])
  second_column_25 = (matrix[:,1] == 25)
  print second_column_25
  print(matrix[second_column_25, :])
  
  [False  True False]
  [[20 25 30]]
复制代码

对数组进行与（&）、或（|）运算

#We can also perform comparisons with multiple conditions
  vector = numpy.array([5, 10, 15, 20])
  equal_to_ten_and_five = (vector == 10) & (vector == 5)
  print equal_to_ten_and_five
  
  [False False False False]
  
  
  vector = numpy.array([5, 10, 15, 20])
  equal_to_ten_or_five = (vector == 10) | (vector == 5)
  print equal_to_ten_or_five
  
  [ True  True False False]
复制代码

值类型转换

vector = numpy.array(["1", "2", "3"])
  print vector.dtype
  print vector
  vector = vector.astype(float)
  print vector.dtype
  print vector
  
  |S1
  ['1' '2' '3']
  float64
  [ 1.  2.  3.]
复制代码

聚合求解

vector = numpy.array([5, 10, 15, 20])
  vector.sum()
复制代码

按行求和（axis=1）

matrix = numpy.array([
                 [5, 10, 15], 
                 [20, 25, 30],
                 [35, 40, 45]
              ])
 matrix.sum(axis=1)
 array([ 30,  75, 120])
复制代码

按列求和（axis=0）

matrix = numpy.array([
                  [5, 10, 15], 
                  [20, 25, 30],
                  [35, 40, 45]
               ])
  matrix.sum(axis=0)  
复制代码

矩阵操作np.arange生成0到N-1的整数

import numpy as np
  a = np.arange(15).reshape(3, 5)
  a

  array([[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14]])
         
  a.ndim
  2
  
  a.dtype.name
  'int32'
  
  a.size
  15
复制代码

矩阵初始化

np.zeros ((3,4)) 
  
  array([[ 0.,  0.,  0.,  0.],
 [ 0.,  0.,  0.,  0.],
 [ 0.,  0.,  0.,  0.]])
 

  np.ones( (2,3,4), dtype=np.int32 )
  
  array([[[1, 1, 1, 1],
  [1, 1, 1, 1],
  [1, 1, 1, 1]],

 [[1, 1, 1, 1],
  [1, 1, 1, 1],
  [1, 1, 1, 1]]])
复制代码

按照间隔生成数据

np.arange( 10, 30, 5 )
  array([10, 15, 20, 25])

  np.arange( 0, 2, 0.3 )
  array([ 0. ,  0.3,  0.6,  0.9,  1.2,  1.5,  1.8])
复制代码

随机生成数据

np.random.random((2,3))
  
  array([[ 0.40130659,  0.45452825,  0.79776512],
 [ 0.63220592,  0.74591134,  0.64130737]])
复制代码

linspace在0到2pi之间取100个数

from numpy import pi
  np.linspace( 0, 2*pi, 100 )

  array([ 0.    ,  0.06346652,  0.12693304,  0.19039955,  0.25386607,
      0.31733259,  0.38079911,  0.44426563,  0.50773215,  0.57119866,
      0.63466518,  0.6981317 ,  0.76159822,  0.82506474,  0.88853126,
      0.95199777,  1.01546429,  1.07893081,  1.14239733,  1.20586385,
      1.26933037,  1.33279688,  1.3962634 ,  1.45972992,  1.52319644,
      1.58666296,  1.65012947,  1.71359599,  1.77706251,  1.84052903,
      1.90399555,  1.96746207,  2.03092858,  2.0943951 ,  2.15786162,
      2.22132814,  2.28479466,  2.34826118,  2.41172769,  2.47519421,
      2.53866073,  2.60212725,  2.66559377,  2.72906028,  2.7925268 ,
      2.85599332,  2.91945984,  2.98292636,  3.04639288,  3.10985939,
      3.17332591,  3.23679243,  3.30025895,  3.36372547,  3.42719199,
      3.4906585 ,  3.55412502,  3.61759154,  3.68105806,  3.74452458,
      3.8079911 ,  3.87145761,  3.93492413,  3.99839065,  4.06185717,
      4.12532369,  4.1887902 ,  4.25225672,  4.31572324,  4.37918976,
      4.44265628,  4.5061228 ,  4.56958931,  4.63305583,  4.69652235,
      4.75998887,  4.82345539,  4.88692191,  4.95038842,  5.01385494,
      5.07732146,  5.14078798,  5.2042545 ,  5.26772102,  5.33118753,
      5.39465405,  5.45812057,  5.52158709,  5.58505361,  5.64852012,
      5.71198664,  5.77545316,  5.83891968,  5.9023862 ,  5.96585272,
      6.02931923,  6.09278575,  6.15625227,  6.21971879,  6.28318531])
复制代码

矩阵基本操作

#the product operator * operates elementwise in NumPy arrays
  a = np.array( [20,30,40,50] )
  b = np.arange( 4 )
  print (a)
  print (b)
  #b
  c = a-b
  print (c)
  b**2
  print (b**2)
  print (a<35)
  
  [20 30 40 50]
  [0 1 2 3]
  [20 29 38 47]
  [0 1 4 9]
  [ True  True False False]
复制代码

矩阵相乘

#The matrix product can be performed using the dot function or method
  A = np.array([[1,1],
                 [0,1]] )
  B = np.array([[2,0],
                 [3,4]])
  print (A)
  print (B)
  print (A*B)
  
  print (A.dot(B))
  print (np.dot(A, B) )
  
  [[1 1]
   [0 1]]
   
  [[2 0]
   [3 4]]
   
  [[2 0]
   [0 4]]
   
  [[5 4]
   [3 4]]
   
  [[5 4]
   [3 4]]
复制代码

矩阵操作floor向下取整

import numpy as np
  B = np.arange(3)
  print (B)
  #print np.exp(B)
  print (np.sqrt(B))
  
  [0 1 2]
  [0.         1.         1.41421356]
  
  #Return the floor of the input
  a = np.floor(10*np.random.random((3,4)))
  #print a
  
  #Return the floor of the input
  a = np.floor(10*np.random.random((3,4)))
  print (a)
  
  print(a.reshape(2,-1))
  
  [[0. 4. 2. 2.]
   [8. 1. 5. 7.]
   [0. 9. 7. 4.]]
   
  [[0. 4. 2. 2. 8. 1.]
   [5. 7. 0. 9. 7. 4.]]
复制代码

hstack/vstack矩阵拼接与hsplit矩阵切分

a = np.floor(10*np.random.random((2,2)))
  b = np.floor(10*np.random.random((2,2)))
  print a
  print '---'
  print b
  print '---'
  print np.hstack((a,b))
  
  [[ 5.  6.]
   [ 1.  5.]]
  ---
  [[ 8.  6.]
   [ 9.  0.]]
  ---
  [[ 5.  6.  8.  6.]
   [ 1.  5.  9.  0.]]

  a = np.floor(10*np.random.random((2,2)))
  b = np.floor(10*np.random.random((2,2)))
  print (a)
  print ('---')
  print (b)
  print ('---')
  #print np.hstack((a,b))
  np.vstack((a,b))
  
  [[7. 7.]
   [2. 6.]]
  ---
  [[0. 6.]
   [0. 3.]]
  ---
 array([[7., 7.],
 [2., 6.],
 [0., 6.],
 [0., 3.]])

  a = np.floor(10*np.random.random((2,12)))
  print (a)
  print (np.hsplit(a,3))
  
  [[6. 5. 2. 4. 2. 4. 9. 4. 4. 6. 8. 9.]
   [8. 4. 0. 2. 6. 5. 2. 5. 0. 4. 1. 6.]]
  [array([[6., 5., 2., 4.],
         [8., 4., 0., 2.]]), array([[2., 4., 9., 4.],
         [6., 5., 2., 5.]]), array([[4., 6., 8., 9.],
         [0., 4., 1., 6.]])]
复制代码

任意选择切分位置

print ( np.hsplit(a,(3,4)))   # Split a after the third and the fourth column
  
  [[2. 8. 4.    7.    6. 6. 5. 8. 8. 3. 0. 1.]
   [3. 5. 9.    4.    5. 8. 7. 6. 2. 3. 8. 4.]]
  
  [array([[2., 8., 4.],
  [3., 5., 9.]]), array([[7.],
  [4.]]), array([[6., 6., 5., 8., 8., 3., 0., 1.],
  [5., 8., 7., 6., 2., 3., 8., 4.]])]
复制代码

变量赋值
变量视图

copy（深拷贝）得到的新数组与原数组互不影响

d = a.copy() 
  d is a
  d[0,0] = 9999
  print d 
  print a

  [[9999    1    2    3]
   [1234    5    6    7]
   [   8    9   10   11]]
  [[   0    1    2    3]
   [1234    5    6    7]
   [   8    9   10   11]]
复制代码

寻找列最大值索引

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持码农网

查看所有标签

猜你喜欢:

本站部分资源来源于网络，本站转载出于传递更多信息之目的，版权归原作者或者来源机构所有，如转载稿涉及版权问题，请联系我们。

码农书籍

Types and Programming Languages

Benjamin C. Pierce / The MIT Press / 2002-2-1 / USD 95.00

A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of typ......一起来看看《Types and Programming Languages》这本书的介绍吧!

码农工具

RGB转16进制工具

RGB HEX 互转工具

URL 编码/解码

MD5 加密

MD5 加密工具