3.7. Advanced Indexing

3.7.1. Indexing

  • Two types of index arrays: integer (int) and boolean (bool)

  • Also known as fancy indexing

import numpy as np


# Sample 2x3 array used by the boolean-mask examples below.
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# A comparison is evaluated element-wise and yields a boolean array.
a > 2
# array([[False, False,  True],
#        [ True,  True,  True]])

# Indexing with that boolean array returns the matching elements (flattened).
a[a > 2]
# array([3, 4, 5, 6])

# The threshold may be computed from the data itself (a.mean() is 3.5 here).
a[a > a.mean()]
# array([4, 5, 6])

a[a >= a.mean() + 1]
# array([5, 6])
import numpy as np


a = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# Select the even elements with an inline mask.
a[a % 2 == 0]
# array([2, 4, 6])

# The same mask, stored under a descriptive name before it is used.
even = a % 2 == 0
a[even]
# array([2, 4, 6])
import numpy as np


a = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# Masks are combined element-wise with `&`. Each comparison needs its own
# parentheses because `&` binds tighter than `>`, `<=` and `==`.
a[(a > 2) & (a <= 5) & (a % 2 == 1)]
# array([3, 5])

# Same selection, with every condition bound to a name first.
query1 = a > 2
query2 = a <= 5
query3 = a % 2 == 1
a[query1 & query2 & query3]
# array([3, 5])

# Same selection again, using names that describe each condition.
large = a > 2
small = a <= 5
odd = a % 2 == 1
a[large & small & odd]
# array([3, 5])
import numpy as np


a = np.array([1, 2, 3])

# An integer array selects elements by position; a position may be repeated.
at_index = np.array([0, 1, 0])
a[at_index]
# array([1, 2, 1])

# Positions do not have to be contiguous.
at_index = np.array([0, 2])
a[at_index]
# array([1, 3])
import numpy as np


a = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# A single list of integers selects whole rows.
a[[0, 2]]
# array([[1, 2, 3],
#        [7, 8, 9]])

# Two lists are paired element-wise: this picks a[0, 1] and a[2, 2].
a[[0, 2], [1, 2]]
# array([2, 9])

# A slice combined with a list: rows 0 and 1, columns 1 and 2.
a[:2, [1, 2]]
# array([[2, 3],
#        [5, 6]])

Passing rows and cols together pairs them element-wise (like zip()) into (row, column) coordinates used to select values. In the example below the selected coordinates are (0,0); (0,1); (1,0); (1,1); (0,1):

import numpy as np


a = np.array([[1, 4], [9, 16]], dtype=float)

rows = np.array([0, 0, 1, 1, 0], dtype=int)
cols = np.array([0, 1, 0, 1, 1], dtype=int)

# Row indices alone return one full row per entry of `rows`.
a[rows]
# array([[ 1.,  4.],
#        [ 1.,  4.],
#        [ 9., 16.],
#        [ 9., 16.],
#        [ 1.,  4.]])

# Row and column indices together return one element per (row, col) pair.
a[rows, cols]
# array([ 1.,  4.,  9., 16.,  4.])

3.7.2. Use Cases

import numpy as np

# '2000-01-01' -> [1, 2, 3]
# '2000-01-02' -> [4, 5, 6]
# '2000-01-03' -> [7, 8, 9]

# Row labels: date[i] labels the row values[i].
date = np.array(['2000-01-01', '2000-01-02', '2000-01-03'])

values = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Comparing the labels with a string gives one boolean per label.
date == '2000-01-02'
# array([False,  True, False])

# That mask selects the matching row(s) of `values`.
values[date == '2000-01-02']
# array([[4, 5, 6]])

# Every row except the one labelled '2000-01-02'.
values[date != '2000-01-02']
# array([[1, 2, 3],
#        [7, 8, 9]])

# Rows matching either label; `|` is the element-wise "or".
values[(date == '2000-01-01') | (date == '2000-01-03')]
# array([[1, 2, 3],
#        [7, 8, 9]])
import numpy as np


# Row labels: index[i] labels the row data[i].
index = np.array(['2000-01-01', '2000-01-02', '2000-01-03'])

data = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Named row masks.
jan01 = index == '2000-01-01'
jan03 = index == '2000-01-03'

# Rows labelled with either date.
data[jan01 | jan03]
# array([[1, 2, 3],
#        [7, 8, 9]])

# The same rows, first column only.
data[jan01 | jan03, 0]
# array([1, 7])

# The same rows, first two columns.
data[jan01 | jan03, :2]
# array([[1, 2],
#        [7, 8]])

# Assigning through the same selection modifies `data` in place.
data[jan01 | jan03, :2] = 0
data
# array([[0, 0, 3],
#        [4, 5, 6],
#        [0, 0, 9]])
import numpy as np

#                Morning         Noon      Evening
# 1999-12-30  1.76405235,  0.40015721,  0.97873798,
# 1999-12-31  2.2408932 ,  1.86755799, -0.97727788,
# 2000-01-01  0.95008842, -0.15135721, -0.10321885,
# 2000-01-02  0.4105985 ,  0.14404357,  1.45427351,

# Row labels (dates) and column labels (times of day) for `data`.
index = np.array(['1999-12-30', '1999-12-31', '2000-01-01', '2000-01-02'])
columns = np.array(['Morning', 'Noon', 'Evening'])

data = np.array([
    [1.76405235, 0.40015721, 0.97873798],
    [2.2408932, 1.86755799, -0.97727788],
    [0.95008842, -0.15135721, -0.10321885],
    [0.4105985, 0.14404357, 1.45427351],
])

# Row masks
dec31 = index == '1999-12-31'
# array([False,  True, False, False])
jan01 = index == '2000-01-01'
# array([False, False,  True, False])

# Column mask
morning = columns == 'Morning'
# array([ True, False, False])

# Both selected rows, all columns.
data[dec31 | jan01]
# array([[ 2.2408932 ,  1.86755799, -0.97727788],
#        [ 0.95008842, -0.15135721, -0.10321885]])

# Both selected rows, 'Morning' column, with the column mask written inline.
data[dec31 | jan01, columns == 'Morning']
# array([2.2408932 , 0.95008842])

# The same selection using the named column mask.
data[dec31 | jan01, morning]
# array([2.2408932 , 0.95008842])

3.7.3. Diagonal problem

  • Warning

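    Multiple index arrays (or boolean masks) are paired element-wise, so without the np.ix_ call only the "diagonal" elements (first selected row with first selected column, second with second, and so on) would be selected. This difference is the most important thing to remember about indexing with multiple advanced indices.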

import numpy as np

#                Morning         Noon      Evening
# 1999-12-30  1.76405235,  0.40015721,  0.97873798,
# 1999-12-31  2.2408932 ,  1.86755799, -0.97727788,
# 2000-01-01  0.95008842, -0.15135721, -0.10321885,
# 2000-01-02  0.4105985 ,  0.14404357,  1.45427351,

# Row labels (dates) and column labels (times of day) for `data`.
index = np.array(['1999-12-30', '1999-12-31', '2000-01-01', '2000-01-02'])
columns = np.array(['Morning', 'Noon', 'Evening'])

data = np.array([
    [1.76405235, 0.40015721, 0.97873798],
    [2.2408932, 1.86755799, -0.97727788],
    [0.95008842, -0.15135721, -0.10321885],
    [0.4105985, 0.14404357, 1.45427351],
])

# Row masks
dec31 = index == '1999-12-31'
# array([False,  True, False, False])
jan01 = index == '2000-01-01'
# array([False, False,  True, False])

# Column masks
morning = columns == 'Morning'
# array([ True, False, False])
evening = columns == 'Evening'
# array([False, False,  True])

# Both selected rows, all columns.
data[dec31 | jan01]
# array([[ 2.2408932 ,  1.86755799, -0.97727788],
#        [ 0.95008842, -0.15135721, -0.10321885]])

# Row mask and column mask are paired element-wise, so this returns only
# data[1, 0] and data[2, 2] rather than the 2x2 block.
data[dec31 | jan01, morning | evening]
# array([ 2.2408932 , -0.10321885])

# np.ix_ selects every combination of the chosen rows and columns instead.
data[np.ix_(dec31 | jan01, morning | evening)]
# array([[ 2.2408932 , -0.97727788],
#        [ 0.95008842, -0.10321885]])