4.1. DataFrame Create
4.1.1. Create from List of Dicts
import pandas as pd

# Each dict is one row; the dict keys become the column labels.
pd.DataFrame([
    {'A': 1.0, 'B': 2.0},
    {'A': 3.0, 'B': 4.0},
])
#      A    B
# 0  1.0  2.0
# 1  3.0  4.0
import pandas as pd

# The rows do not share the same keys: the columns are the union of all keys,
# and a key that is absent from a row shows up as NaN in that row.
pd.DataFrame([
    {'A': 1.0, 'B': 2.0},
    {'B': 3.0, 'C': 4.0},
])
#      A    B    C
# 0  1.0  2.0  NaN
# 1  NaN  3.0  4.0
import pandas as pd

# One dict per person; string values work the same way as numbers.
pd.DataFrame([
    {'firstname': 'Mark', 'lastname': 'Watney'},
    {'firstname': 'Jan', 'lastname': 'Twardowski'},
    {'firstname': 'Ivan', 'lastname': 'Ivanovic'},
    {'firstname': 'Melissa', 'lastname': 'Lewis'},
])
#    firstname    lastname
# 0       Mark      Watney
# 1        Jan  Twardowski
# 2       Ivan    Ivanovic
# 3    Melissa       Lewis
4.1.2. Create from Dict
import pandas as pd

# Each key is a column label and each list holds that column's values,
# so every list has the same length (one entry per row).
pd.DataFrame({
    'A': ['a', 'b', 'c'],
    'B': [1.0, 2.0, 3.0],
    'C': [1, 2, 3],
})
#    A    B  C
# 0  a  1.0  1
# 1  b  2.0  2
# 2  c  3.0  3
import pandas as pd

# Column-oriented input: one list of first names, one list of last names.
pd.DataFrame({
    'firstname': ['Mark', 'Jan', 'Ivan', 'Melissa'],
    'lastname': ['Watney', 'Twardowski', 'Ivanovic', 'Lewis'],
})
#    firstname    lastname
# 0       Mark      Watney
# 1        Jan  Twardowski
# 2       Ivan    Ivanovic
# 3    Melissa       Lewis
import numpy as np
import pandas as pd

# Column values may be of different kinds. The scalars ('A', 'B', 'F') are
# repeated on every row; the four-element values ('C', 'D', 'E', 'G') set
# the number of rows.
pd.DataFrame({
    'A': 1.0,
    'B': pd.Timestamp('1961-04-12'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
    'G': [1, 2, 3, 4],
})
#      A          B    C  D      E    F  G
# 0  1.0 1961-04-12  1.0  3   test  foo  1
# 1  1.0 1961-04-12  1.0  3  train  foo  2
# 2  1.0 1961-04-12  1.0  3   test  foo  3
# 3  1.0 1961-04-12  1.0  3  train  foo  4
4.1.3. Create from NDArray
import numpy as np
import pandas as pd

# Seed the global NumPy generator so the random values below are reproducible.
np.random.seed(0)

# A 7x4 array becomes 7 rows and 4 columns; with no labels given, both the
# rows and the columns are numbered from 0.
df = pd.DataFrame(np.random.randn(7, 4))
df
#           0         1         2         3
# 0  1.764052  0.400157  0.978738  2.240893
# 1  1.867558 -0.977278  0.950088 -0.151357
# 2 -0.103219  0.410599  0.144044  1.454274
# 3  0.761038  0.121675  0.443863  0.333674
# 4  1.494079 -0.205158  0.313068 -0.854096
# 5 -2.552990  0.653619  0.864436 -0.742165
# 6  2.269755 -1.454366  0.045759 -0.187184
4.1.4. Assignments
"""
* Assignment: DataFrame Create
* Complexity: easy
* Lines of code: 10 lines
* Time: 5 min
English:
1. Use data from "Given" section (see below)
2. Create `result: pd.DataFrame` for input data
Polish:
1. Użyj danych z sekcji "Given" (patrz poniżej)
2. Stwórz `result: pd.DataFrame` dla danych wejściowych
Hints:
* Use selection with `alt` key in your IDE
Tests:
>>> type(result) is pd.DataFrame
True
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.max_columns', 10)
>>> pd.set_option('display.max_rows', 10)
>>> result # doctest: +NORMALIZE_WHITESPACE
Crew Role Astronaut
0 Prime CDR Neil Armstrong
1 Prime LMP Buzz Aldrin
2 Prime CMP Michael Collins
3 Backup CDR James Lovell
4 Backup LMP William Anders
5 Backup CMP Fred Haise
"""
# Given
import pandas as pd
# Raw input rows for the assignment, one row per line. The doctest above
# expects them under the columns Crew, Role and Astronaut, in that order.
# This is a bare string literal, so it is not bound to any name.
# NOTE(review): presumably meant to be edited in place into Python data
# (see the `alt`-selection hint in the docstring) - confirm.
"""
"Prime", "CDR", "Neil Armstrong"
"Prime", "LMP", "Buzz Aldrin"
"Prime", "CMP", "Michael Collins"
"Backup", "CDR", "James Lovell"
"Backup", "LMP", "William Anders"
"Backup", "CMP", "Fred Haise"
"""
# Placeholder for the solution: replace `...` with the pd.DataFrame that
# satisfies the doctests in the docstring above.
result = ...