dataframe
Two-dimensional, size-mutable, potentially heterogeneous tabular data.
class pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=None)
import pandas as pd
import numpy as np
Colab example:
https://colab.research.google.com/github/google/eng-edu/blob/main/ml/cc/exercises/pandas_dataframe_ultraquick_tutorial.ipynb
# Create and populate a 5x2 NumPy array.
my_data = np.array([[0, 3], [10, 7], [20, 9], [30, 14], [40, 15]])

# Create a Python list that holds the names of the two columns.
my_column_names = ['temperature', 'activity']

# Create a DataFrame.
my_dataframe = pd.DataFrame(data=my_data, columns=my_column_names)

# Print the entire DataFrame.
print(my_dataframe)

# Create a new column named adjusted (each activity value plus 2).
my_dataframe["adjusted"] = my_dataframe["activity"] + 2

# Print the entire DataFrame, which now has three columns.
print(my_dataframe)

# head(3) returns the first three rows.
print("Rows #0, #1, and #2:")
print(my_dataframe.head(3), '\n')

# Passing a one-element list to iloc keeps the result a DataFrame.
print("Row #2:")
print(my_dataframe.iloc[[2]], '\n')

# An integer slice selects rows by position; the end (4) is excluded.
print("Rows #1, #2, and #3:")
print(my_dataframe[1:4], '\n')

# Indexing with a column label returns that single column.
print("Column 'temperature':")
print(my_dataframe['temperature'])

# Create a reference by assigning my_dataframe to a new variable.
# Plain assignment does not copy: both names refer to the same object,
# so a change made through one name is visible through the other.
print("Experiment with a reference:")
reference_to_df = my_dataframe

# Print the starting value of a particular cell.
print(" Starting value of my_dataframe: %d" % my_dataframe['activity'][1])
print(" Starting value of reference_to_df: %d\n" % reference_to_df['activity'][1])

# Modify a cell in my_dataframe; reference_to_df shows the same update.
my_dataframe.at[1, 'activity'] = my_dataframe['activity'][1] + 5
print(" Updated my_dataframe: %d" % my_dataframe['activity'][1])
print(" Updated reference_to_df: %d\n\n" % reference_to_df['activity'][1])

# Create a true copy of my_dataframe.
# copy() returns an independent DataFrame with its own data.
print("Experiment with a true copy:")
copy_of_my_dataframe = my_dataframe.copy()

# Print the starting value of a particular cell.
print(" Starting value of my_dataframe: %d" % my_dataframe['activity'][1])
print(" Starting value of copy_of_my_dataframe: %d\n" % copy_of_my_dataframe['activity'][1])

# Modify a cell in my_dataframe; the copy keeps its earlier value.
my_dataframe.at[1, 'activity'] = my_dataframe['activity'][1] + 3
print(" Updated my_dataframe: %d" % my_dataframe['activity'][1])
print(" copy_of_my_dataframe does not get updated: %d" % copy_of_my_dataframe['activity'][1])
# Column-oriented input: each key is a column label and each list holds
# that column's values, one entry per row.
d = {
    'col1': [1, 2],
    'col2': [3, 4],
}
# Build the DataFrame from the dict.
df = pd.DataFrame(d)
df
   col1  col2
0     1     3
1     2     4
df.dtypes
col1    int64
col2    int64
dtype: object
To enforce a single dtype:
# Rebuild the frame from the same dict, requesting np.int8 for every column.
df = pd.DataFrame(d, dtype=np.int8)
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
Comments
Post a Comment