Skip to content

Pandas

# Import Pandas
import pandas as pd

# Creating DataFrames
df = pd.DataFrame(data)            # Create a DataFrame from data
df = pd.read_csv('file.csv')       # Read from a CSV file
df = pd.read_excel('file.xlsx')    # Read from an Excel file

# Basic DataFrame Operations
df.head()                          # Display first 5 rows
df.tail()                          # Display last 5 rows
df.shape                           # Number of rows and columns
df.info()                          # Summary of DataFrame
df.columns                         # List of column names
df.index                           # Index information
df.describe()                      # Summary statistics

# Selection and Indexing
df['column_name']                  # Select a column
df[['col1', 'col2']]               # Select multiple columns
df.loc[row_label]                  # Select row by label
df.iloc[row_index]                 # Select row by integer index
df.loc[condition]                  # Select rows based on condition
df.iloc[condition]                 # Select rows based on condition

# Filtering Data
df[df['column_name'] > value]      # Filter rows based on condition
df.query('condition')              # Filter using SQL-like queries

# Sorting
df.sort_values('column_name')      # Sort DataFrame by column
df.sort_values(['col1', 'col2'])   # Sort by multiple columns
df.sort_index()                    # Sort by index

# Aggregation
df['column_name'].sum()            # Sum of values in column
df['column_name'].mean()           # Mean of values in column
df.groupby('group_col').agg(func)  # Apply aggregate function to groups

# Missing Data
df.isna()                          # Boolean mask for missing values
df.dropna()                        # Drop rows with missing values
df.fillna(value)                   # Fill missing values

# Creating and Modifying Columns
df['new_column'] = values          # Create a new column
df['new_col'] = df['col'] + 1      # Perform operations on columns

# Data Cleaning
df['column_name'] = df['column_name'].str.strip()  # Remove leading/trailing spaces
df['column_name'] = df['column_name'].str.lower()  # Convert to lowercase

# Merging DataFrames
pd.merge(df1, df2, on='common_col')              # Merge based on a common column
pd.concat([df1, df2], axis=0)                   # Concatenate along rows

# Grouping and Aggregation
df.groupby('column_name').agg({'col1': 'mean', 'col2': 'sum'})

# Pivot Tables
df.pivot_table(values='value_col', index='index_col', columns='col_to_pivot')

# Reshaping Data
df.melt(id_vars=['col1'], value_vars=['col2'])  # Convert wide data to long format
df.pivot(index='index_col', columns='col_to_pivot', values='value_col')

# DateTime Operations
df['datetime_col'] = pd.to_datetime(df['datetime_col'])  # Convert to datetime
df.set_index('datetime_col', inplace=True)       # Set datetime column as index

# Save DataFrames
df.to_csv('file.csv', index=False)         # Save to CSV
df.to_excel('file.xlsx', index=False)      # Save to Excel