As you can see at the end of my benchmark post, there are three acceptable ways (performance-wise) to do a bulk insert with psycopg2.
This post provides end-to-end working code for the execute_values() option.
Step 1: Specify the Connection Parameters
import pandas as pd
import psycopg2
import psycopg2.extras as extras
import sys

# Change the database, username & password according to your own values
param_dic = {
    "host"     : "localhost",
    "database" : "globaldata",
    "user"     : "myuser",
    "password" : "Passw0rd"
}
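If you would rather not hardcode credentials in the script, here is a minimal sketch that reads them from environment variables instead (the variable names below are my own choice, not from the original post):

import os

# Hypothetical environment variable names; adapt them to your setup
param_dic = {
    "host"     : os.environ.get("PG_HOST", "localhost"),
    "database" : os.environ.get("PG_DATABASE", "globaldata"),
    "user"     : os.environ.get("PG_USER", "myuser"),
    "password" : os.environ.get("PG_PASSWORD", "")
}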
Step 2: Helper Functions
def connect(params_dic):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # Connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1)
    print("Connection successful")
    return conn
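Before moving on, you can sanity-check connect() by opening a connection and running a trivial query (a quick sketch, assuming the param_dic from Step 1):

conn = connect(param_dic)
with conn.cursor() as cur:
    # Any cheap query works; this just confirms the server answers
    cur.execute("SELECT version();")
    print(cur.fetchone()[0])
conn.close()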
def execute_values(conn, df, table):
    """
    Using psycopg2.extras.execute_values() to insert the dataframe
    """
    # Create a list of tuples from the dataframe values
    tuples = [tuple(x) for x in df.to_numpy()]
    # Comma-separated dataframe columns
    cols = ','.join(list(df.columns))
    # SQL query to execute
    # (table and column names are interpolated directly here,
    #  so only pass trusted values for them)
    query = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
    cursor = conn.cursor()
    try:
        extras.execute_values(cursor, query, tuples)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    print("execute_values() done")
    cursor.close()
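One caveat worth knowing: df.to_numpy() keeps missing values as NaN, and psycopg2 does not turn NaN into SQL NULL for non-float columns. If your dataframe has gaps, a minimal sketch of converting them to None before calling the helper (this step is my addition, not part of the original benchmark):

# Cast to object dtype first so None survives the replacement;
# NaN/NaT values then arrive in PostgreSQL as SQL NULL
df = df.astype(object).where(pd.notnull(df), None)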
Step 3: Main Code
# Read your CSV file into a dataframe
df = pd.read_csv(csv_file)  # csv_file is the path to your data file
# Connect to the database
conn = connect(param_dic)
# Run the execute_values strategy
execute_values(conn, df, 'MonthlyTemp')
# Close the connection
conn.close()
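Note that execute_values() only inserts rows; the target table must already exist with columns matching the dataframe, so create it once before the insert step. A hypothetical schema for the MonthlyTemp table (the column names and types here are assumptions for illustration, not from the original benchmark):

cursor = conn.cursor()
# Hypothetical columns; align them with your dataframe before running
cursor.execute("""
    CREATE TABLE IF NOT EXISTS MonthlyTemp (
        year     INTEGER,
        month    INTEGER,
        avg_temp REAL
    );
""")
conn.commit()
cursor.close()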
For a fully functioning tutorial on how to replicate this, please refer to my Jupyter notebook and Python script on GitHub.