Welcome to the last “bulk insert” post of my Pandas2PostgreSQL series. As you can see at the end of my benchmark post, the 3 acceptable ways (performance-wise) to do a bulk insert in Psycopg2 are
This post provides end-to-end working code for the execute_mogrify() option. Here you are combining 2 steps:
- cursor.mogrify(): bind the query arguments and return the query string
- cursor.execute(): where you actually execute the query
Full Code
import os
import sys

import pandas as pd
import psycopg2
# Keyword arguments handed to psycopg2.connect(); edit these to point at
# your own PostgreSQL server.
param_dic = {
    "host": "localhost",
    "database": "globaldata",
    "user": "myuser",
    "password": "Passw0rd",
}
def connect(params_dic):
    """Connect to the PostgreSQL database server.

    Args:
        params_dic: Mapping of keyword arguments forwarded unchanged to
            ``psycopg2.connect()`` (e.g. host, database, user, password).

    Returns:
        The connection object returned by ``psycopg2.connect()``.

    If the connection attempt raises, the error is printed and the process
    exits with status 1 via ``sys.exit`` (which requires ``sys`` to be
    imported at the top of the file).
    """
    try:
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1)
    print("Connection successful")
    return conn
def execute_mogrify(conn, df, table):
    """Bulk-insert a DataFrame with cursor.mogrify() + cursor.execute().

    Each row is rendered to an SQL value list with ``cursor.mogrify()``;
    the rendered rows are then joined into a single multi-row
    ``INSERT ... VALUES`` statement and run with one ``cursor.execute()``.

    Args:
        conn: Open database connection providing ``cursor()``, ``commit()``
            and ``rollback()``.
        df: DataFrame whose columns name the target table columns and whose
            rows are the records to insert.
        table: Name of the target table.

    Returns:
        ``None`` on success (or when ``df`` has no rows, in which case no
        statement is sent); ``1`` if the insert failed and was rolled back.

    Note:
        ``table`` and the column names are interpolated into the SQL text
        as-is (only the row values go through ``mogrify``), so they must
        come from a trusted source.
    """
    # itertuples() yields native Python scalars, whereas rows taken from
    # df.to_numpy() can hold numpy scalars (e.g. numpy.int64).
    rows = list(df.itertuples(index=False, name=None))
    if not rows:
        # "INSERT ... VALUES" followed by nothing is not valid SQL.
        print("execute_mogrify(): nothing to insert")
        return None

    # Comma-separated dataframe columns
    cols = ','.join(map(str, df.columns))
    # One %s placeholder per column instead of a hard-coded three.
    row_template = "(" + ",".join(["%s"] * len(df.columns)) + ")"

    cursor = conn.cursor()
    try:
        values = [cursor.mogrify(row_template, row).decode('utf8') for row in rows]
        query = "INSERT INTO %s(%s) VALUES " % (table, cols) + ",".join(values)
        # The values are already bound into the query text, so execute()
        # gets no parameters: a second substitution pass would choke on any
        # literal '%' contained in the data.
        cursor.execute(query)
        conn.commit()
    except Exception as error:  # psycopg2.DatabaseError is an Exception subclass
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        # Runs on success and on failure, so the cursor is never leaked.
        cursor.close()
    print("execute_mogrify() done")
#-----------------------------------------------
# Main code
#-----------------------------------------------
# Load the CSV (point csv_file at your own data) and rename its columns to
# the names used by the target table.
csv_file = "../data/global-temp-monthly.csv"
df = pd.read_csv(csv_file).rename(
    columns={
        "Source": "source",
        "Date": "datetime",
        "Mean": "mean_temp",
    }
)

# Open the connection, run the execute_mogrify() bulk insert, then close.
conn = connect(param_dic)
execute_mogrify(conn, df, 'MonthlyTemp')
conn.close()
For a fully functioning tutorial on how to replicate this, please refer to my Jupyter notebook on GitHub.