Welcome to the last “bulk insert” post of my Pandas2PostgreSQL series. As you can see at the end of my benchmark post, the 3 acceptable ways (performance-wise) to do a bulk insert in Psycopg2 are:
This post provides end-to-end working code for the execute_mogrify() option. Here you are combining two steps:
- cursor.mogrify(): bind the query arguments and return the query string
- cursor.execute(): where you actually execute the query
Full Code
import os
import sys

import pandas as pd
import psycopg2

# Connection parameters
param_dic = {
    "host": "localhost",
    "database": "globaldata",
    "user": "myuser",
    "password": "Passw0rd",
}


def connect(params_dic):
    """Connect to the PostgreSQL database server.

    Args:
        params_dic: Keyword arguments forwarded to ``psycopg2.connect()``
            (host, database, user, password, ...).

    Returns:
        The open psycopg2 connection.

    On any connection error the error is printed and the process exits
    with status 1.
    """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1)
    print("Connection successful")
    return conn


def execute_mogrify(conn, df, table):
    """Bulk-insert a dataframe with cursor.mogrify() + cursor.execute().

    Each row is rendered to a literal ``(v1, v2, ...)`` group with
    ``cursor.mogrify()``; the groups are joined into one multi-row
    ``INSERT`` statement that is executed and committed.

    Args:
        conn: Open psycopg2 connection.
        df: Dataframe whose column names match the target table columns.
        table: Name of the target table.

    Returns:
        1 if the insert failed (the error is printed and the transaction
        rolled back), otherwise None.

    NOTE: ``table`` and the column names are interpolated into the SQL
    text as-is; only pass trusted identifiers.
    """
    # Create a list of tuples from the dataframe values
    tuples = [tuple(x) for x in df.to_numpy()]
    if not tuples:
        # "INSERT ... VALUES" with no rows is invalid SQL; nothing to do.
        print("execute_mogrify() done (no rows to insert)")
        return None

    # Comma-separated dataframe columns
    cols = ','.join(list(df.columns))
    # One %s placeholder per column, so any column count works
    row_template = "(" + ",".join(["%s"] * len(df.columns)) + ")"

    # The context manager closes the cursor on every exit path
    with conn.cursor() as cursor:
        # mogrify() returns bytes; assumes a UTF-8 client encoding
        values = [
            cursor.mogrify(row_template, tup).decode('utf8')
            for tup in tuples
        ]
        query = "INSERT INTO %s(%s) VALUES " % (table, cols) + ",".join(values)
        try:
            # The values are already bound into the query text by mogrify();
            # passing arguments again would make psycopg2 re-interpret any
            # '%' found in the data as a placeholder.
            cursor.execute(query)
            conn.commit()
        except (Exception, psycopg2.DatabaseError) as error:
            print("Error: %s" % error)
            conn.rollback()
            return 1
    print("execute_mogrify() done")


#-----------------------------------------------
# Main code
#-----------------------------------------------

# Reading the csv file, change to meet your own requirements
csv_file = "../data/global-temp-monthly.csv"
df = pd.read_csv(csv_file)
df = df.rename(columns={
    "Source": "source",
    "Date": "datetime",
    "Mean": "mean_temp"
})

conn = connect(param_dic)  # connect to the database
try:
    execute_mogrify(conn, df, 'MonthlyTemp')  # Run the execute_mogrify strategy
finally:
    conn.close()  # close the connection, even if the insert raised
For a fully functioning tutorial on how to replicate this, please refer to my Jupyter notebook on GitHub.