As you can see at the end of my benchmark post, there are three acceptable ways (performance-wise) to do a bulk insert in Psycopg2.
This post provides end-to-end working code for the execute_values() option.
Step 1: Specify the Connection Parameters
import sys
import pandas as pd
import psycopg2
import psycopg2.extras as extras

# Here you want to change the database, username and password according to your own values
param_dic = {
    "host"     : "localhost",
    "database" : "globaldata",
    "user"     : "myuser",
    "password" : "Passw0rd"
}
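If you would rather not hardcode the password in the script, here is a minimal variation. It is only a sketch: the DB_PASSWORD variable name is an example I chose for illustration, not something this tutorial requires.

import os

# Hypothetical example: read the password from an environment variable
# (e.g. export DB_PASSWORD=Passw0rd in your shell before running the script)
param_dic = {
    "host"     : "localhost",
    "database" : "globaldata",
    "user"     : "myuser",
    "password" : os.environ.get("DB_PASSWORD", "")
}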
Step 2: Helper Functions
def connect(params_dic):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # Connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1)
    print("Connection successful")
    return conn


def execute_values(conn, df, table):
    """ Using psycopg2.extras.execute_values() to insert the dataframe """
    # Create a list of tuples from the dataframe values
    tuples = [tuple(x) for x in df.to_numpy()]
    # Comma-separated dataframe columns
    cols = ','.join(list(df.columns))
    # SQL query to execute
    query = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
    cursor = conn.cursor()
    try:
        extras.execute_values(cursor, query, tuples)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    print("execute_values() done")
    cursor.close()
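One optional tweak: execute_values() accepts a page_size argument (it defaults to 100) that controls how many rows are grouped into each statement. If your dataframe is large you can raise it; the value below is only an illustration, not a recommendation.

# Same call as inside execute_values() above, but sending 1000 rows
# per statement instead of the default 100. Tune this for your own data volume.
extras.execute_values(cursor, query, tuples, page_size=1000)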
Step 3: Main Code
# Read the csv file into a dataframe
df = pd.read_csv(csv_file)

# Connect to the database
conn = connect(param_dic)

# Run the execute_values strategy
execute_values(conn, df, 'MonthlyTemp')

# Close the connection
conn.close()
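If you want to double-check that the rows actually landed, you can run a quick count just before the conn.close() call above. This is only an illustrative snippet; it assumes the MonthlyTemp table used in the example.

# Optional check, to run before conn.close():
# count the rows in the target table (MonthlyTemp from the example above)
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM MonthlyTemp")
print("Rows in MonthlyTemp:", cursor.fetchone()[0])
cursor.close()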
For a fully working version of this tutorial, please refer to my Jupyter notebook and Python script on GitHub.