Thanks for this post, I noticed that doing insert from SQLAlchemy is slower than when is done directly from Python. So I wrote following code, in one case I can getting connection string from SQLAlchemy and in second case getting it from Python. Exact code is 50% slower when connection is obtained from SQLAlchemy. Any thoughts?
The code is below, only difference between the two run is how I am setting the connection variable.
Thanks for this post, I noticed that doing insert from SQLAlchemy is slower than when is done directly from Python. So I wrote following code, in one case I can getting connection string from SQLAlchemy and in second case getting it from Python. Exact code is 50% slower when connection is obtained from SQLAlchemy. Any thoughts?
The code is below, only difference between the two run is how I am setting the connection variable.
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
import time
import iris
# Create the SQLAlchemy engine
DATABASE_URL = "iris+intersystems://XXX:XXX@localhost:1972/USER"
engine = create_engine(DATABASE_URL, echo=True)
args = {'hostname':'localhost', 'port':1972, 'namespace':'USER', 'username':'XXX', 'password':'XXX'}
# connection = iris.connect(**args)
connection = engine.raw_connection()
# Generate data for each row (49 fields)
num_records = 100000
# Define SQL insert statement
sql_insert = "INSERT INTO SQLUser . test_table ( field_0 , field_1 , field_2 , field_3 , field_4 , field_5 , field_6 , field_7 , field_8 , field_9 , field_10 , field_11 , field_12 , field_13 , field_14 , field_15 , field_16 , field_17 , field_18 , field_19 , field_20 , field_21 , field_22 , field_23 , field_24 , field_25 , field_26 , field_27 , field_28 , field_29 , field_30 , field_31 , field_32 , field_33 , field_34 , field_35 , field_36 , field_37 , field_38 , field_39 , field_40 , field_41 , field_42 , field_43 , field_44 , field_45 , field_46 , field_47 , field_48 ) VALUES ( ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? ) "
record_values = []
# Execute SQL insert
try:
start_time = time.perf_counter() # Capture start time
batch = 0
cursor = connection.cursor()
for _ in range(num_records):
record_values = [np.random.rand() for _ in range(49)]
cursor.execute(sql_insert, record_values)
batch = batch + 1
if batch >= 10000:
connection.commit()
batch = 0
# print("Record inserted successfully!")
connection.commit()
end_time = time.perf_counter() # Capture end time
elapsed_time = end_time - start_time
print(f"Time taken: {elapsed_time} seconds to insert {num_records} at ", num_records/elapsed_time, " records per second.")
except Exception as e:
print("Error inserting record:", e)
finally:
cursor.close()
connection.close()
engine.dispose()