Improve the Performance of InsertIfNotFound Using an In-Memory Database and Bulk Copy
Often we need to insert many rows into the database while making sure that these rows don't already exist. We use the InsertIfNotFound relation for that.
As the number of rows grows, the cost of checking the database for each row's existence and inserting the rows one by one can become significant.
Let's use the following code as an example. It makes sure that every customer in a specific country has an order:
public class VerifyOrdersOfCustomer : BusinessProcessBase
{
    readonly Models.Customers Customers = new Models.Customers();
    readonly Models.Orders Orders = new Models.Orders();
    TextColumn _pCountry = new TextColumn();

    public VerifyOrdersOfCustomer()
    {
        From = Customers;
        Where.Add(Customers.Country.IsEqualTo(_pCountry));
        Relations.Add(Orders, RelationType.InsertIfNotFound,
            Orders.CustomerID.BindEqualTo(Customers.CustomerID));
    }

    protected override void OnLeaveRow()
    {
        if (!Relations[Orders].RowFound)
        {
            Orders.OrderID.Value = 20000 + Counter;
            Orders.ShipCountry.Value = Customers.Country;
        }
    }

    public void Run(TextParameter country)
    {
        BindParameter(_pCountry, country);
        Execute();
    }
}
We're going to change this code so that, instead of processing the rows one by one, we first load the relevant rows into memory, process them there, and save them to the database when we're done.
Step 1, change our Orders entity and make it memory-enabled
In the Orders.cs file we'll change its constructor and add another one:
-public Orders() : base("dbo.Orders", "Orders", Shared.DataSources.Northwind)
+public Orders(bool inMemory) : base("dbo.Orders", "Orders", SqlHelper.GetMemoryOrReal(inMemory, Shared.DataSources.Northwind))
 {...}
+public Orders() : this(false) { }
That way, when we create an instance of this class, we can decide if we want an in-memory version of it.
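For example (the variable names here are just for illustration):
var realOrders = new Models.Orders();          // reads and writes dbo.Orders in the database
var inMemoryOrders = new Models.Orders(true);  // reads and writes an in-memory copy via MemoryDatabase.Instance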
You can find the source of the SqlHelper class further down in this article.
Step 2, adjust our controller
Now we'll change our controller to use the in-memory version while it's running, and insert the rows once it's done:
public class VerifyOrdersOfCustomer : BusinessProcessBase
{
    readonly Models.Customers Customers = new Models.Customers();
    readonly Models.Orders Orders = new Models.Orders(true); // the controller uses the in-memory version of the table
    TextColumn _pCountry = new TextColumn();
    ...

    public void Run(TextParameter country)
    {
        BindParameter(_pCountry, country);
        Execute();
        var real = new Models.Orders();
        SqlHelper.CopyData(Orders, real);
        Orders.Truncate(); // cleanup of the Orders in-memory data
    }
}
- We've changed the controller to use the in-memory version of the table
- Once the controller is done, we use the CopyData method to copy the in-memory data to the database
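Note that the in-memory data lives in the shared MemoryDatabase.Instance, so if CopyData fails, the in-memory rows would linger until the next run. One way to make sure the cleanup always happens (a small sketch using plain C#, not something the framework requires) is to move the Truncate call into a finally block:
public void Run(TextParameter country)
{
    BindParameter(_pCountry, country);
    try
    {
        Execute();
        var real = new Models.Orders();
        SqlHelper.CopyData(Orders, real);
    }
    finally
    {
        Orders.Truncate(); // cleanup runs even if Execute or CopyData throws
    }
}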
The CopyData Method
The CopyData method is optimized for performance. For SQL Server it uses the SqlBulkCopy API, improving insert performance by more than 90%: instead of sending many individual insert statements, it sends all the data in one bulk operation.
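To give a feel for what this looks like outside the framework, here is a minimal, standalone SqlBulkCopy sketch. The connection string and the reduced column list are assumptions for illustration, not part of the CopyData implementation:
using System.Data;
using System.Data.SqlClient;

class BulkCopyExample
{
    static void Main()
    {
        // An in-memory DataTable shaped like a few columns of dbo.Orders (illustrative subset)
        var table = new DataTable("Orders");
        table.Columns.Add("OrderID", typeof(int));
        table.Columns.Add("CustomerID", typeof(string));
        table.Columns.Add("ShipCountry", typeof(string));
        table.Rows.Add(20001, "ALFKI", "Germany");
        table.Rows.Add(20002, "ANATR", "Mexico");

        // Hypothetical connection string, adjust for your environment
        using (var conn = new SqlConnection("Server=.;Database=Northwind;Integrated Security=true"))
        {
            conn.Open();
            using (var bulk = new SqlBulkCopy(conn))
            {
                bulk.DestinationTableName = "dbo.Orders";
                bulk.ColumnMappings.Add("OrderID", "OrderID");
                bulk.ColumnMappings.Add("CustomerID", "CustomerID");
                bulk.ColumnMappings.Add("ShipCountry", "ShipCountry");
                // One streamed operation instead of one INSERT statement per row
                bulk.WriteToServer(table);
            }
        }
    }
}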
Step 3, handling existing rows
So far our example handled only new rows; now we'll adjust it to handle existing rows as well. We'll start by loading the relevant rows from the database into memory, and before writing the results back we'll delete the existing rows in the database and insert the versions that exist in memory.
public void Run(TextParameter country)
{
    BindParameter(_pCountry, country);
    var real = new Models.Orders();
    var where = real.ShipCountry.IsEqualTo(country.Value);
    // copy the data from the database to the memory version
    SqlHelper.CopyData(real, Orders, where);
    // delete the data in the database before we insert the values from the memory
    real.Delete(where);
    Execute();
    // insert the rows from the memory to the database
    SqlHelper.CopyData(Orders, real);
    Orders.Truncate(); // cleanup of the Orders in-memory data
}
SqlHelper Source
using ENV.Data;
using ENV.Data.DataProvider;
using Firefly.Box;
using Firefly.Box.Data.Advanced;
using Firefly.Box.Data.DataProvider;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Northwind
{
    public class SqlHelper
    {
        public static IEntityDataProvider GetMemoryOrReal(bool useMemory, IEntityDataProvider originalConnection)
        {
            if (useMemory)
                return ENV.Data.DataProvider.MemoryDatabase.Instance;
            else
                return originalConnection;
        }

        public static void CopyData(Entity sourceTable, Entity targetTable, FilterBase sourceTableWhere = null)
        {
            using (ENV.Utilities.Profiler.StartContext("Copy Data To " + targetTable.GetType().Name))
            {
                #region Try use Sql Bulk Insert
                if (sourceTableWhere == null && sourceTable.DataProvider is MemoryDatabase && targetTable.DataProvider is DynamicSQLSupportingDataProvider)
                {
                    using (var c = ((DynamicSQLSupportingDataProvider)targetTable.DataProvider).CreateCommand())
                    {
                        var sqlConn = c.Connection as System.Data.SqlClient.SqlConnection;
                        if (sqlConn != null)
                        {
                            using (ENV.Utilities.Profiler.StartContext("Bulk Insert Into " + targetTable.GetType().Name))
                            {
                                var bc = new System.Data.SqlClient.SqlBulkCopy(sqlConn,
                                    System.Data.SqlClient.SqlBulkCopyOptions.KeepIdentity,
                                    (System.Data.SqlClient.SqlTransaction)c.Transaction);
                                bc.BulkCopyTimeout = 0;
                                bc.DestinationTableName = targetTable.EntityName;
                                foreach (var item in targetTable.Columns)
                                {
                                    if (!item.DbReadOnly)
                                        bc.ColumnMappings.Add(item.Name, item.Name);
                                }
                                bc.WriteToServer(((MemoryDatabase)sourceTable.DataProvider).DataSet.Tables[sourceTable.EntityName]);
                                return;
                            }
                        }
                    }
                }
                #endregion
                var bp = new BusinessProcess { From = sourceTable };
                if (sourceTableWhere != null)
                    bp.Where.Add(sourceTableWhere);
                bp.Relations.Add(targetTable, RelationType.Insert);
                bp.ForEachRow(() =>
                {
                    foreach (var column in targetTable.Columns)
                    {
                        if (!column.DbReadOnly)
                        {
                            column.Value = sourceTable.Columns[column.Name];
                        }
                    }
                });
            }
        }
    }
}