This page describes various methods for extracting data from a database into a plain text format (e.g. CSV, JSON, XML etc) ingesting data into a database from a plain text format and manipulating data in a plain text file from within the database. The Database technologies where each method applies are shown by the buttons in the right hand column.
Tab separated, newline terminated...
SELECT mycol1,mycol2,mycol3
FROM mytable
INTO OUTFILE '/tmp/myfile.txt';
Comma separated, newline terminated, each field enclosed in double quotes...
SELECT mycol1,mycol2,mycol3
FROM mytable
INTO OUTFILE '/tmp/myfile.csv'
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
SET MARKUP csv ON DELIMITER ,
SPOOL myfile.csv
SELECT mycol1,mycol2,mycol3
FROM mytable;
SPOOL OFF
SET COLSEP ,
SET PAGESIZE 0
SET TRIMSPOOL ON
SET HEADSEP OFF
SET LINESIZE 80
SET NUMW 10
SPOOL myfile.csv
SELECT mycol1,mycol2,mycol3
FROM mytable;
SPOOL OFF
TODO
In psql...
SELECT mycol1,mycol2,mycol3
FROM mytable \g /tmp/myfile.txt
To append to an existing file...
SELECT mycol1,mycol2,mycol3
FROM mytable \g | cat >>/tmp/myfile.txt
Alternate method in psql...
\o /tmp/myfile.txt
SELECT mycol1,mycol2,mycol3
FROM mytable;
\o
From shell...
echo 'SELECT mycol1,mycol2,mycol3 FROM mytable' | psql myDB >> /tmp/myfile.txt
TODO
BULK INSERT dbo.MyTable
FROM 'C:\data\myfile.csv'
WITH (
FIRSTROW = 2, -- skip header
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n',
TABLOCK,
CODEPAGE = '65001' -- UTF-8
);
The CSV can be on a local disk or network shareCREATE PROC dbo.LoadCsvFile
@FilePath NVARCHAR(4000)
AS
BEGIN
SET NOCOUNT ON;
BEGIN TRY
DECLARE @sql NVARCHAR(MAX);
SET @sql = N'
BULK INSERT dbo.MyTable
FROM ''' + REPLACE(@FilePath,'''','''''') + '''
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = '','',
ROWTERMINATOR = ''\n'',
CODEPAGE = ''65001''
);';
EXEC sys.sp_executesql @sql;
END TRY
BEGIN CATCH
THROW;
END CATCH
END;
sp_configure 'Ad Hoc Distributed Queries', 1;
RECONFIGURE;
INSERT INTO dbo.MyTable
SELECT *
FROM OPENROWSET(
BULK 'C:\data\myfile.csv',
FORMAT = 'CSV',
FIRSTROW = 2
) AS data;
TODO
CREATE EXTERNAL TABLE dbo.MyCsv
(
col1 INT,
col2 VARCHAR(50)
)
WITH (
LOCATION = '/myfile.csv',
DATA_SOURCE = MyDataSource,
FILE_FORMAT = MyCsvFormat
);
INSERT INTO dbo.MyTable
SELECT * FROM dbo.MyCsv;
CREATE TABLE dbo.ImportTable
(
Col1 INT,
Col2 VARCHAR(50),
Col3 DATETIME
);
CREATE TABLE dbo.CsvImportLog
(
LogID INT IDENTITY PRIMARY KEY,
FileName NVARCHAR(255),
LoadStart DATETIME2,
LoadEnd DATETIME2,
RowsInserted INT,
Status VARCHAR(10),
ErrorMessage NVARCHAR(MAX)
);
CREATE PROC dbo.LoadCsvFile
@FilePath NVARCHAR(4000),
@OriginalFileName NVARCHAR(255)
AS
BEGIN
SET NOCOUNT ON;
DECLARE
@startTime DATETIME2 = SYSDATETIME(),
@rows INT = 0,
@sql NVARCHAR(MAX);
BEGIN TRY
SET @sql = N'
BULK INSERT dbo.ImportTable
FROM ''' + REPLACE(@FilePath,'''','''''') + '''
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = '','',
ROWTERMINATOR = ''\r\n'',
TABLOCK,
ERRORFILE = ''C:\BulkErrors\' + @OriginalFileName + ''',
MAXERRORS = 100
);';
EXEC sys.sp_executesql @sql;
SET @rows = @@ROWCOUNT;
INSERT dbo.CsvImportLog
(
FileName, LoadStart, LoadEnd,
RowsInserted, Status
)
VALUES
(
@OriginalFileName,
@startTime,
SYSDATETIME(),
@rows,
'SUCCESS'
);
END TRY
BEGIN CATCH
INSERT dbo.CsvImportLog
(
FileName, LoadStart, LoadEnd,
RowsInserted, Status, ErrorMessage
)
VALUES
(
@OriginalFileName,
@startTime,
SYSDATETIME(),
0,
'FAILED',
ERROR_MESSAGE()
);
THROW;
END CATCH
END;
$SqlServer = "SQL01"
$Database = "MyDB"
$DropFolder = "\\myfile\drop"
$Archive = "\\myfile\archive"
$ErrorFolder = "\\myfile\error"
$files = Get-ChildItem $DropFolder -Filter "*.csv"
foreach ($file in $files) {
try {
Write-Host "Processing $($file.Name)"
# Prevent double-load
$processing = "$($file.FullName).processing"
Rename-Item $file.FullName $processing
$escapedPath = $processing.Replace("'", "''")
$query = @"
EXEC dbo.LoadCsvFile
@FilePath = N'$escapedPath',
@OriginalFileName = N'$($file.Name)'
"@
Invoke-Sqlcmd `
-ServerInstance $SqlServer `
-Database $Database `
-Query $query `
-QueryTimeout 0
Move-Item $processing (Join-Path $Archive $file.Name)
Write-Host "SUCCESS: $($file.Name)"
}
catch {
Write-Error "FAILED: $($file.Name) - $_"
if (Test-Path $processing) {
Move-Item $processing (Join-Path $ErrorFolder $file.Name)
}
}
}
SQL Server Agent service account must have:
Read/write on
\\myfile\drop
\\myfile\archive
\\myfile\error
Write on
C:\BulkErrors
Use the Stored Procedure from above (dbo.LoadCsvFile)
Create Job Shell...
USE msdb;
GO
EXEC sp_add_job
@job_name = N'CSV Import – ImportTable',
@enabled = 1,
@description = N'Automatically imports CSV files into dbo.ImportTable';
GO
Add PowerShell Job Step...
EXEC sp_add_jobstep
@job_name = N'CSV Import – ImportTable',
@step_name = N'Process CSV Files',
@subsystem = N'PowerShell',
@command = N'
$SqlServer = "SQL01"$Database = "MyDB"',
@retry_attempts = 0,
@on_success_action = 1, -- Quit with success
@on_fail_action = 2; -- Quit with failure
GO
Add cleanup step...
EXEC sp_add_jobstep
@job_name = N'CSV Import – ImportTable',
@step_name = N'Cleanup Old Error Files',
@subsystem = N'PowerShell',
@command = N'
Get-ChildItem "C:\BulkErrors" -File |
Where-Object { $_.LastWriteTime -lt (Get-Date).AddDays(-30) } |
Remove-Item -Force
',
@on_success_action = 1;
GO
Create schedule...
EXEC sp_add_schedule
@schedule_name = N'CSV Import – Every 5 Minutes',
@freq_type = 4, -- Daily
@freq_interval = 1,
@freq_subday_type = 4, -- Minutes
@freq_subday_interval = 5,
@active_start_time = 000000;
GO
EXEC sp_attach_schedule
@job_name = N'CSV Import – ImportTable',
@schedule_name = N'CSV Import – Every 5 Minutes';
GO
Attach Job to Server...
EXEC sp_add_jobserver
@job_name = N'CSV Import – ImportTable';
GO
TODO
TODO
TODO
TODO
TODO