CNF Database
Useful Queries
psql -h myconfluencedb -U atlconfluence -W -d confluence
\pset format wrapped
Spaces
SELECT spaceid,
spacename
FROM spaces
ORDER BY spacename;
Pages
\prompt 'spaceid> ' space
SELECT c.contentid,
c.contenttype,
c.title
FROM content c
WHERE c.spaceid = :space
AND c.contenttype = 'PAGE'
AND c.content_status = 'current'
ORDER BY c.title;
SELECT COUNT(c.contentid) AS "number of pages",
s.spaceid,
s.spacename
FROM content c
JOIN spaces s ON c.spaceid = s.spaceid
WHERE c.spaceid IS NOT NULL
AND c.prevver IS NULL
AND c.contenttype = 'PAGE'
AND c.content_status = 'current'
GROUP BY s.spacename,
s.spaceid
ORDER BY "number of pages" DESC;
SELECT c.title,
u.username AS Creator,
c.creationdate,
c.lastmoddate,
um.username AS LastModifier
FROM content c
JOIN user_mapping u ON c.creator = u.user_key
JOIN user_mapping um ON c.lastmodifier = um.user_key
WHERE c.prevver IS NULL
AND c.contenttype = 'PAGE'
AND c.content_status = 'current'
ORDER BY c.title;
SELECT s.spacekey,
c.title,
u.username AS Creator,
c.creationdate,
um.username AS LastModifier,
c.lastmoddate
FROM content c
JOIN spaces s ON c.spaceid = s.spaceid
JOIN user_mapping u ON c.creator = u.user_key
JOIN user_mapping um ON c.lastmodifier = um.user_key
WHERE c.prevver IS NULL
AND c.contenttype = 'PAGE'
AND c.content_status = 'current'
AND s.spacekey IN ('MYSPACE1','MYSPACE2','etc')
ORDER BY s.spacekey,
c.title;
SELECT COUNT(c.version) AS "number of versions",
c.title,
s.spacename
FROM content c
LEFT JOIN spaces s ON c.spaceid= s.spaceid
WHERE c.contenttype = 'PAGE'
AND s.spacename = 'Database'
GROUP BY s.spacename,
c.title
ORDER BY c.title ASC;
SELECT c.contentid,
c.pageid,
c.parentid,
c.title,
s.spaceid,
s.spacename
FROM content c
LEFT JOIN spaces s ON c.spaceid= s.spaceid
WHERE c.contenttype = 'PAGE'
AND c.content_status = 'current'
ORDER BY s.spacename, c.title ASC;
Page Content
SELECT body
FROM bodycontent
WHERE contentid = 999999;
SELECT bc.body
FROM bodycontent bc
JOIN content c ON bc.contentid = c.contentid
WHERE c.title = 'page_title'
AND c.prevver IS NULL \g filename.htm
SELECT bc.body
FROM bodycontent bc
JOIN content c ON bc.contentid = c.contentid
WHERE c.contenttype = 'PAGE'
AND c.content_status = 'current'
AND c.pageid = 99999999;
The example below enables you to extract HTML files from the database. The directory structure works with the attachment extraction detailed later on this page.
vi /mnt/efs/script1.sql
SELECT c.contentid||' xx '||c.contentid
FROM content c
WHERE c.spaceid = :space
AND c.contenttype = 'PAGE'
AND c.content_status = 'current' \g /mnt/efs/temp.sh
vi /mnt/efs/script2.sql
SELECT c.contentid,
c.contenttype,
c.title
FROM content c
WHERE c.spaceid = :space
AND c.contenttype = 'PAGE'
AND c.content_status = 'current'
ORDER BY c.title \g readme.txt
vi /mnt/efs/script.sh
export RDSHOST=RDSEndpoint
read -p "Enter spaceid: " myvar
export SPACEID=${myvar}
cd /mnt/efs/content/${SPACEID}
pwd
psql -h $RDSHOST -U atlconfluence -d confluence -t -v space=${SPACEID} -f /mnt/efs/script1.sql
cat /mnt/efs/temp.sh | awk 'NF' | sed 's/^/SELECT body FROM bodycontent WHERE contentid =/' | sed 's/xx/\\g/' | sed 's/$/.htm/' >/mnt/efs/temp.sql
psql -h $RDSHOST -U atlconfluence -d confluence -t -f /mnt/efs/temp.sql
psql -h $RDSHOST -U atlconfluence -d confluence -t -v space=${SPACEID} -f /mnt/efs/script2/sql
ls /mnt/efs/content/${SPACEID}
sh /mnt/efs/script.sh
Script should create an html file for each page and a readme.txt mapping the html file to a page title.
Attachments
Size...
SELECT c.title AS Attachment_Name,
cp.longval AS Attachment_Size,
s.spacename,
c2.title AS Page_Title,
c.pageid AS Location
FROM content c
JOIN content c2 ON c.pageid= c2.contentid
JOIN contentproperties cp ON c.contentid = cp.contentid
JOIN spaces s on c2.spaceid = s.spaceid
WHERE c.contenttype = 'ATTACHMENT'
AND cp.propertyname = 'FILESIZE'
ORDER BY cp.longval DESC;
To extract all attachments to their original filenames...
First create the directory structure that the attachments will be saved to.
Run the SQL to generate the mkdir script.
Run sh /tmp/mktarget.sh to create the directories.
SELECT 'mkdir -p /mnt/efs/content/'||
c.spaceid||'/'||
c.pageid||'/'||
c.contentid AS MK
FROM content c
WHERE c.contenttype = 'ATTACHMENT'
AND c.content_status = 'current'\g /tmp/mktarget.sh
Run the SQL to generate the copy script.
Run sh /tmp/cpattach.sh to copy the attachments to their original names.
SELECT 'rsync -i /mnt/efs/aws*/confluence/shared-home/attachments/ver003/'||
MOD(CAST( RIGHT(CAST(c.spaceid AS VARCHAR),3) AS INTEGER),250)||'/'||
MOD(CAST(SUBSTR(RIGHT(CAST(c.spaceid AS VARCHAR),6),1,3) AS INTEGER),250)||'/'||
c.spaceid||'/'||
MOD(CAST( RIGHT(CAST(c.pageid AS VARCHAR),3) AS INTEGER),250)||'/'||
MOD(CAST(SUBSTR(RIGHT(CAST(c.pageid AS VARCHAR),6),1,3) AS INTEGER),250)||'/'||
c.pageid||'/'||
c.contentid||'/'||
c.version||' '||
'/mnt/efs/content/'||
c.spaceid||'/'||
c.pageid||'/'||
c.contentid||'/'||'"'||
c.title||'"' AS MV
FROM content c
WHERE c.contenttype = 'ATTACHMENT'
AND c.content_status = 'current'\g /tmp/cpattach.sh