--[[
@Title: Check for Possible Duplicate Media (FH7)
@Type: Standard
@Author: Mark Draper
@Version: 1.1
@LastUpdated: 15 May 2025
@Licence: This plugin is copyright (c) 2025 Mark Draper and is licensed under the MIT License which
is hereby incorporated by reference (see https://pluginstore.family-historian.co.uk/fh-plugin-licence)
@Description: This plugin provides a very fast and up to date alternative to the well-established Check For
Possible Duplicate Media plugin by extending the application to Media Records with non-Latin (i.e. Unicode)
filenames and paths and to files of essentially unlimited size. It also supports the FH7/GEDCOM 5.5.1 feature
of multiple files in the same Media Record, the option to automatically merge identical records, more detailed
reporting and a new online help page.
]]
--[[
Version 1.0 (Jan 2024)
Initial Plugin Store version
Version 1.1 (May 2025)
Enhanced tooltips added
Support for multiple monitors added
]]
fhInitialise(7, 0, 15, 'save_recommended')
require('iuplua')
require('iupluaimglib')
MD5 = require 'md5'
fhu = require('fhUtils')
fhfu = require('fhFileUtils')
fhu.setIupDefaults()
FSO = luacom.CreateObject('Scripting.FileSystemObject')
luaShell = luacom.CreateObject('WScript.Shell')
function main()
    -- Plugin entry point.
    -- 1. Scans every Media (OBJE) record and groups its existing files by file size.
    -- 2. Hashes only files whose size is shared with at least one other file.
    -- 3. Groups the hashed files by hash and by "core name" (file name without a " (n)" suffix)
    --    and classifies each pair as 'Records Differ', 'Duplicate File', 'Duplicate Record'
    --    or 'File Suffix'.
    -- 4. Shows the summary menu and runs the action the user selects.
    -- Returns nothing. Returns early if the pre-flight checks fail, the user cancels,
    -- or nothing reportable is found.

    -- carry out pre-flight checks
    local Max = Initialise()
    if not Max then return end
    -- check whether Windows method is available using current project file (small and always present)
    -- (GetWindowsHash only declares one parameter, so the extra argument is ignored)
    local WindowsEnabled = GetWindowsHash(fhGetContextInfo('CI_PROJECT_FILE'), true) and true
    -- create progress bar
    ProgressBarStart(Max)
    iup.SetAttribute(gblProgBar.whirlywheel, 'START', 'YES')
    local tblT = {} -- file size -> list of {Record, File, N}
    local tblN = {Records = 0, Files = 0, Unicode = 0, Large = 0, Missing = 0, Multiple = 0,
        Differs = 0, Unlinked = 0, Duplicates = 0, Suffix = 0, Start = os.time()}
    local tblMultiple = {} -- records with >1 file attached
    -- get table of media file sizes
    local pM = fhNewItemPtr()
    pM:MoveToFirstRecord('OBJE')
    while pM:IsNotNull() do
        tblN.Records = tblN.Records + 1
        if fhCallBuiltInFunction('LinksTo', pM) == 0 then tblN.Unlinked = tblN.Unlinked + 1 end
        local i = 0 -- number of existing files found so far in this record
        local pF = fhNewItemPtr()
        pF:MoveTo(pM, '~.FILE')
        while pF:IsNotNull() do
            local FileName = fhGetValueAsText(pF)
            if FileName:match('^Media%\\') then
                -- relative path: resolve against the project data folder
                FileName = fhGetContextInfo('CI_PROJECT_DATA_FOLDER') .. '\\' .. FileName
            end
            if fhfu.fileExists(FileName) then
                tblN.Files = tblN.Files + 1
                local tblT1 = {}
                if IsUnicode(FileName) then
                    tblN.Unicode = tblN.Unicode + 1
                end
                local size = FSO:GetFile(FileName).Size
                if size > 2^25 then
                    tblN.Large = tblN.Large + 1
                end
                tblT1.Record = pM:Clone()
                tblT1.File = FileName
                i = i + 1
                tblT1.N = i
                if i == 2 then -- first additional file
                    tblN.Multiple = tblN.Multiple + 1
                    table.insert(tblMultiple, pM:Clone())
                end
                if not tblT[size] then tblT[size] = {} end
                table.insert(tblT[size], tblT1)
            else
                tblN.Missing = tblN.Missing + 1
            end
            pF:MoveNext('SAME_TAG')
        end
        if tblN.Records % 10 == 0 then gblProgBar.bar.Value = tblN.Records end
        gblProgBar.Dialog.Title = 'Checking Media Records (' .. tblN.Records .. ' of ' .. Max .. ')...'
        if gblProgBar.Cancel then
            -- close the progress window rather than leaving it on screen
            gblProgBar.Dialog:destroy()
            return
        end
        iup.LoopStep()
        pM:MoveNext()
    end
    gblProgBar.Dialog.Title = 'Preparing hashes...'
    -- delete all examples of unique file size (cannot be duplicated)
    for size, Files in pairs(tblT) do
        if #Files == 1 then
            tblT[size] = nil
        end
    end
    -- get table of file hashes to be determined
    local tblFh = {}
    for _, Files in pairs(tblT) do
        for _, F in ipairs(Files) do
            tblFh[F.File] = true
        end
    end
    -- how many hashes to calculate?
    tblN.HashCount = 0
    for File, _ in pairs(tblFh) do
        tblN.HashCount = tblN.HashCount + 1
    end
    -- get hashes
    tblN.Hashed = 0
    gblProgBar.bar.Max = tblN.HashCount
    for File, _ in pairs(tblFh) do
        tblN.Hashed = tblN.Hashed + 1
        gblProgBar.Dialog.Title = 'Calculating file hashes (' .. tblN.Hashed .. ' of ' ..
            tblN.HashCount .. ')...'
        gblProgBar.bar.Value = tblN.Hashed
        -- a failed hash stores nil, which simply clears this (existing) key
        tblFh[File] = GetHash(File, WindowsEnabled)
        -- keep the window responsive and honour the Cancel button while hashing
        iup.LoopStep()
        if gblProgBar.Cancel then
            gblProgBar.Dialog:destroy()
            return
        end
    end
    gblProgBar.Dialog.Title = 'Collating final report...'
    -- add hashes and core name to main table (and record ID for debug and audit)
    for _, v in pairs(tblT) do
        for _, F in ipairs(v) do
            F.Hash = tblFh[F.File]
            F.CoreName = GetCoreName(F.File)
            F.ID = fhGetRecordId(F.Record)
            F.Fingerprint = GetFingerprint(F.Record)
        end
    end
    -- structure by hash and core name
    local tblD = {}
    for _, v in pairs(tblT) do
        for _, F in ipairs(v) do
            -- files that could not be hashed have no Hash; nil cannot be used as a
            -- table key, so leave them out of the comparison instead of failing
            if F.Hash then
                if not tblD[F.Hash] then tblD[F.Hash] = {} end
                if not tblD[F.Hash][F.CoreName] then tblD[F.Hash][F.CoreName] = {} end
                local tblM = {}
                tblM.Record = F.Record
                tblM.ID = F.ID
                tblM.File = F.File
                tblM.N = F.N
                tblM.Fingerprint = F.Fingerprint
                table.insert(tblD[F.Hash][F.CoreName], tblM)
            end
        end
    end
    -- ensure first (master) record has the core name with no suffix
    for Hash, tblCoreName in pairs(tblD) do
        for CoreName, tblF in pairs(tblCoreName) do
            if tblF[1].File ~= CoreName then
                for i, F in ipairs(tblF) do
                    if i ~= 1 and F.File == CoreName and F.Fingerprint == tblF[1].Fingerprint then
                        tblF[1], tblF[i] = tblF[i], tblF[1] -- swap table records so i is now 1
                        break
                    end
                end
            end
        end
    end
    -- delete any non-duplicated hashes
    for Hash, tblCoreName in pairs(tblD) do
        local i = 0
        for CoreName, tblF in pairs(tblCoreName) do
            i = i + #tblF
        end
        if i < 2 then tblD[Hash] = nil end
    end
    -- tabulate duplicates with same core name
    local tblDuplicates = {}
    local tblLogged = {} -- prevents duplicate listings if multiple files
    for Hash, tblCoreName in pairs(tblD) do
        for CoreName, tblF in pairs(tblCoreName) do
            if #tblF > 1 then
                for i, F in ipairs(tblF) do
                    if i ~= 1 and not tblLogged[tblF[1].ID .. '-' .. tblF[i].ID] then
                        local tblA = {}
                        tblA.Record1 = tblF[1].Record
                        tblA.File1 = tblF[1].File
                        tblA.N1 = tblF[1].N
                        tblA.ID1 = tblF[1].ID
                        tblA.Record2 = tblF[i].Record
                        tblA.File2 = tblF[i].File
                        tblA.N2 = tblF[i].N
                        tblA.ID2 = tblF[i].ID
                        if tblF[1].Fingerprint ~= tblF[i].Fingerprint then
                            tblA.Match = 'Records Differ'
                            tblN.Differs = tblN.Differs + 1
                        elseif tblF[1].ID == tblF[i].ID then
                            tblA.Match = 'Duplicate File'
                        elseif tblF[1].File == tblF[i].File then
                            tblA.Match = 'Duplicate Record'
                            tblN.Duplicates = tblN.Duplicates + 1
                        else
                            tblA.Match = 'File Suffix'
                            tblN.Suffix = tblN.Suffix + 1
                        end
                        table.insert(tblDuplicates, tblA)
                        tblLogged[tblF[1].ID .. '-' .. tblF[i].ID] = true
                    end
                end
            end
        end
    end
    -- tabulate comparison of different core names within same hash
    for Hash, tblCoreName in pairs(tblD) do
        local tblFP = {} -- the master entry of each core name sharing this hash
        for CoreName, tblF in pairs(tblCoreName) do
            table.insert(tblFP, tblF[1])
        end
        if #tblFP > 1 then
            for i, F in ipairs(tblFP) do
                if i ~= 1 and not tblLogged[tblFP[1].ID .. '-' .. tblFP[i].ID] then
                    local tblA = {}
                    tblA.Record1 = tblFP[1].Record
                    tblA.File1 = tblFP[1].File
                    tblA.N1 = tblFP[1].N
                    tblA.ID1 = tblFP[1].ID
                    tblA.Record2 = tblFP[i].Record
                    tblA.File2 = tblFP[i].File
                    tblA.N2 = tblFP[i].N
                    tblA.ID2 = tblFP[i].ID
                    tblA.Match = 'Records Differ'
                    table.insert(tblDuplicates, tblA)
                    tblN.Differs = tblN.Differs + 1
                end
            end
        end
    end
    gblProgBar.Dialog:destroy()
    -- return if no duplicates
    if tblN.Differs + tblN.Duplicates + tblN.Multiple + tblN.Suffix == 0 then
        MessageBox('No duplicate Media Records detected.', 'OK', 'INFORMATION')
        return
    end
    -- report results and present options menu
    local action = Menu(tblN, WindowsEnabled)
    if not action then
        return
    elseif action == 'FULL' then
        ReportAnalysis(tblDuplicates, tblN, true)
    elseif action == 'BASIC' then
        ReportAnalysis(tblDuplicates, tblN, false)
    elseif action == 'MULTIPLE' then
        ReportDuplicates(tblMultiple)
    elseif action == 'DUPLICATE' then
        GetDuplicateLinks(tblDuplicates, tblN.Suffix > 0)
    end
end
-- ******************************************************************************************************--
function Initialise()
    -- Pre-flight checks: confirms the plugin is running inside a project, makes sure
    -- the plugin's working folder exists, and counts the Media records.
    -- Returns the number of Media records, or nothing (after telling the user why)
    -- when the plugin cannot usefully run.
    if fhGetContextInfo('CI_APP_MODE') ~= 'Project Mode' then
        MessageBox('This plugin can only be run from within a Family Historian Project.', 'OK', 'ERROR')
        return
    end

    -- create the working folder one level at a time, parent first
    local Folder = fhGetContextInfo('CI_APP_DATA_FOLDER')
    for _, Level in ipairs({'\\Plugin Data', '\\Check for Duplicated Media (FH7)'}) do
        Folder = Folder .. Level
        if not fhfu.folderExists(Folder) then
            fhfu.createFolder(Folder)
        end
    end

    -- walk the Media records simply to count them
    local Count = 0
    local pRecord = fhNewItemPtr()
    pRecord:MoveToFirstRecord('OBJE')
    while pRecord:IsNotNull() do
        Count = Count + 1
        pRecord:MoveNext()
    end

    if Count > 0 then
        return Count
    end
    -- nothing to check
    MessageBox('No Media Records.', 'OK', 'INFORMATION')
end
-- ******************************************************************************************************--
function GetHash(FileName, WindowsEnabled)
    -- Get the MD5 hash for the specified file, choosing the hashing route.
    --   FileName       full path of the file (UTF-8)
    --   WindowsEnabled true when the Windows command-script method is usable
    -- Returns the hash as a string, or nothing if the hash could not be obtained.
    --
    -- The Lua library can only open names that convert to ANSI without loss and are
    -- shorter than 250 characters.
    local LuaReadable = not IsUnicode(FileName) and FileName:len() < 250
    -- GetLuaHash returns a pseudo-MD5 (not the true MD5) from this size upwards,
    -- whereas the Windows method always returns the true MD5. To keep hashes of
    -- identical large files comparable, large files use the Windows method whenever
    -- it is available, regardless of the file name.
    local Large = FSO:GetFile(FileName).Size >= 2^25
    if LuaReadable and not (WindowsEnabled and Large) then
        return GetLuaHash(FileName)
    end
    if WindowsEnabled then -- use command script
        return GetWindowsHash(FileName)
    end
    -- create temporary copy with a plain name that the Lua library can open
    local MD5File = fhGetContextInfo('CI_APP_DATA_FOLDER') ..
        '\\Plugin Data\\Check for Duplicated Media (FH7)\\~TempFile.tmp'
    local cmd = 'copy "' .. FileName .. '" "' .. MD5File .. '"'
    if luaShell:Run('cmd.exe /c ' .. cmd, 0, true) ~= 0 then return end
    local hash = GetLuaHash(MD5File)
    fhfu.deleteFile(MD5File)
    return hash
end
-- ******************************************************************************************************--
function GetLuaHash(FileName)
    -- Hash a file with the Lua MD5 library.
    -- Files below 2^25 bytes are read and hashed in one go (true MD5). Bigger files
    -- are read in 2^25-byte pieces; each piece is hashed and the result is the MD5 of
    -- the concatenated piece hashes (a pseudo-MD5, not the file's true MD5).
    -- Returns the hash string, or nothing if the file cannot be opened.
    local OnePass = FSO:GetFile(FileName).Size < 2^25
    local Handle = io.open(fhConvertUTF8toANSI(FileName), 'rb')
    if not Handle then return end

    if OnePass then
        local Content = Handle:read('*all')
        Handle:close()
        return MD5.sumhexa(Content)
    end

    -- large file: hash piece by piece until the read returns nothing
    local Digests = {}
    local Piece = Handle:read(2^25)
    while Piece do
        table.insert(Digests, MD5.sumhexa(Piece))
        Piece = Handle:read(2^25)
    end
    Handle:close()
    return MD5.sumhexa(table.concat(Digests))
end
-- ******************************************************************************************************--
function GetWindowsHash(FileName)
    -- Determine MD5 using a Windows Command Script that runs Certutil.
    --   FileName  full path of the file to hash (UTF-8)
    -- Returns the hash string read from Certutil's output, or nothing if the script
    -- could not be written, the command failed, or no hash was found in the output.
    local Folder = fhGetContextInfo('CI_APP_DATA_FOLDER') .. '\\Plugin Data\\Check for Duplicated Media (FH7)'
    local CmdFile = Folder .. '\\GetHash.cmd'
    local OutputFile = Folder .. '\\GetHash.txt'
    -- Inside a command script '%' starts variable expansion, even between quotes,
    -- so a literal '%' in a path has to be written as '%%'.
    local function Escape(Path)
        local Escaped = Path:gsub('%%', '%%%%')
        return Escaped
    end
    local Cmd = 'chcp 65001\nCertutil -hashfile "' .. Escape(FileName) ..
        '" MD5 > "' .. Escape(OutputFile) .. '"'
    -- save command file in UTF-8 without BOM (as needed by Windows command shell)
    local F = io.open(CmdFile, 'wb')
    if not F then return end
    F:write(Cmd)
    F:close()
    -- remove any earlier output so that a stale hash can never be returned
    if fhfu.fileExists(OutputFile) then fhfu.deleteFile(OutputFile) end
    if luaShell:Run('cmd.exe /C "' .. CmdFile .. '"', 0, true) ~= 0 then return end
    -- check output file has been created ok, and read hash
    if fhfu.fileExists(OutputFile) then
        local Text = fhLoadTextFile(OutputFile)
        if not Text then return end
        -- the hash is the run of hex digits that sits between two control characters
        -- (i.e. on a line of its own)
        return Text:match('%c(%x+)%c')
    end
end
-- ******************************************************************************************************--
function Menu(tblN, WindowsEnabled)
    -- Show the results summary dialog and let the user pick what to do next.
    --   tblN            table of counters gathered by main()
    --   WindowsEnabled  true when the Windows hashing method was used
    -- Returns 'FULL', 'BASIC', 'MULTIPLE' or 'DUPLICATE' according to the button
    -- pressed, or nil if the dialog was closed with no action selected.
    local action -- form return value

    -- small builders shared by the three statistics frames
    local function Text(title)
        return iup.label{title = title, expand = 'HORIZONTAL'}
    end
    local function Grid(cells, lines)
        cells.orientation = 'VERTICAL'
        cells.numdiv = lines
        cells.sizelin = -1
        cells.gapcol = 10
        cells.gaplin = 2
        cells.margin = '10x10'
        return iup.gridbox(cells)
    end
    -- callback that records the chosen action and closes the dialog
    local function Choose(result)
        return function(self) action = result return iup.CLOSE end
    end
    local function Enabled(condition)
        return condition and 'YES' or 'NO'
    end

    -- Media Analysis frame (captions first, then the matching values)
    local fraAnalysis = iup.frame{
        Grid({
            Text('Total Records:'), Text('Total Files:'), Text('Missing Files:'),
            Text('Unlinked Records:'), Text('Records With Multiple Files:'),
            Text('Large Files (>32 MB):'), Text('Unicode File Names:'),
            Text(tblN.Records), Text(tblN.Files), Text(tblN.Missing),
            Text(tblN.Unlinked), Text(tblN.Multiple),
            Text(tblN.Large), Text(tblN.Unicode),
        }, 7),
        title = 'Media Analysis'
    }

    -- Duplicates frame
    local fraDuplicates = iup.frame{
        Grid({
            Text('Potential Duplicate Pairs:'), Text('Exact Duplicate Pairs:'),
            Text('File Suffix Difference:'),
            Text(tblN.Differs), Text(tblN.Duplicates), Text(tblN.Suffix),
        }, 3),
        title = 'Duplicates'
    }

    -- Technical frame
    local Mode = WindowsEnabled and 'Windows' or 'Lua MD5'
    local Elapsed = os.time() - tblN.Start .. ' s'
    if os.time() == tblN.Start then Elapsed = '<1 s' end
    local fraTechnical = iup.frame{
        Grid({
            Text('Operating Mode:'), Text('Run Time:'),
            Text(Mode), Text(Elapsed),
        }, 2),
        title = 'Plugin Operation'
    }

    -- Form buttons (a button is only enabled when there is something for it to act on)
    local AnyPairs = tblN.Differs + tblN.Duplicates + tblN.Suffix > 0
    local btnReport = iup.button{
        title = 'Display Plugin\nAnalysis', expand = 'YES',
        active = Enabled(AnyPairs),
        action = Choose('FULL'),
        tip = 'Tabulate detailed results\nand close plugin.'
    }
    local btnBasicReport = iup.button{
        title = 'Display Basic\nPlugin Analysis', expand = 'YES',
        active = Enabled(AnyPairs),
        action = Choose('BASIC'),
        tip = 'Tabulate summary results and close plugin\n(suitable for smaller monitors).'
    }
    local btnMultiple = iup.button{
        title = 'List Records With\nMultiple Files', expand = 'YES',
        active = Enabled(tblN.Multiple > 0), padding = '10x3',
        action = Choose('MULTIPLE'),
        tip = 'Tabulate Media Records that have more than\none file attached and close plugin.'
    }
    local btnMerge = iup.button{
        title = 'Merge Duplicate\nRecords', expand = 'YES',
        active = Enabled(tblN.Duplicates + tblN.Suffix > 0),
        action = Choose('DUPLICATE'),
        tip = 'Merge Media Record pairs that are exact duplicates of each other\n' ..
            'or differ just in their Media filename suffix.'
    }
    local btnHelp = iup.button{
        title = 'Plugin Help', expand = 'YES', tip = 'Display plugin help page',
        action = function(self) fhShellExecute('https://pluginstore.family-historian.co.uk/page/help/check-for-possible-duplicate-media-fh7') end,
    }
    local btnClose = iup.button{
        title = 'Close Plugin', expand = 'YES',
        action = function(self) return iup.CLOSE end,
        tip = 'Close plugin with no further action.'
    }
    local gridButtons = iup.gridbox{
        btnReport, btnBasicReport, btnMultiple, btnMerge, btnHelp, btnClose;
        orientation = 'VERTICAL', numdiv = 2, sizelin = -1, sizecol = -1,
        gapcol = 10, gaplin = 10, margin = '10x10', expand = 'NO',
        homogeneouscol = 'YES',
        homogeneouslin = 'YES',
    }

    -- define enhanced tool tips
    local enhanced = true -- comment out this line if enhanced tool tips are not required
    if enhanced then
        local Balloons = {
            {btnReport, 'Display Full Analysis'},
            {btnBasicReport, 'Display Basic Analysis'},
            {btnMultiple, 'Multiple Media Files'},
            {btnMerge, 'Duplicate Media Records'},
            {btnHelp, 'Help'},
            {btnClose, 'Close'},
        }
        for _, Balloon in ipairs(Balloons) do
            local control, heading = Balloon[1], Balloon[2]
            control.TipBalloon = 'YES'
            control.TipBalloonTitleIcon = 1 -- modify individually if different
            control.TipBalloonTitle = heading
        end
    end

    -- Build final form
    local dialog = iup.dialog{
        iup.vbox{
            iup.hbox{
                fraAnalysis,
                iup.vbox{fraDuplicates, fraTechnical; margin = '0x0', gap = 10};
                margin = '0x0', gap = 10
            },
            gridButtons;
            alignment = 'ACENTER', margin = '10x10'
        },
        resize = 'No', minbox = 'No', maxbox = 'No',
        title = 'Check for Possible Duplicate Media (FH7) (1.1)'
    }
    iup.SetAttribute(dialog, 'NATIVEPARENT', fhGetContextInfo('CI_PARENT_HWND'))
    dialog:popup()
    return action
end
-- ******************************************************************************************************--
function ReportAnalysis(tblDuplicates, tblN, FileDetails)
    -- Write the list of matched record pairs to the result set.
    --   tblDuplicates  list of pair tables (Record1/File1/N1, Record2/File2/N2, Match)
    --   tblN           counters from main(); only Multiple is read here
    --   FileDetails    true to include the File, Folder and (if any record has more
    --                  than one file) File # columns
    -- One set of columns is produced for the first record of each pair and an
    -- identical set for the second, followed by the Status column.

    -- column data for one side of the pairs
    local function NewSide()
        return {Record = {}, RIN = {}, File = {}, Folder = {}, N = {}, Links = {}}
    end
    local First, Second = NewSide(), NewSide()
    local Status = {}

    local function Collect(Side, Row, Record, FileName, FileNumber)
        Side.Record[Row] = Record
        Side.File[Row], Side.Folder[Row] = SplitFileFolder(FileName)
        Side.N[Row] = FileNumber
        Side.RIN[Row] = fhGetRecordId(Record)
        Side.Links[Row] = fhCallBuiltInFunction('LinksTo', Record)
    end

    for Row, D in ipairs(tblDuplicates) do
        Collect(First, Row, D.Record1, D.File1, D.N1)
        Collect(Second, Row, D.Record2, D.File2, D.N2)
        Status[Row] = D.Match
    end

    local function Output(Side, Heading)
        fhOutputResultSetColumn(Heading, 'item', Side.Record, #Side.Record, 200, 'align_left', 1)
        fhOutputResultSetColumn('RecordID', 'integer', Side.RIN, #Side.RIN, 35)
        if FileDetails then
            fhOutputResultSetColumn('File', 'text', Side.File, #Side.File, 60)
            fhOutputResultSetColumn('Folder', 'text', Side.Folder, #Side.Folder, 60)
            if tblN.Multiple > 0 then
                fhOutputResultSetColumn('File #', 'integer', Side.N, #Side.N, 25)
            end
        end
        fhOutputResultSetColumn('Links', 'integer', Side.Links, #Side.Links, 25)
    end

    fhOutputResultSetTitles('Duplicates')
    Output(First, 'Record')
    Output(Second, 'Duplicate')
    fhOutputResultSetColumn('Status', 'text', Status, #Status)
end
-- ******************************************************************************************************--
function ReportDuplicates(tblMultiple)
    -- Write the Media records that have more than one file attached to the result
    -- set, together with their record ids.
    --   tblMultiple  list of pointers to the records concerned
    local Ids = {}
    for Row = 1, #tblMultiple do
        Ids[Row] = fhGetRecordId(tblMultiple[Row])
    end
    fhOutputResultSetTitles('Multiple Files')
    fhOutputResultSetColumn('Record', 'item', tblMultiple, #tblMultiple, 200, 'align_left', 1)
    fhOutputResultSetColumn('Record Id', 'text', Ids, #Ids, 40, 'align_left', 1)
end
-- ******************************************************************************************************--
function GetDuplicateLinks(tblDuplicates, Suffix)
    -- Merge each duplicate record into its master record.
    --   tblDuplicates  list of pair tables produced by main()
    --   Suffix         true when 'File Suffix' pairs exist; the user is then asked
    --                  whether those should be merged as well
    -- For every selected pair the links pointing at the duplicate (Record2) are
    -- located and handed to MergeDuplicates together with the master (Record1).
    -- Returns nothing; the outcome is reported to the user in a message box.
    if Suffix then
        local Question = 'Do you wish to also merge record pairs where the only difference is ' ..
            'the Media filename suffix?'
        local Answer = MessageBox(Question, 'YESNOCANCEL', 'QUESTION', nil, '3')
        if Answer == '3' then
            return
        end
        if Answer == '2' then
            Suffix = false
        end
    end

    -- pick out the pairs to merge and total their links for the progress bar
    local Pending = {}
    local TotalLinks = 0
    for _, Duplicate in ipairs(tblDuplicates) do
        local Match = Duplicate.Match
        if Match == 'Duplicate Record' or (Suffix and Match == 'File Suffix') then
            TotalLinks = TotalLinks + fhCallBuiltInFunction('LinksTo', Duplicate.Record2)
            table.insert(Pending, Duplicate)
        end
    end

    ProgressBarStart(TotalLinks)
    iup.SetAttribute(gblProgBar.whirlywheel, 'START', 'YES')

    local MergeStarted -- set once at least one pair has been merged
    -- close the progress window after a cancel, warning if merges were already made
    local function Abandon()
        gblProgBar.Dialog:destroy()
        if MergeStarted then -- incomplete changes
            local Warning = 'Record merging has been interrupted, resulting in some ' ..
                'records not being merged. Select "Edit > Undo Plugin Updates" from ' ..
                'the main Family Historian menu if you wish to undo the merges ' ..
                'completed before stopping.'
            MessageBox(Warning, 'OK', 'WARNING')
        end
    end

    -- search for links (most likely locations first)
    local SearchOrder = {'SOUR', 'INDI', 'FAM', '_PLAC', 'NOTE', '_RNOT'}
    local pItem = fhNewItemPtr()
    local TotalFound = 0
    for _, Duplicate in ipairs(Pending) do
        gblProgBar.Dialog.Title = 'Merging ' .. fhGetDisplayText(Duplicate.Record2) .. ' ...'
        local Wanted = fhCallBuiltInFunction('LinksTo', Duplicate.Record2)
        local Located = 0
        local Hits = {}
        for _, Tag in ipairs(SearchOrder) do
            pItem:MoveToFirstRecord(Tag)
            while pItem:IsNotNull() and Located < Wanted do
                if fhGetValueAsLink(pItem):IsSame(Duplicate.Record2) then
                    Located = Located + 1
                    TotalFound = TotalFound + 1
                    gblProgBar.bar.Value = TotalFound
                    table.insert(Hits, pItem:Clone())
                end
                if gblProgBar.Cancel then
                    Abandon()
                    return
                end
                iup.LoopStep()
                pItem:MoveNextSpecial()
            end
            if Located == Wanted then break end -- all links to duplicate located
        end
        local ok, err = MergeDuplicates(Duplicate.Record1, Duplicate.Record2, Hits)
        if not ok then
            gblProgBar.Dialog:destroy()
            local Failure = 'Unable to complete merging. Select "Edit > Undo Plugin Updates" from the ' ..
                'main Family Historian menu to reverse any changes made, and report this error ' ..
                'on FHUG so it can be investigated.\n\nError message: ' .. err
            MessageBox(Failure, 'OK', 'ERROR')
            return
        end
        MergeStarted = true
    end

    -- merging completed
    gblProgBar.Dialog:destroy()
    local Done = 'Duplicate Media Records merged successfully. If you wish to reverse this change, ' ..
        'select "Edit > Undo Plugin Updates" from the main Family Historian menu.'
    MessageBox(Done, 'OK', 'INFORMATION')
end
-- ******************************************************************************************************--
-- Redirects the supplied links from a duplicate Media record to its master record
-- and then deletes the duplicate.
--   Record     the master record that is kept
--   Duplicate  the record to be merged away
--   tblLinks   list of item pointers (OBJE links or Rich Text _LINK_O links) that
--              currently point at Duplicate
-- Returns true on success, or false plus an error message on failure.
-- NOTE(review): the text of this function is damaged in this copy of the file. The
-- line starting "T = T:gsub(" below is cut off in the middle of a string, and the
-- code that should follow it (the rest of the Rich Text handling, the closing of the
-- loop and the start of the "still has links" test) is missing. Restore the function
-- from a clean copy of the plugin before relying on it.
function MergeDuplicates(Record, Duplicate, tblLinks)
-- Builds a table of record id -> link id (_LKID) from _LINK_O items, and picks out
-- the link ids that refer to the master and to the duplicate record.
local function GetLinkTable(p, pRecord, pDuplicate)
-- NOTE(review): pParent is set here but not used again in this helper - confirm
-- whether the _LINK_O search below was meant to start from it.
local pParent = fhNewItemPtr()
pParent:MoveToParentItem(p)
local tblLinks = {}
local IDrec, IDdup
local pL = fhNewItemPtr()
pL:MoveTo(p, '~._LINK_O')
while pL:IsNotNull() do
local RIN = fhGetRecordId(fhGetValueAsLink(pL))
local ID = fhGetItemText(pL, '~._LKID')
tblLinks[RIN] = ID
if RIN == fhGetRecordId(pRecord) then IDrec = ID end
if RIN == fhGetRecordId(pDuplicate) then IDdup = ID end
-- NOTE(review): this advances p, while the loop tests pL, so pL never changes
-- inside the loop. Looks like pL:MoveNext was intended - confirm.
p:MoveNext('SAME_TAG')
end
return IDrec, IDdup, tblLinks
end
for _, p in ipairs(tblLinks) do
if fhGetTag(p) == 'OBJE' then -- conventional link
-- simply repoint the link at the master record
if not fhSetValueAsLink(p, Record) then
local msg = 'Failed to create link to ' .. fhGetDisplayText(Record) .. ' (#' .. fhGetRecordId(Record) .. ')'
return false, msg
end
elseif fhGetTag(p) == '_LINK_O' then -- Rich Text link
-- collect the link ids before the link itself is changed
local IDrec, IDdup, tblLinks = GetLinkTable(p, fhGetItemPtr(Record), fhGetItemPtr(Duplicate))
local IDlnk = fhGetItemText(p, '~._LKID')
if not fhSetValueAsLink(p, Record) then
local msg = 'Failed to create Rich Text link to ' .. fhGetDisplayText(Record) .. ' (#' .. fhGetRecordId(Record) .. ')'
return false, msg
end
if IDrec then -- Master record also present
local pParent = fhNewItemPtr() -- Update link text (which deletes its link table)
pParent:MoveToParentItem(p)
local rt = fhGetValueAsRichText(pParent)
local T = rt:GetText()
-- NOTE(review): the next line is truncated (see the note above the function).
T = T:gsub(' 0 then
local msg = 'Duplicate record ' .. fhGetDisplayText(Duplicate) ..
' (#' .. fhGetRecordId(Duplicate) .. ' ) still has links, and will not be deleted.'
return false, msg
else
fhDeleteItem(Duplicate)
return true -- successful merge
end
end
-- ******************************************************************************************************--
function IsUnicode(S)
    -- Reports whether the supplied UTF-8 string contains characters that are lost
    -- when it is converted to ANSI.
    -- Returns true if so, and nothing if the string is ANSI compatible.
    -- The conversion loss flag is left cleared in both cases.
    fhSetConversionLossFlag(false)
    fhConvertUTF8toANSI(S) -- only the effect on the loss flag is of interest
    if fhIsConversionLossFlagSet() then
        fhSetConversionLossFlag(false)
        return true
    end
end
-- ******************************************************************************************************--
function ProgressBarStart(Max)
    -- Create and show a simple progress window and publish its parts in the global
    -- table gblProgBar (bar, whirlywheel, button, vbox, Dialog).
    --   Max  value at which the bar is full
    -- Pressing Cancel sets gblProgBar.Cancel to true; callers poll that field.
    local Progress = {}
    gblProgBar = Progress

    Progress.bar = iup.progressbar{max = Max, rastersize = '400x30'}
    Progress.whirlywheel = iup.animatedlabel{animation = 'IUP_CircleProgressAnimation'}
    Progress.button = iup.button{
        title = 'Cancel', padding = '10x3',
        action = function(self) gblProgBar.Cancel = true end
    }
    Progress.vbox = iup.vbox{
        Progress.bar, Progress.whirlywheel, Progress.button;
        gap = 20, alignment = 'acenter', margin = '5x15'
    }
    Progress.Dialog = iup.dialog{
        Progress.vbox;
        dialogframe = 'Yes', title = '', border = 'Yes', menubox = 'No'
    }
    iup.SetAttribute(Progress.Dialog, 'NATIVEPARENT', fhGetContextInfo('CI_PARENT_HWND'))
    Progress.Dialog:showxy(iup.CENTER, iup.CENTER) -- Put up Progress Display
end
-- ******************************************************************************************************--
function MessageBox(Message, Buttons, Icon, Title, Default)
    -- Custom message box offering more options than the built-in one.
    --   Message  text to display
    --   Buttons  button set (e.g. 'OK', 'YESNOCANCEL')
    --   Icon     dialog type (e.g. 'ERROR', 'QUESTION')
    --   Title    optional window title; the plugin name is used when omitted
    --   Default  optional number (as text) of the default button
    -- Returns the number of the button pressed, as reported by the dialog.
    local Box = iup.messagedlg{
        title = Title or 'Check for Duplicate Media (FH7)',
        value = Message,
        dialogtype = Icon,
        buttons = Buttons,
        buttondefault = Default,
    }
    Box:popup()
    return Box.ButtonResponse
end
-- ******************************************************************************************************--
function SplitFileFolder(FileName)
    -- Split a full file name into its file and folder parts.
    -- Returns File, Folder. Folder keeps its trailing backslash. When the name
    -- contains the project data folder, everything before that folder's final
    -- backslash is dropped from Folder.
    local NameStart = FileName:find("[^\\]*$") -- first character after the last backslash
    local File = FileName:sub(NameStart)
    local Folder = FileName:sub(1, NameStart - 1)

    local ProjectPrefix = fhGetContextInfo('CI_PROJECT_DATA_FOLDER') .. '\\'
    local _, PrefixEnd = FileName:find(ProjectPrefix, 1, true)
    if PrefixEnd then
        Folder = Folder:sub(PrefixEnd)
    end
    return File, Folder
end
-- ******************************************************************************************************--
function GetFingerprint(p)
    -- Build a fingerprint string for Media record p so that two records can be
    -- compared for identical content.
    -- Every field below the record contributes one entry: FILE fields contribute the
    -- core name of the (fully resolved) file name, all other fields their display
    -- text. The entries are sorted and joined, so field order does not matter.
    local Entries = {}
    for _, Field in ipairs(TabulateFields(p)) do
        local Entry
        if fhGetTag(Field) ~= 'FILE' then
            Entry = fhGetDisplayText(Field)
        else
            local FileName = fhGetValueAsText(Field)
            if FileName:match('^Media%\\') then
                -- relative path: resolve against the project data folder
                FileName = fhGetContextInfo('CI_PROJECT_DATA_FOLDER') .. '\\' .. FileName
            end
            Entry = GetCoreName(FileName)
        end
        table.insert(Entries, Entry)
    end
    table.sort(Entries)
    return table.concat(Entries)
end
-- ******************************************************************************************************--
function TabulateFields(p)
    -- Generic function returning a list of pointers to every item below p, at all
    -- levels. Each item is listed before its own children, and siblings are listed
    -- in the order they are visited by MoveNext. p itself is not included.
    local Found = {}

    local function Descend(pParent)
        local pChild = pParent:Clone()
        pChild:MoveToFirstChildItem(pChild)
        while pChild:IsNotNull() do
            table.insert(Found, pChild:Clone())
            Descend(pChild)
            pChild:MoveNext()
        end
    end

    Descend(p)
    return Found
end
-- ******************************************************************************************************--
function GetCoreName(Filename)
    -- Return the "core name" of a media file: the file name with any trailing
    -- " (number)" suffix removed from in front of the extension, e.g.
    -- "...\Media\photo (2).jpg" -> "...\Media\photo.jpg".
    -- Files outside the project's Media folder, and names without such a suffix,
    -- are returned unchanged.

    -- exclude files not in Media folder
    local F = fhGetContextInfo('CI_PROJECT_DATA_FOLDER') .. '\\Media'
    if Filename:sub(1, F:len()) ~= F then
        return Filename
    end
    -- Split off the extension. It must belong to the last path component (so it may
    -- not contain a backslash) and must not be empty; otherwise the name is treated
    -- as having no extension. This keeps a dot in a folder name from being mistaken
    -- for the start of the extension, and copes with names that end in a dot.
    local stem, ext = Filename:match('^(.*)%.([^%.\\]+)$')
    if not stem then
        stem = Filename
    end
    -- strips out any (number) suffix in the provided Filename
    local core = stem:match('^(.*)%s%(%d+%)$')
    if not core then
        return Filename
    end
    if ext then
        return core .. '.' .. ext
    end
    return core
end
-- ******************************************************************************************************--
-- run the plugin
main()
-- remove temporary plugin folder (holds the command script, its output and any
-- temporary file copy made while hashing)
local Folder = fhGetContextInfo('CI_APP_DATA_FOLDER') .. '\\Plugin Data\\Check for Duplicated Media (FH7)'
if fhfu.folderExists(Folder) then
    fhfu.deleteFolder(Folder)
end
--[[
@Title: Check for Possible Duplicate Media (FH7)
@Type: Standard
@Author: Mark Draper
@Version: 1.1
@LastUpdated: 15 May 2025
@Licence: This plugin is copyright (c) 2025 Mark Draper and is licensed under the MIT License which
is hereby incorporated by reference (see https://pluginstore.family-historian.co.uk/fh-plugin-licence)
@Description: This plugin provides a very fast and up to date alternative to the well-established Check For
Possible Duplicate Media plugin by extending the application to Media Records with non-Latin (i.e. Unicode)
filenames and paths and to files of essentially unlimited size. It also supports the FH7/GEDCOM 5.5.1 feature
of multiple files in the same Media Record, the option to automatically merge identical records, more detailed
reporting and a new online help page.
]]
--[[
Version 1.0 (Jan 2024)
Initial Plugin Store version
Version 1.1 (May 2025)
Enhanced tooltips added
Support for multiple monitors added
]]
fhInitialise(7, 0, 15, 'save_recommended')
require('iuplua')
require('iupluaimglib')
MD5 = require 'md5'
fhu = require('fhUtils')
fhfu = require('fhFileUtils')
fhu.setIupDefaults()
FSO = luacom.CreateObject('Scripting.FileSystemObject')
luaShell = luacom.CreateObject('WScript.Shell')
function main()
	-- Plugin entry point.
	-- Scans every Media (OBJE) record, groups the attached files by size, hashes only the files whose
	-- size is shared with another file, classifies the resulting pairs and finally shows the options
	-- menu and runs the action selected there. Returns nothing.

	-- carry out pre-flight checks
	local Max = Initialise()
	if not Max then return end
	-- check whether Windows method is available using current project file (small and always present)
	local WindowsEnabled = GetWindowsHash(fhGetContextInfo('CI_PROJECT_FILE'), true) and true
	-- create progress bar
	ProgressBarStart(Max)
	iup.SetAttribute(gblProgBar.whirlywheel, 'START', 'YES')
	local tblT = {}				-- file size -> list of {Record, File, N} entries
	local tblN = {Records = 0, Files = 0, Unicode = 0, Large = 0, Missing = 0, Multiple = 0,
		Differs = 0, Unlinked = 0, Duplicates = 0, Suffix = 0, Start = os.time()}
	local tblMultiple = {}		-- records with >1 file attached
	local DataFolder = fhGetContextInfo('CI_PROJECT_DATA_FOLDER')
	-- get table of media file sizes
	local pM = fhNewItemPtr()
	pM:MoveToFirstRecord('OBJE')
	while pM:IsNotNull() do
		tblN.Records = tblN.Records + 1
		if fhCallBuiltInFunction('LinksTo', pM) == 0 then tblN.Unlinked = tblN.Unlinked + 1 end
		local i = 0				-- number of existing files found in this record so far
		local pF = fhNewItemPtr()
		pF:MoveTo(pM, '~.FILE')
		while pF:IsNotNull() do
			local FileName = fhGetValueAsText(pF)
			if FileName:match('^Media%\\') then		-- relative path, so prefix the project data folder
				FileName = DataFolder .. '\\' .. FileName
			end
			if fhfu.fileExists(FileName) then
				tblN.Files = tblN.Files + 1
				local tblT1 = {}
				if IsUnicode(FileName) then
					tblN.Unicode = tblN.Unicode + 1
				end
				local size = FSO:GetFile(FileName).Size
				if size > 2^25 then
					tblN.Large = tblN.Large + 1
				end
				tblT1.Record = pM:Clone()
				tblT1.File = FileName
				i = i + 1
				tblT1.N = i
				if i == 2 then	-- first additional file
					tblN.Multiple = tblN.Multiple + 1
					table.insert(tblMultiple, pM:Clone())
				end
				if not tblT[size] then tblT[size] = {} end
				table.insert(tblT[size], tblT1)
			else
				tblN.Missing = tblN.Missing + 1
			end
			pF:MoveNext('SAME_TAG')
		end
		if tblN.Records % 10 == 0 then gblProgBar.bar.Value = tblN.Records end
		gblProgBar.Dialog.Title = 'Checking Media Records (' .. tblN.Records .. ' of ' .. Max .. ')...'
		if gblProgBar.Cancel then
			gblProgBar.Dialog:destroy()		-- do not leave the progress dialog behind on cancel
			return
		end
		iup.LoopStep()
		pM:MoveNext()
	end
	gblProgBar.Dialog.Title = 'Preparing hashes...'
	-- delete all examples of unique file size (cannot be duplicated)
	for size, Files in pairs(tblT) do
		if #Files == 1 then
			tblT[size] = nil
		end
	end
	-- get table of file hashes to be determined
	local tblFh = {}
	for _, Files in pairs(tblT) do
		for _, F in ipairs(Files) do
			tblFh[F.File] = true
		end
	end
	-- how many hashes to calculate?
	tblN.HashCount = 0
	for _ in pairs(tblFh) do
		tblN.HashCount = tblN.HashCount + 1
	end
	-- get hashes
	tblN.Hashed = 0
	gblProgBar.bar.Max = tblN.HashCount
	for File, _ in pairs(tblFh) do
		tblN.Hashed = tblN.Hashed + 1
		gblProgBar.Dialog.Title = 'Calculating file hashes (' .. tblN.Hashed .. ' of ' ..
			tblN.HashCount .. ')...'
		gblProgBar.bar.Value = tblN.Hashed
		-- GetHash may return nil when a file cannot be read; such files are skipped further down
		tblFh[File] = GetHash(File, WindowsEnabled)
		-- process pending UI events so that the Cancel button also works while hashing
		iup.LoopStep()
		if gblProgBar.Cancel then
			gblProgBar.Dialog:destroy()
			return
		end
	end
	gblProgBar.Dialog.Title = 'Collating final report...'
	-- add hashes and core name to main table (and record ID for debug and audit)
	for _, v in pairs(tblT) do
		for _, F in ipairs(v) do
			F.Hash = tblFh[F.File]
			F.CoreName = GetCoreName(F.File)
			F.ID = fhGetRecordId(F.Record)
			F.Fingerprint = GetFingerprint(F.Record)
		end
	end
	-- structure by hash and core name
	local tblD = {}
	for _, v in pairs(tblT) do
		for _, F in ipairs(v) do
			-- a file without a hash cannot be compared (and nil is not a valid table key), so skip it
			if F.Hash then
				if not tblD[F.Hash] then tblD[F.Hash] = {} end
				if not tblD[F.Hash][F.CoreName] then tblD[F.Hash][F.CoreName] = {} end
				local tblM = {}
				tblM.Record = F.Record
				tblM.ID = F.ID
				tblM.File = F.File
				tblM.N = F.N
				tblM.Fingerprint = F.Fingerprint
				table.insert(tblD[F.Hash][F.CoreName], tblM)
			end
		end
	end
	-- ensure first (master) record has the core name with no suffix
	for Hash, tblCoreName in pairs(tblD) do
		for CoreName, tblF in pairs(tblCoreName) do
			if tblF[1].File ~= CoreName then
				for i, F in ipairs(tblF) do
					if i ~= 1 and F.File == CoreName and F.Fingerprint == tblF[1].Fingerprint then
						tblF[1], tblF[i] = tblF[i], tblF[1]		-- swap table records so i is now 1
						break
					end
				end
			end
		end
	end
	-- delete any non-duplicated hashes
	for Hash, tblCoreName in pairs(tblD) do
		local i = 0
		for CoreName, tblF in pairs(tblCoreName) do
			i = i + #tblF
		end
		if i < 2 then tblD[Hash] = nil end
	end
	-- tabulate duplicates with same core name
	local tblDuplicates = {}
	local tblLogged = {}		-- prevents duplicate listings if multiple files
	for Hash, tblCoreName in pairs(tblD) do
		for CoreName, tblF in pairs(tblCoreName) do
			if #tblF > 1 then
				for i, F in ipairs(tblF) do
					if i ~= 1 and not tblLogged[tblF[1].ID .. '-' .. tblF[i].ID] then
						local tblA = {}
						tblA.Record1 = tblF[1].Record
						tblA.File1 = tblF[1].File
						tblA.N1 = tblF[1].N
						tblA.ID1 = tblF[1].ID
						tblA.Record2 = tblF[i].Record
						tblA.File2 = tblF[i].File
						tblA.N2 = tblF[i].N
						tblA.ID2 = tblF[i].ID
						if tblF[1].Fingerprint ~= tblF[i].Fingerprint then
							tblA.Match = 'Records Differ'
							tblN.Differs = tblN.Differs + 1
						elseif tblF[1].ID == tblF[i].ID then
							tblA.Match = 'Duplicate File'
						elseif tblF[1].File == tblF[i].File then
							tblA.Match = 'Duplicate Record'
							tblN.Duplicates = tblN.Duplicates + 1
						else
							tblA.Match = 'File Suffix'
							tblN.Suffix = tblN.Suffix + 1
						end
						table.insert(tblDuplicates, tblA)
						tblLogged[tblF[1].ID .. '-' .. tblF[i].ID] = true
					end
				end
			end
		end
	end
	-- tabulate comparison of different core names within same hash
	for Hash, tblCoreName in pairs(tblD) do
		local tblFP = {}
		for CoreName, tblF in pairs(tblCoreName) do
			table.insert(tblFP, tblF[1])
		end
		if #tblFP > 1 then
			for i, F in ipairs(tblFP) do
				if i ~= 1 and not tblLogged[tblFP[1].ID .. '-' .. tblFP[i].ID] then
					local tblA = {}
					tblA.Record1 = tblFP[1].Record
					tblA.File1 = tblFP[1].File
					tblA.N1 = tblFP[1].N
					tblA.ID1 = tblFP[1].ID
					tblA.Record2 = tblFP[i].Record
					tblA.File2 = tblFP[i].File
					tblA.N2 = tblFP[i].N
					tblA.ID2 = tblFP[i].ID
					tblA.Match = 'Records Differ'
					table.insert(tblDuplicates, tblA)
					tblN.Differs = tblN.Differs + 1
				end
			end
		end
	end
	gblProgBar.Dialog:destroy()
	-- return if no duplicates
	if tblN.Differs + tblN.Duplicates + tblN.Multiple + tblN.Suffix == 0 then
		MessageBox('No duplicate Media Records detected.', 'OK', 'INFORMATION')
		return
	end
	-- report results and present options menu
	local action = Menu(tblN, WindowsEnabled)
	if not action then
		return
	elseif action == 'FULL' then
		ReportAnalysis(tblDuplicates, tblN, true)
	elseif action == 'BASIC' then
		ReportAnalysis(tblDuplicates, tblN, false)
	elseif action == 'MULTIPLE' then
		ReportDuplicates(tblMultiple)
	elseif action == 'DUPLICATE' then
		GetDuplicateLinks(tblDuplicates, tblN.Suffix > 0)
	end
end
-- ******************************************************************************************************--
function Initialise()
	-- Pre-flight checks for the plugin.
	-- Verifies that a project is open, makes sure the plugin's working folder exists and counts the
	-- Media (OBJE) records.
	-- Returns the number of Media records, or nothing (after telling the user why) when the plugin
	-- should not continue.

	-- the plugin only works inside a project
	if fhGetContextInfo('CI_APP_MODE') ~= 'Project Mode' then
		MessageBox('This plugin can only be run from within a Family Historian Project.', 'OK', 'ERROR')
		return
	end
	-- create the working folder one level at a time, as either level may be absent
	local Path = fhGetContextInfo('CI_APP_DATA_FOLDER')
	for _, Level in ipairs({'\\Plugin Data', '\\Check for Duplicated Media (FH7)'}) do
		Path = Path .. Level
		if not fhfu.folderExists(Path) then fhfu.createFolder(Path) end
	end
	-- walk the Media records to obtain the progress bar maximum
	local Count = 0
	local pRecord = fhNewItemPtr()
	pRecord:MoveToFirstRecord('OBJE')
	while pRecord:IsNotNull() do
		Count = Count + 1
		pRecord:MoveNext()
	end
	-- nothing to check
	if Count == 0 then
		MessageBox('No Media Records.', 'OK', 'INFORMATION')
		return
	end
	return Count
end
-- ******************************************************************************************************--
function GetHash(FileName, WindowsEnabled)
	-- Get the MD5 hash for the specified file.
	--   FileName       full path of the file (UTF-8)
	--   WindowsEnabled truthy when the Windows command script method (GetWindowsHash) is available
	-- Returns the hash as a hex string, or nothing if the file could not be hashed.

	-- the Lua library can only open files whose path is ANSI compatible and not too long
	local Plain = not IsUnicode(FileName) and FileName:len() < 250
	if WindowsEnabled then
		-- GetLuaHash only produces a pseudo-MD5 for files of 2^25 bytes or more, which never equals the
		-- true MD5 reported by the Windows method. Send every large file down the same (Windows) route
		-- so that identical large files always compare equal, whatever their names.
		if not Plain or FSO:GetFile(FileName).Size >= 2^25 then
			return GetWindowsHash(FileName)
		end
		return GetLuaHash(FileName)
	end
	if Plain then
		return GetLuaHash(FileName)
	end
	-- no Windows method: hash a temporary copy that has a plain name in the plugin's working folder
	local MD5File = fhGetContextInfo('CI_APP_DATA_FOLDER') ..
		'\\Plugin Data\\Check for Duplicated Media (FH7)\\~TempFile.tmp'
	-- /y stops the hidden command window waiting for an overwrite confirmation
	local cmd = 'copy /y "' .. FileName .. '" "' .. MD5File .. '"'
	if luaShell:Run('cmd.exe /c ' .. cmd, 0, true) ~= 0 then return end
	local hash = GetLuaHash(MD5File)
	fhfu.deleteFile(MD5File)
	return hash
end
-- ******************************************************************************************************--
function GetLuaHash(FileName)
	-- Determine the MD5 of a file using the Lua MD5 library.
	-- Files smaller than 2^25 bytes are hashed in a single pass. Larger files are read in 2^25 byte
	-- chunks and the result is the MD5 of the concatenated chunk hashes (a pseudo-MD5, not the MD5 of
	-- the file content).
	-- Returns the hex digest, or nothing if the file cannot be opened.
	local ChunkSize = 2^25
	local OnePass = FSO:GetFile(FileName).Size < ChunkSize
	local Handle = io.open(fhConvertUTF8toANSI(FileName), 'rb')
	if not Handle then return end
	if OnePass then
		local Content = Handle:read('*all')
		Handle:close()
		return MD5.sumhexa(Content)
	end
	-- large file: hash each chunk, then hash the list of chunk hashes
	local Digests = {}
	local Chunk = Handle:read(ChunkSize)
	while Chunk do
		table.insert(Digests, MD5.sumhexa(Chunk))
		Chunk = Handle:read(ChunkSize)
	end
	Handle:close()
	return MD5.sumhexa(table.concat(Digests))
end
-- ******************************************************************************************************--
function GetWindowsHash(FileName)
	-- Determine MD5 using a Windows Command Script that runs Certutil.
	--   FileName  full path of the file to hash (UTF-8)
	-- Returns the hash as a 32 character lower case hex string, or nothing if the script could not be
	-- written or run, or if no hash could be found in its output.
	local Folder = fhGetContextInfo('CI_APP_DATA_FOLDER') .. '\\Plugin Data\\Check for Duplicated Media (FH7)'
	local CmdFile = Folder .. '\\GetHash.cmd'
	local OutputFile = Folder .. '\\GetHash.txt'
	-- '%' introduces a variable or parameter inside a command script, even within quotes, so a literal
	-- percent sign in a path (e.g. 'a%20b.jpg') has to be doubled
	local function Escape(Path)
		return (Path:gsub('%%', '%%%%'))
	end
	local Cmd = 'chcp 65001\nCertutil -hashfile "' .. Escape(FileName) .. '" MD5 > "' ..
		Escape(OutputFile) .. '"'
	-- save command file in UTF-8 without BOM (as needed by Windows command shell)
	local F = io.open(CmdFile, 'wb')
	if not F then return end
	F:write(Cmd)
	F:close()
	-- check output file has been created ok, and read hash
	if fhfu.fileExists(OutputFile) then fhfu.deleteFile(OutputFile) end
	if luaShell:Run('cmd.exe /C "' .. CmdFile .. '"', 0, true) ~= 0 then return end
	if not fhfu.fileExists(OutputFile) then return end
	local Text = fhLoadTextFile(OutputFile)
	if not Text then return end
	-- the hash is on a line of its own; some Certutil versions separate the bytes with spaces
	local Line = Text:match('%c([%x ]+)%c')
	if not Line then return end
	local Hash = Line:gsub(' ', ''):lower()
	if Hash:len() ~= 32 then return end		-- not an MD5 digest
	return Hash
end
-- ******************************************************************************************************--
function Menu(tblN, WindowsEnabled)
	-- Show the summary dialog and let the user choose what to do next.
	--   tblN            table of counters collected by the scan (Records, Files, Multiple, Missing,
	--                   Unlinked, Large, Unicode, Differs, Duplicates, Suffix) plus the Start time
	--   WindowsEnabled  truthy when the Windows hashing method was used
	-- Returns 'FULL', 'BASIC', 'MULTIPLE' or 'DUPLICATE' according to the button pressed, or nil if
	-- the dialog was closed without choosing an action.
	local Selected		-- form return value

	-- small factories shared by the three summary frames
	local function Label(Text)
		return iup.label{title = Text, expand = 'HORIZONTAL'}
	end
	local function Grid(Rows, Cells)
		-- Cells holds the caption labels followed by the value labels (one column each)
		Cells.orientation = 'VERTICAL'
		Cells.numdiv = Rows
		Cells.sizelin = -1
		Cells.gapcol = 10
		Cells.gaplin = 2
		Cells.margin = '10x10'
		return iup.gridbox(Cells)
	end
	local function Closer(Value)
		-- button callback that records the chosen action and closes the dialog
		return function(self)
			Selected = Value
			return iup.CLOSE
		end
	end

	-- Media Analysis frame
	local fraAnalysis = iup.frame{
		Grid(7, {
			Label('Total Records:'), Label('Total Files:'), Label('Missing Files:'),
			Label('Unlinked Records:'), Label('Records With Multiple Files:'),
			Label('Large Files (>32 MB):'), Label('Unicode File Names:'),
			Label(tblN.Records), Label(tblN.Files), Label(tblN.Missing),
			Label(tblN.Unlinked), Label(tblN.Multiple), Label(tblN.Large), Label(tblN.Unicode)
		}),
		title = 'Media Analysis'
	}

	-- Duplicates frame
	local fraDuplicates = iup.frame{
		Grid(3, {
			Label('Potential Duplicate Pairs:'), Label('Exact Duplicate Pairs:'),
			Label('File Suffix Difference:'),
			Label(tblN.Differs), Label(tblN.Duplicates), Label(tblN.Suffix)
		}),
		title = 'Duplicates'
	}

	-- Technical frame (the two value labels are kept as they may be changed below)
	local lblModeN = Label('Windows')
	local lblTimeN = Label(os.time() - tblN.Start .. ' s')
	local fraTechnical = iup.frame{
		Grid(2, {Label('Operating Mode:'), Label('Run Time:'), lblModeN, lblTimeN}),
		title = 'Plugin Operation'
	}

	-- Form buttons (the four action buttons start disabled and are enabled below as applicable)
	local btnReport = iup.button{
		title = 'Display Plugin\nAnalysis', expand = 'YES', active = 'NO',
		action = Closer('FULL'),
		tip = 'Tabulate detailed results\nand close plugin.'
	}
	local btnBasicReport = iup.button{
		title = 'Display Basic\nPlugin Analysis', expand = 'YES', active = 'NO',
		action = Closer('BASIC'),
		tip = 'Tabulate summary results and close plugin\n(suitable for smaller monitors).'
	}
	local btnMultiple = iup.button{
		title = 'List Records With\nMultiple Files', expand = 'YES',
		active = 'NO', padding = '10x3',
		action = Closer('MULTIPLE'),
		tip = 'Tabulate Media Records that have more than\none file attached and close plugin.'
	}
	local btnMerge = iup.button{
		title = 'Merge Duplicate\nRecords', expand = 'YES', active = 'NO',
		action = Closer('DUPLICATE'),
		tip = 'Merge Media Record pairs that are exact duplicates of each other\n' ..
			'or differ just in their Media filename suffix.'
	}
	local btnHelp = iup.button{
		title = 'Plugin Help', expand = 'YES', tip = 'Display plugin help page',
		action = function(self)
			fhShellExecute('https://pluginstore.family-historian.co.uk/page/help/check-for-possible-duplicate-media-fh7')
		end,
	}
	local btnClose = iup.button{
		title = 'Close Plugin', expand = 'YES',
		action = function(self) return iup.CLOSE end,
		tip = 'Close plugin with no further action.'
	}
	local gridButtons = iup.gridbox{
		btnReport, btnBasicReport, btnMultiple, btnMerge, btnHelp, btnClose;
		orientation = 'VERTICAL', numdiv = 2, sizelin = -1, sizecol = -1,
		gapcol = 10, gaplin = 10, margin = '10x10', expand = 'NO',
		homogeneouscol = 'YES',
		homogeneouslin = 'YES',
	}

	-- define enhanced tool tips
	local enhanced = true		-- comment out this line if enhanced tool tips are not required
	if enhanced then
		local Balloons = {
			{btnReport, 'Display Full Analysis'},
			{btnBasicReport, 'Display Basic Analysis'},
			{btnMultiple, 'Multiple Media Files'},
			{btnMerge, 'Duplicate Media Records'},
			{btnHelp, 'Help'},
			{btnClose, 'Close'},
		}
		for _, Entry in ipairs(Balloons) do
			local Button, Heading = Entry[1], Entry[2]
			Button.TipBalloon = 'YES'
			Button.TipBalloonTitleIcon = 1		-- modify individually if different
			Button.TipBalloonTitle = Heading
		end
	end

	-- update controls
	local Reportable = tblN.Differs + tblN.Duplicates + tblN.Suffix > 0
	if Reportable then
		btnReport.Active = 'YES'
		btnBasicReport.Active = 'YES'
	end
	if tblN.Multiple > 0 then btnMultiple.Active = 'YES' end
	if tblN.Duplicates + tblN.Suffix > 0 then btnMerge.Active = 'YES' end
	if not WindowsEnabled then lblModeN.Title = 'Lua MD5' end
	if os.time() == tblN.Start then lblTimeN.Title = '<1 s' end

	-- Build final form
	local Summary = iup.hbox{
		fraAnalysis,
		iup.vbox{fraDuplicates, fraTechnical; margin = '0x0'; gap = 10};
		margin = '0x0', gap = 10
	}
	local Layout = iup.vbox{Summary, gridButtons; alignment = 'ACENTER', margin = '10x10'}
	local dialog = iup.dialog{Layout, resize = 'No', minbox = 'No', maxbox = 'No',
		title = 'Check for Possible Duplicate Media (FH7) (1.1)'}
	iup.SetAttribute(dialog, 'NATIVEPARENT', fhGetContextInfo('CI_PARENT_HWND'))
	dialog:popup()
	return Selected
end
-- ******************************************************************************************************--
function ReportAnalysis(tblDuplicates, tblN, FileDetails)
	-- Output the list of duplicate pairs as a result set.
	--   tblDuplicates  list of pair tables (Record1, File1, N1, Record2, File2, N2, Match)
	--   tblN           scan counters; only Multiple is read, to decide whether 'File #' is shown
	--   FileDetails    truthy to include the File, Folder and File # columns
	local function NewSide()
		return {Records = {}, Files = {}, Folders = {}, Numbers = {}, RINs = {}, Links = {}}
	end
	local function Collect(Side, Record, Path, Number)
		-- add one record/file of a pair to the columns for its side
		table.insert(Side.Records, Record)
		local File, Folder = SplitFileFolder(Path)
		table.insert(Side.Files, File)
		table.insert(Side.Folders, Folder)
		table.insert(Side.Numbers, Number)
		table.insert(Side.RINs, fhGetRecordId(Record))
		table.insert(Side.Links, fhCallBuiltInFunction('LinksTo', Record))
	end
	local function Output(Heading, Side)
		-- emit the columns for one side of the comparison
		fhOutputResultSetColumn(Heading, 'item', Side.Records, #Side.Records, 200, 'align_left', 1)
		fhOutputResultSetColumn('RecordID', 'integer', Side.RINs, #Side.RINs, 35)
		if FileDetails then
			fhOutputResultSetColumn('File', 'text', Side.Files, #Side.Files, 60)
			fhOutputResultSetColumn('Folder', 'text', Side.Folders, #Side.Folders, 60)
			if tblN.Multiple > 0 then
				fhOutputResultSetColumn('File #', 'integer', Side.Numbers, #Side.Numbers, 25)
			end
		end
		fhOutputResultSetColumn('Links', 'integer', Side.Links, #Side.Links, 25)
	end

	-- prepare output columns
	local Master, Copy, Status = NewSide(), NewSide(), {}
	for _, Pair in ipairs(tblDuplicates) do
		Collect(Master, Pair.Record1, Pair.File1, Pair.N1)
		Collect(Copy, Pair.Record2, Pair.File2, Pair.N2)
		table.insert(Status, Pair.Match)
	end

	fhOutputResultSetTitles('Duplicates')
	Output('Record', Master)
	Output('Duplicate', Copy)
	fhOutputResultSetColumn('Status', 'text', Status, #Status)
end
-- ******************************************************************************************************--
function ReportDuplicates(tblMultiple)
	-- Output the Media records that have more than one file attached as a result set.
	--   tblMultiple  list of record pointers
	local Ids = {}
	for Index = 1, #tblMultiple do
		Ids[Index] = fhGetRecordId(tblMultiple[Index])
	end
	fhOutputResultSetTitles('Multiple Files')
	fhOutputResultSetColumn('Record', 'item', tblMultiple, #tblMultiple, 200, 'align_left', 1)
	fhOutputResultSetColumn('Record Id', 'text', Ids, #Ids, 40, 'align_left', 1)
end
-- ******************************************************************************************************--
function GetDuplicateLinks(tblDuplicates, Suffix)
	-- Locate the links to each duplicate record and merge the duplicate into its master.
	--   tblDuplicates  list of pair tables produced by main (Record1, Record2, Match, ...)
	--   Suffix         truthy if there are pairs that differ only in the Media filename suffix; the
	--                  user is then asked whether those pairs should be merged as well
	-- Returns nothing; the outcome is reported to the user in a message box.

	-- optionally include the 'File Suffix' pairs
	if Suffix then
		local Question = 'Do you wish to also merge record pairs where the only difference is ' ..
			'the Media filename suffix?'
		local Answer = MessageBox(Question, 'YESNOCANCEL', 'QUESTION', nil, '3')
		if Answer == '3' then		-- cancelled
			return
		elseif Answer == '2' then	-- exact duplicates only
			Suffix = false
		end
	end
	local function IsMergeable(Pair)
		return Pair.Match == 'Duplicate Record' or (Suffix and Pair.Match == 'File Suffix')
	end

	-- the progress bar counts the links that have to be found
	local TotalLinks = 0
	for _, Pair in ipairs(tblDuplicates) do
		if IsMergeable(Pair) then
			TotalLinks = TotalLinks + fhCallBuiltInFunction('LinksTo', Pair.Record2)
		end
	end
	ProgressBarStart(TotalLinks)
	iup.SetAttribute(gblProgBar.whirlywheel, 'START', 'YES')

	-- search for links (most likely locations first)
	local SearchOrder = {'SOUR', 'INDI', 'FAM', '_PLAC', 'NOTE', '_RNOT'}
	local pItem = fhNewItemPtr()
	local TotalFound = 0
	local MergeStarted
	for _, Pair in ipairs(tblDuplicates) do
		if IsMergeable(Pair) then
			gblProgBar.Dialog.Title = 'Merging ' .. fhGetDisplayText(Pair.Record2) .. ' ...'
			local Expected = fhCallBuiltInFunction('LinksTo', Pair.Record2)
			local Located = 0
			local Links = {}
			for _, Tag in ipairs(SearchOrder) do
				pItem:MoveToFirstRecord(Tag)
				while pItem:IsNotNull() and Located < Expected do
					if fhGetValueAsLink(pItem):IsSame(Pair.Record2) then
						Located = Located + 1
						TotalFound = TotalFound + 1
						gblProgBar.bar.Value = TotalFound
						table.insert(Links, pItem:Clone())
					end
					if gblProgBar.Cancel then
						gblProgBar.Dialog:destroy()
						if MergeStarted then	-- incomplete changes
							local Warning = 'Record merging has been interrupted, resulting in some ' ..
								'records not being merged. Select "Edit > Undo Plugin Updates" from ' ..
								'the main Family Historian menu if you wish to undo the merges ' ..
								'completed before stopping.'
							MessageBox(Warning, 'OK', 'WARNING')
						end
						return
					end
					iup.LoopStep()
					pItem:MoveNextSpecial()
				end
				if Located == Expected then break end	-- all links to duplicate located
			end
			local Merged, Reason = MergeDuplicates(Pair.Record1, Pair.Record2, Links)
			if not Merged then
				gblProgBar.Dialog:destroy()
				local Failure = 'Unable to complete merging. Select "Edit > Undo Plugin Updates" from the ' ..
					'main Family Historian menu to reverse any changes made, and report this error ' ..
					'on FHUG so it can be investigated.\n\nError message: ' .. Reason
				MessageBox(Failure, 'OK', 'ERROR')
				return
			end
			MergeStarted = true
		end
	end

	-- merging completed
	gblProgBar.Dialog:destroy()
	local Done = 'Duplicate Media Records merged successfully. If you wish to reverse this change, ' ..
		'select "Edit > Undo Plugin Updates" from the main Family Historian menu.'
	MessageBox(Done, 'OK', 'INFORMATION')
end
-- ******************************************************************************************************--
function MergeDuplicates(Record, Duplicate, tblLinks)
-- Redirects the links listed in tblLinks from the Duplicate record to the master Record and then
-- deletes Duplicate.
--   Record     record that is kept
--   Duplicate  record that is to be removed
--   tblLinks   list of item pointers whose link value is to be changed
-- Returns true on success, or false plus an error message.
local function GetLinkTable(p, pRecord, pDuplicate)
-- Tabulates the link ID (_LKID) of each _LINK_O item found from p, keyed by the ID of the linked
-- record. Returns the link IDs that point at pRecord and at pDuplicate, followed by the whole table.
local pParent = fhNewItemPtr()
pParent:MoveToParentItem(p)
local tblLinks = {}
local IDrec, IDdup
local pL = fhNewItemPtr()
pL:MoveTo(p, '~._LINK_O')
while pL:IsNotNull() do
local RIN = fhGetRecordId(fhGetValueAsLink(pL))
local ID = fhGetItemText(pL, '~._LKID')
tblLinks[RIN] = ID
if RIN == fhGetRecordId(pRecord) then IDrec = ID end
if RIN == fhGetRecordId(pDuplicate) then IDdup = ID end
-- NOTE(review): the loop condition tests pL but it is p that is advanced here - confirm intent
p:MoveNext('SAME_TAG')
end
return IDrec, IDdup, tblLinks
end
for _, p in ipairs(tblLinks) do
if fhGetTag(p) == 'OBJE' then -- conventional link
if not fhSetValueAsLink(p, Record) then
local msg = 'Failed to create link to ' .. fhGetDisplayText(Record) .. ' (#' .. fhGetRecordId(Record) .. ')'
return false, msg
end
elseif fhGetTag(p) == '_LINK_O' then -- Rich Text link
local IDrec, IDdup, tblLinks = GetLinkTable(p, fhGetItemPtr(Record), fhGetItemPtr(Duplicate))
local IDlnk = fhGetItemText(p, '~._LKID')
if not fhSetValueAsLink(p, Record) then
local msg = 'Failed to create Rich Text link to ' .. fhGetDisplayText(Record) .. ' (#' .. fhGetRecordId(Record) .. ')'
return false, msg
end
if IDrec then -- Master record also present
local pParent = fhNewItemPtr() -- Update link text (which deletes its link table)
pParent:MoveToParentItem(p)
local rt = fhGetValueAsRichText(pParent)
local T = rt:GetText()
-- NOTE(review): the next line looks truncated in this copy of the file (the gsub call is never
-- closed and the enclosing blocks are never ended) - restore it from the original plugin source
T = T:gsub(' 0 then
local msg = 'Duplicate record ' .. fhGetDisplayText(Duplicate) ..
' (#' .. fhGetRecordId(Duplicate) .. ' ) still has links, and will not be deleted.'
return false, msg
else
fhDeleteItem(Duplicate)
return true -- successful merge
end
end
-- ******************************************************************************************************--
function IsUnicode(S)
	-- Checks whether the supplied string is ANSI or Unicode.
	-- The string is converted to ANSI purely to see whether the conversion loses anything.
	-- Returns true if it does (the string needs Unicode), otherwise nothing.
	fhSetConversionLossFlag(false)
	fhConvertUTF8toANSI(S)
	if fhIsConversionLossFlagSet() then
		fhSetConversionLossFlag(false)		-- leave the flag clear for later conversions
		return true
	end
end
-- ******************************************************************************************************--
function ProgressBarStart(Max)
	-- Create and display a simple progress bar, and store it in the global table gblProgBar.
	--   Max  value of the progress bar's max attribute
	-- gblProgBar holds the controls (bar, whirlywheel, button, vbox, Dialog); its Cancel field is set
	-- to true when the Cancel button is pressed.
	local Bar = iup.progressbar{max = Max; rastersize = '400x30'}
	local Wheel = iup.animatedlabel{animation = 'IUP_CircleProgressAnimation'}
	local CancelButton = iup.button{title = 'Cancel'; padding = '10x3',
		action = function(self) gblProgBar.Cancel = true end}
	local Box = iup.vbox{Bar, Wheel, CancelButton; gap = 20,
		alignment = 'acenter', margin = '5x15'}
	local Dialog = iup.dialog{Box; dialogframe = 'Yes', title = '',
		border = 'Yes', menubox = 'No'}
	-- publish the new controls before the dialog is shown
	gblProgBar = {bar = Bar, whirlywheel = Wheel, button = CancelButton, vbox = Box, Dialog = Dialog}
	iup.SetAttribute(Dialog, 'NATIVEPARENT', fhGetContextInfo('CI_PARENT_HWND'))
	Dialog:showxy(iup.CENTER, iup.CENTER)		-- Put up Progress Display
end
-- ******************************************************************************************************--
function MessageBox(Message, Buttons, Icon, Title, Default)
	-- Replaces built-in function with custom version containing more options.
	--   Message  text to display
	--   Buttons  value for the dialog's buttons attribute (e.g. 'OK', 'YESNOCANCEL')
	--   Icon     value for the dialog's dialogtype attribute (e.g. 'ERROR', 'QUESTION')
	--   Title    optional dialog title; defaults to the plugin name
	--   Default  optional value for the dialog's buttondefault attribute
	-- Returns the dialog's ButtonResponse attribute.
	local msgdlg = iup.messagedlg{value = Message, buttons = Buttons, dialogtype = Icon,
		title = Title or 'Check for Duplicate Media (FH7)', buttondefault = Default}
	msgdlg:popup()
	-- read the answer before releasing the dialog, which is otherwise never freed
	local Response = msgdlg.ButtonResponse
	msgdlg:destroy()
	return Response
end
-- ******************************************************************************************************--
function SplitFileFolder(FileName)
	-- Splits a supplied file name into separate file and folder elements.
	-- The folder keeps its trailing backslash. If the name contains the project data folder, the
	-- folder is cut down so that it starts at the backslash that follows the project data folder.
	-- Returns File, Folder.
	local Folder, File = FileName:match('^(.-)([^\\]*)$')
	local Root = fhGetContextInfo('CI_PROJECT_DATA_FOLDER') .. '\\'
	local _, RootEnd = FileName:find(Root, 1, true)		-- plain search, no pattern characters
	if RootEnd then
		Folder = Folder:sub(RootEnd)
	end
	return File, Folder
end
-- ******************************************************************************************************--
function GetFingerprint(p)
	-- Get fingerprint of Media record p.
	-- The fingerprint is the sorted concatenation of one text per subfield: the core name of the file
	-- for FILE fields (so that a '(n)' suffix is ignored) and the display text for everything else.
	local Parts = {}
	for _, Field in ipairs(TabulateFields(p)) do
		local Text
		if fhGetTag(Field) ~= 'FILE' then
			Text = fhGetDisplayText(Field)
		else
			local Path = fhGetValueAsText(Field)
			if Path:match('^Media%\\') then		-- relative path, so prefix the project data folder
				Path = fhGetContextInfo('CI_PROJECT_DATA_FOLDER') .. '\\' .. Path
			end
			Text = GetCoreName(Path)
		end
		table.insert(Parts, Text)
	end
	table.sort(Parts)		-- makes the result independent of field order
	return table.concat(Parts)
end
-- ******************************************************************************************************--
function TabulateFields(p)
	-- Generic function to tabulate all populated subfields of a record (excluding metafields).
	-- Returns a list of item pointers, one for every descendant of p, in depth-first order (each item
	-- is followed by its own children before its next sibling).
	local Fields = {}
	local function Walk(pParent)
		local pChild = pParent:Clone()
		pChild:MoveToFirstChildItem(pParent)
		while pChild:IsNotNull() do
			table.insert(Fields, pChild:Clone())	-- store a copy, as pChild moves on
			Walk(pChild)
			pChild:MoveNext()
		end
	end
	Walk(p)
	return Fields
end
-- ******************************************************************************************************--
function GetCoreName(Filename)
	-- Strips out any (number) suffix in the provided Filename, e.g. '...\photo (2).jpg' becomes
	-- '...\photo.jpg'. Files outside the project Media folder are returned unchanged.
	--   Filename  full path of the file
	-- Returns the core file name (the full path without the suffix).

	-- exclude files not in Media folder
	local MediaFolder = fhGetContextInfo('CI_PROJECT_DATA_FOLDER') .. '\\Media'
	if Filename:sub(1, MediaFolder:len()) ~= MediaFolder then
		return Filename
	end
	-- split off the extension, looking only at the final path component so that a dot in a folder name
	-- (such as the project's '.fh_data' folder) is never mistaken for the start of the extension
	local basename, ext = Filename:match('^(.*)(%.[^%.\\]+)$')
	if not basename then		-- no extension at all
		basename, ext = Filename, ''
	end
	local stripped, count = basename:gsub('%s%(%d+%)$', '')
	if count == 0 then
		return Filename
	end
	return stripped .. ext
end
-- ******************************************************************************************************--
-- run the plugin
main()
-- remove temporary plugin folder (the same folder that Initialise creates);
-- the path is kept in a local scope so that no stray global is left behind
do
	local Folder = fhGetContextInfo('CI_APP_DATA_FOLDER') .. '\\Plugin Data\\Check for Duplicated Media (FH7)'
	if fhfu.folderExists(Folder) then
		fhfu.deleteFolder(Folder)
	end
end
-- Source: Check-for-Possible-Duplicate-Media-FH7-3.fh_lua