-- Check for Possible Duplicate Media.fh_lua

--[[
@Title: Check for Possible Duplicate Media
@Author: Jane Taubman
@Version: 2.2
@LastUpdated: 4 Feb 2021
@Licence: This plugin is copyright (c) 2021 Jane Taubman and contributors, and is licensed under the MIT License
which is hereby incorporated by reference (see https://pluginstore.family-historian.co.uk/fh-plugin-licence)
@Description: 
This plugin checks all the media records in your project and compares the files linked to them, looking for files 
which are identical. It picks up both records which are linked to the same file and records whose file names 
are different but whose contents are the same. 
When complete it returns a result set containing the matching records.
Select each pair in turn by dragging over both cells, then choose Edit>Merge Compare Records from 
the menu to merge the records together.
Once the process is complete, it is recommended to use the "Check for Unlinked Media" plugin to clean up any duplicate files.

1.1 Add trap for Missing Files and report them on the Result Set.
1.2 Add Caching of File details between runs
1.3 Correct Clear Records Call
2.0 V7 Compatibility
2.1 V6 Compatibility
2.2 Fix MD5 problems
 ]]


--- Scan every media (OBJE) record, hash the file it links to, and output a
-- result set pairing records whose files have identical contents. Records
-- whose linked file cannot be opened are listed with a "File Missing" message.
-- Takes no parameters and returns nothing; shows a message box when the
-- plugin is not run in Project Mode or when nothing was found.
-- Expects the global `md5` (providing `sumhexa`) to be loaded before the call.
function mainfunction()
    if fhGetContextInfo('CI_APP_MODE') ~= 'Project Mode' then
        fhMessageBox('This Plugin Requires a Project','MB_OK','MB_ICONEXCLAMATION')
        return
    end

    -- Define Columns (kept local so nothing leaks into the global namespace)
    local tblOutput,iC = createResultTable()
    tblOutput.record = {title='Record',type='item',width=140,align='align_left',content={}}
    tblOutput.duplicate = {title='Duplicate',type='item',width=140,align='align_left',content={}}
    tblOutput.message = {title='Message',type='Text',width=140,align='align_left',content={}}

    -- Build Duplicate List
    local rootfolder = fhGetContextInfo('CI_PROJECT_DATA_FOLDER')
    -- Maps an MD5 digest to the first record seen with that digest. Only the
    -- first record per digest is stored, so a lookup finds at most one match.
    local firstByHash = {}
    -- The file tag differs between application versions.
    local sFile = '~._FILE'
    if fhGetAppVersion() > 6 then
        sFile = '~.FILE'
    end
    local pm = fhNewItemPtr()

    -- Quick Count Records (sizes the progress bar)
    local icnt = 0
    for _ in records('OBJE') do
        icnt = icnt + 1
    end

    ProgressDisplay.Start('Searching All Media Please Wait',icnt)
    for pi in records('OBJE') do
        ProgressDisplay.SetMessage(fhGetDisplayText(pi))
        ProgressDisplay.Step(1)
        if ProgressDisplay.Cancel() then
            break
        end

        pm:MoveTo(pi,sFile)
        local linkedName = fhGetValueAsText(pm)
        local mediaFile = linkedName
        -- Paths starting with "Media\" are resolved against the project data folder.
        if mediaFile:starts('Media\\') then
            mediaFile = rootfolder..'\\'..mediaFile
        end

        if FileExists(mediaFile) then
            -- Always hash the current file; the digest must be local so a
            -- value from a previous record can never be reused by mistake.
            local smd5 = md5.sumhexa(LoadFromFile(mediaFile))
            local firstRecord = firstByHash[smd5]
            if firstRecord then
                -- Same contents as an earlier record: report the pair.
                iC = iC + 1
                tblOutput.record.content[iC] = firstRecord
                tblOutput.duplicate.content[iC] = pi:Clone()
            else
                -- records() reuses its pointer, so keep a clone.
                firstByHash[smd5] = pi:Clone()
            end
        else
            -- File not found
            iC = iC + 1
            tblOutput.record.content[iC] = pi:Clone()
            tblOutput.message.content[iC] = 'File Missing:'..linkedName
        end
    end
    ProgressDisplay.Reset()
    ProgressDisplay.Close()

    if iC > 0 then
        -- Output the duplicate pairs and missing files as a result set
        local strTitle = "Possible Duplicate Media Records"
        fhOutputResultSetTitles(strTitle,strTitle, "Date: %#x")
        for t in tblOutput() do
            fhOutputResultSetColumn(t.title, t.type, t.content, iC, t.width,t.align)
        end
    else
        fhMessageBox('No Duplicates Found','MB_OK','MB_ICONINFORMATION')
    end
end
-------------------------------------- Custom Functions
-------------------------------------- Standard Functions
--[[
@Title: Progress Display (drop in)
@Author: Jane Taubman / Mike Tate
@LastUpdated: May 2012
@Description: Allows easy adding of a Progress Bar to any long running Plugin
]]
local StrWhite = "255 255 255"

-- Progress window helper. State is held in the globals dlgProgress,
-- gaugeProgress, messageline and cancelflag, exactly as the functions below
-- read and write them.
ProgressDisplay = {}

-- Create and show the progress window; does nothing if one already exists.
-- strTitle is the dialog title, intMax the progress bar's maximum.
function ProgressDisplay.Start(strTitle,intMax)
    if dlgProgress then
        return
    end
    cancelflag = false
    local btnCancel = iup.button{ title="Cancel", rastersize="200x30",
        action = function()
            cancelflag = true -- Remember that Cancel was pressed
            return iup.CLOSE
        end
    }
    gaugeProgress = iup.progressbar{ rastersize="400x30", max=intMax }
    messageline = iup.label{ title=" ", expand="YES", alignment="ACENTER" }
    local layout = iup.vbox{ alignment="ACENTER", gap="10", margin="10x10",
        messageline,
        gaugeProgress,
        btnCancel
    }
    dlgProgress = iup.dialog{ title=strTitle, dialogframe="YES", background=StrWhite, layout }
    dlgProgress.close_cb = btnCancel.action -- The window's close button behaves like Cancel
    dlgProgress:showxy(iup.CENTER, iup.CENTER)
end

-- Replace the message shown above the progress bar.
function ProgressDisplay.SetMessage(strMessage)
    if dlgProgress then
        messageline.title = strMessage
    end
end

-- Advance the bar by iStep, wrapping back to 0 once the maximum is passed,
-- then let IUP process one round of pending events.
function ProgressDisplay.Step(iStep)
    if not dlgProgress then
        return
    end
    gaugeProgress.value = gaugeProgress.value + iStep
    local current = tonumber(gaugeProgress.value)
    local limit = tonumber(gaugeProgress.max)
    if current > limit then
        gaugeProgress.value = 0
    end
    iup.LoopStep()
end

-- Put the bar back to zero.
function ProgressDisplay.Reset()
    if dlgProgress then
        gaugeProgress.value = 0
    end
end

-- Report whether Cancel has been pressed (returns the cancelflag global).
function ProgressDisplay.Cancel()
    return cancelflag
end

-- Clear the cancel flag and destroy the window if it exists.
function ProgressDisplay.Close()
    cancelflag = false
    if dlgProgress then
        dlgProgress:destroy()
        dlgProgress = nil
    end
end

-- Iterator factory over all records of the given type, for use in a
-- generic `for`. Note that the SAME pointer object is handed back on every
-- step (it is re-aimed each time), so callers that keep a record must copy it.
function records(type)
    local cursor = fhNewItemPtr()   -- runs one record ahead
    local current = fhNewItemPtr()  -- the pointer given to the caller
    cursor:MoveToFirstRecord(type)
    local function step()
        current:MoveTo(cursor)
        cursor:MoveNext()
        if not current:IsNotNull() then
            return
        end
        return current
    end
    return step
end

--- Create an empty column table that remembers the order in which columns
-- are added.
-- Assigning a new key records that key in the metatable's `seq` list and
-- bumps the metatable's `col` counter; calling the table (`tbl()`) returns an
-- iterator that yields the column values in insertion order.
-- @return the column table, and 0 as the initial row count.
function createResultTable()
    -- create metatable
    local tblOutput_mt = {}
    tblOutput_mt.col = 0
    tblOutput_mt.seq = {}
    -- __newindex only fires for keys not yet present, so re-assigning an
    -- existing column does not add it to the sequence a second time.
    tblOutput_mt.__newindex = function (t,k,v)
        rawset(t,k,v) -- update original table
        local m = getmetatable(t)
        m.col = m.col + 1
        m.seq[#m.seq + 1] = k
    end
    tblOutput_mt.__call = function (t)
        local i = 0
        local m = getmetatable(t)
        -- The length operator works on every Lua version, whereas
        -- table.getn no longer exists in Lua 5.2 and later.
        local n = #m.seq
        return function ()
            i = i + 1
            if i <= n then return t[m.seq[i]] end
        end
    end
    local tblOutput = {} -- Define Columns Table
    setmetatable(tblOutput, tblOutput_mt)
    local iC = 0 -- Define count of lines
    return tblOutput,iC
end


-- Open strFileName in the given io.open mode and return the file handle.
-- Raises an error naming the mode, the file and the reason if it cannot
-- be opened.
function OpenFile(strFileName,strMode)
    local handle, reason = io.open(strFileName,strMode)
    if handle then
        return handle
    end
    error("\n Unable to open file in \""..strMode.."\" mode. \n "..strFileName.." \n "..tostring(reason).." \n")
end -- function OpenFile

-- Return true when strFileName can be opened for reading, false otherwise.
-- The probe handle is closed again straight away.
function FileExists(strFileName)
    local probe = io.open(strFileName,"r")
    if probe == nil then
        return false
    end
    io.close(probe)
    return true
end

-- Read the whole of strFileName in binary mode and return it as one string.
-- Opening goes through OpenFile, so a file that cannot be opened raises an
-- error; a failing close raises as well.
function LoadFromFile(strFileName)
    local source = OpenFile(strFileName,"rb")
    local contents = source:read("*all")
    assert(source:close())
    return contents
end -- function LoadFromFile

-- Return true when String begins with Start (plain comparison, no patterns).
-- Being added to the string table, it is also callable as s:starts(prefix).
function string.starts(String,Start)
    local prefixLength = string.len(Start)
    local head = string.sub(String,1,prefixLength)
    return head == Start
end
-------------------------------------- SQL Lite Functions
--- Open (creating if necessary) the plugin's SQLite data file and make sure
-- the `medialist` table exists.
-- @param dbname Not used; the file name comes from fhGetPluginDataFileName().
-- @return the LuaSQL environment and the open connection, in that order.
--         Both are returned rather than stored in globals; pass them to
--         closedb() when finished.
function opendb(dbname)
    -- Check for Settings Database and create if needed
    local db = fhGetPluginDataFileName()
    local dbenv = assert (luasql.sqlite3())
    -- connect to data source, if the file does not exist it will be created
    local dbcon = assert (dbenv:connect(db))

    checkTable(dbcon,'medialist',
    [[CREATE TABLE medialist(recordid BIGINT, 
               md5 varchar(100), UNIQUE (recordid))
    ]])
    return dbenv,dbcon
end
--- Create a table in the SQLite database if it does not exist yet.
-- @param dbcon        Open database connection.
-- @param table        Name of the table to look for in sqlite_master.
--                     (The parameter shadows the `table` library inside this
--                     function, which does not use that library.)
-- @param createString SQL statement executed when the table is missing.
-- Raises an error if either statement fails.
function checkTable(dbcon,table,createString)
    local sql = string.format([[SELECT count(name) as count FROM sqlite_master WHERE type='table' AND name='%s']],table)
    local cur = assert(dbcon:execute(sql))
    -- Called without a result table so the single count column comes back
    -- as a plain value.
    local rowcount = cur:fetch()
    cur:close()
    if tonumber(rowcount) == 0 then
        -- Table not found create it
        assert(dbcon:execute(createString))
    end
end
-- Close the database connection and then the environment it came from.
-- dbenv and dbcon are the two values returned by opendb().
function closedb(dbenv,dbcon)
    dbcon:close() -- connection first, environment afterwards
    dbenv:close()
end
-- Return the number of rows in the medialist table as a number, or false
-- when the query produced no row. Raises if the query cannot be executed.
function countrecords(dbcon)
    local cursor = assert(dbcon:execute([[SELECT count(recordid) as count FROM medialist]]))
    local firstRow = cursor:fetch({},'a')
    cursor:close()
    if not firstRow then
        return false
    end
    return tonumber(firstRow['count'])
end
-- Look up the cached md5 value stored for recordid in the medialist table.
-- Returns the row's md5 field, or false when no row exists for that id.
-- Raises if the query cannot be executed.
function chkrecord(dbcon,recordid)
    local query = ([[SELECT * FROM medialist where recordid=%s]]):format(recordid)
    local cursor = assert(dbcon:execute(query))
    local match = cursor:fetch({},'a')
    cursor:close()
    if not match then
        return false
    end
    return match['md5']
end
-- Store the md5 value for recordid in the medialist table. The statement is
-- INSERT OR IGNORE, so the SQL itself leaves an existing row untouched.
-- Raises if the statement cannot be executed; returns nothing.
function updaterecord(dbcon,recordid,md5)
    local statement = ("INSERT OR IGNORE INTO medialist (recordid,md5) VALUES (%s,'%s')"):format(recordid,md5)
    assert(dbcon:execute(statement))
end
-- Delete every row from the medialist table.
-- Raises if the statement cannot be executed; returns nothing.
function clearlist(dbcon)
    assert(dbcon:execute("DELETE FROM medialist"))
end

-- Compatibility shims installed only when the application version is above 6.
-- They supply table.getn and a global unpack for code written against them
-- (presumably because the newer application embeds a Lua version that no
-- longer ships these - confirm against the application's documentation).
if fhGetAppVersion() > 6 then
    -- Counts every key in t using pairs. For a plain list this equals its
    -- length; for tables with non-list keys it counts those too.
    function table.getn(t)
        local count = 0
        for _ in pairs(t) do
            count = count + 1
        end
        return count
    end

    -- Global unpack forwarding to table.unpack. Any extra arguments (the
    -- optional start and end indices) are passed through unchanged.
    function unpack(t, ...)
        return table.unpack(t, ...)
    end
end


-- Perform a GET on url through a WinHttpRequest COM object created with
-- luacom, and return that object so the caller can read the response from it.
-- After sending, WaitForResponse is called with an argument of 30.
-- NOTE(review): the `false` passed to Open is presumably the "async" flag,
-- i.e. the request is sent synchronously - confirm against the WinHTTP docs.
local function httpRequest(url)
  local request = luacom.CreateObject("winhttp.winhttprequest.5.1")
  request:Open("GET",url,false)
  request:Send()
  request:WaitForResponse(30)
  return request
end -- local function httpRequest
--- Require a Lua module, offering to download and install it into the
-- application's Plugins folder first when require cannot find it.
-- @param module   Name of the module bundle; `<module>.mod` is the manifest
--                 that lists the files to download. Also used in messages.
-- @param extended Optional name handed to require(); defaults to `module`.
-- @return true when the module was required, false when the user declined
--         the download, the download/installation failed, or require raised
--         an error other than "module not found".
function loadrequire(module,extended)
  if not(extended) then extended = module end

  -- Download one file (or, when `filename` is nil, the module manifest and
  -- then every file it lists) into the Plugins folder.
  -- Returns true on success and false on any failure.
  local function installmodule(module,filename,size)
    -- Create every directory along `path`, one level at a time.
    local function makesubdirs(path)
      local sep, pStr = package.config:sub(1, 1), ""
      for dir in path:gmatch("[^" .. sep .. "]+") do
        pStr = pStr .. dir .. sep
        lfs.mkdir(pStr)
      end        
    end
    local bmodule = false
    if not(filename) then
      filename = module..'.mod'
      bmodule = true
    end
    local storein = fhGetContextInfo('CI_APP_DATA_FOLDER')..'\\Plugins\\'
    -- Check if subdirectory needed
    local path = string.match(filename, "(.-)[^/]-[^%.]+$")
    if path and path ~= "" then
      path = path:gsub('/','\\')
      -- Create sub-directory
      makesubdirs(storein..path)
    end
    -- Skip the download when a file of the expected size is already present
    local attr = lfs.attributes(storein..filename)
    if attr and attr.mode == 'file' and attr.size == size then return true end
    -- Get file down and install it
    local url = "http://www.family-historian.co.uk/lnk/getpluginmodule.php?file="..filename
    local isOK, reply = pcall(httpRequest,url)
    if not isOK then
      fhMessageBox(reply.."\nLoad Require module finds the Internet inaccessible.")
      return false
    end
    local http = reply
    local status = http.StatusText
    if status == 'OK' then
      local data = http.ResponseBody
      if bmodule then
        -- NOTE(review): the manifest is fetched over plain HTTP and executed
        -- as Lua code; consider HTTPS and/or validating it before running.
        local modlist, loaderr = loadstring(data)
        if not modlist then
          fhMessageBox('Error from require("'..module..'") command:\n'..tostring(loaderr))
          return false
        end
        for x,y in pairs(modlist()) do
          if type(x) == 'number' and type(y) == 'string' then
            x = y -- revert to original 'filename' ipairs modlist
            y = 0
          end -- otherwise use ['filename']=size pairs modlist
          if not(installmodule(module,x,y)) then
            -- A listed file could not be installed, so the module is
            -- incomplete: report failure instead of claiming success.
            return false
          end
        end
      else
        local function OpenFile(strFileName,strMode)
          local fileHandle, strError = io.open(strFileName,strMode)
          if not fileHandle then
            error("\n Unable to open file in \""..strMode.."\" mode. \n "..
              strFileName.." \n "..tostring(strError).." \n")
          end
          return fileHandle
        end -- OpenFile
        local function SaveStringToFile(strString,strFileName)
          local fileHandle = OpenFile(strFileName,"wb")
          fileHandle:write(strString)
          assert(fileHandle:close())
        end -- SaveStringToFile
        SaveStringToFile(data,storein..filename)
      end
      return true
    else
      fhMessageBox('An error occurred in Download, so please try later, or perform a manual download from the FHUG Knowledge Base')
      return false
    end
  end

  local function requiref(module)
    require(module)
  end
  -- requiref returns nothing, so `err` is only set when require failed
  local res, err = pcall(requiref,extended)
  if err then
    -- Non-alphanumeric characters in the name are escaped for the pattern
    if  err:match("module '"..extended:gsub("(%W)","%%%1").."' not found") then
      local ans = fhMessageBox(
        'This plugin requires '..module..' support, please click OK to download and install the module, which may take a few minutes in some systems',
        'MB_OKCANCEL','MB_ICONEXCLAMATION')
      if ans ~= 'OK' then
        return false
      end
      if installmodule(module) then
        package.loaded[extended] = nil -- Reset Failed Load
        require(extended)
      else
        return false
      end
    else
      fhMessageBox('Error from require("'..module..'") command:\n'..(err or ''))
      return false
    end
  end
  return true
end


-------------------------------------- Call Main Function
require 'lfs'
require 'luacom'

require("iuplua");
iup.SetGlobal("CUSTOMQUITMESSAGE","YES")

-- `ver` is deliberately global. Up to version 6 the md5 module goes through
-- loadrequire (which can offer to download it) and the plugin stops quietly
-- if that fails; on later versions it is required directly into the global
-- `md5`.
ver = fhGetAppVersion()
if ver <= 6 then
	if not loadrequire('md5') then
		return
	end
else
	md5 = require('md5')
end
mainfunction()

-- Source:Check-for-Possible-Duplicate-Media-4.fh_lua