Re: program to remove duplicates
Sujet : Re: program to remove duplicates
De : fir (at) *nospam* grunge.pl (fir)
Groupes : comp.lang.c
Date : 21. Sep 2024, 23:48:05
Autres entêtes
Organisation : i2pn2 (i2pn.org)
Message-ID : <719cc7f564ea96ce3670134433c77e96cdf4b99d@i2pn2.org>
References : 1 2 3 4 5
User-Agent : Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0 SeaMonkey/2.24
Okay, the previous code had some errors, but I made changes and this one seems to work.
I ran it on 50 GB of files recovered by Recuva and it moved about 22 GB as duplicates... by the eye test it seems to work.
#include<windows.h>
#include<limits.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
// Copies the string `src` into `dest`, writing at most `n` bytes in total.
// The result is always NUL-terminated: when `src` holds `n` or more
// characters it is truncated to `n-1` characters plus the terminator.
// Nothing is written when `n` is not positive.
void StrCopyMaxNBytes(char* dest, char* src, int n)
{
    if (n <= 0) return;

    int i = 0;
    while (i < n - 1 && src[i] != '\0')
    {
        dest[i] = src[i];
        i++;
    }
    // Either the source ended or the last slot was reached: terminate here.
    dest[i] = '\0';
}
// List of file names.
//
// FileNameList is a growable heap array holding FileNameList_Size entries.
// Each entry keeps a (possibly truncated, always NUL-terminated) file name,
// the file size supplied by the caller, and an is_duplicate flag that
// starts out as 0.
//
// The capacity constant is an enum and the struct is typedef'd so that the
// declarations are valid in C as well as in C++.
enum { FileNameListEntry_name_max = 500 };

typedef struct FileNameListEntry
{
    char name[FileNameListEntry_name_max];
    unsigned int file_size;
    int is_duplicate;
} FileNameListEntry;

FileNameListEntry* FileNameList = NULL;
int FileNameList_Size = 0;

// Appends one entry to FileNameList.
//   name      - NUL-terminated file name; truncated to fit the entry buffer.
//   file_size - size stored with the entry.
// Prints a message and terminates the program if memory cannot be obtained.
void FileNameList_AddOne(char* name, unsigned int file_size)
{
    size_t new_count = (size_t)FileNameList_Size + 1;

    // Grow through a temporary so the existing list is not lost on failure.
    FileNameListEntry* grown =
        (FileNameListEntry*) realloc(FileNameList, new_count * sizeof(FileNameListEntry));
    if (!grown)
    {
        printf("\n *** error: out of memory while growing the file name list");
        exit(-1);
    }
    FileNameList = grown;

    FileNameListEntry* entry = &FileNameList[FileNameList_Size];
    // snprintf always terminates the destination, even when it truncates.
    snprintf(entry->name, sizeof entry->name, "%s", name);
    entry->file_size = file_size;
    entry->is_duplicate = 0;

    FileNameList_Size++;
}
// collect list of filenames
WIN32_FIND_DATA ffd;
void ReadDIrectoryFileNamesToList(char* dir)
{
HANDLE h = FindFirstFile(dir, &ffd);
if(!h) { printf("error reading directory"); exit(-1);}
do {
if (!(ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
{
FileNameList_AddOne(ffd.cFileName, ffd.nFileSizeLow);
if(ffd.nFileSizeHigh!=0) { printf("this program only work for files up to 4GB"); exit(-1);}
}
}
while (FindNextFile(h, &ffd));
}
#include <sys/stat.h>
// Returns the size in bytes of the file `filename`, obtained with stat().
// Prints a message and terminates the program when the file cannot be
// stat'ed, or when its size does not fit in the int return type (instead
// of returning a wrapped or negative value).
int GetFileSize2(char *filename)
{
    struct stat st;

    if (stat(filename, &st) != 0)
    {
        printf("\n *** error obtaining file size for %s", filename);
        exit(-1);
    }

    if (st.st_size < 0 || st.st_size > INT_MAX)
    {
        printf("\n *** error: size of file %s does not fit in int", filename);
        exit(-1);
    }

    return (int) st.st_size;
}
// Reports whether `name` refers to an existing directory.
// Returns 1 when stat() succeeds and the entry is a directory, 0 otherwise.
int FolderExist(char *name)
{
    struct stat info;

    if (stat(name, &info) != 0)
    {
        return 0;
    }
    return S_ISDIR(info.st_mode) ? 1 : 0;
}
//////////
// Second file buffer: heap storage and the number of bytes it holds.
unsigned char* bytes2 = NULL;
int bytes2_size = 0;
int bytes2_allocked = 0;   // not referenced by the visible code

// Resizes the bytes2 buffer to `size` bytes and records the new size.
// Returns the (possibly moved) buffer, which is never NULL: a request for
// zero bytes still keeps a one-byte allocation so callers always get a
// usable pointer. Terminates the program on a negative size or when the
// allocation fails.
unsigned char* bytes2_resize(int size)
{
    if (size < 0)
    {
        printf("\n *** error: negative buffer size %d requested", size);
        exit(-1);
    }

    size_t wanted = size > 0 ? (size_t)size : 1;

    // Resize through a temporary so the old buffer is not lost on failure.
    unsigned char* grown = (unsigned char*)realloc(bytes2, wanted * sizeof(unsigned char));
    if (!grown)
    {
        printf("\n *** error: cannot allocate %d bytes for file buffer", size);
        exit(-1);
    }

    bytes2 = grown;
    bytes2_size = size;
    return bytes2;
}
// Loads the whole file `name` into the bytes2 buffer (resized to the file
// length). Terminates the program if the size cannot be used, the file
// cannot be opened, the buffer cannot be obtained, or fewer bytes than
// expected were read.
void bytes2_load(unsigned char* name)
{
    // The path is handed to char*-based APIs; convert once, explicitly.
    char *path = (char*)name;

    int flen = GetFileSize2(path);
    if (flen < 0)
    {
        printf("\n *** error: unusable size for file %s", path);
        exit(-1);
    }

    FILE *f = fopen(path, "rb");
    if(!f) { printf( "errot: cannot open file %s for load ", path); exit(-1); }

    unsigned char *dest = bytes2_resize(flen);
    if (!dest && flen > 0)
    {
        fclose(f);
        printf("\n *** error: no memory to load file %s", path);
        exit(-1);
    }

    size_t loaded = 0;
    if (flen > 0) loaded = fread(dest, 1, (size_t)flen, f);
    fclose(f);

    // A short read would leave stale bytes in the buffer; treat it as fatal.
    if (loaded != (size_t)flen)
    {
        printf("\n *** error: could not read all of file %s", path);
        exit(-1);
    }
}
/////////////////
// First file buffer: heap storage and the number of bytes it holds.
unsigned char* bytes1 = NULL;
int bytes1_size = 0;
int bytes1_allocked = 0;   // not referenced by the visible code

// Resizes the bytes1 buffer to `size` bytes and records the new size.
// Returns the (possibly moved) buffer, which is never NULL: a request for
// zero bytes still keeps a one-byte allocation so callers always get a
// usable pointer. Terminates the program on a negative size or when the
// allocation fails.
unsigned char* bytes1_resize(int size)
{
    if (size < 0)
    {
        printf("\n *** error: negative buffer size %d requested", size);
        exit(-1);
    }

    size_t wanted = size > 0 ? (size_t)size : 1;

    // Resize through a temporary so the old buffer is not lost on failure.
    unsigned char* grown = (unsigned char*)realloc(bytes1, wanted * sizeof(unsigned char));
    if (!grown)
    {
        printf("\n *** error: cannot allocate %d bytes for file buffer", size);
        exit(-1);
    }

    bytes1 = grown;
    bytes1_size = size;
    return bytes1;
}
// Loads the whole file `name` into the bytes1 buffer (resized to the file
// length). Terminates the program if the size cannot be used, the file
// cannot be opened, the buffer cannot be obtained, or fewer bytes than
// expected were read.
void bytes1_load(unsigned char* name)
{
    // The path is handed to char*-based APIs; convert once, explicitly.
    char *path = (char*)name;

    int flen = GetFileSize2(path);
    if (flen < 0)
    {
        printf("\n *** error: unusable size for file %s", path);
        exit(-1);
    }

    FILE *f = fopen(path, "rb");
    if(!f) { printf( "errot: cannot open file %s for load ", path); exit(-1); }

    unsigned char *dest = bytes1_resize(flen);
    if (!dest && flen > 0)
    {
        fclose(f);
        printf("\n *** error: no memory to load file %s", path);
        exit(-1);
    }

    size_t loaded = 0;
    if (flen > 0) loaded = fread(dest, 1, (size_t)flen, f);
    fclose(f);

    // A short read would leave stale bytes in the buffer; treat it as fatal.
    if (loaded != (size_t)flen)
    {
        printf("\n *** error: could not read all of file %s", path);
        exit(-1);
    }
}
/////////////
// Compares two files byte by byte.
// Returns 1 when both files have identical contents, 0 otherwise (files of
// different length are reported as not equal).
//
// The files are streamed in fixed-size chunks, so memory use does not
// depend on the file size, and the comparison stops at the first chunk
// that differs. Terminates the program when a file cannot be opened or a
// read error occurs.
int CompareTwoFilesByContentsAndSayIfEqual(char* file_a, char* file_b)
{
    enum { CHUNK_BYTES = 64 * 1024 };
    static unsigned char chunk_a[CHUNK_BYTES];
    static unsigned char chunk_b[CHUNK_BYTES];

    FILE *fa = fopen(file_a, "rb");
    if(!fa) { printf( "errot: cannot open file %s for load ", file_a); exit(-1); }

    FILE *fb = fopen(file_b, "rb");
    if(!fb) { fclose(fa); printf( "errot: cannot open file %s for load ", file_b); exit(-1); }

    int equal = 1;
    for (;;)
    {
        size_t got_a = fread(chunk_a, 1, sizeof chunk_a, fa);
        size_t got_b = fread(chunk_b, 1, sizeof chunk_b, fb);

        if (ferror(fa) || ferror(fb))
        {
            fclose(fa);
            fclose(fb);
            printf("\n *** error reading %s or %s during compare", file_a, file_b);
            exit(-1);
        }

        // Unequal chunk lengths mean one file ended before the other.
        if (got_a != got_b) { equal = 0; break; }

        // Both files are exhausted without any difference found.
        if (got_a == 0) break;

        if (memcmp(chunk_a, chunk_b, got_a) != 0) { equal = 0; break; }
    }

    fclose(fa);
    fclose(fb);
    return equal;
}
#include<direct.h>
#include <dirent.h>
#include <errno.h>
// Number of files moved into the "duplicates" subfolder so far.
int duplicates_moved = 0;

// Moves the file `name` into the "duplicates" subfolder, creating the
// folder first when it does not exist yet, and counts the move.
// Terminates the program if the folder cannot be created, the target path
// does not fit the path buffer, or the rename fails.
void MoveDuplicateToSubdirectory(char*name)
{
    if(!FolderExist("duplicates"))
    {
        int n = _mkdir("duplicates");
        if(n) { printf ("\n i cannot create subfolder"); exit(-1); }
    }

    static char renamed[1000];
    // "\\" is the escaped path separator; the original "\%" was an invalid
    // escape that left no separator between the folder and the file name.
    int n = snprintf(renamed, sizeof(renamed), "duplicates\\%s", name);
    if(n < 0 || (size_t)n >= sizeof(renamed))
    {
        printf("\n target path for %s is too long", name);
        exit(-1);
    }

    if(rename(name, renamed))
    {printf("\n rename %s %s failed", name, renamed); exit(-1);}

    duplicates_moved++;
}
// Program entry point.
// Prints the banner, lists the files of the current directory, marks every
// file whose contents equal those of an earlier file of the same size as a
// duplicate, then moves all marked files into the "duplicates" subfolder.
// Returns 0 on completion; the helpers terminate the program on errors.
int main()
{
    printf("\n (RE)MOVE FILE DUPLICATES");
    printf("\n ");
    printf("\n this program searches for binaric (comparec byute to byte)");
    printf("\n duplicates/multiplicates of files in its own");
    printf("\n folder (no search in subdirectories, just flat)");
    printf("\n and if found it copies it into 'duplicates'");
    printf("\n subfolder it creates If you want to remove that");
    printf("\n duplicates you may delete the subfolder then,");
    printf("\n if you decided to not remove just move the contents");
    printf("\n of 'duplicates' subfolder back");
    printf("\n ");
    printf("\n note this program not work on files larger than 4GB ");
    printf("\n and no warranty at all youre responsible for any dameges ");
    printf("\n if use of this program would eventually do - i just wrote ");
    printf("\n the code and it work for me but not tested it to much besides");
    printf("\n ");
    printf("\n september 2024");
    printf("\n ");
    printf("\n starting.. ");

    ReadDIrectoryFileNamesToList("*");

    printf("\n\n found %d files in current directory", FileNameList_Size);
    // file_size is unsigned int, so it is printed with %u.
    for(int i=0; i< FileNameList_Size; i++)
        printf("\n #%d %s %u", i, FileNameList[i].name, FileNameList[i].file_size );

    // Pairwise scan: each not-yet-marked file is compared with every later
    // not-yet-marked file of the same size; matches are marked as duplicates.
    for(int i=0; i< FileNameList_Size; i++)
    {
        if(FileNameList[i].is_duplicate) continue;

        for(int j=i+1; j< FileNameList_Size; j++)
        {
            if(FileNameList[j].is_duplicate) continue;
            // Files of different size cannot have equal contents.
            if(FileNameList[i].file_size!=FileNameList[j].file_size) continue;

            if( CompareTwoFilesByContentsAndSayIfEqual(FileNameList[i].name, FileNameList[j].name))
            {
                printf("\n#%d %s (%u) has duplicate #%d %s (%u) ",i, FileNameList[i].name,FileNameList[i].file_size, j, FileNameList[j].name, FileNameList[j].file_size);
                FileNameList[j].is_duplicate=1;
            }
        }
    }

    printf("\n moving duplicates to subfolder...");
    for(int i=0; i< FileNameList_Size; i++)
    {
        if(FileNameList[i].is_duplicate) MoveDuplicateToSubdirectory(FileNameList[i].name);
    }

    printf(" \n\n %d duplicates moved \n\n\n", duplicates_moved);

    // Report success to the caller; the original multi-character constant
    // 'ok' produced a non-zero exit status.
    return 0;
}
Haut de la page
Les messages affichés proviennent d'usenet.
NewsPortal