On Wed, 29 May 2024 00:54:23 +0100
bart <
bc@freeuk.com> wrote:
I suspect that your system just has a much faster fgetc
implementation. How long does an fgetc() loop over a 100MB input take
on your machine?
On mine it's about 2 seconds on Windows, and 3.7 seconds on WSL.
Using DMC, it's 0.65 seconds.
Your suspicion proved incorrect, but it turned out to be a pretty good
question!
#include <stdio.h>
#include <string.h>  /* bug fix: strcmp() was used without a prototype */

static const char usage[] =
"fgetc_test - read file with fgetc() and calculate xor checksum\n"
"Usage:\n"
" fgetc_test infile\n"
;

/*
 * Read a file one byte at a time with fgetc() and print its length and
 * XOR checksum. Exit status: 0 success, 1 usage/I/O error.
 */
int main(int argz, char** argv)
{
    // process command line
    if (argz < 2) {
        fprintf(stderr, "%s", usage);
        return 1;
    }
    char* infilename = argv[1];
    static const char *help_aliases[] = { "-h", "-H", "-?", "--help", "--?" };
    const int n_help_aliases = sizeof(help_aliases)/sizeof(help_aliases[0]);
    for (int i = 0; i < n_help_aliases; ++i) {
        if (strcmp(infilename, help_aliases[i]) == 0) {
            fprintf(stderr, "%s", usage);
            return 0;
        }
    }
    // open input in binary mode so CR/LF translation doesn't skew the count
    FILE* fpin = fopen(infilename, "rb");
    if (!fpin) {
        perror(infilename);
        return 1;
    }
    size_t n = 0;
    unsigned char cs = 0;
    int c;
    // idiomatic EOF test; fgetc() returns EOF (a negative int) at end or error
    while ((c = fgetc(fpin)) != EOF) {
        cs ^= (unsigned char)c;
        ++n;
    }
    if (ferror(fpin)) {
        perror(infilename);
        fclose(fpin);
        return 1;
    }
    fclose(fpin);  // bug fix: the stream was never closed
    // bug fix: %zu is the conversion for size_t (%zd is for signed size)
    printf("%zu byte. xor sum %d.\n", n, cs);
    return 0;
}
$ time ../quick_xxd/getc_test.exe uu.txt
193426754 byte. xor sum 1.
real 0m3.604s
user 0m0.000s
sys 0m0.000s
52 MB/s. Very, very slow!
The same test with getc() instead of fgetc().
$ time ../quick_xxd/getc_test.exe uu.txt
193426754 byte. xor sum 1.
real 0m3.588s
user 0m0.000s
sys 0m0.000s
54 MB/s. Almost the same as above.
So, maybe fgetc() is not at fault? Maybe it's the OS and the crap that
the corporate IT adds on top of the OS?
Let's test this hypothesis.
#include <stdio.h>
#include <string.h>  /* bug fix: strcmp() was used without a prototype */

static const char usage[] =
"fread_test - read file with fread() and calculate xor checksum\n"
"Usage:\n"
" fread_test infile\n"
;

/*
 * Read a file in large fread() chunks and print its length and XOR
 * checksum. Exit status: 0 success, 1 usage/I/O error.
 */
int main(int argz, char** argv)
{
    // process command line
    if (argz < 2) {
        fprintf(stderr, "%s", usage);
        return 1;
    }
    char* infilename = argv[1];
    static const char *help_aliases[] = { "-h", "-H", "-?", "--help", "--?" };
    const int n_help_aliases = sizeof(help_aliases)/sizeof(help_aliases[0]);
    for (int i = 0; i < n_help_aliases; ++i) {
        if (strcmp(infilename, help_aliases[i]) == 0) {
            fprintf(stderr, "%s", usage);
            return 0;
        }
    }
    // open input in binary mode so CR/LF translation doesn't skew the count
    FILE* fpin = fopen(infilename, "rb");
    if (!fpin) {
        perror(infilename);
        return 1;
    }
    size_t n = 0;
    unsigned char cs = 0;
    for (;;) {
        enum { BUF_SZ = 128*1024 };
        unsigned char inpbuf[BUF_SZ];
        size_t len = fread(inpbuf, 1, BUF_SZ, fpin);
        n += len;
        for (size_t i = 0; i < len; ++i)
            cs ^= inpbuf[i];
        // a short read means EOF or error; ferror() below tells which
        if (len != BUF_SZ)
            break;
    }
    if (ferror(fpin)) {
        perror(infilename);
        fclose(fpin);
        return 1;
    }
    fclose(fpin);  // bug fix: the stream was never closed
    // bug fix: %zu is the conversion for size_t (%zd is for signed size)
    printf("%zu byte. xor sum %d.\n", n, cs);
    return 0;
}
$ time ../quick_xxd/fread_test.exe uu.txt
193426754 byte. xor sum 1.
real 0m0.312s
user 0m0.000s
sys 0m0.000s
$ time ../quick_xxd/fread_test.exe uu.txt
193426754 byte. xor sum 1.
real 0m0.109s
user 0m0.000s
sys 0m0.000s
$ time ../quick_xxd/fread_test.exe uu.txt
193426754 byte. xor sum 1.
real 0m0.094s
user 0m0.000s
sys 0m0.000s
So, at least for reading of multi-megabyte file the OS and corporate
crap are not holding me back. The first read is 620 MB/s - as expected
for SATA-3 SSD. Repeating reads are from OS cache - not as fast as on
Linux, but fast enough to not be a bottleneck in our xxd replacement
gear.
So, let's rewrite our tiny app with fread().
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char usage[] =
"bin_to_list - convert binary file to comma-delimited list of decimal numbers\n"
"Usage:\n"
" bin_to_list infile [oufile]\n"
"When output file is not specified, the result is written to standard output.\n"
;

/*
 * Convert a binary file to a comma-delimited list of decimal byte values,
 * wrapping output lines at roughly MAX_CHAR_PER_LINE characters.
 * Exit status: 0 success, 1 input error, 2 output error (a partial output
 * file is removed on failure).
 */
int main(int argz, char** argv)
{
    // process command line
    if (argz < 2) {
        fprintf(stderr, "%s", usage);
        return 1;
    }
    char* infilename = argv[1];
    static const char *help_aliases[] = { "-h", "-H", "-?", "--help", "--?" };
    const int n_help_aliases = sizeof(help_aliases)/sizeof(help_aliases[0]);
    for (int i = 0; i < n_help_aliases; ++i) {
        if (strcmp(infilename, help_aliases[i]) == 0) {
            fprintf(stderr, "%s", usage);
            return 0;
        }
    }
    // open files
    FILE* fpin = fopen(infilename, "rb");
    if (!fpin) {
        perror(infilename);
        return 1;
    }
    FILE* fpout = stdout;
    char* outfilename = NULL;
    if (argz > 2) {
        outfilename = argv[2];
        fpout = fopen(outfilename, "w");
        if (!fpout) {
            perror(outfilename);
            fclose(fpin);
            return 1;
        }
    }

    enum { MAX_CHAR_PER_LINE = 80, MAX_CHAR_PER_NUM = 4,
           ALMOST_FULL_THR = MAX_CHAR_PER_LINE-MAX_CHAR_PER_NUM };

    // Build the lookup table: bin2dec[b][0..3] holds the text "b," (unused
    // tail bytes are don't-care); bin2dec[b][MAX_CHAR_PER_NUM] is its length.
    unsigned char bin2dec[256][MAX_CHAR_PER_NUM+1];
    for (int i = 0; i < 256; ++i) {
        char tmp[8];
        int len = sprintf(tmp, "%d,", i);  // at most "255," = 4 chars + NUL
        memcpy(bin2dec[i], tmp, MAX_CHAR_PER_NUM);
        bin2dec[i][MAX_CHAR_PER_NUM] = (unsigned char)len;
    }

    // main loop
    int err = 0;
    unsigned char outbuf[MAX_CHAR_PER_LINE+MAX_CHAR_PER_NUM]; // room for EOL spill
    unsigned char* outptr = outbuf;
    for (;;) {
        enum { BUF_SZ = 128*1024 };
        unsigned char inpbuf[BUF_SZ];
        size_t len = fread(inpbuf, 1, BUF_SZ, fpin);
        for (size_t i = 0; i < len; ++i) {
            unsigned char* dec = bin2dec[inpbuf[i]]; // value already 0..255
            // always copy a fixed MAX_CHAR_PER_NUM bytes (cheaper than a
            // variable-length copy), then advance by the real text length
            memcpy(outptr, dec, MAX_CHAR_PER_NUM);
            outptr += dec[MAX_CHAR_PER_NUM];
            if (outptr > &outbuf[ALMOST_FULL_THR]) { // spill output buffer
                *outptr++ = '\n';
                size_t wrlen = (size_t)(outptr - outbuf);
                if (fwrite(outbuf, 1, wrlen, fpout) != wrlen) {
                    err = 2;
                    break;
                }
                outptr = outbuf;
            }
        }
        // a short read means EOF or error; ferror() below tells which
        if (err || len != BUF_SZ)
            break;
    }
    if (ferror(fpin)) {
        perror(infilename);
        err = 1;
    }
    // last (partial) line
    if (outptr != outbuf && err == 0) {
        *outptr++ = '\n';
        size_t wrlen = (size_t)(outptr - outbuf);
        if (fwrite(outbuf, 1, wrlen, fpout) != wrlen)
            err = 2;
    }
    // completion and cleanup
    fclose(fpin);
    if (outfilename) {
        // bug fix: fclose() flushes buffered output and can fail; a write
        // error surfacing here would previously have gone unreported
        if (fclose(fpout) != 0 && err == 0)
            err = 2;
        if (err == 2)
            perror(outfilename);
        if (err)
            remove(outfilename);  // don't leave a truncated output file behind
    } else if (err == 2) {
        perror("stdout");  // bug fix: write errors to stdout were silent
    }
    return err;
}
Now the test. Input file size: 88,200,192 bytes
$ time ../quick_xxd/bin_to_listmb
/d/intelFPGA/18.1/quartus/bin64/db_wys.dll uu.txt
real 0m0.577s
user 0m0.000s
sys 0m0.000s
152.8 MB/s. That's much better. Some people would even say that it is
good enough.