On Thu, 25 Apr 2024 17:56:06 +0300
Michael S <
already5chosen@yahoo.com> wrote:
On Sat, 20 Apr 2024 21:10:23 +0300
Michael S <already5chosen@yahoo.com> wrote:
On Fri, 19 Apr 2024 14:59:20 -0700
Tim Rentsch <tr.17687@z991.linuxsc.com> wrote:
Did you mean you some algorithms whose worst case memory
behavior is strictly less than O( total number of pixels )?
I think it would be helpful to adopt a standard terminology
where the pixel field is of size M x N, otherwise I'm not
sure what O(N) refers to.
No, I mean O(max(M,N)) plus possibly some logarithmic component that
loses significance when images grow bigger.
More so, if bounding rectangle of the shape is A x B then I'd like
memory requirements to be O(max(A,B)), but so far it does not appear
to be possible, or at least not possible without significant
complications and further slowdown. So, as an intermediate goal I am
willing to accept that allocation would be O(max(M,N)). but amount
of touched memory is O(max(A,B)).
but so
far they are all unreasonably slow - ~5 times slower than
the best.
I'm no longer working on the problem but I'm interested to
hear what you come up with.
Here is what I had in mind.
I tried to optimize as little as I can in order to make it as simple
as I can. Unfortunately, I am not particularly good at it, so, code
still contains few unnecessary "tricks" that make understanding a
little harder.
The code uses VLA and recursion for the same purpose of making it less
tricky.
If desired, the memory footprint could be easily reduced by factor of
8 through use of packed bit arrays instead arrays of _Bool.
Even in this relatively crude form for majority of shapes this code is
blazingly fast.
Unfortunately, in the worst case (both 'slalom' shapes) an execution
time is O(max(A,B)**3) which makes it unfit as general-purpose
routine. At the moment I don't see a solution for this problem.
Overall, it's probably a dead end.
>
A solution (sort of) is in line with the famous quite of David Wheeler
- to turn todo lists from bit maps into arrays of
abscesses-or-ordinates of contact points.
The cost is a memory footprint - 4x bigger than the previous version, 32
times bigger than above-mentioned "packed" variant of the previous
version. But in BigO sense it's the same.
In my tests it reduced the worst case time from O(max(A,B)**3) to
O(A*B*log(max(A,B)). Which is non-ideal, but probably acceptable,
because the bad cases should be very rare in practice.
The real trouble is different - I don't know if my "worst case" is
really the worst.
The code below is for presentation of algorithm in both clear and
compact manner, with emphasis on symmetry between x and y directions.
It is not optimal in any sense and can be made no-trivially faster both
by algorithm enhancements an by specialization of critical loops.
#include <stddef.h>
#include <string.h>
typedef unsigned char Color;
enum coordinate_axes {
x_i = 0, y_i, // index of pos[], ld[], 1st index of limits[][],
todo[][] };
enum from_to {
from_i = 0, to_i // 2nd index of limits[][], todo[][], I use 0 and 1
more commonly };
enum { // indices of todo[] lists
le_i = x_i*2+from_i, ri_i = x_i*2+to_i,
up_i = y_i*2+from_i, dn_i = y_i*2+to_i,
};
#define IDX2INC(ft_idx) ((int)(ft_idx)*2 - 1)
#define X2Y(axis) ((axis) ^ 1)
typedef struct {
Color* image;
Color old_color, new_color;
ptrdiff_t ld[2]; // {1, width}
int limits[2][2]; // {{0, width-1}, {0, height-1}
} floodfill4_param;
typedef struct {
int *todo[4]; // {left,right, up, down} - first item holds the #
of active entries int limits[2][2]; // {{x0, x1}, {y0, y1}};
int pos[2]; // {x, y}
} floodfill4_state;
static void floodfill4_core(
const floodfill4_param* prm,
const floodfill4_state* arg);
static _Bool floodfill4_expand(
const floodfill4_param* prm,
floodfill4_state* s,
enum coordinate_axes axis, enum from_to ft_idx);
int floodfill4(
Color* image,
int width, int height,
int x, int y,
Color old_color, Color new_color)
{
if (width <= 0 || height <= 0)
return 0;
if (x < 0 || x >= width || y < 0 || y >= height)
return 0;
if (image[(size_t)width*y+x] != old_color)
return 0;
int lr_todo[2][height+1];
int ud_todo[2][width+1];
floodfill4_param prm = {
.image = image,
.ld[x_i] = 1,
.ld[y_i] = width,
.limits = {{ 0, width-1}, {0, height-1}},
.old_color = old_color,
.new_color = new_color,
};
floodfill4_state s = {
.todo[le_i] = lr_todo[0],
.todo[ri_i] = lr_todo[1],
.todo[up_i] = ud_todo[0],
.todo[dn_i] = ud_todo[1],
.limits[x_i] = {x, x},
.limits[y_i] = {y, y},
.pos[x_i] = x, .pos[y_i] = y,
};
for (int i = 0; i < 4; ++i)
*s.todo[i] = 0;
// process central 1x1 rectangle
floodfill4_core(&prm, &s);
// expansion loop
for (unsigned idx = 0; idx < 4;) {
if (floodfill4_expand(&prm, &s, idx/2, idx % 2)) { // try to expand
idx = 0; // expansion succeed - restart from beginning
continue;
}
++idx;
}
return 1;
}
static __inline
void floodfill4_add(int* list, int val)
{
int n = list[0];
list[n+1] = val;
list[0] = n + 1;
}
// floodfill4_core - floodfill4 recursively in divide and conquer
fashion // arg->*_todo arrays (lists) initialized by caller.
// floodfill4_core adds to *_todo values that indicate need for further
// action, but never removes anything
static void floodfill4_core(const floodfill4_param* prm, const
floodfill4_state* arg) {
const int ni[2] = {
arg->limits[x_i][1]-arg->limits[x_i][0],
arg->limits[y_i][1]-arg->limits[y_i][0],
};
if (ni[x_i] + ni[y_i] == 0) { // nx==ny==1
prm->image[prm->ld[y_i]*arg->limits[y_i][0]+arg->limits[x_i][0]] =
prm->new_color; floodfill4_add(arg->todo[le_i], arg->limits[y_i][0]);
floodfill4_add(arg->todo[ri_i], arg->limits[y_i][0]);
floodfill4_add(arg->todo[up_i], arg->limits[x_i][0]);
floodfill4_add(arg->todo[dn_i], arg->limits[x_i][0]);
return;
}
floodfill4_state args[2];
args[0] = args[1] = *arg;
const enum coordinate_axes axis = ni[x_i] > ni[y_i] ?
x_i : // split vertically
y_i ; // split horizontally
int todo[2][ni[X2Y(axis)]+2]; // contacts between halves
const int hpos = (arg->limits[axis][0] + arg->limits[axis][1])/2; //
split point args[0].todo[axis*2+1] = todo[0]; args[0].limits[axis][1]
= hpos; args[1].todo[axis*2+0] = todo[1]; args[1].limits[axis][0] =
hpos + 1; int todo_i = arg->pos[axis] > hpos;
todo[todo_i][0] = 0; // empty todo list
floodfill4_core(prm, &args[todo_i]);
if (todo[todo_i][0] != 0) {
// do ping-pong between halves
args[0].pos[axis] = hpos;
args[1].pos[axis] = hpos+1;
const ptrdiff_t lda = prm->ld[axis];
const ptrdiff_t ldb = prm->ld[X2Y(axis)];
Color* edge = &prm->image[lda*hpos];
do {
// look for contact points on destination edge
int* from = todo[todo_i];
Color *edge_dst = todo_i ? edge : edge + lda;
todo_i = 1 - todo_i;
todo[todo_i][0] = 0;
int np = *from++;
do {
int pos = *from++;
if (edge_dst[pos*ldb] == prm->old_color) { // contact found
args[todo_i].pos[X2Y(axis)] = pos;
floodfill4_core(prm, &args[todo_i]);
}
} while (--np);
} while (todo[todo_i][0] != 0);
}
}
static
_Bool floodfill4_expand(
const floodfill4_param* prm,
floodfill4_state* s,
enum coordinate_axes axis,
enum from_to ft_idx)
{ // try to expand
int* src_todo = s->todo[axis*2+ft_idx];
if (*src_todo == 0)
return 0;
int src_pos = s->limits[axis][ft_idx];
if (src_pos == prm->limits[axis][ft_idx]) {
*src_todo = 0;
return 0;
}
typedef struct {
int pos0, pos1;
} interval_t;
const ptrdiff_t lda = prm->ld[axis];
const ptrdiff_t ldb = prm->ld[X2Y(axis)];
Color* src_col = &prm->image[lda*src_pos];
Color* exp_col = &src_col[lda*IDX2INC(ft_idx)];
const int ort_limit0 = s->limits[X2Y(axis)][0];
const int ort_limit1 = s->limits[X2Y(axis)][1];
const Color c0 = exp_col[ldb*ort_limit0]; // preserve upper corner
const Color c1 = exp_col[ldb*ort_limit1]; // preserve lower corner
interval_t workbuf[(ort_limit1 - ort_limit0+2)/2];
interval_t* wr = workbuf;
s->pos[axis] = src_pos;
int n_todo = src_todo[0];
do {
// look for contact
int pos = src_todo[n_todo--]; // use src_todo as stack, popping
from the top Color* pt = &exp_col[ldb*pos];
if (*pt == prm->old_color) { // contact found
*pt = prm->new_color;
// extend backward
Color* p = pt - ldb;
int pos0;
for (pos0 = pos-1; pos0 >= ort_limit0 && *p == prm->old_color; p
-= ldb, --pos0) *p = prm->new_color;
pos0 += 1;
// extend forward
p = pt + ldb;
int pos1;
for (pos1 = pos+1; pos1 <= ort_limit1 && *p == prm->old_color; p
+= ldb, ++pos1) *p = prm->new_color;
pos1 -= 1;
// add interval to result list
wr->pos0 = pos0;
wr->pos1 = pos1;
++wr;
if (pos0 != pos1) {
// bounce - apply new found interval to original rectangle
src_todo[0] = n_todo;
pos = pos0;
p = &src_col[ldb*pos];
do {
if (*p == prm->old_color) { // contact found
s->pos[X2Y(axis)] = pos;
floodfill4_core(prm, s);
n_todo = src_todo[0];
++pos;
p += ldb;
}
p += ldb;
} while (++pos <= pos1);
}
}
} while (n_todo != 0);
if (wr == workbuf)
return 0; // rectangle not expanded
// rectangle expanded
// handle corners of expanded rectangle
int exp_pos = src_pos + IDX2INC(ft_idx);
s->limits[axis][ft_idx] = exp_pos;
if (exp_col[ldb*ort_limit0] != c0) // corner0 modified
floodfill4_add(s->todo[X2Y(axis)*2+0], exp_pos); // add to todo0
list if (exp_col[ldb*ort_limit1] != c1) // corner1
modified floodfill4_add(s->todo[X2Y(axis)*2+1], exp_pos); // add to
todo1 list
// turn intervals to list
interval_t* rd = workbuf;
int* dst_todo = &src_todo[1];
do {
int pos = rd->pos0;
int pos1 = rd->pos1;
do
*dst_todo++ = pos;
while (++pos <= pos1);
++rd;
} while (rd != wr);
src_todo[0] = dst_todo - src_todo - 1;
return 1;
}