Sujet : Re: ( Substring function in Python, Lisp) -- [Hijack] contains [hijk]
De : enometh (at) *nospam* meer.net (Madhu)
Groupes : comp.lang.lispDate : 16. Feb 2025, 01:56:38
Autres entêtes
Organisation : Motzarella
Message-ID : <m3o6z2wvuh.fsf@leonis4.robolove.meer.net>
References : 1
Pro tip: sanitise replies to Hen Hannas posts with emacs with
(replace-regexp "^>+[ \t]+" "> ")
* HenHanna <7513d4f4cf89bfd31edda3eb5ed84052@
www.novabbs.com> :
Wrote on Sat, 15 Feb 2025 21:36:20 +0000:
>
defrag defg
defang defg
defog defg
>
hijack hijk
>
________________________________(Is there such a word containing 5
letters? )
Not in my /usr/share/dict/words.
(defun intern-runs (word &optional hash-table)
(let ((pos 0) (len 1) (pos-max 0) (len-max 1))
(labels ((finish ()
(when (and (> len 1)
(>= len len-max))
(setq pos-max pos len-max len)
(when hash-table
(pushnew word
(gethash (subseq word pos-max (+ pos-max len-max))
hash-table nil)
:test #'equal)))))
(loop for index from 0 for c across word
for prev = nil then curr
for curr = (char-code c)
when prev do
(cond ((= (1+ prev) curr) (incf len))
(t (finish) (setq pos index len 1)))
finally (finish))
(unless (zerop len-max)
(cons pos-max len-max)))))
(intern-runs "hijacks")
;; => (0 . 3) - longest substr of length 3 at position 0
(defvar $h (make-hash-table :test #'equal))
(clrhash $h)
(intern-runs "abcdxyz" $h)
;; inspect $h -> keys are substrings whose characters are consecutive
;; values are the strings of which the keys are substrings
(clrhash $h)
(defvar $a (slurp-lines "/usr/share/dict/words"))
(length $a)
;; => 234937
(time (map nil (lambda (w) (intern-runs w $h)) $a))
;; terrible performance
(hash-table-count $h)
;; => 37
there are 37 "runs" i.e. substrings of words which are consecutive
chars.
(apply #'max (mapcar 'length (hash-keys $h))) ; 4
;; maxiumum length is 4.
(remove-if-not (lambda (x) (= (length x) 4)) (hash-keys $h))
;; => ("rstu" "mnop")
;; largest strings are "rstu" and "mnop"
(gethash "rstu" $h)
("understuffing" "understuff" "understudy" "superstuff" "overstuff" "overstudy"
"overstudiousness" "overstudiously" "overstudious" "overstudied" "overstud"
"afterstudy")
;; how many words are threre for each "run"
(let (ret)
(maphash (lambda (k v)
(push (cons k (length v)) ret))
$h)
(sort ret #'> :key #'cdr))
(("st" . 20497) ("de" . 12816) ("no" . 10914) ("op" . 10215) ("hi" . 9962)
("ab" . 8882) ("tu" . 4052) ("rs" . 3286) ("ef" . 1861) ("gh" . 1619)
("lm" . 959) ("kl" . 753) ("nop" . 737) ("mn" . 598) ("xy" . 574)
("stu" . 487) ("def" . 400) ("rst" . 340) ("uv" . 255) ("bc" . 206)
("yz" . 135) ("mno" . 134) ("ij" . 71) ("ghi" . 60) ("cd" . 20) ("mnop" . 19)
("rstu" . 12) ("fg" . 6) ("abc" . 4) ("cde" . 4) ("hij" . 2) ("fgh" . 2)
("pq" . 2) ("lmn" . 1) ("qr" . 1) ("efg" . 1) ("ijk" . 1))