Sujet : Re: Weird problem
De : Nobody447095 (at) *nospam* here-nor-there.org (B. Pym)
Groupes : comp.lang.lispDate : 06. Aug 2024, 05:46:09
Autres entêtes
Organisation : A noiseless patient Spider
Message-ID : <v8s69j$1c0n2$1@dont-email.me>
User-Agent : XanaNews/1.18.1.6
Pierre Mai wrote:
Here is a simple solution, which assumes that only one way exists to
split a word, and that a simple heuristic suffices to disambiguate
between possible matches at a given point (implemented are first-match
and longest-first-match, via the operations SIMPLE-FIND-PART and
GREEDY-FIND-PART).
If this isn't sufficient, you can either try to change the code below
to support some form of backtracking/non-determinism, or you can check
out Screamer, which is an extension to Common Lisp for non-determinstic
programming, which makes this task much easier IMHO. Screamer is by
Jeffrey Mark Siskind (http://www.neci.nj.nec.com/homepages/qobi).
I'd recommend using Screamer, since I'd imagine you will want to
process your word fragments further, and most things NLP will imply
some non-determinism.
Regs, Pierre.
;;; Utility function
(defun starts-with (string start-string &key (start 0))
(let ((start-length (+ start (length start-string))))
(and (>= (length string) start-length)
(string-equal string start-string :start1 start
:end1 start-length))))
;;; The different part-finders, which implement first-match and
;;; longest-first-match heuristics respectively.
(defun simple-find-part (string part-list &key (start 0))
(dolist (part part-list)
(when (starts-with string part :start start)
(return part))))
(defun greedy-find-part (string part-list &key (start 0))
(loop with result = nil
with length = 0
for part in part-list
do
(when (and (starts-with string part :start start)
(> (length part) length))
(setq result part
length (length part)))
finally
(return result)))
;;; The main function.
(defun break-apart (word part-finder &rest part-lists)
(loop with word-length = (length word)
for index = 0 then (+ index (length part))
for part-list in part-lists
for part = (funcall part-finder word part-list :start index)
never (or (not part) (>= index word-length))
collect part into result
finally
(return (and (= index word-length)
result))))
;;; Examples
#|
* (break-apart "astronaut" #'simple-find-part
'("as" "co" "ast") '("tro" "ro" "mp")
'("na" "ut") '("ut" "er"))
("as" "tro" "na" "ut")
* (break-apart "astronaut" #'greedy-find-part
'("as" "co" "ast") '("tro" "ro" "mp")
'("na" "ut") '("ut" "er"))
("ast" "ro" "na" "ut")
* (break-apart "astronaut" #'simple-find-part
'("as" "co" "ast") '("tro" "mp")
'("na" "ut") '("ut" "er"))
("as" "tro" "na" "ut")
* (break-apart "astronaut" #'greedy-find-part
'("as" "co" "ast") '("tro" "mp")
'("na" "ut") '("ut" "er"))
NIL
newLISP
(define (cartesian-product lists)
(if (null? lists)
'(())
(let (subproduct (cartesian-product (rest lists)))
(apply append
(map
(fn (x) (map (fn (xs) (cons x xs)) subproduct))
(first lists))))))
(define (good? xs) (= (apply string xs) "magnetohydrodynamics"))
(filter good?
(cartesian-product
'(("mag" "ma" "ho" "magn" "in")
("eto" "net" "et")
("ohy" "o" "od" "oh")
("hy" "hyd" "ma" "hi")
("od" "drod" "rod")
("y" "yj" "yn" "yna")
("m" "am" "nam" "nami")
("ic" "is" "i")
("s" "cs"))))
(("mag" "net" "o" "hy" "drod" "y" "nam" "ic" "s")
("mag" "net" "o" "hy" "drod" "y" "nam" "i" "cs")
("mag" "net" "o" "hy" "drod" "yn" "am" "ic" "s")
("mag" "net" "o" "hy" "drod" "yn" "am" "i" "cs")
("mag" "net" "o" "hy" "drod" "yna" "m" "ic" "s")
("mag" "net" "o" "hy" "drod" "yna" "m" "i" "cs")
("mag" "net" "o" "hyd" "rod" "y" "nam" "ic" "s")
("mag" "net" "o" "hyd" "rod" "y" "nam" "i" "cs")
("mag" "net" "o" "hyd" "rod" "yn" "am" "ic" "s")
("mag" "net" "o" "hyd" "rod" "yn" "am" "i" "cs")
("mag" "net" "o" "hyd" "rod" "yna" "m" "ic" "s")
("mag" "net" "o" "hyd" "rod" "yna" "m" "i" "cs")
("magn" "et" "o" "hy" "drod" "y" "nam" "ic" "s")
("magn" "et" "o" "hy" "drod" "y" "nam" "i" "cs")
("magn" "et" "o" "hy" "drod" "yn" "am" "ic" "s")
("magn" "et" "o" "hy" "drod" "yn" "am" "i" "cs")
("magn" "et" "o" "hy" "drod" "yna" "m" "ic" "s")
("magn" "et" "o" "hy" "drod" "yna" "m" "i" "cs")
("magn" "et" "o" "hyd" "rod" "y" "nam" "ic" "s")
("magn" "et" "o" "hyd" "rod" "y" "nam" "i" "cs")
("magn" "et" "o" "hyd" "rod" "yn" "am" "ic" "s")
("magn" "et" "o" "hyd" "rod" "yn" "am" "i" "cs")
("magn" "et" "o" "hyd" "rod" "yna" "m" "ic" "s")
("magn" "et" "o" "hyd" "rod" "yna" "m" "i" "cs"))