(use goo)
(dm consonant? (s|<str> i|<int> => <log>)
(def c (elt s i))
(case c
((#\a #\e #\i #\o #\u)
#f)
((#\y)
(if (= i 0)
#t
(not (consonant? s (- i 1))))
)
(#t
#t)
)
)
(dm measure-consonant-sequences (s|<str> => <int>)
(def len-str (len s))
(rep loop ((iChar 0) (con-spans 0) (inCons #t))
(if (< iChar len-str)
(if (consonant? s iChar)
(if inCons
(loop (+ iChar 1) con-spans #t) (loop (+ iChar 1) (+ con-spans 1) #t)) (if inCons
(loop (+ iChar 1) con-spans #f) (loop (+ iChar 1) con-spans #f))) con-spans
)
)
)
(dm vowel-in-string? (s|<str> => <log>)
(< (measure-consonant-sequences s) (len s))
)
(df ends-with-double-consonant? (s|<str> => <log>)
(def l (len s))
(and (>= l 2)
(= (elt s (- l 1)) (elt s (- l 2)))
(consonant? s (- l 1))
)
)
(df ends-with-cvc? (s|<str> => <log>)
(def l (len s))
(and (>= l 3)
(consonant? s (- l 1))
(not (consonant? s (- l 2)))
(consonant? s (- l 3))
(case (elt s (- l 1))
((#\w #\x #\y)
#f)
(#t
#t)
)
)
)
(df last-char (s|<str> => (t? <chr>))
(def l (len s))
(when (> l 0)
(elt s (- l 1))
)
)
(df str-left-sub (s|<str> n|<int>)
(if (< n 0)
(sub s 0 (+ n (len s)))
(sub s 0 n)
)
)
(df str-rev-elt (s|<str> n|<int>)
(elt s (- (- (len s) n) 1))
)
(dm step1 (s|<str>)
(if (suffix? s "s")
(seq
(if (suffix? s "sses")
(step1-b (str-left-sub s -2))
(if (suffix? s "ies")
(step1-b (str-left-sub s -2))
(if (~= (str-rev-elt s 1) #\s)
(step1-b (str-left-sub s -1))
(step1-b s)
)
)
)
)
(step1-b s)
)
)
(dm step1-b (s|<str>)
(if (suffix? s "eed")
(if (> (measure-consonant-sequences (str-left-sub s -3)) 0)
(str-left-sub s -1)
s)
(cond ((and (suffix? s "ed")
(vowel-in-string? (str-left-sub s -2)))
(or (step1-c (str-left-sub s -2))
s)
)
((and (suffix? s "ing")
(vowel-in-string? (str-left-sub s -3)))
(or (step1-c (str-left-sub s -3))
s)
)
(#t
s)
)
)
)
(dm step1-c (s|<str>)
(cond ((suffix? s "at")
(cat s "e") )
((suffix? s "bl")
(cat s "e") )
((suffix? s "iz")
(cat s "e") )
((ends-with-double-consonant? s)
(case (last-char s)
((#\l #\s #\z)
s )
(#t
(str-left-sub s -1) )
)
)
((and (= (measure-consonant-sequences s) 1)
(ends-with-cvc? s))
(cat s "e")
)
(#t
s
)
)
)
(dm step2 (s|<str>)
(if (and (suffix? s "y")
(vowel-in-string? (str-left-sub s -1)))
(cat (str-left-sub s -1) "i")
s
)
)
(df step3-transform (s|<str> suf|<str> rwith|<str>)
(when (suffix? s suf)
(def l (len s))
(def root (sub s 0 (- l (len suf))))
(when (> (measure-consonant-sequences root) 0)
(cat root rwith)
)
)
)
(dm step3 (s|<str>)
(def l (len s))
(if (>= l 2)
(seq
(case (str-rev-elt s 1)
((#\a)
(or (step3-transform s "ational" "ate")
(step3-transform s "tional" "tion")
s
)
)
((#\c)
(or (step3-transform s "enci" "ence")
(step3-transform s "anci" "ance")
s
)
)
((#\e)
(or (step3-transform s "izer" "ize")
s
)
)
((#\l)
(or (step3-transform s "bli" "ble")
(step3-transform s "alli" "al")
(step3-transform s "entli" "ent")
(step3-transform s "eli" "e")
(step3-transform s "ousli" "ous")
s
)
)
((#\o)
(or (step3-transform s "ization" "ize")
(step3-transform s "ation" "ate")
(step3-transform s "ator" "ate")
s
)
)
((#\s)
(or (step3-transform s "alism" "al")
(step3-transform s "iveness" "ive")
(step3-transform s "fulness" "ful")
(step3-transform s "ousness" "ous")
s
)
)
((#\t)
(or (step3-transform s "aliti" "al")
(step3-transform s "iviti" "ive")
(step3-transform s "biliti" "ble")
s
)
)
((#\g)
(or (step3-transform s "logi" "log")
s
)
)
(#t
s)
)
)
s
)
)
(dm step4 (s|<str>)
(def l (len s))
(case (last-char s)
((#\e)
(or (step3-transform s "icate" "ic")
(step3-transform s "ative" "")
(step3-transform s "alize" "al")
s
)
)
((#\i)
(or (step3-transform s "iciti" "ic")
s
)
)
((#\l)
(or (step3-transform s "ical" "ic")
(step3-transform s "ful" "")
s
)
)
((#\s)
(or (step3-transform s "ness" "")
s
)
)
(#t
s
)
)
)
(df step5-transform (s|<str> suf|<str>)
(when (suffix? s suf)
(def l (len s))
(def root (sub s 0 (- l (len suf))))
(when (> (measure-consonant-sequences root) 1)
root
)
)
)
(dm step5 (s|<str>)
(def l (len s))
(if (>= l 2)
(seq
(case (str-rev-elt s 1)
((#\a)
(or (step5-transform s "al")
s)
)
((#\c)
(or (step5-transform s "ance")
(step5-transform s "ence")
s)
)
((#\e)
(or (step5-transform s "er")
s)
)
((#\i)
(or (step5-transform s "ic")
s)
)
((#\l)
(or (step5-transform s "able")
(step5-transform s "ible")
s)
)
((#\n)
(or (step5-transform s "ant")
(step5-transform s "ement")
(step5-transform s "ment")
(step5-transform s "ent")
s)
)
((#\o)
(if (and (suffix? s "ion")
(> l 3)
(or (= (elt s (- l 4)) #\s)
(= (elt s (- l 4)) #\t)))
(seq
(or (step5-transform s "ion")
s)
)
(or (step5-transform s "ou")
s)
)
)
((#\s)
(or (step5-transform s "ism")
s)
)
((#\t)
(or (step5-transform s "ate")
(step5-transform s "iti")
s)
)
((#\u)
(or (step5-transform s "ous")
s)
)
((#\v)
(or (step5-transform s "ive")
s)
)
((#\z)
(or (step5-transform s "ize")
s)
)
(#t
s)
)
)
s
)
)
(dm step6 (s|<str>)
(if (suffix? s "e")
(seq
(def m (measure-consonant-sequences s))
(if (or (and (= m 1)
(not (ends-with-cvc? (str-left-sub s -1))))
(> m 1))
(step6-b (str-left-sub s -1))
(step6-b s)
)
)
(step6-b s)
)
)
(dm step6-b (s|<str>)
(if (and (suffix? s "l")
(ends-with-double-consonant? s)
(> (measure-consonant-sequences s) 1))
(str-left-sub s -1)
s)
)
(dm stem (word|<str>)
(step6 (step5 (step4 (step3 (step2 (step1 word))))))
)
(export
stem
step1
step2
step3
step4
step5
step6
)