(ns cases.core
  "Case handling transducers for strings."
  (:import [java.util ArrayList]))

(defn string-build
  "Runs the transducer `rf` over the characters of `s` and returns the result
  as a string.

  Every value emitted by the transducer is appended to a fresh, mutable
  `StringBuilder`, which is converted to a string once the input is exhausted."
  [rf s]
  (let [append-to
        (fn
          ;; init: a new builder for each call
          ([] (StringBuilder.))
          ;; completion: hand the builder back untouched
          ([^StringBuilder sb] sb)
          ;; step: append one emitted value
          ([^StringBuilder sb ch] (.append sb ch)))]
    (-> (transduce rf append-to s)
        str)))

(defn lower-case
  "With no arguments, returns a transducer that maps each character through
  `Character/toLowerCase`. With a string `s`, returns `s` converted this way.

  ```clojure
  (lower-case \"ABC def\")
  ;; => \"abc def\"
  ```"
  ([] (map Character/toLowerCase))
  ([s]
   (-> (lower-case)
       (string-build s))))

(defn upper-case
  "With no arguments, returns a transducer that maps each character through
  `Character/toUpperCase`. With a string `s`, returns `s` converted this way.

  ```clojure
  (upper-case \"ABC def\")
  ;; => \"ABC DEF\"
  ```"
  ([] (map Character/toUpperCase))
  ([s]
   (-> (upper-case)
       (string-build s))))

(defn title-case
  "With no arguments, returns a transducer that maps each character through
  `Character/toTitleCase`. With a string `s`, returns `s` converted this way.

  Note that every character is converted, not just the first of each word."
  ([] (map Character/toTitleCase))
  ([s]
   (-> (title-case)
       (string-build s))))

(defn- title-by
  "Returns a transducer for case conversion (`fc`) with words defined by `fw`.

  The word checker function `fw` should take a character as input and return a
  boolean. The conversion function `fc` should take a character as input and
  return a character. Any non-word (effectively the inverse of `fw`) signifies
  a break. If not supplied, case conversion is handled by
  `Character/toTitleCase` with words detected by `Character/isLetter`.

  `fc` is applied to the first character of the input and to every character
  whose predecessor is not a word character. All other characters (those that
  follow a word character) are converted with `Character/toLowerCase`.

  The returned transducer is stateful: it remembers the previous input
  character.

  ```clojure
  (string-build
    (title-by
      (fn [c] (Character/isLetterOrDigit (int c)))
      Character/toTitleCase)
    \"ab12cd\")
  ;; => \"Ab12cd\"
  ```"
  ([] (title-by Character/isLetter Character/toTitleCase))
  ([fw] (title-by fw Character/toTitleCase))
  ([fw fc]
   (fn [rf]
     ;; ::none marks \"no previous character yet\" (start of input).
     (let [prev (volatile! ::none)]
       (fn
         ([] (rf))
         ([r] (rf r))
         ([r c]
          (let [p @prev]
            (vreset! prev c)
            (if (and (not= p ::none) (fw p))
              ;; Still inside a word: force lower case.
              (rf r (Character/toLowerCase c))
              ;; Start of input or first character after a break: use the
              ;; caller-supplied conversion rather than a hard-coded one.
              (rf r (fc c))))))))))

(defn title
  "Title-cases the string `s`, or, with no arguments, returns a stateful
  transducer that does so.

  Follows (though possibly does not fully match) Python's broad definition: a
  word starts at any letter that is not preceded by another letter. Word
  starts are converted with `Character/toTitleCase`; characters that follow a
  letter are lower-cased.

  ```clojure
  (title \"they're bill's friends from the UK\")
  ;; => \"They'Re Bill'S Friends From The Uk\"
  ```"
  ([] (title-by))
  ([s]
   (-> (title)
       (string-build s))))

(defn capitalize-first
  "Title-cases the first letter of `s` and nothing else, or, with no
  arguments, returns a stateful transducer that does so.

  Non-letter characters ahead of the first letter (spaces, for instance) pass
  through untouched, as does everything after that letter. The conversion
  uses `Character/toTitleCase` rather than upper case.

  ```clojure
  (capitalize-first \" the United Kingdom (UK) or Britain, is a country \")
  ;; => \" The United Kingdom (UK) or Britain, is a country \"
  ```"
  ([]
   (fn [rf]
     ;; Flips to true once the first letter has been converted.
     (let [done? (volatile! false)]
       (fn
         ([] (rf))
         ([acc] (rf acc))
         ([acc ch]
          (cond
            @done?
            (rf acc ch)

            (Character/isLetter ch)
            (do
              (vreset! done? true)
              (rf acc (Character/toTitleCase ch)))

            :else
            (rf acc ch)))))))
  ([s]
   (-> (capitalize-first)
       (string-build s))))

(defn capitalize
  "Title-cases the first letter of `s` and lower-cases everything else, or,
  with no arguments, returns a transducer that does so.

  In contrast to the Clojure and Python capitalization functions, non-letter
  characters ahead of the first letter (spaces, for instance) are skipped over
  rather than treated as the first character. The first letter is converted
  to title case rather than upper case.

  ```clojure
  (capitalize \" the United Kingdom (UK) or Britain, is a country \")
  ;; => \" The united kingdom (uk) or britain, is a country \"
  ```"
  ([]
   ;; Lower-case every character first, then raise the first letter.
   (comp (lower-case)
         (capitalize-first)))
  ([s]
   (-> (capitalize)
       (string-build s))))

(defn capitalize-words
  "Capitalizes whitespace-separated words or returns a transducer for this.

  Matches Python's default `string.capwords()` function except that whitespace
  is not compressed automatically. A word is any run of non-whitespace
  characters, as decided by `Character/isWhitespace`; the first character of
  each word is title-cased and the rest of the word is lower-cased.

  ```clojure
  (capitalize-words \" 1aa  bb2cc \")
  ;; => \" 1aa  Bb2cc \"
  ```"
  ;; `Character/isWhitespace` replaces the deprecated `Character/isSpace` and
  ;; matches the whitespace test used by the trimming functions in this file.
  ([] (title-by (complement Character/isWhitespace)))
  ([s] (string-build (capitalize-words) s)))

(defn- triml-by
  "Transducer to trim characters matching `f` from the left of a string.

  The function `f` should take a character and return a boolean. Leading
  inputs are dropped for as long as `f` returns a truthy value; from the first
  non-matching input onwards everything is passed through unchanged, including
  later inputs that match `f`.

  This is exactly the behaviour of the standard `drop-while` transducer, so it
  is used directly instead of a hand-rolled stateful step function."
  [f]
  (drop-while f))

(defn triml
  "Trims whitespace from the left of a string, or, with no arguments, returns
  a transducer for this.

  Whitespace is decided by `Character/isWhitespace`.

  ```clojure
  (triml \" \\r \\nA\\n\")
  ;; => \"A\\n\"
  ```"
  ([] (triml-by Character/isWhitespace))
  ([s]
   (-> (triml)
       (string-build s))))

(defn- trimr-by
  "Transducer to trim characters matching `f` from the right of a string.

  The function `f` should take a character and return a boolean.

  Matching characters are held back in a buffer rather than emitted. When a
  non-matching character arrives, the held characters are flushed downstream
  ahead of it. Whatever is still buffered when the input ends is never
  emitted, which is what removes the trailing run."
  [f]
  (fn [rf]
    (let [pending (ArrayList/new)
          ;; Reduces a whole collection into `rf`, keeping any `reduced` wrapper.
          flush-into (cat rf)]
      (fn
        ([] (rf))
        ([acc] (rf acc))
        ([acc ch]
         (if-not (f ch)
           (let [flushed (flush-into acc (vec pending))]
             (.clear pending)
             (cond
               ;; Downstream asked to stop while flushing: propagate as-is.
               (reduced? flushed) flushed
               :else (rf flushed ch)))
           (do
             (.add pending ch)
             acc)))))))

(defn trimr
  "Trims whitespace from the right of a string, or, with no arguments, returns
  a transducer for this.

  Whitespace is decided by `Character/isWhitespace`.

  ```clojure
  (trimr \" \\r \\nA\\n\")
  ;; => \" \\r \\nA\"
  ```"
  ([] (trimr-by Character/isWhitespace))
  ([s]
   (-> (trimr)
       (string-build s))))

(defn trim
  "Trims whitespace from both ends of a string, or, with no arguments, returns
  a transducer to do this.

  The transducer is the composition of `triml` followed by `trimr`.

  ```clojure
  (trim \"\\n  A\\n \\r\")
  ;; => \"A\"
  ```"
  ([] (comp (triml) (trimr)))
  ([s]
   (-> (trim)
       (string-build s))))

(defn trim-newline
  "Trims newlines from the end of a string, or, with no arguments, returns a
  transducer for this.

  A newline character is either `\\r` or `\\n`. Newlines elsewhere in the
  string are kept.

  ```clojure
  (trim-newline \"\\r\\na b c\\r\\n\\n\\r\")
  ;; => \"\\r\\na b c\"
  ```"
  ([] (trimr-by #{\newline \return}))
  ([s]
   (-> (trim-newline)
       (string-build s))))

(defn- dedupe-by
  "Returns a transducer for deduping a collection according to `f`.

  `f` is called on every input. An input is dropped when `f` returns a truthy
  value that equals the value `f` returned for the immediately preceding
  input; the first input of each such run is kept. Inputs for which `f`
  returns a falsey value are never treated as duplicates and always pass
  through, so a predicate such as `Character/isWhitespace` collapses runs of
  matching characters without touching runs of non-matching ones.

  The returned transducer is stateful: it remembers the previous `f` result."
  [f]
  (fn [rf]
    ;; ::none marks \"no previous input yet\".
    (let [dupe (volatile! ::none)]
      (fn
        ([] (rf))
        ([r] (rf r))
        ([r c]
         (let [d @dupe
               v (f c)]
           (vreset! dupe v)
           ;; Require a truthy `v`: without it, consecutive non-matching
           ;; inputs (both false) would compare equal and be dropped.
           (if (and v (= d v))
             r
             (rf r c))))))))

(defn compress-whitespace
  "Squeezes each run of whitespace down to one character, or, with no
  arguments, returns a transducer for this.

  The first whitespace character of each run is the one that is kept.
  Whitespace is decided by `Character/isWhitespace`.

  ```clojure
  (compress-whitespace \"  a  \\rb \\n c\\n  \")
  ;; => \" a b c\\n\"
  ```"
  ([] (dedupe-by Character/isWhitespace))
  ([s]
   (-> (compress-whitespace)
       (string-build s))))
