(ns com.timezynk.useful.logback.pii-scrubber
  "Logback Layout which filters out sensitive information.

   Is configured via <field> and <memento> XML elements.
   When logging Clojure maps:
     Finds all occurrences of <field> and replaces their value with <memento>.
   Regardless of formatting:
     Replaces all email addresses and phone numbers with <memento>."
  (:gen-class
   :extends ch.qos.logback.contrib.json.classic.JsonLayout
   :exposes-methods {doLayout superDoLayout}
   :name com.timezynk.useful.logback.PiiScrubber
   :state state
   :init init
   :methods [[setField [String] void]
             [setMemento [String] void]])
  (:require [clojure.string :as string]
            [cheshire.core :as json]))

;; Pattern matching email addresses (RFC 5322 style: dot-atom or quoted local
;; part, followed by a domain name or a bracketed address literal).
;; The leading (?i) makes the match case-insensitive: the character classes
;; below only list lowercase letters, so without the flag addresses such as
;; "John.Doe@Example.COM" would not be matched and would escape scrubbing.
(def ^:const EMAIL_REGEXP
  #"(?i)(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])")

;; Pattern matching phone-number-like runs of text.
;; A match must sit between delimiters (whitespace , : ; [ ]) or the ends of
;; the input. It starts with "+", a digit, "(" or "(+", continues with 6 to 18
;; digits / whitespace / parentheses / dots / dashes / slashes / backslashes,
;; and ends with a digit.
(def ^:const PHONENO_REGEXP
  (let [delimiter   #"[\s,:;\[\]]"
        preceded-by (str "(?<=(?:^|" delimiter "))")
        followed-by (str "(?=(?:$|" delimiter "))")
        opening     #"(?:\+|\d|\(\+?)"
        middle      #"[\s\d().\-\/\\]{6,18}"
        closing     #"\d"]
    (->> [preceded-by opening middle closing followed-by]
         (apply str)
         re-pattern)))

;; Replacement text stored as the initial :memento in the layout state;
;; -setMemento overwrites it.
(def ^:const DEFAULT_MEMENTO
  "[SCRUBBED]")

(defn -init
  "gen-class :init hook. Returns a pair of [superclass constructor args,
   instance state]: no constructor args, and an atom holding an empty list of
   fields plus the default memento."
  []
  (let [initial-state {:fields [] :memento DEFAULT_MEMENTO}
        super-args    []]
    [super-args (atom initial-state)]))

(defn -setField
  "Setter backing the `setField` method: appends `field` to the :fields
   vector held in this instance's state atom."
  [this field]
  (let [state-atom (.state this)]
    (swap! state-atom
           (fn [current]
             (update current :fields conj field)))))

(defn -setMemento
  "Setter backing the `setMemento` method: stores `memento` under :memento
   in this instance's state atom, replacing the previous value."
  [this memento]
  (let [state-atom (.state this)]
    (swap! state-atom
           (fn [current]
             (assoc current :memento memento)))))

(defn- field->regexp
  "Turns a Clojure field into a regular expression which matches the value of
   the field in log output. Assumes that a Clojure map is being logged.

   `field` is the key exactly as it is printed (e.g. \":email\"). The
   resulting pattern matches the text that follows `field` and a single
   space, up to (not including) the next `,` or `}`; the value itself may not
   contain `,`, `}` or `:`.

   The field name is matched literally: it is quoted before being spliced
   into the pattern, so keys containing regex metacharacters (`?`, `.`, `*`,
   `+`, ...) match themselves instead of being interpreted as regex syntax."
  [field]
  (re-pattern (format "(?<=[{, ]%s )[^,}:]+(?=[,}])"
                      (java.util.regex.Pattern/quote field))))

(defn- regexps
  "Returns a vector of every pattern to scrub with: one pattern per
   configured field, in configuration order, followed by the email and phone
   number patterns."
  [this]
  (let [fields (:fields @(.state this))]
    (-> (mapv field->regexp fields)
        (conj EMAIL_REGEXP PHONENO_REGEXP))))

(defn -doLayout
  "Renders `event` through the superclass layout, parses the resulting JSON,
   replaces every match of the configured patterns inside the `message` entry
   with the memento, and returns the re-serialised JSON followed by a
   newline. A missing or null `message` is treated as the empty string."
  [this event]
  (let [;; string/replace with a regex interprets `$` and backslashes in a
        ;; string replacement as group references / escapes. Quote the
        ;; memento so that whatever was configured is inserted literally.
        memento (-> this .state deref :memento string/re-quote-replacement)
        scrub (fn [msg] (reduce #(string/replace %1 %2 memento)
                                (or msg "")
                                (regexps this)))]
    (-> this
        (.superDoLayout event)
        json/parse-string
        (update "message" scrub)
        json/generate-string
        (str "\n"))))
