(ns piippin.core
  (:require [clojure.string :as str]))

(def dob
  "Regex source (a string, not a compiled pattern) for a date of birth:
   the word \"dob\" or \"birth\" (case-insensitive), followed within 20
   characters by a date written either with dashes (2-4 digits, 2 digits,
   2 digits) or with slashes (2 digits, 2 digits, 2-4 digits)."
  (str
    ;; keyword, then at most 20 arbitrary characters
    "(?i)(dob|birth).{0,20}"
    ;; dash-separated or slash-separated date
    "(\\d{2,4}-\\d{2}-\\d{2}|\\d{2}/\\d{2}/\\d{2,4})"))

(def common-patterns
  "Map from a PII category name (string) to the compiled regular expression
   used to detect that category."
  {;; Any run of six or more consecutive digits.
   "longnum"    (re-pattern "\\d{6,}")
   ;https://www.regular-expressions.info/creditcard.html
   ;; 13 to 16 digits, each optionally followed by spaces or dashes.
   "creditcard" (re-pattern "(?:\\d[ -]*?){13,16}")
   ;http://www.cardinalpath.com/what-you-need-to-know-about-google-analytics-\
   ;personally-identifiable-information/
   ;; The separator before the top-level domain is an escaped, literal dot.
   ;; An unescaped `.` would match any character and report strings such as
   ;; "a@bcde" as e-mail addresses.
   "email"      (re-pattern (str "([a-zA-Z0-9_\\.-]+)@([\\da-zA-Z\\.-]+)"
                                 "\\.([a-zA-Z\\.]{2,6})"))
   ;; Nine digits grouped 3-2-4, with optional dashes between the groups.
   "ssn"        (re-pattern "\\d{3}-?\\d{2}-?\\d{4}")
   ;https://stackoverflow.com/questions/16699007/regular-expression-to-match-\
   ;standard-10-digit-phone-number
   "phone"      (re-pattern (str "(\\+\\d{1,2}[ \\t])?\\(?\\d{3}\\)?[[ \\t].-]?"
                                 "\\d{3}[ \\t.-]?\\d{4}"))
   ;; Date-of-birth pattern compiled from the `dob` regex source string.
   "dob"        (re-pattern dob)})


(defn- simpleid
  "Returns a random string of 10 lowercase ASCII letters (a-z)."
  []
  (let [random-letter (fn []
                        ;; 97 is the code point of \a; rand-int 26 picks an
                        ;; offset in 0..25, i.e. one of the letters a..z.
                        (char (+ 97 (rand-int 26))))]
    (str/join (repeatedly 10 random-letter))))

(defn fingerprint
  "Replaces every piece of text in `s` that is protected by one of the
   `exceptions` with a random placeholder produced by `simpleid`.

   `exceptions` is a collection of regular expressions. For a pattern with
   capture groups the text of the first group is protected; for a pattern
   without groups the whole match is protected. Matches whose protected text
   is nil (the group did not participate) or empty are ignored.

   Returns a map with:
     :result        `s` with each protected text replaced by its placeholder
     :replacements  map of protected text -> placeholder"
  [exceptions s]
  (let [protected-text (fn [match]
                         ;; re-seq yields a plain string when the pattern has
                         ;; no groups and a [whole group-1 ...] vector
                         ;; otherwise. Calling `second` on a string would
                         ;; return a character, which str/replace rejects
                         ;; when the replacement is a string.
                         (if (string? match)
                           match
                           (second match)))
        originals (->> exceptions
                       (mapcat #(re-seq % s))
                       (map protected-text)
                       ;; Drop nil and "" keys: str/replace throws on a nil
                       ;; match, and replacing "" would insert the
                       ;; placeholder between every character.
                       (filter seq)
                       distinct)
        replacements (zipmap originals (repeatedly simpleid))]
    {:result (reduce-kv (fn [text original placeholder]
                          (str/replace text original placeholder))
                        s
                        replacements)
     :replacements replacements}))

(defn return
  "Undoes the substitution described by `replacements` (a map of original
   text -> placeholder): every occurrence of a placeholder in
   `fingerprinted-string` is replaced by its original text."
  [replacements fingerprinted-string]
  (letfn [(restore [text original placeholder]
            ;; The map is original -> placeholder, so swap the arguments to
            ;; substitute in the reverse direction.
            (str/replace text placeholder original))]
    (reduce-kv restore fingerprinted-string replacements)))

(defn find-pii
  "Returns the lines of `s` that contain PII of type `t`.

   `t` is first looked up by name in `common-patterns`; when there is no
   such entry, `t` itself is compiled as a case-insensitive regular
   expression. Text protected by `exceptions` is swapped for placeholders
   (see `fingerprint`) before matching, and swapped back (see `return`) in
   the lines that are returned."
  [exceptions t s]
  (let [{:keys [result replacements]} (fingerprint exceptions s)
        matcher (if-let [known (common-patterns t)]
                  known
                  (re-pattern (str "(?i)" t)))]
    (for [line (str/split-lines result)
          :when (re-find matcher line)]
      (return replacements line))))

(defn remove-pii
  "Returns `s` with every match of every regex in `common-patterns` replaced
   by \"****\". Text protected by `exceptions` is swapped for placeholders
   (see `fingerprint`) before each pattern is applied and restored (see
   `return`) afterwards."
  [exceptions s]
  (letfn [(mask-one [text pattern]
            ;; Fingerprint again for every pattern: the previous step has
            ;; already put the protected text back into `text`.
            (let [{:keys [result replacements]} (fingerprint exceptions text)]
              (return replacements (str/replace result pattern "****"))))]
    (reduce mask-one s (vals common-patterns))))