(ns piippin.core
  (:require [clojure.string :as s]))

(def dob
  "Regex source (a string, not a compiled pattern) for a date-of-birth mention:
   the word `dob` or `birth` (case-insensitive), up to 20 arbitrary characters,
   then a date written with dashes or with slashes."
  (let [label "(?i)(dob|birth).{0,20}"
        date  "(\\d{2,4}-\\d{2}-\\d{2}|\\d{2}/\\d{2}/\\d{2,4})"]
    (str label date)))

(defn- d-patt
  "Compile the regex source `r` into a pattern that only matches when `r` is
   preceded and followed by a boundary: start/end of input, whitespace, or one
   of the punctuation characters `. ! ? - : , ;`."
  [r]
  (let [boundary "(^|[\\.!?\\-\\s\\:\\,\\;]|$)"]
    (->> [boundary r boundary]
         (apply str)
         re-pattern)))

(def common-patterns
  "Map from a PII type name (string) to the compiled regular expression used
   to detect it. Every entry except \"dob\" is built through `d-patt`; \"dob\"
   is compiled directly from the `dob` regex source."
  {;; https://www.regular-expressions.info/creditcard.html
   "creditcard" (d-patt "(?:\\d[ -]*?){13,16}")
   ;; http://www.cardinalpath.com/what-you-need-to-know-about-google-analytics-\
   ;; personally-identifiable-information/
   ;; The dot before the top-level domain is escaped so that it only matches a
   ;; literal "." (an unescaped dot would accept any character and flag text
   ;; such as "you@home"). The TLD length is open-ended so that long TLDs
   ;; (e.g. ".photography") are still detected.
   "email"      (d-patt (str "([a-zA-Z0-9_\\.-]+)@([\\da-zA-Z\\.-]+)"
                             "\\.([a-zA-Z\\.]{2,})"))
   "ssn"        (d-patt "\\d{3}-?\\d{2}-?\\d{4}")
   ;; https://stackoverflow.com/questions/16699007/regular-expression-to-match-\
   ;; standard-10-digit-phone-number
   "phone"      (d-patt (str "(\\+\\d{1,2}\\s)?\\(?\\d{3}\\)?[\\s.-]?"
                             "\\d{3}[\\s.-]?\\d{4}"))
   "dob"        (re-pattern dob)
   "longnum"    (d-patt "\\d{6,}")})

(defn find-pii
  "Given a type `t`, return a lazy sequence of the lines of the string `s`
   that match either the regular expression registered under that name in
   `common-patterns` or, when `t` is not a known type name, a
   case-insensitive search for the literal value `t` itself."
  [t s]
  (let [pattern (or (common-patterns t)
                    ;; `t` is a literal value to look for (e.g. "+1 555" or
                    ;; "j.doe"), not a regex: quote it so that metacharacters
                    ;; neither throw a PatternSyntaxException nor behave as
                    ;; wildcards.
                    (re-pattern
                     (str "(?i)" (java.util.regex.Pattern/quote (str t)))))]
    (filter #(re-find pattern %) (s/split-lines s))))

