(ns leafgrabber.free-text.author-davidg
  (:use [leafgrabber.free-text.author-common :only (review-xpath)]
        [leafgrabber.free-text.extractor :only (add-ext-group add-enum-regex add-tag-regex)]
        [clojure.contrib.generic.math-functions :only (log)]
        )
  )

;; :any-review -- enum extractor keyed on the literal marker "-BREAK-",
;; restricted to nodes selected by `review-xpath` (imported from
;; leafgrabber.free-text.author-common). It has no :patterns, only a
;; :default of `true`.
;; NOTE(review): the exact meaning of :nobreak, :only-xpath and :content-only
;; is defined in leafgrabber.free-text.extractor and is not visible here.
(add-enum-regex {:name :any-review
                 :core "-BREAK-"
                 :default true
                 :nobreak true
                 :only-xpath review-xpath
                 :content-only true})

;; :tag_test -- tag-based extractor over text that looks POS-tagged
;; ("word_TAG" tokens). The core is the adjective token "great_JJ"; the single
;; pattern captures (group 1) the run of one or more "_NN" tokens that
;; immediately follows the core.
;; NOTE(review): "CORE" inside a pattern is presumably replaced by the :core
;; regex before matching -- confirm in leafgrabber.free-text.extractor.
(add-tag-regex {:name :tag_test
                :core "great_JJ"
                :patterns [["CORE((?:\\s\\w+_NN)+)" 1]]
                :default "no-evidence"
                :only-xpath review-xpath
                :content-only true})

;; Group :great_tag bundles :great_pred and :great_attrib_pos. Note that
;; :great_attrib (defined between them) is NOT a member of the group.
(add-ext-group :great_tag #{:great_pred :great_attrib_pos})

;; :great_pred -- predicate form: "<determiner> <1-5 words> <was|is|were|are>
;; [optional intensifier] <praise word>". Group 1 captures the 1-5 words that
;; sit between the determiner and the verb.
(add-enum-regex {:name :great_pred
                 :core "amazing|awesome|brilliant|excellent|exceptional|fabulous|fantastic|good|great|incredible|magnificent|outstanding|splendid|superior|the best"
                 :patterns [["(?:the|their|our|my|her|his) (\\w+(?:\\s+\\w+){0,4}) (?:was|is|were|are)(?: (?:absolutely|also|always|beyond|exceptionally|extremely|incredibly|just|most|pretty|quite|really|simply|so|super|very))? CORE" 1]]
                 :default "no-evidence"
                 :only-xpath review-xpath
                 :content-only true})

;; :great_attrib -- attributive form on plain text: a praise word followed by
;; one to three words, which are captured as group 1.
(add-enum-regex {:name :great_attrib
                 :core "good|great|excellent"
                 :patterns [["CORE (\\w+(?:\\s+\\w+){0,2})" 1]]
                 :default "no-evidence"
                 :only-xpath review-xpath
                 :content-only true})

;; :great_attrib_pos -- attributive form on tagged text: a praise adjective
;; ("..._JJ") followed by any number of noun/adjective tokens and ending in a
;; noun token (NN, NNS, NNP or NNPS). Group 1 captures that token sequence.
(add-tag-regex {:name :great_attrib_pos
                :core "(?:awesome|brilliant|excellent|exceptional|fabulous|fantastic|good|great|incredible|magnificent|nice|outstanding|splendid|superior)_JJ"
                :patterns [["CORE ((?:\\w+_(?:NN(?:S|P|PS)?|JJ)\\s+)*\\w+_NN(?:S|P|PS)?)" 1]]
                :default "no-evidence"
                :only-xpath review-xpath
                :content-only true})

;; :checkin_count -- captures the digits that follow "check-ins" (optional
;; whitespace in between) and squashes the count into the range [0, 1].
(add-enum-regex
 {:name :checkin_count
  :core "check-ins"
  :patterns [["CORE\\s*(\\d+)" 1]]
  ;; log(1 + n) / 5, capped at 1. Assuming `log` is the natural logarithm,
  ;; the cap is reached at roughly n = 147.
  :normalize (fn [n]
               (let [scaled (/ (log (+ 1 n)) 5)]
                 (if (> scaled 1) 1 scaled)))
  :default "no-evidence"
  :nobreak true
  :content-only true})

;; Group :place_rank bundles the review-count and title-character signals.
(add-ext-group :place_rank #{:review_count :title_char})

;; :review_count -- captures the number in front of "reviews" and squashes it
;; into [0, 1]. The first pattern maps a number directly preceded by "-"
;; (e.g. the tail of a range such as "1-20 reviews") to "no-evidence"; it is
;; listed ahead of the general pattern.
(add-enum-regex
 {:name :review_count
  :core "reviews"
  :patterns [["-(\\d+) CORE" "no-evidence"]
             ["(\\d+) CORE" 1]]
  ;; log(1 + n) / 5, capped at 1.
  :normalize (fn [n]
               (let [scaled (/ (log (+ 1 n)) 5)]
                 (if (> scaled 1) 1 scaled)))
  :default "no-evidence"})
;; Inert `comment` form: a :filter set that is kept for reference but is not
;; part of the :review_count registration above.
(comment :filter #{"select" "a" "script" "input" "label"
                   "div[@id='sidebar_deals_div']"})

;; :title_char -- looks at the character right after "<title>" and maps the
;; upper-case letters A..J to the strings "0".."9"; the value is then divided
;; by 10.0 and capped at 1.
(add-enum-regex
 {:name :title_char
  :core "<title>"
  ;; Generates ["COREA" "0"], ["COREB" "1"], ... ["COREJ" "9"], in that order.
  :patterns (vec (map (fn [letter digit]
                        [(str "CORE" letter) (str digit)])
                      "ABCDEFGHIJ"
                      (range 10)))
  :normalize (fn [value]
               (let [scaled (/ value 10.0)]
                 (if (> scaled 1) 1 scaled)))
  :default "no-evidence"
  :nobreak true})

;; :menu_link -- finds href attributes whose double-quoted value contains
;; "menu" and ends in .pdf/.jpg/.jpeg/.png/.gif. The core itself holds the
;; capture group (the URL without the quotes); after "menu" no further "/" is
;; allowed, so "menu" must occur in the last path segment.
(add-enum-regex {:name :menu_link
                 :core "\"([^\"]*?menu[^/\"]*\\.(?:pdf|jpg|jpeg|png|gif))\""
                 :patterns [["href=CORE" 1]]
                 :default "no-evidence"
                 :nobreak true
                 :filter #{"select" "script"}})

;; :raw_hours_of_operation -- pulls the raw markup/text that follows an
;; "hours" / "hours of operation" heading. Four patterns are listed:
;;   1. "hours-" (core directly followed by a dash) -> "no-evidence"
;;   2. heading closed by </tag>, then one element of the SAME tag name;
;;      group 2 is that element's content
;;   3. heading closed by one or more tags, an optional empty element, then a
;;      run of sibling elements sharing one tag name; group 2 is the whole run
;;   4. heading closed by one or more tags, then bare text up to the next "<";
;;      group 1 is that text
;; NOTE(review): the character class [\s|:] also admits a literal "|"; if only
;; whitespace and ":" were intended it should be [\s:] -- confirm before
;; changing, since "|" is a plausible separator in scraped pages.
(add-enum-regex {:name :raw_hours_of_operation
                 ;;:core "hours(?: of operation)?(?!-)"
                 :core "hours(?: of operation)?"
                 :patterns [["CORE-" "no-evidence"]
                            [(str "CORE(?::)?\\s*"         ; core with optional :, whitespace
                                  "</(\\w+)>[\\s|:]*"      ; closing tag, whitespace/colon
                                  "<\\1[^>]*>(.*?)</\\1>") ; same tag as before, open and close, capture the content
                             2]                            ; example: "hours</div> <div>Sun: 11:00-6:00, Mon: ...</div>"
                            
                            [(str "CORE(?::)?"                     ; core with optional :
                                  "(?:\\s*</\\w+>)+[\\s|:]*"       ; one or more closing tags, whitespace/colon
                                  "(?:<(\\w+)>\\s*</\\1>\\s*)?"    ; optional open & close tags with empty content
                                  "(<(\\w+)[^>]*>.*?</\\3>"        ; capture: matching open and close tags, with content
                                  "(?:\\s*<\\3[^>]*>.*?</\\3>)*)") ;          any number of further open & close tags of the same kind, with content
                             2]                           ; example: "hours</div> <li>Sun: 11-6</li> <li>Mon: ...</li>
                            
                            [(str "CORE(?::)?"               ; core with optional :
                                  "(?:\\s*</\\w+>)+[\\s|:]*" ; one or more closing tags, whitespace/colon
                                  "([^<]+)")                 ; match everything up to the next tag
                             1]]                             ; example "hours</div> Sun: 11-6, Mon: ... <foo"
                 :default "no-evidence"
                 :filter #{"select" "a" "script" "br" "input" "label" "comment()"}
                 })

;; :raw_hours_of_operation_orig -- earlier variant of the hours extractor,
;; still registered under its own name. Differences visible here: the
;; "not followed by -" check is a negative lookahead inside the core, exactly
;; one closing tag is expected after the heading, and the filter set has no
;; "comment()" entry.
;; NOTE(review): the repeated-sibling part uses <\2[^<]*> where the opening
;; element uses [^>]* -- possibly a typo for [^>]*; confirm before touching.
(add-enum-regex {:name :raw_hours_of_operation_orig
                 :core "hours(?: of operation)?(?!-)"
                 :patterns [["CORE(?::)?\\s*</\\w+>\\s*(<(\\w+)[^>]*>.*?</\\2>\\s*(?:<\\2[^<]*>.*?</\\2>\\s*)*)" 1]
                            ["CORE(?::)?\\s*</\\w+>\\s*([^<]+)" 1]]
                 :default "no-evidence"
                 :filter #{"select" "a" "script" "br" "input" "label"}
                 })

;; Group :closed refers to :closed-cap_1 and :out-of-business_1, neither of
;; which is registered in the visible part of this file.
(add-ext-group :closed #{:closed-cap_1 :out-of-business_1})

;; Group :pets bundles the three pet-policy extractors below.
(add-ext-group :pets #{:pets_allowed :pets_fee :pets_small})

;; :pets_allowed -- negative phrasings ("no pets", "not allow pets",
;; "pets not allowed") are listed before the positive ones.
;; NOTE(review): presumably patterns are tried in listed order and the first
;; hit wins -- confirm in leafgrabber.free-text.extractor.
(add-enum-regex {:name :pets_allowed
                 :core #"pets?"
                 :patterns [[#"no CORE" false]
                            [#"not (?:allow|accommodate|accept) CORE" false]
                            [#"CORE not (?:allowed|accepted)" false]
                            [#"CORE (?:friendly|allowed|accepted|welcome)" true]
                            [#"(?:allows?|accepts?) CORE" true]]
                 :default "no-evidence"
                 :comment "values: true  - pets are allowed
                                   false - pets are not allowed

                           part of the :pets group"})

;; :pets_fee -- "pets allowed/welcome" followed by "for", "with" or a "$".
(add-enum-regex {:name :pets_fee
                 :core #"pets"
                 :patterns [[#"CORE (?:allowed|welcome) (?:for|with|\$)" true]]
                 :default "no-evidence"
                 :comment "values: true - pets are allowed for a fee

                           part of the :pets group"})

;; :pets_small -- the phrase "small pets".
(add-enum-regex {:name :pets_small
                 :core #"pets"
                 :patterns [[#"small CORE" true]]
                 :default "no-evidence"
                 :comment "values: true - only small pets are allowed

                           part of the :pets group"})

;; :non-smoking -- the more specific "all rooms / only / 100pct" phrasings map
;; to "only" and are listed before the bare "non-smoking" pattern, which maps
;; to "available".
(add-enum-regex {:name :non-smoking
                 :core #"smoking"
                 :patterns [["100pct non[- ]?CORE" "only"]
                            ["all rooms are non[- ]?CORE" "only"]
                            ["non[- ]?CORE only" "only"]
                            ["all non[- ]?CORE" "only"]
                            ["no CORE" "only"]
                            ["non[- ]?CORE" "available"]]
                 :default "no-evidence"
                 :comment "values: available - non-smoking rooms are available
                                   only      - all rooms are non-smoking"})

;; :air-conditioning -- "no"/"not" in front of the core maps to "false".
;; Unlike most extractors in this file, both values here are STRINGS
;; ("true"/"false") rather than booleans.
(add-enum-regex {:name :air-conditioning
                 :core #"air[- ]condition(?:ing|ed)"
                 :patterns [[#"not? CORE" "false"]]
                 :default "true"
                 :comment "values: true  - rooms have air conditioning
                                   false - rooms do not have air conditioning"})

;; Group :h_parking bundles the six hotel-parking extractors.
(add-ext-group :h_parking #{:h_parking_garage :h_parking_free :h_parking_surcharge
                            :h_parking_secure :h_parking_valet :h_parking_self})

;; :h_parking_garage -- "garage parking" / "parking garage", with the
;; "no ..." negations listed ahead of the positive forms.
(add-enum-regex {:name :h_parking_garage
                 :core #"garage"
                 :patterns [[#"no CORE parking" false]
                            [#"no parking CORE" false]
                            [#"CORE parking" true]
                            [#"parking CORE" true]]
                 :default "no-evidence"
                 :comment "values: true  - there is a parking garage
                                   false - there is no parking garage

                           part of the :h_parking group"})

;; :h_parking_free -- free or complimentary parking. "no free parking" is
;; listed ahead of the positive forms "free parking", "parking (free)" and
;; "parking (complimentary)".
;; Every :patterns entry is a [regex value] pair; the "(complimentary)" entry
;; previously had no value and now yields `true` like its "(free)" sibling.
(add-enum-regex {:name :h_parking_free
                 :core #"parking"
                 :patterns [[#"no free CORE" false]
                            [#"free CORE" true]
                            [#"CORE \(free\)" true]
                            [#"CORE \(complimentary\)" true]]
                 :default "no-evidence"
                 :comment "values: true  - offers free parking
                                   false - does not offer free parking

                           part of the :h_parking group"})

;; :h_parking_surcharge -- parking offered for a fee.
;; The parentheses are escaped and optional, so the core matches the literal
;; text "parking (surcharge)" as well as "parking surcharge". Previously the
;; unescaped "(surcharge)" was a capture group, so only the form WITHOUT
;; parentheses could match.
;; The value is the string "true", as for the other core-only :h_parking
;; extractors.
(add-enum-regex {:name :h_parking_surcharge
                 :core #"parking \(?surcharge\)?"
                 :default "true"
                 :comment "values: true - parking is available for a fee

                           part of the :h_parking group"})

;; :h_parking_secure -- core-only extractor for the phrase "secure parking";
;; the value is the string "true".
(add-enum-regex {:name :h_parking_secure
                 :core #"secure parking"
                 :default "true"
                 :comment "values: true - offers secure parking

                           part of the :h_parking group"})

;; :h_parking_valet -- "no valet" maps to boolean false, otherwise the
;; :default applies.
;; NOTE(review): the default is the STRING "true" while the pattern value is
;; the BOOLEAN false -- mixed types within one extractor; confirm whether
;; consumers distinguish them.
(add-enum-regex {:name :h_parking_valet
                 :core #"valet"
                 :patterns [[#"no CORE" false]]
                 :default "true"
                 :comment "values: true  - offers valet parking
                                   false - does not offer valet parking

                           part of the :h_parking group"})

;; :h_parking_self -- core-only extractor for the phrase "self parking";
;; the value is the string "true".
(add-enum-regex {:name :h_parking_self
                 :core #"self parking"
                 :default "true"
                 :comment "values: true - self parking available

                           part of the :h_parking group"})

;; Group :internet bundles the four internet-access extractors.
(add-ext-group :internet #{:internet_any :internet_free :internet_wireless :internet_public})

;; :internet_any -- the core optionally includes a "high-speed " prefix;
;; "no <core>" maps to false and "<core> access" to true.
(add-enum-regex {:name :internet_any
                 :core #"(?:high[- ]speed )?internet"
                 :patterns [[#"no CORE" false]
                            [#"CORE access" true]]
                 :default "no-evidence"
                 :comment "values: true  - offers internet access
                                   false - does not offer internet access

                           part of the :internet group"})

;; :internet_free -- "free"/"complimentary" either before the core (with
;; optional "high-speed" and "wireless" qualifiers) or after it, optionally
;; wrapped in parentheses or introduced by " - ". The "no free ..." negation
;; is listed first.
(add-enum-regex {:name :internet_free
                 :core #"internet"
                 :patterns [[#"no (?:free|complimentary)(?: high[- ]speed)? CORE" false]
                            [#"(?:free|complimentary)(?: high[- ]speed)?(?: wireless)? CORE" true]
                            [#"CORE(?: access)? (?:\(| - )?(?:complimentary|free)\)?" true]]
                 :default "no-evidence"
                 :comment "values: true  - offers free internet access
                                   false - does not offer free internet access

                           part of the :internet group"})

;; :internet_wireless -- "wireless" next to the core, with optional
;; "high-speed" qualifiers on either side of "wireless". The "no ..." negation
;; is listed ahead of the positive forms.
;; The positive pattern uses a single space before CORE, matching the
;; negation above it; it previously had two spaces and therefore could not
;; match the ordinary phrase "wireless internet".
(add-enum-regex {:name :internet_wireless
                 :core #"internet"
                 :patterns [[#"no(?: high[- ]speed)? wireless(?: high[- ]speed)? CORE" false]
                            [#"(?: high[- ]speed)? wireless(?: high[- ]speed)? CORE" true]
                            [#"CORE wireless" true]]
                 :default "no-evidence"
                 :comment "values: true  - offers wireless interet
                                   false - does not offer wireless internet

                           part of the :internet group"})

;; :internet_public -- core-only extractor for the exact phrase
;; "internet access in public areas".
(add-enum-regex {:name :internet_public
                 :core #"internet access in public areas"
                 :default true
                 :comment "values: true - internet available only in public areas

                           part of the :internet group"})

;; Group :room_service bundles the two room-service extractors.
(add-ext-group :room_service #{:room_service_any :room_service_24-hour})

;; :room_service_any -- "no room service" maps to false, otherwise the
;; :default of true applies.
(add-enum-regex {:name :room_service_any
                 :core #"room service"
                 :patterns [[#"no CORE" false]]
                 :default true
                 :comment "values: true  - room service available
                                   false - room service not available

                           part of the :room_service group"})

;; :room_service_24-hour -- "room service (limited" maps to false; the
;; "24hr"/"24 hour(s)" phrasings before or after the core map to true.
(add-enum-regex {:name :room_service_24-hour
                 :core #"room service"
                 :patterns [[#"CORE \(limited" false]
                            [#"24(?:hr| hours?) CORE" true]
                            [#"CORE \(24 hours" true]
                            [#"CORE is available 24 hours" true]]
                 :default "no-evidence"
                 :comment "values: true  - room service available 24 hours a day
                                   false - limited room service available

                           part of the :room_service group"})

;; Group :pool bundles the five swimming-pool extractors. They all share the
;; core "pool" with an optional "swimming " prefix.
(add-ext-group :pool #{:pool_available :pool_outdoor :pool_indoor :pool_seasonal :pool_heated})

;; :pool_available -- "no pool" maps to false, otherwise the :default of true
;; applies.
(add-enum-regex {:name :pool_available
                 :core #"(?:swimming )?pool"
                 :patterns [[#"no CORE" false]]
                 :default true
                 :comment "values: true  - has a swimming pool
                                   false - does not have a swimming pool

                           part of the :pool group"})

;; :pool_outdoor -- "outdoor" before the core (optionally followed by any mix
;; of "seasonal"/"heated", with optional commas), or after it as " - outdoor"
;; or "(outdoor)".
(add-enum-regex {:name :pool_outdoor
                 :core #"(?:swimming )?pool"
                 :patterns [[#"outdoor(?:,? seasonal|,? heated)* CORE" true]
                            [#"CORE - outdoor" true]
                            [#"CORE \(outdoor\)" true]
                            ]
                 :default "no-evidence"
                 :comment "values: true - has an outdoor swimming pool

                           part of the :pool group"})

;; :pool_indoor -- "indoor"/"in-door" before the core (also "indoor-outdoor",
;; optionally followed by "heated"), or after it as " - indoor" or "(indoor)".
(add-enum-regex {:name :pool_indoor
                 :core #"(?:swimming )?pool"
                 :patterns [[#"in-?door(?:-outdoor)?(?:,? heated)? CORE" true]
                            [#"CORE - indoor" true]
                            [#"CORE \(indoor\)" true]]
                 :default "no-evidence"
                 :comment "values: true - has an indoor swimming pool

                           part of the :pool group"})

;; :pool_seasonal -- "pool (seasonal)" or "seasonal [outdoor] pool".
;; Both patterns contain CORE; the second one previously lacked it, so it was
;; not tied to the word "pool" at all.
(add-enum-regex {:name :pool_seasonal
                 :core #"(?:swimming )?pool"
                 :patterns [[#"CORE \(seasonal\)" true]
                            [#"seasonal(?: outdoor)? CORE" true]]
                 :default "no-evidence"
                 :comment "values: true - swimming pool is seasonal

                           part of the :pool group"})

;; :pool_heated -- "heated" (optionally followed by "indoor"/"outdoor")
;; directly before the core.
(add-enum-regex {:name :pool_heated
                 :core #"(?:swimming )?pool"
                 :patterns [[#"heated(?: indoor| outdoor)? CORE" true]]
                 :default "no-evidence"
                 :comment "values: true - swimming pool is heated

                           part of the :pool group"})

;; :business_center -- "no business center" maps to false, otherwise the
;; :default of true applies.
(add-enum-regex {:name :business_center
                 :core #"business center"
                 :patterns [[#"no CORE" false]]
                 :default true
                 :comment "values: true  - has a business center
                                   false - does not have a business center"})

;; :laundry -- "no laundry" maps to false, otherwise the :default applies.
;; NOTE(review): the default is the STRING "true" while the pattern value is
;; the BOOLEAN false -- mixed types within one extractor; confirm whether
;; consumers distinguish them.
(add-enum-regex {:name :laundry
                 :core #"laundry"
                 :patterns [[#"no CORE" false]]
                 :default "true"
                 :comment "values: true  - has laundry facilities
                                   false - does not have laundry facilities"})

;; Group :breakfast bundles :breakfast_available, :breakfast_full and
;; :breakfast_continental. :breakfast_core (below) is NOT a member.
(add-ext-group :breakfast #{:breakfast_available :breakfast_full :breakfast_continental})

;; :breakfast_available -- "no breakfast" maps to false,
;; "free"/"complimentary breakfast" to true.
(add-enum-regex {:name :breakfast_available
                 :core #"breakfast"
                 :patterns [[#"no CORE" false]
                            [#"(?:free|complimentary) CORE" true]]
                 :default "no-evidence"
                 :comment "values: true  - free breakfast
                                   false - no free breakfast

                           part of the :breakfast group"})

;; :breakfast_full -- "bed and breakfast" (also with the HTML entity "&amp;")
;; or "full [country] breakfast".
(add-enum-regex {:name :breakfast_full
                 :core #"breakfast"
                 :patterns [[#"bed (?:&amp;|and) CORE" true]
                            [#"full(?: country)? CORE" true]]
                 :default "no-evidence"
                 :comment "values: true - offers a full breakfast

                           part of the :breakfast group"})

;; :breakfast_core -- core-only extractor for the bare word "breakfast";
;; no patterns and no :comment.
(add-enum-regex {:name :breakfast_core
                 :core #"breakfast"
                 :default true})

;; :breakfast_continental -- "continental breakfast" plus three other
;; qualifiers ("daybreak", "superstart", "quikstart") that map to the same
;; value.
(add-enum-regex {:name :breakfast_continental
                 :core #"breakfast"
                 :patterns [[#"(?:continental|daybreak|superstart|quikstart) CORE" true]]
                 :default "no-evidence"
                 :comment "values: true - offers a continental breakfast

                           part of the :breakfast group"})

;; :restaurant -- "no restaurant" maps to false; a restaurant that is
;; described as on-site/in-house, or followed by a verb such as
;; "serves"/"offers", maps to true.
(add-enum-regex {:name :restaurant
                 :core #"restaurant"
                 :patterns [[#"no CORE" false]
                            [#"CORE (?:onsite|serves|offers|serving|inside|available)" true]
                            [#"(?:site|own|onsite|house|hotel) CORE" true]]
                 :default "no-evidence"
                 :comment "values: true  - has a restaurant
                                   false - does not have a restaurant"})

;; :concierge -- "no concierge" maps to false, otherwise the :default of true
;; applies.
(add-enum-regex {:name :concierge
                 :core #"concierge"
                 :patterns [[#"no CORE" false]]
                 :default true
                 :comment "values: true  - has a concierge
                                   false - does not have a concierge"})

;; :fitness_test -- the phrase "fitness center", expressed with "center" as
;; the core. It is not a member of the :fitness_facilities group.
(add-enum-regex {:name :fitness_test
                 :core "center"
                 :patterns [["fitness CORE" true]]
                 :default "no-evidence"})

;; Group :fitness_facilities bundles the on-site and nearby fitness
;; extractors, which share the same core.
(add-ext-group :fitness_facilities #{:fitness_any :fitness_nearby})

;; :fitness_any -- the "nearby" / "drive to the ..." phrasings map to
;; "no-evidence" and are listed before "no <core>" (false); otherwise the
;; :default of true applies.
(add-enum-regex {:name :fitness_any
                 :core #"fitness|gym|exercise|workout"
                 :patterns [[#"CORE center nearby" "no-evidence"]
                            [#"CORE nearby" "no-evidence"]
                            [#"drive to the CORE" "no-evidence"]
                            [#"no CORE" false]]
                 :default true
                 :comment "values: true  - has fitness facilties
                                   false - has no fitness facilities

                           part of the :fitness_facilities group"})

;; :fitness_nearby -- counterpart of :fitness_any: the same "nearby" /
;; "drive to the ..." phrasings map to true here.
(add-enum-regex {:name :fitness_nearby
                 :core #"fitness|gym|exercise|workout"
                 :patterns [[#"no CORE(?: center)?" false]
                            [#"CORE(?: center)? nearby" true]
                            [#"drive to the CORE" true]]
                 :default "no-evidence"
                 :comment "values: true  - fitness center nearby
                                   false - no fitness center at all

                           part of the :fitness_facilities group"})

;; :bar -- "no bar/lounge" maps to false; a bar or lounge described as
;; hotel/house/on-site (before or after the core) maps to true.
;; The :default is the hyphenated sentinel "no-evidence" used by the other
;; extractors in this file (it was previously spelled "no evidence").
(add-enum-regex {:name :bar
                 :core #"(?:bar|lounge)"
                 :patterns [[#"no CORE" false]
                            [#"(?:hotel|house|on-?site) CORE" true]
                            [#"CORE on-?site" true]]
                 :default "no-evidence"
                 :comment "values: true  - has a bar
                                   false - does not have a bar"})

;; Group :accessibility bundles the six accessibility extractors.
(add-ext-group :accessibility #{:braille_signage :no-step_showers :accessible_bathroom :handicapped_parking
                                :wheelchair_accessible :deaf_equipment})

;; :braille_signage -- core-only extractor for the word "braille"; it is the
;; only accessibility extractor that carries a :filter set.
(add-enum-regex {:name :braille_signage
                 :core #"braille"
                 :default true
                 :filter #{"select" "a" "script" "link"}
                 :comment "values: true - signs in braille

                           part of the :accessibility group"})

;; :no-step_showers -- "roll-in shower(s)" / "no-step shower(s)", with either
;; a hyphen or a space inside the qualifier.
;; The :default is the hyphenated sentinel "no-evidence" used by the other
;; extractors in this file (it was previously spelled "no evidence").
(add-enum-regex {:name :no-step_showers
                 :core #"showers?"
                 :patterns [[#"(?:roll[- ]in|no[- ]step) CORE" true]]
                 :default "no-evidence"
                 :comment "values: true - has no-step/roll-in showers

                           part of the :accessibility group"})

;; The four extractors below share one shape: a negated core ("no ..." or
;; "not ...") maps to false, otherwise the :default of true applies.

;; :accessible_bathroom -- "accessible bathroom(s)".
(add-enum-regex {:name :accessible_bathroom
                 :core #"accessible bathrooms?"
                 :patterns [[#"no CORE" false]]
                 :default true
                 :comment "values: true  - has accessible bathrooms
                                   false - does not have accessible bathrooms

                           part of the :accessibility group"})

;; :handicapped_parking -- "handicapped parking".
(add-enum-regex {:name :handicapped_parking
                 :core #"handicapped parking"
                 :patterns [[#"no CORE" false]]
                 :default true
                 :comment "values: true  - has handicapped parking
                                   false - does not have handicapped parking

                           part of the :accessibility group"})

;; :wheelchair_accessible -- the negation here is "not ...", not "no ...".
(add-enum-regex {:name :wheelchair_accessible
                 :core "wheelchair accessible"
                 :patterns [[#"not CORE" false]]
                 :default true
                 :comment "values: true  - is wheelchair accessible
                                   false - is not wheelchair accessible

                           part of the :accessibility group"})

;; :deaf_equipment -- "equipment for the deaf" or "tty".
(add-enum-regex {:name :deaf_equipment
                 :core "(?:equipment for the deaf|tty)"
                 :patterns [["no CORE" false]]
                 :default true
                 :comment "values: true  - has equipment for the deaf
                                   false - does not have equipment for the deaf

                           part of the :accessibility group"})

;; :elevator -- "no elevator" maps to false, otherwise the :default of true
;; applies.
(add-enum-regex {:name :elevator
                 :core "elevator"
                 :patterns [["no CORE" false]]
                 :default true
                 :comment "values: true  - has an elevator
                                   false - does not have an elevator"})

;; :24-hour_front_desk -- "24 hour"/"24-hour"/"24 - hour front desk" and
;; "front desk is open 24" map to true; "front desk (limited hours)" maps to
;; false.
(add-enum-regex {:name :24-hour_front_desk
                 :core "front desk"
                 :patterns [[#"24(?: |-| - )hour CORE" true]
                            [#"CORE is open 24" true]
                            [#"CORE \(limited hours\)" false]]
                 :default "no-evidence"
                 :comment "values: true  - front desk open 24 hours a day
                                   false - front desk open limited hours"})

;; :coffee -- same shape as :restaurant, for "coffee shop" or "cafe". Here the
;; "house" qualifier carries a \b word boundary, which the :restaurant
;; pattern does not have.
(add-enum-regex {:name :coffee
                 :core "coffee shop|cafe"
                 :patterns [["no CORE" false]
                            ["CORE (?:onsite|serves|offers|serving|inside|available)" true]
                            [#"(?:site|own|onsite|\bhouse|hotel) CORE" true]]
                 :default "no-evidence"
                 :comment "values: true  - has a coffee shop
                                   false - does not have a coffee shop"})

;; :cable_tv -- "cable/satellite/premium television|tv" or "... with cable".
;; NOTE(review): the first pattern uses a capturing group where the rest of
;; the file uses (?:...); presumably harmless because the value is `true`
;; rather than a group index -- confirm.
(add-enum-regex {:name :cable_tv
                 :core "television|tv"
                 :patterns [["(cable|satellite|premium) CORE" true]
                            ["CORE with cable" true]]
                 :default "no-evidence"
                 :comment "values: true - has cable/satellite television"})

;; :secretarial_services -- core-only extractor for "secretarial service(s)"
;; or the abbreviation "secretarial svc".
(add-enum-regex {:name :secretarial_services
                 :core "secretarial (?:services?|svc)"
                 :default true
                 :comment "values: true - provides secretarial services"})

;; :conference_rooms -- "no conference" maps to false; "conference" followed
;; by facilities/center/room(s) maps to true.
(add-enum-regex {:name :conference_rooms
                 :core "conference"
                 :patterns [["no CORE" false]
                            ["CORE (?:facilities|center|rooms?)" true]]
                 :default "no-evidence"
                 :comment "values: true  - has conference rooms
                                   false - does not have conference rooms"
                 })

;; :meeting_rooms -- "no meeting" maps to false; "meeting" followed by a
;; facility noun, by "and event"/"and banquet", or by "/banquet" or
;; "/conference" maps to true.
(add-enum-regex {:name :meeting_rooms
                 :core "meeting"
                 :patterns [["no CORE" false]
                            ["CORE (?:facilities|rooms?|space|and (?:event|banquet)|center)" true]
                            ["CORE\\/(?:banquet|conference)" true]]
                 :default "no-evidence"
                 :comment "values: true  - has meeting rooms
                                   false - does not have meeting rooms"})

;; :banquet_facilities -- "no banquet" maps to false; "banquet" followed by
;; facilities/space/halls/rooms/venues maps to true.
(add-enum-regex {:name :banquet_facilities
                 :core "banquet"
                 :patterns [["no CORE" false]
                            ["CORE (?:facilities|space|halls|rooms|venues)" true]]
                 :default "no-evidence"
                 :comment "values: true  - has banquet facilities
                                   false - does not have banquet facilities"})

;; :event_catering -- "catering to" maps to "no-evidence"; otherwise the
;; :default of true applies.
;; NOTE(review): the :comment documents a `false` value, but no pattern in
;; this registration produces false.
(add-enum-regex {:name :event_catering
                 :core "catering"
                 :patterns [["CORE to" "no-evidence"]]
                 :default true
                 :comment "values: true  - provides catering
                                   false - does not provide catering"})

;; Group :spa bundles the seven spa extractors. :in-room_massage,
;; :wedding_services and :child_care further below are NOT members.
(add-ext-group :spa #{:spa_any :spa_massage :spa_sauna :spa_whirlpool :spa_jacuzzi
                      :spa_steam_room :spa_beauty_services})

;; :spa_any -- core-only extractor for the phrase "spa services".
(add-enum-regex {:name :spa_any
                 :core "spa services"
                 :default true
                 :comment "values: true - provides spa services

                           part of the :spa group"})

;; :spa_massage -- "massage" in a spa context: joined by "/" to
;; treatment/beauty/spa, followed by a service noun, followed by a comma, or
;; preceded by "spa /" or "spa and".
(add-enum-regex {:name :spa_massage
                 :core "massage"
                 :patterns [["CORE ?\\/ ?(?:treatment|beauty|spa)" true]
                            ["CORE (?:and spa|stations?|rooms?|therap(?:y|ist)|available|treatments?|services?)" true]
                            ["CORE," true]
                            ["spa(?: ?\\/ ?| and )CORE" true]]
                 :default "no-evidence"
                 :comment "values: true - provides massage

                           part of the :spa group"})

;; :spa_sauna -- core-only extractor for the word "sauna".
(add-enum-regex {:name :spa_sauna
                 :core "sauna"
                 :default true
                 :comment "values: true - has a sauna

                           part of the :spa group"})

;; :spa_whirlpool -- defaults to true, but "whirlpool tub/suite/room",
;; "private/two-person whirlpool" and "rooms/suites with [a] whirlpool" map
;; to "no-evidence".
(add-enum-regex {:name :spa_whirlpool
                 :core "whirlpool"
                 :patterns [["CORE (?:tubs?|suites?|rooms?)" "no-evidence"]
                            ["(?:private|two-person) CORE" "no-evidence"]
                            ["(?:rooms|suites) with(?: a)? CORE" "no-evidence"]]
                 :default true
                 :comment "values: true - has a whirlpool

                           part of the :spa group"})

;; :spa_jacuzzi -- defaults to true, but "jacuzzi tub/bathtub/suite/room"
;; maps to "no-evidence".
(add-enum-regex {:name :spa_jacuzzi
                 :core "jacuzzi"
                 :patterns [["CORE (?:tubs?|bathtubs?|suites?|rooms?)" "no-evidence"]]
                 :default true
                 :comment "values: true - has a jacuzzi

                           part of the :spa group"})

;; :spa_steam_room -- core-only extractor for the phrase "steam room".
(add-enum-regex {:name :spa_steam_room
                 :core "steam room"
                 :default true
                 :comment "values: true - has a steam room

                           part of the spa group"})

;; :spa_beauty_services -- core-only extractor for "beauty services".
(add-enum-regex {:name :spa_beauty_services
                 :core "beauty services"
                 :default true
                 :comment "values: true - provides beauty services

                           part of the spa group"})

;; :in-room_massage -- core-only extractor for "in-room massage".
(add-enum-regex {:name :in-room_massage
                 :core "in-room massage"
                 :default true
                 :comment "values: true - provides in-room massage"})

;; :wedding_services -- core-only extractor for "wedding services".
(add-enum-regex {:name :wedding_services
                 :core "wedding services"
                 :default true
                 :comment "values: true - provides wedding services"})

;; :child_care -- "childcare"/"child care"/"child services" or
;; "babysitting"/"baby-sitting"/"baby sitting"; a preceding "no " maps to
;; false, otherwise the :default of true applies.
(add-enum-regex {:name :child_care
                 :core "child ?(?:care|services)|baby[- ]?sitting"
                 :patterns [["no CORE" false]]
                 :default true
                 :comment "values: true  - childcare available
                                   false - no childcare available"})

;; :complimentary_newspapers -- three-valued: "no free/complimentary/courtesy
;; newspaper" -> false, "weekday newspaper" -> the string "weekday",
;; "free/complimentary/courtesy/daily newspaper" -> true.
(add-enum-regex {:name :complimentary_newspapers
                 :core "newspapers?"
                 :patterns [["no (?:free|complimentary|courtesy) CORE" false]
                            ["weekday CORE" "weekday"]
                            ["(?:free|complimentary|courtesy|daily) CORE" true]]
                 :default "no-evidence"
                 :comment "values: true    - provides free newspaper
                                   weekday - provides only weekday newspaper
                                   false   - does not provide free newspaper"})

;; :multilingual_staff -- core-only extractor for the word "multilingual".
(add-enum-regex {:name :multilingual_staff
                 :core "multilingual"
                 :default true
                 :comment "values: true - multilingual staff"})

;; :currency_exchange -- core-only extractor for "currency exchange".
(add-enum-regex {:name :currency_exchange
                 :core "currency exchange"
                 :default true
                 :comment "values: true - has a currency exchange"})

;; :express_checkin -- "express check-in"/"check in"/"checkin"; a preceding
;; "no " maps to false, otherwise the :default of true applies.
(add-enum-regex {:name :express_checkin
                 :core "express check[ -]?in"
                 :patterns [["no CORE" false]]
                 :default true
                 :comment "values: true  - has express check-in
                                   false - does not have express check-in"})

;; :express_checkout -- same shape as :express_checkin, for check-out.
(add-enum-regex {:name :express_checkout
                 :core "express check[ -]?out"
                 :patterns [["no CORE" false]]
                 :default true
                 :comment "values: true  - has express check-out
                                   false - does not have express check-out"})

;; Group :pickup bundles the five pick-up/shuttle extractors. Four of them
;; share the core "pick-up"/"pick up"/"pickup" or "shuttle".
(add-ext-group :pickup #{:pickup_any :pickup_complimentary :pickup_surcharge :pickup_airport :pickup_train_station})

;; :pickup_any -- "no [airport|train station] <core>" maps to false (the \b
;; keeps "no" from matching inside a longer word); a core directly followed
;; by "?" maps to "no-evidence"; "transport/available <core>" and
;; "<core> service(s)/transportation" map to true.
(add-enum-regex {:name :pickup_any
                 :core "pick[- ]?up|shuttle"
                 :patterns [["\\bno(?: (?:airport|train station))? CORE" false]
                            ["CORE\\?" "no-evidence"]
                            ["(?:transport|available) CORE" true]
                            ["CORE (?:services?|transportation)" true]]
                 :default "no-evidence"
                 :comment "values: true  - provides pick-up service
                                   false - does not provide pick-up service

                           part of the :pickup group"})

;; :pickup_complimentary -- "free"/"complimentary" before the core, with an
;; optional qualifier (airport, hotel, train station, 24-hour) in between.
(add-enum-regex {:name :pickup_complimentary
                 :core "pick[- ]?up|shuttle"
                 :patterns [["(?:free|complimentary)(?: (?:airport|hotel|train station|24[- ]hour))? CORE" true]]
                 :default "no-evidence"
                 :comment "values: true - provides free pick-up service

                           part of the :pickup group"})

;; :pickup_surcharge -- core-only extractor for the literal text
;; "shuttle (surcharge"; the opening parenthesis is escaped and no closing
;; parenthesis is required.
(add-enum-regex {:name :pickup_surcharge
                 :core #"shuttle \(surcharge"
                 :default true
                 :comment "values: true - provides pick-up service for a fee

                           part of the :pickup group"})

;; :pickup_airport -- "airport <core>", or "<core> [service] to / from /
;; to and from / to or from [the] airport".
;; The article "the" is optional, so "shuttle to airport" matches as well as
;; "shuttle to the airport"; it was previously mandatory.
;; The two negation/question patterns are deliberately left commented out, so
;; this extractor currently never yields `false`.
(add-enum-regex {:name :pickup_airport
                 :core "pick[- ]?up|shuttle"
                 :patterns [;["no airport CORE" false]
                            ;["airport CORE\\?" "no-evidence"]
                            ["airport CORE" true]
                            ["CORE(?: service)? (?:to|from|to (?:and|or) from)(?: the)? airport" true]]
                 :default "no-evidence"
                 :comment "values: true  - provides airport shuttle service
                                   false - does not provide airport shuttle service

                           part of the :pickup group"})

;; Extractor :pickup_train_station — is a train-station pick-up / shuttle
;; mentioned?  "train station" and "train-station" are both accepted.
;; The negated form is listed first; NOTE(review): presumably the first
;; matching pattern wins, otherwise the second pattern would also match the
;; negated text — confirm against the extractor.
(add-enum-regex {:name :pickup_train_station
                 :core "pick[- ]?up|shuttle"
                 :patterns [["no train[- ]station CORE" false]
                            ["train[- ]station CORE" true]]
                 :default "no-evidence"
                 :comment "values: true  - provides train-station shuttle service
                                   false - does not provide train-station shuttle service

                           part of the :pickup group"})

;; Extractor :roll-out_beds — are roll-out / roll-away beds offered?
;; :core accepts "rollout", "roll-out", "roll out", "rollaway", "roll-away",
;; "roll away", followed by "bed" or "beds".
;; A mention with no matching pattern falls back to :default (true).
(add-enum-regex {:name :roll-out_beds
                 :core "roll[- ]?(?:out|away) beds?"
                 ;; 1. "no <core>"                                    -> false
                 ;; 2. "<core>:", "<core> for", "<core> fee:" then a
                 ;;    price starting with "$" or "US$"               -> "surcharge"
                 ;; 3. "<core> [available] (surcharge"                -> "surcharge"
                 :patterns [["no CORE" false]
                            ["CORE(?::| for| fee:) (?:US)?\\$" "surcharge"]
                            ["CORE(?: available)? \\(surcharge" "surcharge"]]
                 :default true
                 :comment "values: true      - provides roll-out beds
                                   surcharge - provides roll-out beds for a fee
                                   false     - does not provide roll-out beds"})

;; Extractor :cribs — are cribs / cots / infant beds offered?
;; :core accepts "crib", "cot" or "infant bed", each with an optional plural s.
;; NOTE(review): :core itself has no word boundaries; whether "cot" can match
;; inside a longer word depends on how add-enum-regex wraps it — confirm.
(add-enum-regex {:name :cribs
                 :core "(?:crib|cot|infant bed)s?"
                 ;; 1. "no <core>" -> false
                 ;; 2. "<core>" + optional ")" + optional ":" or
                 ;;    " are available for", then a "$" / "US$" price -> "surcharge"
                 :patterns [["no CORE" false]
                            ["CORE(?:\\))?(?::| are available for)? (?:US)?\\$" "surcharge"]]
                 :default true
                 :comment "values: true      - provides cribs
                                   surcharge - provides cribs for a fee
                                   false     - does not provide cribs"})

;; Extractor :private_beach — is a private beach mentioned?
;; Only :core ("private beach") and :default (true) are given; there are no
;; refining patterns and therefore no way to produce false from here.
(add-enum-regex {:name :private_beach
                 :core "private beach"
                 :default true
                 :comment "values: true - has a private beach"})

;; Extractor group :all_hotel_extractors — one umbrella group naming every
;; hotel-amenity extractor.  Members are listed by feature family, one family
;; per line; the set contents are what matters, not the layout.
(add-ext-group
  :all_hotel_extractors
  #{;; pets
    :pets_allowed :pets_fee :pets_small
    ;; room policy / climate
    :non-smoking :air-conditioning
    ;; parking
    :h_parking_garage :h_parking_free :h_parking_surcharge :h_parking_secure :h_parking_valet :h_parking_self
    ;; internet
    :internet_any :internet_free :internet_wireless :internet_public
    ;; room service
    :room_service_any :room_service_24-hour
    ;; pool
    :pool_available :pool_outdoor :pool_indoor :pool_seasonal :pool_heated
    ;; business centre / laundry
    :business_center :laundry
    ;; food and drink
    :breakfast_available :breakfast_full :breakfast_continental :restaurant
    :concierge
    :fitness_any :fitness_nearby
    :bar
    ;; accessibility
    :braille_signage :no-step_showers :accessible_bathroom :handicapped_parking :wheelchair_accessible :deaf_equipment
    ;; general facilities
    :elevator :24-hour_front_desk :coffee :cable_tv
    ;; business / events
    :secretarial_services :conference_rooms :meeting_rooms :banquet_facilities :event_catering
    ;; spa
    :spa_any :spa_massage :spa_sauna :spa_whirlpool :spa_jacuzzi :spa_steam_room :spa_beauty_services :in-room_massage
    ;; guest services
    :wedding_services :child_care :complimentary_newspapers :multilingual_staff :currency_exchange
    :express_checkin :express_checkout
    ;; pick-up / shuttle
    :pickup_any :pickup_complimentary :pickup_surcharge :pickup_airport :pickup_train_station
    ;; extra beds
    :roll-out_beds :cribs
    ;; beach
    :private_beach})

;; Extractor group :possible_closure — the two content-based extractors that
;; look for signs a business has closed or gone out of business.
(add-ext-group :possible_closure
               #{:may_be_closed
                 :may_be_oob})

;; Extractor :may_be_closed — looks for statements that a business is closed.
;; A bare "closed" is not enough: the default is "no-evidence" and only the
;; specific phrasings below yield true.  The :comment string records the
;; noise patterns that were excluded while the positive patterns were tuned.
;; NOTE(review): :filter holds what look like HTML element names to ignore and
;; :content-only presumably restricts matching to page text — both are
;; interpreted by add-enum-regex, which is not visible here; confirm.
(add-enum-regex {:name :may_be_closed
                 :core "closed"
                 ;; 1. "this <word> [<word>] is|has|has been closed" -> true
                 ;; 2. "this <word> [<word>] closed down"            -> true
                 ;; 3. "is now closed"                               -> true
                 ;; 4. "(closed)"                                    -> true
                 ;; 5. "closed <weekday prefix>..." (opening hours)  -> no-evidence
                 ;; 6. "- closed"                                    -> true
                 :patterns [["this \\w+(?: \\w+)? (?:is|has|has been) CORE" true]
                            ["this \\w+(?: \\w+)? CORE down" true]
                            ["is now CORE" true]
                            ["\\(CORE\\)" true]
                            ["CORE (?:sun|mon|tue|wed|thu|fri|sat).*" "no-evidence"]
                            ["- CORE" true]
                            ]
                 :default "no-evidence"
                 :content-only true
                 :filter #{"select" "a" "script" "input" "label" "button"}
                 :comment "the following patterns were filtered out first in order to find positives:

                           [30]0\\s*CORE
                           \\d[ap]m\\s*CORE
                           (?:sun(?:day)?|mon(?:day)?|tue(?:s(?:day))?|wed(?:nesday)?|thu(?:rsday)?|fri(?:day)?|sat(?:urday)?)(?::)?\\s*CORE
                           CORE (?:on )(?:sun(?:day)?|mon(?:day)?|tue(?:s(?:day))?|wed(?:nesday)?|thu(?:rsday)?|fri(?:day)?|sat(?:urday)?)s?
                           status: CORE
                           CORE holidays
                           sometimes CORE
                           \\s\\s\\sCORE\\s\\s\\s
                           or CORE
                           (?:was|are) CORE
                           (?:temporary|either|the(?:now)?|banks) CORE
                           CORE (?:saturday\\w*|to|schools|\\d hours)"})

;; Extractor :may_be_oob — looks for statements that a business is
;; "out of business".  A bare mention defaults to "no-evidence".
;; "this listing is out of business" is explicitly mapped to "no-evidence"
;; and is listed before the generic "this <word> went|is ..." pattern, which
;; would otherwise also match it.  NOTE(review): this relies on earlier
;; patterns taking precedence — confirm against the extractor.
(add-enum-regex {:name :may_be_oob
                 :core "out of business"
                 :patterns [
                            ["now CORE" true]
                            ["this listing is CORE" "no-evidence"]
                            ["this \\w+ (?:went|is) CORE" true]
                            ["they (?:went|are) CORE" true]
                            ]
                 :default "no-evidence"
                 :content-only true})

;; Extractor :closed-cap_1 — upper-case "CLOSED" as a closure signal.
;; (?-i:...) switches case-insensitive matching off inside the group, so the
;; core only matches the all-capitals word (this implies the surrounding
;; expression is otherwise matched case-insensitively — confirm).
;; The default is true; the patterns mostly carve out contexts that should
;; NOT count (opening-hours listings, other capitalised text, markup).
;; NOTE(review): the "RESTAURANT IS" pattern yields the string "true" while
;; :default is the boolean true — confirm whether the difference is intended.
;; NOTE(review): #"\d\d\s+ CORE" needs at least two whitespace characters
;; (\s+ followed by a literal space) before the core — confirm intent.
(add-enum-regex {:name :closed-cap_1
                 :core #"(?-i:CLOSED)"
                 ;; 1. "CLOSED <weekday prefix>"                 -> no-evidence
                 ;; 2. "<weekday prefix> CLOSED"                 -> no-evidence
                 ;; 3. "RESTAURANT IS CLOSED" (capitals only)    -> "true"
                 ;; 4. "CLOSED <capital letter>"                 -> no-evidence
                 ;; 5. "<capital letter> CLOSED"                 -> no-evidence
                 ;; 6. two digits + whitespace + "CLOSED"        -> no-evidence
                 ;; 7. "CLOSED" directly before <br, </a, </option -> no-evidence
                 ;; 8. ">CLOSED<" (sole content between tags)    -> no-evidence
                 ;; 9. "CLOSED'"                                 -> no-evidence
                 :patterns [
                            ["CORE (?:sun|mon|tue|wed|thu|fri|sat)" "no-evidence"]
                            [#"(?:sun|mon|tue|wed|thu|fri|sat)\s+CORE" "no-evidence"]
                            [#"(?-i:RESTAURANT IS) CORE" "true"]
                            ["CORE (?-i:[A-Z])" "no-evidence"]
                            ["(?-i:[A-Z]) CORE" "no-evidence"]
                            [#"\d\d\s+ CORE" "no-evidence"]
                            [#"CORE\s*<(?:br|/a|/option)" "no-evidence"]
                            [">CORE<" "no-evidence"]
                            ["CORE'" "no-evidence"]
                            ]
                 :default true})

;; Extractor :shut-down_1 — the phrase "has shut down" as a closure signal.
;; No refining patterns: the value is the :default (true).
(add-enum-regex {:name :shut-down_1
                    :core #"has shut down"
                    :default true})

;; Extractor :out-of-business_1 — "out of business" as a closure signal.
;; A bare mention is "no-evidence"; only the two explicit phrasings are true.
;;   1. the phrase inside a quoted attribute value (= "...out of business")
;;      -> no-evidence.  The separator is =\s*" so optional whitespace between
;;      "=" and the opening quote is tolerated.
;;   2. the phrase followed by "?" (a question)       -> no-evidence
;;   3. "looks like it's out of business"             -> true
;;   4. "this place went out of business"             -> true
(add-enum-regex {:name :out-of-business_1
                 :core #"out of business"
                 :patterns [[#"=\s*\"[^\"]*CORE" "no-evidence"]
                            [#"CORE\?" "no-evidence"]
                            ["looks like it's CORE" true]
                            ["this place went CORE" true]]
                 :default "no-evidence"})

;; Extractor :closed-title_1 — "closed" appearing inside the page <title>.
;; A bare mention is "no-evidence".
;;   1. the word inside a quoted attribute value (= "...closed")
;;      -> no-evidence.  The separator is =\s*" so optional whitespace between
;;      "=" and the opening quote is tolerated.
;;   2. "closed?" (a question)                               -> no-evidence
;;   3. <title> containing "closed temporarily" / "closed for " -> no-evidence
;;   4. any other <title> containing "closed"                -> "true"
;; NOTE(review): pattern 4 yields the string "true" where other extractors in
;; this file use the boolean true — confirm whether that is intended.
(add-enum-regex {:name :closed-title_1
                 :core #"closed"
                 :patterns [[#"=\s*\"[^\"]*CORE" "no-evidence"]
                            [#"CORE\?" "no-evidence"]
                            [#"<title>[^<>]*CORE\s*(?:temporarily|for\s)[^<>]*</title>" "no-evidence"]
                            [#"<title>[^<>]*CORE[^<>]*</title>" "true"]]
                 :default "no-evidence"})

;; Extractor :cap-test — experiment with a captured value.
;; The pattern matches "<digit>-star food" and captures the digit; the value 1
;; is presumably the index of the capture group to return (the same [regex n]
;; shape is used by other extractors in this file) — confirm in add-enum-regex.
(add-enum-regex {:name :cap-test
                 :core "food"
                 :patterns [[#"(\d)-star CORE" 1]]
                 :default "no-evidence"})

;; Extractor :cap-test_2 — experiment with a capture group inside :core.
;; :core matches whitespace, "$", then a dd.dd amount, capturing the amount.
;;   1. "we" (word) followed within 40 characters by the price   -> no-evidence
;;   2. "meal" (word) followed within 40 characters by the price -> 1,
;;      presumably "return capture group 1", i.e. the amount — confirm.
(add-enum-regex {:name :cap-test_2
                 :core #"\s\$(\d\d\.\d\d)"
                 :patterns [[#"we\b.{0,40}CORE" "no-evidence"]
                            [#"meal\b.{0,40}CORE" 1]]
                 :default "no-evidence"})

;; Extractor :happy-hour_1 — is a happy hour mentioned?
;; Default is true for any mention, with two exceptions:
;;   1. "your happy hour guide" (site boilerplate) -> no-evidence
;;   2. "no happy hour"                            -> false
(add-enum-regex {:name :happy-hour_1
                 :core #"happy hour"
                 :patterns [[#"your CORE guide" "no-evidence"]
                            [#"no CORE" false]]
                 :default true})
