Coverage report: /home/samppa/personal/opiskelu/ohtuprojekti/pulsu/trunk/scenario.lisp
Kind | Covered | All | % |
expression | 237 | 463 | 51.2 |
branch | 8 | 38 | 21.1 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
; This file contains stuff for implementing scenario-genericness
3
(in-package :puls.controller)
5
; we default to medical scenario
6
(defparameter *scenario* :medical)
8
(defun scenario-string ()
9
"Retrurn current scenario as string"
10
(string-downcase (string *scenario*)))
12
; attributes are objects
13
(defclass attribute ()
17
:documentation "Name of corresponding field in database")
21
:documentation "Textual description")
25
:documentation "How to store offsets of attribute in db.
26
Can be (start-field end-field) or single-field"
32
:documentation "type of attribute: :plain means
33
non-editable, :text freeform text, and :select a selection
34
box. attr-options gets options for an attribute with :select
39
:reader attr-search-type
40
:documentation "What sort of search box to display,
41
can be nil (non-searchable), :text or :confidence")
45
:documentation "Function: takes a row and returns
46
options for value of attribute")
50
:documentation "Function: takes a row and returns
51
value of attribute for that row")
55
:documentation "Function: takes a row and returns row
56
with (normalized etc.) value stored. May also return nil for
57
invalid values. Function should return the row as-is when
58
attribute is not set for row. This field should be nil if no
59
normalizing is necessary.")))
62
(defmethod initialize-instance :after ((attribute attribute) &key)
63
(if (null (slot-value attribute 'get-value))
64
(setf (slot-value attribute 'get-value)
66
(getf row (slot-value attribute 'field))))))
70
(defparameter *attr-alist* nil)
73
"Get list of attributes associated with current scenario"
74
(cdr (assoc *scenario* *attr-alist*)))
76
(defun attr-options (attr row)
77
"Get options for value of attribute"
78
(funcall (slot-value attr 'get-options) row))
80
(defun attr-value (attr row)
81
"Get current value of attribute"
82
(funcall (slot-value attr 'get-value) row))
84
(defun start-index (attr row)
85
"Get starting index (offset in text) of attribute"
86
(getf row (first (slot-value attr 'offsets))))
88
(defun end-index (attr row)
89
"Get ending index (offset in text) of attribute"
90
(getf row (second (slot-value attr 'offsets))))
93
"Get short textual id for attribute. Consists of lowercase alphanumerical characters"
94
(string-downcase (string (slot-value attr 'field))))
96
(defun find-offsets (words text)
97
(let ((loc (search words text)))
100
(format nil "##~A#~A#"
102
(+ loc (length words))))))
104
(defun attr-do-offsets (row text)
105
"Inserts the offset fields for the attrs of row into row, using text
107
(dolist (a (get-attrs))
108
(when (not (null (getf row (attr-field a))))
109
(typecase (slot-value a 'offsets)
112
(destructuring-bind (startsym endsym) (slot-value a 'offsets)
113
(let* ((start (or (search (getf row (attr-field a)) text) -1))
114
(end (if (= -1 start)
116
(+ start (length (getf row (attr-field a) row))))))
117
(nconc row (list startsym start endsym end)))))
121
(list (attr-offsets a) (find-offsets (getf row (attr-field a))
125
(define-condition invalid-attr-value
127
((attr :initarg :attr
128
:reader invalid-attr-value-attr)
129
(value :initarg :value
130
:reader invalid-attr-value-value)))
132
(defun normalize-row (row)
133
"Takes a row and normalizes it's fields."
134
(dolist (a (get-attrs))
135
(unless (null (slot-value a 'normalize))
136
(when (null (funcall (slot-value a 'normalize) row))
137
(error 'invalid-attr-value
139
:value (getf row (attr-field a))))))
142
(defun clean-row (row text)
143
"attr-do-offsets + normalize-row + remove :filter, :docno and :event_id attributes from row as they don't go into db"
144
(let ((newrow (normalize-row (attr-do-offsets row text))))
145
(remf newrow :trigger)
147
(remf newrow :event_id)
150
;; scenario-specific stuff
152
(defun get-unified-case-title (rows)
153
"Generates a title for a bunch of cases (list of rows)"
156
(let (diseases places)
157
(loop :for row :in rows
159
(pushnew (getf row :disease_canon) diseases :test 'equal)
160
(pushnew (getf row :country) places :test 'equal))
161
(format nil "~A in ~A" diseases places)))
163
(error "invalid scenario"))))
165
(defun field-getter (field &rest rest)
167
(adjoin (getf row field)
168
(apply #'get-all-fields-of-type (getf row :docno) rest))))
170
(defun get-document-diseases (row)
171
"Get list of disease names that appear in the document"
172
(get-all-fields-of-type (getf row :docno) :disease))
173
(defun get-document-countries (row)
174
"Get list of countries that appear in the document"
175
(get-all-fields-of-type (getf row :docno) :state))
176
(defun get-document-locations (row)
177
"Get list of locations that appear in the document"
178
(get-all-fields-of-type (getf row :docno) :location))
179
(defun get-document-times (row)
180
"Get list of times that appear in the document"
181
(get-all-fields-of-type (getf row :docno) :date))
182
(defun get-document-starts (row)
183
"Get list of starting times that appear in the document"
184
(get-all-fields-of-type (getf row :docno) :date :start))
185
(defun get-document-ends (row)
186
"Get list of ending times that appear in the document"
187
(get-all-fields-of-type (getf row :docno) :date :end))
189
(defun validate-regex (field regex)
191
(and (or (null (getf row field))
192
(cl-ppcre:scan regex (getf row field)))
195
(defun validate-disease (row)
196
(if (getf row :disease_name)
197
(let ((canon (get-canonical (getf row :docno)
198
(getf row :disease_name)
201
(nconc row (list :disease_canon canon))
207
(flet ((f (&rest args)
208
(apply #'make-instance 'attribute args)))
213
(f :field :doc_date :desc "Published")
214
(f :field :disease_name :desc "Disease" :type :select
216
:get-options (field-getter :disease_name :disease)
217
:offsets '(:disease_name_start :disease_name_end)
218
:normalize #'validate-disease)
219
(f :field :country :desc "Country" :type :select
221
:get-options (field-getter :country :state)
222
:offsets '(:country_start :country_end))
223
(f :field :location :desc "Location" :type :select
225
:get-options (field-getter :location :location)
226
:offsets '(:location_start :location_end))
227
(f :field :case_total :desc "Total" :type :text)
228
(f :field :time :desc "Time" :type :select
230
:get-options (field-getter :time :date)
231
:offsets '(:time_start :time_end))
232
(f :field :norm_stime :desc "Begin" :type :select
234
:get-options (field-getter :time :date :start)
235
:normalize (validate-regex :norm_stime
236
"^\\d{4}\\.\\d{2}\\.\\d{2}$"))
237
(f :field :norm_etime :desc "End" :type :select
239
:get-options (field-getter :time :date :end)
240
:normalize (validate-regex :norm_stime
241
"^\\d{4}\\.\\d{2}\\.\\d{2}$"))
242
(f :field :case_status :desc "Status" :type :select
243
:get-options (lambda (&optional x) '("sick" "dead")))
244
(f :field :case_descriptor :desc "Descriptor" :type :text
245
:offsets '(:case_descriptor_start :case_descriptor_end))
246
(f :field :trigger :desc "Trigger" :type :plain
248
:get-value (lambda (x) (first (get-trigger-spans x)))
249
:normalize (lambda (x) (or (null (getf x :trigger))
250
(< 0 (length (getf x :trigger))))))
251
(f :field :confidence :desc "Confidence"
252
:search-type :confidence)
253
(f :field :source :desc "Source"
254
:get-value (lambda (x) (get-document-source (getf x :docno))))
255
(f :field :verify :desc "Verify"))
260
(let ((row '(:docno "20050101_bbc_b118e8867977ba4feae312bb5ea17ef4")))
262
(deftest test-get-values-date-start ()
263
(check (equal (write-to-string (get-document-starts row))
264
"(\"1991\" \"2003.05\")")))
266
(deftest test-get-values-date-end ()
267
(check (equal (write-to-string(get-document-ends row))
268
"(\"2005\" \"2003.05\")")))
271
(deftest test-get-values-disease ()
272
(check (equal (write-to-string(get-document-diseases row)) "(\"weapons-grade anthrax\")")))
274
(deftest test-get-values-state ()
275
(check (equal (write-to-string(get-document-countries row))
276
"(\"the US\" \"Iraq\" \"Washington\" \"US\")"))))