Coverage report: /home/samppa/personal/opiskelu/ohtuprojekti/pulsu/trunk/files.lisp
Kind | Covered | All | % |
expression | 61 | 185 | 33.0 |
branch | 2 | 16 | 12.5 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
; This file contains functions for handling the document text, .auxil and .paf files.
4
(in-package :puls.controller)
6
(defun get-document-text (docno)
7
"Gets the full text of the document identified by docno.
8
You need to have the documents directory (available from
9
/home/group/pulsu/documents) in your project root for this to work."
10
(let ((year (subseq docno 0 4))
11
(month (subseq docno 4 6)))
12
(flex:octets-to-string ; documents are stored as latin1, we want them
13
(flex:string-to-octets ; in unicode to make offsets match :/
15
(merge-pathnames (format nil "documents/~A/~A/~A/~A"
18
cl-user::*project-root*))
19
:external-format :utf-8))))
21
(defun get-document-title (docno)
22
"Gets title of the document"
25
(get-document-text docno)
30
(defun get-trigger-spans (row)
31
"Returns a list of trigger spans for the IE-event identified by row
32
You need to have the documents directory, see the docstring of get-document-text"
33
(let ((text (get-document-text (getf row :docno))))
36
(get-trigger-offsets row))))
38
(defun get-trigger-offsets (row)
40
(mapcar #'parse-integer
41
(delete-if (lambda (x) (equal x ""))
42
(cl-ppcre:split "#+" (getf row :offsets))))))
44
(defun get-snippet (row)
45
"Returns snippet for case described by row"
46
(when (get-trigger-offsets row)
48
(loop :with (beg end) = (get-trigger-offsets row)
49
:with text = (get-document-text (getf row :docno))
50
:for span :in (get-spans-of-type (get-document-auxil
53
:when (and (<= (get-begin span) beg)
54
(>= (get-end span) end))
55
:collecting (subseq text
61
(defun get-document-auxil (docno)
62
(let* ((year (subseq docno 0 4))
63
(month (subseq docno 4 6))
64
(filepath (format nil "~Adocuments/~A/~A/~A/~A.auxil"
65
cl-user::*project-root*
68
(if (probe-file filepath)
69
(first (puls.auxil::parse-auxil-file filepath)))))
73
(defun get-all-fields-of-type (docno typ &optional (typ2 :text))
75
(loop :for i :in (puls.auxil:get-spans-of-type
76
(get-document-auxil docno) typ)
77
:do (pushnew (puls.auxil:get-property i typ2) list
81
;; Returns the :canon property of the first span, among those that
;; puls.auxil:get-spans-of-type yields for TYP in the auxil data of
;; document DOCNO, whose :text property is STRING= to WORD.
;; Falls through to NIL (the value of DOLIST) when no span matches.
;; NOTE(review): get-document-auxil yields NIL when the .auxil file is
;; missing -- confirm that puls.auxil:get-spans-of-type accepts NIL.
(defun get-canonical (docno word typ)
82
(dolist (i (puls.auxil:get-spans-of-type (get-document-auxil docno) typ))
83
(when (string= word (puls.auxil:get-property i :text))
84
(return-from get-canonical (puls.auxil:get-property i :canon)))))
89
(defun get-document-paf (docno)
90
(let* ((year (subseq docno 0 4))
91
(month (subseq docno 4 6))
92
(filepath (format nil "~Adocuments/~A/~A/~A/~A.paf"
93
cl-user::*project-root*
96
(if (probe-file filepath)
97
(puls.auxil::parse-paf-file filepath))))
99
;; Returns whatever puls.auxil:get-property finds for the key
;; '(:url :url) in the value of (get-document-paf DOCNO).
;; NOTE(review): the list key presumably addresses a nested :url entry,
;; and get-document-paf yields NIL when the .paf file is missing --
;; confirm both against puls.auxil:get-property.
(defun get-document-source (docno)
100
(puls.auxil:get-property (get-document-paf docno) '(:url :url)))
105
;; Checks that get-document-source returns NIL for this sample docno.
;; NOTE(review): NIL may merely reflect a missing .paf file or :url entry
;; in the test data -- confirm that this is the intended expectation.
(deftest test-get-document-source ()
106
(check (equal (get-document-source "20050101_bbc_b118e8867977ba4feae312bb5ea17ef4") NIL)))
108
(deftest test-get-document-auxil ()
109
(let* ((aux (get-document-auxil "20050101_bbc_b118e8867977ba4feae312bb5ea17ef4")))
111
(check (equal (write-to-string aux) "#I(PULS.AUXIL:AUXIL PULS.AUXIL::PROPS
112
(:DOC (:DOCID \"20050101_bbc_b118e8867977ba4feae312bb5ea17ef4\") :SPANS
113
(#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S :V :NIL)
114
PULS.AUXIL::BEGIN 55 PULS.AUXIL::END 88)
115
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
116
PULS.AUXIL::BEGIN 89 PULS.AUXIL::END 203)
117
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
118
PULS.AUXIL::BEGIN 204 PULS.AUXIL::END 351)
119
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
120
PULS.AUXIL::BEGIN 352 PULS.AUXIL::END 439)
121
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
122
PULS.AUXIL::BEGIN 440 PULS.AUXIL::END 553)
123
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
124
(:TYPE :STATE :TEXT \"US\" :CANON \"USA\") PULS.AUXIL::BEGIN 501
126
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
127
(:TYPE :DATE :TEXT \"May 2003\" :START \"2003.05\" :END \"2003.05\"
129
PULS.AUXIL::BEGIN 514 PULS.AUXIL::END 522)
130
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
131
PULS.AUXIL::BEGIN 554 PULS.AUXIL::END 611)
132
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
133
(:TYPE :STATE :TEXT \"US\" :CANON \"USA\") PULS.AUXIL::BEGIN 558
135
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
136
PULS.AUXIL::BEGIN 613 PULS.AUXIL::END 730)
137
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
138
(:TYPE :STATE :TEXT \"US\" :CANON \"USA\") PULS.AUXIL::BEGIN 688
140
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
141
PULS.AUXIL::BEGIN 731 PULS.AUXIL::END 824)
142
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
143
PULS.AUXIL::BEGIN 826 PULS.AUXIL::END 915)
144
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
145
PULS.AUXIL::BEGIN 917 PULS.AUXIL::END 1082)
146
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
147
PULS.AUXIL::BEGIN 1083 PULS.AUXIL::END 1190)
148
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
149
PULS.AUXIL::BEGIN 1191 PULS.AUXIL::END 1320)
150
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
151
(:TYPE :STATE :TEXT \"US\" :CANON \"USA\") PULS.AUXIL::BEGIN 1287
152
PULS.AUXIL::END 1289)
153
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
154
PULS.AUXIL::BEGIN 1321 PULS.AUXIL::END 1459)
155
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S :V :NIL)
156
PULS.AUXIL::BEGIN 1460 PULS.AUXIL::END 1588)
157
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
158
PULS.AUXIL::BEGIN 1589 PULS.AUXIL::END 1754)
159
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
160
(:TYPE :STATE :TEXT \"Washington\" :CANON \"USA/Washington\")
161
PULS.AUXIL::BEGIN 1613 PULS.AUXIL::END 1623)
162
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
163
(:TYPE :STATE :TEXT \"US\" :CANON \"USA\") PULS.AUXIL::BEGIN 1625
164
PULS.AUXIL::END 1627)
165
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
166
(:TYPE :STATE :TEXT \"Iraq\" :CANON \"Iraq\") PULS.AUXIL::BEGIN 1693
167
PULS.AUXIL::END 1697)
168
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
169
(:TYPE :DATE :TEXT \"after the 1991\" :START \"1991\" :END \"2005\")
170
PULS.AUXIL::BEGIN 1730 PULS.AUXIL::END 1744)
171
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS (:TYPE :S)
172
PULS.AUXIL::BEGIN 1755 PULS.AUXIL::END 1926)
173
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
174
(:TYPE :STATE :TEXT \"the US\" :CANON \"USA\") PULS.AUXIL::BEGIN 1804
175
PULS.AUXIL::END 1810)
176
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
177
(:TYPE :STATE :TEXT \"Iraq\" :CANON \"Iraq\") PULS.AUXIL::BEGIN 1891
178
PULS.AUXIL::END 1895)
179
#I(PULS.AUXIL:ANNOTATED-SPAN PULS.AUXIL::PROPS
180
(:TYPE :DISEASE :TEXT \"weapons-grade anthrax\" :ALIAS \"anthrax\" :CANON
182
PULS.AUXIL::BEGIN 1904 PULS.AUXIL::END 1925))))"