File transform.jq changed (mode: 100755) (index 0016ceb..16bc0b2) |
4 |
4 |
# Source data combines multiple values into one field, so split that up |
# Source data combines multiple values into one field, so split that up |
5 |
5 |
# also use native data types if possible. |
# also use native data types if possible. |
6 |
6 |
|
|
|
7 |
|
import "./data/species.sci.names" as $species_sci_names; |
|
8 |
|
import "./data/species.common.names" as $species_common_names; |
|
9 |
|
|
7 |
10 |
# Convert Degree.Minute coordinates into decimal notation |
# Convert Degree.Minute coordinates into decimal notation |
8 |
11 |
def as_coord: |
def as_coord: |
9 |
12 |
# Example W073 or W70, degree only, negate |
# Example W073 or W70, degree only, negate |
|
... |
... |
def as_coord: |
35 |
38 |
null |
null |
36 |
39 |
end; |
end; |
37 |
40 |
|
|
|
41 |
|
# Parse a leading "D-Mon-YYYY" date (e.g. "12-Jan-1999") from the input
# string and emit it as an ISO 8601 timestamp string.
#
# Anything after the date is ignored; the pattern is anchored at the start
# of the input only.
#
# Emits exactly one value: the timestamp, or null when the input does not
# start with a date. Emitting null (instead of nothing) matters because this
# filter is used directly as an object field value, where an empty result
# would make the entire enclosing object disappear.
def as_date:
  (
    capture("^(?<date>\\d{1,2}-\\w{3}-\\d{4})")
    | .date
    | strptime("%d-%B-%Y")
    | todateiso8601
  ) // null;
|
43 |
|
|
|
44 |
|
# Translate the signal-overlap marker found in the input string into its
# descriptive label, using the lookup table below.
#
# The first occurrence of "O" followed by one or two of T/F, or of "N",
# is taken as the marker.
#
# Emits exactly one value: the label, or null when no marker is present or
# the marker has no table entry (e.g. "OFT"). Emitting null (instead of
# nothing) matters because this filter is used directly as an object field
# value, where an empty result would make the entire enclosing object
# disappear.
def as_signal_overlap:
  {
    "OF": "Frequency",
    "OT": "Time",
    "OTF": "Time and Frequency",
    "N": "No"
  } as $overlap_type
  | (capture("(?<o>O[TF]{1,2}|N)") | $overlap_type[.o]?) // null;
|
51 |
|
|
|
52 |
|
# Extract the species code embedded in the input string: one letter A-C,
# one letter A-Z, one or more digits, then one letter A-Z (first match only).
# Emits nothing when the input contains no such code.
def as_species_code:
  capture("(?<code>[A-C][A-Z]\\d+[A-Z])") as $found
  | $found.code;
|
54 |
|
|
|
55 |
|
# Resolve the species code found in the input string to a common name by
# looking it up in the first value of the imported species.common.names data.
# Emits nothing when the input carries no species code.
# NOTE(review): presumably the lookup table is an object keyed by species
# code, so an unknown code yields null - confirm against the data file.
def as_species_common_name:
  as_species_code as $code
  | $species_common_names[0][$code];
|
57 |
|
|
|
58 |
|
# Resolve the species code found in the input string to a scientific name by
# looking it up in the first value of the imported species.sci.names data.
# Emits nothing when the input carries no species code.
# NOTE(review): presumably the lookup table is an object keyed by species
# code, so an unknown code yields null - confirm against the data file.
def as_species_sci_name:
  as_species_code as $code
  | $species_sci_names[0][$code];
|
60 |
|
|
38 |
61 |
# root |
# root |
39 |
62 |
{ |
{ |
40 |
63 |
# record number is unique, can be used as _id |
# record number is unique, can be used as _id |
41 |
64 |
record_number: .RN, |
record_number: .RN, |
42 |
65 |
note: .NT, |
note: .NT, |
43 |
66 |
# a lot of noise in the original field, only parsing date |
# a lot of noise in the original field, only parsing date |
44 |
|
observation_date: [ |
|
45 |
|
.OD | capture("^(?<date>\\d{1,2}-\\w{3}-\\d{4})") | .date | |
|
46 |
|
strptime("%d-%B-%Y") | todateiso8601 |
|
47 |
|
] | .[0], |
|
48 |
|
last_modified_date: [ |
|
49 |
|
.DA | capture("^(?<date>\\d{1,2}-\\w{3}-\\d{4})") | .date | |
|
50 |
|
strptime("%d-%B-%Y") | todateiso8601 |
|
51 |
|
] | .[0], |
|
|
67 |
|
observation_date: .OD | as_date, |
|
68 |
|
last_modified_date: .DA | as_date, |
52 |
69 |
location: { |
location: { |
53 |
70 |
name: .GB | split("|") | map(gsub("(\\s+)?[A-D][A-Z]\\d+[A-Z](\\s+)?|(X$)"; ""; "gm")), |
name: .GB | split("|") | map(gsub("(\\s+)?[A-D][A-Z]\\d+[A-Z](\\s+)?|(X$)"; ""; "gm")), |
54 |
71 |
coordinates: .GC | split("|") |
coordinates: .GC | split("|") |
|
... |
... |
def as_coord: |
130 |
147 |
} as $class_names | |
} as $class_names | |
131 |
148 |
[ .SC | capture("(?<c>[SMVDUC]{1})") ] | $class_names[.[0].c]? |
[ .SC | capture("(?<c>[SMVDUC]{1})") ] | $class_names[.[0].c]? |
132 |
149 |
] | .[0], |
] | .[0], |
133 |
|
overlap: [ |
|
134 |
|
# overlap lookup table |
|
135 |
|
{ |
|
136 |
|
"OF": "Frequency", |
|
137 |
|
"OT": "Time", |
|
138 |
|
"OTF": "Time and Frequency", |
|
139 |
|
"N": "No" |
|
140 |
|
} as $overlap_type | |
|
141 |
|
[ .SC | capture("(?<o>O[TF]{1,2}|N)") ] | $overlap_type[.[0].o]? |
|
142 |
|
] | .[0], |
|
|
150 |
|
overlap: .SC | as_signal_overlap, |
143 |
151 |
# other general sound producing sources listed in genus field |
# other general sound producing sources listed in genus field |
144 |
152 |
source: ( .GS | split("|") | |
source: ( .GS | split("|") | |
145 |
153 |
map(. as $s | match("\\s+[E-Z]{1}(\\s+)?$"; "m") | |
map(. as $s | match("\\s+[E-Z]{1}(\\s+)?$"; "m") | |
|
... |
... |
def as_coord: |
291 |
299 |
end |
end |
292 |
300 |
) |
) |
293 |
301 |
), |
), |
294 |
|
# Genus name and species code |
|
295 |
|
genus: ( .GS | split("|") | |
|
296 |
|
map(. as $s | match("[A-C][A-Z]\\d+[A-Z](\\s+)?$"; "m") | |
|
|
302 |
|
# Genus |
|
303 |
|
species: .GS | split("|") | |
|
304 |
|
map(. as $s | |
297 |
305 |
{ |
{ |
298 |
|
name: $s[0:.offset] | gsub("^\\s+|\\s+$";""), |
|
299 |
|
species_code: .string | gsub("^\\s+|\\s+$";"") |
|
|
306 |
|
_as_noted: $s | gsub("^\\s+|\\s+$";""), |
|
307 |
|
species_code: $s | as_species_code, |
|
308 |
|
scientific_name: $s | as_species_sci_name, |
|
309 |
|
common_name: $s | as_species_common_name, |
300 |
310 |
}) |
}) |
301 |
|
), |
|
302 |
311 |
} |
} |
303 |
312 |
} |
} |