File transform.jq changed (mode: 100755) (index cd2aa89..3da8644) |
1 |
1 |
#!/usr/bin/jq -fr |
#!/usr/bin/jq -fr |
2 |
2 |
|
|
3 |
3 |
# jq filter chain to transform flat source metadata into object structure. |
# jq filter chain to transform flat source metadata into object structure. |
4 |
|
# Source data combines multiple values into one field, so split that up |
|
5 |
|
# also use native data types if possible. |
|
|
4 |
|
# |
|
5 |
|
# Each transformation is its own function with documented input examples. |
|
6 |
|
# |
|
7 |
|
# |
|
8 |
|
# - Source data combines multiple values into one field with "|" |
|
9 |
|
# - Use native JSON data types if possible |
|
10 |
|
# - Clean-up whitespace and normalize value formats |
6 |
11 |
|
|
|
12 |
|
# Mapping of species code to names |
|
13 |
|
# Extracted from WHOI website by `download.sh` |
7 |
14 |
import "./data/species.sci.names" as $species_sci_names; |
import "./data/species.sci.names" as $species_sci_names; |
8 |
15 |
import "./data/species.common.names" as $species_common_names; |
import "./data/species.common.names" as $species_common_names; |
9 |
16 |
|
|
|
17 |
|
# |
|
18 |
|
# helper functions |
|
19 |
|
# |
|
20 |
|
|
|
21 |
|
def trim:
  # Strip whitespace from the start and the end of the input string.
  # The pattern is named first so the substitution below reads as intent.
  "^\\s+|\\s+$" as $edge_whitespace
  | gsub($edge_whitespace; "");
|
24 |
|
|
|
25 |
|
# |
|
26 |
|
# transform functions |
|
27 |
|
# |
|
28 |
|
|
10 |
29 |
# Convert Degree.Minute coordinates into decimal notation |
# Convert Degree.Minute coordinates into decimal notation |
11 |
30 |
def as_coord: |
def as_coord: |
12 |
31 |
# Example W073 or W70, degree only, negate |
# Example W073 or W70, degree only, negate |
|
... |
... |
def as_coord: |
41 |
60 |
def as_date: |
def as_date: |
42 |
61 |
(capture("^(?<date>\\d{1,2}-\\w{3}-\\d{4})") | .date | strptime("%d-%B-%Y") | todateiso8601)//null; |
(capture("^(?<date>\\d{1,2}-\\w{3}-\\d{4})") | .date | strptime("%d-%B-%Y") | todateiso8601)//null; |
43 |
62 |
|
|
44 |
|
def as_signal_overlap: |
|
45 |
|
({ |
|
46 |
|
"OF": "Frequency", |
|
47 |
|
"OT": "Time", |
|
48 |
|
"OTF": "Time and Frequency", |
|
49 |
|
"N": "No" |
|
50 |
|
} as $overlap_type | capture("(?<o>O[TF]{1,2}|N)") | $overlap_type[.o]?)//null ; |
|
|
63 |
|
def as_location_name:
  # Location Name
  # Remove embedded species codes (with the whitespace around them) and a
  # single "X" at the very end of the string.
  #
  # Example source data:
  #   2.25 mi. west of Castle Rock, McMurdo Sound, Antarctica CC5A
  #   20 mi. NW Gambell, St. Lawrence Island, Alaska CC2A X
  #   Castle Harbour, Bermuda AC2A
  #
  # Input: one location string. Output: the cleaned string.
  #
  # No flags argument is passed: gsub already replaces every match, and "m"
  # is not one of the regex flags listed in the jq manual (g, i, x, s, n, p, l).
  #
  # TODO improve clean-up
  # jq -r '.GB | split("|")[]' data/rn/*json| sort -u | grep -P '(\s+)?[A-D][A-Z]\d+[A-Z](\s+)?|([\sXO]*$)'
  gsub("(\\s+)?[A-D][A-Z]\\d+[A-Z](\\s+)?|(X$)"; "");
|
75 |
|
|
|
76 |
|
def as_location_coordinates:
  # Turn an array of raw coordinate strings into an array of {lat, lon}
  # objects, converting each part with as_coord.
  #
  # Example source data
  #   N10BD15A W086BD15A
  #   N13BA2A W061BA2A
  #   N13BD15B W061BD15B
  #   N14X W061X
  #   N75BB2A W075BB2A approx
  #   S52BD1A W070BD1A
  #   S71CC14A E170CC14A
  #
  # Only the hemisphere letter and the digits directly after it are kept
  # for each part; the letters/digits that follow the latitude digits are
  # skipped by the pattern. Entries that do not fit the pattern produce no
  # output and are therefore dropped from the resulting array.
  map(
    capture("(?<lat>[NS]{1}\\d{1,4})[A-Z]{1,2}(\\d{1,2})?([A-Z]{1})?\\s+(?<lon>[EW]{1}\\d{1,5})")
    | { lat: (.lat | as_coord), lon: (.lon | as_coord) }
  );
51 |
88 |
|
|
52 |
89 |
def as_species_code: |
def as_species_code: |
53 |
90 |
(capture("(?<code>[A-C][A-Z]\\d+[A-Z])") | .code)//null; |
(capture("(?<code>[A-C][A-Z]\\d+[A-Z])") | .code)//null; |
|
... |
... |
def as_species_common_name: |
58 |
95 |
def as_species_sci_name: |
def as_species_sci_name: |
59 |
96 |
as_species_code | $species_sci_names[0][.?]; |
as_species_code | $species_sci_names[0][.?]; |
60 |
97 |
|
|
|
98 |
|
# |
|
99 |
|
# Animal |
|
100 |
|
# |
|
101 |
|
|
61 |
102 |
def as_animal_interaction: |
def as_animal_interaction: |
62 |
103 |
# interaction between animals |
# interaction between animals |
63 |
104 |
# always a pair, and multiple sets of pairs are possible |
# always a pair, and multiple sets of pairs are possible |
|
... |
... |
def as_animal_behavior: |
119 |
160 |
. as $b | match("[A-C][A-Z]\\d+[A-Z]([\\s\\.]+)?$"; "m")//false | |
. as $b | match("[A-C][A-Z]\\d+[A-Z]([\\s\\.]+)?$"; "m")//false | |
120 |
161 |
if . then |
if . then |
121 |
162 |
{ |
{ |
122 |
|
type_of: ($b[0:.offset] | gsub("^\\s+|\\s+$";"") | if (.|length) > 0 then . else null end), |
|
123 |
|
species_code: .string | gsub("^\\s+|\\s+$";"") |
|
|
163 |
|
type_of: ($b[0:.offset] | trim | if (.|length) > 0 then . else null end), |
|
164 |
|
species_code: .string | trim |
124 |
165 |
} |
} |
125 |
166 |
# fallback without species code |
# fallback without species code |
126 |
167 |
else |
else |
127 |
|
{ type_of: $b | gsub("^\\s+|\\s+$";"") } |
|
|
168 |
|
{ type_of: $b | trim } |
128 |
169 |
end; |
end; |
129 |
|
|
|
|
170 |
|
|
|
171 |
|
def as_animal_vocal:
  # List of vocal animals, name and species code
  # All existing entries:
  #   FB145 #?? BD19D
  #   FB147 #?? BD19D
  #   FB150 #?? BD19D
  #   FB153 #50 Blacktip Doubledip BD19D
  #   FB34 #30 Wee Willie BD19D
  #   FB55 #159 BD19D
  #   FB5 #5 BD19D
  #   FB73 #35 BD19D
  #   Keiko
  #   Keiko BE7A
  #   Minks BF2A | Jinks BF2A
  #   Moby Doll
  #   Moby Doll BE7A
  #   Olaf CB1A
  #   Snoopy BA2A
  #   The lark BE3B
  #   Wolfie CB1A | Farouk CB1A
  #
  # Input: the raw field as one string, entries separated by "|".
  # Output: array of objects, one per entry that ends in a species code:
  #   animal_id    - text before the species code, trimmed
  #   species_code - the matched code, trimmed
  # Entries without a trailing species code produce no object.
  . as $input
  | split("|")
  | map(
      . as $entry
      # match the species code at the end of the entry; no flags are
      # passed because "m" is not a regex flag listed in the jq manual
      | match("[A-C][A-Z]\\d+[A-Z](\\s+)?$")
      | {
          animal_id: ($entry[0:.offset] | trim),
          species_code: (.string | trim)
        }
    )
  # if no object was created, use the (trimmed) input as fallback;
  # this is for entries without a species code like "Keiko"
  | if (. == [] and ($input | length) > 0) then
      [{animal_id: ($input | trim)}]
    else
      .
    end;
|
206 |
|
|
|
207 |
|
def as_animal_genus:
  # Genus name and species code.
  #
  # Input: array of strings. Output: array of objects, one per string that
  # ends in a species code:
  #   name         - text before the species code, trimmed
  #   species_code - the matched code, trimmed
  # Strings without a trailing species code produce no object.
  map(
    . as $entry
    # no flags are passed: "m" is not a regex flag listed in the jq manual
    | match("[A-C][A-Z]\\d+[A-Z](\\s+)?$")
    | {
        name: ($entry[0:.offset] | trim),
        species_code: (.string | trim)
      }
  );
|
213 |
|
|
|
214 |
|
def as_animal_species:
  # Input: array of strings. Output: one object per string, holding the
  # trimmed original text plus whatever the species helpers derive from it.
  # Every value is computed from the same array element, so no variable
  # binding is needed.
  map({
    _as_noted: trim,
    species_code: as_species_code,
    scientific_name: as_species_sci_name,
    common_name: as_species_common_name
  });
|
222 |
|
|
|
223 |
|
# |
|
224 |
|
# Signal |
|
225 |
|
# |
|
226 |
|
|
|
227 |
|
# Cue field contains 3 values describing the position on tape |
|
228 |
|
# Example input from the documentation |
|
229 |
|
# 542 B2:8 8.130 |
|
230 |
|
# 1:03:12 B2:8 8.130 |
|
231 |
|
# however, following formats are also found |
|
232 |
|
# 0:00:00 B30:00 10:20.602 |
|
233 |
|
# 995 B11:28.497 5:20.426 |
|
234 |
|
# 96 B4.00 1.525 |
|
235 |
|
# 93 B23.7 9.164 |
|
236 |
|
# 93 B3:00 2:13.828 |
|
237 |
|
# 01:52:52:04 |
|
238 |
|
# 09:11:00 20:00 951.50 |
|
239 |
|
# 0 B2:00:00 |
|
240 |
|
|
|
241 |
|
def as_signal_position_cue:
  # "cue" as in a leading single integer, without dot or colon,
  # followed by whitespace or the end of the string.
  # Do not use \b because of the colon in 00:00 values.
  #
  # Input: the raw cue string, e.g. "542 B2:8 8.130".
  # Output: {"cue": <number>}; no output at all when the string does not
  # start with such an integer (e.g. "1:03:12 B2:8 8.130").
  #
  # The separator is matched outside the named group so only the digits
  # reach tonumber; stricter jq versions reject surrounding whitespace.
  capture("^(?<c>\\d+)(?:\\s|$)") | {"cue": (.c | tonumber)};
|
246 |
|
|
|
247 |
|
def as_signal_position_time:
  # "time" as in a leading value made of integers joined by 2 or 3 colons,
  # followed by whitespace or the end of the string.
  #
  # Input: the raw cue string, e.g. "1:03:12 B2:8 8.130".
  # Output: {"time": "1:03:12"}; no output at all when the string does not
  # start with such a value (e.g. "542 B2:8 8.130").
  #
  # The separator is matched outside the named group so the emitted value
  # does not carry the trailing whitespace.
  capture("^(?<time>\\d+:\\d+:\\d+(?::\\d+)?)(?:\\s|$)");
|
251 |
|
|
|
252 |
|
def as_signal_position_analyzer_buffer_size:
  # Buffer size: the value that directly follows a "B" prefix; the prefix
  # itself is not part of the result (it is only a lookbehind).
  #
  # The value is an integer followed by one or more ":" or "." separated
  # integer parts, so all of these are captured in full:
  #   B2:8  B4.00  B23.7  B11:28.497  B2:00:00
  #
  # Input: the raw cue string.
  # Output: {"analyzer_buffer_size": "<value>"}; no output at all when the
  # string has no "B"-prefixed value.
  capture("(?<=B)(?<analyzer_buffer_size>\\d+(?:[:\\.]\\d+)+)");
|
257 |
|
|
|
258 |
|
# Signal class encodes multiple values, quality, overlap and class |
|
259 |
|
# |
|
260 |
|
# it's only been used 123 times |
|
261 |
|
# |
|
262 |
|
# Example source data: |
|
263 |
|
# 3 OT |
|
264 |
|
# 3 OTF |
|
265 |
|
# C 4 OF |
|
266 |
|
# D |
|
267 |
|
# M |
|
268 |
|
# N |
|
269 |
|
# No |
|
270 |
|
# NO |
|
271 |
|
# OF |
|
272 |
|
# OT |
|
273 |
|
# OTF |
|
274 |
|
# OTF 3 |
|
275 |
|
# OTF 4 |
|
276 |
|
# S |
|
277 |
|
# S 5 |
|
278 |
|
# U |
|
279 |
|
# V |
|
280 |
|
|
130 |
281 |
def as_signal_quality: |
def as_signal_quality: |
|
282 |
|
# any digit in the signal class indicates quality |
131 |
283 |
(capture("(?<q>\\d+)") | .q | tonumber)//null; |
(capture("(?<q>\\d+)") | .q | tonumber)//null; |
132 |
284 |
|
|
133 |
285 |
def as_signal_class: |
def as_signal_class: |
|
... |
... |
def as_signal_class: |
141 |
293 |
"C": "Calf" |
"C": "Calf" |
142 |
294 |
} as $class_names | capture("(?<c>[SMVDUC]{1})") | $class_names[.c]?)//null; |
} as $class_names | capture("(?<c>[SMVDUC]{1})") | $class_names[.c]?)//null; |
143 |
295 |
|
|
|
296 |
|
def as_signal_overlap:
  # Translate the overlap marker found in the signal class string into a
  # readable label. The first "O" followed by one or two of T/F, or the
  # first "N", is used as the lookup key.
  # Output: the label string, or null when no marker is found or the
  # marker has no entry in the table.
  {
    "OF": "Frequency",
    "OT": "Time",
    "OTF": "Time and Frequency",
    "N": "No"
  } as $labels
  | (capture("(?<o>O[TF]{1,2}|N)") | $labels[.o]) // null;
|
303 |
|
|
|
304 |
|
def as_signal_cut_size:
  # Signal cut size as a number.
  #
  # Example source data:
  #   3.36
  #   9.411
  #   16.564
  #   20.35
  #   etc.
  # Only 210 records use a different, colon-separated format, which is
  # ignored for now:
  #   2:00.000
  #   1:00.030
  #   10:25.540
  #   1:25.158
  #   etc.
  #
  # Output: the number, or null when the input is empty, contains a colon,
  # or cannot be parsed as a number (badly formatted records like "0.2.95").
  if (contains(":") | not) and length > 0 then
    # a failed conversion yields nothing, which the alternative turns into null
    (tonumber? // null)
  else
    null
  end;
|
327 |
|
|
|
328 |
|
def as_signal_source:
  # Other general sound producing sources listed in genus field
  #
  # Example source data:
  #   Transient ship noise X
  #   Ship electrical noise X
  #   Rain X
  #   Homo sapiens E
  #   Crustacea O
  #
  # Input: array of strings. Output: array of objects, one per string that
  # ends in whitespace plus a single letter E-Z:
  #   name  - text before that letter, trimmed
  #   order - label looked up from the letter (null for letters without an
  #           entry in the table); this is a grouping label, not a sort order
  # Strings without such a trailing letter produce no object.
  #
  # The lookup table is built once, outside the per-entry loop.
  {
    "E": "Primates",
    "O": "Crustacea",
    "T": "Fossils",
    "U": "Uncertain",
    "V": "General pinniped",
    "W": "General cetacean",
    "X": "Ambient noise"
  } as $order
  | map(
      . as $entry
      # no flags are passed: "m" is not a regex flag listed in the jq manual
      | match("\\s+[E-Z]{1}(\\s+)?$")
      | {
          name: ($entry[0:.offset] | trim),
          order: $order[.string | trim]
        }
    );
|
352 |
|
|
144 |
353 |
def as_sound_channel: |
def as_sound_channel: |
145 |
354 |
# numbers of channels |
# numbers of channels |
146 |
355 |
# input data mostly follows the documentation: |
# input data mostly follows the documentation: |
|
... |
... |
def as_sound_channel: |
152 |
361 |
# not clear what other input values mean exactly |
# not clear what other input values mean exactly |
153 |
362 |
# 211 |
# 211 |
154 |
363 |
(capture("^(?<r>\\d)(?<m>\\d)(?<s>[A-L]$)") | |
(capture("^(?<r>\\d)(?<m>\\d)(?<s>[A-L]$)") | |
155 |
|
{ |
|
156 |
|
recorded: .r | tonumber, |
|
|
364 |
|
{ |
|
365 |
|
recorded: .r | tonumber, |
157 |
366 |
multiplexed: .m | tonumber, |
multiplexed: .m | tonumber, |
158 |
367 |
side: .s |
side: .s |
159 |
368 |
})//null; |
})//null; |
160 |
369 |
|
|
|
370 |
|
|
|
371 |
|
def as_sound_sample_rate:
  # Plain sample rate as number.
  # Remove every dot, comma and whitespace character before converting,
  # and ignore empty strings.
  #
  # Example source data:
  #   1000
  #   10,000
  #   10000
  #   100000
  #   10200
  #
  # Output: the number, or null when the input is empty or what remains
  # after the clean-up cannot be parsed as a number.
  if length > 0 then
    # gsub (not sub) so values with several separators, e.g. "1,000,000",
    # are cleaned completely; a failed or empty conversion becomes null
    gsub("[\\.,\\s]"; "") | (tonumber? // null)
  else
    null
  end;
|
382 |
|
|
|
383 |
|
# |
|
384 |
|
# Assemble the object tree |
|
385 |
|
# |
|
386 |
|
|
161 |
387 |
# root |
# root |
162 |
388 |
{ |
{ |
163 |
389 |
# record number is unique, can be used as _id |
# record number is unique, can be used as _id |
164 |
390 |
record_number: .RN, |
record_number: .RN, |
165 |
391 |
note: .NT, |
note: .NT, |
166 |
|
# a lot of noise in the original field, only parsing date |
|
|
392 |
|
# a lot of noise in the original "OD" field, only parsing date |
167 |
393 |
observation_date: .OD | as_date, |
observation_date: .OD | as_date, |
168 |
394 |
last_modified_date: .DA | as_date, |
last_modified_date: .DA | as_date, |
169 |
395 |
location: { |
location: { |
170 |
|
name: .GB | split("|") | map(gsub("(\\s+)?[A-D][A-Z]\\d+[A-Z](\\s+)?|(X$)"; ""; "gm")), |
|
171 |
|
coordinates: .GC | split("|") |
|
172 |
|
| map(capture("(?<lat>[NS]{1}\\d{1,4})[A-Z]{1,2}(\\d{1,2})?([A-Z]{1})?\\s+(?<lon>[EW]{1}\\d{1,5})")) |
|
173 |
|
| map({ lat: (.lat | as_coord), lon: (.lon | as_coord) }) |
|
|
396 |
|
name: .GB | split("|") | map(as_location_name), |
|
397 |
|
coordinates: .GC | split("|") | as_location_coordinates |
174 |
398 |
}, |
}, |
175 |
399 |
# object contains properties of the captured signal |
# object contains properties of the captured signal |
176 |
400 |
signal: { |
signal: { |
177 |
|
# create a list of JSON objects and add them together |
|
178 |
|
|
|
179 |
|
# Cue field contains 3 values describing the postion on tape |
|
180 |
|
# Example input from the docu |
|
181 |
|
# 542 B2:8 8.130 |
|
182 |
|
# 1:03:12 B2:8 8.130 |
|
183 |
|
# however, following formats are also found |
|
184 |
|
# 0:00:00 B30:00 10:20.602 |
|
185 |
|
# 995 B11:28.497 5:20.426 |
|
186 |
|
# 96 B4.00 1.525 |
|
187 |
|
# 93 B23.7 9.164 |
|
188 |
|
# 93 B3:00 2:13.828 |
|
189 |
|
# 01:52:52:04 |
|
190 |
|
# 09:11:00 20:00 951.50 |
|
191 |
|
# 0 B2:00:00 |
|
192 |
401 |
position: [ |
position: [ |
193 |
402 |
# keep the source string as reference? |
# keep the source string as reference? |
194 |
403 |
{_source_cu: .CU}, |
{_source_cu: .CU}, |
195 |
|
|
|
196 |
|
# "cue" as in a first matched single integer, |
|
197 |
|
# without dot or colon followed by space or end of string |
|
198 |
|
# do not use \b because of the colon in 00:00 values |
|
199 |
|
(.CU | capture( "(?<c>^\\d+(\\s|$))" ) | {cue: .c|tonumber } ), |
|
200 |
|
|
|
201 |
|
# "time" as in first matched integer with 2 or 3 colons |
|
202 |
|
# followed by space or end of string |
|
203 |
|
(.CU | capture( "(?<time>^\\d+:\\d+:\\d+(:\\d+)?(\\s|$))" ) ), |
|
204 |
|
|
|
205 |
|
# buffer size, B followed by integer with colon or dot, |
|
206 |
|
# also remove B prefix |
|
207 |
|
# TODO match 2 colon version |
|
208 |
|
(.CU | capture("(?<analyzer_buffer_size>(?<=B)\\d+[:\\.]\\d+(\\.\\d+)?)") ) |
|
|
404 |
|
(.CU | as_signal_position_cue), |
|
405 |
|
(.CU | as_signal_position_time), |
|
406 |
|
(.CU | as_signal_position_analyzer_buffer_size ) |
209 |
407 |
] | add, |
] | add, |
210 |
|
# cut size |
|
211 |
|
# 3.36 |
|
212 |
|
# 9.411 |
|
213 |
|
# 16.564 |
|
214 |
|
# 20.35 |
|
215 |
|
# etc |
|
216 |
|
# only 210 records use a different format, ignored for now |
|
217 |
|
# 2:00.000 |
|
218 |
|
# 1:00.030 |
|
219 |
|
# 10:25.540 |
|
220 |
|
# 1:25.158 |
|
221 |
|
# etc. |
|
222 |
|
cut_size: ( |
|
223 |
|
# set to null if empty or contains a colon |
|
224 |
|
if (.CS | contains(":") or (length == 0)) then |
|
225 |
|
null |
|
226 |
|
else |
|
227 |
|
# cast as number and handle a few remaining badly formated |
|
228 |
|
# records like "0.2.95" |
|
229 |
|
(try (.CS | tonumber) catch null) |
|
230 |
|
end |
|
231 |
|
), |
|
232 |
|
# any digit in the signal class indicates quality |
|
233 |
|
# it's only been used 123 times |
|
|
408 |
|
cut_size: .CS | as_signal_cut_size, |
234 |
409 |
_source_sc: .SC, |
_source_sc: .SC, |
235 |
410 |
quality: .SC | as_signal_quality, |
quality: .SC | as_signal_quality, |
236 |
411 |
class: .SC | as_signal_class, |
class: .SC | as_signal_class, |
237 |
412 |
overlap: .SC | as_signal_overlap, |
overlap: .SC | as_signal_overlap, |
238 |
|
# other general sound producing sources listed in genus field |
|
239 |
|
source: ( .GS | split("|") | |
|
240 |
|
map(. as $s | match("\\s+[E-Z]{1}(\\s+)?$"; "m") | |
|
241 |
|
{ |
|
242 |
|
"E": "Primates", |
|
243 |
|
"O": "Crustacea", |
|
244 |
|
"T": "Fossils", |
|
245 |
|
"U": "Uncertain", |
|
246 |
|
"V": "General pinniped", |
|
247 |
|
"W": "General cetacean", |
|
248 |
|
"X": "Ambient noise" |
|
249 |
|
} as $order | |
|
250 |
|
{ |
|
251 |
|
name: $s[0:.offset] | gsub("^\\s+|\\s+$";""), |
|
252 |
|
# not sort order |
|
253 |
|
order: $order[.string | gsub("^\\s+|\\s+$";"")] |
|
254 |
|
}) |
|
255 |
|
) |
|
|
413 |
|
source: .GS | split("|") | as_signal_source, |
256 |
414 |
}, |
}, |
257 |
415 |
sound: { |
sound: { |
258 |
|
# plain sample rate as number, however not normalized in digit length |
|
259 |
|
# remove dot or colon, and ignore empty strings |
|
260 |
|
# a bit difficult to tell what is hz and what khz |
|
261 |
|
sample_rate: ( |
|
262 |
|
if (.SR | length > 0) then |
|
263 |
|
.SR | sub("[\\.,]"; "") | tonumber |
|
264 |
|
else |
|
265 |
|
null |
|
266 |
|
end |
|
267 |
|
), |
|
|
416 |
|
sample_rate: .SR | as_sound_sample_rate, |
268 |
417 |
channel: [ |
channel: [ |
269 |
|
{"_source_nc": .NC }, |
|
|
418 |
|
{"_source_nc": .NC }, |
270 |
419 |
(.NC | as_sound_channel) |
(.NC | as_sound_channel) |
271 |
420 |
] | add |
] | add |
272 |
421 |
}, |
}, |
273 |
422 |
animal: { |
animal: { |
274 |
423 |
_source_id: .ID, |
_source_id: .ID, |
275 |
424 |
# List of vocal animals, name and species code |
# List of vocal animals, name and species code |
276 |
|
# All existing entries: |
|
277 |
|
# FB145 #?? BD19D |
|
278 |
|
# FB147 #?? BD19D |
|
279 |
|
# FB150 #?? BD19D |
|
280 |
|
# FB153 #50 Blacktip Doubledip BD19D |
|
281 |
|
# FB34 #30 Wee Willie BD19D |
|
282 |
|
# FB55 #159 BD19D |
|
283 |
|
# FB5 #5 BD19D |
|
284 |
|
# FB73 #35 BD19D |
|
285 |
|
# Keiko |
|
286 |
|
# Keiko BE7A |
|
287 |
|
# Minks BF2A | Jinks BF2A |
|
288 |
|
# Moby Doll |
|
289 |
|
# Moby Doll BE7A |
|
290 |
|
# Olaf CB1A |
|
291 |
|
# Snoopy BA2A |
|
292 |
|
# The lark BE3B |
|
293 |
|
# Wolfie CB1A | Farouk CB1A |
|
294 |
|
# |
|
295 |
|
# create array with objects for each animal |
|
296 |
|
# save input as fallback and split by | |
|
297 |
|
vocal: ( .ID as $input | $input | split("|") | |
|
298 |
|
# try to match species code |
|
299 |
|
map(. as $s | match("[A-C][A-Z]\\d+[A-Z](\\s+)?$"; "m") | |
|
300 |
|
# create object, anything before matched species code is id |
|
301 |
|
# also trim space from resulting string |
|
302 |
|
{ |
|
303 |
|
animal_id: $s[0:.offset] | gsub("^\\s+|\\s+$";""), |
|
304 |
|
species_code: .string | gsub("^\\s+|\\s+$";"") |
|
305 |
|
}) | |
|
306 |
|
# if no object was created, use input as fallback |
|
307 |
|
# this is for entries without a species code like "Keiko" |
|
308 |
|
if (. == [] and ($input|length)>0 ) then |
|
309 |
|
[{animal_id: $input}] |
|
310 |
|
else |
|
311 |
|
. |
|
312 |
|
end |
|
313 |
|
), |
|
|
425 |
|
vocal: .ID | as_animal_vocal, |
314 |
426 |
# age, sex and id, an animal profile |
# age, sex and id, an animal profile |
315 |
427 |
profile: .AG | as_animal_profile, |
profile: .AG | as_animal_profile, |
316 |
428 |
# interaction between animals |
# interaction between animals |
317 |
429 |
interaction: .IA | split("|") | map([as_animal_interaction]), |
interaction: .IA | split("|") | map([as_animal_interaction]), |
318 |
|
# behavior type and species code |
|
319 |
430 |
# species code not always present, use input as fallback |
# species code not always present, use input as fallback |
320 |
431 |
behavior: .BH | split("|") | map(as_animal_behavior), |
behavior: .BH | split("|") | map(as_animal_behavior), |
321 |
432 |
# Genus name and species code |
# Genus name and species code |
322 |
|
genus: ( .GS | split("|") | |
|
323 |
|
map(. as $s | match("[A-C][A-Z]\\d+[A-Z](\\s+)?$"; "m") | |
|
324 |
|
{ |
|
325 |
|
name: $s[0:.offset] | gsub("^\\s+|\\s+$";""), |
|
326 |
|
species_code: .string | gsub("^\\s+|\\s+$";"") |
|
327 |
|
}) |
|
328 |
|
) |
|
|
433 |
|
genus: .GS | split("|") | as_animal_genus, |
329 |
434 |
# Species |
# Species |
330 |
|
#species: .GS | split("|") | |
|
331 |
|
# map(. as $s | |
|
332 |
|
# { |
|
333 |
|
# _as_noted: $s | gsub("^\\s+|\\s+$";""), |
|
334 |
|
# species_code: $s | as_species_code, |
|
335 |
|
# scientific_name: $s | as_species_sci_name, |
|
336 |
|
# common_name: $s | as_species_common_name, |
|
337 |
|
# }) |
|
|
435 |
|
# species: .GS | split("|") | as_animal_species, |
338 |
436 |
} |
} |
339 |
437 |
} |
} |