dleucas / wmmsdb (public) (License: GPLv3) (since 2018-07-08) (hash sha1)
A collection of scripts to download, transform and normalize the Watkins Marine Mammal Sound Database.

Credit:

“Watkins Marine Mammal Sound Database, Woods Hole Oceanographic Institution.”

http://cis.whoi.edu/science/B/whalesounds/index.cfm
List of commits:
Subject Hash Author Date (UTC)
more conversion to functions. match old output for now fc4a8157a6902f4571b54c6ab84174f005adbe0d dleucas 2021-12-18 10:26:23
WIP convert filters to functions 32badc3512dd9094d51ba2cc2ef8112eba2698bf dleucas 2021-12-16 18:33:21
convert html only once. extract species names as json. formating and lint. e755dc7f4fe2d7c9b97826a0f3f2cf5385e90ef9 dleucas 2021-12-16 13:35:31
download once. use wget only. get species names. test for commands. formating 572dbf1eaffe17c43a4a01dc9675737628c5a234 dleucas 2021-12-16 12:14:26
add filter by behavior type, sort by modified date c3f9f9f9d9501e714117af7fff573e7f3fa4052b dleucas 2019-06-14 03:51:19
rename type to type_of 4269dc257530a9a7fa21ff8708f4594a2f1a453d dleucas 2019-06-14 03:39:49
ElasticSearch setting for larger HTTP request e83e501f949473538096f984220934c0a51de0b4 dleucas 2019-06-14 03:25:22
rename type to type_of e1fcd27b05eabc8bce06751a9925200e4707168b dleucas 2019-06-14 02:34:15
add animal behavior transformation and documentation 7550db3bbd1c69c9369cf8dfe3a5d1195e761ae2 dleucas 2019-06-14 00:57:16
add lost modified date c4922a44cebebd63da6c23a2a71f97cdb47b4a68 dleucas 2019-06-12 22:46:35
describe remaining db fields e3c7f44ad24a3d7c8e4eb74c777a4eecc3675d75 dleucas 2019-06-11 21:37:06
WIP document acoustat 3a47cfcfa204503f682879d7485a6ef941e248e4 dleucas 2019-06-07 00:17:30
WIP document acoustat 4591875fd32c1c91d20133ff90dcf5676b3c216c dleucas 2019-06-07 00:08:18
WIP document acoustat ccc4a6de663a7272ee3d5777fe1479af549e9938 dleucas 2019-06-06 01:39:08
WIP document acoustat 1c6b03267e3016d9b637775df2b4b153866ac040 dleucas 2019-06-05 22:39:39
add dependency on nav.html and pandoc.css b4f054eb6675117d576fa9462220bc5bc8d15be4 dleucas 2019-06-01 01:12:44
nav title 7c5fadd2e143028d614fab4c31ed7389ed17e6f6 dleucas 2019-06-01 01:12:04
document world map 07b70f4b85731456b559edeecea12b339e724aaf dleucas 2019-06-01 01:11:43
use live ElasticSearch URL in example query b6df3b91ba395f62003fa89f1d6ae3f6a705ea9e dleucas 2019-05-31 23:38:40
Document geo coordinates based on the .GC field d3c9fc90773c6252209074583a00b290633f340c dleucas 2019-05-31 23:24:02
Commit fc4a8157a6902f4571b54c6ab84174f005adbe0d - more conversion to functions. match old output for now
Author: dleucas
Author date (UTC): 2021-12-18 10:26
Committer name: dleucas
Committer date (UTC): 2021-12-18 10:26
Parent(s): 32badc3512dd9094d51ba2cc2ef8112eba2698bf
Signing key:
Tree: 4670619fe3d8629ba4bccf529c8e2055ce108550
File Lines added Lines deleted
transform.jq 125 98
File transform.jq changed (mode: 100755) (index 16bc0b2..cd2aa89)
... ... def as_coord:
39 39 end; end;
40 40
41 41 def as_date: def as_date:
42 capture("^(?<date>\\d{1,2}-\\w{3}-\\d{4})") | .date | strptime("%d-%B-%Y") | todateiso8601;
42 (capture("^(?<date>\\d{1,2}-\\w{3}-\\d{4})") | .date | strptime("%d-%B-%Y") | todateiso8601)//null;
43 43
44 44 def as_signal_overlap: def as_signal_overlap:
45 {
45 ({
46 46 "OF": "Frequency", "OF": "Frequency",
47 47 "OT": "Time", "OT": "Time",
48 48 "OTF": "Time and Frequency", "OTF": "Time and Frequency",
49 49 "N": "No" "N": "No"
50 } as $overlap_type | capture("(?<o>O[TF]{1,2}|N)") | $overlap_type[.o]?;
50 } as $overlap_type | capture("(?<o>O[TF]{1,2}|N)") | $overlap_type[.o]?)//null ;
51 51
52 52 def as_species_code: def as_species_code:
53 capture("(?<code>[A-C][A-Z]\\d+[A-Z])") | .code;
53 (capture("(?<code>[A-C][A-Z]\\d+[A-Z])") | .code)//null;
54 54
55 55 def as_species_common_name: def as_species_common_name:
56 56 as_species_code | $species_common_names[0][.?]; as_species_code | $species_common_names[0][.?];
 
... ... def as_species_common_name:
58 58 def as_species_sci_name: def as_species_sci_name:
59 59 as_species_code | $species_sci_names[0][.?]; as_species_code | $species_sci_names[0][.?];
60 60
61 def as_animal_interaction:
62 # interaction between animals
63 # always a pair, and multiple sets of pairs are possible
64 # [[{}, {}]] or [[{} {}], [{} {}], ...]
65 #
66 # Example source data:
67 # FCFB147 FCFB145
68 # FCFB147 FCFB145 | FFFB147 FFFB149 | FFFB145 FFFB149 | FCFB153 FCFB150
69 # FCFB153 FCFB150
70 # FCFB5 FCFB55
71 # FCFB73 FCFB34
72 capture("(?<type>[FMC]{2})(?<animal_id>FB\\d+)"; "g")//null;
73
74 def as_animal_profile:
75 # age, sex and id, a animal profile
76 # ignoring species code
77 #
78 # Example source data:
79 # F03FB55 F1986FB55
80 # F26FB5 F1963FB5
81 # F??FB145 F????FB145
82 # F??FB147 F????FB147
83 # F??FB153 F????FB153
84 # F??FB73 F????FB73
85 # F??FB73 F????FB7370
86 # M05FB150 M1984FB150
87 # M17Keiko M1975Keiko BE7A
88 # M17Keiko M1975Keiko BE7A
89 # M??FB34 M????FB34
90 # M??FB73 M????FB73
91 capture("^(?<sex>[FM])" +
92 "(?<age>[\\?\\d]{2})" +
93 "(?<animal_id>(FB\\d+|\\w+))" +
94 "\\s+" +
95 "[FM](?<birth_year>[\\d\\?]{4})")//null |
96 {"F": "Female", "M": "Male"} as $sex |
97 {
98 sex: (if (.sex != null) then $sex[.sex] else null end),
99 age: (try (.age | tonumber) catch null),
100 animal_id: .animal_id,
101 birth_year: (try (.birth_year | tonumber) catch null)
102 };
103
104 def as_animal_behavior:
105 # Behavior of the recorded animal with species code
106 # species code not always present, use input as fallback
107 #
108 # Example source data:
109 # Approaching ship BA2A
110 # BA2A A few larger whales seen mixed with others
111 # BE7A
112 # Bow riding BD17A
113 # Courtship CB1A
114 # Dive BA2A
115 # Feeding AA3A
116
117 # find the species code position and use the text before as behavior
118 # match() returns "empty" which we can not test with if
119 . as $b | match("[A-C][A-Z]\\d+[A-Z]([\\s\\.]+)?$"; "m")//false |
120 if . then
121 {
122 type_of: ($b[0:.offset] | gsub("^\\s+|\\s+$";"") | if (.|length) > 0 then . else null end),
123 species_code: .string | gsub("^\\s+|\\s+$";"")
124 }
125 # fallback without species code
126 else
127 { type_of: $b | gsub("^\\s+|\\s+$";"") }
128 end;
129
130 def as_signal_quality:
131 (capture("(?<q>\\d+)") | .q | tonumber)//null;
132
133 def as_signal_class:
134 # class name lookup table
135 ({
136 "S": "Signature",
137 "M": "Mimic",
138 "V": "Variant",
139 "D": "Deletion",
140 "U": "Uncharacteristic",
141 "C": "Calf"
142 } as $class_names | capture("(?<c>[SMVDUC]{1})") | $class_names[.c]?)//null;
143
144 def as_sound_channel:
145 # numbers of channels
146 # input data mostly follows the documentation:
147 # 11A
148 # 11B
149 # 41D
150 # 21L
151 # regex will match only those
152 # not clear what other input values mean exactly
153 # 211
154 (capture("^(?<r>\\d)(?<m>\\d)(?<s>[A-L]$)") |
155 {
156 recorded: .r | tonumber,
157 multiplexed: .m | tonumber,
158 side: .s
159 })//null;
160
61 161 # root # root
62 162 { {
63 163 # record number is unique, can be used as _id # record number is unique, can be used as _id
 
... ... def as_species_sci_name:
131 231 ), ),
132 232 # any digit in the signal class indicates quality # any digit in the signal class indicates quality
133 233 # it's only been used 123 times # it's only been used 123 times
134 # note enclosing [] instead of (), otherwise capture() will remove
135 # non-matching items
136 234 _source_sc: .SC, _source_sc: .SC,
137 quality: [ .SC | capture("(?<q>\\d+)") | .q | tonumber ] | .[0],
138 class: [
139 # class name lookup table
140 {
141 "S": "Signature",
142 "M": "Mimic",
143 "V": "Variant",
144 "D": "Deletion",
145 "U": "Uncharacteristic",
146 "C": "Calf"
147 } as $class_names |
148 [ .SC | capture("(?<c>[SMVDUC]{1})") ] | $class_names[.[0].c]?
149 ] | .[0],
235 quality: .SC | as_signal_quality,
236 class: .SC | as_signal_class,
150 237 overlap: .SC | as_signal_overlap, overlap: .SC | as_signal_overlap,
151 238 # other general sound producing sources listed in genus field # other general sound producing sources listed in genus field
152 239 source: ( .GS | split("|") | source: ( .GS | split("|") |
 
... ... def as_species_sci_name:
178 265 null null
179 266 end end
180 267 ), ),
181 # numbers of channels
182 # input data mostly follows the documentation:
183 # 11A
184 # 11B
185 # 41D
186 # 21L
187 # regex will match only those
188 # not clear what other input values mean exactly
189 # 211
190 268 channel: [ channel: [
191 {_source_nc: .NC},
192 (
193 .NC |
194 capture("^(?<r>\\d)(?<m>\\d)(?<s>[A-L]$)") |
195 {
196 recorded: .r | tonumber,
197 multiplexed: .m | tonumber,
198 side: .s
199 }
200 )
269 {"_source_nc": .NC },
270 (.NC | as_sound_channel)
201 271 ] | add ] | add
202 272 }, },
203 273 animal: { animal: {
 
... ... def as_species_sci_name:
242 312 end end
243 313 ), ),
244 314 # age, sex and id, a animal profile # age, sex and id, a animal profile
245 # source data, ignoring species code
246 # F03FB55 F1986FB55
247 # F26FB5 F1963FB5
248 # F??FB145 F????FB145
249 # F??FB147 F????FB147
250 # F??FB153 F????FB153
251 # F??FB73 F????FB73
252 # F??FB73 F????FB7370
253 # M05FB150 M1984FB150
254 # M17Keiko M1975Keiko BE7A
255 # M17Keiko M1975Keiko BE7A
256 # M??FB34 M????FB34
257 # M??FB73 M????FB73
258 profile: [.AG |
259 capture("^(?<sex>[FM])" +
260 "(?<age>[\\?\\d]{2})" +
261 "(?<animal_id>(FB\\d+|\\w+))" +
262 "\\s+" +
263 "[FM](?<birth_year>[\\d\\?]{4})")
264 ] | .[0] |
265 (
266 {"F": "Female", "M": "Male"} as $sex |
267 {
268 sex: (if (.sex != null) then $sex[.sex] else null end),
269 age: (try (.age | tonumber) catch null),
270 animal_id: .animal_id,
271 birth_year: (try (.birth_year | tonumber) catch null)
272 }
273 ),
315 profile: .AG | as_animal_profile,
274 316 # interaction between animals # interaction between animals
275 # always a pair, and multiple sets of pairs are possible
276 # [[{}, {}]] or [[{} {}], [{} {}], ...]
277 # source data
278 # FCFB147 FCFB145
279 # FCFB147 FCFB145 | FFFB147 FFFB149 | FFFB145 FFFB149 | FCFB153 FCFB150
280 # FCFB153 FCFB150
281 # FCFB5 FCFB55
282 # FCFB73 FCFB34
283 interaction: ( .IA | split("|") |
284 map([capture("(?<type>[FMC]{2})(?<animal_id>FB\\d+)"; "g")])
285 ),
317 interaction: .IA | split("|") | map([as_animal_interaction]),
286 318 # behavior type and species code # behavior type and species code
287 319 # species code not always present, use input as fallback # species code not always present, use input as fallback
288 behavior: ( .BH | split("|") |
289 # match() returns "empty" which we can not test with if
290 map(. as $b | match("[A-C][A-Z]\\d+[A-Z]([\\s\\.]+)?$"; "m")//false |
291 if . then
320 behavior: .BH | split("|") | map(as_animal_behavior),
321 # Genus name and species code
322 genus: ( .GS | split("|") |
323 map(. as $s | match("[A-C][A-Z]\\d+[A-Z](\\s+)?$"; "m") |
292 324 { {
293 type_of: ($b[0:.offset] | gsub("^\\s+|\\s+$";"") | if (.|length) > 0 then . else null end),
325 name: $s[0:.offset] | gsub("^\\s+|\\s+$";""),
294 326 species_code: .string | gsub("^\\s+|\\s+$";"") species_code: .string | gsub("^\\s+|\\s+$";"")
295 }
296 # fallback without species code
297 else
298 { type_of: $b | gsub("^\\s+|\\s+$";"") }
299 end
300 )
301 ),
302 # Genus
303 species: .GS | split("|") |
304 map(. as $s |
305 {
306 _as_noted: $s | gsub("^\\s+|\\s+$";""),
307 species_code: $s | as_species_code,
308 scientific_name: $s | as_species_sci_name,
309 common_name: $s | as_species_common_name,
310 327 }) })
328 )
329 # Species
330 #species: .GS | split("|") |
331 # map(. as $s |
332 # {
333 # _as_noted: $s | gsub("^\\s+|\\s+$";""),
334 # species_code: $s | as_species_code,
335 # scientific_name: $s | as_species_sci_name,
336 # common_name: $s | as_species_common_name,
337 # })
311 338 } }
312 339 } }
Hints:
Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://rocketgit.com/user/dleucas/wmmsdb

Clone this repository using ssh (do not forget to upload a key first):
git clone ssh://rocketgit@ssh.rocketgit.com/user/dleucas/wmmsdb

Clone this repository using git:
git clone git://git.rocketgit.com/user/dleucas/wmmsdb

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a merge request:
... clone the repository ...
... make some changes and some commits ...
git push origin main