File transform.jq changed (mode: 100644) (index 341859d..45aa5be) |
2 |
2 |
# Source data combines multiple values into one field, so split that up |
# Source data combines multiple values into one field, so split that up |
3 |
3 |
# also use native data types if possible. |
# also use native data types if possible. |
4 |
4 |
|
|
5 |
|
{ |
|
6 |
|
# record number is unique, use as _id |
|
7 |
|
_id: .RN, |
|
8 |
|
# object contains properties of the captured signal |
|
9 |
|
signal: { |
|
10 |
|
# create a list of JSON objects and add them together |
|
|
5 |
|
{ |
|
6 |
|
# record number is unique, use as _id |
|
7 |
|
_id: .RN, |
|
8 |
|
# object contains properties of the captured signal |
|
9 |
|
signal: { |
|
10 |
|
# create a list of JSON objects and add them together |
11 |
11 |
|
|
12 |
|
# Cue field contains 3 values describing the position on tape |
|
13 |
|
# Example input from the documentation |
|
14 |
|
# 542 B2:8 8.130 |
|
15 |
|
# 1:03:12 B2:8 8.130 |
|
16 |
|
# however, following formats are also found |
|
17 |
|
# 0:00:00 B30:00 10:20.602 |
|
18 |
|
# 995 B11:28.497 5:20.426 |
|
19 |
|
# 96 B4.00 1.525 |
|
20 |
|
# 93 B23.7 9.164 |
|
21 |
|
# 93 B3:00 2:13.828 |
|
22 |
|
# 01:52:52:04 |
|
23 |
|
# 09:11:00 20:00 951.50 |
|
24 |
|
# 0 B2:00:00 |
|
25 |
|
position: [ |
|
26 |
|
# keep the source string as reference? |
|
27 |
|
{_source_string: .CU}, |
|
|
12 |
|
# Cue field contains 3 values describing the position on tape |
|
13 |
|
# Example input from the documentation |
|
14 |
|
# 542 B2:8 8.130 |
|
15 |
|
# 1:03:12 B2:8 8.130 |
|
16 |
|
# however, following formats are also found |
|
17 |
|
# 0:00:00 B30:00 10:20.602 |
|
18 |
|
# 995 B11:28.497 5:20.426 |
|
19 |
|
# 96 B4.00 1.525 |
|
20 |
|
# 93 B23.7 9.164 |
|
21 |
|
# 93 B3:00 2:13.828 |
|
22 |
|
# 01:52:52:04 |
|
23 |
|
# 09:11:00 20:00 951.50 |
|
24 |
|
# 0 B2:00:00 |
|
25 |
|
position: [ |
|
26 |
|
# keep the source string as reference? |
|
27 |
|
{_source_string: .CU}, |
28 |
28 |
|
|
29 |
|
# "cue" as in a first matched single integer, |
|
30 |
|
# without dot or colon followed by space or end of string |
|
31 |
|
# do not use \b because of the colon in 00:00 values |
|
32 |
|
(.CU | capture( "(?<cue>^\\d+(\\s|$))" ) | {cue: .cue|tonumber } ), |
|
|
29 |
|
# "cue" as in a first matched single integer, |
|
30 |
|
# without dot or colon followed by space or end of string |
|
31 |
|
# do not use \b because of the colon in 00:00 values |
|
32 |
|
(.CU | capture( "(?<c>^\\d+(\\s|$))" ) | {cue: .c|tonumber } ), |
33 |
33 |
|
|
34 |
|
# "time" as in first matched integer with 2 or 3 colons |
|
35 |
|
# followed by space or end of string |
|
36 |
|
(.CU | capture( "(?<time>^\\d+:\\d+:\\d+(:\\d+)?(\\s|$))" ) ), |
|
|
34 |
|
# "time" as in first matched integer with 2 or 3 colons |
|
35 |
|
# followed by space or end of string |
|
36 |
|
(.CU | capture( "(?<time>^\\d+:\\d+:\\d+(:\\d+)?(\\s|$))" ) ), |
37 |
37 |
|
|
38 |
|
# buffer size, B followed by integer with colon or dot, also remove B prefix |
|
39 |
|
# TODO match 2 colon version |
|
40 |
|
(.CU | capture("(?<analyzer_buffer_size>(?<=B)\\d+[:\\.]\\d+(\\.\\d+)?)") ) |
|
41 |
|
] | add, |
|
42 |
|
# cut size |
|
43 |
|
# 3.36 |
|
44 |
|
# 9.411 |
|
45 |
|
# 16.564 |
|
46 |
|
# 20.35 |
|
47 |
|
# etc |
|
48 |
|
# only 210 records use a different format, ignored for now |
|
49 |
|
# 2:00.000 |
|
50 |
|
# 1:00.030 |
|
51 |
|
# 10:25.540 |
|
52 |
|
# 1:25.158 |
|
53 |
|
# etc. |
|
54 |
|
cut_size: ( |
|
55 |
|
# skip empty records, or with a colon |
|
56 |
|
if (.CS | contains(":") or (length == 0)) then |
|
57 |
|
empty |
|
58 |
|
else |
|
59 |
|
# cast as number and handle a few remaining badly formatted records like "0.2.95" |
|
60 |
|
(try (.CS | tonumber) catch empty) |
|
61 |
|
end |
|
62 |
|
), |
|
63 |
|
# any digit in the signal class indicates quality |
|
64 |
|
# it's only been used 121 times |
|
65 |
|
quality: ( .SC | capture("(?<q>\\d+)") | .q | tonumber ) |
|
66 |
|
#_source_string_sc: .SC, |
|
67 |
|
#class: ( |
|
|
38 |
|
# buffer size, B followed by integer with colon or dot, |
|
39 |
|
# also remove B prefix |
|
40 |
|
# TODO match 2 colon version |
|
41 |
|
(.CU | capture("(?<analyzer_buffer_size>(?<=B)\\d+[:\\.]\\d+(\\.\\d+)?)") ) |
|
42 |
|
] | add, |
|
43 |
|
# cut size |
|
44 |
|
# 3.36 |
|
45 |
|
# 9.411 |
|
46 |
|
# 16.564 |
|
47 |
|
# 20.35 |
|
48 |
|
# etc |
|
49 |
|
# only 210 records use a different format, ignored for now |
|
50 |
|
# 2:00.000 |
|
51 |
|
# 1:00.030 |
|
52 |
|
# 10:25.540 |
|
53 |
|
# 1:25.158 |
|
54 |
|
# etc. |
|
55 |
|
cut_size: ( |
|
56 |
|
# skip empty records, or with a colon |
|
57 |
|
if (.CS | contains(":") or (length == 0)) then |
|
58 |
|
empty |
|
59 |
|
else |
|
60 |
|
# cast as number and handle a few remaining badly formatted |
|
61 |
|
# records like "0.2.95" |
|
62 |
|
(try (.CS | tonumber) catch empty) |
|
63 |
|
end |
|
64 |
|
), |
|
65 |
|
# any digit in the signal class indicates quality |
|
66 |
|
# it's only been used 121 times |
|
67 |
|
quality: ( .SC | capture("(?<q>\\d+)") | .q | tonumber ) |
|
68 |
|
#_source_string_sc: .SC, |
|
69 |
|
#class: ( |
68 |
70 |
#if ( .SC == "M") then "Mimic" else empty end |
#if ( .SC == "M") then "Mimic" else empty end |
69 |
71 |
# elif ( ($SC | contains("M")) or ($SC == "M")) then "Mimic" |
# elif ( ($SC | contains("M")) or ($SC == "M")) then "Mimic" |
70 |
72 |
#elif ( .SC | contains("V") or .SC == "V") then "Variant" |
#elif ( .SC | contains("V") or .SC == "V") then "Variant" |
71 |
73 |
#elif ( .SC | contains("D") or .SC == "D") then "Deletion" |
#elif ( .SC | contains("D") or .SC == "D") then "Deletion" |
72 |
74 |
#elif ( .SC | contains("U") or .SC == "U") then "Uncharacteristic" |
#elif ( .SC | contains("U") or .SC == "U") then "Uncharacteristic" |
73 |
75 |
#elif ( .SC | contains("C") or .SC == "C") then "Calf" |
#elif ( .SC | contains("C") or .SC == "C") then "Calf" |
74 |
|
#) |
|
75 |
|
}, |
|
76 |
|
sound: { |
|
|
76 |
|
#) |
|
77 |
|
}, |
|
78 |
|
sound: { |
77 |
79 |
# plain sample rate as number, however not normalized in digit length |
# plain sample rate as number, however not normalized in digit length |
78 |
80 |
# remove dot or colon, and ignore empty strings |
# remove dot or colon, and ignore empty strings |
79 |
81 |
# a bit difficult to tell what is hz and what khz |
# a bit difficult to tell what is hz and what khz |
|
97 |
99 |
{_source_string: .NC}, |
{_source_string: .NC}, |
98 |
100 |
( |
( |
99 |
101 |
.NC | |
.NC | |
100 |
|
capture("^(?<recorded>\\d)(?<multiplexed>\\d)(?<side>[A-L]$)") | |
|
|
102 |
|
capture("^(?<r>\\d)(?<m>\\d)(?<s>[A-L]$)") | |
101 |
103 |
{ |
{ |
102 |
|
recorded: .recorded | tonumber, |
|
103 |
|
multiplexed: .multiplexed | tonumber, |
|
104 |
|
side: .side |
|
|
104 |
|
recorded: .r | tonumber, |
|
105 |
|
multiplexed: .m | tonumber, |
|
106 |
|
side: .s |
105 |
107 |
} |
} |
106 |
108 |
) |
) |
107 |
109 |
] | add |
] | add |
108 |
|
} |
|
109 |
|
} |
|
110 |
|
|
|
|
110 |
|
} |
|
111 |
|
} |