/transform.sh (93d9e62fb6f1e4b2444fb3c8ded4ea6a856c7c13) (1336 bytes) (mode 100755) (type blob)
#!/bin/bash
# Transform HTML metadata from the source site into JSON.
#
# Reads:    data/retrieval.numbers        one retrieval number (RN) per line
#           raw/rn/metaData.cfm?RN=<RN>   input handed to xidel, parsed as HTML
#           transform.jq                  jq program applied to all records
# Writes:   data/rn/<RN>.json             one JSON object per retrieval number
#           data/transformed.json         output of transform.jq
# Requires: xidel, jq
#
# -e          abort on any error
# -u          abort on use of an unset variable
# -o pipefail make "xidel | jq" fail when xidel fails; without it only jq's
#             exit status counts and a broken record is written silently
set -euo pipefail

# xidel invocation used for the XPath extraction. Kept as an array so the
# options are passed as separate words without relying on word splitting.
XIDEL=(xidel -s --input-format=html --output-format=json-wrapped)

# select all cells of all rows from the 2nd table element
XPATH_ENTRY='/html/body/table[2]/tbody/tr/td'

# With this XPath, xidel returns a single JSON array holding all table keys
# and values:
# [["RN:","99002005","CU:","0 B2:30 1:55.149","NC:","11A","SR:","3400",... ]
# The array is chunked into pairs and combined into one JSON object of
# key: value entries:
#{
# "RN:": "99002005",
# "CU:": "0 B2:30 1:55.149",
# "NC:": "11A",
# "SR:": "3400",
# "CS:": "3.388",
# ...
#}
# The jq filter explained
# 1. assign the whole array to $row
# 2. create a range with a step of 2 over the length of the array, 0,2,4,...
# 3. create an object and use the range as index for the $row elements
# 3.5 remove the trailing colon from the key
# 4. combine the list of objects into a single object with "add"
JQ_ARR2OBJ='[ .[] as $row | range(0; $row|length; 2) | {( $row[.] | rtrimstr(":")): ($row[.+1]) } ] | add'

# make sure the per-record output directory exists before redirecting into it
mkdir -p data/rn

# -r keeps backslashes literal; the "|| [[ -n ... ]]" part still processes a
# final line that is not terminated by a newline.
while read -r RN || [[ -n "$RN" ]]
do
  # skip blank lines instead of producing "data/rn/.json"
  if [[ -z "$RN" ]]; then
    continue
  fi
  "${XIDEL[@]}" --xpath "$XPATH_ENTRY" "raw/rn/metaData.cfm?RN=$RN" \
    | jq "$JQ_ARR2OBJ" > "data/rn/$RN.json"
done < data/retrieval.numbers

# transform all sound records with jq, this is where the magic happens
# (only the *.json files written by the loop above are picked up)
jq -r -f transform.jq data/rn/*.json > data/transformed.json
Mode |
Type |
Size |
Ref |
File |
100644 |
blob |
11 |
dea9097138acee79ce13ef95ecaed594eea1fc34 |
.gitignore |
100644 |
blob |
2019 |
fa6fa76919a7cf93fba42f24e05ce897839c8bd2 |
DATA.md |
100755 |
blob |
1964 |
77e9f5c9baaa1276ae068eeb31f51d0fe7ca4893 |
GeoJSON.jq |
100644 |
blob |
864 |
e0082817c39b573df78e0ed2d3e2d4cfc9255036 |
README.md |
100644 |
blob |
865 |
6ac29799fea3cd2dd8c0e8116a12e6da93809572 |
TODO.md |
100755 |
blob |
1509 |
d154c6171b793c0b32b8f3020b6703ca4fdcc1ee |
download.sh |
100644 |
blob |
218 |
9100d4eb109a354733264a3b989d0de699db3c9c |
index.jq |
100644 |
blob |
4648 |
8029c7e3c56f4c61e38ec0abc8135bc1a063071b |
index.mapping.json |
100755 |
blob |
482 |
7abb8bcf9a49d7c849ab50538e47c9033a921f28 |
index.sh |
040000 |
tree |
- |
60e19bf3f6e7f2fba709ae362e565cf2df36ac29 |
snd |
040000 |
tree |
- |
6e02959705bd738b6776f95e9db4ad6e7abdbcd1 |
srv |
100644 |
blob |
8614 |
475fb0324d694a9e90c19017b971e5dd5b23aa15 |
transform.jq |
100755 |
blob |
1336 |
93d9e62fb6f1e4b2444fb3c8ded4ea6a856c7c13 |
transform.sh |
040000 |
tree |
- |
686320f33b64603b585154cc9653b1429c917aeb |
webroot |
Hints:
Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"
Clone this repository using HTTP(S):
git clone https://rocketgit.com/user/dleucas/wmmsdb
Clone this repository using ssh (do not forget to upload a key first):
git clone ssh://rocketgit@ssh.rocketgit.com/user/dleucas/wmmsdb
Clone this repository using git:
git clone git://git.rocketgit.com/user/dleucas/wmmsdb
You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a
merge request:
... clone the repository ...
... make some changes and some commits ...
git push origin main