diff --git a/download_dataset.sh b/download_dataset.sh
index 7cb674b046bbbc7c64cc13ce7bbe49a69d751edd..d47ad3c66bc8bb1b18c82cc980aff51dadbdec7e 100755
--- a/download_dataset.sh
+++ b/download_dataset.sh
@@ -1,5 +1,5 @@
 mkdir -p files/dataset
-wget --header="Host: storage.googleapis.com" --header="User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,/;q=0.8,application/signed-exchange;v=b3;q=0.7" --header="Accept-Language: fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6,zh-CN;q=0.5,zh;q=0.4" --header="Referer: https://www.kaggle.com/" "https://storage.googleapis.com/kaggle-data-sets/3384322/6207733/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240519%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240519T145233Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=1ac8f9216a239f62f3aa19666ce2b09c188d1d34d5199cf254a3677292e1b893eb10d0e2280baf0cbfb1f21d38a2b99f55e3e080beaa4a376d07326750503e15f35e123e2efd21c2c300a82c5bc06c787528bbe5e0d6b7be5a31bc0e6fb458b9a59456233fb852c658827d1dd547ca683890de508dd88940526568357bdd28611409ed5db0e479abf7b6f98855cd942d0cebfae55d463f288640c594bce7e11cd9f460e941cec80a7713e7faa54e69e3e9c4e9e3cd87b11bc35aa74439f96f80c2d592c6a97519353ca099d62e7276bec190a99e9327aee45ab9531d86f8f6be65fb3931148dbd4342712849494a71adcfe0b4eb54051582393fe8a98ebf68bc" -c -O 'dataset.zip'
+wget --header="Host: storage.googleapis.com" --header="User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7" --header="Accept-Language: fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6,zh-CN;q=0.5,zh;q=0.4" --header="Referer: https://www.kaggle.com/" "https://storage.googleapis.com/kaggle-data-sets/3384322/6207733/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240522%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240522T203759Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=453dbfb399d0714269c8ea5f7707f893443249dc297dd21a32d408dbfb38eb2bced6c0e361d1ab57b8ad8554ea2f813a96347a68892b3d3bd9de02e1a81c77822bd5130f0966133859b331f355a223fbf2325ddcfbbac9626de8ca6be001165a4508e4523d3c82c0625a635719e779ccb35fa574b9f647727c34f65d36e95a24d099a714c088a51ead5620fa695093d5c35380276284f5d56fd6e13df688865e1567622adcc8f2fc31c341b5c45763f056596bc119216b52ea086205c3745260237af2422144c636ee31a3d396ce8b8473223edd76b4d15d3c5fde1cb33287977620762439136892e45dd8dd86adc7c0947737c6f79eee364510483354984acb" -c -O 'dataset.zip'
 unzip dataset.zip -d files/dataset
 echo "WARNING: Fix csv files for Hive import with script ./files/scripts/replace_newlines.sh"