#! /bin/sh
#
# wget-clone-web - download a web page, together with the files needed to
# display it, and rewrite its links for local viewing.
#
# Usage: wget-clone-web URI [EXTRA_WGET_OPTION]...
#
# Every argument is handed to wget unmodified, after the fixed options
# listed in clone_web below. wget's messages are appended to wget.log in
# the current directory.
#
# Exit status: 2 when called without arguments, otherwise wget's status.
#
# Origin: bin/wget-clone-web in khome.git.

# Base delay, in seconds, between successive retrievals (--wait).
readonly WAIT_SECONDS='0.75'

# Value sent in the User-Agent header (--user-agent).
#readonly USER_AGENT='Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0'
readonly USER_AGENT='ELinks (0.12pre6; NetBSD; 800x600)'

# Print a one-line usage summary to stderr.
usage() {
  printf 'Usage: %s URI [EXTRA_WGET_OPTION]...\n' "${0##*/}" >&2
}

# Run wget with the cloning options, followed by the caller's arguments.
#
# Arguments: $@ - URI(s) and any extra wget options, passed through verbatim
# Returns:   2 if no arguments were given, otherwise wget's exit status
#
# Fixed options, in the order they are passed:
#   -a wget.log       Append wget's messages to wget.log.
#   -c                Continue partially downloaded files.
#   --wait SECONDS    Pause between retrievals.
#   --random-wait     Randomly vary each pause in the range from wait*0.5
#                     to wait*1.5.
#   -p                --page-requisites: "download all the files that are
#                     necessary to properly display a given HTML page."
#   -k                --convert-links: after the download is complete,
#                     convert the links in the document to make them
#                     suitable for local viewing. This affects not only the
#                     visible hyperlinks, but any part of the document that
#                     links to external content, such as embedded images,
#                     links to style sheets, hyperlinks to non-HTML content,
#                     etc.
#   -E                --adjust-extension: ensure the file extension matches
#                     the file's MIME type.
#   -U AGENT          --user-agent: identify as AGENT.
#   -e robots=off     Disrespect robots.txt.
clone_web() {
  if [ "$#" -eq 0 ]; then
    usage
    return 2
  fi

  # The user agent is its own quoted word so that wget receives the bare
  # string; embedding shell quotes inside the value would send them to the
  # server literally. "$@" keeps each caller argument intact: no word
  # splitting, and no glob expansion of '?', '*' or '[' inside URIs.
  wget \
    -a wget.log \
    -c \
    --wait "$WAIT_SECONDS" \
    --random-wait \
    -p \
    -k \
    -E \
    -U "$USER_AGENT" \
    -e robots=off \
    "$@"
}

clone_web "$@"