comparison moefetch.sh @ 214:a6624fb9b317

major cleanup. tweaking. untested
author edhoprima@gmail.com <edhoprima@gmail.com>
date Thu, 02 Jul 2009 19:10:36 +0000
parents dd95cf01602c
children 710082ce6788
comparison
equal deleted inserted replaced
213:dd95cf01602c 214:a6624fb9b317
49 ### WILL BE FOR 0.3 49 ### WILL BE FOR 0.3
50 50
51 # useless welcome message. Also version 51 # useless welcome message. Also version
52 Msg_Welcome() { 52 Msg_Welcome() {
53 MOEFETCHVERSION="0.2.1" 53 MOEFETCHVERSION="0.2.1"
54 cat <<EOF 54 echo "moefetch ${MOEFETCHVERSION}
55 moefetch ${MOEFETCHVERSION} 55 Copyright (c) 2009 edogawaconan <me@myconan.net>"
56 Copyright (c) 2009 edogawaconan <me@myconan.net>
57
58 EOF
59 } 56 }
60 57
61 # fatal error handler 58 # fatal error handler
62 Err_Fatal() { 59 Err_Fatal() {
63 printf "\nFatal error: ${1}\n" 60 echo "
61 Fatal error: ${1}"
64 exit 1 62 exit 1
65 } 63 }
66 64
67 # help message 65 # help message
68 Err_Help() { 66 Err_Help() {
69 cat <<EOF 67 echo "moefetch.sh COMMAND [-s SITE_URL] TAGS
70 moefetch.sh COMMAND [-s SITE_URL] TAGS
71 68
72 COMMAND: 69 COMMAND:
73 (quick)fetch: do a complete update. Add prefix quick to skip file checking 70 (quick)fetch: do a complete update. Add prefix quick to skip file checking
74 check: get list of new files, clean up local folder and print total new files 71 check: get list of new files, clean up local folder and print total new files
75 72
76 -s SITE_URL: Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE} 73 -s SITE_URL: Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}
77 74
78 TAGS: Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme 75 TAGS: Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme"
79
80 EOF
81 exit 2 76 exit 2
82 } 77 }
83 78
84 # generate link by transforming xml 79 # generate link by transforming xml
85 Generate_Link() { 80 Generate_Link() {
86 printf "\nFetching xml file\n" 81 echo "
87 TEMPNUM=1001 82 Fetching xml file"
83 TEMPNUM=1000
88 _i=1 84 _i=1
89 > "${TEMP_PREFIX}-list" 85 > "${TEMP_PREFIX}-list"
90 while [ "${TEMPNUM}" -ge 1000 ]; do 86 while [ "${TEMPNUM}" -ge 1000 ]; do
91 wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=1000&page=${_i}" -O "${TEMP_PREFIX}-xml" -e continue=off 87 wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=1000&page=${_i}" -O "${TEMP_PREFIX}-xml" -e continue=off
92 printf "Processing XML file..." 88 printf "Processing XML file... "
93 # xslt evilry 89 # xslt evilry
94 > "${TEMP_PREFIX}-templist" 90 > "${TEMP_PREFIX}-templist"
95 xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist" 2>/dev/null 91 xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist" 2>/dev/null
96 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> 92 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
97 <xsl:output method="xml" indent="yes"/> 93 <xsl:output method="xml" indent="yes"/>
101 </xsl:stylesheet> 97 </xsl:stylesheet>
102 EOF 98 EOF
103 TEMPNUM=$(echo $(wc -l < "${TEMP_PREFIX}-templist")) 99 TEMPNUM=$(echo $(wc -l < "${TEMP_PREFIX}-templist"))
104 _i=$((_i+1)) 100 _i=$((_i+1))
105 cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list" 101 cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list"
102 echo "${TEMPNUM} file(s) available"
106 done 103 done
107 NUMFILES=$(echo $(wc -l < "${TEMP_PREFIX}-list")) 104 NUMFILES=$(echo $(wc -l < "${TEMP_PREFIX}-list"))
105 echo "${NUMFILES} file(s) available on server"
108 [ "${NUMFILES}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site" 106 [ "${NUMFILES}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site"
109 echo " ${NUMFILES} file(s) available on server" 107 }
110 #output file: ${TARGET_DIR}-list 108
109 Is_NotMD5() {
110 echo "${*}" | sed -e 's/\([0-9a-f]\{32\}\..*\)//g'
111 } 111 }
112 112
113 Progress_Init() { 113 Progress_Init() {
114 _last="-" 114 _last="-"
115 printf "${_last}" 115 printf "${_last}"
177 # 177 #
178 } 178 }
179 179
180 # Do some cleanup 180 # Do some cleanup
181 Cleanup_Repository() { 181 Cleanup_Repository() {
182
183 # THE FILES
184
185 # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} 182 # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
186 printf "Cleaning up repository folder... " 183 printf "Cleaning up repository folder... "
187 Progress_Init 184 Progress_Init
188 TRASH_DIR=$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M") 185 TRASH_DIR=$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M")
186 TRASHES=
189 mkdir -p "${BASE_DIR}/trash/${TRASH_DIR}" || Err_Fatal "Unable to create trash folder" 187 mkdir -p "${BASE_DIR}/trash/${TRASH_DIR}" || Err_Fatal "Unable to create trash folder"
190 for TRASH in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* 188 for TRASH in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
191 do 189 do
192 ISTRASH= 190 ISTRASH=
193 if [ -d "${TRASH}" ]; then 191 if [ -d "${TRASH}" ]; then
194 ISTRASH=1 192 ISTRASH=1
195 else 193 else
196 if [ "$(echo "${TRASH}" | sed -e "${SED_GET_FILENAME};${SED_IS_MD5_FILE}" | grep -v ^$)" ]; then 194 if [ "$(Is_NotMD5 "$(basename "${TRASH}")")" ]; then
197 ISTRASH=1 195 ISTRASH=1
198 else 196 else
199 [ "$(cat "${TEMP_PREFIX}-list" | sed -e "${SED_GET_FILENAME}" | grep $(echo "${TRASH}" | sed -e "${SED_GET_FILENAME}"))" ] || ISTRASH=1 197 [ "$(grep "$(basename "${TRASH}")" "${TEMP_PREFIX}-list")" ] || ISTRASH=1
200 fi 198 fi
201 fi 199 fi
202 if [ "${ISTRASH}" ]; then 200 if [ "${ISTRASH}" ]; then
203 mv -f "${TRASH}" "${BASE_DIR}/trash/${TRASH_DIR}" || Err_Fatal "Error deleting files" 201 mv -f "${TRASH}" "${BASE_DIR}/trash/${TRASH_DIR}" || Err_Fatal "Error deleting files"
204 printf "\bMoved $(echo "${TRASH}" | sed -e "${SED_GET_FILENAME}") to ${BASE_DIR}/trash/${TRASH_DIR}\n${_last}" 202 TRASHES="${TRASHES}
203 $(basename "${TRASH}")"
205 fi 204 fi
206 Progress_Anim 205 Progress_Anim
207 done 206 done
208 rmdir "${BASE_DIR}/trash/${TRASH_DIR}" 2>/dev/null 207 rmdir "${BASE_DIR}/trash/${TRASH_DIR}" 2>/dev/null
209 Progress_Done 208 Progress_Done
209 echo "These files have been moved to ${BASE_DIR}/trash/${TRASH_DIR}:
210 ${TRASHES}"
210 } 211 }
211 212
212 # check files correctness 213 # check files correctness
213 Check_Files() { 214 Check_Files() {
214 if [ ! "${ISNEW}" ]; then 215 if test ! -n "${ISNEW}"; then
215 [ "${NOCLEAN}" ] || Cleanup_Repository 216 test -z "${NOCLEAN}" && Cleanup_Repository
216 printf "Checking for errors... " 217 printf "Checking for errors... "
217 Progress_Init 218 Progress_Init
219 files_error="These files do not match its md5:"
220 files_notdanbooru="These files are not checked:"
221 has_err_filename=
222 has_err_md5=
218 > "${TEMP_PREFIX}-error" 223 > "${TEMP_PREFIX}-error"
219 for FILE in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* 224 > "${TEMP_PREFIX}-ok"
225 for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
220 do 226 do
221 if [ "$(echo "${FILE}" | sed -e "${SED_GET_FILENAME};${SED_IS_MD5_FILE}" | grep -v ^$)" ]; then 227 if test -n "$(Is_NotMD5 "$(basename "${file}")")" || test -d "${file}"; then
222 printf "\bNot a valid danbooru file: $(echo ${FILE} | sed -e "${SED_GET_FILENAME}")\n${_last}" 228 files_notdanbooru="${files_notdanbooru}
229 $(basename "${file}")"
230 has_err_filename=1
223 else 231 else
224 if [ "$(${MD5} "${FILE}" | cut -d ' ' -f1 -)" != "$(echo "${FILE}" | sed -e "${SED_GET_FILENAME}" | cut -d '.' -f1)" ] 232 if test "$(${MD5} "${file}" | cut -d ' ' -f1 -)" = "$(basename "${file}" | cut -d '.' -f1)"; then
225 then 233 echo "$(basename "${file}")" >> "${TEMP_PREFIX}-ok"
226 echo "${FILE}" >> "${TEMP_PREFIX}-error" 234 else
227 printf "\bError: $(echo "${FILE}" | sed -e "${SED_GET_FILENAME}")\n${_last}" 235 rm "${file}" || Err_Fatal "Error removing ${file}"
236 echo "$(basename "${file}")" >> "${TEMP_PREFIX}-error"
237 files_error="${files_error}
238 $(basename "${file}")"
239 has_err_md5=1
228 fi 240 fi
229 Progress_Anim
230 fi 241 fi
242 Progress_Anim
231 done 243 done
232 Progress_Done 244 Progress_Done
233 TOTAL_ERROR=$(echo $(wc -l < "${TEMP_PREFIX}-error")) 245 if test ! -n "${has_err_md5}" && test ! -n "${has_err_filename}"; then
234 if [ "${TOTAL_ERROR}" -eq 0 ]; then
235 echo "All files OK" 246 echo "All files OK"
236 else 247 else
237 printf "${TOTAL_ERROR} file(s) broken: removing..." 248 if test ! -n "${has_err_md5}"; then
238 cat "${TEMP_PREFIX}-error" | xargs rm 249 echo "${files_error}"
239 echo " ${TOTAL_ERROR} file(s) removed" 250 echo "$(echo $(wc -l < "${TEMP_PREFIX}-error")) file(s) removed"
240 fi 251 fi
241 echo "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}") file(s) available locally" 252 test -n "${has_err_filename}" && echo "${files_notdanbooru}"
253 fi
254 echo "$(echo $(wc -l < "${TEMP_PREFIX}-ok")) file(s) available locally"
242 255
243 printf "Generating list of new files... " 256 printf "Generating list of new files... "
244 Progress_Init 257 Progress_Init
245 # THE FILES
246 #ls "../${TARGET_DIR}" | grep -vf "${TARGET_DIR}-error" > "${TARGET_DIR}-ok"
247 #
248 find "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" | comm -1 -3 "${TEMP_PREFIX}-error" - | sed -e "${SED_GET_FILENAME}" > "${TEMP_PREFIX}-ok"
249 cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-templist" 258 cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-templist"
250 while read -r IS_OK; do 259 while read -r is_ok; do
251 cat "${TEMP_PREFIX}-templist" | grep -v "${IS_OK}" > "${TEMP_PREFIX}-newlist" 260 grep -v "${is_ok}" "${TEMP_PREFIX}-templist" > "${TEMP_PREFIX}-newlist"
252 cat "${TEMP_PREFIX}-newlist" > "${TEMP_PREFIX}-templist" 261 cat "${TEMP_PREFIX}-newlist" > "${TEMP_PREFIX}-templist"
253 Progress_Anim 262 Progress_Anim
254 done < "${TEMP_PREFIX}-ok" 263 done < "${TEMP_PREFIX}-ok"
255 Progress_Done 264 Progress_Done
256 #cat "${TEMP_PREFIX}-list" | egrep -vf "${TEMP_PREFIX}-ok" > "${TEMP_PREFIX}-newlist" 265 echo "$(echo $(wc -l < "${TEMP_PREFIX}-newlist")) file(s) to be downloaded"
257 echo "$(echo $(wc -l < "${TEMP_PREFIX}-newlist")) file(s) to be downloaded"
258
259 else 266 else
260 if [ "${ISQUICK}" ]; then 267 if test -n "${ISQUICK}"; then
261 echo "Quick mode selected. Skipping check" 268 echo "Quick mode selected. Skipping check"
262 else 269 else
263 echo "Empty local repository" 270 echo "Empty local repository"
264 fi 271 fi
265 cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist" 272 cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"