comparison moefetch.sh @ 203:94a585031e3b

speed?
author edhoprima@gmail.com <edhoprima@gmail.com>
date Mon, 29 Jun 2009 16:36:16 +0000
parents 3dbb5a6678f9
children fb02adf58c00
comparison
equal deleted inserted replaced
202:3dbb5a6678f9 203:94a585031e3b
49 ### - sanity checking 49 ### - sanity checking
50 ### WILL BE FOR 0.3 50 ### WILL BE FOR 0.3
51 51
52 # useless welcome message. Also version 52 # useless welcome message. Also version
53 Msg_Welcome() { 53 Msg_Welcome() {
54 MOEFETCHVERSION="0.2-beta1" 54 MOEFETCHVERSION="0.2-beta2"
55 cat <<EOF 55 cat <<EOF
56 moefetch ${MOEFETCHVERSION} 56 moefetch ${MOEFETCHVERSION}
57 Copyright (c) 2009 edogawaconan <me@myconan.net> 57 Copyright (c) 2009 edogawaconan <me@myconan.net>
58 58
59 EOF 59 EOF
87 printf "\nFetching xml file\n" 87 printf "\nFetching xml file\n"
88 wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=100000" -O "${TEMP_PREFIX}-xml" -e continue=off 88 wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=100000" -O "${TEMP_PREFIX}-xml" -e continue=off
89 printf "Processing XML file..." 89 printf "Processing XML file..."
90 # xslt evilry 90 # xslt evilry
91 > "${TEMP_PREFIX}-list" 91 > "${TEMP_PREFIX}-list"
92 xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | egrep ^http > "${TEMP_PREFIX}-list" 2>/dev/null 92 xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-list" 2>/dev/null
93 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> 93 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
94 <xsl:output method="xml" indent="yes"/> 94 <xsl:output method="xml" indent="yes"/>
95 <xsl:template match="post"> 95 <xsl:template match="post">
96 <xsl:value-of select="@file_url" /> 96 <xsl:value-of select="@file_url" />
97 </xsl:template> 97 </xsl:template>
125 *) Fatal_Err "No known md5 tool for this platform. Please specify manually" 125 *) Fatal_Err "No known md5 tool for this platform. Please specify manually"
126 esac 126 esac
127 fi 127 fi
128 MD5_COMMAND=$(echo ${MD5} | cut -d' ' -f1) 128 MD5_COMMAND=$(echo ${MD5} | cut -d' ' -f1)
129 # basic tools 129 # basic tools
130 COMMANDS="cut sed wc wget xsltproc xargs rm mkdir chown comm egrep grep date ${MD5_COMMAND}" 130 COMMANDS="cut sed wc wget xsltproc xargs rm mkdir chown comm grep date ${MD5_COMMAND}"
131 for COMMAND in ${COMMANDS} 131 for COMMAND in ${COMMANDS}
132 do 132 do
133 [ "$(command -v "${COMMAND}")" ] || Err_Fatal "${COMMAND} doesn't exist in ${PATH}" 133 [ "$(command -v "${COMMAND}")" ] || Err_Fatal "${COMMAND} doesn't exist in ${PATH}"
134 done 134 done
135 } 135 }
166 do 166 do
167 ISTRASH= 167 ISTRASH=
168 if [ -d "${TRASH}" ]; then 168 if [ -d "${TRASH}" ]; then
169 ISTRASH=1 169 ISTRASH=1
170 else 170 else
171 if [ "$(echo "${TRASH}" | sed -e "${SED_GET_FILENAME};${SED_IS_MD5_FILE}" | egrep -v ^$)" ]; then 171 if [ "$(echo "${TRASH}" | sed -e "${SED_GET_FILENAME};${SED_IS_MD5_FILE}" | grep -v ^$)" ]; then
172 ISTRASH=1 172 ISTRASH=1
173 else 173 else
174 [ "$(cat "${TEMP_PREFIX}-list" | sed -e "${SED_GET_FILENAME}" | grep $(echo "${TRASH}" | sed -e "${SED_GET_FILENAME}"))" ] || ISTRASH=1 174 [ "$(cat "${TEMP_PREFIX}-list" | sed -e "${SED_GET_FILENAME}" | grep $(echo "${TRASH}" | sed -e "${SED_GET_FILENAME}"))" ] || ISTRASH=1
175 fi 175 fi
176 fi 176 fi
185 185
186 # check files correctness 186 # check files correctness
187 Check_Files() { 187 Check_Files() {
188 if [ ! "${ISNEW}" ]; then 188 if [ ! "${ISNEW}" ]; then
189 [ "${NOCLEAN}" ] || Cleanup_Repository 189 [ "${NOCLEAN}" ] || Cleanup_Repository
190 echo "Checking for errors..." 190 printf "Checking for errors..."
191 > "${TEMP_PREFIX}-error" 191 > "${TEMP_PREFIX}-error"
192 for FILE in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* 192 for FILE in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
193 do 193 do
194 if [ "$(echo "${FILE}" | sed -e "${SED_GET_FILENAME};${SED_IS_MD5_FILE}" | egrep -v ^$)" ]; then 194 if [ "$(echo "${FILE}" | sed -e "${SED_GET_FILENAME};${SED_IS_MD5_FILE}" | grep -v ^$)" ]; then
195 echo 195 printf "\nNot a valid danbooru file: $(echo ${FILE} | sed -e "${SED_GET_FILENAME}")\n"
196 echo "Not a valid danbooru file: $(echo ${FILE} | sed -e "${SED_GET_FILENAME}")"
197 else 196 else
198 if [ "$(${MD5} "${FILE}" | cut -d ' ' -f1 -)" != "$(echo "${FILE}" | sed -e "${SED_GET_FILENAME}" | cut -d '.' -f1)" ] 197 if [ "$(${MD5} "${FILE}" | cut -d ' ' -f1 -)" != "$(echo "${FILE}" | sed -e "${SED_GET_FILENAME}" | cut -d '.' -f1)" ]
199 then 198 then
200 echo "${FILE}" >> "${TEMP_PREFIX}-error" 199 echo "${FILE}" >> "${TEMP_PREFIX}-error"
201 echo 200 echo
202 echo "Error: $(echo "${FILE}" | sed -e "${SED_GET_FILENAME}")" 201 echo "Error: $(echo "${FILE}" | sed -e "${SED_GET_FILENAME}")"
203 fi 202 fi
204 printf "." 203 printf "."
205 fi 204 fi
206 done 205 done
207 echo 206 echo " done"
208 TOTAL_ERROR=$(echo $(wc -l < "${TEMP_PREFIX}-error")) 207 TOTAL_ERROR=$(echo $(wc -l < "${TEMP_PREFIX}-error"))
209 echo "${TOTAL_ERROR} file(s) error"
210 echo "Removing error files"
211 if [ "${TOTAL_ERROR}" -eq 0 ]; then 208 if [ "${TOTAL_ERROR}" -eq 0 ]; then
212 echo "No error file. 0 file removed" 209 echo "All files OK"
213 else 210 else
211 printf "${TOTAL_ERROR} file(s) broken: removing..."
214 cat "${TEMP_PREFIX}-error" | xargs rm 212 cat "${TEMP_PREFIX}-error" | xargs rm
215 echo "${TOTAL_ERROR} file(s) removed" 213 echo " ${TOTAL_ERROR} file(s) removed"
216 fi 214 fi
217 echo "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}") file(s) available locally" 215 echo "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}") file(s) available locally"
218 216
219 echo "Generating list of new files..." 217 printf "Generating list of new files..."
220 # THE FILES 218 # THE FILES
221 #ls "../${TARGET_DIR}" | grep -vf "${TARGET_DIR}-error" > "${TARGET_DIR}-ok" 219 #ls "../${TARGET_DIR}" | grep -vf "${TARGET_DIR}-error" > "${TARGET_DIR}-ok"
222 # 220 #
223 find "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" | comm -1 -3 "${TEMP_PREFIX}-error" - | sed -e "${SED_GET_FILENAME}" > "${TEMP_PREFIX}-ok" 221 find "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" | comm -1 -3 "${TEMP_PREFIX}-error" - | sed -e "${SED_GET_FILENAME}" > "${TEMP_PREFIX}-ok"
224 cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-templist" 222 cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-templist"
225 #while read -r IS_OK; do 223 while read -r IS_OK; do
226 # cat "${TEMP_PREFIX}-templist" | grep -v "${IS_OK}" > "${TEMP_PREFIX}-newlist" 224 cat "${TEMP_PREFIX}-templist" | grep -v "${IS_OK}" > "${TEMP_PREFIX}-newlist"
227 # cat "${TEMP_PREFIX}-newlist" > "${TEMP_PREFIX}-templist" 225 cat "${TEMP_PREFIX}-newlist" > "${TEMP_PREFIX}-templist"
228 #done < "${TEMP_PREFIX}-ok" 226 printf "."
229 cat "${TEMP_PREFIX}-list" | egrep -vf "${TEMP_PREFIX}-ok" > "${TEMP_PREFIX}-newlist" 227 done < "${TEMP_PREFIX}-ok"
228 #cat "${TEMP_PREFIX}-list" | egrep -vf "${TEMP_PREFIX}-ok" > "${TEMP_PREFIX}-newlist"
229 echo " done"
230 echo "$(echo $(wc -l < "${TEMP_PREFIX}-newlist")) file(s) to be downloaded" 230 echo "$(echo $(wc -l < "${TEMP_PREFIX}-newlist")) file(s) to be downloaded"
231 231
232 else 232 else
233 if [ "${ISQUICK}" ]; then 233 if [ "${ISQUICK}" ]; then
234 echo "quick mode selected. Skipping check" 234 echo "quick mode selected. Skipping check"
242 # start downloading the images 242 # start downloading the images
243 Fetch_Images() { 243 Fetch_Images() {
244 if [ "$(echo $(wc -l < "${TEMP_PREFIX}-newlist"))" -eq 0 ]; then 244 if [ "$(echo $(wc -l < "${TEMP_PREFIX}-newlist"))" -eq 0 ]; then
245 echo "No new file" 245 echo "No new file"
246 else 246 else
247 echo "Starting wget" 247 printf "Starting wget..."
248 cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" 248 cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
249 wget -e continue=on -bi "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log" 249 wget -e continue=on -bi "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log"
250 fi 250 fi
251 } 251 }
252 252