annotate moefetch.sh @ 157:ffca564daefa

fix: I suck at comm
author edhoprima
date Sat, 25 Apr 2009 16:26:00 +0000
parents d3b002fd944e
children cba73f6a96bb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
148
edhoprima
parents:
diff changeset
1 #!/bin/sh
edhoprima
parents:
diff changeset
2
edhoprima
parents:
diff changeset
3 # Copyright (c) 2009, edogawaconan <me@myconan.net>
edhoprima
parents:
diff changeset
4 #
edhoprima
parents:
diff changeset
5 # Permission to use, copy, modify, and/or distribute this software for any
edhoprima
parents:
diff changeset
6 # purpose with or without fee is hereby granted, provided that the above
edhoprima
parents:
diff changeset
7 # copyright notice and this permission notice appear in all copies.
edhoprima
parents:
diff changeset
8 #
edhoprima
parents:
diff changeset
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
edhoprima
parents:
diff changeset
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
edhoprima
parents:
diff changeset
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
edhoprima
parents:
diff changeset
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
edhoprima
parents:
diff changeset
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
edhoprima
parents:
diff changeset
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
edhoprima
parents:
diff changeset
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
edhoprima
parents:
diff changeset
16
edhoprima
parents:
diff changeset
17 # Version 0.1-alpha1. Lots of bugs here. Use with care
edhoprima
parents:
diff changeset
18 # USE WITH CARE
edhoprima
parents:
diff changeset
19
edhoprima
parents:
diff changeset
20 # what it does: fetch every picture that has the specified tags.
edhoprima
parents:
diff changeset
21
edhoprima
parents:
diff changeset
22 # requirement: wget, libxslt, md5sum (or md5)
edhoprima
parents:
diff changeset
23
edhoprima
parents:
diff changeset
# configs
# Extra directories (colon-separated) appended to PATH when looking up:
# cut, sed, wc, md5(sum), wget, xsltproc. Leave empty to use PATH as it is.
extrapath=

# md5 calculation command (options allowed).
# Expected output: <32-digit md5><space(s)><filename>
# gnu: "md5sum", bsd: "md5 -r"
md5="md5 -r"

# Server address. Danbooru-style sites only; the author takes no
# responsibility for what happens when it is pointed anywhere else.
site="moe.imouto.org"

# Base directory. It must be owned by the user running the script (this is
# checked) and writable (this is NOT checked).
# Structure is $basedir/<tags>
basedir="/home/ifail/test"

# not user modifiable from here
edhoprima
parents:
diff changeset
40
edhoprima
parents:
diff changeset
# GENERATE: fetch the post index for $tags from $site and reduce it to a
# plain list of image URLs, one per line.
#
# Globals read: site, tags, outdir
# Files written in the current directory:
#   $outdir-xml   raw XML answer from the server
#   $outdir-list  image URLs, rewritten to end in <md5>.<ext>
# Returns: 1 when wget fails (the URL list is emptied first so a list left
#          over from an earlier run is never reused), 0 otherwise.
GENERATE()
{
  echo
  echo "Fetching xml file"
  if ! wget "http://$site/post/index.xml?tags=$tags&offset=0&limit=100000" -O "$outdir-xml" -e continue=off; then
    echo "Failed to fetch the post list from $site" >&2
    : > "$outdir-list"
    return 1
  fi
  echo "Processing XML file..."
  # xslt evilry: the stylesheet prints the file_url attribute of every <post>.
  # sed then drops whatever sits between the 32-digit md5 and the extension,
  # and grep keeps only the lines that start with a URL.
  # The heredoc delimiter is quoted so the shell never expands the stylesheet.
  xsltproc - "$outdir-xml" <<'EOF' | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "$outdir-list"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
  # Arithmetic expansion strips the padding some wc implementations print.
  generate_count=$(wc -l < "$outdir-list")
  echo "$(( ${generate_count:-0} )) file(s) available on server"
  #output file: $outdir-list
}
edhoprima
parents:
diff changeset
59
edhoprima
parents:
diff changeset
# CHECK: verify the local files against their md5-based names and work out
# which URLs still have to be downloaded.
#
# Must be called with the work directory ($basedir/temp) as the current
# directory; the local files are expected in ../$outdir. The current
# directory is not changed.
#
# Globals read:    ISNEW, ISQUICK, md5, outdir
# Globals written: totalerr (number of corrupt files; only when ISNEW is not 1)
# Files in the current directory:
#   $outdir-list     read:    URLs available on the server
#   $outdir-error    written: local files whose md5 differs from their name
#   $outdir-ok       written: local files that passed the md5 check
#   $outdir-newlist  written: URLs from $outdir-list that contain none of the
#                             names in $outdir-ok
# Side effect: every file listed in $outdir-error is deleted from ../$outdir.
# When ISNEW is 1 nothing is verified and every URL is scheduled.
CHECK()
{
  if [ "$ISNEW" -ne 1 ]; then
    echo "Checking for errors..."
    # THE FILES
    : > "$outdir-error"
    : > "$outdir-ok"
    totalerr=0
    for check_path in "../$outdir"/*; do
      # An unmatched glob stays literal, and directories cannot be hashed.
      [ -f "$check_path" ] || continue
      check_file=${check_path##*/}
      # $md5 is left unquoted on purpose: it may carry options ("md5 -r").
      check_sum=$($md5 "$check_path" | cut -d ' ' -f 1)
      # The expected digest is the file name up to its first dot.
      if [ "$check_sum" != "${check_file%%.*}" ]; then
        printf '%s\n' "$check_file" >> "$outdir-error"
        echo "Error: $check_file"
        totalerr=$((totalerr + 1))
      else
        printf '%s\n' "$check_file" >> "$outdir-ok"
      fi
      printf "."
    done
    echo
    echo "$totalerr file(s) error"

    echo "Generating list of new files..."
    # File names are matched as fixed strings (-F), not as regular
    # expressions. An empty ok-list means everything is new.
    if [ -s "$outdir-ok" ]; then
      grep -v -F -f "./$outdir-ok" < "$outdir-list" > "$outdir-newlist"
    else
      cat < "$outdir-list" > "$outdir-newlist"
    fi
    check_count=$(wc -l < "$outdir-newlist")
    echo "$(( ${check_count:-0} )) file(s) to be downloaded"

    echo "Removing error files"
    if [ "$totalerr" -eq 0 ]; then
      echo "No error file. 0 file removed"
    else
      # One rm per name, so odd file names cannot be split or taken as options.
      while IFS= read -r check_file; do
        rm "../$outdir/$check_file"
      done < "$outdir-error"
      echo "$totalerr file(s) removed"
    fi
    check_count=$(ls "../$outdir" | wc -l)
    echo "$(( ${check_count:-0} )) file(s) available locally"
  else
    if [ "${ISQUICK:-0}" -eq 1 ]; then
      echo "quick mode selected. Skipping check"
    else
      echo "Empty local repository"
    fi
    cat < "$outdir-list" > "$outdir-newlist"
  fi
}
edhoprima
parents:
diff changeset
108
edhoprima
parents:
diff changeset
# FETCH: hand the list of missing files to a background wget.
#
# Must be called with the work directory ($basedir/temp) as the current
# directory. When there is something to download, the current directory is
# changed to ../$outdir so the files land there.
#
# Globals read: outdir
# Files: reads $outdir-newlist; wget logs to ../temp/$outdir.log
# Returns: 1 if ../$outdir cannot be entered (wget is then not started),
#          otherwise the status of the last command run.
FETCH()
{
  # A missing or empty list both count as "nothing to do".
  fetch_pending=$(wc -l < "$outdir-newlist")
  if [ "$(( ${fetch_pending:-0} ))" -eq 0 ]; then
    echo "No new file"
  else
    echo "Starting wget"
    if ! cd "../$outdir"; then
      echo "Cannot enter ../$outdir, not starting wget" >&2
      return 1
    fi
    # -b: run in background, -i: read URLs from file, -o: write log to file
    wget -e continue=on -bi "../temp/$outdir-newlist" -o "../temp/$outdir.log"
  fi
}
edhoprima
parents:
diff changeset
119
edhoprima
parents:
diff changeset
120
edhoprima
parents:
diff changeset
# path initialization
# $extrapath is appended only when it is set: an empty entry in PATH (such
# as a trailing ":") makes the shell look for commands in the current
# directory.
if [ -n "$extrapath" ]; then
  PATH=${PATH}:$extrapath
fi
export PATH

# verify all programs required do indeed exist
# basic tools
commands="cut sed wc wget xsltproc xargs rm mkdir chmod comm grep"
cmderr=" "
for cmd in $commands; do
  command -v "$cmd" >/dev/null || cmderr="$cmderr $cmd"
done
if [ x"$cmderr" != x" " ]; then
  echo "$cmderr doesn't exist in $PATH"
  exit 1
fi

# md5
# $md5 may carry options ("md5 -r"); only its first word is the program name.
md5base=${md5%% *}
if ! command -v "$md5base" >/dev/null; then
  echo "$md5base doesn't exist in $PATH"
  exit 1
fi
# Self-test: hashing the line "test" must put the digest below in the first
# space-separated field. The result is quoted so that empty output is
# reported as a failure instead of breaking the comparison.
md5out=$(echo test | $md5 | cut -d ' ' -f 1)
if [ "$md5out" != "d8e8fca2dc0f896fd7cb4cb0031ba249" ]; then
  echo "$md5 doesn't produce wanted output"
  exit 1
fi
edhoprima
parents:
diff changeset
147
edhoprima
parents:
diff changeset
148
edhoprima
parents:
diff changeset
# all green (part 1)! let's go (until we check the tag)

# initialization
# are we really doing it?
HELP="Usage: moefetch (quick)fetch|status <tags>"

# A mode word and at least one tag are required.
if [ "$#" -lt 2 ]; then
  echo "$HELP"
  exit 1
fi

# Only the three known modes are accepted; anything else prints the usage.
case "$1" in
  status|fetch|quickfetch)
    echo "Starting..."
    ;;
  *)
    echo "$HELP"
    exit 1
    ;;
esac
edhoprima
parents:
diff changeset
169
edhoprima
parents:
diff changeset
# we did it indeed
# get started

# Everything after the mode word is the tag list ("$*" joins the arguments
# with spaces). The shift happens inside the command substitution, so "$1"
# still holds the mode afterwards.
tags=$(shift; printf '%s' "$*")
echo "Tags: $tags"
# slash do not want
outdir=$(printf '%s\n' "$tags" | sed -e 's/\//_/g')

# CHECK deletes files from $basedir/$outdir, so the directory name must not
# resolve to $basedir itself, to its parent, or to the work directory "temp".
case "$outdir" in
  ""|.|..|temp)
    echo "Fatal error: \"$tags\" cannot be used as a directory name in ${basedir}. Stopping"
    exit 1
    ;;
esac

# do we own the files
ISNEW=0
if [ ! -O "$basedir" ]; then
  echo "Fatal error: you don't own ${basedir}. Please fix ${basedir}. Stopping"
  exit 1
fi

if [ ! -d "$basedir/$outdir" ]; then
  ISNEW=1
  mkdir "$basedir/$outdir" || exit 1
fi
if [ ! -O "$basedir/$outdir" ]; then
  echo "You don't own the $basedir/$outdir, applying globally writeable permission on it"
  chmod -R u=rwX,g=rwX,o=rwX "$basedir/$outdir"
fi
# An existing but empty directory is treated like a new one.
if [ -z "$(ls "$basedir/$outdir")" ]; then
  ISNEW=1
fi

if [ ! -d "$basedir/temp" ]; then
  mkdir "$basedir/temp" || exit 1
fi
if [ ! -O "$basedir/temp" ]; then
  echo "You don't own the $basedir/temp, applying globally writeable permission on it"
  chmod -R u=rwX,g=rwX,o=rwX "$basedir/temp"
fi

# let's move to workdir
# Stop if the work directory cannot be entered: everything below, and the
# functions called later, use paths relative to it.
cd "$basedir/temp" || exit 1
# The "./" prefix keeps tags that start with "-" from being read as options.
for workfile in error ok list newlist; do
  touch "./$outdir-$workfile" || exit 1
done
edhoprima
parents:
diff changeset
209
edhoprima
parents:
diff changeset
# let's do the job!
# Each step runs only if the previous one succeeded, and the status of the
# last step that ran becomes the exit status of the script.
ISQUICK=0
case "$1" in
  status)
    GENERATE && CHECK
    ;;
  fetch)
    GENERATE && CHECK && FETCH
    ;;
  quickfetch)
    # Quick mode: force ISNEW=1 so CHECK schedules every URL on the list
    # without verifying the local files.
    GENERATE || exit 1
    ISNEW=1
    ISQUICK=1
    CHECK && FETCH
    ;;
esac