📄 heritrix
字号:
#!/usr/bin/env sh
##
## This script launches the heritrix crawler.
##
## Optional environment variables
##
## JAVA_HOME        Point at a JDK install to use.
##
## HERITRIX_HOME    Pointer to your heritrix install.  If not present, we
##                  make an educated guess based on position relative to this
##                  script.
##
## HERITRIX_OUT     Pathname to the Heritrix log file written when run in
##                  daemon mode.
##                  Default setting is $HERITRIX_HOME/heritrix_out.log
##
## JAVA_OPTS        Java runtime options.  Default setting is '-Xmx256m'.
##
## FOREGROUND       Set to any value -- e.g. 'true' -- if you want to run
##                  heritrix in foreground (Used by build system when it runs
##                  selftest to see if completed successfully or not).
##
## JMX_OPTS         Default is to startup the JVM JMX administration
##                  on port 8849 if the JVM is SUN JVM 1.5.  This allows JMX
##                  administration of Heritrix.  If the JVM is other than the
##                  SUN JDK 1.5, the arguments are ignored.  If you do not want
##                  to start the JVM JMX administration server on the SUN JDK
##                  1.5, set this variable to empty string.
##
## JMX_PORT         Port you'd like the JVM JMX administration server to run
##                  on.  Default is 8849.
##
## JMX_OFF          Set to a non-empty string to disable JMX (and JMX setup of
##                  password file, etc.)
##
## Word-splitting note: JAVACMD, JAVA_OPTS and JMX_OPTS are deliberately
## expanded unquoted below because each may carry several space-separated
## flags.  Everything else is quoted.

# Resolve links - $0 may be a softlink.
PRG="$0"
while [ -h "$PRG" ]; do
  ls=$(ls -ld "$PRG")
  link=$(expr "$ls" : '.*-> \(.*\)$')
  case "$link" in
    # Absolute target: use it as-is.
    /*) PRG="$link" ;;
    # Relative target (with or without a slash): resolve it against the
    # directory holding the link, not against the current directory.
    *) PRG="$(dirname "$PRG")/$link" ;;
  esac
done
PRGDIR=$(dirname "$PRG")

# Read local heritrix properties if any.
if [ -f "$HOME/.heritrixrc" ]; then
  . "$HOME/.heritrixrc"
fi

# Set HERITRIX_HOME.
if [ -z "$HERITRIX_HOME" ]; then
  HERITRIX_HOME=$(cd "$PRGDIR/.." && pwd)
fi

# Find JAVA_HOME.
if [ -z "$JAVA_HOME" ]; then
  JAVA=$(command -v java)
  if [ -z "$JAVA" ]; then
    echo "Cannot find JAVA. Please set JAVA_HOME or your PATH."
    exit 1
  fi
  JAVA_BINDIR=$(dirname "$JAVA")
  JAVA_HOME=$JAVA_BINDIR/..
fi

if [ -z "$JAVACMD" ]; then
  # It may be defined in env - including flags!!
  # See '[ 1482761 ] BDB Adler32 gc-lock OOME risk' for why we include the
  # 'je.disable.java.adler32'.
  JAVACMD="$JAVA_HOME/bin/java -Dje.disable.java.adler32=true"
fi

# Ignore previous classpath.  Build one that contains heritrix jar and content
# of the lib directory into the variable CP.  Globbing (instead of parsing
# `ls`) keeps jar paths with spaces intact; the -f test skips a pattern that
# matched nothing.  Top-level jars are listed ahead of lib/ jars.
for jar in "$HERITRIX_HOME"/*.jar "$HERITRIX_HOME"/lib/*.jar; do
  [ -f "$jar" ] || continue
  CP=${CP}:${jar}
done

# cygwin path translation
case "$(uname)" in
  CYGWIN*)
    CP=$(cygpath -p -w "$CP")
    HERITRIX_HOME=$(cygpath -p -w "$HERITRIX_HOME")
    ;;
esac

# Make sure of java opts.
if [ -z "$JAVA_OPTS" ]; then
  JAVA_OPTS=" -Xmx256m"
fi

if [ -z "${JMX_OFF}" ]; then
  if [ -z "${JMX_PORT}" ]; then
    JMX_PORT=8849
  fi
  if [ -z "$JMX_OPTS" ]; then
    JMX_OPTS="-Dcom.sun.management.jmxremote.port=${JMX_PORT} \
 -Dcom.sun.management.jmxremote.ssl=false \
 -Dcom.sun.management.jmxremote.password.file=${HERITRIX_HOME}/jmxremote.password"
    # Copy into place a jmxremote password file that uses the heritrix password
    # interpolated (First need to find the current password if one supplied on
    # command-line, else use whats in heritrix.properties as default).
    # Need to make it so its only readable by user else jconsole won't use it.
    JMX_PASSWORD=$(printf '%s\n' "$*" \
      | sed -n -e 's/.*--admin=[^:]*:\([^ ]*\).*/\1/p' \
          -e 's/.*-a *[^:]*:\([^ ]*\).*/\1/p')
    if [ -z "$JMX_PASSWORD" ]; then
      JMX_PASSWORD=$(sed -n -e 's/heritrix.cmdline.admin[ ]*=[^:]*:\(.*\)/\1/p' \
        "${HERITRIX_HOME}/conf/heritrix.properties")
    fi
    JMX_PWORD_FILE="${HERITRIX_HOME}/jmxremote.password"
    if [ -f "${JMX_PWORD_FILE}" ]; then
      rm -f "${JMX_PWORD_FILE}"
    fi
    # Escape the characters that are special on the replacement side of the
    # sed 's' command (backslash, '/', '&') so that any password is written
    # out literally.
    JMX_PASSWORD_ESCAPED=$(printf '%s\n' "$JMX_PASSWORD" \
      | sed -e 's|[\\/&]|\\&|g')
    # Create the file under a restrictive umask so the password is never
    # readable by other users, not even before the chmod below runs.
    (
      umask 077
      sed -e "s/@PASSWORD@/${JMX_PASSWORD_ESCAPED}/" \
        "${HERITRIX_HOME}/conf/jmxremote.password.template" > "${JMX_PWORD_FILE}"
    )
    chmod 600 "${JMX_PWORD_FILE}"
  fi
fi

# Main heritrix class.
if [ -z "$CLASS_MAIN" ]; then
  CLASS_MAIN='org.archive.crawler.Heritrix'
fi

# heritrix_dmesg.log contains startup output from the crawler main class.
# As soon as content appears in this log, this shell script prints the
# successful (or failed) startup content and moves off waiting on heritrix
# startup.  This technique is done so we can show on the console startup
# messages emitted by java subsequent to the redirect of stdout and stderr.
startMessage="${HERITRIX_HOME}/heritrix_dmesg.log"

# Remove any file that may have been left over from previous starts.
# (A stale file would make the wait loop below report the previous run's
# startup message.)
if [ -f "$startMessage" ]; then
  rm -f "$startMessage"
fi

# Run heritrix as daemon.  Redirect stdout and stderr to a file.
# Print start message with date, java version, java opts, ulimit, and uname.
if [ -z "$HERITRIX_OUT" ]; then
  HERITRIX_OUT=${HERITRIX_HOME}/heritrix_out.log
fi
stdouterrlog=${HERITRIX_OUT}
echo "$(date) Starting heritrix" >> "$stdouterrlog"
uname -a >> "$stdouterrlog" 2>&1
${JAVACMD} ${JAVA_OPTS} -version >> "$stdouterrlog" 2>&1
echo "JAVA_OPTS=${JAVA_OPTS}" >> "$stdouterrlog"
ulimit -a >> "$stdouterrlog" 2>&1

# If FOREGROUND is set, run heritrix in foreground.
if [ -n "$FOREGROUND" ]; then
  CLASSPATH="${CP}" $JAVACMD "-Dheritrix.home=${HERITRIX_HOME}" \
    -Djava.protocol.handler.pkgs=org.archive.net \
    "-Dheritrix.out=${HERITRIX_OUT}" ${JAVA_OPTS} ${JMX_OPTS} \
    "$CLASS_MAIN" "$@"
else
  CLASSPATH="${CP}" nohup $JAVACMD "-Dheritrix.home=${HERITRIX_HOME}" \
    -Djava.protocol.handler.pkgs=org.archive.net \
    "-Dheritrix.out=${HERITRIX_OUT}" ${JAVA_OPTS} ${JMX_OPTS} \
    "$CLASS_MAIN" "$@" >> "${stdouterrlog}" 2>&1 &
  heritrix_pid=$!

  # Wait for content in the heritrix_dmesg.log file.
  printf '%s' "$(date) Starting heritrix"
  while true; do
    sleep 1
    if [ -s "$startMessage" ]; then
      echo
      cat "$startMessage"
      rm -f "$startMessage"
      break
    fi
    # Stop waiting if the JVM is gone and left no start message; otherwise
    # this loop would never end.  The second -s test covers a process that
    # wrote its message and exited right after the check above.
    if ! kill -0 "$heritrix_pid" 2> /dev/null && [ ! -s "$startMessage" ]; then
      echo
      echo "Heritrix exited before writing ${startMessage}; see ${stdouterrlog}." >&2
      exit 1
    fi
    printf '.'
  done
fi
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -