#!/bin/bash
#
# Scrape the topic listing pages 1-5 of "ycwdaaaa/topics" on testerhome.com
# and save every linked topic/article as an HTML file.
#
# Steps:
#   1. For each listing page, extract "<path>@<title>" pairs and append them
#      to ./suffix_path.txt.
#   2. Remove all whitespace from those pairs -> ./suffix_path_name.txt.
#   3. Download each topic to ./htmldoc/<title>.html.
#
# This variant splits each "<path>@<title>" pair with cut(1).
#
# Files written (relative to the current directory):
#   ./suffix_path.txt, ./suffix_path_name.txt, ./htmldoc/*.html

set -u

base_path="https://testerhome.com/"
user_path="ycwdaaaa/topics?page="

# Start from a clean slate; -f keeps a first run (no old files) quiet.
rm -f -- ./suffix*
mkdir -p ./htmldoc
rm -f -- ./htmldoc/*

for i in {1..5}; do
  # Quoted everywhere: the URL contains '?', which is a glob character.
  user_page="${base_path}${user_path}${i}"
  echo "${user_page}"

  # Pipeline stages:
  #   grep : keep lines linking to /articles/NNNNN or /topics/NNNNN
  #   awk  : split on '"' and join fields 4 and 5
  #          (assumes markup like <a title="T" href="/topics/N">T</a>, which
  #          yields '/topics/N>T</a>' -- TODO confirm against the live page)
  #   sort : order the entries of this page
  #   awk  : split on '>' or '<' and emit '<path>@<title>'
  # curl -s: best-effort fetch, progress meter and errors are suppressed.
  curl -s "${user_page}" \
    | grep -E 'href="(/articles|/topics)/[0-9]{5}' \
    | awk -F '"' '{print $4$5}' \
    | sort \
    | awk -F '>|<' '{print $1 "@" $2}' \
    >> ./suffix_path.txt
done

# Titles are used as file names below, so drop every whitespace character.
sed 's/[[:space:]]//g' ./suffix_path.txt > ./suffix_path_name.txt

# Read line by line instead of word-splitting a command substitution, so
# glob characters in a title are never expanded by the shell.
while IFS= read -r j || [[ -n "${j}" ]]; do
  # Skip blank lines and lines without the '@' separator (cut would
  # otherwise return the whole line for both fields).
  [[ "${j}" == *@* ]] || continue

  echo "文件行内容:${j}"
  arr_0=$(cut -d"@" -f1 <<< "${j}")
  arr_1=$(cut -d"@" -f2 <<< "${j}")
  echo "=================================="
  echo "arr[0] is: ${arr_0}"
  echo "arr[1] is: ${arr_1}"

  # Nothing sensible to download or to name the file after.
  [[ -n "${arr_0}" && -n "${arr_1}" ]] || continue

  # base_path ends with '/' and arr_0 starts with '/'; avoid a '//' in the URL.
  topic_path="${base_path%/}${arr_0}"
  echo "topic_path is: ${topic_path}"

  # A '/' inside the title would be treated as a directory separator.
  curl -s "${topic_path}" > "./htmldoc/${arr_1//\//_}.html"
done < ./suffix_path_name.txt


---------------------------------------------------------------------------------------

#!/bin/bash
#
# Scrape the topic listing pages 1-5 of "ycwdaaaa/topics" on testerhome.com
# and save every linked topic/article as an HTML file.
#
# Steps:
#   1. For each listing page, extract "<path>@<title>" pairs and append them
#      to ./suffix_path.txt.
#   2. Remove all whitespace from those pairs -> ./suffix_path_name.txt.
#   3. Download each topic to ./htmldoc/<title>.html.
#
# This variant splits each "<path>@<title>" pair into a Bash array by using
# '@' as the field separator (IFS).
#
# Files written (relative to the current directory):
#   ./suffix_path.txt, ./suffix_path_name.txt, ./htmldoc/*.html

set -u

base_path="https://testerhome.com/"
user_path="ycwdaaaa/topics?page="

# Start from a clean slate; -f keeps a first run (no old files) quiet.
rm -f -- ./suffix*
mkdir -p ./htmldoc
rm -f -- ./htmldoc/*

for i in {1..5}; do
  # Quoted everywhere: the URL contains '?', which is a glob character.
  user_page="${base_path}${user_path}${i}"
  echo "${user_page}"

  # Pipeline stages:
  #   grep : keep lines linking to /articles/NNNNN or /topics/NNNNN
  #   awk  : split on '"' and join fields 4 and 5
  #          (assumes markup like <a title="T" href="/topics/N">T</a>, which
  #          yields '/topics/N>T</a>' -- TODO confirm against the live page)
  #   sort : order the entries of this page
  #   awk  : split on '>' or '<' and emit '<path>@<title>'
  # curl -s: best-effort fetch, progress meter and errors are suppressed.
  curl -s "${user_page}" \
    | grep -E 'href="(/articles|/topics)/[0-9]{5}' \
    | awk -F '"' '{print $4$5}' \
    | sort \
    | awk -F '>|<' '{print $1 "@" $2}' \
    >> ./suffix_path.txt
done

# Titles are used as file names below, so drop every whitespace character.
sed 's/[[:space:]]//g' ./suffix_path.txt > ./suffix_path_name.txt

# Read line by line instead of word-splitting a command substitution, so
# glob characters in a title are never expanded by the shell.
while IFS= read -r j || [[ -n "${j}" ]]; do
  # Skip blank lines and lines without the '@' separator.
  [[ "${j}" == *@* ]] || continue

  echo "文件行内容:${j}"

  # IFS is set only for this single read, so there is no global IFS to save
  # and restore and the rest of the loop keeps default word splitting.
  IFS='@' read -r -a arr <<< "${j}"
  arr_0=${arr[0]:-}
  arr_1=${arr[1]:-}
  echo "=================================="
  echo "arr[0] is: ${arr_0}"
  echo "arr[1] is: ${arr_1}"

  # Nothing sensible to download or to name the file after.
  [[ -n "${arr_0}" && -n "${arr_1}" ]] || continue

  # base_path ends with '/' and arr_0 starts with '/'; avoid a '//' in the URL.
  topic_path="${base_path%/}${arr_0}"
  echo "topic_path is: ${topic_path}"

  # A '/' inside the title would be treated as a directory separator.
  curl -s "${topic_path}" > "./htmldoc/${arr_1//\//_}.html"
done < ./suffix_path_name.txt

标签: shell, echo, user, path, arr, 抓取, topic, suffix, 网页内容

相关文章推荐

添加新评论,含*的栏目为必填