@echo off & setlocal enableextensions enabledelayedexpansion
::
:: Purpose: list the HTTP, FILE and FTP links found in an HTML source file
:: Usage:   scriptname HTMLSourceFilename
::
:: Get the name of the HTML file to be processed.
:: The quoted form of SET keeps special characters in the path, such as
:: an ampersand, from being parsed as command syntax.
set "Source=%~1"
::
:: Without an argument show the help screen and stop.
:: GOTO is used here instead of a parenthesized block, because the script
:: name is expanded before the line is parsed and a closing parenthesis
:: in it, as in the Program Files (x86) folder, would end a block early.
if defined Source goto :_SourceGiven
echo +----------------------------------------------------+
echo ^| A script to get the HTTP, FTP and FILE links ^|
echo ^| from an HTML page soure file ^|
echo ^| By Prof. Timo Salmi, Last modified Thu 15-Feb-2007 ^|
echo +----------------------------------------------------+
echo.
echo Usage %~0 [HTMLSourceFilename]
goto :EOF
:_SourceGiven
::
:: Stop if the given file does not exist
if not exist "%Source%" (
  echo File "%Source%" not found
  goto :EOF
)
::
:: Auxiliary temporary files
:: They are placed in the folder named by the mytemp variable when that
:: variable is defined, otherwise in the folder named by temp.
:: The quoted form of SET protects special characters in the folder name.
set "temp_=%temp%"
if defined mytemp set "temp_=%mytemp%"
::
:: Convert the folder name to its short form so that the file names stay
:: usable even where they are written without surrounding quotes
for %%i in ("%temp_%") do set "temp_=%%~si"
set "tempfile=%temp_%\tempfile.tmp"
set "tempfil2=%temp_%\tempfil2.tmp"
::
:: Remove leftovers of an earlier run. The names are quoted in case no
:: short form was available and the path still contains spaces.
for %%t in ("%tempfile%" "%tempfil2%") do if exist %%t del %%t
::
:: Replace each double quote of the source with the delim character, so
:: that FOR /F can split a line at the start and at the end of a quoted
:: HREF value. Customize the delim character if necessary. It must be a
:: character that does not occur inside the links themselves.
set "delim=,"
::
:: Delayed expansion is switched on only for the single ECHO that needs
:: it. Everywhere else in this section it is off, because it would strip
:: exclamation marks, and any text between a pair of them, from the HTML
:: lines and from the extracted links.
setlocal disabledelayedexpansion
::
:: Start from an empty work file so that FIND always has a file to read,
:: even when the source yields no lines at all.
:: ECHO is written with an opening parenthesis directly after it so that
:: a line consisting of on, off or /? is written as text.
type nul>"%tempfile%"
for /f "tokens=*" %%c in ('type "%Source%"') do (
  set "lineContents=%%c"
  setlocal enabledelayedexpansion
  echo(!lineContents:"=%delim%!
  endlocal
)>>"%tempfile%"
::
:: List the links one scheme at a time, in the order HTTP, FILE, FTP.
:: For each scheme the second work file is emptied first, so that links
:: of an earlier scheme cannot show up again in a later listing and so
:: that TYPE always has a file to read.
:: Only the second delim-separated field of a matching line is taken,
:: which means a link is picked up only when its HREF value is the first
:: quoted string on the line. The closing FIND drops any field that is
:: not a link of the current scheme.
for %%s in (http file ftp) do (
  type nul>"%tempfil2%"
  for /f "tokens=2 delims=%delim%" %%a in ('find /i "HREF=%delim%%%s://" "%tempfile%"') do (
    echo(%%a
  )>>"%tempfil2%"
  type "%tempfil2%"|find /i "%%s://"
)
endlocal
::
:: Clean up: delete both temporary files, then end the localization and
:: leave the script. The names are quoted so that a path containing
:: spaces is not split into several names.
if exist "%tempfile%" del "%tempfile%"
if exist "%tempfil2%" del "%tempfil2%"
endlocal
goto :EOF
The output from the tscmd.html index page's source would start with:
http://www.elisanet.fi/tsalmi/info/tscmd.html
http://www.elisanet.fi/tsalmi/homepage.html
http://users.skynet.be/mgueury/mozilla/
http://www.elisanet.fi/tsalmi/pc/link/tscmd.zip
http://groups.google.com/group/alt.msdos.batch.nt
http://www.uva.fi/en/
http://www.uva.fi/fi/about/organisation/services/campus_services/tietotekniikkapalvelut/
http://www.gnu.org/software/sed/manual/sed.html
http://gnuwin32.sourceforge.net/packages/sed.htm
http://sourceforge.net/project/showfiles.php?group_id=9328
http://unxutils.sourceforge.net/
http://sourceforge.net/
http://www.gnu.org/software/gawk/