Showing posts with label download. Show all posts
Showing posts with label download. Show all posts

Thursday, July 28, 2011

Simple Script to Download epaper from The Hindu


#!/bin/bash
#ishan dot karve at gmail dot com
#
#Script to download epaper from Hindu
#No more subscription .. pls donate the money to Prime Ministers Welfare Fund
#As always /// Its free to use...
#
#What it does: asks for an edition, probes the server with "wget --spider" to
#count the pages of the main paper (A) and the supplement (B), downloads every
#page into a scratch directory on the Desktop, merges them into one PDF in the
#current directory with Ghostscript and finally removes the scratch files.
#Needs: wget, gs

#edition codes used inside the PDF file names and their display names
edition_choice=([0]=101 [1]=102 [2]=103)
edition_name=([0]=Chennai [1]=Hyderabad [2]=Delhi)
#define max incremental page limit
max_spider=100
#read the date once so that every URL and file name agrees, even when the
#run crosses midnight
read -r year month day month_abbr <<<"$(date '+%Y %m %d %b')"

#Print the URL of one page.
#Arguments: $1 - section letter (A = main paper, B = supplement)
#           $2 - page number (integer, zero padded to three digits here)
#Globals:   year, month, day, edition_choice, ed (all read)
page_url() {
  local page
  printf -v page '%03d' "$2"
  printf '%s\n' "http://epaper.thehindu.com/pdf/${year}/${month}/${day}/${year}${month}${day}${1}_${page}${edition_choice[ed]}.pdf"
}

#Count the pages of one section by probing page after page until wget
#reports a broken link (or max_spider pages were probed).
#Arguments: $1 - section letter
#Outputs:   progress on stdout; the result is stored in the global page_count
count_pages() {
  local j probe
  for ((j = 1; j <= max_spider; j++)); do
    printf 'Searching for Page %03d\n' "$j"
    #LC_ALL=C keeps wget's messages in English so the match below works
    #under any locale
    probe=$(LC_ALL=C wget --spider "$(page_url "$1" "$j")" 2>&1)
    printf '%s\n' "$probe"
    if [[ $probe == *'link!!!'* ]]; then
      break
    fi
  done
  #j stopped one past the last page that was found
  page_count=$((j - 1))
}

#Download pages 1..$2 of section $1 into the scratch directory.
#Arguments: $1 - section letter, $2 - number of pages
#Globals:   ty_dir (read)
download_section() {
  local i page
  for ((i = 1; i <= $2; i++)); do
    #prepend zero to single digits
    printf -v page '%03d' "$i"
    echo "Downloading Page $page"
    wget -q -O "$ty_dir/${1}${page}.pdf" "$(page_url "$1" "$i")"
  done
}

#Get user to select edition
echo "Hindu epaper editions are"
echo "-------------------------------------------------"
echo "0. Chennai"
echo "1. Hyderabad"
echo "2. Delhi"
echo "-------------------------------------------------"
while true; do
  #leave instead of looping forever when stdin is closed
  read -r -p "Enter edition you wish to selec[0-2]: " ed || exit 1
  case $ed in
    [012])
      echo "Thanks."
      break
      ;;
    *) echo "Please select the correct numeric serial." ;;
  esac
done

#spider the selected edition using wget to estimate number of pages
echo "Estimating number of pages in ${edition_name[ed]} edition"
count_pages A
npages_A=$page_count
clear
echo "Estimating number of pages in ${edition_name[ed]} edition supplement"
count_pages B
npages_B=$page_count
clear

#nothing to download or merge: stop before touching the file system
if ((npages_A + npages_B == 0)); then
  echo "No pages found for ${edition_name[ed]} edition." >&2
  exit 1
fi

ty_dir="$HOME/Desktop/hindu_${edition_name[ed]}_${day}${month}${year}"
#mkdir to store individual pages (-p: a directory left by an earlier run is ok)
mkdir -p -- "$ty_dir" || exit 1
echo "Please be patient..Bandwidth intensive operation starts..;-)"
echo "Downloading Main Paper .. total $npages_A pages"
download_section A "$npages_A"
echo "Downloading Supplement .. total $npages_B pages"
download_section B "$npages_B"

echo "Combining all pages into a single pdf document"
#combine multiple pdf files; only throw the single pages away when that worked
if gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE="The_Hindu_${edition_name[ed]}_${day}${month_abbr}${year}.pdf" -dBATCH "$ty_dir"/*.pdf; then
  #empty directory (:? refuses to expand an empty path)
  rm -- "${ty_dir:?}"/*.*
  #remove directory
  rmdir -- "$ty_dir"
else
  echo "Could not combine the pages; they are kept in $ty_dir" >&2
  exit 1
fi
How to get it running

Copy the script to your Linux desktop and save it as thehindu.sh.
Open a terminal and type the following commands:

cd ~/Desktop
chmod +x thehindu.sh
./thehindu.sh

Tuesday, July 19, 2011

Simple Script to Download epaper from Mid-Day


#!/bin/bash
#ishan dot karve at gmail dot com
#Script to download epaper from mid-day.com
#As always /// Its free to use...
#
#What it does: asks for an edition and a page range, then fetches each page of
#today's paper as a PDF into the current directory with wget.
#Needs: wget

#edition names used in the URL path and the abbreviations used in file names
edition_choice=([0]=mumbai [1]=delhi [2]=bangalore [3]=pune)
edition_abbr=([0]=md-mn [1]=md-dn [2]=md-bn [3]=md-pn)
#today's date as ddmmyyyy, read once; it is used both for the directory and
#for the file name (the file name used to be fixed to 19072011, so the script
#only worked on that single day)
today=$(date +%d%m%Y)

#Get user to select edition
echo "Mid-Day epaper editions are"
echo "-------------------------------------------------"
echo "0. Mumbai"
echo "1. Delhi"
echo "2. Bangalore"
echo "3. Pune"
echo "-------------------------------------------------"
while true; do
  #leave instead of looping forever when stdin is closed
  read -r -p "Enter edition you wish to selec[0-3]: " ed || exit 1
  case $ed in
    [0123])
      echo "Thanks."
      break
      ;;
    *) echo "Please select the correct numeric serial." ;;
  esac
done

#Get user to input starting page
read -r -p "Please enter the starting page you wish to download from?" strt_pg || exit 1
#Get user to input ending page
read -r -p "Please enter the ending page you wish to download?" end_pg || exit 1
#the page numbers are used in shell arithmetic below, so accept digits only
if [[ ! $strt_pg =~ ^[0-9]+$ || ! $end_pg =~ ^[0-9]+$ ]]; then
  echo "Page numbers must be whole numbers." >&2
  exit 1
fi
#force base 10 so that input such as 08 is not rejected as bad octal
strt_pg=$((10#$strt_pg))
end_pg=$((10#$end_pg))

while true; do
  read -r -p "Do you wish download pages $strt_pg to $end_pg? [Y/N]" yn || exit 1
  case $yn in
    [Yy]*)
      for ((i = strt_pg; i <= end_pg; i++)); do
        echo "Downloading Page $i"
        I_FILE="http://epaper2.mid-day.com/DRIVE/${edition_choice[ed]}/${today}/epaperpdf/${today}-${edition_abbr[ed]}-$i.pdf"
        wget "$I_FILE"
      done
      break
      ;;
    [Nn]*) exit ;;
    *) echo "Please answer yes or no." ;;
  esac
done







How to get it running

Copy the script to your Linux desktop and save it as milk_day.sh.
Open a terminal and type the following commands:

cd ~/Desktop
chmod +x milk_day.sh
./milk_day.sh

Simple Script to Download epaper from Indian Express

#!/bin/bash
# ishan dot karve at gmail dot com
#Script to download epaper from indian express
#As always /// Its free to use...
#
#What it does: scrapes the edition list from the epaper front page, asks for
#an edition and a page range, downloads the pages into a scratch directory on
#the Desktop, merges them into one PDF in the current directory with
#Ghostscript and removes the scratch files.
#Needs: curl, sed (GNU), cut, wget, gs
clear

#keep the front page in a private temporary file (not a fixed name in /tmp)
#and remove it again on every exit path
editions_page=$(mktemp) || { echo "Cannot create a temporary file." >&2; exit 1; }
trap 'rm -f -- "$editions_page"' EXIT
curl -s http://epaper.indianexpress.com > "$editions_page"

#temp1: the src value that follows each 'max-height:none;"' attribute
temp1=$(sed -nr 's/(.*)max-height:none;" src="?([^ ">]*).*/\2\n\1/; T; P; D;' "$editions_page")
#temp2: the text of each <span class="caption"> element
temp2=$(sed -n -e 's/.*<span class="caption">\(.*\)<\/span>.*/\1/p' "$editions_page")
#split both lists on whitespace into arrays (no glob expansion)
#NOTE(review): a caption made of several words becomes several entries and
#would shift the numbering against editions_link - confirm against the site
read -r -d '' -a editions <<<"$temp2"
read -r -d '' -a editions_link <<<"$temp1"

#a failed download or a changed page layout leaves nothing to choose from
if ((${#editions[@]} == 0)); then
  echo "No editions found. Please check your connection and try again." >&2
  exit 1
fi

echo "Following ${#editions[*]} Editions available for download"
count=0
for name in "${editions[@]}"; do
  echo "$count.  $name"
  count=$((count + 1))
done

while true; do
  #leave instead of looping forever when stdin is closed
  read -r -p "Enter edition you wish to select[0-9]: " ed || exit 1
  case $ed in
    [0123456789])
      echo "Processing..."
      break
      ;;
    *) echo "Please select the correct numeric serial." ;;
  esac
done

if [ "$ed" -ge "${#editions[*]}" ]; then
  echo "Please select proper edition. Please try again. Bye."
  #an invalid choice is an error, so do not report success
  exit 1
fi

links=${editions_link[$ed]}
#grab edition id (fourth "/"-separated field of the link)
edition_id=$(printf '%s\n' "$links" | cut -d "/" -f4)

#Get user to input starting page
read -r -p "Please enter the starting page you wish to download from?" strt_pg || exit 1
#Get user to input ending page
read -r -p "Please enter the ending page you wish to download?" end_pg || exit 1
#the page numbers are used in shell arithmetic below, so accept digits only
if [[ ! $strt_pg =~ ^[0-9]+$ || ! $end_pg =~ ^[0-9]+$ ]]; then
  echo "Page numbers must be whole numbers." >&2
  exit 1
fi
#force base 10 so that input such as 08 is not rejected as bad octal
strt_pg=$((10#$strt_pg))
end_pg=$((10#$end_pg))

#today's date as ddmmyyyy, read once so directory and output name agree
today=$(date +%d%m%Y)
while true; do
  read -r -p "Do you wish download pages $strt_pg to $end_pg? [Y/N]" yn || exit 1
  case $yn in
    [Yy]*)
      ty_dir="$HOME/Desktop/ie_day_${today}"
      #-p: a directory left by an earlier run is ok
      mkdir -p -- "$ty_dir" || exit 1
      for ((i = strt_pg; i <= end_pg; i++)); do
        #prepend zero to single digits
        printf -v pageno '%02d' "$i"
        echo "Downloading Page $pageno"
        O_FILE="$ty_dir/$pageno.pdf"
        I_FILE="http://epaper.indianexpress.com/pdf/get/$edition_id/$i"
        wget -O "$O_FILE" "$I_FILE"
      done
      break
      ;;
    [Nn]*) exit ;;
    *) echo "Please answer yes or no." ;;
  esac
done

#combine multiple pdf files; only throw the single pages away when that worked
if gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE="ie_${today}.pdf" -dBATCH "$ty_dir"/*.pdf; then
  #empty directory (:? refuses to expand an empty path)
  rm -- "${ty_dir:?}"/*.*
  #remove directory
  rmdir -- "$ty_dir"
else
  echo "Could not combine the pages; they are kept in $ty_dir" >&2
  exit 1
fi



How to get it running

Copy the script to your Linux desktop and save it as milk_express.sh.
Open a terminal and type the following commands:

cd ~/Desktop
chmod +x milk_express.sh
./milk_express.sh

Tuesday, July 12, 2011

Simple Script to Download epaper from Times of India

#!/bin/bash
#Written for a friend in need
#Script to download epaper from indiatimes.com
#As always /// Its free to use...
#
#What it does: asks for an edition and a page range, then fetches each page of
#today's paper as a PDF into the current directory with wget.
#Needs: wget, GNU date (for %-m)

#edition codes used in the URL, in the same order as the menu below
edition_choice=([0]=TOIM [1]=CAP [2]=TOIB [3]=TOIKM [4]=TOICH [5]=TOIPU [6]=TOIA [7]=TOIL [8]=TOIJ [9]=TOIH)
#read the date once so that all parts of the URL agree, even when the run
#crosses midnight; month_short is the month without a leading zero, which is
#what the file name part of the URL uses
read -r year month day month_short <<<"$(date '+%Y %m %d %-m')"

#Get user to select edition
echo "Times of India epaper editions are"
echo "-------------------------------------------------"
echo "0. Mumbai"
echo "1. Delhi"
echo "2. Bangalore"
echo "3. Kolkata"
echo "4. Chennai"
echo "5. Pune"
echo "6. Ahmedabad"
echo "7. Lucknow"
echo "8. Jaipur"
echo "9. Hyderabad"
echo "-------------------------------------------------"
while true; do
  #leave instead of looping forever when stdin is closed
  read -r -p "Enter edition you wish to selec[0-9]: " ed || exit 1
  case $ed in
    [0123456789])
      echo "Thanks."
      break
      ;;
    *) echo "Please select the correct numeric serial." ;;
  esac
done

#Get user to input starting page
read -r -p "Please enter the starting page you wish to download from?" strt_pg || exit 1
#Get user to input ending page
read -r -p "Please enter the ending page you wish to download?" end_pg || exit 1
#the page numbers are used in shell arithmetic below, so accept digits only
if [[ ! $strt_pg =~ ^[0-9]+$ || ! $end_pg =~ ^[0-9]+$ ]]; then
  echo "Page numbers must be whole numbers." >&2
  exit 1
fi
#force base 10 so that input such as 08 is not rejected as bad octal
strt_pg=$((10#$strt_pg))
end_pg=$((10#$end_pg))

while true; do
  read -r -p "Do you wish download pages $strt_pg to $end_pg? [Y/N]" yn || exit 1
  case $yn in
    [Yy]*)
      for ((i = strt_pg; i <= end_pg; i++)); do
        echo "Downloading Page $i"
        I_FILE="http://epaper.timesofindia.com/Repository/${edition_choice[ed]}/${year}/${month}/${day}/${edition_choice[ed]}_${year}_${month_short}_${day}_$i.pdf"
        wget "$I_FILE"
      done
      break
      ;;
    [Nn]*) exit ;;
    *) echo "Please answer yes or no." ;;
  esac
done


How to get it running

Copy the script to your Linux desktop and save it as test_1.sh.
Open a terminal and type the following commands:

cd ~/Desktop
chmod +x test_1.sh
./test_1.sh


14 Jul 2011 0026 : Script updated to reflect various editions....