Posted

Complément :


Script permettant de retrouver le lien manuellement :



# Last modified: 2013 Apr 22
# Version 1.0

# Author: Florian CROUZAT <contact@floriancrouzat.net>
# Feel free to do whatever you want with this file.
# Just make sure to give credit where credit is due.

# Arg(s): $1 - Arte+7 html URL of the page used to actually see the video.

# As of 22/04/2013, Arte completely rewrote its website, leading to this new version of the tool.
# It doesn't require rtmpdump anymore, as direct MP4 download links can be found in the page code.

# Print an error message on stderr, remove the temporary files and abort.
# Globals:   BAD, NORMAL (read; used as message decoration, empty when unset)
#            xml1, xml2, xml3, xml3lang1, xml3lang2 (read; temp file paths)
# Arguments: $@ - words of the message to display
# Outputs:   the message, prefixed with a star, on stderr
# Returns:   never returns; exits the script with status 1
function die() {
  local tmpfile
  printf '%s\n' " ${BAD}*${NORMAL} $*" >&2
  # Try to clean up mktemp. die can run before any temp file was created,
  # so names that are still empty are skipped.
  for tmpfile in "$xml1" "$xml2" "$xml3" "$xml3lang1" "$xml3lang2" ; do
    [[ -n $tmpfile ]] && rm -f -- "$tmpfile" &>/dev/null
  done
  exit 1
}

# The page URL ($1) is mandatory: without it, show the usage text through die;
# with it, print an empty line.
if [[ -z $1 ]] ; then
  die "Usage: $0 http://www.arte.tv/guide/fr/048528-002/metropolis?autoplay=1"
else
  echo
fi

# Will I be able to download things ?
# Pick the first available downloader and build two command prefixes. Both are
# meant to be expanded unquoted so that their options split into words:
#   $get  <file> <url>  - quiet download of <url> into <file>
#   $getv <url>         - verbose download of <url>, saved under its remote name
if type -p wget &>/dev/null ; then
  get="wget -q -O "
  getv="wget "
elif type -p curl &>/dev/null ; then
  # -f: fail on HTTP errors, -L: follow redirects (both are what wget does by
  # default). -O: save to a file named like the remote one instead of dumping
  # the video on stdout.
  get="curl -f -s -L -o "
  getv="curl -f -L -O "
else
  die "This script ${HILITE}require${NORMAL} either one of wget or curl."
fi

# Create the five temporary files that hold the downloaded pages.
# Each table entry reads <variable name>:<file name prefix under /tmp>.
for tmp_spec in xml1:xml1 xml2:xml2 xml3:xml3 xml3lang1:xml31 xml3lang2:xml32 ; do
  if ! tmp_path=$(mktemp "/tmp/${tmp_spec#*:}XXXXX") ; then
    die "Cannot create temporary file using mktemp. Exiting."
  fi
  # Store the path in the variable named by the left-hand part of the entry.
  printf -v "${tmp_spec%%:*}" '%s' "$tmp_path"
done
unset tmp_spec tmp_path

# This is where the real stuff begins
# It's a three step parsing. It requires multiple hop to get to the MP4 URL

# Step 1/3
# Download the page given as $1 and search it for the link to the second page,
# which is carried by an arte_vp_url="..." attribute.

# printf rather than echo -e: the URL is user input and must be shown verbatim.
printf '%s\n' "Let's crawl ! Starting with $1 ..."
$get "$xml1" "$1" || die "Download error: cannot fetch $1. Exiting"

# -f2- keeps everything after the first '=', so a link that itself contains
# '=' (query string) is not cut short. head keeps a single link when the
# matching line holds several.
step1=$(zgrep -E -m1 -o 'arte_vp_url="[^ ]+PLUS7[^"]+"' "$xml1" \
  | head -n 1 | tr -d '"' | cut -d= -f2-)

# Have we found what we are looking for ?
[[ -z $step1 ]] && die "Parsing error: no \"arte_vp_url\" found. Exiting"

# End of step 1

# Step 2
# Download the second page and search it for the links to the third pages,
# one per language.

echo -e " $GOOD*$NORMAL Now parsing $step1 ..."
# Break the file after every comma so that each "key": starts a line; the
# greps below anchor on that.
$get "$xml2" "$step1" && sed -i 's/,/&\n/g' "$xml2"

# The dot before "json" is escaped so that it only matches a real extension.
step2lang1=$(zgrep -E '^"videoStreamUrl":' "$xml2" | grep -E -o 'http://[^ ]+\.json')
lang1=$(zgrep -E '^"videoIsoLang":' "$xml2" | grep -E -o ".._..")
step2lang2=$(zgrep -E '^"videoSwitchLang":' "$xml2" | grep -E -o 'http://[^ ]+\.json')
lang2=$(zgrep -E '^"videoSwitchLang":' "$xml2" | grep -E -o '".._.."' | tr -d '"')

# Have we found what we are looking for ?
[[ -z $step2lang1 ]] && die "Parsing error: no \"videoStreamUrl\" found. Exiting"
[[ -z $step2lang2 ]] && die "Parsing error: no \"videoSwitchLang\" found. Exiting"

# End of step 2

# Step 3
# Download the third page of each language, then read the title from it.

# Print the value of every line of a comma-split file that contains "<key>":,
# with double quotes and commas removed.
# Arguments: $1 - file to read
#            $2 - key name without quotes, e.g. VTI
# Outputs:   the value(s) on stdout; nothing when $1 is not a readable file
function json_field() {
  [[ -r $1 ]] || return 0
  # zgrep -F is the fixed-string search zfgrep performs.
  zgrep -F "\"$2\":" "$1" | cut -d: -f2- | sed 's/[",]//g'
}

echo -e " $GOOD*$NORMAL Now parsing $lang1 page $step2lang1 ..."
if $get "$xml3lang1" "$step2lang1" ; then
  sed -i 's/,/&\n/g' "$xml3lang1"
fi

echo -e " $GOOD*$NORMAL Now parsing $lang2 page $step2lang2 ..."
if $get "$xml3lang2" "$step2lang2" ; then
  sed -i 's/,/&\n/g' "$xml3lang2"
fi

# The displayed title is the VTI field followed by the VSU field.
titlelang1="$(json_field "$xml3lang1" VTI) $(json_field "$xml3lang1" VSU)"
titlelang2="$(json_field "$xml3lang2" VTI) $(json_field "$xml3lang2" VSU)"

# Fill two arrays with the file qualities and the MP4 URLs found in one
# comma-split language page.
# Globals:   loop (read and incremented) - index of the next entry to store
# Arguments: $1 - file to read
#            $2 - name of the array receiving the qualities (VQU fields)
#            $3 - name of the array receiving the URLs (VUR fields)
# Returns:   0; stores nothing when $1 is not a readable file
function collect_mp4_urls() {
  local _json=$1 _quals=$2 _urls=$3
  local _line _value
  [[ -r $_json ]] || return 0
  # Keep the VUR lines that hold an http .mp4 link, each with the line right
  # before it (-B1), which is its VQU line.
  while read -r _line ; do
    if [[ $_line == *'"VQU":'* ]] ; then
      _value=$(printf '%s\n' "$_line" | cut -d: -f2 | sed 's/[",]//g')
      printf -v "${_quals}[$loop]" '%s' "$_value"
    elif [[ $_line == *'"VUR":'* ]] ; then
      _value=$(printf '%s\n' "$_line" \
        | grep -E -o 'http://[^ ]+\.mp4"}]?,$' | sed 's/"}]*,//')
      printf -v "${_urls}[$loop]" '%s' "$_value"
      ((loop++)) # the order of the fields explains why we only increase loop here
    fi
  done < <(zgrep -E '"(VUR|VQU)":' "$_json" | grep -E -B1 'http://[^ ]+\.mp4')
}

# Fill an array with all MP4 URLs and file quality for lang1
loop=0
collect_mp4_urls "$xml3lang1" rtmplang1qual rtmplang1

# Fill an array with all MP4 URLs and file quality for lang2
# loop is not reset on purpose: lang2 entries continue the numbering of lang1,
# so that a single number designates one file across both languages.
collect_mp4_urls "$xml3lang2" rtmplang2qual rtmplang2


# Loop over filled arrays and create a download menu.
# Each entry is listed under its own array index, because that index is what
# the user's answer is later looked up with.
echo -e "Lang: $lang1"
echo -e "Title: $titlelang1"
echo -e "$GOOD*$NORMAL Found ${#rtmplang1[@]} ${HILITE}$lang1 MP4 direct download${NORMAL} URLs ..."
for loop in "${!rtmplang1[@]}" ; do
  echo "   $loop  -> ${rtmplang1qual[$loop]} ${rtmplang1[$loop]} "
done


echo -e "Lang: $lang2"
echo -e "Title: $titlelang2"
echo -e "$GOOD*$NORMAL Found ${#rtmplang2[@]} ${HILITE}$lang2 MP4 direct download${NORMAL} URLs ..."
for loop in "${!rtmplang2[@]}" ; do
  echo "   $loop  -> ${rtmplang2qual[$loop]} ${rtmplang2[$loop]} "
done


rm -f -- "$xml1" "$xml2" "$xml3" "$xml3lang1" "$xml3lang2" # Cleaning mktemp

# End of step 3

# Finally, download the desired MP4

echo -n "Please paste the ${HILITE}number${NORMAL} of the above MP4 file you want to download: "
read -r whichmp4
# One or more digits: the menu can hold more than ten entries.
[[ $whichmp4 =~ ^[0-9]+$ ]] || die "Please enter a valid number. Exiting."
# Force base 10 so that an answer such as 08 is not read as invalid octal.
whichmp4=$((10#$whichmp4))

# Look the number up in both arrays rather than comparing it with an array
# size: this selects exactly the entry stored under that index.
if [[ -n ${rtmplang1[$whichmp4]} ]] ; then
  mp4url=${rtmplang1[$whichmp4]}
elif [[ -n ${rtmplang2[$whichmp4]} ]] ; then
  mp4url=${rtmplang2[$whichmp4]}
else
  die "Please enter a valid number. Exiting."
fi

echo -n "Press any key to start downloading ... "
# Actually wait for the key press announced above (one character, not echoed).
read -r -n 1 -s
echo

# $getv is left unquoted on purpose: it holds a command plus its options.
$getv "$mp4url" || die "Download failed. Exiting."

exit 0


Lien du script :





Chercher les liens MP4.


Il suffit de retirer « /player/ » de l'URL du fichier JSON.


Ainsi, au lieu de : http://www.arte.tv/papi/tvguide/videos/stream/player/D/039442-002_PLUS7-D/ALL/ALL.json

Utilisez cette adresse : http://www.arte.tv/papi/tvguide/videos/stream/D/039442-002_PLUS7-D/ALL/ALL.json



Le lien .mp4 est dans la page .json


Une fois la vidéo ouverte, clic droit, enregistrer la vidéo sous ...

Edited by insomniaque

