.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{ . if \nF \{ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{ . nr % 0 . nr F 2 . \} . 
\} .\} .rr rF .\" ======================================================================== .\" .IX Title "WWW::Mechanize::Examples 3" .TH WWW::Mechanize::Examples 3 "2023-04-27" "perl v5.16.3" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh .SH "NAME" WWW::Mechanize::Examples \- Sample programs that use WWW::Mechanize .SH "VERSION" .IX Header "VERSION" version 2.17 .SH "SYNOPSIS" .IX Header "SYNOPSIS" Plenty of people have learned WWW::Mechanize, and now, you can too! .PP Following are user-supplied samples of WWW::Mechanize in action. If you have samples you'd like to contribute, please send 'em to \&\f(CW\*(C`<andy@petdance.com>\*(C'\fR. .PP You can also look at the \fIt/*.t\fR files in the distribution. .PP Please note that these examples are not intended to do any specific task. For all I know, they're no longer functional because the sites they hit have changed. They're here to give examples of how people have used WWW::Mechanize. .PP Note that the examples are in reverse order of my having received them, so the freshest examples are always at the top. .SS "Starbucks Density Calculator, by Nat Torkington" .IX Subsection "Starbucks Density Calculator, by Nat Torkington" Here's a pair of programs from Nat Torkington, editor for O'Reilly Media and co-author of the \fIPerl Cookbook\fR. .Sp .RS 4 Rael [Dornfest] discovered that you can easily find out how many Starbucks there are in an area by searching for \*(L"Starbucks\*(R". So I wrote a silly scraper for some old census data and came up with some Starbucks density figures. There's no meaning to these numbers thanks to errors from using old census data coupled with false positives in Yahoo search (e.g., \&\*(L"Dodie Starbuck-Your Style Desgn\*(R" in Portland \s-1OR\s0). But it was fun to waste a night on.
.Sp Here are the top twenty cities in descending order of population, with the amount of territory each Starbucks has. E.g., A New York \s-1NY\s0 Starbucks covers 1.7 square miles of ground. .Sp .Vb 10 \& New York, NY 1.7 \& Los Angeles, CA 1.2 \& Chicago, IL 1.0 \& Houston, TX 4.6 \& Philadelphia, PA 6.8 \& San Diego, CA 2.7 \& Detroit, MI 19.9 \& Dallas, TX 2.7 \& Phoenix, AZ 4.1 \& San Antonio, TX 12.3 \& San Jose, CA 1.1 \& Baltimore, MD 3.9 \& Indianapolis, IN 12.1 \& San Francisco, CA 0.5 \& Jacksonville, FL 39.9 \& Columbus, OH 7.3 \& Milwaukee, WI 5.1 \& Memphis, TN 15.1 \& Washington, DC 1.4 \& Boston, MA 0.5 .Ve .RE .PP \&\f(CW\*(C`get_pop_data\*(C'\fR .PP .Vb 1 \& #!/usr/bin/perl \-w \& \& use WWW::Mechanize; \& use Storable; \& \& $url = \*(Aqhttp://www.census.gov/population/www/documentation/twps0027.html\*(Aq; \& $m = WWW::Mechanize\->new(); \& $m\->get($url); \& \& $c = $m\->content; \& \& $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s \& or die "Can\*(Aqt find the population table\en"; \& $t = $1; \& @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs; \& shift @outer; \& foreach $r (@outer) { \& @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs; \& for ($x = 0; $x < @bits; $x++) { \& $b = $bits[$x]; \& @v = split /\es*<BR>\es*/, $b; \& foreach (@v) { s/^\es+//; s/\es+$// } \& push @{$data[$x]}, @v; \& } \& } \& \& for ($y = 0; $y < @{$data[0]}; $y++) { \& $data{$data[1][$y]} = { \& NAME => $data[1][$y], \& RANK => $data[0][$y], \& POP => comma_free($data[2][$y]), \& AREA => comma_free($data[3][$y]), \& DENS => comma_free($data[4][$y]), \& }; \& } \& \& store(\e%data, "cities.dat"); \& \& sub comma_free { \& my $n = shift; \& $n =~ s/,//; \& return $n; \& } .Ve .PP \&\f(CW\*(C`plague_of_coffee\*(C'\fR .PP .Vb 1 \& #!/usr/bin/perl \-w \& \& use WWW::Mechanize; \& use strict; \& use Storable; \& \& $SIG{_\|_WARN_\|_} = sub {} ; # ssssssh \& \& my $Cities = retrieve("cities.dat"); \& \& my $m = WWW::Mechanize\->new(); \& $m\->get("http://local.yahoo.com/"); \& \& my @cities = sort { $Cities\->{$a}{RANK} <=> $Cities\->{$b}{RANK} } keys %$Cities; \& foreach my $c ( @cities ) { \& my $fields = { \& \*(Aqstx\*(Aq => "starbucks", \& \*(Aqcsz\*(Aq => $c, \& }; \& \& my $r = $m\->submit_form(form_number => 2, \& fields => $fields); \& die "Couldn\*(Aqt submit form" unless $r\->is_success; \& \& my $hits = number_of_hits($r); \& # my $ppl = sprintf("%d", 1000 * $Cities\->{$c}{POP} / $hits); \& # print "$c has $hits Starbucks. That\*(Aqs one for every $ppl people.\en"; \& my $density = sprintf("%.1f", $Cities\->{$c}{AREA} / $hits); \& print "$c : $density\en"; \& } \& \& sub number_of_hits { \& my $r = shift; \& my $c = $r\->content; \& if ($c =~ m{\ed+ out of (\ed+) total results for}) { \& return $1; \& } \& if ($c =~ m{Sorry, no .*?
found in or near}) { \& return 0; \& } \& if ($c =~ m{Your search matched multiple cities}) { \& warn "Your search matched multiple cities\en"; \& return 0; \& } \& if ($c =~ m{Sorry we couldn.t find that location}) { \& warn "No cities\en"; \& return 0; \& } \& if ($c =~ m{Could not find.*?, showing results for}) { \& warn "No matches\en"; \& return 0; \& } \& die "Unknown response\en$c\en"; \& } .Ve .SS "pb-upload, by John Beppu" .IX Subsection "pb-upload, by John Beppu" This program takes filenames of images from the command line and uploads them to a www.photobucket.com folder. John Beppu, the author, says: .Sp .RS 4 I had 92 pictures I wanted to upload, and doing it through a browser would've been torture. But thanks to mech, all I had to do was `./pb.upload *.jpg` and watch it do its thing. It felt good. If I had more time, I'd implement WWW::Photobucket on top of WWW::Mechanize. .RE .PP .Vb 1 \& #!/usr/bin/perl \-w \-T \& \& use strict; \& use WWW::Mechanize; \& \& my $login = "login_name"; \& my $password = "password"; \& my $folder = "folder"; \& \& my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/"; \& \& # login to your photobucket.com account \& my $mech = WWW::Mechanize\->new(); \& $mech\->get($url); \& $mech\->submit_form( \& form_number => 1, \& fields => { password => $password }, \& ); \& die unless ($mech\->success); \& \& # upload image files specified on command line \& foreach (@ARGV) { \& print "$_\en"; \& $mech\->form_number(2); \& $mech\->field(\*(Aqthe_file[]\*(Aq => $_); \& $mech\->submit(); \& } .Ve .SS "listmod, by Ian Langworth" .IX Subsection "listmod, by Ian Langworth" Ian Langworth contributes this little gem that will bring joy to beleaguered mailing list admins. It discards spam messages through mailman's web interface. 
.PP .Vb 8 \& #!/arch/unix/bin/perl \& use strict; \& use warnings; \& # \& # listmod \- fast alternative to mailman list interface \& # \& # usage: listmod crew XXXXXXXX \& # \& \& die "usage: $0 <listname> <password>\en" unless @ARGV == 2; \& my ($listname, $password) = @ARGV; \& \& use CGI qw(unescape); \& \& use WWW::Mechanize; \& my $m = WWW::Mechanize\->new( autocheck => 1 ); \& \& use Term::ReadLine; \& my $term = Term::ReadLine\->new($0); \& \& # submit the form, get the cookie, go to the list admin page \& $m\->get("https://lists.ccs.neu.edu/bin/admindb/$listname"); \& $m\->set_visible( $password ); \& $m\->click; \& \& # exit if nothing to do \& print "There are no pending requests.\en" and exit \& if $m\->content =~ /There are no pending requests/; \& \& # select the first form and examine its contents \& $m\->form_number(1); \& my $f = $m\->current_form or die "Couldn\*(Aqt get first form!\en"; \& \& # get me the base form element for each email item \& my @items = map {m/^.+?\-(.+)/} grep {m/senderbanp/} $f\->param \& or die "Couldn\*(Aqt get items in first form!\en"; \& \& # iterate through items, prompt user, commit actions \& foreach my $item (@items) { \& \& # show item info \& my $sender = unescape($item); \& my ($subject) = [$f\->find_input("senderbanp\-$item")\->value_names]\->[1] \& =~ /Subject:\es+(.+?)\es+Size:/g; \& \& # prompt user \& my $choice = \*(Aq\*(Aq; \& while ( $choice !~ /^[DAX]$/ ) { \& print "$sender\e: \*(Aq$subject\*(Aq\en"; \& $choice = uc $term\->readline("Action: defer/accept/discard [dax]: "); \& print "\en\en"; \& } \& \& # set button \& $m\->field("senderaction\-$item" => {D=>0,A=>1,X=>3}\->{$choice}); \& } \& \& # submit actions \& $m\->click; .Ve .SS "ccdl, by Andy Lester" .IX Subsection "ccdl, by Andy Lester" Steve McConnell, author of the landmark \fICode Complete\fR, has put up the chapters for the 2nd edition in \s-1PDF\s0 format on his website. I needed to download them to take to Kinko's to have printed.
This little program did it for me. .PP .Vb 1 \& #!/usr/bin/perl \-w \& \& use strict; \& use WWW::Mechanize; \& \& my $start = "http://www.stevemcconnell.com/cc2/cc.htm"; \& \& my $mech = WWW::Mechanize\->new( autocheck => 1 ); \& $mech\->get( $start ); \& \& my @links = $mech\->find_all_links( url_regex => qr/\ed+.+\e.pdf$/ ); \& \& for my $link ( @links ) { \& my $url = $link\->url_abs; \& my $filename = $url; \& $filename =~ s[^.+/][]; \& \& print "Fetching $url"; \& $mech\->get( $url, \*(Aq:content_file\*(Aq => $filename ); \& \& print " ", \-s $filename, " bytes\en"; \& } .Ve .SS "quotes.pl, by Andy Lester" .IX Subsection "quotes.pl, by Andy Lester" This was a program that was going to get a hack in \fISpidering Hacks\fR, but got cut at the last minute, probably because it's against \s-1IMDB\s0's \s-1TOS\s0 to scrape from it. I present it here as an example, not a suggestion that you break their \s-1TOS.\s0 .PP Last I checked, it didn't work because their \s-1HTML\s0 didn't match, but it's still good as sample code. 
.PP .Vb 1 \& #!/usr/bin/perl \-w \& \& use strict; \& \& use WWW::Mechanize; \& use Getopt::Long; \& use Text::Wrap; \& \& my $match = undef; \& my $random = undef; \& GetOptions( \& "match=s" => \e$match, \& "random" => \e$random, \& ) or exit 1; \& \& my $movie = shift @ARGV or die "Must specify a movie\en"; \& \& my $quotes_page = get_quotes_page( $movie ); \& my @quotes = extract_quotes( $quotes_page ); \& \& if ( $match ) { \& $match = quotemeta($match); \& @quotes = grep /$match/i, @quotes; \& } \& \& if ( $random ) { \& print $quotes[rand @quotes]; \& } \& else { \& print join( "\en", @quotes ); \& } \& \& \& sub get_quotes_page { \& my $movie = shift; \& \& my $mech = WWW::Mechanize\->new; \& $mech\->get( "http://www.imdb.com/search" ); \& $mech\->success or die "Can\*(Aqt get the search page"; \& \& $mech\->submit_form( \& form_number => 2, \& fields => { \& title => $movie, \& restrict => "Movies only", \& }, \& ); \& \& my @links = $mech\->find_all_links( url_regex => qr[^/Title] ) \& or die "No matches for \e"$movie\e" were found.\en"; \& \& # Use the first link \& my ( $url, $title ) = @{$links[0]}; \& \& warn "Checking $title...\en"; \& \& $mech\->get( $url ); \& my $link = $mech\->find_link( text_regex => qr/Memorable Quotes/i ) \& or die qq{"$title" has no quotes in IMDB!\en}; \& \& warn "Fetching quotes...\en\en"; \& $mech\->get( $link\->[0] ); \& \& return $mech\->content; \& } \& \& \& sub extract_quotes { \& my $page = shift; \& \& # Nibble away at the unwanted HTML at the beginning... \& $page =~ s/.+Memorable Quotes//si; \& $page =~ s/.+?(<a name)/$1/si; \& \& # ... and the end of the page \& $page =~ s/Browse titles in the movie quotes.+$//si; \& $page =~ s/<p.+$//g; \& \& # Quotes separated by an <HR> tag \& my @quotes = split( /<hr.+?>/, $page ); \& \& for my $quote ( @quotes ) { \& my @lines = split( /<br>/, $quote ); \& for ( @lines ) { \& s/<[^>]+>//g; # Strip HTML tags \& s/\es+/ /g; # Squash whitespace \& s/^ //; # Strip leading space \& s/ $//; # Strip trailing space \& s/&#34;/"/g; # Replace HTML entity quotes \& \& # Word\-wrap to fit in 72 columns \& $Text::Wrap::columns = 72; \& $_ = wrap( \*(Aq\*(Aq, \*(Aq \*(Aq, $_ ); \& } \& $quote = join( "\en", @lines ); \& } \& \& return @quotes; \& } .Ve .SS "cpansearch.pl, by Ed Silva" .IX Subsection "cpansearch.pl, by Ed Silva" A quick little utility to search the \s-1CPAN\s0 and fire up a browser with a results page. .PP .Vb 1 \& #!/usr/bin/perl \& \& # turn on perl\*(Aqs safety features \& use strict; \& use warnings; \& \& # work out the name of the module we\*(Aqre looking for \& my $module_name = $ARGV[0] \& or die "Must specify module name on command line"; \& \& # create a new browser \& use WWW::Mechanize; \& my $browser = WWW::Mechanize\->new(); \& \& # tell it to get the main page \& $browser\->get("http://search.cpan.org/"); \& \& # okay, fill in the box with the name of the \& # module we want to look up \& $browser\->form_number(1); \& $browser\->field("query", $module_name); \& $browser\->click(); \& \& # click on the link that matches the module name \& $browser\->follow_link( text_regex => $module_name ); \& \& my $url = $browser\->uri; \& \& # launch a browser... \& system(\*(Aqgaleon\*(Aq, $url); \& \& exit(0); .Ve .SS "lj_friends.cgi, by Matt Cashner" .IX Subsection "lj_friends.cgi, by Matt Cashner" .Vb 1 \& #!/usr/bin/perl \& \& # Provides an rss feed of a paid user\*(Aqs LiveJournal friends list \& # Full entries, protected entries, etc.
\& # Add to your favorite rss reader as \& # http://your.site.com/cgi\-bin/lj_friends.cgi?user=USER&password=PASSWORD \& \& use warnings; \& use strict; \& \& use WWW::Mechanize; \& use CGI; \& \& my $cgi = CGI\->new(); \& my $form = $cgi\->Vars; \& \& my $agent = WWW::Mechanize\->new(); \& \& $agent\->get(\*(Aqhttp://www.livejournal.com/login.bml\*(Aq); \& $agent\->form_number(\*(Aq3\*(Aq); \& $agent\->field(\*(Aquser\*(Aq,$form\->{user}); \& $agent\->field(\*(Aqpassword\*(Aq,$form\->{password}); \& $agent\->submit(); \& $agent\->get(\*(Aqhttp://www.livejournal.com/customview.cgi?user=\*(Aq.$form\->{user}.\*(Aq&styleid=225596&checkcookies=1\*(Aq); \& print "Content\-type: text/plain\en\en"; \& print $agent\->content(); .Ve .SS "Hacking Movable Type, by Dan Rinzel" .IX Subsection "Hacking Movable Type, by Dan Rinzel" .Vb 2 \& use strict; \& use WWW::Mechanize; \& \& # a tool to automatically post entries to a moveable type weblog, and set arbitrary creation dates \& \& my $mech = WWW::Mechanize\->new(); \& my $entry; \& $entry\->{title} = "Test AutoEntry Title"; \& $entry\->{btext} = "Test AutoEntry Body"; \& $entry\->{date} = \*(Aq2002\-04\-15 14:18:00\*(Aq; \& my $start = qq|http://my.blog.site/mt.cgi|; \& \& $mech\->get($start); \& $mech\->field(\*(Aqusername\*(Aq,\*(Aqund3f1n3d\*(Aq); \& $mech\->field(\*(Aqpassword\*(Aq,\*(Aqobscur3d\*(Aq); \& $mech\->submit(); # to get login cookie \& $mech\->get(qq|$start?_\|_mode=view&_type=entry&blog_id=1|); \& $mech\->form_name(\*(Aqentry_form\*(Aq); \& $mech\->field(\*(Aqtitle\*(Aq,$entry\->{title}); \& $mech\->field(\*(Aqcategory_id\*(Aq,1); # adjust as needed \& $mech\->field(\*(Aqtext\*(Aq,$entry\->{btext}); \& $mech\->field(\*(Aqstatus\*(Aq,2); # publish, or 1 = draft \& $results = $mech\->submit(); \& \& # if we\*(Aqre ok with this entry being datestamped "NOW" (no {date} in %entry) \& # we\*(Aqre done. Otherwise, time to be tricksy \& # MT returns a 302 redirect from this form. 
the redirect itself contains a handler \& # which takes the user to an editable version of the form where the create date can be edited \& # MT date format of YYYY\-MM\-DD HH:MI:SS is the only one that won\*(Aqt error out \& \& if ($entry\->{date} && $entry\->{date} =~ /^\ed{4}\-\ed{2}\-\ed{2}\es+\ed{2}:\ed{2}:\ed{2}/) { \& # travel the redirect \& $results = $mech\->get($results\->{_headers}\->{location}); \& $results\->{_content} =~ /get($start.$1); \& $mech\->form_name(\*(Aqentry_form\*(Aq); \& $mech\->field(\*(Aqcreated_on_manual\*(Aq,$entry\->{date}); \& $mech\->submit(); \& } .Ve .SS "get-despair, by Randal Schwartz" .IX Subsection "get-despair, by Randal Schwartz" Randal submitted this bot that walks the despair.com site sucking down all the pictures. .PP .Vb 2 \& use strict; \& $|++; \& \& use WWW::Mechanize; \& use File::Basename; \& \& my $m = WWW::Mechanize\->new; \& \& $m\->get("http://www.despair.com/indem.html"); \& \& my @top_links = @{$m\->links}; \& \& for my $top_link_num (0..$#top_links) { \& next unless $top_links[$top_link_num][0] =~ /^http:/; \& \& $m\->follow_link( n=>$top_link_num ) or die "can\*(Aqt follow $top_link_num"; \& \& print $m\->uri, "\en"; \& for my $image (grep m{^http://store4}, map $_\->[0], @{$m\->links}) { \& my $local = basename $image; \& print " $image...", $m\->mirror($image, $local)\->message, "\en" \& } \& \& $m\->back or die "can\*(Aqt go back"; \& } .Ve .SH "AUTHOR" .IX Header "AUTHOR" Andy Lester .SH "COPYRIGHT AND LICENSE" .IX Header "COPYRIGHT AND LICENSE" This software is copyright (c) 2004 by Andy Lester. .PP This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself.