A start towards Net::OpenID::UserProfile
Martin Atkins
mart at degeneration.co.uk
Wed May 25 12:13:52 PDT 2005
Martin Atkins wrote:
>
> Attached are some Perl modules which implement the lowest level of
> getting user profile data: actually grovelling around inside the
> referenced documents and making a nice, uniform data structure out of it.
>
Why do I always forget to attach my attachments?
Also included is a really ugly test script I was using to test it.
-------------- next part --------------
package RSS;
use base (Fetcher);
use XML::RSS::Parser 1.0;
use strict;
sub get_profile_info {
my ($self, $url) = @_;
my $res = $self->_fetch($url);
return undef unless $res->is_success();
my $p = new XML::RSS::Parser;
my $doc;
eval { $doc = $p->parse($res->content()); };
return undef if $@ || ! $doc;
my $nsuri = $doc->rss_namespace_uri;
my $rss;
# The RSS module should really handle this case, but it doesn't.
if ($nsuri) {
$rss = sub { return $p->ns_qualify($_[0], $nsuri); };
}
else {
$rss = sub { return $_[0]; };
}
my $eval = sub { return $_[0] ? $_[0]->value : undef; };
my $chan = $doc->channel;
return undef unless $chan;
return {
'weblog_url' => $eval->($chan->children($rss->('link'))),
'weblog_title' => $eval->($chan->children($rss->('title'))),
};
}
1;
-------------- next part --------------
# This should almost certainly use some Atom-specific parser, but
# I didn't have much luck getting XML::Atom to work. It kept returning
# undef for everything.
package Atom;
use base (Fetcher);
use strict;
use XML::XPath;
my %url_types = (
'text/html' => 1,
'application/xhtml+xml' => 1,
'text/xhtml+xml' => 1,
);
sub get_profile_info {
my ($self, $url) = @_;
my $res = $self->_fetch($url);
return undef unless $res->is_success();
my $content = $res->content();
my $doc;
eval { $doc = new XML::XPath(xml => $content); };
return undef if $@ || ! $doc;
my $ret = {};
# FIXME: All of the URLy stuff here needs to be made into a full URL based on xml:base
LINKPATH:
foreach my $p (("/feed/head/link", "/feed/link")) {
foreach my $l ($doc->findnodes($p)) {
if ($l->getAttribute("rel") eq 'alternate' && $url_types{$l->getAttribute("type")}) {
$ret->{weblog_url} = $l->getAttribute('href');
$ret->{weblog_name} = $l->getAttribute('title') || undef;
last LINKPATH;
}
}
}
# FIXME: Some of these text elements have a "type" parameter which says whether
# they are plain text, escaped HTML or embedded XML. Should deal with that
# in some way, though I'm not really sure how.
my $headthingtext = sub {
my $it = $doc->getNodeText("/feed/head/$_[0]")
|| $doc->getNodeText("/feed/$_[0]");
return $it ? $it->value() : undef;
};
if (! defined $ret->{weblog_name}) {
$ret->{weblog_name} = $headthingtext->("title");
}
$ret->{name} = $headthingtext->("author/name");
$ret->{website_url} = $headthingtext->("author/uri") || $headthingtext->("author/url");
$ret->{email} = $headthingtext->("author/email");
return $ret;
}
1;
-------------- next part --------------
package FOAF;
use base (Fetcher);
use strict;
use XML::FOAF;
sub get_profile_info {
my ($self, $url) = @_;
my $res = $self->_fetch($url);
return undef unless $res->is_success();
my $content = $res->content();
my $doc;
eval { $doc = new XML::FOAF(\$content, $url); };
return undef if $@ || ! $doc;
my $p = $doc->person();
return undef unless $p;
my $ret = {
'name' => $p->name(),
'nick' => $p->nick(),
'mbox_sha1' => $p->mbox_sha1sum(),
'website_url' => $p->homepage(),
'weblog_url' => $p->weblog(),
'photo' => $p->img(),
'birthdate' => $p->dateOfBirth(),
};
if ($p->mbox() =~ /^mailto:(.*)$/) {
$ret->{email} = $1;
}
return $ret;
}
1;
-------------- next part --------------
package Fetcher;
use LWP::UserAgent;
use HTTP::Request;
sub new {
my ($class, $ua) = @_;
my $self = {
ua => $ua || new LWP::UserAgent(),
};
return bless($self, $class);
}
sub get_profile_info { return undef; }
sub _fetch {
my ($self, $url) = @_;
return $self->{ua}->get($url);
}
1;
-------------- next part --------------
use RSS;
use Atom;
use FOAF;
# use vCard; # Not supported yet.
use Data::Dumper;
my $rss = new RSS();
my $atom = new Atom();
my $foaf = new FOAF();
# my $vcard = new vCard(); # Not supported yet.
my @rss_tests = (
"http://norman.walsh.name/rss/index.rss",
"http://frank.livejournal.com/data/rss",
"http://www.intertwingly.net/blog/index.rss",
"http://www.intertwingly.net/blog/index.rdf",
"http://www.intertwingly.net/blog/index.rss11",
"http://www.intertwingly.net/blog/index.rss2",
);
foreach (@rss_tests) {
print "******* $_ as RSS *******\n";
print Data::Dumper::Dumper($rss->get_profile_info($_));
}
my @atom_tests = (
"http://norman.walsh.name/atom/whatsnew.xml",
"http://frank.livejournal.com/data/atom",
"http://www.decafbad.com/blog/atom.xml",
);
foreach (@atom_tests) {
print "******* $_ as Atom *******\n";
print Data::Dumper::Dumper($atom->get_profile_info($_));
}
my @foaf_tests = (
"http://frank.livejournal.com/data/foaf",
"http://rdfweb.org/people/danbri/rdfweb/webwho.xrdf",
"http://www.ideaspace.net/users/wkearney/foaf.xrdf",
# "http://crschmidt.net/foaf.rdf", # Down?
);
foreach (@foaf_tests) {
print "******* $_ as FOAF *******\n";
print Data::Dumper::Dumper($foaf->get_profile_info($_));
}
1;
More information about the yadis
mailing list