A start towards Net::OpenID::UserProfile

Wed May 25 12:13:52 PDT 2005

Martin Atkins wrote:
> 
> Attached are some Perl modules which implement the lowest level of 
> getting user profile data: actually grovelling around inside the 
> referenced documents and making a nice, uniform data structure out of it.
> 

Why do I always forget to attach my attachments?

Also included is a really ugly test script I was using to test it.

-------------- next part --------------

package RSS;

use base (Fetcher);
use XML::RSS::Parser 1.0;
use strict;

sub get_profile_info {
    my ($self, $url) = @_;

    my $res = $self->_fetch($url);

    return undef unless $res->is_success();

    my $p = new XML::RSS::Parser;

    my $doc;
    eval { $doc = $p->parse($res->content()); };
    return undef if $@ || ! $doc;

    my $nsuri = $doc->rss_namespace_uri;
    my $rss;

    # The RSS module should really handle this case, but it doesn't.
    if ($nsuri) {
        $rss = sub { return $p->ns_qualify($_[0], $nsuri); };
    }
    else {
        $rss = sub { return $_[0]; };
    }

    my $eval = sub { return $_[0] ? $_[0]->value : undef; };

    my $chan = $doc->channel;
    return undef unless $chan;

    return {
        'weblog_url' => $eval->($chan->children($rss->('link'))),
        'weblog_title' => $eval->($chan->children($rss->('title'))),
    };
}

1;
-------------- next part --------------

# This should almost certainly use some Atom-specific parser, but
# I didn't have much luck getting XML::Atom to work. It kept returning
# undef for everything.

package Atom;

use base (Fetcher);
use strict;
use XML::XPath;

my %url_types = (
    'text/html' => 1,
    'application/xhtml+xml' => 1,
    'text/xhtml+xml' => 1,
);

sub get_profile_info {
    my ($self, $url) = @_;

    my $res = $self->_fetch($url);
    return undef unless $res->is_success();

    my $content = $res->content();
    my $doc;
    eval { $doc = new XML::XPath(xml => $content); };
    return undef if $@ || ! $doc;

    my $ret = {};

    # FIXME: All of the URLy stuff here needs to be made into a full URL based on xml:base

    LINKPATH:
    foreach my $p (("/feed/head/link", "/feed/link")) {
        foreach my $l ($doc->findnodes($p)) {
            if ($l->getAttribute("rel") eq 'alternate' && $url_types{$l->getAttribute("type")}) {
                $ret->{weblog_url} = $l->getAttribute('href');
                $ret->{weblog_name} = $l->getAttribute('title') || undef;
                last LINKPATH;
            }
        }
    }

    # FIXME: Some of these text elements have a "type" parameter which says whether
    #       they are plain text, escaped HTML or embedded XML. Should deal with that
    #       in some way, though I'm not really sure how.
    my $headthingtext = sub {
        my $it = $doc->getNodeText("/feed/head/$_[0]")
              || $doc->getNodeText("/feed/$_[0]");
        return $it ? $it->value() : undef;
    };

    if (! defined $ret->{weblog_name}) {
        $ret->{weblog_name} = $headthingtext->("title");
    }

    $ret->{name} = $headthingtext->("author/name");
    $ret->{website_url} = $headthingtext->("author/uri") || $headthingtext->("author/url");
    $ret->{email} = $headthingtext->("author/email");

    return $ret;
}

1;

-------------- next part --------------

package FOAF;

use base (Fetcher);
use strict;
use XML::FOAF;

sub get_profile_info {
    my ($self, $url) = @_;

    my $res = $self->_fetch($url);
    return undef unless $res->is_success();

    my $content = $res->content();
    my $doc;
    eval { $doc = new XML::FOAF(\$content, $url); };
    return undef if $@ || ! $doc;

    my $p = $doc->person();
    return undef unless $p;

    my $ret = {
        'name' => $p->name(),
        'nick' => $p->nick(),
        'mbox_sha1' => $p->mbox_sha1sum(),
        'website_url' => $p->homepage(),
        'weblog_url' => $p->weblog(),
        'photo' => $p->img(),
        'birthdate' => $p->dateOfBirth(),
    };

    if ($p->mbox() =~ /^mailto:(.*)$/) {
        $ret->{email} = $1;
    }

    return $ret;
}

1;
-------------- next part --------------

package Fetcher;

use LWP::UserAgent;
use HTTP::Request;

sub new {
    my ($class, $ua) = @_;

    my $self = {
        ua => $ua || new LWP::UserAgent(),
    };
    return bless($self, $class);
}

sub get_profile_info { return undef; }

sub _fetch {
    my ($self, $url) = @_;

    return $self->{ua}->get($url);
}

1;
-------------- next part --------------

use RSS;
use Atom;
use FOAF;
# use vCard;                  # Not supported yet.
use Data::Dumper;

my $rss = new RSS();
my $atom = new Atom();
my $foaf = new FOAF();
# my $vcard = new vCard();    # Not supported yet.

my @rss_tests = (
    "http://norman.walsh.name/rss/index.rss",
    "http://frank.livejournal.com/data/rss",
    "http://www.intertwingly.net/blog/index.rss",
    "http://www.intertwingly.net/blog/index.rdf",
    "http://www.intertwingly.net/blog/index.rss11",
    "http://www.intertwingly.net/blog/index.rss2",
);

foreach (@rss_tests) {
    print "******* $_ as RSS *******\n";
    print Data::Dumper::Dumper($rss->get_profile_info($_));
}

my @atom_tests = (
    "http://norman.walsh.name/atom/whatsnew.xml",
    "http://frank.livejournal.com/data/atom",
    "http://www.decafbad.com/blog/atom.xml",
);

foreach (@atom_tests) {
    print "******* $_ as Atom *******\n";
    print Data::Dumper::Dumper($atom->get_profile_info($_));
}

my @foaf_tests = (
    "http://frank.livejournal.com/data/foaf",
    "http://rdfweb.org/people/danbri/rdfweb/webwho.xrdf",
    "http://www.ideaspace.net/users/wkearney/foaf.xrdf",
    # "http://crschmidt.net/foaf.rdf",  # Down?
);

foreach (@foaf_tests) {
    print "******* $_ as FOAF *******\n";
    print Data::Dumper::Dumper($foaf->get_profile_info($_));
}

1;