GhostManSec
Server: Apache
System: Linux webm003.cluster115.gra.hosting.ovh.net 5.15.206-ovh-vps-grsec-zfs-classid #1 SMP Fri May 15 02:41:25 UTC 2026 x86_64
User: eliteafr (153088)
PHP: 5.4.45
Disabled: _dyuweyrj4,_dyuweyrj4r,dl
Upload Files
File: /home/e/l/i/eliteafr/pmb/classes/rdf/arc2/extractors/ARC2_PoshRdfExtractor.php
<?php
/*
homepage: http://arc.semsol.org/
license:  http://arc.semsol.org/license

class:    ARC2 poshRDF Extractor
author:   Benjamin Nowack
version:  2010-11-16
*/

ARC2::inc('ARC2_RDFExtractor');

class ARC2_PoshRdfExtractor extends ARC2_RDFExtractor {

  /**
   * Creates the extractor by delegating straight to the parent constructor.
   *
   * @param mixed $a      passed through unchanged to the parent constructor
   *                      (presumably the configuration array later read as $this->a -- confirm in parent)
   * @param mixed $caller received by reference and passed on to the parent constructor
   */
  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }
  
  /**
   * Initialises the extractor after the parent's own initialisation has run.
   *
   * Loads the "posh_terms" option into $this->terms, sets the prefix used for
   * posh terms, and registers a set of default namespace prefixes.
   */
  function __init() {
    parent::__init();
    $this->ns_prefix = 'posh';
    $this->terms = $this->v('posh_terms', array(), $this->a);
    $default_ns = array(
      'an' => 'http://www.w3.org/2000/10/annotation-ns#',
      'content' => 'http://purl.org/rss/1.0/modules/content/',
      'dc' => 'http://purl.org/dc/elements/1.1/',
      'dct' => 'http://purl.org/dc/terms/',
      'foaf' => 'http://xmlns.com/foaf/0.1/',
      'geo' => 'http://www.w3.org/2003/01/geo/wgs84_pos#',
      'ical' => 'http://www.w3.org/2002/12/cal/icaltzd#',
      'owl' => 'http://www.w3.org/2002/07/owl#',
      'posh' => 'http://poshrdf.org/ns/posh/',
      'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
      'rdfs' => 'http://www.w3.org/2000/01/rdf-schema#',
      'rev' => 'http://www.purl.org/stuff/rev#',
      'rss' => 'http://purl.org/rss/1.0/',
      'sioc' => 'http://rdfs.org/sioc/ns#',
      'skos' => 'http://www.w3.org/2008/05/skos#',
      'uri' => 'http://www.w3.org/2006/uri#',
      'vcard' => 'http://www.w3.org/2006/vcard/ns#',
      'xfn' => 'http://gmpg.org/xfn/11#',
      'xml' => 'http://www.w3.org/XML/1998/namespace',
      'xsd' => 'http://www.w3.org/2001/XMLSchema#',
    );
    /* array union: prefixes already configured win over these defaults */
    $this->a['ns'] = $this->a['ns'] + $default_ns;
  }

  /*  */
  
  /**
   * Entry point: walks the document from its root node and emits triples.
   *
   * Does nothing unless the caller has flagged the "posh-rdf" format as
   * detected. The initial subject is the pair ('_doc', <document base>).
   *
   * @return int|null 0 when the format was not detected, otherwise no value
   */
  function extractRDF() {
    $detected = isset($this->caller->detected_formats['posh-rdf']);
    if (!$detected) return 0;
    $root = $this->getRootNode();
    $doc_base = $this->getDocBase();
    /* the document itself is both the current and the upcoming subject */
    $doc_s = array('_doc', $doc_base);
    $root_ct = array(
      'id' => $root['id'],
      'tag' => $root['tag'],
      'base' => $doc_base,
      's' => array($doc_s),
      'next_s' => $doc_s,
      'ps' => array(),
      'ns' => $this->a['ns'],
      'lang' => '',
      'rpointer' => '',
    );
    $this->processNode($root, $root_ct, 0, 1);
  }
  
  /*  */

  /**
   * Picks the node at which extraction starts.
   *
   * @return array the first node in $this->nodes whose tag is "html",
   *               otherwise the node stored at index 0
   */
  function getRootNode() {
    foreach ($this->nodes as $candidate) {
      if ($candidate['tag'] != 'html') continue;
      return $candidate;
    }
    /* no html element found: fall back to the entry at index 0 */
    return $this->nodes[0];
  }
  
  /*  */

  /**
   * Processes one node and, recursively, its element children.
   *
   * Builds a local context from the inherited one, maintains the subject
   * stack, picks up new subjects (class "rdf-s") and predicates (class or
   * rel "rdf-p"), emits triples for object nodes (class "rdf-o" or one of
   * its typed variants), then descends into the sub-nodes.
   *
   * @param array $n     node to process
   * @param array $ct    context inherited from the parent / preceding sibling
   * @param int   $level nesting depth (only passed along, incremented per level)
   * @param int   $pos   1-based position among the processed siblings, used for the rpointer
   * @return array the local context of this node (changes made while walking
   *               the children are not reflected in it)
   */
  function processNode($n, $ct, $level, $pos) {
    $n = $this->preProcessNode($n);
    /* local context */
    $local_ns = array_merge($ct['ns'], $this->v('xmlns', array(), $n['a']));
    if (isset($n['a']['id'])) {
      $local_rpointer = $n['a']['id'];
    }
    elseif ($n['tag'] == 'cdata') {
      $local_rpointer = '';
    }
    else {
      $local_rpointer = $ct['rpointer'] . '/' . $pos;
    }
    $overrides = array(
      'ns' => $local_ns,
      'rpointer' => $local_rpointer,
      'tag' => $n['tag'],
      'id' => $n['id'],
      'lang' => $this->v('xml:lang', $ct['lang'], $n['a']),
    );
    $lct = array_merge($ct, $overrides);
    /* s stack: same kind of subject replaces the top entry, a new kind is pushed on top */
    $pending_s = $lct['next_s'];
    $pending_key = $pending_s[0];
    $pending_val = $pending_s[1];
    if ($lct['s'][0][0] == $pending_key) {
      $lct['s'][0][1] = $pending_val;
    }
    else {
      $lct['s'] = array_merge(array($pending_s), $lct['s']);
    }
    /* new s: takes effect for the nodes processed after this one */
    if ($this->hasClass($n, 'rdf-s')) {
      $lct['next_s'] = array($n['a']['class'], $this->getSubject($n, $lct));
    }
    /* p */
    $is_p_node = $this->hasClass($n, 'rdf-p') || $this->hasRel($n, 'rdf-p');
    $ps = $is_p_node ? $this->getPredicates($n, $lct['ns']) : array();
    if ($ps) {
      $lct['ps'] = $ps;
      $this->addPoshTypes($lct);
    }
    /* o: only meaningful while predicates are active */
    $cls = $this->v('class', '', $n['a']);
    if ($lct['ps']) {
      if (preg_match('/(^|\s)rdf\-(o|o\-(xml|dateTime|float|integer|boolean))($|\s)/s', $cls, $m)) {
        /* $m[3] is the datatype suffix, or '' for a plain rdf-o */
        $this->addTriples($n, $lct, $m[3]);
      }
    }
    /* sub-nodes */
    $sub_nodes = $this->getSubNodes($n);
    if ($sub_nodes) {
      $ignored_tags = array('cdata', 'comment');
      $cur_ct = $lct;
      $sub_pos = 0;
      foreach ($sub_nodes as $sub_node) {
        if (in_array($sub_node['tag'], $ignored_tags)) continue;
        $sub_pos++;
        $sub_ct = $this->processNode($sub_node, $cur_ct, $level + 1, $sub_pos);
        /* subject and predicates announced by one child carry over to its following siblings */
        $cur_ct['next_s'] = $sub_ct['next_s'];
        $cur_ct['ps'] = $sub_ct['ps'];
      }
    }
    return $lct;
  }
  
  /*  */
  
  /**
   * Determines the subject identifier for a node.
   *
   * The first attribute present among "href uri", "src uri", "title" and
   * "value" (checked in that order) is used; without any of them an
   * identifier is built from the document base and the node's rpointer.
   *
   * @param array $n  node; attributes are read from $n['a']
   * @param array $ct context providing 'base' and 'rpointer'
   * @return string
   */
  function getSubject($n, $ct) {
    $candidates = array('href uri', 'src uri', 'title', 'value');
    foreach ($candidates as $attr) {
      if (!isset($n['a'][$attr])) continue;
      return $n['a'][$attr];
    }
    /* rpointer */
    $pointer = '#resource(' . $ct['rpointer'] . ')';
    return $ct['base'] . $pointer;
  }
  
  /**
   * Collects the predicate URIs announced by a node.
   *
   * Every class/rel token of the form "prefix-local" whose prefix is a known
   * namespace is expanded to namespace + local; the rdf-s / rdf-p / rdf-o
   * marker tokens themselves are ignored. If no token yields a predicate,
   * the "href uri" attribute, or failing that the "title" attribute, is used.
   *
   * @param array $n  node; attributes are read from $n['a']
   * @param array $ns prefix => namespace URI map
   * @return array list of predicate URIs (possibly empty)
   */
  function getPredicates($n, $ns) {
    $found = array();
    /* try pnames */
    $class_tokens = $this->v('class m', array(), $n['a']);
    $rel_tokens = $this->v('rel m', array(), $n['a']);
    foreach (array_merge($class_tokens, $rel_tokens) as $token) {
      $is_pname = preg_match('/^([a-z0-9]+)\-([a-z0-9\-\_]+)$/i', $token, $m)
        && isset($ns[$m[1]])
        && !preg_match('/^rdf-(s|p|o|o-(xml|dateTime|float|integer|boolean))$/', $token);
      if ($is_pname) {
        $found[] = $ns[$m[1]] . $m[2];
      }
    }
    if ($found) return $found;
    /* try other attributes */
    if (isset($n['a']['href uri'])) {
      $found[] = $n['a']['href uri'];
    }
    elseif (isset($n['a']['title'])) {
      $found[] = $n['a']['title'];
    }
    return $found;
  }

  /**
   * Emits one triple per active predicate for an object node.
   *
   * The object is either a resource-style value (first attribute present
   * among "href uri", "src uri", "title", "value", or the upcoming subject
   * when the node also carries class "rdf-s") or the node's content. Terms
   * flagged as datatype properties always use the content.
   *
   * @param array  $n      node; attributes are read from $n['a']
   * @param array  $ct     context providing 'ns', 'ps', 'next_s', 'lang' and the subject stack
   * @param string $o_type '' for a plain object, "xml" for an XML literal, otherwise
   *                       the XSD datatype local name (e.g. "integer")
   */
  function addTriples($n, $ct, $o_type) {
    foreach (array('href uri', 'src uri', 'title', 'value') as $k) {
      if (isset($n['a'][$k])) {
        $node_o = $n['a'][$k];
        break;
      }
    }
    if (!isset($node_o) && $this->hasClass($n, 'rdf-s')) {
      $node_o = $ct['next_s'][1];
    }
    $lit_o = ($o_type == 'xml') ? $this->getContent($n) : $this->getPlainContent($n);
    $posh_ns = $ct['ns'][$this->ns_prefix];
    $rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
    $xsd = 'http://www.w3.org/2001/XMLSchema#';
    /* the datatype depends on $o_type only, so it is computed once */
    $o_datatype = ($o_type == 'xml') ? $rdf . 'XMLLiteral' : ($o_type ? $xsd . $o_type : '');
    foreach ($ct['ps'] as $p) {
      $p_key = str_replace($posh_ns, '', $p);
      /* dt or obj */
      $o = $this->isDatatypeProperty($p_key) ? $lit_o : (isset($node_o) ? $node_o : $lit_o);
      /* skip empty objects, but keep "0": it is falsy in PHP yet a valid value */
      if (!$o && $o !== '0') continue;
      $s = $this->getContainerSubject($ct, $p_key);
      if (!$s) continue;
      /* strict comparison: numeric-looking strings such as "01" and "1" must not be confused */
      $lang = (($o === $lit_o) && !$o_type) ? $ct['lang'] : '';
      $o = $this->tweakObject($o, $p, $ct);
      $this->addT(array(
        's' => $s,
        's_type' => preg_match('/^\_\:/', $s) ? 'bnode' : 'uri',
        'p' => $p, 
        'o' => $o,
        'o_type' => $this->getObjectType($o, $p_key),
        'o_lang' => $lang,
        'o_datatype' => $o_datatype,
      ));
    }
  }

  /**
   * Adds an rdf:type triple for every active predicate whose term is
   * flagged as a subject term.
   *
   * The typed resource is the upcoming subject ($ct['next_s'][1]); the type
   * is the posh namespace followed by the term with its first letter
   * upper-cased.
   *
   * @param array $ct context providing 'ns', 'ps' and 'next_s'
   */
  function addPoshTypes($ct) {
    $posh_ns = $ct['ns'][$this->ns_prefix];
    foreach ($ct['ps'] as $p) {
      $term = str_replace($posh_ns, '', $p);
      if ($this->isSubject($term)) {
        $typed_s = $ct['next_s'][1];
        $is_bnode = preg_match('/^\_\:/', $typed_s);
        $t = array(
          's' => $typed_s,
          's_type' => $is_bnode ? 'bnode' : 'uri',
          'p' => $ct['ns']['rdf'] . 'type',
          'o' => $posh_ns . ucfirst($term),
          'o_type' => 'uri',
          'o_lang' => '',
          'o_datatype' => '',
        );
        $this->addT($t);
      }
    }
  }
  
  /*  */
  
  /**
   * Hook called at the start of processNode() before a node is evaluated.
   * This implementation returns the node unchanged.
   *
   * @param array $n node about to be processed
   * @return array the same node
   */
  function preProcessNode($n) {
    return $n;
  }
  
  /**
   * Finds the subject a triple for the given term should be attached to.
   *
   * Terms that are unknown or declare no "scope" use the top of the subject
   * stack. Scoped terms use the first stack entry whose key (with the
   * "<ns_prefix>-" prefix stripped) contains one of the scope names as a
   * whitespace-separated token.
   *
   * @param array  $ct   context providing the subject stack in 's'
   * @param string $term term name
   * @return mixed the subject value, or 0 when no stack entry is in scope
   */
  function getContainerSubject($ct, $term) {
    $scope = isset($this->terms[$term]) ? $this->v('scope', array(), $this->terms[$term]) : array();
    if (!$scope) return $ct['s'][0][1];
    $scope_pattern = '/(^|\s)(' . join('|', $scope) . ')($|\s)/s';
    $prefix = $this->ns_prefix . '-';
    foreach ($ct['s'] as $entry) {
      $entry_key = str_replace($prefix, '', $entry[0]);
      if (preg_match($scope_pattern, $entry_key)) return $entry[1];
    }
    return 0;
  }
  
  /**
   * Tells whether the term's definition in $this->terms contains the flag 's'.
   *
   * @param string $term term name
   * @return mixed 0 for an unknown term, otherwise the boolean result of the lookup
   */
  function isSubject($term) {
    return isset($this->terms[$term]) ? in_array('s', $this->terms[$term]) : 0;
  }
  
  /**
   * Tells whether the term's definition in $this->terms contains the flag 'plain'.
   *
   * @param string $term term name
   * @return mixed 0 for an unknown term, otherwise the boolean result of the lookup
   */
  function isDatatypeProperty($term) {
    return isset($this->terms[$term]) ? in_array('plain', $this->terms[$term]) : 0;
  }
  
  /**
   * Classifies an object value as 'literal', 'bnode' or 'uri'.
   *
   * Datatype-property terms and values containing a space are literals.
   * Otherwise a value shaped like "scheme:rest" without whitespace is a
   * bnode when the scheme is "_" and a uri for any other scheme; everything
   * else is a literal.
   *
   * @param string $o    object value
   * @param string $term term name the object belongs to
   * @return string
   */
  function getObjectType($o, $term) {
    if ($this->isDatatypeProperty($term) || strpos($o, ' ')) {
      return 'literal';
    }
    if (!preg_match('/^([a-z0-9\_]+)\:[^\s]+$/s', $o, $m)) {
      return 'literal';
    }
    return ($m[1] == '_') ? 'bnode' : 'uri';
  }
  
  /**
   * Hook called by addTriples() on each object value before the triple is added.
   * This implementation returns the value unchanged; $p and $ct are not used here.
   *
   * @param mixed  $o  object value
   * @param string $p  predicate the object is used with
   * @param array  $ct current context
   * @return mixed the same object value
   */
  function tweakObject($o, $p, $ct) {
    return $o;
  }
  
  /*  */

}