Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
feeds_xpathparser
Manage
Activity
Members
Labels
Plan
Wiki
Custom issue tracker
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Model registry
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
project
feeds_xpathparser
Commits
4813c4b0
Commit
4813c4b0
authored
14 years ago
by
Chris Leppanen
Browse files
Options
Downloads
Patches
Plain Diff
Added default and debugging support
parent
8a2d437d
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
FeedsXPathParser.inc
+193
-94
193 additions, 94 deletions
FeedsXPathParser.inc
with
193 additions
and
94 deletions
FeedsXPathParser.inc
+
193
−
94
View file @
4813c4b0
...
...
@@ -21,7 +21,14 @@ class FeedsXPathParserBase extends FeedsParser {
*/
public
function
parse
(
FeedsSource
$source
,
FeedsFetcherResult
$fetcher_result
)
{
$this
->
source_config
=
$source
->
getConfigFor
(
$this
);
$state
=
$source
->
state
(
FEEDS_PARSE
);
$mappings
=
feeds_importer
(
$this
->
id
)
->
processor
->
config
[
'mappings'
];
$this
->
mappings
=
array
();
foreach
(
$mappings
as
$mapping
)
{
if
(
strpos
(
$mapping
[
'source'
],
'xpathparser:'
)
===
0
)
{
$this
->
mappings
[
$mapping
[
'source'
]]
=
$mapping
[
'target'
];
}
}
if
(
isset
(
$this
->
source_config
[
'rawXML'
])
&&
is_array
(
$this
->
source_config
[
'rawXML'
]))
{
$this
->
rawXML
=
array_keys
(
array_filter
(
$this
->
source_config
[
'rawXML'
]));
}
...
...
@@ -36,6 +43,7 @@ class FeedsXPathParserBase extends FeedsParser {
*
* @param $xml
* A SimpleXMLElement object.
*
* @return array
* Returns a structured array suitable for adding to a batch object with
* $batch->setItems().
...
...
@@ -49,9 +57,14 @@ class FeedsXPathParserBase extends FeedsParser {
unset
(
$xml
);
$parsed_items
=
array
();
foreach
(
$all_items
as
$item
)
{
$parsed_item
=
array
();
$parsed_item
=
$variables
=
array
();
foreach
(
$this
->
source_config
[
'sources'
]
as
$source
=>
$query
)
{
$parsed_item
[
$source
]
=
$this
->
parseSourceElement
(
$item
,
$query
,
$source
);
$query
=
strtr
(
$query
,
$variables
);
$result
=
$this
->
parseSourceElement
(
$item
,
$query
,
$source
);
if
(
!
is_array
(
$result
))
{
$variables
[
'$'
.
$this
->
mappings
[
$source
]]
=
$result
;
}
$parsed_item
[
$source
]
=
$result
;
}
$parsed_items
[]
=
$parsed_item
;
}
...
...
@@ -63,8 +76,10 @@ class FeedsXPathParserBase extends FeedsParser {
*
* @param $xml
* The XML element to execute the query on.
*
* @param $query
* An XPath query.
*
* @return array
* An array containing the results of the query.
*/
...
...
@@ -87,31 +102,20 @@ class FeedsXPathParserBase extends FeedsParser {
$xml
->
registerXPathNamespace
(
$prefix
,
$namespace
);
}
}
/**
* Here we set libxml_use_internal_errors to TRUE because depending on the
* libxml version, $xml->xpath() might return FALSE or an empty array() when
* a query doesn't match.
*/
$use_errors
=
libxml_use_internal_errors
(
TRUE
);
// Perfom xpath query.
$results
=
$xml
->
xpath
(
$query
);
$error
=
libxml_get_last_error
();
libxml_clear_errors
();
libxml_use_internal_errors
(
$use_errors
);
list
(
$results
,
$error
)
=
$this
->
_query
(
$xml
,
$query
);
if
(
is_object
(
$error
)
&&
$error
->
level
==
LIBXML_ERR_ERROR
)
{
$orig_query
=
array_search
(
$query
,
$this
->
modified_queries
);
// If we didn't modify the query then it won't be in modified_queries.
$orig_query
=
$orig_query
?:
$query
;
$orig_query
=
$orig_query
?
$orig_query
:
$query
;
if
(
$this
->
source_config
[
'exp'
][
'errors'
])
{
drupal_set_message
(
t
(
"There was an error with the XPath query: %query.<br>
Libxml returned the message: %message, with the error code: %code."
,
array
(
'%query'
=>
$orig_query
,
array
(
'%query'
=>
$orig_query
,
'%message'
=>
trim
(
$error
->
message
),
'%code'
=>
$error
->
code
)),
'%code'
=>
$error
->
code
)),
'error'
,
FALSE
);
}
...
...
@@ -135,11 +139,10 @@ class FeedsXPathParserBase extends FeedsParser {
/**
* Normalizes XPath queries, adding the default namespace.
*
*/
private
function
addDefaultNamespace
(
$query
)
{
$
qu
er
y
=
feeds_xpathparser_parse_xpath
(
$query
);
return
$
q
uery
;
$
pars
er
=
new
FeedsXPathQueryParser
(
$query
);
return
$
parser
->
getQ
uery
()
;
}
/**
...
...
@@ -147,10 +150,13 @@ class FeedsXPathParserBase extends FeedsParser {
*
* @param $item
* A SimpleXMLElement from the context array.
*
* @param $query
* An XPath query.
*
* @param $source
* The name of the source for this query.
*
* @return array
* An array containing the results of the query.
*/
...
...
@@ -193,10 +199,10 @@ class FeedsXPathParserBase extends FeedsParser {
public
function
sourceForm
(
$source_config
)
{
$form
=
array
();
$form
[
'#weight'
]
=
-
10
;
$form
[
'#tree'
]
=
TRUE
;
$mappings_
=
feeds_importer
(
$this
->
id
)
->
processor
->
config
[
'mappings'
];
$uniques
=
$mappings
=
array
();
foreach
(
$mappings_
as
$mapping
)
{
if
(
strpos
(
$mapping
[
'source'
],
'xpathparser:'
)
===
0
)
{
$mappings
[
$mapping
[
'source'
]]
=
$mapping
[
'target'
];
...
...
@@ -207,7 +213,7 @@ class FeedsXPathParserBase extends FeedsParser {
}
if
(
empty
(
$mappings
))
{
$form
[
'error_message'
][
'#value'
]
=
'FeedsXPathParser: No mappings
we
re defined.'
;
$form
[
'error_message'
][
'#value'
]
=
'FeedsXPathParser: No mappings
a
re defined.
<br>
'
;
return
$form
;
}
...
...
@@ -235,6 +241,7 @@ class FeedsXPathParserBase extends FeedsParser {
$form
[
'sources'
][
'help'
][
'#value'
]
=
'<div class="help">'
.
theme
(
'item_list'
,
$items
)
.
'</div>'
;
}
$variables
=
array
();
foreach
(
$mappings
as
$source
=>
$target
)
{
$form
[
'sources'
][
$source
]
=
array
(
'#type'
=>
'textfield'
,
...
...
@@ -243,6 +250,10 @@ class FeedsXPathParserBase extends FeedsParser {
'#default_value'
=>
isset
(
$source_config
[
'sources'
][
$source
])
?
$source_config
[
'sources'
][
$source
]
:
''
,
'#maxlength'
=>
1024
,
);
if
(
!
empty
(
$variables
))
{
$form
[
'sources'
][
$source
][
'#description'
]
.
=
'<br>'
.
t
(
'The variables '
.
implode
(
', '
,
$variables
)
.
' are availliable for replacement.'
);
}
$variables
[]
=
'$'
.
$target
;
}
$form
[
'rawXML'
]
=
array
(
...
...
@@ -256,7 +267,7 @@ class FeedsXPathParserBase extends FeedsParser {
'#type'
=>
'fieldset'
,
'#collapsible'
=>
TRUE
,
'#collapsed'
=>
TRUE
,
'#title'
=>
'XPath Options'
,
'#title'
=>
'XPath
Parser
Options'
,
);
$form
[
'exp'
][
'errors'
]
=
array
(
...
...
@@ -270,10 +281,11 @@ class FeedsXPathParserBase extends FeedsParser {
'#type'
=>
'checkbox'
,
'#title'
=>
t
(
'Use Tidy'
),
'#description'
=>
t
(
'The Tidy PHP extension has been detected.
Slect this to clean the markup before parsing.'
),
S
e
lect this to clean the markup before parsing.'
),
'#default_value'
=>
isset
(
$source_config
[
'exp'
][
'tidy'
])
?
$source_config
[
'exp'
][
'tidy'
]
:
FALSE
,
);
}
$form
[
'exp'
][
'debug'
]
=
array
(
'#type'
=>
'checkboxes'
,
'#title'
=>
t
(
'Debug Query'
),
...
...
@@ -285,29 +297,33 @@ class FeedsXPathParserBase extends FeedsParser {
}
/**
* Override parent::getMappingSources().
*/
public
function
getMappingSources
()
{
return
array
(
'xpathparser:0'
=>
array
(
'name'
=>
t
(
'XPath Expression'
),
'description'
=>
t
(
'Allows you to configure an XPath expression that will populate this field.'
),
),
)
+
parent
::
getMappingSources
();
* Override parent::configForm().
*/
public
function
configForm
(
&
$form_state
)
{
$form
=
$this
->
sourceForm
(
$this
->
config
);
$form
[
'context'
][
'#required'
]
=
FALSE
;
return
$form
;
}
/**
* Define defaults.
*/
public
function
sourceDefaults
()
{
return
$this
->
config
;
}
/**
* Define defaults.
*/
public
function
configDefaults
()
{
return
array
(
'sources'
=>
array
(),
'rawXML'
=>
array
(),
'context'
=>
''
,
'exp'
=>
array
(
'exp'
=>
array
(
'errors'
=>
FALSE
,
'tidy'
=>
FALSE
,
'debug'
=>
array
(),
'debug'
=>
array
(),
),
);
}
...
...
@@ -318,14 +334,61 @@ class FeedsXPathParserBase extends FeedsParser {
* Simply trims all XPath values from the form. That way when testing them
* later we can be sure that there aren't any strings with spaces in them.
*
* @todo
* validate xpath queries?
*
* @param &$values
* The values from the form to validate, passed by reference.
*/
public
function
sourceFormValidate
(
&
$values
)
{
$values
[
'context'
]
=
trim
(
$values
[
'context'
]);
foreach
(
$values
[
'sources'
]
as
&
$query
)
{
$query
=
trim
(
$query
);
$query
=
trim
(
$query
);
}
}
/**
* Override parent::sourceFormValidate().
*/
public
function
configFormValidate
(
&
$values
)
{
$this
->
sourceFormValidate
(
$values
);
}
/**
* Here we set libxml_use_internal_errors to TRUE because depending on the
* libxml version, $xml->xpath() might return FALSE or an empty array() when
* a query doesn't match.
*/
private
function
_query
(
$xml
,
$query
)
{
$use_errors
=
libxml_use_internal_errors
(
TRUE
);
// Perfom xpath query.
$results
=
$xml
->
xpath
(
$query
);
$error
=
libxml_get_last_error
();
libxml_clear_errors
();
libxml_use_internal_errors
(
$use_errors
);
return
array
(
$results
,
$error
);
}
/**
* Override parent::getMappingSources().
*/
public
function
getMappingSources
()
{
$xpath_source
=
array
(
'xpathparser:0'
=>
array
(
'name'
=>
t
(
'XPath Expression'
),
'description'
=>
t
(
'Allows you to configure an XPath expression that will populate this field.'
),
),
);
$sources
=
parent
::
getMappingSources
();
// Older versions of Feeds return FALSE here.
if
(
is_array
(
$sources
))
{
return
$sources
+
$xpath_source
;
}
return
$xpath_source
;
}
}
...
...
@@ -347,7 +410,7 @@ class FeedsXPathParserHTML extends FeedsXPathParserBase {
/**
* This is currently unsupported.
*/
if
(
$this
->
source_config
[
'exp'
][
'tidy'
])
{
if
(
isset
(
$this
->
source_config
[
'exp'
][
'tidy'
])
&&
$this
->
source_config
[
'exp'
][
'tidy'
])
{
$config
=
array
(
'merge-divs'
=>
FALSE
,
'merge-spans'
=>
FALSE
,
...
...
@@ -432,86 +495,122 @@ function feeds_xpathparser_form_feeds_ui_mapping_form_alter(&$form, &$form_state
/**
* Pseudo-parser of XPath queries. When an XML document has a default
* namespace this gets called so that adding the __default__ namepace where
* appropriate. Aren't we nice
.
* appropriate. Aren't we nice
?
*
* @todo
* Make this into a class so that we can save state on the object.
* Write tests for this beasty.
* Cleanup.
* @param $query
* An xpath query string.
* @return string
* An xpath query string with the __default__ namespace added.
*/
function
feeds_xpathparser_parse_xpath
(
$query
)
{
$query
=
preg_replace
(
'/\s+\(\s*/'
,
'('
,
$query
);
$word_boundary
=
array
(
'['
,
']'
,
'='
,
'('
,
')'
,
'.'
,
'<'
,
'>'
,
'*'
,
'!'
,
'|'
,
'/'
,
','
,
' '
);
$in_quotes
=
FALSE
;
$quote_char
=
''
;
$word
=
''
;
$output
=
''
;
$prev_boundary
=
''
;
for
(
$i
=
0
;
$i
<
strlen
(
$query
);
$i
++
)
{
$c
=
$query
[
$i
];
if
(
$c
==
'"'
|
$c
==
"'"
)
{
if
(
$in_quotes
&&
$c
==
$quote_char
)
{
$in_quotes
=
FALSE
;
$word
.
=
$c
;
$output
.
=
$word
;
$word
=
''
;
class
FeedsXPathQueryParser
{
function
__construct
(
$query
)
{
$this
->
query
=
preg_replace
(
'/\s+\(\s*/'
,
'('
,
$query
);
$this
->
word_boundaries
=
array
(
'['
,
']'
,
'='
,
'('
,
')'
,
'.'
,
'<'
,
'>'
,
'*'
,
'!'
,
'|'
,
'/'
,
','
,
' '
,
':'
,
);
$this
->
in_quotes
=
FALSE
;
$this
->
quote_char
=
''
;
$this
->
word
=
''
;
$this
->
output
=
''
;
$this
->
prev_boundary
=
''
;
$this
->
axis
=
''
;
$this
->
skip_next_word
=
FALSE
;
$this
->
start
();
}
function
start
()
{
for
(
$i
=
0
;
$i
<
strlen
(
$this
->
query
);
$i
++
)
{
$this
->
i
=
$i
;
$c
=
$this
->
query
[
$i
];
if
(
$c
==
'"'
||
$c
==
"'"
)
{
$this
->
handle_quote
(
$c
);
continue
;
}
if
(
$this
->
in_quotes
)
{
$this
->
word
.
=
$c
;
continue
;
}
elseif
(
!
$in_quotes
)
{
$in_quotes
=
TRUE
;
$output
.
=
_feeds_xpathparser_handle_word
(
$word
);
$word
=
$c
;
$quote_char
=
$c
;
if
(
in_array
(
$c
,
$this
->
word_boundaries
))
{
$this
->
handle_word_boundary
(
$c
);
}
else
{
$word
.
=
$c
;
$
this
->
word
.
=
$c
;
}
continue
;
}
$this
->
handle_word
();
}
if
(
$in_quotes
)
{
$word
.
=
$c
;
continue
;
function
handle_quote
(
$c
)
{
if
(
$this
->
in_quotes
&&
$c
==
$this
->
quote_char
)
{
$this
->
in_quotes
=
FALSE
;
$this
->
word
.
=
$c
;
$this
->
output
.
=
$this
->
word
;
$this
->
word
=
''
;
}
elseif
(
!
$this
->
in_quotes
)
{
$this
->
in_quotes
=
TRUE
;
$this
->
handle_word
();
$this
->
word
=
$c
;
$this
->
quote_char
=
$c
;
}
else
{
$this
->
word
.
=
$c
;
}
}
if
(
in_array
(
$c
,
$word_boundary
))
{
if
(
in_array
(
$word
,
array
(
'div'
,
'or'
,
'and'
,
'mod'
))
&&
$prev_boundary
==
' '
)
{
$output
.
=
$word
;
}
else
{
$output
.
=
_feeds_xpathparser_handle_word
(
$word
,
$c
);
}
$output
.
=
$c
;
$word
=
''
;
$prev_boundary
=
$c
;
function
handle_word_boundary
(
$c
)
{
if
(
in_array
(
$this
->
word
,
array
(
'div'
,
'or'
,
'and'
,
'mod'
))
&&
$this
->
prev_boundary
==
' '
&&
$c
==
' '
)
{
$this
->
output
.
=
$this
->
word
;
}
else
{
$
word
.
=
$c
;
$
this
->
handle_word
(
$c
)
;
}
$this
->
output
.
=
$c
;
$this
->
word
=
''
;
$this
->
prev_boundary
=
$c
;
}
return
$output
.
_feeds_xpathparser_handle_word
(
$word
);
}
function
_feeds_xpathparser_handle_word
(
$word
,
$c
=
''
,
$axis
=
FALSE
)
{
function
handle_word
(
$c
=
''
)
{
if
(
$this
->
word
==
''
)
{
return
;
}
if
(
$c
==
':'
&&
$this
->
query
[
$this
->
i
+
1
]
==
':'
)
{
$this
->
axis
=
$this
->
word
;
}
if
(
$c
==
':'
&&
$this
->
query
[
$this
->
i
-
1
]
!=
':'
&&
$this
->
query
[
$this
->
i
+
1
]
!=
':'
)
{
$this
->
output
.
=
$this
->
word
;
$this
->
skip_next_word
=
TRUE
;
return
;
}
if
(
strlen
(
$word
)
===
0
)
{
return
''
;
}
if
(
$this
->
skip_next_word
)
{
$this
->
skip_next_word
=
FALSE
;
$this
->
output
.
=
$this
->
word
;
return
;
}
if
(
strpos
(
$word
,
'::'
)
!==
FALSE
)
{
$word
=
explode
(
'::'
,
$word
,
2
);
return
$word
[
0
]
.
'::'
.
_feeds_xpathparser_handle_word
(
$word
[
1
],
'::'
,
$word
[
0
]);
}
if
(
is_numeric
(
$this
->
word
)
||
$this
->
axis
==
'attribute'
||
strpos
(
$this
->
word
,
'@'
)
===
0
||
$c
==
'('
||
$c
==
':'
)
{
$this
->
output
.
=
$this
->
word
;
return
;
}
if
(
is_numeric
(
$word
)
||
$axis
==
'attribute'
||
strpos
(
$word
,
'@'
)
===
0
||
$c
==
'('
||
strpos
(
$word
,
':'
)
!==
FALSE
)
{
return
$word
;
$this
->
output
.
=
'__default__:'
.
$this
->
word
;
}
return
'__default__:'
.
$word
;
function
getQuery
()
{
return
$this
->
output
;
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment