I solved my problem: when called as a Google Sheets custom function, myImportXml() aborted whenever the depth of the XPath was greater than 2.
The cause was the use of a global variable for my array nextNodes[].
I replaced it with an array that is passed to, and returned from, each recursive call of the function. The script therefore no longer needs any global variables, and becomes the code below.
I forgot to explain why I re-created the existing Google Sheets function IMPORTXML!
IMPORTXML cannot process HTML pages that are not sufficiently valid XML.
With my script, we can modify the XML document before parsing it and then run the XPath search.
/**
 * Fetches a document over HTTP, parses it as XML and evaluates a simplified
 * XPath expression against its root element.
 *
 * @param {string} [url] Address of the document to fetch. When null or
 *     omitted, a sample movie URL is used (kept for manual testing).
 * @param {string} [path] Path expression handed to xPathThrowDepthXml. When
 *     null or omitted, "//nationality" is used (kept for manual testing).
 * @return {*} Whatever xPathThrowDepthXml returns for the root element.
 */
function myImportXml(url, path) {
  // Test defaults: only applied when the argument is null/undefined.
  var targetUrl = (url == null)
    ? "http://api.allocine.fr/rest/v3/movie?media=mp4-lc&partner=YW5kcm9pZC12Mg&profile=large&version=2&code=265621"
    : url;
  var query = (path == null) ? "//nationality" : path;

  var responseText = UrlFetchApp.fetch(targetUrl).getContentText();
  var rootElement = XmlService.parse(responseText).getRootElement();

  // The root element's own namespace is the one used for every child lookup.
  return xPathThrowDepthXml(query, rootElement, rootElement.getNamespace());
}
/**
 * Returns the node name of a descendant, or "" when it has none.
 * Descendant lists may hold entries without a usable getName(); those are
 * deliberately treated as unnamed instead of aborting the search.
 */
function xPathContentName_(content) {
  try {
    return content.getName();
  } catch (er) {
    return "";
  }
}

/**
 * Appends to `matches` every element of `children` named `tagName` that
 * carries an attribute named `attribName`; when `attribValue` is not null the
 * attribute value must also equal it.
 */
function xPathPushChildrenByAttribute_(matches, children, tagName, attribName, attribValue) {
  for (var c = 0; c < children.length; c++) {
    var child = children[c];
    // Loose equality is kept on purpose: it is what the script has always
    // used to compare values returned by the XML objects.
    if (child.getName() == tagName) {
      var attribs = child.getAttributes();
      for (var a = 0; a < attribs.length; a++) {
        if (attribs[a].getName() == attribName &&
            (attribValue == null || attribs[a].getValue() == attribValue)) {
          matches.push(child);
        }
      }
    }
  }
}

/**
 * Evaluates a simplified XPath-like expression against an XML element and
 * returns the matching texts / attribute values.
 *
 * The path is split on "/" and processed one step at a time; the function
 * calls itself for the remaining steps on every node matched by the first one.
 * No global variable is needed, so it can be used from a spreadsheet custom
 * function.
 *
 * Supported step syntaxes:
 *  - tag                         children named tag
 *  - tag[n]                      the child at position n of the children named
 *                                tag. Position is used as a 0-based array
 *                                index. NOTE(review): standard XPath is
 *                                1-based; confirm which one callers expect.
 *  - tag[@attrib]                children named tag having attribute attrib
 *  - tag[@attrib="value"]        same, and the attribute equals value
 *  - tag[contains(text(),"txt")] children named tag whose text contains txt
 *  - @attrib                     value of attribute attrib of the current node
 *                                (ends the evaluation)
 *  - empty step (leading "/", or "//" inside the path): search all
 *    descendants for the next tag name; any [predicate] on that name is
 *    ignored. "/a/b", "//a/b" and "///a/b" are equivalent.
 *  - a step made only of dots climbs one parent per dot, so "..." is
 *    equivalent to "./././" (this differs from standard XPath).
 * Because the path is split on "/", a "/" inside a quoted value is not
 * supported.
 *
 * @param {string} path Path expression to evaluate.
 * @param {Object|Object[]} node Element to start from, or an array of
 *     elements (each one is then evaluated independently).
 * @param {Object} mynamespace Namespace passed to every getChildren() call.
 * @param {number} [niveau] Recursion depth of the caller; leave undefined on
 *     the initial call.
 * @param {Array} [nextNodes] Stack of per-depth match lists shared along the
 *     recursion; leave undefined on the initial call.
 * @param {number} [log] Pass 3 to trace the evaluation with Logger.log.
 * @return {Array|undefined} On the initial call (depth 0): the result array,
 *     holding texts/values for the last step and one nested array per matched
 *     node for each intermediate step. On recursive calls: [result, nextNodes].
 *     undefined when node is falsy.
 */
function xPathThrowDepthXml(path, node, mynamespace, niveau, nextNodes, log) {
  //https://stackoverflow.com/questions/48185348/google-script-xml-parsing-error-cannot-find-function/58177338#58177338
  var isTracing = (log == 3);
  if (niveau == null) {
    niveau = -1;
    nextNodes = [];
  }
  if (nextNodes == null) {
    nextNodes = [];
  }
  niveau++;
  if (isTracing) Logger.log(niveau);

  if (Array.isArray(node)) {
    // Evaluate the path separately for every entry; each entry is a fresh
    // top-level evaluation and therefore yields a plain result array.
    var perNodeResults = node.map(function (singleNode) {
      return xPathThrowDepthXml(path, singleNode, mynamespace, null, null, log);
    });
    return niveau == 0 ? perNodeResults : [perNodeResults, nextNodes];
  }
  if (!node) {
    return;
  }

  // Nodes matched by the first step of `path`. A fresh list per call, so that
  // matches found under one sibling never leak into the next sibling's result.
  var matches = [];
  nextNodes.push(matches);

  var paths = path.replace(/\/{3,}/g, "//").replace(/^\/{2,}/, "/").split('/');
  var firstChild = paths[0];
  var remainingPath = paths.slice(1).join('/');
  var indexMatch = firstChild.match(/(\w+)\[(\d+)\]/); // tag[2]
  var attributeMatch = firstChild.match(/^@(\w+)/); // @attrib
  var searchAttributeMatch = firstChild.match(/(\w+)\[@(\w+)\]/); // tag[@attrib]
  var searchAttributeEgalMatch = firstChild.match(/(\w+)\[@(\w+)=['"](.*)["']\]/); // tag[@attrib="value"]
  var searchTextContainsMatch = firstChild.match(/(\w+)\[contains\(text\(\),['"](.*)['"]\)\]/); // tag[contains(text(),'Affiche')]
  var nodeValues;
  var i;

  if (indexMatch) {
    var indexedChild = node.getChildren(indexMatch[1], mynamespace)[parseInt(indexMatch[2], 10)];
    // An out-of-range position simply matches nothing.
    if (indexedChild != null) {
      matches.push(indexedChild);
    }
  } else if (firstChild === '') { // skipping element cases like newsList//title
    // If another name is next, use it as the tag to look for (and consume it).
    var tagName = '';
    if (paths.length > 1) {
      tagName = paths[1];
      remainingPath = paths.slice(2).join('/');
    }
    var predicateStart = tagName.match(/(.*)\[/);
    if (predicateStart) {
      tagName = predicateStart[1];
    }
    var descendants = node.getDescendants();
    for (i = 0; i < descendants.length; i++) {
      if (xPathContentName_(descendants[i]) == tagName) {
        matches.push(descendants[i]);
      }
    }
  } else if (searchAttributeMatch) {
    xPathPushChildrenByAttribute_(
      matches,
      node.getChildren(searchAttributeMatch[1], mynamespace),
      searchAttributeMatch[1],
      searchAttributeMatch[2],
      null
    );
  } else if (searchAttributeEgalMatch) {
    xPathPushChildrenByAttribute_(
      matches,
      node.getChildren(searchAttributeEgalMatch[1], mynamespace),
      searchAttributeEgalMatch[1],
      searchAttributeEgalMatch[2],
      searchAttributeEgalMatch[3]
    );
  } else if (attributeMatch) {
    // @ means attribute; a missing attribute deliberately yields "".
    nodeValues = "";
    try {
      nodeValues = node.getAttribute(attributeMatch[1]).getValue();
    } catch (er) {
      nodeValues = "";
    }
  } else if (/^\.+$/.test(firstChild)) { // ".", "..", "..." -> climb one level per dot
    var ancestor = node;
    for (i = 0; i < firstChild.length && ancestor; i++) {
      ancestor = ancestor.getParentElement();
    }
    // Climbing above the root matches nothing.
    if (ancestor) {
      matches.push(ancestor);
    }
  } else {
    // Plain child name, optionally filtered on the text it contains.
    var childName = firstChild;
    var textWanted = null;
    if (searchTextContainsMatch) {
      childName = searchTextContainsMatch[1];
      textWanted = searchTextContainsMatch[2];
    }
    var children = node.getChildren(childName, mynamespace);
    for (i = 0; i < children.length; i++) {
      var child = children[i];
      if (child.getName() == childName) {
        if (isTracing) Logger.log(niveau + "nextNodpush" + i + child.getName());
        if (textWanted == null) {
          matches.push(child);
        } else {
          var childText = child.getText();
          if (childText != null && String(childText).indexOf(textWanted) !== -1) {
            matches.push(child);
          }
        }
      }
    }
  }

  var result = [];
  if (nodeValues) {
    result.push(nodeValues);
  } else {
    if (isTracing) Logger.log(niveau + "for nextNode" + matches.length);
    for (i = 0; i < matches.length; i++) {
      var nextNode = matches[i];
      if (remainingPath !== '') {
        var mem = xPathThrowDepthXml(remainingPath, nextNode, mynamespace, niveau, nextNodes, log);
        if (mem) {
          if (isTracing) Logger.log(niveau + "Result.PUSH MEM" + mem[0]);
          result.push(mem[0]);
        }
      } else if (nextNode != null) {
        var nodeText = nextNode.getText && nextNode.getText();
        if (nodeText != null) result.push(nodeText);
        if (isTracing) Logger.log(niveau + "Result.pushTEXT=" + nodeText);
      }
    }
  }

  // Leave this depth: drop its match list from the shared stack.
  nextNodes.pop();
  if (niveau == 0) {
    return result;
  }
  return [result, nextNodes];
}