/*! @license
* tXml
* Copyright 2015 Tobias Nickel
* SPDX-License-Identifier: MIT
*/
goog.provide('shaka.util.TXml');
goog.require('shaka.util.StringUtils');
goog.require('shaka.log');
/**
* This code is a modified version of the tXml library.
*
* @author: Tobias Nickel
* created: 06.04.2015
* https://github.com/TobiasNickel/tXml
*/
/**
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
shaka.util.TXml = class {
/**
* Parse some data
* @param {BufferSource} data
* @param {string=} expectedRootElemName
* @return {shaka.extern.xml.Node | null}
*/
static parseXml(data, expectedRootElemName) {
const xmlString = shaka.util.StringUtils.fromBytesAutoDetect(data);
return shaka.util.TXml.parseXmlString(xmlString, expectedRootElemName);
}
/**
* Parse some data
* @param {string} xmlString
* @param {string=} expectedRootElemName
* @return {shaka.extern.xml.Node | null}
*/
static parseXmlString(xmlString, expectedRootElemName) {
const result = shaka.util.TXml.parse(xmlString);
if (!expectedRootElemName && result.length) {
return result[0];
}
const rootNode = result.find((n) => n.tagName === expectedRootElemName);
if (rootNode) {
return rootNode;
}
shaka.log.error('parseXml root element not found!');
return null;
}
/**
* Parse some data
* @param {string} schema
* @return {string}
*/
static getKnownNameSpace(schema) {
if (shaka.util.TXml.knownNameSpaces_.has(schema)) {
return shaka.util.TXml.knownNameSpaces_.get(schema);
}
return '';
}
/**
* Parse some data
* @param {string} schema
* @param {string} NS
*/
static setKnownNameSpace(schema, NS) {
shaka.util.TXml.knownNameSpaces_.set(schema, NS);
}
/**
* parseXML / html into a DOM Object,
* with no validation and some failure tolerance
* @param {string} S your XML to parse
* @return {Array.<shaka.extern.xml.Node>}
*/
static parse(S) {
let pos = 0;
const openBracket = '<';
const openBracketCC = '<'.charCodeAt(0);
const closeBracket = '>';
const closeBracketCC = '>'.charCodeAt(0);
const minusCC = '-'.charCodeAt(0);
const slashCC = '/'.charCodeAt(0);
const exclamationCC = '!'.charCodeAt(0);
const singleQuoteCC = '\''.charCodeAt(0);
const doubleQuoteCC = '"'.charCodeAt(0);
const openCornerBracketCC = '['.charCodeAt(0);
/**
* parsing a list of entries
*/
function parseChildren(tagName, preserveSpace = false) {
/** @type {Array.<shaka.extern.xml.Node | string>} */
const children = [];
while (S[pos]) {
if (S.charCodeAt(pos) == openBracketCC) {
if (S.charCodeAt(pos + 1) === slashCC) {
const closeStart = pos + 2;
pos = S.indexOf(closeBracket, pos);
const closeTag = S.substring(closeStart, pos);
let indexOfCloseTag = closeTag.indexOf(tagName);
if (indexOfCloseTag == -1) {
// handle VTT closing tags like <c.lime></c>
const indexOfPeriod = tagName.indexOf('.');
if (indexOfPeriod > 0) {
const shortTag = tagName.substring(0, indexOfPeriod);
indexOfCloseTag = closeTag.indexOf(shortTag);
}
}
// eslint-disable-next-line no-restricted-syntax
if (indexOfCloseTag == -1) {
const parsedText = S.substring(0, pos).split('\n');
throw new Error(
'Unexpected close tag\nLine: ' + (parsedText.length - 1) +
'\nColumn: ' +
(parsedText[parsedText.length - 1].length + 1) +
'\nChar: ' + S[pos],
);
}
if (pos + 1) {
pos += 1;
}
return children;
} else if (S.charCodeAt(pos + 1) === exclamationCC) {
if (S.charCodeAt(pos + 2) == minusCC) {
while (pos !== -1 && !(S.charCodeAt(pos) === closeBracketCC &&
S.charCodeAt(pos - 1) == minusCC &&
S.charCodeAt(pos - 2) == minusCC &&
pos != -1)) {
pos = S.indexOf(closeBracket, pos + 1);
}
if (pos === -1) {
pos = S.length;
}
} else if (
S.charCodeAt(pos + 2) === openCornerBracketCC &&
S.charCodeAt(pos + 8) === openCornerBracketCC &&
S.substr(pos + 3, 5).toLowerCase() === 'cdata'
) {
// cdata
const cdataEndIndex = S.indexOf(']]>', pos);
if (cdataEndIndex == -1) {
children.push(S.substr(pos + 9));
pos = S.length;
} else {
children.push(S.substring(pos + 9, cdataEndIndex));
pos = cdataEndIndex + 3;
}
continue;
}
pos++;
continue;
}
const node = parseNode(preserveSpace);
children.push(node);
if (typeof node === 'string') {
return children;
}
if (node.tagName[0] === '?' && node.children) {
children.push(...node.children);
node.children = [];
}
} else {
const text = parseText();
if (preserveSpace) {
if (text.length > 0) {
children.push(text);
}
} else if (children.length &&
text.length == 1 && text[0] == '\n') {
children.push(text);
} else {
const trimmed = text.trim();
if (trimmed.length > 0) {
children.push(text);
}
}
pos++;
}
}
return children;
}
/**
* returns the text outside of texts until the first '<'
*/
function parseText() {
const start = pos;
pos = S.indexOf(openBracket, pos) - 1;
if (pos === -2) {
pos = S.length;
}
return S.slice(start, pos + 1);
}
/**
* returns text until the first nonAlphabetic letter
*/
const nameSpacer = '\r\n\t>/= ';
/**
* Parse text in current context
* @return {string}
*/
function parseName() {
const start = pos;
while (nameSpacer.indexOf(S[pos]) === -1 && S[pos]) {
pos++;
}
return S.slice(start, pos);
}
/**
* Parse text in current context
* @param {boolean} preserveSpace Preserve the space between nodes
* @return {shaka.extern.xml.Node | string}
*/
function parseNode(preserveSpace) {
pos++;
const tagName = parseName();
const attributes = {};
let children = [];
// parsing attributes
while (S.charCodeAt(pos) !== closeBracketCC && S[pos]) {
const c = S.charCodeAt(pos);
// abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
if ((c > 64 && c < 91) || (c > 96 && c < 123)) {
const name = parseName();
// search beginning of the string
let code = S.charCodeAt(pos);
while (code && code !== singleQuoteCC && code !== doubleQuoteCC &&
!((code > 64 && code < 91) || (code > 96 && code < 123)) &&
code !== closeBracketCC) {
pos++;
code = S.charCodeAt(pos);
}
let value = parseString();
if (code === singleQuoteCC || code === doubleQuoteCC) {
if (pos === -1) {
/** @type {shaka.extern.xml.Node} */
const node = {
tagName,
attributes,
children,
parent: null,
};
for (let i = 0; i < children.length; i++) {
if (typeof children[i] !== 'string') {
children[i].parent = node;
}
}
return node;
}
} else {
value = null;
pos--;
}
if (name.startsWith('xmlns:')) {
const segs = name.split(':');
shaka.util.TXml.setKnownNameSpace(
/** @type {string} */ (value), segs[1]);
}
if (tagName === 'tt' &&
name === 'xml:space' &&
value === 'preserve') {
preserveSpace = true;
}
attributes[name] = value;
}
pos++;
}
if (S.charCodeAt(pos - 1) !== slashCC) {
pos++;
const contents = parseChildren(tagName, preserveSpace);
children = contents;
} else {
pos++;
}
/** @type {shaka.extern.xml.Node} */
const node = {
tagName,
attributes,
children,
parent: null,
};
const childrenLength = children.length;
for (let i = 0; i < childrenLength; i++) {
const childrenValue = children[i];
if (typeof childrenValue !== 'string') {
childrenValue.parent = node;
} else if (i == childrenLength - 1 && childrenValue == '\n') {
children.pop();
}
}
return node;
}
/**
* Parse string in current context
* @return {string}
*/
function parseString() {
const startChar = S[pos];
const startpos = pos + 1;
pos = S.indexOf(startChar, startpos);
return S.slice(startpos, pos);
}
return parseChildren('');
}
/**
* Verifies if the element is a TXml node.
* @param {!shaka.extern.xml.Node} elem The XML element.
* @return {!boolean} Is the element a TXml node
*/
static isNode(elem) {
return !!(elem.tagName);
}
/**
* Checks if a node is of type text.
* @param {!shaka.extern.xml.Node | string} elem The XML element.
* @return {boolean} True if it is a text node.
*/
static isText(elem) {
return typeof elem === 'string';
}
/**
* gets child XML elements.
* @param {!shaka.extern.xml.Node} elem The parent XML element.
* @return {!Array.<!shaka.extern.xml.Node>} The child XML elements.
*/
static getChildNodes(elem) {
const found = [];
if (!elem.children) {
return [];
}
for (const child of elem.children) {
if (typeof child !== 'string') {
found.push(child);
}
}
return found;
}
/**
* Finds child XML elements.
* @param {!shaka.extern.xml.Node} elem The parent XML element.
* @param {string} name The child XML element's tag name.
* @return {!Array.<!shaka.extern.xml.Node>} The child XML elements.
*/
static findChildren(elem, name) {
const found = [];
if (!elem.children) {
return [];
}
for (const child of elem.children) {
if (child.tagName === name) {
found.push(child);
}
}
return found;
}
/**
* Gets inner text.
* @param {!shaka.extern.xml.Node | string} node The XML element.
* @return {?string} The text contents, or null if there are none.
*/
static getTextContents(node) {
const StringUtils = shaka.util.StringUtils;
if (typeof node === 'string') {
return StringUtils.htmlUnescape(node);
}
const textContent = node.children.reduce(
(acc, curr) => (typeof curr === 'string' ? acc + curr : acc),
'',
);
if (textContent === '') {
return null;
}
return StringUtils.htmlUnescape(textContent);
}
/**
* Gets the text contents of a node.
* @param {!shaka.extern.xml.Node} node The XML element.
* @return {?string} The text contents, or null if there are none.
*/
static getContents(node) {
if (!Array.from(node.children).every(
(n) => typeof n === 'string' )) {
return null;
}
// Read merged text content from all text nodes.
let text = shaka.util.TXml.getTextContents(node);
if (text) {
text = text.trim();
}
return text;
}
/**
* Finds child XML elements recursively.
* @param {!shaka.extern.xml.Node} elem The parent XML element.
* @param {string} name The child XML element's tag name.
* @param {!Array.<!shaka.extern.xml.Node>} found accumulator for found nodes
* @return {!Array.<!shaka.extern.xml.Node>} The child XML elements.
*/
static getElementsByTagName(elem, name, found = []) {
if (elem.tagName === name) {
found.push(elem);
}
if (elem.children) {
for (const child of elem.children) {
shaka.util.TXml.getElementsByTagName(child, name, found);
}
}
return found;
}
/**
* Finds a child XML element.
* @param {!shaka.extern.xml.Node} elem The parent XML element.
* @param {string} name The child XML element's tag name.
* @return {shaka.extern.xml.Node | null} The child XML element,
* or null if a child XML element
* does not exist with the given tag name OR if there exists more than one
* child XML element with the given tag name.
*/
static findChild(elem, name) {
const children = shaka.util.TXml.findChildren(elem, name);
if (children.length != 1) {
return null;
}
return children[0];
}
/**
* Finds a namespace-qualified child XML element.
* @param {!shaka.extern.xml.Node} elem The parent XML element.
* @param {string} ns The child XML element's namespace URI.
* @param {string} name The child XML element's local name.
* @return {shaka.extern.xml.Node | null} The child XML element, or null
* if a child XML element
* does not exist with the given tag name OR if there exists more than one
* child XML element with the given tag name.
*/
static findChildNS(elem, ns, name) {
const children = shaka.util.TXml.findChildrenNS(elem, ns, name);
if (children.length != 1) {
return null;
}
return children[0];
}
/**
* Parses an attribute by its name.
* @param {!shaka.extern.xml.Node} elem The XML element.
* @param {string} name The attribute name.
* @param {function(string): (T|null)} parseFunction A function that parses
* the attribute.
* @param {(T|null)=} defaultValue The attribute's default value, if not
* specified, the attibute's default value is null.
* @return {(T|null)} The parsed attribute on success, or the attribute's
* default value if the attribute does not exist or could not be parsed.
* @template T
*/
static parseAttr(elem, name, parseFunction, defaultValue = null) {
let parsedValue = null;
const value = elem.attributes[name];
if (value != null) {
parsedValue = parseFunction(value);
}
return parsedValue == null ? defaultValue : parsedValue;
}
/**
* Gets a namespace-qualified attribute.
* @param {!shaka.extern.xml.Node} elem The element to get from.
* @param {string} ns The namespace URI.
* @param {string} name The local name of the attribute.
* @return {?string} The attribute's value, or null if not present.
*/
static getAttributeNS(elem, ns, name) {
const schemaNS = shaka.util.TXml.getKnownNameSpace(ns);
// Think this is equivalent
const attribute = elem.attributes[`${schemaNS}:${name}`];
return attribute || null;
}
/**
* Finds namespace-qualified child XML elements.
* @param {!shaka.extern.xml.Node} elem The parent XML element.
* @param {string} ns The child XML element's namespace URI.
* @param {string} name The child XML element's local name.
* @return {!Array.<!shaka.extern.xml.Node>} The child XML elements.
*/
static findChildrenNS(elem, ns, name) {
const schemaNS = shaka.util.TXml.getKnownNameSpace(ns);
const found = [];
if (elem.children) {
for (const child of elem.children) {
if (child && child.tagName === `${schemaNS}:${name}`) {
found.push(child);
}
}
}
return found;
}
/**
* Gets a namespace-qualified attribute.
* @param {!shaka.extern.xml.Node} elem The element to get from.
* @param {!Array.<string>} nsList The lis of namespace URIs.
* @param {string} name The local name of the attribute.
* @return {?string} The attribute's value, or null if not present.
*/
static getAttributeNSList(elem, nsList, name) {
for (const ns of nsList) {
const attr = shaka.util.TXml.getAttributeNS(
elem, ns, name,
);
if (attr) {
return attr;
}
}
return null;
}
/**
* Parses an XML date string.
* @param {string} dateString
* @return {?number} The parsed date in seconds on success; otherwise, return
* null.
*/
static parseDate(dateString) {
if (!dateString) {
return null;
}
// Times in the manifest should be in UTC. If they don't specify a timezone,
// Date.parse() will use the local timezone instead of UTC. So manually add
// the timezone if missing ('Z' indicates the UTC timezone).
// Format: YYYY-MM-DDThh:mm:ss.ssssss
if (/^\d+-\d+-\d+T\d+:\d+:\d+(\.\d+)?$/.test(dateString)) {
dateString += 'Z';
}
const result = Date.parse(dateString);
return isNaN(result) ? null : (result / 1000.0);
}
/**
* Parses an XML duration string.
* Negative values are not supported. Years and months are treated as exactly
* 365 and 30 days respectively.
* @param {string} durationString The duration string, e.g., "PT1H3M43.2S",
* which means 1 hour, 3 minutes, and 43.2 seconds.
* @return {?number} The parsed duration in seconds on success; otherwise,
* return null.
* @see {@link http://www.datypic.com/sc/xsd/t-xsd_duration.html}
*/
static parseDuration(durationString) {
if (!durationString) {
return null;
}
const re = '^P(?:([0-9]*)Y)?(?:([0-9]*)M)?(?:([0-9]*)D)?' +
'(?:T(?:([0-9]*)H)?(?:([0-9]*)M)?(?:([0-9.]*)S)?)?$';
const matches = new RegExp(re).exec(durationString);
if (!matches) {
shaka.log.warning('Invalid duration string:', durationString);
return null;
}
// Note: Number(null) == 0 but Number(undefined) == NaN.
const years = Number(matches[1] || null);
const months = Number(matches[2] || null);
const days = Number(matches[3] || null);
const hours = Number(matches[4] || null);
const minutes = Number(matches[5] || null);
const seconds = Number(matches[6] || null);
// Assume a year always has 365 days and a month always has 30 days.
const d = (60 * 60 * 24 * 365) * years +
(60 * 60 * 24 * 30) * months +
(60 * 60 * 24) * days +
(60 * 60) * hours +
60 * minutes +
seconds;
return isFinite(d) ? d : null;
}
/**
* Parses a range string.
* @param {string} rangeString The range string, e.g., "101-9213".
* @return {?{start: number, end: number}} The parsed range on success;
* otherwise, return null.
*/
static parseRange(rangeString) {
const matches = /([0-9]+)-([0-9]+)/.exec(rangeString);
if (!matches) {
return null;
}
const start = Number(matches[1]);
if (!isFinite(start)) {
return null;
}
const end = Number(matches[2]);
if (!isFinite(end)) {
return null;
}
return {start: start, end: end};
}
/**
* Parses an integer.
* @param {string} intString The integer string.
* @return {?number} The parsed integer on success; otherwise, return null.
*/
static parseInt(intString) {
const n = Number(intString);
return (n % 1 === 0) ? n : null;
}
/**
* Parses a positive integer.
* @param {string} intString The integer string.
* @return {?number} The parsed positive integer on success; otherwise,
* return null.
*/
static parsePositiveInt(intString) {
const n = Number(intString);
return (n % 1 === 0) && (n > 0) ? n : null;
}
/**
* Parses a non-negative integer.
* @param {string} intString The integer string.
* @return {?number} The parsed non-negative integer on success; otherwise,
* return null.
*/
static parseNonNegativeInt(intString) {
const n = Number(intString);
return (n % 1 === 0) && (n >= 0) ? n : null;
}
/**
* Parses a floating point number.
* @param {string} floatString The floating point number string.
* @return {?number} The parsed floating point number on success; otherwise,
* return null. May return -Infinity or Infinity.
*/
static parseFloat(floatString) {
const n = Number(floatString);
return !isNaN(n) ? n : null;
}
/**
* Parses a boolean.
* @param {string} booleanString The boolean string.
* @return {boolean} The boolean
*/
static parseBoolean(booleanString) {
if (!booleanString) {
return false;
}
return booleanString.toLowerCase() === 'true';
}
/**
* Evaluate a division expressed as a string.
* @param {string} exprString
* The expression to evaluate, e.g. "200/2". Can also be a single number.
* @return {?number} The evaluated expression as floating point number on
* success; otherwise return null.
*/
static evalDivision(exprString) {
let res;
let n;
if ((res = exprString.match(/^(\d+)\/(\d+)$/))) {
n = Number(res[1]) / Number(res[2]);
} else {
n = Number(exprString);
}
return !isNaN(n) ? n : null;
}
/**
* Parse xPath strings for segments and id targets.
* @param {string} exprString
* @return {!Array<!shaka.util.TXml.PathNode>}
*/
static parseXpath(exprString) {
const returnPaths = [];
// Split string by paths but ignore '/' in quotes
const paths = exprString
.split(/\/+(?=(?:[^'"]*['"][^'"]*['"])*[^'"]*$)/);
for (const path of paths) {
const nodeName = path.match(/\b([A-Z])\w+/);
// We only want the id attribute in which case
// /'(.*?)'/ will suffice to get it.
const idAttr = path.match(/(@id='(.*?)')/);
if (nodeName) {
returnPaths.push({
name: nodeName[0],
id: idAttr ?
idAttr[0].match(/'(.*?)'/)[0].replace(/'/gm, '') : null,
});
}
}
return returnPaths;
}
/**
* Converts a tXml node to DOM element.
* @param {shaka.extern.xml.Node} node
* @param {boolean=} doParents
* @param {boolean=} doChildren
* @return {!Element}
*/
static txmlNodeToDomElement(node, doParents = true, doChildren = true) {
const TXml = shaka.util.TXml;
const element = document.createElement(node.tagName);
for (const k in node.attributes) {
const v = node.attributes[k];
element.setAttribute(k, v);
}
if (doParents && node.parent) {
const parentElement = TXml.txmlNodeToDomElement(
node.parent, /* doParents= */ true, /* doChildren= */ false);
parentElement.appendChild(element);
}
if (doChildren) {
for (const child of node.children) {
let childElement;
if (typeof child == 'string') {
childElement = new Text(child);
} else {
childElement = TXml.txmlNodeToDomElement(
child, /* doParents= */ false, /* doChildren= */ true);
}
element.appendChild(childElement);
}
}
return element;
}
};
/**
 * Registry written by setKnownNameSpace and read by getKnownNameSpace.
 * It starts out empty and is shared by every parse.
 */
shaka.util.TXml.knownNameSpaces_ = new Map();
/**
* One step of a path, as produced by parseXpath.
* `name` is the element name matched in the step. `id` is the text found
* between the quotes of the step's @id='...' predicate, or null when the
* step has no such predicate.
*
* @typedef {{
* name: string,
* id: ?string
* }}
*/
shaka.util.TXml.PathNode;