Url path segments in Google Sheets
I assume you know how to add a custom function in Google Apps Script ;)
I also assume you see the usefulness of a script that extracts all URL-segments from a URL-path, and does this not only for a single URL, but for as many as your Sheet can possible have.
Just call =getPathSegments(A:A)
for a complete column or =getPathSegments(A1)
for a single cell. One example:
=arrayformula(if(isurl(A2:A), getPathSegments(A2:A,4),""))
So here, first, I check if all cells from A2
downwards in the complete column A actually are valid URLs (with the standard =isurl()
function), and if so, I call my own custom function. I wrap this in an =arrayformula()
, so I only need to call this once.
Note the "4" parameter in the function call: this is an optional parameter that limits the possible amount of columns returned to 4 (in theory, if you have an url with 100 segments, it would also return 100 columns. This prevents that and gives you some more flexibility over the output).
The code #
/**
* Processes an input that may be a single URL or an array of URLs.
* Extracts the path segments from each URL, limited by the maxSegments parameter, and outputs them in a way that spreads across subsequent columns in Google Sheets.
* @param {string|array} input - The URL or an array of URLs.
* @param {number} maxSegments - The maximum number of path segments to return. If not provided, defaults to returning all segments.
* @return A two-dimensional array with path segments for each URL in subsequent columns, limited by maxSegments.
*/
function getPathSegments(input, maxSegments) {
// Function to extract and return path segments as an array, limited by maxSegments
const extractPathSegments = (url, maxSegments) => {
const pathRegex = /https?:\/\/[^\/]+(\/[^?#]*)?/i;
const match = pathRegex.exec(url);
if (match && match[1]) {
let segments = match[1].split('/').filter(segment => segment.length > 0);
if (maxSegments && !isNaN(maxSegments)) {
segments = segments.slice(0, maxSegments); // Limit the number of segments if maxSegments is provided
}
return segments;
} else {
return []; // Return an empty array if no path is found
}
};
// Determine if maxSegments was actually provided and is a valid number
const maxSegmentsProvided = maxSegments && !isNaN(maxSegments);
// Process input accordingly
if (typeof input === 'string') {
// Single URL
return [extractPathSegments(input, maxSegmentsProvided ? maxSegments : undefined)];
} else if (Array.isArray(input) && input.length > 0 && typeof input[0] === 'string') {
// Array of URLs (single column of strings)
return input.map(url => extractPathSegments(url, maxSegmentsProvided ? maxSegments : undefined));
} else if (Array.isArray(input) && Array.isArray(input[0])) {
// Two-dimensional array input (URLs in the first column)
return input.map(row => {
const url = row[0]; // Assuming URL is in the first column
return extractPathSegments(url, maxSegmentsProvided ? maxSegments : undefined);
});
} else {
// Unrecognized input format
return [[]];
}
}