diff --git a/.changeset/orange-badgers-accept.md b/.changeset/orange-badgers-accept.md
new file mode 100644
index 0000000..15fdc9f
--- /dev/null
+++ b/.changeset/orange-badgers-accept.md
@@ -0,0 +1,5 @@
+---
+"google-indexing-script": patch
+---
+
+Add site url checker
diff --git a/src/index.ts b/src/index.ts
index 740c2bb..97c8b85 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -6,6 +6,7 @@ import {
   getEmojiForStatus,
   getPageIndexingStatus,
   convertToFilePath,
+  checkSiteUrl,
 } from "./shared/gsc";
 import { getSitemapPages } from "./shared/sitemap";
 import { Status } from "./shared/types";
@@ -48,7 +49,7 @@ export const index = async (
   }
 
   const accessToken = await getAccessToken(options.client_email, options.private_key, options.path);
-  const siteUrl = convertToSiteUrl(input);
+  let siteUrl = convertToSiteUrl(input);
   console.log(`🔎 Processing site: ${siteUrl}`);
 
   const cachePath = path.join(".cache", `${convertToFilePath(siteUrl)}.json`);
@@ -58,6 +59,8 @@ export const index = async (
     process.exit(1);
   }
 
+  siteUrl = await checkSiteUrl(accessToken, siteUrl);
+
   const [sitemaps, pages] = await getSitemapPages(accessToken, siteUrl);
 
   if (sitemaps.length === 0) {
diff --git a/src/shared/gsc.ts b/src/shared/gsc.ts
index a5cb5fb..c212781 100644
--- a/src/shared/gsc.ts
+++ b/src/shared/gsc.ts
@@ -1,3 +1,4 @@
+import { webmasters_v3 } from "googleapis";
 import { Status } from "./types";
 import { fetchRetry } from "./utils";
 
@@ -22,6 +23,121 @@ export function convertToFilePath(path: string) {
   return path.replace("http://", "http_").replace("https://", "https_").replace("/", "_");
 }
 
+/**
+ * Converts an HTTP(S) URL to the sc-domain format.
+ * Only the host is kept: the scheme and anything from the first slash onwards are dropped.
+ * @param httpUrl The HTTP URL to be converted.
+ * @returns The sc-domain formatted URL.
+ */
+export function convertToSCDomain(httpUrl: string) {
+  return `sc-domain:${httpUrl.replace(/^https?:\/\//, "").replace(/\/.*$/, "")}`;
+}
+
+/**
+ * Converts a domain to an HTTP URL.
+ * @param domain The bare domain (no scheme, no trailing slash) to be converted.
+ * @returns The HTTP URL.
+ */
+export function convertToHTTP(domain: string) {
+  return `http://${domain}/`;
+}
+
+/**
+ * Converts a domain to an HTTPS URL.
+ * @param domain The bare domain (no scheme, no trailing slash) to be converted.
+ * @returns The HTTPS URL.
+ */
+export function convertToHTTPS(domain: string) {
+  return `https://${domain}/`;
+}
+
+/**
+ * Retrieves a list of sites associated with the specified service account from the Google Webmasters API.
+ * @param accessToken - The access token for authentication.
+ * @returns An array containing the site URLs associated with the service account.
+ */
+export async function getSites(accessToken: string) {
+  const sitesResponse = await fetchRetry("https://www.googleapis.com/webmasters/v3/sites", {
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${accessToken}`,
+    },
+  });
+
+  if (sitesResponse.status === 403) {
+    console.error("🔐 This service account doesn't have access to any sites.");
+    return [];
+  }
+
+  const sitesBody: webmasters_v3.Schema$SitesListResponse = await sitesResponse.json();
+
+  if (!sitesBody.siteEntry) {
+    console.error("❌ No sites found, add them to Google Search Console and try again.");
+    return [];
+  }
+
+  // `siteUrl` is optional in the API schema; drop entries that do not carry one.
+  return sitesBody.siteEntry
+    .map((entry) => entry.siteUrl)
+    .filter((url): url is string => typeof url === "string");
+}
+
+/**
+ * Checks if the site URL is valid and accessible by the service account.
+ * When the exact URL is not among the account's sites, the equivalent HTTP, HTTPS and
+ * sc-domain forms of the same site are tried and the first accessible one is used.
+ * Exits the process with code 1 when no form is accessible or the format is unknown.
+ * @param accessToken - The access token for authentication.
+ * @param siteUrl - The URL of the site to check.
+ * @returns The site URL itself if accessible, otherwise the accessible alternative form.
+ */
+export async function checkSiteUrl(accessToken: string, siteUrl: string) {
+  const sites = await getSites(accessToken);
+
+  if (sites.includes(siteUrl)) {
+    return siteUrl;
+  }
+
+  // Alternative forms of the same site, in order of preference.
+  let candidates: { label: string; url: string }[];
+
+  if (siteUrl.startsWith("sc-domain:")) {
+    const domain = siteUrl.replace("sc-domain:", "");
+    candidates = [
+      { label: "HTTP", url: convertToHTTP(domain) },
+      { label: "HTTPS", url: convertToHTTPS(domain) },
+    ];
+  } else if (siteUrl.startsWith("https://") || siteUrl.startsWith("http://")) {
+    // convertToHTTP/convertToHTTPS expect a bare domain, so strip the scheme and the
+    // trailing slash first; passing the full URL would yield "http://https://example.com//".
+    const bareUrl = siteUrl.replace(/^https?:\/\//, "").replace(/\/$/, "");
+    candidates = [
+      siteUrl.startsWith("https://")
+        ? { label: "HTTP", url: convertToHTTP(bareUrl) }
+        : { label: "HTTPS", url: convertToHTTPS(bareUrl) },
+      { label: "sc-domain", url: convertToSCDomain(siteUrl) },
+    ];
+  } else {
+    console.error("❌ Unknown site URL format.");
+    console.error("");
+    process.exit(1);
+  }
+
+  const match = candidates.find((candidate) => sites.includes(candidate.url));
+
+  if (!match) {
+    console.error("❌ This service account doesn't have access to this site.");
+    console.error("");
+    process.exit(1);
+  }
+
+  // sc-domain matches are suggested to the user in their bare-domain form.
+  console.warn(
+    `🚨 Found ${match.label} version of the site, please next time use this format instead: ${match.url.replace("sc-domain:", "")}`
+  );
+  return match.url;
+}
+
 /**
  * Retrieves the indexing status of a page.
  * @param accessToken - The access token for authentication.