Merge pull request #53 from urbanisierung/feat-add-retry-for-read-requests-if-rate-limited

feat(get-publish-metadata): optional retries if rate limited
This commit is contained in:
Guilherme Oenning 2024-05-15 09:46:08 +01:00 committed by GitHub
commit 621444cc57
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 83 additions and 18 deletions

View File

@ -4,7 +4,8 @@ Use this script to get your entire site indexed on Google in less than 48 hours.
You can read more about the motivation behind it and how it works in this blog post https://seogets.com/blog/google-indexing-script
> [!IMPORTANT]
> [!IMPORTANT]
>
> 1. Indexing != Ranking. This will not help your page rank on Google, it'll just let Google know about the existence of your pages.
> 2. This script uses [Google Indexing API](https://developers.google.com/search/apis/indexing-api/v3/quickstart). We do not recommend using this script on spam/low-quality content.
@ -80,6 +81,7 @@ google-indexing-script seogets.com
# cloned repository
npm run index seogets.com
```
</details>
<details>
@ -92,6 +94,7 @@ Run the script with the domain or url you want to index.
```bash
GIS_CLIENT_EMAIL=your-client-email GIS_PRIVATE_KEY=your-private-key gis seogets.com
```
</details>
<details>
@ -104,6 +107,7 @@ Once you have the values, run the script with the domain or url you want to inde
```bash
gis seogets.com --client-email your-client-email --private-key your-private-key
```
</details>
<details>
@ -116,18 +120,19 @@ npm i google-indexing-script
```
```javascript
import { index } from 'google-indexing-script'
import serviceAccount from './service_account.json'
import { index } from "google-indexing-script";
import serviceAccount from "./service_account.json";
index('seogets.com', {
index("seogets.com", {
client_email: serviceAccount.client_email,
private_key: serviceAccount.private_key
private_key: serviceAccount.private_key,
})
.then(console.log)
.catch(console.error)
.catch(console.error);
```
Read the [API documentation](https://paka.dev/npm/google-indexing-script) for more details.
</details>
Here's an example of what you should expect:
@ -135,10 +140,44 @@ Here's an example of what you should expect:
![](./output.png)
> [!IMPORTANT]
>
> - Your site must have 1 or more sitemaps submitted to Google Search Console. Otherwise, the script will not be able to find the pages to index.
> - You can run the script as many times as you want. It will only index the pages that are not already indexed.
> - Sites with a large number of pages might take a while to index, be patient.
## Quota
Depending on your account, several quotas are configured for the API (see [docs](https://developers.google.com/search/apis/indexing-api/v3/quota-pricing#quota)). By default, the script exits as soon as the rate limit is exceeded. You can enable a retry mechanism for read requests, whose quota applies on a per-minute basis.
<details>
<summary>With environment variables</summary>
```bash
export GIS_QUOTA_RPM_RETRY=true
```
</details>
<details>
<summary>As an npm module</summary>
```javascript
import { index } from 'google-indexing-script'
import serviceAccount from './service_account.json'
index('seogets.com', {
client_email: serviceAccount.client_email,
private_key: serviceAccount.private_key,
quota: {
rpmRetry: true
}
})
.then(console.log)
.catch(console.error)
```
</details>
## 🔀 Alternative
If you prefer a hands-free, and less technical solution, you can use a SaaS platform like [TagParrot](https://tagparrot.com/?via=goenning).

View File

@ -15,11 +15,20 @@ import { readFileSync, existsSync, mkdirSync, writeFileSync } from "fs";
import path from "path";
const CACHE_TIMEOUT = 1000 * 60 * 60 * 24 * 14; // 14 days
/**
 * Retry settings used when a read request is rate limited (HTTP 429).
 *
 * - `rpm.retries`: number of retries handed to `getPublishMetadata` when
 *   `options.quota.rpmRetry` is enabled.
 * - `rpm.waitingTime`: base delay in milliseconds; the wait before each retry
 *   is this value multiplied by the attempt number.
 */
export const QUOTA = {
  rpm: {
    retries: 3,
    waitingTime: 60 * 1000, // 1 minute, in milliseconds
  },
};
/**
 * Options accepted by `index`. Every field is optional; `index` fills in
 * missing values from CLI arguments or environment variables.
 */
export type IndexOptions = {
  /** Client email forwarded to `getAccessToken` (e.g. `serviceAccount.client_email`). */
  client_email?: string;
  /** Private key forwarded to `getAccessToken` (e.g. `serviceAccount.private_key`). */
  private_key?: string;
  /**
   * Path forwarded to `getAccessToken`; falls back to the `path` argument or `GIS_PATH`.
   * NOTE(review): presumably the location of the service account file — confirm.
   */
  path?: string;
  /** Rate-limit handling; when omitted, derived from the `rpm-retry` argument or `GIS_QUOTA_RPM_RETRY`. */
  quota?: {
    /** Read requests per minute: when true, wait and retry instead of exiting once the limit is hit. */
    rpmRetry?: boolean;
  };
};
/**
@ -27,10 +36,7 @@ export type IndexOptions = {
* @param input - The domain or site URL to index.
* @param options - (Optional) Additional options for indexing.
*/
export const index = async (
input: string = process.argv[2],
options: IndexOptions = {},
) => {
export const index = async (input: string = process.argv[2], options: IndexOptions = {}) => {
if (!input) {
console.error("❌ Please provide a domain or site URL as the first argument.");
console.error("");
@ -47,6 +53,11 @@ export const index = async (
if (!options.path) {
options.path = args["path"] || process.env.GIS_PATH;
}
if (!options.quota) {
options.quota = {
rpmRetry: args["rpm-retry"] === "true" || process.env.GIS_QUOTA_RPM_RETRY === "true",
};
}
const accessToken = await getAccessToken(options.client_email, options.private_key, options.path);
let siteUrl = convertToSiteUrl(input);
@ -145,7 +156,9 @@ export const index = async (
for (const url of indexablePages) {
console.log(`📄 Processing url: ${url}`);
const status = await getPublishMetadata(accessToken, url);
const status = await getPublishMetadata(accessToken, url, {
retriesOnRateLimit: options.quota.rpmRetry ? QUOTA.rpm.retries : 0,
});
if (status === 404) {
await requestIndexing(accessToken, url);
console.log("🚀 Indexing requested successfully. It may take a few days for Google to process it.");

View File

@ -1,4 +1,5 @@
import { webmasters_v3 } from "googleapis";
import { QUOTA } from "..";
import { Status } from "./types";
import { fetchRetry } from "./utils";
@ -202,9 +203,10 @@ export function getEmojiForStatus(status: Status) {
* Retrieves metadata for publishing from the given URL.
* @param accessToken - The access token for authentication.
* @param url - The URL for which to retrieve metadata.
* @param options - The options for the request.
* @returns The status of the request.
*/
export async function getPublishMetadata(accessToken: string, url: string) {
export async function getPublishMetadata(accessToken: string, url: string, options?: { retriesOnRateLimit: number }) {
const response = await fetchRetry(
`https://indexing.googleapis.com/v3/urlNotifications/metadata?url=${encodeURIComponent(url)}`,
{
@ -223,12 +225,23 @@ export async function getPublishMetadata(accessToken: string, url: string) {
}
if (response.status === 429) {
console.error("🚦 Rate limit exceeded, try again later.");
console.error("");
console.error(" Quota: https://developers.google.com/search/apis/indexing-api/v3/quota-pricing#quota");
console.error(" Usage: https://console.cloud.google.com/apis/enabled");
console.error("");
process.exit(1);
if (options?.retriesOnRateLimit && options?.retriesOnRateLimit > 0) {
const RPM_WATING_TIME = (QUOTA.rpm.retries - options.retriesOnRateLimit + 1) * QUOTA.rpm.waitingTime; // increase waiting time for each retry
console.log(
`🚦 Rate limit exceeded for read requests. Retries left: ${options.retriesOnRateLimit}. Waiting for ${
RPM_WATING_TIME / 1000
}sec.`
);
await new Promise((resolve) => setTimeout(resolve, RPM_WATING_TIME));
await getPublishMetadata(accessToken, url, { retriesOnRateLimit: options.retriesOnRateLimit - 1 });
} else {
console.error("🚦 Rate limit exceeded, try again later.");
console.error("");
console.error(" Quota: https://developers.google.com/search/apis/indexing-api/v3/quota-pricing#quota");
console.error(" Usage: https://console.cloud.google.com/apis/enabled");
console.error("");
process.exit(1);
}
}
if (response.status >= 500) {