Nathan Sobo d37a0dd916 Cache repeated regexp matches against the same string for another 5-10% boost
This is tricky… basically when the scanner is matching across the same string repeatedly, it can recycle previous results if the following conditions are true:

- The string is the same
- We're matching at a position >= the last position
- The result for the regex is a failure or starts >= the current start position
2012-09-27 15:53:02 -06:00

158 lines
4.9 KiB

#import <Cocoa/Cocoa.h>
#import <iostream>
#import "CocoaOniguruma/OnigRegexp.h"
#import "include/cef_base.h"
#import "include/cef_v8.h"
#import "onig_scanner.h"
namespace v8_extensions {
using namespace std;
extern NSString *stringFromCefV8Value(const CefRefPtr<CefV8Value>& value);
// Native state attached (via SetUserData) to the JavaScript object created by
// "buildScanner". It owns one compiled OnigRegexp per source pattern plus a
// parallel cache holding the most recent OnigResult for each pattern, so that
// repeated scans over the same string can reuse earlier results.
//
// Memory management is manual retain/release: every non-NULL pointer stored in
// regExps or cachedResults is retained by this object and released once.
class OnigScannerUserData : public CefBase {
 public:
  // Compiles every element of the V8 array |sources| into an OnigRegexp.
  // Both vectors end up with exactly one slot per pattern.
  OnigScannerUserData(CefRefPtr<CefV8Value> sources)
      : maxCachedIndex(-1), lastStartLocation(0) {
    int length = sources->GetArrayLength();
    if (length < 0) {
      length = 0;
    }

    regExps.reserve(length);
    cachedResults.resize(length);  // value-initialised, i.e. all NULL

    for (int i = 0; i < length; i++) {
      NSString *sourceString = stringFromCefV8Value(sources->GetValue(i));
      regExps.push_back([[OnigRegexp compile:sourceString] retain]);
    }
  }

  // Releases every compiled pattern and every cached result.
  ~OnigScannerUserData() {
    for (vector<OnigRegexp *>::iterator iter = regExps.begin(); iter != regExps.end(); ++iter) {
      [*iter release];
    }
    for (vector<OnigResult *>::iterator iter = cachedResults.begin(); iter != cachedResults.end(); ++iter) {
      [*iter release];
    }
  }

  // Searches |v8String| from |v8StartLocation| with each pattern in order and
  // picks the match with the smallest start location (the earliest pattern
  // wins ties).
  //
  // Returns a V8 object with two properties:
  //   "index"          - position of the winning pattern in the source array
  //   "captureIndices" - see CaptureIndicesForMatch
  // or a V8 null when no pattern matches.
  CefRefPtr<CefV8Value> FindNextMatch(CefRefPtr<CefV8Value> v8String, CefRefPtr<CefV8Value> v8StartLocation) {
    std::string string = v8String->GetStringValue().ToString();
    int startLocation = v8StartLocation->GetIntValue();
    int bestIndex = -1;
    int bestLocation = 0;
    OnigResult *bestResult = NULL;

    // Cached results are only meaningful for the same string scanned from the
    // same or a later position; anything else invalidates the whole cache.
    bool useCachedResults = (string == lastMatchedString && startLocation >= lastStartLocation);
    lastStartLocation = startLocation;

    if (!useCachedResults) {
      ClearCachedResults();
      lastMatchedString = string;
    }

    // Converted lazily, and at most once per call, since it is only needed
    // when some pattern actually has to be searched again.
    NSString *subject = nil;

    const int regExpCount = static_cast<int>(regExps.size());
    for (int index = 0; index < regExpCount; index++) {
      OnigRegexp *regExp = regExps[index];
      OnigResult *result = NULL;
      bool useCachedResult = false;

      if (useCachedResults && index <= maxCachedIndex) {
        // A cached NULL is treated as "still no match"; a cached match is
        // reused only if it starts at or after the new start location.
        result = cachedResults[index];
        useCachedResult = (result == NULL || [result locationAt:0] >= startLocation);
      }

      if (!useCachedResult) {
        if (subject == nil) {
          subject = [NSString stringWithUTF8String:string.c_str()];
        }
        result = [regExp search:subject start:startLocation];

        // Retain the new result before dropping the old one so the slot never
        // holds a dangling pointer and the previous result is not leaked.
        OnigResult *previous = cachedResults[index];
        cachedResults[index] = [result retain];
        [previous release];
        maxCachedIndex = index;
      }

      if ([result count] > 0) {
        int location = [result locationAt:0];
        if (bestIndex == -1 || location < bestLocation) {
          bestLocation = location;
          bestResult = result;
          bestIndex = index;
        }

        // Nothing can start earlier than the start location itself.
        if (location == startLocation) {
          break;
        }
      }
    }

    if (bestIndex >= 0) {
      CefRefPtr<CefV8Value> result = CefV8Value::CreateObject(NULL);
      result->SetValue("index", CefV8Value::CreateInt(bestIndex), V8_PROPERTY_ATTRIBUTE_NONE);
      result->SetValue("captureIndices", CaptureIndicesForMatch(bestResult), V8_PROPERTY_ATTRIBUTE_NONE);
      return result;
    } else {
      return CefV8Value::CreateNull();
    }
  }

  // Releases every cached result, resets each slot to NULL and marks the cache
  // as empty (maxCachedIndex == -1).
  void ClearCachedResults() {
    maxCachedIndex = -1;
    for (vector<OnigResult *>::iterator iter = cachedResults.begin(); iter != cachedResults.end(); ++iter) {
      [*iter release];
      *iter = NULL;
    }
  }

  // Flattens |result| into a V8 array of triples, one per capture group:
  //   [captureIndex, start, end, captureIndex, start, end, ...]
  // where end is start + length of the capture.
  CefRefPtr<CefV8Value> CaptureIndicesForMatch(OnigResult *result) {
    int resultCount = [result count];
    CefRefPtr<CefV8Value> array = CefV8Value::CreateArray(resultCount * 3);
    int i = 0;

    for (int index = 0; index < resultCount; index++) {
      int captureLength = [result lengthAt:index];
      int captureStart = [result locationAt:index];

      array->SetValue(i++, CefV8Value::CreateInt(index));
      array->SetValue(i++, CefV8Value::CreateInt(captureStart));
      array->SetValue(i++, CefV8Value::CreateInt(captureStart + captureLength));
    }

    return array;
  }

 private:
  std::vector<OnigRegexp *> regExps;        // retained compiled patterns
  std::string lastMatchedString;            // string the cache was built for
  std::vector<OnigResult *> cachedResults;  // retained; parallel to regExps
  int maxCachedIndex;                       // index most recently written to the cache; -1 when empty
  int lastStartLocation;                    // start location of the previous call

  // Supplies the reference-counting members that CefBase requires.
  IMPLEMENT_REFCOUNTING(OnigScannerUserData);
};
// Loads the JavaScript half of the extension from the application bundle
// (v8_extensions/onig_scanner.js) and registers it with CEF under the name
// "v8/onig-scanner", with this object as the native handler.
OnigScanner::OnigScanner() : CefV8Handler() {
  NSString *resourcePath = [[NSBundle mainBundle] resourcePath];
  NSString *filePath = [resourcePath stringByAppendingPathComponent:@"v8_extensions/onig_scanner.js"];

  NSError *error = nil;
  NSString *extensionCode = [NSString stringWithContentsOfFile:filePath
                                                      encoding:NSUTF8StringEncoding
                                                         error:&error];
  if (extensionCode == nil) {
    // Without the script there is nothing to register; report the failure
    // instead of handing a NULL C string to CEF.
    NSLog(@"OnigScanner: unable to read %@: %@", filePath, error);
    return;
  }

  CefRegisterExtension("v8/onig-scanner", [extensionCode UTF8String], this);
}
// Dispatches native calls made by the extension's JavaScript.
//
//   "buildScanner"(sources)          -> new object carrying an
//                                       OnigScannerUserData built from sources
//   "findNextMatch"(string, start)   -> result of FindNextMatch on the user
//                                       data attached to |object|
//
// Returns true when |name| was handled (including when |exception| was set
// because the call was malformed) and false for any other name.
bool OnigScanner::Execute(const CefString& name,
                          CefRefPtr<CefV8Value> object,
                          const CefV8ValueList& arguments,
                          CefRefPtr<CefV8Value>& retval,
                          CefString& exception) {
  if (name == "findNextMatch") {
    if (arguments.size() < 2) {
      exception = "findNextMatch requires a string and a start location";
      return true;
    }

    // Hold a reference for the duration of the call rather than using a raw
    // pointer taken from a temporary.
    CefRefPtr<CefBase> userDataRef = object->GetUserData();
    OnigScannerUserData *userData = static_cast<OnigScannerUserData *>(userDataRef.get());
    if (userData == NULL) {
      exception = "findNextMatch called on an object that is not a scanner";
      return true;
    }

    retval = userData->FindNextMatch(arguments[0], arguments[1]);
    return true;
  }

  if (name == "buildScanner") {
    if (arguments.empty()) {
      exception = "buildScanner requires an array of pattern sources";
      return true;
    }

    retval = CefV8Value::CreateObject(NULL);
    retval->SetUserData(new OnigScannerUserData(arguments[0]));
    return true;
  }

  return false;
}
} // namespace v8_extensions