Merge branch 'fix-pipeline' into 'development'

fix: perbaikan file .gitlab-ci.yml dan docker-compose untuk LTI API See merge request mbugroup/lti-api!33
2026-05-20 13:31:56 +00:00 · 2025-10-21 16:56:56 +00:00
parent 452403d71e bb60e987e5
commit 85dfc33191
3546 changed files with 4952571 additions and 107 deletions
@@ -3,9 +3,13 @@ root = "."
 tmp_dir = "tmp"

 [build]
-cmd = "go build -o ./tmp/main ./cmd/api"
-bin = "tmp/main"
-full_bin = "APP_ENV=dev ./tmp/main"
+# Build the main binary
+cmd = "go build -o /lti-api/tmp/main ./cmd/api"
+# Location of the built binary
+bin = "/lti-api/tmp/main"
+# Run the binary directly with the dev environment (APP_ENV=dev)
+full_bin = "APP_ENV=dev /lti-api/tmp/main"
+# File extensions watched by Air
 include_ext = ["go", "tpl", "tmpl", "html"]
 exclude_dir = ["vendor", "tmp"]

@@ -0,0 +1,58 @@
+# .env.lti-api (Development Server with Domain)
+# =============================================
+
+# Server configuration
+VERSION=0.0.1
+APP_ENV=dev
+APP_HOST=0.0.0.0
+APP_PORT=8081
+APP_URL=https://dev-api-lti.mbugroup.id
+
+# Database configuration (uses the SSO stack's PostgreSQL instance)
+DB_HOST=sso-postgres
+DB_USER=postgres
+DB_PASSWORD=postgres
+DB_NAME=db_lti_erp
+DB_PORT=5432
+
+# JWT configuration
+JWT_SECRET=changeme
+JWT_ACCESS_EXP_MINUTES=30
+JWT_REFRESH_EXP_DAYS=30
+JWT_RESET_PASSWORD_EXP_MINUTES=10
+JWT_VERIFY_EMAIL_EXP_MINUTES=10
+
+# Redis (uses the SSO stack's Redis instance)
+REDIS_URL=redis://sso-redis:6379/0
+
+# CORS configuration
+CORS_ALLOW_ORIGINS=https://dev-api-sso.mbugroup.id,https://dev-lti.mbugroup.id,https://dev-api-lti.mbugroup.id,http://localhost:3000
+CORS_ALLOW_METHODS=GET,POST,PUT,PATCH,DELETE,OPTIONS
+CORS_ALLOW_HEADERS=Authorization,Content-Type,X-Requested-With
+CORS_EXPOSE_HEADERS=Link,Location
+CORS_ALLOW_CREDENTIALS=true
+CORS_MAX_AGE=600
+
+# SSO integration (use the SSO backend domain)
+SSO_ISSUER=https://dev-api-sso.mbugroup.id
+SSO_JWKS_URL=https://dev-api-sso.mbugroup.id/api/.well-known/jwks.json
+SSO_ALLOWED_AUDIENCES=
+SSO_AUTHORIZE_URL=https://dev-api-sso.mbugroup.id/api/sso/authorize
+SSO_TOKEN_URL=https://dev-api-sso.mbugroup.id/api/sso/token
+SSO_GETME_URL=https://dev-api-sso.mbugroup.id/api/auth/get-me
+
+# Cookie & session configuration
+SSO_ACCESS_COOKIE_NAME=sso_access
+SSO_REFRESH_COOKIE_NAME=sso_refresh
+SSO_COOKIE_DOMAIN=.mbugroup.id
+SSO_COOKIE_SECURE=true
+SSO_COOKIE_SAMESITE=Lax
+SSO_PKCE_TTL_SECONDS=300
+
+# SSO webhook / user sync settings
+SSO_USER_SYNC_SIGNATURE_DRIFT_SECONDS=120
+SSO_USER_SYNC_NONCE_TTL_SECONDS=600
+SSO_USER_SYNC_MAX_BODY_BYTES=32768
+
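+# Client registration for SSO. NOTE(review): sync_secret below appears to be a real credential committed in plain text - rotate it and inject it at deploy time instead.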
+SSO_CLIENTS={"Lumbung-Telur-Indonesia":{"public_id":"Lumbung-Telur-Indonesia","redirect_uri":"https://dev-api-lti.mbugroup.id/api/sso/callback","scope":"openid profile","default_return_uri":"https://dev-lti.mbugroup.id","allowed_return_origins":["https://dev-lti.mbugroup.id","http://localhost:3000"],"sync_secret":"onUyfODIMHOh4TgGLgyWLmsNeVNxFRHqoLJFLPjr"}}
@@ -1,120 +1,59 @@
-# --- Load .env kalau ada, dan export ke shell child ---
-ifneq (,$(wildcard .env))
-include .env
-export
-endif
+# ===============================
+# LTI-API Makefile (Docker Setup)
+# ===============================

-# --- Konfigurasi umum ---
-COMPOSE          ?= docker compose -f docker-compose.local.yml
-NETWORK          ?= lti-api_go-network
-MIGRATE_IMAGE    ?= migrate/migrate
-MIGRATIONS_DIR   := $(PWD)/internal/database/migrations
+APP_NAME := lti-api
+COMPOSE := docker compose -f docker-compose.yaml
+NETWORK := lti-network
+ENV_FILE := .env.lti-api

-# Fallback agar tetap jalan meski .env kosong
-DB_HOST          ?= postgresdb
-DB_PORT          ?= 5432
-DB_USER          ?= postgres
-DB_PASSWORD      ?= postgres
-DB_NAME          ?= db_lti_erp
+# Load the KEY=VALUE settings from $(ENV_FILE) as make variables. The file must exist, and because
+# it is parsed with make syntax its values must not contain a literal dollar sign or hash.
+include $(ENV_FILE)
+# Export only real KEY=... assignments to recipe shells. Comment and blank lines in $(ENV_FILE)
+# are skipped, so stray words from comments are never exported as (empty) variables.
+export $(shell sed -n '/^[A-Za-z_][A-Za-z0-9_]*=/s/=.*//p' $(ENV_FILE))

-DB_URL           := postgres://$(DB_USER):$(DB_PASSWORD)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)?sslmode=disable
+# Absolute host path: `docker run -v` accepts a relative source only on newer Docker releases.
+MIGRATIONS_DIR := $(CURDIR)/migrations
+MIGRATE_IMAGE := migrate/migrate:v4.15.2
+# NOTE(review): host and port are hard-coded; DB_HOST / DB_PORT from $(ENV_FILE) are not used here - confirm this is intended.
+DB_URL := postgres://$(DB_USER):$(DB_PASSWORD)@lti-postgres:5432/$(DB_NAME)?sslmode=disable
+
+# None of these targets produce a file; declaring them phony stops a same-named file from masking them.
+.PHONY: docker-local docker-down docker-nuke wait-db migrate-up migrate-down seed psql logs restart status

-# Tunggu DB ready memakai pg_isready dari image postgres
-WAIT_DB          := docker run --rm --network $(NETWORK) postgres:alpine \
-	sh -c 'until pg_isready -h $(DB_HOST) -p $(DB_PORT) -U $(DB_USER) -d $(DB_NAME); do echo "waiting for postgres..."; sleep 1; done'
-
-# Default target
-.DEFAULT_GOAL := start
-
-# --- Daftar phony targets ---
-.PHONY: start build test lint gen \
-        db-up wait-db \
-        migration-% migrate-up migrate-down migrate-fresh \
-        seed \
-        docker-local docker-down docker-nuke docker-cache psql
-
-# --- Go workflow ---
-start:
-	@go run cmd/api/main.go
-
-build:
-	@go build -o tmp/app ./cmd/api
-
-test:
-	@go test ./test/...
-
-lint:
-	@golangci-lint run
-
-# --- Compose / DB helpers ---
-db-up:
-	@$(COMPOSE) up -d postgresdb
-
-wait-db:
-	@$(WAIT_DB)
-
-# --- Migration (pembuatan file) ---
-# Contoh: make migration-create_users_table
-# ":" akan diubah ke "_" (biar aman untuk nama file)
-migration-%:
-	@migrate create -ext sql -dir internal/database/migrations $(subst :,_,$*)
-
-# --- Migration (apply via docker image 'migrate') ---
-migrate-up: db-up wait-db
-	@docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
-		$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" up
-
-# Contoh:
-#   make migrate-down step=2   → rollback 2 step
-#   make migrate-down          → rollback semua
-
-migrate-down: db-up wait-db
-	@if [ -n "$(step)" ]; then \
-		echo "⬇️  Migrating down $(step) step(s)..."; \
-		docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
-			$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" down $(step); \
-	else \
-		echo "⬇️  Migrating down ALL steps..."; \
-		docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
-			$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" down -all; \
-	fi
-
-migrate-fresh: migrate-down migrate-up
-	@true
-
-# Pakai: make migrate-force v=20250917120000
-migrate-force:
-	@docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
-		$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" force $(v)
-
-
-# --- Seeder ---
-seed: db-up wait-db
-	@$(COMPOSE) run --rm app go run cmd/seed/main.go
-
-# --- Docker orchestration convenience ---
+# --- Docker ---
 docker-local:
+	@echo "🚀 Starting $(APP_NAME) with local PostgreSQL & Redis..."
 	@$(COMPOSE) up --build -d

 docker-down:
 	@$(COMPOSE) down --remove-orphans

-# ⚠️ Akan menghapus container, images dan volumes.
 docker-nuke:
+	@echo "💣 Removing all containers, images, and volumes..."
 	@$(COMPOSE) down --rmi all --volumes --remove-orphans

-docker-cache:
-	@docker builder prune -f
+# --- Database / Migration ---

-# --- PSQL shell ke DB di container ---
-psql: db-up
-	@$(COMPOSE) exec -it postgresdb psql -U $(DB_USER) -d $(DB_NAME)
+wait-db: ## Poll lti-postgres:5432 with nc from a one-off `app` container until the port accepts connections
+	@echo "⏳ Waiting for database lti-postgres to be ready (inside Docker network)..."
+	@$(COMPOSE) run --rm app sh -c 'until nc -z lti-postgres 5432; do echo "Waiting for DB..."; sleep 2; done; echo "✅ Database is ready!"'

-# Single feature
-# example: make gen feat=product-category
+migrate-up: wait-db ## Run `migrate up` from $(MIGRATE_IMAGE) against $(DB_URL) once the database is reachable
+	@echo "⬆️  Running migrations..."
+	@docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
+		$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" up

-# Sub feature
-# make gen feat=master/area
-gen:
-	@go run tools/gen.go $(feat)
-# 	@goimports -w internal
+migrate-down: wait-db ## Run `migrate down -all` against $(DB_URL), rolling back every applied migration
+	@echo "⬇️  Rolling back all migrations..."
+	@docker run --rm -v $(MIGRATIONS_DIR):/migrations --network $(NETWORK) \
+		$(MIGRATE_IMAGE) -path=/migrations/ -database "$(DB_URL)" down -all
+
+seed: ## Run cmd/seed/main.go with `go run` inside a one-off `app` container
+	@echo "🌱 Running seed script..."
+	@$(COMPOSE) run --rm app go run cmd/seed/main.go
+
+psql: ## Open an interactive psql session in the lti-postgres container as $(DB_USER) on $(DB_NAME)
+	@docker exec -it lti-postgres psql -U $(DB_USER) -d $(DB_NAME)
+
+logs: ## Follow the logs of the `app` compose service
+	@$(COMPOSE) logs -f app
+
+restart: ## Restart the services of the compose project
+	@$(COMPOSE) restart
+
+status: ## List the containers of the compose project
+	@$(COMPOSE) ps
@@ -0,0 +1,77 @@
+version: "3.9"
+
+services:
+  # API service: runs the application under Air from /lti-api, where the repository is bind-mounted.
+  dev-lti-api:
+    container_name: dev-lti-api
+    build:
+      context: .
+      dockerfile: Dockerfile.local
+    image: dev-lti-api:latest
+    working_dir: /lti-api
+    command: air -c .air.toml
+    ports:
+      - "8081:8081"
+    env_file:
+      - .env.lti-api
+    environment:
+      # Override the env_file values so the API connects to the containers defined in this file.
+      DB_HOST: dev-lti-postgres
+      DB_PORT: 5432
+      REDIS_URL: redis://dev-lti-redis:6379/0
+    volumes:
+      - .:/lti-api
+      - ./internal/config/jwtRS256.key:/run/keys/jwtRS256.key
+      - ./internal/config/jwtRS256.key.pub:/run/keys/jwtRS256.key.pub
+    depends_on:
+      # Wait until each dependency's healthcheck passes, not merely until its container has started.
+      dev-lti-postgres:
+        condition: service_healthy
+      dev-lti-redis:
+        condition: service_healthy
+    networks:
+      - lti-network
+    healthcheck:
+      # Healthy once GET /healthz on port 8081 succeeds from inside the container.
+      test: ["CMD-SHELL", "wget -qO- http://localhost:8081/healthz || exit 1"]
+      interval: 10s
+      timeout: 3s
+      retries: 10
+      start_period: 10s
+
+  dev-lti-postgres:  # PostgreSQL 15 (Alpine image) used by dev-lti-api
+    image: postgres:15-alpine
+    container_name: dev-lti-postgres
+    restart: always
+    environment:  # each ${VAR:-default} uses the default when VAR is unset or empty
+      POSTGRES_USER: ${DB_USER:-postgres}
+      POSTGRES_PASSWORD: ${DB_PASSWORD:-postgres}
+      POSTGRES_DB: ${DB_NAME:-db_lti_erp}
+    ports:
+      - "5433:5432"  # host port 5433 -> container port 5432
+    volumes:
+      - dev-lti-postgres-data:/var/lib/postgresql/data  # named volume declared under the top-level volumes key
+    networks:
+      - lti-network
+    healthcheck:  # healthy once pg_isready succeeds for the configured user and database
+      test: ["CMD-SHELL", "pg_isready -U ${DB_USER:-postgres} -d ${DB_NAME:-db_lti_erp}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 5s
+
+  dev-lti-redis:  # Redis 7 (Alpine image) used by dev-lti-api
+    image: redis:7-alpine
+    container_name: dev-lti-redis
+    restart: always
+    ports:
+      - "6380:6379"  # host port 6380 -> container port 6379
+    networks:
+      - lti-network
+    healthcheck:  # healthy once `redis-cli ping` succeeds
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 3s
+      retries: 10
+
+networks:
+  lti-network:
+    driver: bridge
+
+volumes:
+  dev-lti-postgres-data:
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2021 Micah Parks
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
@@ -0,0 +1,69 @@
+package keyfunc
+
+import (
+	"crypto/ecdsa"
+	"crypto/elliptic"
+	"errors"
+	"fmt"
+	"math/big"
+)
+
+const (
+	// ktyEC is the key type (kty) value of a JWK that holds an ECDSA public key.
+	ktyEC = "EC"
+
+	// p256 is the JWK curve (crv) name of the 256-bit elliptic curve P-256.
+	p256 = "P-256"
+
+	// p384 is the JWK curve (crv) name of the 384-bit elliptic curve P-384.
+	p384 = "P-384"
+
+	// p521 is the JWK curve (crv) name of the 521-bit elliptic curve P-521.
+	p521 = "P-521"
+)
+
+var (
+	// ErrECDSACurve is wrapped by the error ECDSA returns when a JWK names a curve other than P-256, P-384, or P-521.
+	ErrECDSACurve = errors.New("invalid ECDSA curve")
+)
+
+// ECDSA parses a jsonWebKey and turns it into an ECDSA public key.
+func (j *jsonWebKey) ECDSA() (publicKey *ecdsa.PublicKey, err error) {
+	if j.X == "" || j.Y == "" || j.Curve == "" {
+		return nil, fmt.Errorf("%w: %s", ErrMissingAssets, ktyEC)
+	}
+
+	// Decode the X coordinate from Base64.
+	//
+	// According to RFC 7518, this is a Base64 URL unsigned integer.
+	// https://tools.ietf.org/html/rfc7518#section-6.3
+	xCoordinate, err := base64urlTrailingPadding(j.X)
+	if err != nil {
+		return nil, err
+	}
+	yCoordinate, err := base64urlTrailingPadding(j.Y)
+	if err != nil {
+		return nil, err
+	}
+
+	publicKey = &ecdsa.PublicKey{}
+	switch j.Curve {
+	case p256:
+		publicKey.Curve = elliptic.P256()
+	case p384:
+		publicKey.Curve = elliptic.P384()
+	case p521:
+		publicKey.Curve = elliptic.P521()
+	default:
+		return nil, fmt.Errorf("%w: unknown curve: %s", ErrECDSACurve, j.Curve)
+	}
+
+	// Turn the X coordinate into *big.Int.
+	//
+	// According to RFC 7517, these numbers are in big-endian format.
+	// https://tools.ietf.org/html/rfc7517#appendix-A.1
+	publicKey.X = big.NewInt(0).SetBytes(xCoordinate)
+	publicKey.Y = big.NewInt(0).SetBytes(yCoordinate)
+
+	return publicKey, nil
+}
@@ -0,0 +1,29 @@
+package keyfunc
+
+import (
+	"crypto/ed25519"
+	"fmt"
+)
+
+const (
+	// ktyOKP is the key type (kty) value of a JWK that holds an EdDSA public key.
+	ktyOKP = "OKP"
+)
+
+// EdDSA converts the receiver's X parameter into an EdDSA (ed25519) public key.
+//
+// It returns an error wrapping ErrMissingAssets when X is empty, and the error from
+// base64urlTrailingPadding when X cannot be decoded.
+func (j *jsonWebKey) EdDSA() (publicKey ed25519.PublicKey, err error) {
+	if len(j.X) == 0 {
+		return nil, fmt.Errorf("%w: %s", ErrMissingAssets, ktyOKP)
+	}
+
+	// Per RFC 8037 the public key is carried as Base64 URL encoded bytes.
+	// https://datatracker.ietf.org/doc/html/rfc8037#appendix-A.2
+	decoded, decodeErr := base64urlTrailingPadding(j.X)
+	if decodeErr != nil {
+		return nil, decodeErr
+	}
+
+	return decoded, nil
+}
@@ -0,0 +1,130 @@
+{
+  "keys": [
+    {
+      "kid": "zXew0UJ1h6Q4CCcd_9wxMzvcp5cEBifH0KWrCz2Kyxc",
+      "kty": "RSA",
+      "alg": "PS256",
+      "use": "sig",
+      "n": "wqS81x6fItPUdh1OWCT8p3AuLYgFlpmg61WXp6sp1pVijoyF29GOSaD9xE-vLtegX-5h0BnP7va0bwsOAPdh6SdeVslEifNGHCtID0xNFqHNWcXSt4eLfQKAPFUq0TsEO-8P1QHRq6yeG8JAFaxakkaagLFuV8Vd_21PGJFWhvJodJLhX_-Ym9L8XUpIPps_mQriMUOWDe-5DWjHnDtfV7mgaOxbBvVo3wj8V2Lmo5Li4HabT4MEzeJ6e9IdFo2kj_44Yy9osX-PMPtu8BQz_onPgf0wjrVWt349Rj6OkS8RxlNGYeuIxYZr0TOhP5F-yEPhSXDsKdVTwPf7zAAaKQ",
+      "e": "AQAB",
+      "x5c": [
+        "MIICmzCCAYMCBgF4HR7HNDANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzEwMTcwOTE5WhcNMzEwMzEwMTcxMDU5WjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDCpLzXHp8i09R2HU5YJPyncC4tiAWWmaDrVZenqynWlWKOjIXb0Y5JoP3ET68u16Bf7mHQGc/u9rRvCw4A92HpJ15WyUSJ80YcK0gPTE0Woc1ZxdK3h4t9AoA8VSrROwQ77w/VAdGrrJ4bwkAVrFqSRpqAsW5XxV3/bU8YkVaG8mh0kuFf/5ib0vxdSkg+mz+ZCuIxQ5YN77kNaMecO19XuaBo7FsG9WjfCPxXYuajkuLgdptPgwTN4np70h0WjaSP/jhjL2ixf48w+27wFDP+ic+B/TCOtVa3fj1GPo6RLxHGU0Zh64jFhmvRM6E/kX7IQ+FJcOwp1VPA9/vMABopAgMBAAEwDQYJKoZIhvcNAQELBQADggEBALILq1Z4oQNJZEUt24VZcvknsWtQtvPxl3JNcBQgDR5/IMgl5VndRZ9OT56KUqrR5xRsWiCvh5Lgv4fUEzAAo9ToiPLub1SKP063zWrvfgi3YZ19bty0iXFm7l2cpQ3ejFV7WpcdLJE0lapFdPLo6QaRdgNu/1p4vbYg7zSK1fQ0OY5b3ajhAx/bhWlrN685owRbO5/r4rUOa6oo9l4Qn7jUxKUx4rcoe7zUM7qrpOPqKvn0DBp3n1/+9pOZXCjIfZGvYwP5NhzBDCkRzaXcJHlOqWzMBzyovVrzVmUilBcj+EsTYJs0gVXKzduX5zO6YWhFs23lu7AijdkxTY65YM0="
+      ],
+      "x5t": "IYIeevIT57t8ppUejM42Bqx6f3I",
+      "x5t#S256": "TuOrBy2NcTlFSWuZ8Kh8W8AjQagb4fnfP1SlKMO8-So"
+    },
+    {
+      "kid": "ebJxnm9B3QDBljB5XJWEu72qx6BawDaMAhwz4aKPkQ0",
+      "kty": "EC",
+      "alg": "ES512",
+      "use": "sig",
+      "crv": "P-521",
+      "x": "YQ95Xj8MTzcHytbU1h8YkCN2kdEQA7ThuZ1ctB9Ekiw6tlM9RwL62eQvzEt4Rz8qN69uRqgU9RzxQOkSU5xVvyo",
+      "y": "SMMuP3QnAPHtx7Go2ARsG3NBaySWBLmVvS8s2Ss7Vm_ISWenNbdjKOsY1XvtiQz5scGzWDCEUoZzgV8Ve1mLOV0"
+    },
+    {
+      "kid": "TVAAet63O3xy_KK6_bxVIu7Ra3_z1wlB543Fbwi5VaU",
+      "kty": "EC",
+      "alg": "ES384",
+      "use": "sig",
+      "crv": "P-384",
+      "x": "Pik2o5as-evijFABH5p6YLXHnWw8iQ_N1ummPY1c_UgG6NO0za-gNOhTz2-tsd_w",
+      "y": "e98VSff71k19SY_mHgp3707lgQVrhfVpiGa-sGaKxOWVpxd2jWMhB0Q4RpSRuCp5"
+    },
+    {
+      "kid": "arlUxX4hh56rNO-XdIPhDT7bqBMqcBwNQuP_TnZJNGs",
+      "kty": "RSA",
+      "alg": "RS512",
+      "use": "sig",
+      "n": "hhtifu8LL3ICE3BAX5l1KZv6Lni0lhlhBusSfepnpxcb4C_z2U71cQTnLY27kt8WB4bNG6e5_KMx9K3xUdd3euj9MCq8vytwEPieeHE1KXQuhJfLv017lhpK_dRMOHyc-9-50YNdgs_8KWRkrzjjuYrCiO9Iu76n5319e-SC8OPvNUglqxp2N0Sp2ltne2ZrpN8T3OEEXT62TSGmLAVopRGw5gllNVrJfmEyZJCRrBM6s5CQcz8un0FjkAAC4DI6QD-eBL0qG3_NR0hQvR1he2o4BLwjOKH45Pk_jj-eArp-DD6Xq6ABQVb5SNOSdaxl5lnmuotRoY3G5d9YSl-K3w",
+      "e": "AQAB",
+      "x5c": [
+        "MIICmzCCAYMCBgF4HSCcDzANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzEwMTcxMTE5WhcNMzEwMzEwMTcxMjU5WjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCGG2J+7wsvcgITcEBfmXUpm/oueLSWGWEG6xJ96menFxvgL/PZTvVxBOctjbuS3xYHhs0bp7n8ozH0rfFR13d66P0wKry/K3AQ+J54cTUpdC6El8u/TXuWGkr91Ew4fJz737nRg12Cz/wpZGSvOOO5isKI70i7vqfnfX175ILw4+81SCWrGnY3RKnaW2d7Zmuk3xPc4QRdPrZNIaYsBWilEbDmCWU1Wsl+YTJkkJGsEzqzkJBzPy6fQWOQAALgMjpAP54EvSobf81HSFC9HWF7ajgEvCM4ofjk+T+OP54Cun4MPperoAFBVvlI05J1rGXmWea6i1Ghjcbl31hKX4rfAgMBAAEwDQYJKoZIhvcNAQELBQADggEBAB7bpwPoL02WGCCVhCsbDkq9GeFUwF01opVyFTijZlTUoTf5RcaR2qAH9/irkLjZeFeyozzC5mGvIVruBwnx/6l4PcAMxKK4YiheFVoO/dytpGMCj6ToNmKpjlXzOLAHelieWIUDtAFSYzENjIO01PyXTGYpxebpQCocJBvppj5HqARS9iNPcqBltMhxWrWmMu81tOG3Y7yd2xsIYXk6KjaoefLeN8Was4BPJ0zR6tTSEm6ZOvSRvlppqh84kz7LmWem7gGHAsY2G3tWBUmOdO/SMNMThqV62yLf7sKsuoE1w06lfmrf6D2zGwoEyz+TT6fdSkc34Yeh7+c01X6nFWU="
+      ],
+      "x5t": "geiCPGtT_10T8xGLUK1LA0_YQEE",
+      "x5t#S256": "dLp3_QNGwMbYll5VecnR8Q9NSeFVfqJPBTa2_8qf48I"
+    },
+    {
+      "kid": "tW6ae7TomE6_2jooM-sf9N_6lWg7HNtaQXrDsElBzM4",
+      "kty": "RSA",
+      "alg": "PS512",
+      "use": "sig",
+      "n": "p32N7jqKfMUB6_dKY1uZ3wizzPlBAXg9XrntfUcwNLRPfTBnshpt4uQBf3T8fexkbzhtR18oHvim-YvcWfC5eLGQmWHYiVwACa_C7oGqx51ijK2LRbUg4TKhnZX2X3Ld9xvr3HsosKh2UXn_Ay8nuvdfH-U6S7btT6a-AIFlt3BpqZP0EOl7rY-ie8nXoA13xX6BoyzYiNcugdYCU6czQcmTIJ1JLS0zohi4aTNehRt-1VMRpIMx7q7Ouq3Zhbi7RcDo-_D8FPRhWc2eEKd-h8ebFTIxEOrkguBIomjEFTf3SfYbOB_h-14v9Q2yz-NzyId3-ujRCQGC0hn-cixe2w",
+      "e": "AQAB",
+      "x5c": [
+        "MIICmzCCAYMCBgF4BKAxqzANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzA1MjMwMDEwWhcNMzEwMzA1MjMwMTUwWjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCnfY3uOop8xQHr90pjW5nfCLPM+UEBeD1eue19RzA0tE99MGeyGm3i5AF/dPx97GRvOG1HXyge+Kb5i9xZ8Ll4sZCZYdiJXAAJr8LugarHnWKMrYtFtSDhMqGdlfZfct33G+vceyiwqHZRef8DLye6918f5TpLtu1Ppr4AgWW3cGmpk/QQ6Xutj6J7ydegDXfFfoGjLNiI1y6B1gJTpzNByZMgnUktLTOiGLhpM16FG37VUxGkgzHurs66rdmFuLtFwOj78PwU9GFZzZ4Qp36Hx5sVMjEQ6uSC4EiiaMQVN/dJ9hs4H+H7Xi/1DbLP43PIh3f66NEJAYLSGf5yLF7bAgMBAAEwDQYJKoZIhvcNAQELBQADggEBAHVWNBTExqlg4LTcyhUXI5U0iNPcMIVdKDoGPDc3EPjXyYNyjURX0oZ6b1Wv5t+XGmpZRqJNYb92xraQatIzLEsRn4IrmzViP+dIyFU8BEDubixTxeqx7LSw2j6LIFnZ05XdmWknlksNTlqi4CT6KL+1c24+QU3CcmU3mkQEIPA2yC4SdAB1oXI0jh49uP6a+JrE7JREZGAdwbIpZ1cqV6acPiJW3tOYfLrHwo7KYn3KwJvIBHXgFBNwx7fl2gYNQ0VEGKub3qVwW5RO5R/6Tcla9uZEfEiamms/Pn4hFA1qbsNHtA9IRGVRSmVeBKDxRvo0fxOUXp+NuZxEnhsoP3I="
+      ],
+      "x5t": "f1l1fxICz1fe9mI-sSrtc19EDhU",
+      "x5t#S256": "NUJWRA4ADpLEg_SMkSoE4FKQN0H1Tlz85L-i7puVcqQ"
+    },
+    {
+      "kid": "Lx1FmayP2YBtxaqS1SKJRJGiXRKnw2ov5WmYIMG-BLE",
+      "kty": "RSA",
+      "alg": "PS384",
+      "use": "sig",
+      "n": "q7WM4SnrdzlFSo_A1DRhc-8Ho-pBsfs49kGRbw3O_OKFIUyZrzHaRuovW_QaEAyiO3HX8CNcGPcpHdmpl4DhTGEBLcd6xXtCaa65ct00Mq7ZHCRRCrKLh6lJ0rY9fP8vCV0RBigpkNoRfrqLQQN4VeVFTbGSrDaS0LzPbap0-q5FKXUR-OQmQEtOupXhKFQtbB73tL83YnG6Swl7nXsx54ulEoDzcCCYt7pjCVVp7L9fzI2_ucTdtQclAJVQZGKpsx7vabOJuiMUwuAIz56lOJyXRMePsW8UogwC4FA2A52STsYlhOPsDEW4iIExFVNqs-CGoDGhYLIavaCkZhXM0w",
+      "e": "AQAB",
+      "x5c": [
+        "MIICmzCCAYMCBgF4HR+9XjANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzEwMTcxMDIyWhcNMzEwMzEwMTcxMjAyWjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCrtYzhKet3OUVKj8DUNGFz7wej6kGx+zj2QZFvDc784oUhTJmvMdpG6i9b9BoQDKI7cdfwI1wY9ykd2amXgOFMYQEtx3rFe0Jprrly3TQyrtkcJFEKsouHqUnStj18/y8JXREGKCmQ2hF+uotBA3hV5UVNsZKsNpLQvM9tqnT6rkUpdRH45CZAS066leEoVC1sHve0vzdicbpLCXudezHni6USgPNwIJi3umMJVWnsv1/Mjb+5xN21ByUAlVBkYqmzHu9ps4m6IxTC4AjPnqU4nJdEx4+xbxSiDALgUDYDnZJOxiWE4+wMRbiIgTEVU2qz4IagMaFgshq9oKRmFczTAgMBAAEwDQYJKoZIhvcNAQELBQADggEBADTgP3SrcG3p9XUB7sM4a2IeY0J4bSEtqlZBuHgdgekYJ5DXETJ3hV/82GjitU50NBup0IJyI9KZ0KCwqHIKC2Jn/6biOpM9Ipk4BtNVzx3qKNsDac9qZmyMpm4V9QuWakajknerhwyynG3siGUntbPmLvf5UKvKtbiKlWS4dBPwfedIUnC85mYEnNKSzSI1NiM6TWHB9zQYkARXlb89sh0HBYs08BfRMyBVM+l3OczIyGeQAfhcL+pxPP/0jqPr1ctHUBj2zXkjZxDw1oJFgeD9GDtPcjc3spB20vsRtQUBlzbJElbGflqWGHJK5l5n7gNd3ZXZT0HJ+wUpPE8EUaM="
+      ],
+      "x5t": "fjRYR1986VCLzbaZaw5r25UKahw",
+      "x5t#S256": "ZHNHpizlsjD3qSZh7gJQQBu8W9jBL2HR0y7-3u2Wb-g"
+    },
+    {
+      "kid": "gnmAfvmlsi3kKH3VlM1AJ85P2hekQ8ON_XvJqs3xPD8",
+      "kty": "RSA",
+      "alg": "RS384",
+      "use": "sig",
+      "n": "qUNQewKl3APQcbpACMNJ2XphPpupt395z6OZvj5CW9tiRXY3J7dqi8U0bWoIhtmmc7Js6hjp-A5W_FVStuXlT1hLyjJsHeu9ZVPnfIl2MnYN83zQBKw8E4mFsVv0UXNvkVPBF_k0yXrz-ABleWLOgFGnkNU9csc3Z5aihHcwRmC_oS7PZ9Vc-l0xBCyF3YRHI-al8ppSHwFreOweF3-JP3poNAXd906_tjX2KlHSJmNqcUNiSfEluyCp02ALlRFKXUQ1HlfSupHcHySDlanfUyIzZgM9ysCvC1vfNdAuwZ44oUBMul_XPxxhzlewL2Y8PtSDLUDWGTIou8M8049D8Q",
+      "e": "AQAB",
+      "x5c": [
+        "MIICmzCCAYMCBgF4BJVfaDANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzA1MjI0ODIxWhcNMzEwMzA1MjI1MDAxWjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCpQ1B7AqXcA9BxukAIw0nZemE+m6m3f3nPo5m+PkJb22JFdjcnt2qLxTRtagiG2aZzsmzqGOn4Dlb8VVK25eVPWEvKMmwd671lU+d8iXYydg3zfNAErDwTiYWxW/RRc2+RU8EX+TTJevP4AGV5Ys6AUaeQ1T1yxzdnlqKEdzBGYL+hLs9n1Vz6XTEELIXdhEcj5qXymlIfAWt47B4Xf4k/emg0Bd33Tr+2NfYqUdImY2pxQ2JJ8SW7IKnTYAuVEUpdRDUeV9K6kdwfJIOVqd9TIjNmAz3KwK8LW9810C7BnjihQEy6X9c/HGHOV7AvZjw+1IMtQNYZMii7wzzTj0PxAgMBAAEwDQYJKoZIhvcNAQELBQADggEBABoThxhMd7Xiq4x0GJeoJFv2yDKXCL3dJEAEWtOr2+PqdeJl/ZfOxBXynIvrdtYnQdICztN5ydEgDsZ02piDsxZ+s/0SA0iqjw/MEoBYobmr8V+xwUv+WtRLpTBXqWGMuG7NEtrbjKid0iKLLAOAU4dcHQ49iOF9VLnbTkf1EXp4iphJreaubOXMwT6/JDzQPT1dRR34hlhYeKKzMSA0Cz5aYL1tI+eH12rar0MDczXykLChNS/8MlyTzreEf0siUiS9S1kj/lOZKQDg9E/z8fm5vmHEHzAVwf4ON5iO29tDsqLw7BeJqC4AESjliXIqMrdpFynfPnIsGgf3dnph5BM="
+      ],
+      "x5t": "CmRnQVduZWtEsdOC4mauUUsSWxA",
+      "x5t#S256": "BvC0LmuM8ZIApN3TQQZWWbGO-d082Ah5d3D6vPvahGw"
+    },
+    {
+      "kid": "CGt0ZWS4Lc5faiKSdi0tU0fjCAdvGROQRGU9iR7tV0A",
+      "kty": "EC",
+      "alg": "ES256",
+      "use": "sig",
+      "crv": "P-256",
+      "x": "DPW7n9yjfE6Rt-VvVmEdeu4QdW44qifocAPPDxACDDY",
+      "y": "-ejsVw8222-hg2dJWx3QV0hE4-I0Ujp7ZsWebE68JE0"
+    },
+    {
+      "kid": "C65q0EKQyhpd1m4fr7SKO2He_nAxgCtAdws64d2BLt8",
+      "kty": "RSA",
+      "alg": "RS256",
+      "use": "sig",
+      "n": "ja99ybDrLvw11Z4CvNlDI-kkqJEBpSnvDf0pZF2DvBlvYmeVYL_ChqIe8E9GyHUmLMdtO_jifSgOqE5b8vILwi1kZnJR7N857uEnbWM9YTeevi_RZ-E_hr4frW2NKJ78YGvCzwLKG2GgtSjj0zuTLnSaK8fCGzqXgy6paXNhgHUSZgGwvO0YItpMlyJeqEj1wGTWz1IyA1sguF1cC7K0fojPbPoBwrhvaAeoGRPLraE0rrBsQv8iiLwnRBIez9B1j0NiUG8Iad953Y7UzaKOAw8crIEK45NIK_yxHUpxqcHLjPIcRyIyJGioRyGK7cp-_7iPLOCutQc-u46mom1_ZQ",
+      "e": "AQAB",
+      "x5c": [
+        "MIICmzCCAYMCBgF4BJRpbzANBgkqhkiG9w0BAQsFADARMQ8wDQYDVQQDDAZtYXN0ZXIwHhcNMjEwMzA1MjI0NzE4WhcNMzEwMzA1MjI0ODU4WjARMQ8wDQYDVQQDDAZtYXN0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCNr33JsOsu/DXVngK82UMj6SSokQGlKe8N/SlkXYO8GW9iZ5Vgv8KGoh7wT0bIdSYsx207+OJ9KA6oTlvy8gvCLWRmclHs3znu4SdtYz1hN56+L9Fn4T+Gvh+tbY0onvxga8LPAsobYaC1KOPTO5MudJorx8IbOpeDLqlpc2GAdRJmAbC87Rgi2kyXIl6oSPXAZNbPUjIDWyC4XVwLsrR+iM9s+gHCuG9oB6gZE8utoTSusGxC/yKIvCdEEh7P0HWPQ2JQbwhp33ndjtTNoo4DDxysgQrjk0gr/LEdSnGpwcuM8hxHIjIkaKhHIYrtyn7/uI8s4K61Bz67jqaibX9lAgMBAAEwDQYJKoZIhvcNAQELBQADggEBAHrGJFhVNiQupIwkn2jiW/jBobm9CHUxOwQL5E7WdRz5uaOJ0v62PrynOQE9xim9Qk8bT3q7DThZs66U9bpIk3msKVRgXRfn5FZy1H5RKOlEEFZhGakPqSlC1yPbhUNhHXMs3GTzdGMLtYaGvSy6XM/8/zqVqVwgh6BpbAR9RfiSdyaiNTSBriu+n/tHW934G9J8UIzdfpVcb0Yt9y4o0UgIXt64NtGFq7zmNJijH88AxBZFB6eUUmQQCczebzoAjyYbVOes5gGFzboVWcyLe3iyD0vvsAVHJViXeiGoxhpKnc8ryISpRUBzsKngf5uZo3bnrD9PHLYBoGOHgzII1xw="
+      ],
+      "x5t": "5GNr3LeRXHWI4YR8-QTSsF98oTI",
+      "x5t#S256": "Dgd0_wZZqvRuf4GEISPNHREX-1ixTMIsrPeGzk0bCxs"
+    },
+    {
+      "kty": "OKP",
+      "d": "TJ0UPkOZDPfneEDSH2ETbLQWjrALD-BPZQR-E7mgPvY",
+      "use": "sig",
+      "crv": "Ed25519",
+      "kid": "Q56A",
+      "x": "iZli54E2SkbrOvAThwrnxn1AMIOaazi_ckl6B-hbDK8"
+    },
+    {
+      "kty": "oct",
+      "use": "sig",
+      "kid": "hmac",
+      "k": "V_8Ob8dVs6JuZx6expyjShoUgFgxoaovGjmGhesL2jA"
+    },
+    {
+      "e": "AQAB",
+      "use": "enc",
+      "kid": "kidWithBadUse",
+      "kty": "RSA",
+      "n": "znO8fsURSvghcjbMu2nysqZhsreTkj-y46YL39kctmlj7-qqVLuvTUtw0XvsxwLi9WWczz_BsAm2Rn6LzyhvXUXjj6uMP8tk-HhWc4RMXP-esqB7y6WUmR8SioT94SykuVhWMDxwkg7kXTg_GWEYibEFJ7YM16vVZ2Na5z2vRfMRy7VARXRhDrinJmW0B-oY9FurPTyaZSDqOr-3Qkhk1jm9-6ygFsOkmnd4Ljnq28t8hq_4k3bdZSolZv11boQS8vDO-Fo_2YoQVxm4YMIjcr8bxZcali2slOEytEC5ItOKTPA_CydM62sJubw7MuTrOKh6GJrq0xnw6MtqR46-MQ"
+    }
+  ]
+}
@@ -0,0 +1,247 @@
+package keyfunc
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"net/http"
+	"sync"
+	"time"
+)
+
+var (
+	// ErrRefreshImpossible is returned when a refresh is attempted on a JWKS that was not created from a remote
+	// resource.
+	ErrRefreshImpossible = errors.New("refresh impossible: JWKS was not created from a remote resource")
+
+	// defaultRefreshTimeout is the default duration for the context used to create the HTTP request for a refresh of
+	// the JWKS.
+	defaultRefreshTimeout = time.Minute
+)
+
+// Get builds a JWKS backed by the remote resource at jwksURL and performs the initial download.
+//
+// Options left unset fall back to defaults: http.DefaultClient, defaultRequestFactory, ResponseExtractorStatusOK,
+// defaultRefreshTimeout and, unless JWKUseNoWhitelist is set, a "use" whitelist of UseOmitted and UseSignature.
+//
+// If the initial download fails the error is returned, unless TolerateInitialJWKHTTPError is set; in that case the
+// error is handed to the RefreshErrorHandler (when present) and the JWKS starts out with no keys. A background refresh
+// goroutine is launched when RefreshInterval is non-zero or RefreshUnknownKID is true.
+func Get(jwksURL string, options Options) (jwks *JWKS, err error) {
+	jwks = &JWKS{jwksURL: jwksURL}
+	applyOptions(jwks, options)
+
+	// Fill in a default for everything the caller left unset.
+	if jwks.client == nil {
+		jwks.client = http.DefaultClient
+	}
+	if jwks.requestFactory == nil {
+		jwks.requestFactory = defaultRequestFactory
+	}
+	if jwks.responseExtractor == nil {
+		jwks.responseExtractor = ResponseExtractorStatusOK
+	}
+	if jwks.refreshTimeout == 0 {
+		jwks.refreshTimeout = defaultRefreshTimeout
+	}
+	if len(jwks.jwkUseWhitelist) == 0 && !options.JWKUseNoWhitelist {
+		jwks.jwkUseWhitelist = map[JWKUse]struct{}{
+			UseOmitted:   {},
+			UseSignature: {},
+		}
+	}
+
+	// Initial download. A failure is fatal unless the caller asked for it to be tolerated.
+	if err = jwks.refresh(); err != nil {
+		if !options.TolerateInitialJWKHTTPError {
+			return nil, err
+		}
+		if jwks.refreshErrorHandler != nil {
+			jwks.refreshErrorHandler(err)
+		}
+		jwks.keys = make(map[string]parsedJWK)
+	}
+
+	needsBackground := jwks.refreshInterval != 0 || jwks.refreshUnknownKID
+	if !needsBackground {
+		return jwks, nil
+	}
+
+	// The background goroutine gets its own cancelable context, derived from the configured one when there is one.
+	parent := jwks.ctx
+	if parent == nil {
+		parent = context.Background()
+	}
+	jwks.ctx, jwks.cancel = context.WithCancel(parent)
+	jwks.refreshRequests = make(chan refreshRequest, 1)
+	go jwks.backgroundRefresh()
+
+	return jwks, nil
+}
+
+// Refresh manually refreshes the JWKS with the remote resource. It can bypass the rate limit if configured to do so.
+// This function will return an ErrRefreshImpossible if the JWKS was created from a static source like given keys or raw
+// JSON, because there is no remote resource to refresh from.
+//
+// When a background goroutine exists, the request is handed to it and this function waits until that goroutine
+// releases the request. An error is returned if ctx ends first, or if the background goroutine has ended and can no
+// longer serve the request. Without a background goroutine the refresh is performed inline.
+//
+// This function will block until the refresh is finished or an error occurs.
+func (j *JWKS) Refresh(ctx context.Context, options RefreshOptions) error {
+	if j.jwksURL == "" {
+		return ErrRefreshImpossible
+	}
+
+	// No background goroutine was launched, so there is nothing to coordinate with: refresh inline.
+	if j.refreshInterval == 0 && !j.refreshUnknownKID {
+		if err := j.refresh(); err != nil {
+			return fmt.Errorf("failed to refresh JWKS: %w", err)
+		}
+		return nil
+	}
+
+	ctx, cancel := context.WithCancel(ctx)
+	// Always release the derived context, including on the early error returns below.
+	defer cancel()
+
+	req := refreshRequest{
+		cancel:          cancel,
+		ignoreRateLimit: options.IgnoreRateLimit,
+	}
+
+	select {
+	case <-ctx.Done():
+		// Report the error of the context that actually ended; j.ctx is usually still alive here.
+		return fmt.Errorf("failed to send request refresh to background goroutine: %w", ctx.Err())
+	case <-j.ctx.Done():
+		return fmt.Errorf("failed to send request refresh to background goroutine: %w", j.ctx.Err())
+	case j.refreshRequests <- req:
+	}
+
+	// Wait until the background goroutine releases the request by calling req.cancel, or until that goroutine has
+	// ended and will never do so.
+	select {
+	case <-ctx.Done():
+	case <-j.ctx.Done():
+		if ctx.Err() == nil {
+			return fmt.Errorf("keyfunc background refresh goroutine ended before the refresh finished: %w", j.ctx.Err())
+		}
+	}
+
+	if !errors.Is(ctx.Err(), context.Canceled) {
+		return fmt.Errorf("unexpected keyfunc background refresh context error: %w", ctx.Err())
+	}
+
+	return nil
+}
+
+// backgroundRefresh is meant to be a separate goroutine (Get launches it) that will update the keys in a JWKS over a
+// given interval of time and on demand via j.refreshRequests, honoring j.refreshRateLimit, until j.ctx is done.
+func (j *JWKS) backgroundRefresh() {
+	var lastRefresh time.Time // Time of the most recent refresh attempt; guarded by refreshMux.
+	var queueOnce sync.Once // Allows at most one delayed (rate limited) refresh goroutine to be pending.
+	var refreshMux sync.Mutex // Serializes refreshes between this loop and the delayed refresh goroutine.
+	if j.refreshRateLimit != 0 {
+		lastRefresh = time.Now().Add(-j.refreshRateLimit) // Backdated so the first request is not rate limited.
+	}
+
+	// Create a channel that will never send anything unless there is a refresh interval.
+	refreshInterval := make(<-chan time.Time)
+
+	refresh := func() { // Callers hold refreshMux. Errors only reach the optional handler.
+		err := j.refresh()
+		if err != nil && j.refreshErrorHandler != nil {
+			j.refreshErrorHandler(err)
+		}
+		lastRefresh = time.Now() // Updated whether or not the refresh succeeded.
+	}
+
+	// Enter an infinite loop that ends when the background ends.
+	for {
+		if j.refreshInterval != 0 {
+			refreshInterval = time.After(j.refreshInterval) // A fresh timer is armed on every loop iteration.
+		}
+
+		select {
+		case <-refreshInterval: // The interval elapsed: enqueue an ordinary request instead of refreshing directly.
+			select {
+			case <-j.ctx.Done():
+				return
+			case j.refreshRequests <- refreshRequest{}:
+			default: // If the j.refreshRequests channel is full, don't send another request.
+			}
+
+		case req := <-j.refreshRequests:
+			refreshMux.Lock()
+			if req.ignoreRateLimit {
+				refresh()
+			} else if j.refreshRateLimit != 0 && lastRefresh.Add(j.refreshRateLimit).After(time.Now()) {
+				// Rate limited: launch at most one goroutine that waits out the limit, then refreshes; it never blocks this loop.
+				queueOnce.Do(func() {
+					go func() {
+						refreshMux.Lock()
+						wait := time.Until(lastRefresh.Add(j.refreshRateLimit))
+						refreshMux.Unlock()
+						select {
+						case <-j.ctx.Done():
+							return
+						case <-time.After(wait):
+						}
+
+						refreshMux.Lock()
+						defer refreshMux.Unlock()
+						refresh()
+						queueOnce = sync.Once{} // Re-arm so a later rate limited request can queue again.
+					}()
+				})
+			} else {
+				refresh()
+			}
+			if req.cancel != nil {
+				req.cancel() // Releases the waiting requester, even when the refresh was only queued above.
+			}
+			refreshMux.Unlock()
+
+		// Clean up this goroutine when its context expires.
+		case <-j.ctx.Done():
+			return
+		}
+	}
+}
+
+// defaultRequestFactory builds an HTTP GET request for url, bound to ctx and carrying an empty body.
+func defaultRequestFactory(ctx context.Context, url string) (*http.Request, error) {
+	emptyBody := bytes.NewReader(nil)
+	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, emptyBody)
+	return request, err
+}
+
+// refresh does an HTTP GET on the JWKS URL to rebuild the JWKS.
+func (j *JWKS) refresh() (err error) {
+	var ctx context.Context
+	var cancel context.CancelFunc
+	if j.ctx != nil {
+		ctx, cancel = context.WithTimeout(j.ctx, j.refreshTimeout)
+	} else {
+		ctx, cancel = context.WithTimeout(context.Background(), j.refreshTimeout)
+	}
+	defer cancel()
+
+	req, err := j.requestFactory(ctx, j.jwksURL)
+	if err != nil {
+		return fmt.Errorf("failed to create request via factory function: %w", err)
+	}
+
+	resp, err := j.client.Do(req)
+	if err != nil {
+		return err
+	}
+
+	jwksBytes, err := j.responseExtractor(ctx, resp)
+	if err != nil {
+		return fmt.Errorf("failed to extract response via extractor function: %w", err)
+	}
+
+	// Only reprocess if the JWKS has changed.
+	if len(jwksBytes) != 0 && bytes.Equal(jwksBytes, j.raw) {
+		return nil
+	}
+	j.raw = jwksBytes
+
+	updated, err := NewJSON(jwksBytes)
+	if err != nil {
+		return err
+	}
+
+	j.mux.Lock()
+	defer j.mux.Unlock()
+	j.keys = updated.keys
+
+	if j.givenKeys != nil {
+		for kid, key := range j.givenKeys {
+			// Only overwrite the key if configured to do so.
+			if !j.givenKIDOverride {
+				if _, ok := j.keys[kid]; ok {
+					continue
+				}
+			}
+
+			j.keys[kid] = parsedJWK{public: key.inter}
+		}
+	}
+
+	return nil
+}
@@ -0,0 +1,115 @@
+package keyfunc
+
+import (
+	"crypto/ecdsa"
+	"crypto/ed25519"
+	"crypto/rsa"
+	"encoding/json"
+)
+
+// GivenKey represents a cryptographic key that resides in a JWKS. It is used in conjunction with Options.
+type GivenKey struct {
+	algorithm string
+	inter     interface{}
+}
+
+// GivenKeyOptions represents the configuration options for a GivenKey.
+type GivenKeyOptions struct {
+	// Algorithm is the given key's signing algorithm. Its value will be compared to unverified tokens' "alg" header.
+	//
+	// See RFC 8725 Section 3.1 for details.
+	// https://www.rfc-editor.org/rfc/rfc8725#section-3.1
+	//
+	// For a list of possible values, please see:
+	// https://www.rfc-editor.org/rfc/rfc7518#section-3.1
+	// https://www.iana.org/assignments/jose/jose.xhtml#web-signature-encryption-algorithms
+	Algorithm string
+}
+
+// NewGiven creates a JWKS from a map of given keys.
+func NewGiven(givenKeys map[string]GivenKey) (jwks *JWKS) {
+	keys := make(map[string]parsedJWK)
+
+	for kid, given := range givenKeys {
+		keys[kid] = parsedJWK{
+			algorithm: given.algorithm,
+			public:    given.inter,
+		}
+	}
+
+	return &JWKS{
+		keys: keys,
+	}
+}
+
+// NewGivenCustom wraps an untyped key in a GivenKey. The key argument is expected to be a type supported by the jwt
+// package used.
+//
+// Consider the options carefully as each field may have a security implication.
+//
+// See the https://pkg.go.dev/github.com/golang-jwt/jwt/v5#RegisterSigningMethod function for registering an unsupported
+// signing method.
+func NewGivenCustom(key interface{}, options GivenKeyOptions) (givenKey GivenKey) {
+	givenKey.inter = key
+	givenKey.algorithm = options.Algorithm
+	return givenKey
+}
+
+// NewGivenECDSA wraps an ECDSA public key in a GivenKey.
+//
+// Consider the options carefully as each field may have a security implication.
+func NewGivenECDSA(key *ecdsa.PublicKey, options GivenKeyOptions) (givenKey GivenKey) {
+	givenKey.inter = key
+	givenKey.algorithm = options.Algorithm
+	return givenKey
+}
+
+// NewGivenEdDSA wraps an EdDSA (ed25519) public key in a GivenKey.
+//
+// Consider the options carefully as each field may have a security implication.
+func NewGivenEdDSA(key ed25519.PublicKey, options GivenKeyOptions) (givenKey GivenKey) {
+	givenKey.inter = key
+	givenKey.algorithm = options.Algorithm
+	return givenKey
+}
+
+// NewGivenHMAC wraps an HMAC key, supplied as a byte slice, in a GivenKey.
+//
+// Consider the options carefully as each field may have a security implication.
+func NewGivenHMAC(key []byte, options GivenKeyOptions) (givenKey GivenKey) {
+	givenKey.inter = key
+	givenKey.algorithm = options.Algorithm
+	return givenKey
+}
+
+// NewGivenRSA wraps an RSA public key in a GivenKey.
+//
+// Consider the options carefully as each field may have a security implication.
+func NewGivenRSA(key *rsa.PublicKey, options GivenKeyOptions) (givenKey GivenKey) {
+	givenKey.inter = key
+	givenKey.algorithm = options.Algorithm
+	return givenKey
+}
+
+// NewGivenKeysFromJSON parses a raw JSON message into a map of key IDs (`kid`) to GivenKeys. The returned map is
+// suitable for passing to `NewGiven()` or as `Options.GivenKeys` to `Get()`
+func NewGivenKeysFromJSON(jwksBytes json.RawMessage) (map[string]GivenKey, error) {
+	// Parse by making a temporary JWKS instance. No need to lock its map since it doesn't escape this function.
+	j, err := NewJSON(jwksBytes)
+	if err != nil {
+		return nil, err
+	}
+	keys := make(map[string]GivenKey, len(j.keys))
+	for kid, cryptoKey := range j.keys {
+		keys[kid] = GivenKey{
+			algorithm: cryptoKey.algorithm,
+			inter:     cryptoKey.public,
+		}
+	}
+	return keys, nil
+}
@@ -0,0 +1,239 @@
+package keyfunc
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"sync"
+	"time"
+)
+
+var (
+	// ErrJWKAlgMismatch indicates that the given JWK was found, but its "alg" parameter's value did not match that of
+	// the JWT.
+	ErrJWKAlgMismatch = errors.New(`the given JWK was found, but its "alg" parameter's value did not match the expected algorithm`)
+
+	// ErrJWKUseWhitelist indicates that the given JWK was found, but its "use" parameter's value was not whitelisted.
+	ErrJWKUseWhitelist = errors.New(`the given JWK was found, but its "use" parameter's value was not whitelisted`)
+
+	// ErrKIDNotFound indicates that the given key ID was not found in the JWKS.
+	ErrKIDNotFound = errors.New("the given key ID was not found in the JWKS")
+
+	// ErrMissingAssets indicates that assets required to create a public key are missing.
+	ErrMissingAssets = errors.New("required assets are missing to create a public key")
+)
+
+// ErrorHandler is a function signature that consumes an error.
+type ErrorHandler func(err error)
+
+const (
+	// UseEncryption is a JWK "use" parameter value indicating the JSON Web Key is to be used for encryption.
+	UseEncryption JWKUse = "enc"
+	// UseOmitted is a JWK "use" parameter value that was not specified or was empty.
+	UseOmitted JWKUse = ""
+	// UseSignature is a JWK "use" parameter value indicating the JSON Web Key is to be used for signatures.
+	UseSignature JWKUse = "sig"
+)
+
+// JWKUse is a set of values for the "use" parameter of a JWK.
+// See https://tools.ietf.org/html/rfc7517#section-4.2.
+type JWKUse string
+
+// jsonWebKey represents a JSON Web Key inside a JWKS.
+type jsonWebKey struct {
+	Algorithm string `json:"alg"`
+	Curve     string `json:"crv"`
+	Exponent  string `json:"e"`
+	K         string `json:"k"`
+	ID        string `json:"kid"`
+	Modulus   string `json:"n"`
+	Type      string `json:"kty"`
+	Use       string `json:"use"`
+	X         string `json:"x"`
+	Y         string `json:"y"`
+}
+
+// parsedJWK represents a JSON Web Key parsed with fields as the correct Go types.
+type parsedJWK struct {
+	algorithm string
+	public    interface{}
+	use       JWKUse
+}
+
+// JWKS represents a JSON Web Key Set (JWK Set).
+type JWKS struct {
+	jwkUseWhitelist     map[JWKUse]struct{} // Allowed "use" values checked by getKey; when empty, no check is made.
+	cancel              context.CancelFunc // Cancels ctx; invoked by EndBackground.
+	client              *http.Client // HTTP client used by refresh to fetch the remote JWKS.
+	ctx                 context.Context // Parent context for refresh requests and the background goroutine.
+	raw                 []byte // Raw JWKS bytes kept by refresh; RawJWKS returns a copy.
+	givenKeys           map[string]GivenKey // Keys merged into keys by refresh after the remote keys are loaded.
+	givenKIDOverride    bool // When true, a given key replaces a remote key that has the same kid.
+	jwksURL             string // Remote JWKS location; Refresh returns ErrRefreshImpossible when it is empty.
+	keys                map[string]parsedJWK // Key ID (kid) to parsed key; accessed under mux.
+	mux                 sync.RWMutex // Guards keys.
+	refreshErrorHandler ErrorHandler // Optional consumer of refresh errors.
+	refreshInterval     time.Duration // Period of the background refresh; zero disables the periodic refresh.
+	refreshRateLimit    time.Duration // Minimum spacing that backgroundRefresh enforces between refreshes.
+	refreshRequests     chan refreshRequest // Requests consumed by backgroundRefresh; created by Get with a buffer of one.
+	refreshTimeout      time.Duration // Timeout applied to the context of each refresh request.
+	refreshUnknownKID   bool // When true, getKey requests a refresh for a kid that is not in keys.
+	requestFactory      func(ctx context.Context, url string) (*http.Request, error) // Builds the HTTP request for refresh.
+	responseExtractor   func(ctx context.Context, resp *http.Response) (json.RawMessage, error) // Extracts the raw JWKS from the response.
+}
+
+// rawJWKS represents a JWKS in JSON format.
+type rawJWKS struct {
+	Keys []*jsonWebKey `json:"keys"`
+}
+
+// NewJSON creates a new JWKS from a raw JSON message.
+//
+// An error is returned only when the message itself cannot be unmarshalled. Individual entries of the "keys" array are
+// skipped silently when they are null, have an unknown key type ("kty"), or cannot be turned into a key of their
+// declared type. Every remaining key is stored under its key ID ("kid") together with its "alg" and "use" values; a
+// later entry replaces an earlier one with the same key ID.
+func NewJSON(jwksBytes json.RawMessage) (jwks *JWKS, err error) {
+	var rawKS rawJWKS
+	err = json.Unmarshal(jwksBytes, &rawKS)
+	if err != nil {
+		return nil, err
+	}
+
+	// Iterate through the keys in the raw JWKS. Add them to the JWKS.
+	jwks = &JWKS{
+		keys: make(map[string]parsedJWK, len(rawKS.Keys)),
+	}
+	for _, key := range rawKS.Keys {
+		// A JSON null inside the "keys" array unmarshals to a nil pointer. Skip it instead of dereferencing it.
+		if key == nil {
+			continue
+		}
+
+		var keyInter interface{}
+		var parseErr error
+		switch key.Type {
+		case ktyEC:
+			keyInter, parseErr = key.ECDSA()
+		case ktyOKP:
+			keyInter, parseErr = key.EdDSA()
+		case ktyOct:
+			keyInter, parseErr = key.Oct()
+		case ktyRSA:
+			keyInter, parseErr = key.RSA()
+		default:
+			// Ignore unknown key types silently.
+			continue
+		}
+		if parseErr != nil {
+			// A key that cannot be parsed is skipped, the rest of the set is still usable.
+			continue
+		}
+
+		jwks.keys[key.ID] = parsedJWK{
+			algorithm: key.Algorithm,
+			use:       JWKUse(key.Use),
+			public:    keyInter,
+		}
+	}
+
+	return jwks, nil
+}
+
+// EndBackground stops the background goroutine that refreshes the JWKS by calling the stored cancel function. It
+// does nothing when no cancel function was ever set.
+func (j *JWKS) EndBackground() {
+	if j.cancel == nil {
+		return
+	}
+	j.cancel()
+}
+
+// KIDs returns the key IDs (`kid`) of all keys currently in the JWKS, in map iteration order. The key map is read
+// under the read lock.
+func (j *JWKS) KIDs() (kids []string) {
+	j.mux.RLock()
+	defer j.mux.RUnlock()
+
+	kids = make([]string, 0, len(j.keys))
+	for kid := range j.keys {
+		kids = append(kids, kid)
+	}
+	return kids
+}
+
+// Len reports how many keys the JWKS currently holds, reading the key map under the read lock.
+func (j *JWKS) Len() int {
+	j.mux.RLock()
+	count := len(j.keys)
+	j.mux.RUnlock()
+	return count
+}
+
+// RawJWKS returns a copy of the raw JWKS received from the given JWKS URL. The copy is taken under the read lock, so
+// the caller may modify it freely.
+func (j *JWKS) RawJWKS() []byte {
+	j.mux.RLock()
+	duplicate := append(make([]byte, 0, len(j.raw)), j.raw...)
+	j.mux.RUnlock()
+	return duplicate
+}
+
+// ReadOnlyKeys returns a read-only copy of the mapping of key IDs (`kid`) to cryptographic keys.
+//
+// The returned map is a new map, but its values are the same key values that the JWKS holds. Only the read lock is
+// taken, because the key set is not modified here.
+func (j *JWKS) ReadOnlyKeys() map[string]interface{} {
+	j.mux.RLock()
+	defer j.mux.RUnlock()
+
+	keys := make(map[string]interface{}, len(j.keys))
+	for kid, cryptoKey := range j.keys {
+		keys[kid] = cryptoKey.public
+	}
+	return keys
+}
+
+// getKey returns the cryptographic key stored under kid, refreshing the JWKS first if configured to do so.
+//
+// When the kid is unknown and j.refreshUnknownKID is set, a refresh request is handed to the background goroutine and
+// the lookup is repeated once that request has been released. ErrKIDNotFound is returned (possibly wrapped) when the
+// key is still missing, when the request queue is full, or when the background goroutine has already ended.
+//
+// A found key is rejected with ErrJWKUseWhitelist if a "use" whitelist is configured and the key's "use" value is not
+// in it, and with ErrJWKAlgMismatch if the key declares an algorithm that differs from alg.
+func (j *JWKS) getKey(alg, kid string) (jsonKey interface{}, err error) {
+	j.mux.RLock()
+	pubKey, ok := j.keys[kid]
+	j.mux.RUnlock()
+
+	if !ok {
+		if !j.refreshUnknownKID {
+			return nil, ErrKIDNotFound
+		}
+
+		ctx, cancel := context.WithCancel(j.ctx)
+		// Release the derived context on every path, including the early returns below.
+		defer cancel()
+		req := refreshRequest{
+			cancel: cancel,
+		}
+
+		// Refresh the JWKS.
+		select {
+		case <-j.ctx.Done():
+			// Never hand back a nil key together with a nil error, callers would treat that as a usable key.
+			return nil, fmt.Errorf("%w: the background refresh has ended: %v", ErrKIDNotFound, j.ctx.Err())
+		case j.refreshRequests <- req:
+		default:
+			// If the j.refreshRequests channel is full, return the error early.
+			return nil, ErrKIDNotFound
+		}
+
+		// Wait for the background goroutine to release the request. This also unblocks if j.ctx ends.
+		<-ctx.Done()
+
+		j.mux.RLock()
+		pubKey, ok = j.keys[kid]
+		j.mux.RUnlock()
+		if !ok {
+			return nil, ErrKIDNotFound
+		}
+	}
+
+	// jwkUseWhitelist might be empty if the jwks was from keyfunc.NewJSON() or if JWKUseNoWhitelist option was true.
+	if len(j.jwkUseWhitelist) > 0 {
+		_, ok = j.jwkUseWhitelist[pubKey.use]
+		if !ok {
+			return nil, fmt.Errorf(`%w: JWK "use" parameter value %q is not whitelisted`, ErrJWKUseWhitelist, pubKey.use)
+		}
+	}
+
+	// A key without a declared algorithm is accepted for any token algorithm.
+	if pubKey.algorithm != "" && pubKey.algorithm != alg {
+		return nil, fmt.Errorf(`%w: JWK "alg" parameter value %q does not match token "alg" parameter value %q`, ErrJWKAlgMismatch, pubKey.algorithm, alg)
+	}
+
+	return pubKey.public, nil
+}
@@ -0,0 +1,59 @@
+package keyfunc
+
+import (
+	"encoding/base64"
+	"errors"
+	"fmt"
+	"strings"
+
+	"github.com/golang-jwt/jwt/v5"
+)
+
+var (
+	// ErrKID indicates that the JWT had an invalid kid.
+	ErrKID = errors.New("the JWT has an invalid kid")
+)
+
+// Keyfunc matches the signature of github.com/golang-jwt/jwt/v5's jwt.Keyfunc function. It reads the "kid" and "alg"
+// values from the token header and looks the key up in this JWKS.
+func (j *JWKS) Keyfunc(token *jwt.Token) (interface{}, error) {
+	kid, alg, headerErr := kidAlg(token)
+	if headerErr == nil {
+		return j.getKey(alg, kid)
+	}
+	return nil, headerErr
+}
+
+// Keyfunc matches the signature of github.com/golang-jwt/jwt/v5's jwt.Keyfunc function. The lookup is delegated to
+// the configured key selector.
+func (m *MultipleJWKS) Keyfunc(token *jwt.Token) (interface{}, error) {
+	selected, err := m.keySelector(m, token)
+	return selected, err
+}
+
+func kidAlg(token *jwt.Token) (kid, alg string, err error) {
+	kidInter, ok := token.Header["kid"]
+	if !ok {
+		return "", "", fmt.Errorf("%w: could not find kid in JWT header", ErrKID)
+	}
+	kid, ok = kidInter.(string)
+	if !ok {
+		return "", "", fmt.Errorf("%w: could not convert kid in JWT header to string", ErrKID)
+	}
+	alg, ok = token.Header["alg"].(string)
+	if !ok {
+		// For test coverage purposes, this should be impossible to reach because the JWT package rejects a token
+		// without an alg parameter in the header before calling jwt.Keyfunc.
+		return "", "", fmt.Errorf(`%w: the JWT header did not contain the "alg" parameter, which is required by RFC 7515 section 4.1.1`, ErrJWKAlgMismatch)
+	}
+	return kid, alg, nil
+}
+
+// base64urlTrailingPadding decodes a base64url string after stripping any trailing "=" padding. Some JWKS that are
+// not RFC compliant pad the base64url encoded values of their public keys.
+//
+// The padding has to be removed because the unpadded base64url decoder is used.
+// RFC 7517 defines base64url the same as RFC 7515 Section 2:
+// https://datatracker.ietf.org/doc/html/rfc7517#section-1.1
+// https://datatracker.ietf.org/doc/html/rfc7515#section-2
+func base64urlTrailingPadding(s string) ([]byte, error) {
+	unpadded := strings.TrimRight(s, "=")
+	decoded, err := base64.RawURLEncoding.DecodeString(unpadded)
+	return decoded, err
+}
@@ -0,0 +1,72 @@
+package keyfunc
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/golang-jwt/jwt/v5"
+)
+
+// ErrMultipleJWKSSize is returned when the number of JWKS given are not enough to make a MultipleJWKS.
+var ErrMultipleJWKSSize = errors.New("multiple JWKS must have one or more remote JWK Set resources")
+
+// MultipleJWKS manages multiple JWKS and has a field for jwt.Keyfunc.
+type MultipleJWKS struct {
+	keySelector func(multiJWKS *MultipleJWKS, token *jwt.Token) (key interface{}, err error)
+	sets        map[string]*JWKS // No lock is required because this map is read-only after initialization.
+}
+
+// GetMultiple creates a new MultipleJWKS. A map of length one or more JWKS URLs to Options is required.
+//
+// Be careful when choosing Options for each JWKS in the map. If RefreshUnknownKID is set to true for all JWKS in the
+// map then many refresh requests would take place each time a JWT is processed, this should be rate limited by
+// RefreshRateLimit.
+//
+// ErrMultipleJWKSSize is wrapped when the map is empty. If any JWKS cannot be loaded, the error from Get is wrapped
+// and the background refresh of every JWKS already created by this call is ended, so nothing is left running.
+// KeySelectorFirst is used when no KeySelector is given.
+func GetMultiple(multiple map[string]Options, options MultipleOptions) (multiJWKS *MultipleJWKS, err error) {
+	if len(multiple) < 1 {
+		return nil, fmt.Errorf("multiple JWKS must have one or more remote JWK Set resources: %w", ErrMultipleJWKSSize)
+	}
+
+	if options.KeySelector == nil {
+		options.KeySelector = KeySelectorFirst
+	}
+
+	multiJWKS = &MultipleJWKS{
+		sets:        make(map[string]*JWKS, len(multiple)),
+		keySelector: options.KeySelector,
+	}
+
+	for u, opts := range multiple {
+		jwks, err := Get(u, opts)
+		if err != nil {
+			// The caller gets no handle to the JWKS created so far, so stop their background goroutines here.
+			for _, created := range multiJWKS.sets {
+				created.EndBackground()
+			}
+			return nil, fmt.Errorf("failed to get JWKS from %q: %w", u, err)
+		}
+		multiJWKS.sets[u] = jwks
+	}
+
+	return multiJWKS, nil
+}
+
+// JWKSets returns a new map from JWKS URL to JWKS. Only the map is copied: the JWKS values are shared with the
+// MultipleJWKS and should be treated as read-only.
+func (m *MultipleJWKS) JWKSets() map[string]*JWKS {
+	snapshot := make(map[string]*JWKS, len(m.sets))
+	for jwksURL := range m.sets {
+		snapshot[jwksURL] = m.sets[jwksURL]
+	}
+	return snapshot
+}
+
+// KeySelectorFirst returns the key from the first JWK Set, in map iteration order, whose lookup succeeds for the
+// token's "kid" and "alg". A failed lookup in one set moves on to the next set; if every set fails, ErrKIDNotFound
+// is wrapped. An error from reading the token header is returned as is.
+func KeySelectorFirst(multiJWKS *MultipleJWKS, token *jwt.Token) (key interface{}, err error) {
+	kid, alg, err := kidAlg(token)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, set := range multiJWKS.sets {
+		if found, lookupErr := set.getKey(alg, kid); lookupErr == nil {
+			return found, nil
+		}
+	}
+
+	return nil, fmt.Errorf("failed to find key ID in multiple JWKS: %w", ErrKIDNotFound)
+}
@@ -0,0 +1,28 @@
+package keyfunc
+
+import (
+	"fmt"
+)
+
+const (
+	// ktyOct is the key type (kty) in the JWT header for oct.
+	ktyOct = "oct"
+)
+
+// Oct turns the "k" value of a jsonWebKey into a raw byte slice (octet sequence). This includes HMAC keys.
+// ErrMissingAssets is wrapped when "k" is empty, and a decoding error is returned as is.
+func (j *jsonWebKey) Oct() (publicKey []byte, err error) {
+	if len(j.K) == 0 {
+		return nil, fmt.Errorf("%w: %s", ErrMissingAssets, ktyOct)
+	}
+
+	// According to RFC 7517, the octet key is Base64 URL encoded.
+	// https://datatracker.ietf.org/doc/html/rfc7517#section-1.1
+	if publicKey, err = base64urlTrailingPadding(j.K); err != nil {
+		return nil, err
+	}
+	return publicKey, nil
+}
@@ -0,0 +1,165 @@
+package keyfunc
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/golang-jwt/jwt/v5"
+)
+
+// ErrInvalidHTTPStatusCode indicates that the HTTP status code is invalid.
+var ErrInvalidHTTPStatusCode = errors.New("invalid HTTP status code")
+
+// Options represents the configuration options for a JWKS.
+//
+// If either RefreshInterval is non-zero or RefreshUnknownKID is true, then a background goroutine will be launched to refresh the
+// remote JWKS under the specified circumstances.
+//
+// When using a background refresh goroutine, make sure to use RefreshRateLimit if paired with RefreshUnknownKID. Also
+// make sure to end the background refresh goroutine with the JWKS.EndBackground method when it's no longer needed.
+type Options struct {
+	// Client is the HTTP client used to get the JWKS via HTTP.
+	Client *http.Client
+
+	// Ctx is the context for the keyfunc's background refresh. When the context expires or is canceled, the background
+	// goroutine will end.
+	Ctx context.Context
+
+	// GivenKeys is a map of JWT key IDs, `kid`, to their given keys. If the JWKS has a background refresh goroutine,
+	// these values persist across JWKS refreshes. By default, if the remote JWKS resource contains a key with the same
+	// `kid` any given keys with the same `kid` will be overwritten by the keys from the remote JWKS. Use the
+	// GivenKIDOverride option to flip this behavior.
+	GivenKeys map[string]GivenKey
+
+	// GivenKIDOverride will make a GivenKey override any keys with the same ID (`kid`) in the remote JWKS. This is only
+	// effectual if GivenKeys is provided.
+	GivenKIDOverride bool
+
+	// JWKUseWhitelist is a whitelist of JWK `use` parameter values that will restrict what keys can be returned for
+	// jwt.Keyfunc. The assumption is that jwt.Keyfunc is only used for JWT signature verification.
+	// The default behavior is to only return a JWK if its `use` parameter has the value `"sig"`, an empty string, or if
+	// the parameter was omitted entirely.
+	JWKUseWhitelist []JWKUse
+
+	// JWKUseNoWhitelist overrides the JWKUseWhitelist field and its default behavior. If set to true, all JWKs will be
+	// returned regardless of their `use` parameter value.
+	JWKUseNoWhitelist bool
+
+	// RefreshErrorHandler is a function that consumes errors that happen during a JWKS refresh. This is only effectual
+	// if a background refresh goroutine is active.
+	RefreshErrorHandler ErrorHandler
+
+	// RefreshInterval is the duration to refresh the JWKS in the background via a new HTTP request. If this is not zero,
+	// then a background goroutine will be used to refresh the JWKS once per the given interval. Make sure to call the
+	// JWKS.EndBackground method to end this goroutine when it's no longer needed.
+	RefreshInterval time.Duration
+
+	// RefreshRateLimit limits the rate at which refresh requests are granted. Only one refresh request can be queued
+	// at a time; any refresh requests received while one is already queued are ignored. It does not make sense to
+	// have RefreshInterval's value shorter than this.
+	RefreshRateLimit time.Duration
+
+	// RefreshTimeout is the duration for the context timeout used to create the HTTP request for a refresh of the JWKS.
+	// This defaults to one minute. This is used for the HTTP request and any background goroutine refreshes.
+	RefreshTimeout time.Duration
+
+	// RefreshUnknownKID indicates that the JWKS refresh request will occur every time a kid that isn't cached is seen.
+	// This is done through a background goroutine. Without specifying a RefreshRateLimit a malicious client could
+	// self-sign X JWTs, send them to this service, then cause potentially high network usage proportional to X. Make
+	// sure to call the JWKS.EndBackground method to end this goroutine when it's no longer needed.
+	//
+	// It is recommended this option is not used when in MultipleJWKS. This is because KID collisions SHOULD be uncommon
+	// meaning nearly any JWT SHOULD trigger a refresh for the number of JWKS in the MultipleJWKS minus one.
+	RefreshUnknownKID bool
+
+	// RequestFactory creates HTTP requests for the remote JWKS resource located at the given url. For example, an
+	// HTTP header could be added to indicate a User-Agent.
+	RequestFactory func(ctx context.Context, url string) (*http.Request, error)
+
+	// ResponseExtractor consumes a *http.Response and produces the raw JSON for the JWKS. By default, the
+	// ResponseExtractorStatusOK function is used. The default behavior changed in v1.4.0.
+	ResponseExtractor func(ctx context.Context, resp *http.Response) (json.RawMessage, error)
+
+	// TolerateInitialJWKHTTPError will tolerate any error from the initial HTTP JWKS request. If an error occurs,
+	// the RefreshErrorHandler will be given the error. The program will continue to run as if the error did not occur
+	// and a valid JWK Set with no keys was received in the response. This allows for the background goroutine to
+	// request the JWKS at a later time.
+	//
+	// It does not make sense to mark this field as true unless the background refresh goroutine is active.
+	TolerateInitialJWKHTTPError bool
+}
+
+// MultipleOptions is used to configure the behavior when multiple JWKS are used by MultipleJWKS.
+type MultipleOptions struct {
+	// KeySelector is a function that selects the key to use for a given token. It will be used in the implementation
+	// for jwt.Keyfunc. If implementing this custom selector extract the key ID and algorithm from the token's header.
+	// Use the key ID to select a token and confirm the key's algorithm before returning it.
+	//
+	// This value defaults to KeySelectorFirst.
+	KeySelector func(multiJWKS *MultipleJWKS, token *jwt.Token) (key interface{}, err error)
+}
+
+// RefreshOptions are used to specify manual refresh behavior.
+type RefreshOptions struct {
+	IgnoreRateLimit bool
+}
+
+type refreshRequest struct {
+	cancel          context.CancelFunc
+	ignoreRateLimit bool
+}
+
+// ResponseExtractorStatusOK is meant to be used as the ResponseExtractor field for Options. It requires a 200 OK
+// status code, wrapping ErrInvalidHTTPStatusCode otherwise, and returns the raw JSON read from the response body.
+// The response body is always closed.
+func ResponseExtractorStatusOK(ctx context.Context, resp *http.Response) (json.RawMessage, error) {
+	//goland:noinspection GoUnhandledErrorResult
+	defer resp.Body.Close()
+
+	if code := resp.StatusCode; code != http.StatusOK {
+		return nil, fmt.Errorf("%w: %d", ErrInvalidHTTPStatusCode, code)
+	}
+
+	payload, err := io.ReadAll(resp.Body)
+	return payload, err
+}
+
+// ResponseExtractorStatusAny is meant to be used as the ResponseExtractor field for Options. It returns the raw JSON
+// read from the response body whatever the response status code is. The response body is always closed.
+func ResponseExtractorStatusAny(ctx context.Context, resp *http.Response) (json.RawMessage, error) {
+	//goland:noinspection GoUnhandledErrorResult
+	defer resp.Body.Close()
+
+	payload, err := io.ReadAll(resp.Body)
+	return payload, err
+}
+
+// applyOptions copies the given options onto the given JWKS.
+//
+// Plain values are copied as they are. A non-nil Ctx is wrapped in a cancelable context whose cancel function is
+// stored on the JWKS. GivenKeys is copied into a new map, so later changes to the caller's map have no effect.
+// Unless JWKUseNoWhitelist is set, JWKUseWhitelist is turned into a set, which is empty when the list is empty.
+func applyOptions(jwks *JWKS, options Options) {
+	// Plain value fields.
+	jwks.client = options.Client
+	jwks.givenKIDOverride = options.GivenKIDOverride
+	jwks.refreshErrorHandler = options.RefreshErrorHandler
+	jwks.refreshInterval = options.RefreshInterval
+	jwks.refreshRateLimit = options.RefreshRateLimit
+	jwks.refreshTimeout = options.RefreshTimeout
+	jwks.refreshUnknownKID = options.RefreshUnknownKID
+	jwks.requestFactory = options.RequestFactory
+	jwks.responseExtractor = options.ResponseExtractor
+
+	if parent := options.Ctx; parent != nil {
+		jwks.ctx, jwks.cancel = context.WithCancel(parent)
+	}
+
+	if given := options.GivenKeys; given != nil {
+		copied := make(map[string]GivenKey)
+		for kid := range given {
+			copied[kid] = given[kid]
+		}
+		jwks.givenKeys = copied
+	}
+
+	if options.JWKUseNoWhitelist {
+		return
+	}
+	whitelist := make(map[JWKUse]struct{})
+	for _, use := range options.JWKUseWhitelist {
+		whitelist[use] = struct{}{}
+	}
+	jwks.jwkUseWhitelist = whitelist
+}
@@ -0,0 +1,43 @@
+package keyfunc
+
+import (
+	"crypto/rsa"
+	"fmt"
+	"math/big"
+)
+
+const (
+	// ktyRSA is the key type (kty) in the JWT header for RSA.
+	ktyRSA = "RSA"
+)
+
+// RSA turns the "e" and "n" values of a jsonWebKey into an RSA public key. ErrMissingAssets is wrapped when either
+// value is empty, and a decoding error is returned as is.
+func (j *jsonWebKey) RSA() (publicKey *rsa.PublicKey, err error) {
+	if !(j.Exponent != "" && j.Modulus != "") {
+		return nil, fmt.Errorf("%w: %s", ErrMissingAssets, ktyRSA)
+	}
+
+	// According to RFC 7518, both values are Base64 URL encoded unsigned integers.
+	// https://tools.ietf.org/html/rfc7518#section-6.3
+	exponentBytes, err := base64urlTrailingPadding(j.Exponent)
+	if err != nil {
+		return nil, err
+	}
+	modulusBytes, err := base64urlTrailingPadding(j.Modulus)
+	if err != nil {
+		return nil, err
+	}
+
+	// According to RFC 7517, these numbers are in big-endian format.
+	// https://tools.ietf.org/html/rfc7517#appendix-A.1
+	return &rsa.PublicKey{
+		N: new(big.Int).SetBytes(modulusBytes),
+		E: int(new(big.Int).SetBytes(exponentBytes).Uint64()),
+	}, nil
+}
@@ -0,0 +1,19 @@
+Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
@@ -0,0 +1,14 @@
+This package is a brotli compressor and decompressor implemented in Go.
+It was translated from the reference implementation (https://github.com/google/brotli)
+with the `c2go` tool at https://github.com/andybalholm/c2go.
+
+I have been working on new compression algorithms (not translated from C)
+in the matchfinder package.
+You can use them with the NewWriterV2 function.
+Currently they give better results than the old implementation
+(at least for compressing my test file, Newton’s *Opticks*) 
+on levels 2 to 6.
+
+I am using it in production with https://github.com/andybalholm/redwood.
+
+API documentation is found at https://pkg.go.dev/github.com/andybalholm/brotli?tab=doc.
@@ -0,0 +1,185 @@
+package brotli
+
+import (
+	"sync"
+)
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+func computeDistanceCode(distance uint, max_distance uint, dist_cache []int) uint {
+	if distance <= max_distance {
+		var distance_plus_3 uint = distance + 3
+		var offset0 uint = distance_plus_3 - uint(dist_cache[0])
+		var offset1 uint = distance_plus_3 - uint(dist_cache[1])
+		if distance == uint(dist_cache[0]) {
+			return 0
+		} else if distance == uint(dist_cache[1]) {
+			return 1
+		} else if offset0 < 7 {
+			return (0x9750468 >> (4 * offset0)) & 0xF
+		} else if offset1 < 7 {
+			return (0xFDB1ACE >> (4 * offset1)) & 0xF
+		} else if distance == uint(dist_cache[2]) {
+			return 2
+		} else if distance == uint(dist_cache[3]) {
+			return 3
+		}
+	}
+
+	return distance + numDistanceShortCodes - 1
+}
+
+/* hasherSearchResultPool recycles the *hasherSearchResult scratch values that
+   createBackwardReferences takes at its start and puts back at its end. */
+var hasherSearchResultPool sync.Pool
+
+/* createBackwardReferences scans ringbuffer positions
+   [position, position + num_bytes) with |hasher| and appends one command to
+   |commands| for every accepted match.
+
+   dist_cache is updated in place whenever an emitted match lies within
+   max_distance + gap and has a non-zero distance code.
+   *last_insert_len is read as the number of literals already pending before
+   |position| and is overwritten with the number of literals left over after
+   the last emitted command.
+   *num_literals is increased by the insert length of every emitted command. */
+func createBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher hasherHandle, dist_cache []int, last_insert_len *uint, commands *[]command, num_literals *uint) {
+	/* Set maximum distance, see section 9.1. of the spec. */
+	var max_backward_limit uint = maxBackwardLimit(params.lgwin)
+	var insert_length uint = *last_insert_len
+	var pos_end uint = position + num_bytes
+	/* Hashes are only stored for positions below store_end. */
+	var store_end uint
+	if num_bytes >= hasher.StoreLookahead() {
+		store_end = position + num_bytes - hasher.StoreLookahead() + 1
+	} else {
+		store_end = position
+	}
+	/* For speed up heuristics for random data. */
+	var random_heuristics_window_size uint = literalSpreeLengthForSparseSearch(params)
+	var apply_random_heuristics uint = position + random_heuristics_window_size
+	var gap uint = 0
+
+	/* Minimum score to accept a backward reference. */
+	const kMinScore uint = scoreBase + 100
+
+	hasher.PrepareDistanceCache(dist_cache)
+
+	/* Take two scratch search results from the pool (allocating when the pool
+	   has none): sr holds the current best match, sr2 the lazy-match
+	   candidate that starts one byte later. */
+	sr2, _ := hasherSearchResultPool.Get().(*hasherSearchResult)
+	if sr2 == nil {
+		sr2 = &hasherSearchResult{}
+	}
+	sr, _ := hasherSearchResultPool.Get().(*hasherSearchResult)
+	if sr == nil {
+		sr = &hasherSearchResult{}
+	}
+
+	for position+hasher.HashTypeLength() < pos_end {
+		var max_length uint = pos_end - position
+		var max_distance uint = brotli_min_size_t(position, max_backward_limit)
+		sr.len = 0
+		sr.len_code_delta = 0
+		sr.distance = 0
+		sr.score = kMinScore
+		hasher.FindLongestMatch(&params.dictionary, ringbuffer, ringbuffer_mask, dist_cache, position, max_length, max_distance, gap, params.dist.max_distance, sr)
+		if sr.score > kMinScore {
+			/* Found a match. Let's look for something even better ahead. */
+			var delayed_backward_references_in_row int = 0
+			max_length--
+			for ; ; max_length-- {
+				/* Score margin by which the match starting one byte later
+				   must beat the current one to be preferred. */
+				var cost_diff_lazy uint = 175
+				if params.quality < minQualityForExtensiveReferenceSearch {
+					sr2.len = brotli_min_size_t(sr.len-1, max_length)
+				} else {
+					sr2.len = 0
+				}
+				sr2.len_code_delta = 0
+				sr2.distance = 0
+				sr2.score = kMinScore
+				max_distance = brotli_min_size_t(position+1, max_backward_limit)
+				hasher.FindLongestMatch(&params.dictionary, ringbuffer, ringbuffer_mask, dist_cache, position+1, max_length, max_distance, gap, params.dist.max_distance, sr2)
+				if sr2.score >= sr.score+cost_diff_lazy {
+					/* Ok, let's just write one byte for now and start a match from the
+					   next byte. */
+					position++
+
+					insert_length++
+					*sr = *sr2
+					delayed_backward_references_in_row++
+					/* Delay at most four times in a row, and only while
+					   another search still fits before pos_end. */
+					if delayed_backward_references_in_row < 4 && position+hasher.HashTypeLength() < pos_end {
+						continue
+					}
+				}
+
+				break
+			}
+
+			apply_random_heuristics = position + 2*sr.len + random_heuristics_window_size
+			max_distance = brotli_min_size_t(position, max_backward_limit)
+			{
+				/* The first 16 codes are special short-codes,
+				   and the minimum offset is 1. */
+				var distance_code uint = computeDistanceCode(sr.distance, max_distance+gap, dist_cache)
+				/* Push the distance onto the cache unless it is beyond
+				   max_distance + gap or has distance code 0. */
+				if (sr.distance <= (max_distance + gap)) && distance_code > 0 {
+					dist_cache[3] = dist_cache[2]
+					dist_cache[2] = dist_cache[1]
+					dist_cache[1] = dist_cache[0]
+					dist_cache[0] = int(sr.distance)
+					hasher.PrepareDistanceCache(dist_cache)
+				}
+
+				*commands = append(*commands, makeCommand(&params.dist, insert_length, sr.len, sr.len_code_delta, distance_code))
+			}
+
+			*num_literals += insert_length
+			insert_length = 0
+			/* Put the hash keys into the table, if there are enough bytes left.
+			   Depending on the hasher implementation, it can push all positions
+			   in the given range or only a subset of them.
+			   Avoid hash poisoning with RLE data. */
+			{
+				var range_start uint = position + 2
+				var range_end uint = brotli_min_size_t(position+sr.len, store_end)
+				if sr.distance < sr.len>>2 {
+					range_start = brotli_min_size_t(range_end, brotli_max_size_t(range_start, position+sr.len-(sr.distance<<2)))
+				}
+
+				hasher.StoreRange(ringbuffer, ringbuffer_mask, range_start, range_end)
+			}
+
+			position += sr.len
+		} else {
+			/* No acceptable match: the byte becomes a literal. */
+			insert_length++
+			position++
+
+			/* If we have not seen matches for a long time, we can skip some
+			   match lookups. Unsuccessful match lookups are very very expensive
+			   and this kind of a heuristic speeds up compression quite
+			   a lot. */
+			if position > apply_random_heuristics {
+				/* Going through uncompressible data, jump. */
+				if position > apply_random_heuristics+4*random_heuristics_window_size {
+					var kMargin uint = brotli_max_size_t(hasher.StoreLookahead()-1, 4)
+					/* It is quite a long time since we saw a copy, so we assume
+					   that this data is not compressible, and store hashes less
+					   often. Hashes of non compressible data are less likely to
+					   turn out to be useful in the future, too, so we store less of
+					   them to not to flood out the hash table of good compressible
+					   data. */
+
+					var pos_jump uint = brotli_min_size_t(position+16, pos_end-kMargin)
+					for ; position < pos_jump; position += 4 {
+						hasher.Store(ringbuffer, ringbuffer_mask, position)
+						insert_length += 4
+					}
+				} else {
+					/* Milder variant: store a hash for every second position. */
+					var kMargin uint = brotli_max_size_t(hasher.StoreLookahead()-1, 2)
+					var pos_jump uint = brotli_min_size_t(position+8, pos_end-kMargin)
+					for ; position < pos_jump; position += 2 {
+						hasher.Store(ringbuffer, ringbuffer_mask, position)
+						insert_length += 2
+					}
+				}
+			}
+		}
+	}
+
+	/* Whatever is left before pos_end is carried over as pending literals. */
+	insert_length += pos_end - position
+	*last_insert_len = insert_length
+
+	hasherSearchResultPool.Put(sr)
+	hasherSearchResultPool.Put(sr2)
+}
@@ -0,0 +1,796 @@
+package brotli
+
+import "math"
+
+/* zopfliNode is one entry of the per-position node array used by the zopfli
+   shortest-path search; entry i describes the command that ends at offset i. */
+type zopfliNode struct {
+	/* Low 25 bits: copy length. High 7 bits: modifier, from which the length
+	   code is recovered as copy length + 9 - modifier. */
+	length              uint32
+	/* Copy distance of the command. */
+	distance            uint32
+	/* Low 27 bits: insert length. High 5 bits: distance short code + 1, or 0
+	   when the distance is not encoded with a short code. */
+	dcode_insert_length uint32
+	u                   struct {
+		/* Smallest cost found so far for reaching this position; kInfinity
+		   after initZopfliNodes. */
+		cost     float32
+		/* Written by computeShortestPathFromNodes: length of the command that
+		   starts here, or math.MaxUint32 on the last node of the path. */
+		next     uint32
+		/* Written by evaluateNode with the result of
+		   computeDistanceShortcut. */
+		shortcut uint32
+	}
+}
+
+/* Upper bound applied to the distance histogram size of a zopfliCostModel. */
+const maxEffectiveDistanceAlphabetSize = 544
+
+/* Cost assigned to nodes that have not been reached yet. */
+const kInfinity float32 = 1.7e38 /* ~= 2 ^ 127 */
+
+/* For distance short code j, updateNodes tries the candidate distance
+   distance_cache[kDistanceCacheIndex[j]] + kDistanceCacheOffset[j]. */
+var kDistanceCacheIndex = []uint32{0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}
+
+var kDistanceCacheOffset = []int{0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3}
+
+func initZopfliNodes(array []zopfliNode, length uint) {
+	var stub zopfliNode
+	var i uint
+	stub.length = 1
+	stub.distance = 0
+	stub.dcode_insert_length = 0
+	stub.u.cost = kInfinity
+	for i = 0; i < length; i++ {
+		array[i] = stub
+	}
+}
+
+func zopfliNodeCopyLength(self *zopfliNode) uint32 {
+	return self.length & 0x1FFFFFF
+}
+
+func zopfliNodeLengthCode(self *zopfliNode) uint32 {
+	var modifier uint32 = self.length >> 25
+	return zopfliNodeCopyLength(self) + 9 - modifier
+}
+
+/* zopfliNodeCopyDistance returns the copy distance stored in the node. */
+func zopfliNodeCopyDistance(self *zopfliNode) uint32 {
+	return self.distance
+}
+
+func zopfliNodeDistanceCode(self *zopfliNode) uint32 {
+	var short_code uint32 = self.dcode_insert_length >> 27
+	if short_code == 0 {
+		return zopfliNodeCopyDistance(self) + numDistanceShortCodes - 1
+	} else {
+		return short_code - 1
+	}
+}
+
+/* zopfliNodeCommandLength returns the total length of the node's command:
+   its copy length plus its insert length (low 27 bits of
+   dcode_insert_length). */
+func zopfliNodeCommandLength(self *zopfliNode) uint32 {
+	insertLen := self.dcode_insert_length & 0x7FFFFFF
+	return zopfliNodeCopyLength(self) + insertLen
+}
+
+/* Histogram based cost model for zopflification. */
+type zopfliCostModel struct {
+	/* Cost of every command symbol. */
+	cost_cmd_               [numCommandSymbols]float32
+	/* Cost of every distance symbol; allocated by initZopfliCostModel with
+	   one entry per symbol of the distance alphabet. */
+	cost_dist_              []float32
+	/* Number of leading cost_dist_ entries that the setters fill in; capped
+	   at maxEffectiveDistanceAlphabetSize. */
+	distance_histogram_size uint32
+	/* Prefix sums of literal costs: literal_costs_[i] is the cost of the
+	   first i literals of the block, so a range cost is a difference of two
+	   entries. Holds num_bytes_ + 2 entries. */
+	literal_costs_          []float32
+	/* Smallest entry of cost_cmd_, as recorded by the setters. */
+	min_cost_cmd_           float32
+	/* Number of input bytes the model was initialized for. */
+	num_bytes_              uint
+}
+
+func initZopfliCostModel(self *zopfliCostModel, dist *distanceParams, num_bytes uint) {
+	var distance_histogram_size uint32 = dist.alphabet_size
+	if distance_histogram_size > maxEffectiveDistanceAlphabetSize {
+		distance_histogram_size = maxEffectiveDistanceAlphabetSize
+	}
+
+	self.num_bytes_ = num_bytes
+	self.literal_costs_ = make([]float32, (num_bytes + 2))
+	self.cost_dist_ = make([]float32, (dist.alphabet_size))
+	self.distance_histogram_size = distance_histogram_size
+}
+
+/* cleanupZopfliCostModel drops the references to the two slices allocated by
+   initZopfliCostModel. */
+func cleanupZopfliCostModel(self *zopfliCostModel) {
+	self.literal_costs_, self.cost_dist_ = nil, nil
+}
+
+/* setCost converts the first |histogram_size| counts of |histogram| into bit
+   costs written to |cost|.
+
+   Symbols that occur get log2(total) - log2(count), but never less than one
+   bit. Symbols that never occur all get the same cost, log2(T) + 2, where T
+   is the total count for literal histograms and the total count plus the
+   number of absent symbols otherwise. */
+func setCost(histogram []uint32, histogram_size uint, literal_histogram bool, cost []float32) {
+	var total, absent uint
+	for i := uint(0); i < histogram_size; i++ {
+		total += uint(histogram[i])
+		if histogram[i] == 0 {
+			absent++
+		}
+	}
+
+	paddedTotal := total
+	if !literal_histogram {
+		paddedTotal += absent
+	}
+
+	log2Total := float32(fastLog2(total))
+	absentCost := float32(fastLog2(paddedTotal)) + 2
+
+	for i := uint(0); i < histogram_size; i++ {
+		count := histogram[i]
+		if count == 0 {
+			cost[i] = absentCost
+			continue
+		}
+
+		/* Shannon bits for this symbol. */
+		bits := log2Total - float32(fastLog2(uint(count)))
+
+		/* Cannot be coded with less than 1 bit */
+		if bits < 1 {
+			bits = 1
+		}
+
+		cost[i] = bits
+	}
+}
+
+/* zopfliCostModelSetFromCommands rebuilds the cost model from the statistics
+   of an existing command sequence: it histograms the literals, command
+   prefixes and distance prefixes of |commands|, turns the histograms into
+   costs with setCost, records the cheapest command cost, and recomputes the
+   literal cost prefix sums for the block starting at |position|.
+
+   The first command's literals are taken to start |last_insert_len| bytes
+   before |position|. */
+func zopfliCostModelSetFromCommands(self *zopfliCostModel, position uint, ringbuffer []byte, ringbuffer_mask uint, commands []command, last_insert_len uint) {
+	var (
+		literalHist  [numLiteralSymbols]uint32
+		commandHist  [numCommandSymbols]uint32
+		distanceHist [maxEffectiveDistanceAlphabetSize]uint32
+		literalCost  [numLiteralSymbols]float32
+	)
+
+	cursor := position - last_insert_len
+	for i := range commands {
+		cmd := &commands[i]
+		insertLen := uint(cmd.insert_len_)
+		copyLen := uint(commandCopyLen(cmd))
+		distCode := uint(cmd.dist_prefix_) & 0x3FF
+		cmdCode := uint(cmd.cmd_prefix_)
+
+		commandHist[cmdCode]++
+		/* Distance prefixes are only counted for command codes >= 128. */
+		if cmdCode >= 128 {
+			distanceHist[distCode]++
+		}
+
+		for k := uint(0); k < insertLen; k++ {
+			literalHist[ringbuffer[(cursor+k)&ringbuffer_mask]]++
+		}
+
+		cursor += insertLen + copyLen
+	}
+
+	commandCost := self.cost_cmd_[:]
+	setCost(literalHist[:], numLiteralSymbols, true, literalCost[:])
+	setCost(commandHist[:], numCommandSymbols, false, commandCost)
+	setCost(distanceHist[:], uint(self.distance_histogram_size), false, self.cost_dist_)
+
+	cheapest := kInfinity
+	for sym := 0; sym < numCommandSymbols; sym++ {
+		cheapest = brotli_min_float(cheapest, commandCost[sym])
+	}
+	self.min_cost_cmd_ = cheapest
+
+	/* Prefix sums of the per-byte literal costs. |carry| holds the part of
+	   each addend that was not absorbed by the previous sum, so that it is
+	   fed back into the next step. */
+	prefix := self.literal_costs_
+	var carry float32 = 0.0
+	byteCount := int(self.num_bytes_)
+	prefix[0] = 0.0
+	for i := 0; i < byteCount; i++ {
+		carry += literalCost[ringbuffer[(position+uint(i))&ringbuffer_mask]]
+		prefix[i+1] = prefix[i] + carry
+		carry -= prefix[i+1] - prefix[i]
+	}
+}
+
+/* zopfliCostModelSetFromLiteralCosts fills the cost model without any
+   command statistics: literal costs come from estimateBitCostsForLiterals
+   and are turned into prefix sums, while command symbol i costs
+   log2(11 + i) and distance symbol i costs log2(20 + i). */
+func zopfliCostModelSetFromLiteralCosts(self *zopfliCostModel, position uint, ringbuffer []byte, ringbuffer_mask uint) {
+	prefix := self.literal_costs_
+	distCost := self.cost_dist_
+	byteCount := self.num_bytes_
+
+	/* Per-byte estimates land in prefix[1:], then get summed in place. */
+	estimateBitCostsForLiterals(position, byteCount, ringbuffer_mask, ringbuffer, prefix[1:])
+	prefix[0] = 0.0
+
+	/* |carry| holds the part of each addend that was not absorbed by the
+	   previous sum, so that it is fed back into the next step. */
+	var carry float32 = 0.0
+	for i := uint(0); i < byteCount; i++ {
+		carry += prefix[i+1]
+		prefix[i+1] = prefix[i] + carry
+		carry -= prefix[i+1] - prefix[i]
+	}
+
+	for sym := range self.cost_cmd_ {
+		self.cost_cmd_[sym] = float32(fastLog2(uint(11 + uint32(sym))))
+	}
+
+	for sym := uint32(0); sym < self.distance_histogram_size; sym++ {
+		distCost[sym] = float32(fastLog2(uint(20 + sym)))
+	}
+
+	self.min_cost_cmd_ = float32(fastLog2(11))
+}
+
+/* zopfliCostModelGetCommandCost returns the cost of command symbol
+   |cmdcode|. */
+func zopfliCostModelGetCommandCost(self *zopfliCostModel, cmdcode uint16) float32 {
+	return self.cost_cmd_[cmdcode]
+}
+
+/* zopfliCostModelGetDistanceCost returns the cost of distance symbol
+   |distcode|. */
+func zopfliCostModelGetDistanceCost(self *zopfliCostModel, distcode uint) float32 {
+	return self.cost_dist_[distcode]
+}
+
+/* zopfliCostModelGetLiteralCosts returns the cost of the literals between
+   block offsets |from| and |to|, as the difference of two prefix sums. */
+func zopfliCostModelGetLiteralCosts(self *zopfliCostModel, from uint, to uint) float32 {
+	return self.literal_costs_[to] - self.literal_costs_[from]
+}
+
+/* zopfliCostModelGetMinCostCmd returns the minimum command cost recorded when
+   the model was last set. */
+func zopfliCostModelGetMinCostCmd(self *zopfliCostModel) float32 {
+	return self.min_cost_cmd_
+}
+
+/* REQUIRES: len >= 2, start_pos <= pos */
+/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
+/* Maintains the "ZopfliNode array invariant". */
+func updateZopfliNode(nodes []zopfliNode, pos uint, start_pos uint, len uint, len_code uint, dist uint, short_code uint, cost float32) {
+	var next *zopfliNode = &nodes[pos+len]
+	next.length = uint32(len | (len+9-len_code)<<25)
+	next.distance = uint32(dist)
+	next.dcode_insert_length = uint32(short_code<<27 | (pos - start_pos))
+	next.u.cost = cost
+}
+
+/* posData describes a candidate command start position kept in the
+   startPosQueue. */
+type posData struct {
+	/* Offset of the position within the block. */
+	pos            uint
+	/* Last four distances in effect at pos, as filled in by
+	   computeDistanceCache. */
+	distance_cache [4]int
+	/* cost minus the cost of coding everything up to pos as literals; the
+	   queue is ordered by this value. */
+	costdiff       float32
+	/* Cost of reaching pos. */
+	cost           float32
+}
+
+/* Maintains the smallest 8 cost difference together with their positions */
+type startPosQueue struct {
+	/* Circular storage; slots are addressed modulo 8. */
+	q_   [8]posData
+	/* Number of pushes so far; also determines where the sorted run starts. */
+	idx_ uint
+}
+
+/* initStartPosQueue empties the queue by resetting its push counter. */
+func initStartPosQueue(self *startPosQueue) {
+	self.idx_ = 0
+}
+
+/* startPosQueueSize returns the number of entries currently held, which is
+   the number of pushes capped at 8. */
+func startPosQueueSize(self *startPosQueue) uint {
+	return brotli_min_size_t(self.idx_, 8)
+}
+
+func startPosQueuePush(self *startPosQueue, posdata *posData) {
+	var offset uint = ^(self.idx_) & 7
+	self.idx_++
+	var len uint = startPosQueueSize(self)
+	var i uint
+	var q []posData = self.q_[:]
+	q[offset] = *posdata
+
+	/* Restore the sorted order. In the list of |len| items at most |len - 1|
+	   adjacent element comparisons / swaps are required. */
+	for i = 1; i < len; i++ {
+		if q[offset&7].costdiff > q[(offset+1)&7].costdiff {
+			var tmp posData = q[offset&7]
+			q[offset&7] = q[(offset+1)&7]
+			q[(offset+1)&7] = tmp
+		}
+
+		offset++
+	}
+}
+
+/* startPosQueueAt returns the k-th entry of the queue in order of increasing
+   costdiff; k = 0 addresses the slot where the sorted run begins. */
+func startPosQueueAt(self *startPosQueue, k uint) *posData {
+	return &self.q_[(k-self.idx_)&7]
+}
+
+/* computeMinimumCopyLength returns the minimum possible copy length that can
+   improve the cost of any future position, given that |start_cost| is the
+   minimum possible cost of reaching anything from |pos|. */
+func computeMinimumCopyLength(start_cost float32, nodes []zopfliNode, num_bytes uint, pos uint) uint {
+	var (
+		threshold        = start_cost
+		candidate   uint = 2
+		bucketStart uint = 10
+		bucketWidth uint = 4
+	)
+
+	for pos+candidate <= num_bytes && nodes[pos+candidate].u.cost <= threshold {
+		/* (pos + candidate) is already reachable for no more than the
+		   threshold, so lengths <= candidate cannot help. */
+		candidate++
+
+		if candidate != bucketStart {
+			continue
+		}
+
+		/* We reached the next copy length code bucket, so we add one more
+		   extra bit to the minimum cost. */
+		threshold += 1.0
+		bucketStart += bucketWidth
+		bucketWidth *= 2
+	}
+
+	return candidate
+}
+
+/* REQUIRES: nodes[pos].cost < kInfinity
+   REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
+func computeDistanceShortcut(block_start uint, pos uint, max_backward_limit uint, gap uint, nodes []zopfliNode) uint32 {
+	var clen uint = uint(zopfliNodeCopyLength(&nodes[pos]))
+	var ilen uint = uint(nodes[pos].dcode_insert_length & 0x7FFFFFF)
+	var dist uint = uint(zopfliNodeCopyDistance(&nodes[pos]))
+
+	/* Since |block_start + pos| is the end position of the command, the copy part
+	   starts from |block_start + pos - clen|. Distances that are greater than
+	   this or greater than |max_backward_limit| + |gap| are static dictionary
+	   references, and do not update the last distances.
+	   Also distance code 0 (last distance) does not update the last distances. */
+	if pos == 0 {
+		return 0
+	} else if dist+clen <= block_start+pos+gap && dist <= max_backward_limit+gap && zopfliNodeDistanceCode(&nodes[pos]) > 0 {
+		return uint32(pos)
+	} else {
+		return nodes[pos-clen-ilen].u.shortcut
+	}
+}
+
+/* Fills in dist_cache[0..3] with the last four distances (as defined by
+   Section 4. of the Spec) that would be used at (block_start + pos) if we
+   used the shortest path of commands from block_start, computed from
+   nodes[0..pos]. The last four distances at block_start are in
+   starting_dist_cache[0..3].
+   REQUIRES: nodes[pos].cost < kInfinity
+   REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
+func computeDistanceCache(pos uint, starting_dist_cache []int, nodes []zopfliNode, dist_cache []int) {
+	var idx int = 0
+	var p uint = uint(nodes[pos].u.shortcut)
+	for idx < 4 && p > 0 {
+		var ilen uint = uint(nodes[p].dcode_insert_length & 0x7FFFFFF)
+		var clen uint = uint(zopfliNodeCopyLength(&nodes[p]))
+		var dist uint = uint(zopfliNodeCopyDistance(&nodes[p]))
+		dist_cache[idx] = int(dist)
+		idx++
+
+		/* Because of prerequisite, p >= clen + ilen >= 2. */
+		p = uint(nodes[p-clen-ilen].u.shortcut)
+	}
+
+	for ; idx < 4; idx++ {
+		dist_cache[idx] = starting_dist_cache[0]
+		starting_dist_cache = starting_dist_cache[1:]
+	}
+}
+
+/* Maintains "ZopfliNode array invariant" and pushes node to the queue, if it
+   is eligible. */
+func evaluateNode(block_start uint, pos uint, max_backward_limit uint, gap uint, starting_dist_cache []int, model *zopfliCostModel, queue *startPosQueue, nodes []zopfliNode) {
+	/* Save cost, because ComputeDistanceCache invalidates it. */
+	var node_cost float32 = nodes[pos].u.cost
+	nodes[pos].u.shortcut = computeDistanceShortcut(block_start, pos, max_backward_limit, gap, nodes)
+	if node_cost <= zopfliCostModelGetLiteralCosts(model, 0, pos) {
+		var posdata posData
+		posdata.pos = pos
+		posdata.cost = node_cost
+		posdata.costdiff = node_cost - zopfliCostModelGetLiteralCosts(model, 0, pos)
+		computeDistanceCache(pos, starting_dist_cache, nodes, posdata.distance_cache[:])
+		startPosQueuePush(queue, &posdata)
+	}
+}
+
+/* updateNodes evaluates the node at block offset |pos| and then relaxes the
+   nodes reachable from it, considering up to maxZopfliCandidates(params)
+   command start positions from |queue|:
+     - for every start position, copies at the distances derived from that
+       position's distance cache (the distance short codes);
+     - for the first two start positions only, copies for each of the first
+       |num_matches| entries of |matches|.
+   A node is overwritten only when the new command reaches it at a strictly
+   lower cost.
+
+   Returns longest copy length among the nodes that were updated. */
+func updateNodes(num_bytes uint, block_start uint, pos uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, max_backward_limit uint, starting_dist_cache []int, num_matches uint, matches []backwardMatch, model *zopfliCostModel, queue *startPosQueue, nodes []zopfliNode) uint {
+	var cur_ix uint = block_start + pos
+	var cur_ix_masked uint = cur_ix & ringbuffer_mask
+	var max_distance uint = brotli_min_size_t(cur_ix, max_backward_limit)
+	var max_len uint = num_bytes - pos
+	var max_zopfli_len uint = maxZopfliLen(params)
+	var max_iters uint = maxZopfliCandidates(params)
+	var min_len uint
+	var result uint = 0
+	var k uint
+	var gap uint = 0
+
+	evaluateNode(block_start, pos, max_backward_limit, gap, starting_dist_cache, model, queue, nodes)
+	{
+		/* Lower bound on the cost of any command ending after |pos|, derived
+		   from the cheapest queued start position. */
+		var posdata *posData = startPosQueueAt(queue, 0)
+		var min_cost float32 = (posdata.cost + zopfliCostModelGetMinCostCmd(model) + zopfliCostModelGetLiteralCosts(model, posdata.pos, pos))
+		min_len = computeMinimumCopyLength(min_cost, nodes, num_bytes, pos)
+	}
+
+	/* Go over the command starting positions in order of increasing cost
+	   difference. */
+	for k = 0; k < max_iters && k < startPosQueueSize(queue); k++ {
+		var posdata *posData = startPosQueueAt(queue, k)
+		var start uint = posdata.pos
+		var inscode uint16 = getInsertLengthCode(pos - start)
+		var start_costdiff float32 = posdata.costdiff
+		var base_cost float32 = start_costdiff + float32(getInsertExtra(inscode)) + zopfliCostModelGetLiteralCosts(model, 0, pos)
+		var best_len uint = min_len - 1
+		var j uint = 0
+		/* Look for last distance matches using the distance cache from this
+		   starting position. */
+		for ; j < numDistanceShortCodes && best_len < max_len; j++ {
+			var idx uint = uint(kDistanceCacheIndex[j])
+			var backward uint = uint(posdata.distance_cache[idx] + kDistanceCacheOffset[j])
+			var prev_ix uint = cur_ix - backward
+			var len uint = 0
+
+			/* Check the bound before touching the byte: the translated code
+			   read ringbuffer[cur_ix_masked+best_len] first, which in Go
+			   panics when that index lies past the end of the slice. */
+			if cur_ix_masked+best_len > ringbuffer_mask {
+				break
+			}
+			var continuation byte = ringbuffer[cur_ix_masked+best_len]
+
+			if backward > max_distance+gap {
+				/* Word dictionary -> ignore. */
+				continue
+			}
+
+			if backward <= max_distance {
+				/* Regular backward reference. */
+				if prev_ix >= cur_ix {
+					continue
+				}
+
+				/* Cheap rejection: a match longer than best_len must agree
+				   on the byte at offset best_len. */
+				prev_ix &= ringbuffer_mask
+				if prev_ix+best_len > ringbuffer_mask || continuation != ringbuffer[prev_ix+best_len] {
+					continue
+				}
+
+				len = findMatchLengthWithLimit(ringbuffer[prev_ix:], ringbuffer[cur_ix_masked:], max_len)
+			} else {
+				continue
+			}
+			{
+				var dist_cost float32 = base_cost + zopfliCostModelGetDistanceCost(model, j)
+				var l uint
+				for l = best_len + 1; l <= len; l++ {
+					var copycode uint16 = getCopyLengthCode(l)
+					var cmdcode uint16 = combineLengthCodes(inscode, copycode, j == 0)
+					var tmp float32
+					/* Command codes below 128 do not pay for a distance
+					   symbol. */
+					if cmdcode < 128 {
+						tmp = base_cost
+					} else {
+						tmp = dist_cost
+					}
+					var cost float32 = tmp + float32(getCopyExtra(copycode)) + zopfliCostModelGetCommandCost(model, cmdcode)
+					if cost < nodes[pos+l].u.cost {
+						updateZopfliNode(nodes, pos, start, l, l, backward, j+1, cost)
+						result = brotli_max_size_t(result, l)
+					}
+
+					best_len = l
+				}
+			}
+		}
+
+		/* At higher iterations look only for new last distance matches, since
+		   looking only for new command start positions with the same distances
+		   does not help much. */
+		if k >= 2 {
+			continue
+		}
+		{
+			/* Loop through all possible copy lengths at this position. */
+			var len uint = min_len
+			for j = 0; j < num_matches; j++ {
+				var match backwardMatch = matches[j]
+				var dist uint = uint(match.distance)
+				var is_dictionary_match bool = (dist > max_distance+gap)
+				var dist_code uint = dist + numDistanceShortCodes - 1
+				var dist_symbol uint16
+				var distextra uint32
+				var distnumextra uint32
+				var dist_cost float32
+				var max_match_len uint
+				/* We already tried all possible last distance matches, so we can use
+				   normal distance code here. */
+				prefixEncodeCopyDistance(dist_code, uint(params.dist.num_direct_distance_codes), uint(params.dist.distance_postfix_bits), &dist_symbol, &distextra)
+
+				/* The bits of dist_symbol above the low 10 give the number of
+				   extra bits; the low 10 bits are the distance symbol. */
+				distnumextra = uint32(dist_symbol) >> 10
+				dist_cost = base_cost + float32(distnumextra) + zopfliCostModelGetDistanceCost(model, uint(dist_symbol)&0x3FF)
+
+				/* Try all copy lengths up until the maximum copy length corresponding
+				   to this distance. If the distance refers to the static dictionary, or
+				   the maximum length is long enough, try only one maximum length. */
+				max_match_len = backwardMatchLength(&match)
+
+				if len < max_match_len && (is_dictionary_match || max_match_len > max_zopfli_len) {
+					len = max_match_len
+				}
+
+				for ; len <= max_match_len; len++ {
+					var len_code uint
+					if is_dictionary_match {
+						len_code = backwardMatchLengthCode(&match)
+					} else {
+						len_code = len
+					}
+					var copycode uint16 = getCopyLengthCode(len_code)
+					var cmdcode uint16 = combineLengthCodes(inscode, copycode, false)
+					var cost float32 = dist_cost + float32(getCopyExtra(copycode)) + zopfliCostModelGetCommandCost(model, cmdcode)
+					if cost < nodes[pos+len].u.cost {
+						updateZopfliNode(nodes, pos, start, uint(len), len_code, dist, 0, cost)
+						if len > result {
+							result = len
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return result
+}
+
+/* computeShortestPathFromNodes walks the node array backwards from the last
+   position that ends a command, stores in each visited node's u.next the
+   length of the command that starts there (math.MaxUint32 on the final
+   node), and returns the number of commands on the path. */
+func computeShortestPathFromNodes(num_bytes uint, nodes []zopfliNode) uint {
+	cursor := num_bytes
+
+	/* Skip trailing nodes that still look like initZopfliNodes left them. */
+	for nodes[cursor].dcode_insert_length&0x7FFFFFF == 0 && nodes[cursor].length == 1 {
+		cursor--
+	}
+	nodes[cursor].u.next = math.MaxUint32
+
+	var commandCount uint
+	for ; cursor != 0; commandCount++ {
+		step := zopfliNodeCommandLength(&nodes[cursor])
+		cursor -= uint(step)
+		nodes[cursor].u.next = step
+	}
+
+	return commandCount
+}
+
+/* zopfliCreateCommands follows the u.next links written by
+   computeShortestPathFromNodes, starting at nodes[0], and appends one command
+   per link to |commands|.
+
+   *last_insert_len is added to the insert length of the first command and
+   then reset; on return it additionally holds the bytes after the last
+   command. dist_cache is updated in place for every non-dictionary command
+   with a non-zero distance code, and *num_literals grows by each command's
+   insert length.
+
+   REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
+func zopfliCreateCommands(num_bytes uint, block_start uint, nodes []zopfliNode, dist_cache []int, last_insert_len *uint, params *encoderParams, commands *[]command, num_literals *uint) {
+	var max_backward_limit uint = maxBackwardLimit(params.lgwin)
+	var pos uint = 0
+	var offset uint32 = nodes[0].u.next
+	var i uint
+	var gap uint = 0
+	/* math.MaxUint32 in u.next marks the end of the path. */
+	for i = 0; offset != math.MaxUint32; i++ {
+		/* The node at the END of the command carries its description. */
+		var next *zopfliNode = &nodes[uint32(pos)+offset]
+		var copy_length uint = uint(zopfliNodeCopyLength(next))
+		var insert_length uint = uint(next.dcode_insert_length & 0x7FFFFFF)
+		pos += insert_length
+		offset = next.u.next
+		if i == 0 {
+			/* Literals pending from before the block join the first command. */
+			insert_length += *last_insert_len
+			*last_insert_len = 0
+		}
+		{
+			var distance uint = uint(zopfliNodeCopyDistance(next))
+			var len_code uint = uint(zopfliNodeLengthCode(next))
+			var max_distance uint = brotli_min_size_t(block_start+pos, max_backward_limit)
+			var is_dictionary bool = (distance > max_distance+gap)
+			var dist_code uint = uint(zopfliNodeDistanceCode(next))
+			*commands = append(*commands, makeCommand(&params.dist, insert_length, copy_length, int(len_code)-int(copy_length), dist_code))
+
+			if !is_dictionary && dist_code > 0 {
+				dist_cache[3] = dist_cache[2]
+				dist_cache[2] = dist_cache[1]
+				dist_cache[1] = dist_cache[0]
+				dist_cache[0] = int(distance)
+			}
+		}
+
+		*num_literals += insert_length
+		pos += copy_length
+	}
+
+	/* Bytes not covered by any command remain pending literals. */
+	*last_insert_len += num_bytes - pos
+}
+
+/* zopfliIterate runs the shortest-path search over a block using matches that
+   were collected beforehand: num_matches[i] is the number of matches found
+   at block offset i and |matches| holds them back to back. It fills |nodes|
+   and returns the number of commands on the resulting path. */
+func zopfliIterate(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, gap uint, dist_cache []int, model *zopfliCostModel, num_matches []uint32, matches []backwardMatch, nodes []zopfliNode) uint {
+	var max_backward_limit uint = maxBackwardLimit(params.lgwin)
+	var max_zopfli_len uint = maxZopfliLen(params)
+	var queue startPosQueue
+	var cur_match_pos uint = 0
+	var i uint
+	/* The block start is reachable at zero cost. */
+	nodes[0].length = 0
+	nodes[0].u.cost = 0
+	initStartPosQueue(&queue)
+	for i = 0; i+3 < num_bytes; i++ {
+		var skip uint = updateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask, params, max_backward_limit, dist_cache, uint(num_matches[i]), matches[cur_match_pos:], model, &queue, nodes)
+		/* Only copies of at least longCopyQuickStep bytes make us skip
+		   ahead. */
+		if skip < longCopyQuickStep {
+			skip = 0
+		}
+		cur_match_pos += uint(num_matches[i])
+		/* A single match longer than max_zopfli_len is skipped over
+		   entirely. */
+		if num_matches[i] == 1 && backwardMatchLength(&matches[cur_match_pos-1]) > max_zopfli_len {
+			skip = brotli_max_size_t(backwardMatchLength(&matches[cur_match_pos-1]), skip)
+		}
+
+		if skip > 1 {
+			skip--
+			/* Skipped positions are still evaluated (and may be queued), but
+			   no copies are tried from them. */
+			for skip != 0 {
+				i++
+				if i+3 >= num_bytes {
+					break
+				}
+				evaluateNode(position, i, max_backward_limit, gap, dist_cache, model, &queue, nodes)
+				cur_match_pos += uint(num_matches[i])
+				skip--
+			}
+		}
+	}
+
+	return computeShortestPathFromNodes(num_bytes, nodes)
+}
+
+/* Computes the shortest path of commands from position to at most
+   position + num_bytes.
+
+   Matches are searched with |hasher| as the block is traversed, and costs
+   come from a model built from literal cost estimates only. The return value
+   is the number of commands found; the length of each command (copy length
+   plus insert length) is stored in the u.next field of the node where it
+   starts.
+   Note that the sum of the lengths of all commands can be less than num_bytes.
+
+   On return, the nodes[0..num_bytes] array will have the following
+   "ZopfliNode array invariant":
+   For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
+     (1) nodes[i].copy_length() >= 2
+     (2) nodes[i].command_length() <= i and
+     (3) nodes[i - nodes[i].command_length()].cost < kInfinity
+
+ REQUIRES: nodes != nil and len(nodes) >= num_bytes + 1 */
+func zopfliComputeShortestPath(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, dist_cache []int, hasher *h10, nodes []zopfliNode) uint {
+	var max_backward_limit uint = maxBackwardLimit(params.lgwin)
+	var max_zopfli_len uint = maxZopfliLen(params)
+	var model zopfliCostModel
+	var queue startPosQueue
+	var matches [2 * (maxNumMatchesH10 + 64)]backwardMatch
+	/* Hashes are only stored for positions below store_end. */
+	var store_end uint
+	if num_bytes >= hasher.StoreLookahead() {
+		store_end = position + num_bytes - hasher.StoreLookahead() + 1
+	} else {
+		store_end = position
+	}
+	var i uint
+	var gap uint = 0
+	var lz_matches_offset uint = 0
+	/* The block start is reachable at zero cost. */
+	nodes[0].length = 0
+	nodes[0].u.cost = 0
+	initZopfliCostModel(&model, &params.dist, num_bytes)
+	zopfliCostModelSetFromLiteralCosts(&model, position, ringbuffer, ringbuffer_mask)
+	initStartPosQueue(&queue)
+	for i = 0; i+hasher.HashTypeLength()-1 < num_bytes; i++ {
+		var pos uint = position + i
+		var max_distance uint = brotli_min_size_t(pos, max_backward_limit)
+		var skip uint
+		var num_matches uint
+		num_matches = findAllMatchesH10(hasher, &params.dictionary, ringbuffer, ringbuffer_mask, pos, num_bytes-i, max_distance, gap, params, matches[lz_matches_offset:])
+		/* When the last match is longer than max_zopfli_len, keep only that
+		   one. */
+		if num_matches > 0 && backwardMatchLength(&matches[num_matches-1]) > max_zopfli_len {
+			matches[0] = matches[num_matches-1]
+			num_matches = 1
+		}
+
+		skip = updateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask, params, max_backward_limit, dist_cache, num_matches, matches[:], &model, &queue, nodes)
+		/* Only copies of at least longCopyQuickStep bytes make us skip
+		   ahead. */
+		if skip < longCopyQuickStep {
+			skip = 0
+		}
+		if num_matches == 1 && backwardMatchLength(&matches[0]) > max_zopfli_len {
+			skip = brotli_max_size_t(backwardMatchLength(&matches[0]), skip)
+		}
+
+		if skip > 1 {
+			/* Add the tail of the copy to the hasher. */
+			hasher.StoreRange(ringbuffer, ringbuffer_mask, pos+1, brotli_min_size_t(pos+skip, store_end))
+
+			skip--
+			/* Skipped positions are still evaluated (and may be queued), but
+			   no match search is run for them. */
+			for skip != 0 {
+				i++
+				if i+hasher.HashTypeLength()-1 >= num_bytes {
+					break
+				}
+				evaluateNode(position, i, max_backward_limit, gap, dist_cache, &model, &queue, nodes)
+				skip--
+			}
+		}
+	}
+
+	cleanupZopfliCostModel(&model)
+	return computeShortestPathFromNodes(num_bytes, nodes)
+}
+
+func createZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher *h10, dist_cache []int, last_insert_len *uint, commands *[]command, num_literals *uint) {
+	var nodes []zopfliNode
+	nodes = make([]zopfliNode, (num_bytes + 1))
+	initZopfliNodes(nodes, num_bytes+1)
+	zopfliComputeShortestPath(num_bytes, position, ringbuffer, ringbuffer_mask, params, dist_cache, hasher, nodes)
+	zopfliCreateCommands(num_bytes, position, nodes, dist_cache, last_insert_len, params, commands, num_literals)
+	nodes = nil
+}
+
+/* createHqZopfliBackwardReferences produces the commands for a block in two
+   stages: it first collects all matches for every position of the block, and
+   then runs the shortest-path search twice over those matches, first with a
+   cost model built from literal cost estimates and then with one built from
+   the commands of the first run. Only the commands of the second run are
+   kept.
+
+   |hasher| must hold a *h10; the type assertion below panics otherwise. */
+func createHqZopfliBackwardReferences(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint, params *encoderParams, hasher hasherHandle, dist_cache []int, last_insert_len *uint, commands *[]command, num_literals *uint) {
+	var max_backward_limit uint = maxBackwardLimit(params.lgwin)
+	/* num_matches[i] is the number of matches kept for block offset i. */
+	var num_matches []uint32 = make([]uint32, num_bytes)
+	var matches_size uint = 4 * num_bytes
+	/* Hashes are only stored for positions below store_end. */
+	var store_end uint
+	if num_bytes >= hasher.StoreLookahead() {
+		store_end = position + num_bytes - hasher.StoreLookahead() + 1
+	} else {
+		store_end = position
+	}
+	var cur_match_pos uint = 0
+	var i uint
+	var orig_num_literals uint
+	var orig_last_insert_len uint
+	var orig_dist_cache [4]int
+	var orig_num_commands int
+	var model zopfliCostModel
+	var nodes []zopfliNode
+	var matches []backwardMatch = make([]backwardMatch, matches_size)
+	var gap uint = 0
+	var shadow_matches uint = 0
+	var new_array []backwardMatch
+	for i = 0; i+hasher.HashTypeLength()-1 < num_bytes; i++ {
+		var pos uint = position + i
+		var max_distance uint = brotli_min_size_t(pos, max_backward_limit)
+		var max_length uint = num_bytes - i
+		var num_found_matches uint
+		var cur_match_end uint
+		var j uint
+
+		/* Ensure that we have enough free slots. */
+		if matches_size < cur_match_pos+maxNumMatchesH10+shadow_matches {
+			var new_size uint = matches_size
+			if new_size == 0 {
+				new_size = cur_match_pos + maxNumMatchesH10 + shadow_matches
+			}
+
+			/* Grow by doubling until the request fits. */
+			for new_size < cur_match_pos+maxNumMatchesH10+shadow_matches {
+				new_size *= 2
+			}
+
+			new_array = make([]backwardMatch, new_size)
+			if matches_size != 0 {
+				copy(new_array, matches[:matches_size])
+			}
+
+			matches = new_array
+			matches_size = new_size
+		}
+
+		num_found_matches = findAllMatchesH10(hasher.(*h10), &params.dictionary, ringbuffer, ringbuffer_mask, pos, max_length, max_distance, gap, params, matches[cur_match_pos+shadow_matches:])
+		cur_match_end = cur_match_pos + num_found_matches
+		/* The matches of one position must be ordered by non-decreasing
+		   length. */
+		for j = cur_match_pos; j+1 < cur_match_end; j++ {
+			assert(backwardMatchLength(&matches[j]) <= backwardMatchLength(&matches[j+1]))
+		}
+
+		num_matches[i] = uint32(num_found_matches)
+		if num_found_matches > 0 {
+			var match_len uint = backwardMatchLength(&matches[cur_match_end-1])
+			if match_len > maxZopfliLenQuality11 {
+				/* The longest match is long enough: keep only that one and do
+				   not search the positions it covers. */
+				var skip uint = match_len - 1
+				matches[cur_match_pos] = matches[cur_match_end-1]
+				cur_match_pos++
+				num_matches[i] = 1
+
+				/* Add the tail of the copy to the hasher. */
+				hasher.StoreRange(ringbuffer, ringbuffer_mask, pos+1, brotli_min_size_t(pos+match_len, store_end))
+				/* NOTE: from here on |pos| is the block offset i (it shadows
+				   the absolute position above), and the inner |i| shadows
+				   the outer loop counter. The covered positions get zero
+				   matches. */
+				var pos uint = i
+				for i := 0; i < int(skip); i++ {
+					num_matches[pos+1:][i] = 0
+				}
+				i += skip
+			} else {
+				cur_match_pos = cur_match_end
+			}
+		}
+	}
+
+	/* Remember the caller's state so that each pass starts from it. */
+	orig_num_literals = *num_literals
+	orig_last_insert_len = *last_insert_len
+	copy(orig_dist_cache[:], dist_cache[:4])
+	orig_num_commands = len(*commands)
+	nodes = make([]zopfliNode, (num_bytes + 1))
+	initZopfliCostModel(&model, &params.dist, num_bytes)
+	for i = 0; i < 2; i++ {
+		initZopfliNodes(nodes, num_bytes+1)
+		if i == 0 {
+			zopfliCostModelSetFromLiteralCosts(&model, position, ringbuffer, ringbuffer_mask)
+		} else {
+			/* Second pass: derive the costs from the commands that the first
+			   pass appended. */
+			zopfliCostModelSetFromCommands(&model, position, ringbuffer, ringbuffer_mask, (*commands)[orig_num_commands:], orig_last_insert_len)
+		}
+
+		/* Discard the previous pass's output and restore the saved state. */
+		*commands = (*commands)[:orig_num_commands]
+		*num_literals = orig_num_literals
+		*last_insert_len = orig_last_insert_len
+		copy(dist_cache, orig_dist_cache[:4])
+		zopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask, params, gap, dist_cache, &model, num_matches, matches, nodes)
+		zopfliCreateCommands(num_bytes, position, nodes, dist_cache, last_insert_len, params, commands, num_literals)
+	}
+
+	cleanupZopfliCostModel(&model)
+	nodes = nil
+	matches = nil
+	num_matches = nil
+}
@@ -0,0 +1,436 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to estimate the bit cost of Huffman trees. */
+/* shannonEntropy walks the first |size| counts of |population|, stores their
+   sum in |*total| and returns
+       sum * fastLog2(sum) - SUM(count * fastLog2(count))
+   (the first term is only added when the sum is non-zero). */
+func shannonEntropy(population []uint32, size uint, total *uint) float64 {
+	var count_sum uint
+	var entropy float64
+	var idx uint
+	for idx = 0; idx < size; idx++ {
+		var count uint = uint(population[idx])
+		count_sum += count
+		entropy -= float64(count) * fastLog2(count)
+	}
+
+	if count_sum != 0 {
+		entropy += float64(count_sum) * fastLog2(count_sum)
+	}
+
+	*total = count_sum
+	return entropy
+}
+
+/* bitsEntropy returns shannonEntropy over the first |size| counts of
+   |population|, raised to the total count when the entropy is smaller than
+   that total (at least one bit per counted symbol is charged). */
+func bitsEntropy(population []uint32, size uint) float64 {
+	var total uint
+	entropy := shannonEntropy(population, size, &total)
+	if floor := float64(total); entropy < floor {
+		/* At least one bit per literal is needed. */
+		return floor
+	}
+
+	return entropy
+}
+
+/* Fixed base costs returned by the populationCost* functions for histograms
+   that contain exactly one, two, three or four distinct symbols. */
+const (
+	kOneSymbolHistogramCost   float64 = 12
+	kTwoSymbolHistogramCost   float64 = 20
+	kThreeSymbolHistogramCost float64 = 28
+	kFourSymbolHistogramCost  float64 = 37
+)
+
+/* populationCostLiteral returns a bit-cost estimate for |histogram|.
+
+   An empty histogram and a histogram with a single used symbol both cost
+   kOneSymbolHistogramCost. Histograms with two, three or four used symbols
+   are costed with closed-form expressions built from the fixed
+   k*SymbolHistogramCost constants and the symbol counts. For five or more
+   used symbols the result is the sum of
+     - count * (log2(total) - log2(count)) over all used symbols,
+     - 3 bits for every zero-repeat code emitted for interior runs of zeros,
+     - 18 + 2*max_depth for the code length code histogram, and
+     - the bitsEntropy of that code length code histogram. */
+func populationCostLiteral(histogram *histogramLiteral) float64 {
+	var data_size uint = histogramDataSizeLiteral()
+	var count int = 0
+	/* Indexes of the first (up to five) symbols with a non-zero count. */
+	var s [5]uint
+	var bits float64 = 0.0
+	var i uint
+	if histogram.total_count_ == 0 {
+		return kOneSymbolHistogramCost
+	}
+
+	/* Count used symbols, stopping as soon as a fifth one is seen: from five
+	   upwards the exact number no longer matters. */
+	for i = 0; i < data_size; i++ {
+		if histogram.data_[i] > 0 {
+			s[count] = i
+			count++
+			if count > 4 {
+				break
+			}
+		}
+	}
+
+	if count == 1 {
+		return kOneSymbolHistogramCost
+	}
+
+	if count == 2 {
+		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
+	}
+
+	if count == 3 {
+		var histo0 uint32 = histogram.data_[s[0]]
+		var histo1 uint32 = histogram.data_[s[1]]
+		var histo2 uint32 = histogram.data_[s[2]]
+		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
+		/* Two bits per symbol, minus one bit for each occurrence of the most
+		   frequent symbol. */
+		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
+	}
+
+	if count == 4 {
+		var histo [4]uint32
+		var h23 uint32
+		var histomax uint32
+		for i = 0; i < 4; i++ {
+			histo[i] = histogram.data_[s[i]]
+		}
+
+		/* Sort */
+		/* (descending order: histo[0] ends up holding the largest count) */
+		for i = 0; i < 4; i++ {
+			var j uint
+			for j = i + 1; j < 4; j++ {
+				if histo[j] > histo[i] {
+					var tmp uint32 = histo[j]
+					histo[j] = histo[i]
+					histo[i] = tmp
+				}
+			}
+		}
+
+		/* h23 is the combined count of the two least frequent symbols. */
+		h23 = histo[2] + histo[3]
+		histomax = brotli_max_uint32_t(h23, histo[0])
+		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
+	}
+	{
+		var max_depth uint = 1
+		var depth_histo = [codeLengthCodes]uint32{0}
+		/* In this loop we compute the entropy of the histogram and simultaneously
+		   build a simplified histogram of the code length codes where we use the
+		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
+
+		var log2total float64 = fastLog2(histogram.total_count_)
+		/* No post statement: each branch advances i itself (by one symbol or by
+		   a whole run of zeros). */
+		for i = 0; i < data_size; {
+			if histogram.data_[i] > 0 {
+				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
+				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
+				   = log2(total_count) - log2(count(symbol)) */
+
+				var depth uint = uint(log2p + 0.5)
+				/* Approximate the bit depth by round(-log2(P(symbol))) */
+				bits += float64(histogram.data_[i]) * log2p
+
+				if depth > 15 {
+					depth = 15
+				}
+
+				if depth > max_depth {
+					max_depth = depth
+				}
+
+				depth_histo[depth]++
+				i++
+			} else {
+				var reps uint32 = 1
+				/* Compute the run length of zeros and add the appropriate number of 0
+				   and 17 code length codes to the code length code histogram. */
+
+				var k uint
+				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
+					reps++
+				}
+
+				i += uint(reps)
+				if i == data_size {
+					/* Don't add any cost for the last zero run, since these are encoded
+					   only implicitly. */
+					break
+				}
+
+				if reps < 3 {
+					depth_histo[0] += reps
+				} else {
+					/* One repeat code is counted per iteration; the remaining run
+					   length is divided by 8 each time. */
+					reps -= 2
+					for reps > 0 {
+						depth_histo[repeatZeroCodeLength]++
+
+						/* Add the 3 extra bits for the 17 code length code. */
+						bits += 3
+
+						reps >>= 3
+					}
+				}
+			}
+		}
+
+		/* Add the estimated encoding cost of the code length code histogram. */
+		bits += float64(18 + 2*max_depth)
+
+		/* Add the entropy of the code length code histogram. */
+		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
+	}
+
+	return bits
+}
+
+/* populationCostCommand returns a bit-cost estimate for |histogram|.
+
+   An empty histogram and a histogram with a single used symbol both cost
+   kOneSymbolHistogramCost. Histograms with two, three or four used symbols
+   are costed with closed-form expressions built from the fixed
+   k*SymbolHistogramCost constants and the symbol counts. For five or more
+   used symbols the result is the sum of
+     - count * (log2(total) - log2(count)) over all used symbols,
+     - 3 bits for every zero-repeat code emitted for interior runs of zeros,
+     - 18 + 2*max_depth for the code length code histogram, and
+     - the bitsEntropy of that code length code histogram. */
+func populationCostCommand(histogram *histogramCommand) float64 {
+	var data_size uint = histogramDataSizeCommand()
+	var count int = 0
+	/* Indexes of the first (up to five) symbols with a non-zero count. */
+	var s [5]uint
+	var bits float64 = 0.0
+	var i uint
+	if histogram.total_count_ == 0 {
+		return kOneSymbolHistogramCost
+	}
+
+	/* Count used symbols, stopping as soon as a fifth one is seen: from five
+	   upwards the exact number no longer matters. */
+	for i = 0; i < data_size; i++ {
+		if histogram.data_[i] > 0 {
+			s[count] = i
+			count++
+			if count > 4 {
+				break
+			}
+		}
+	}
+
+	if count == 1 {
+		return kOneSymbolHistogramCost
+	}
+
+	if count == 2 {
+		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
+	}
+
+	if count == 3 {
+		var histo0 uint32 = histogram.data_[s[0]]
+		var histo1 uint32 = histogram.data_[s[1]]
+		var histo2 uint32 = histogram.data_[s[2]]
+		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
+		/* Two bits per symbol, minus one bit for each occurrence of the most
+		   frequent symbol. */
+		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
+	}
+
+	if count == 4 {
+		var histo [4]uint32
+		var h23 uint32
+		var histomax uint32
+		for i = 0; i < 4; i++ {
+			histo[i] = histogram.data_[s[i]]
+		}
+
+		/* Sort */
+		/* (descending order: histo[0] ends up holding the largest count) */
+		for i = 0; i < 4; i++ {
+			var j uint
+			for j = i + 1; j < 4; j++ {
+				if histo[j] > histo[i] {
+					var tmp uint32 = histo[j]
+					histo[j] = histo[i]
+					histo[i] = tmp
+				}
+			}
+		}
+
+		/* h23 is the combined count of the two least frequent symbols. */
+		h23 = histo[2] + histo[3]
+		histomax = brotli_max_uint32_t(h23, histo[0])
+		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
+	}
+	{
+		var max_depth uint = 1
+		var depth_histo = [codeLengthCodes]uint32{0}
+		/* In this loop we compute the entropy of the histogram and simultaneously
+		   build a simplified histogram of the code length codes where we use the
+		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
+
+		var log2total float64 = fastLog2(histogram.total_count_)
+		/* No post statement: each branch advances i itself (by one symbol or by
+		   a whole run of zeros). */
+		for i = 0; i < data_size; {
+			if histogram.data_[i] > 0 {
+				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
+				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
+				   = log2(total_count) - log2(count(symbol)) */
+
+				var depth uint = uint(log2p + 0.5)
+				/* Approximate the bit depth by round(-log2(P(symbol))) */
+				bits += float64(histogram.data_[i]) * log2p
+
+				if depth > 15 {
+					depth = 15
+				}
+
+				if depth > max_depth {
+					max_depth = depth
+				}
+
+				depth_histo[depth]++
+				i++
+			} else {
+				var reps uint32 = 1
+				/* Compute the run length of zeros and add the appropriate number of 0
+				   and 17 code length codes to the code length code histogram. */
+
+				var k uint
+				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
+					reps++
+				}
+
+				i += uint(reps)
+				if i == data_size {
+					/* Don't add any cost for the last zero run, since these are encoded
+					   only implicitly. */
+					break
+				}
+
+				if reps < 3 {
+					depth_histo[0] += reps
+				} else {
+					/* One repeat code is counted per iteration; the remaining run
+					   length is divided by 8 each time. */
+					reps -= 2
+					for reps > 0 {
+						depth_histo[repeatZeroCodeLength]++
+
+						/* Add the 3 extra bits for the 17 code length code. */
+						bits += 3
+
+						reps >>= 3
+					}
+				}
+			}
+		}
+
+		/* Add the estimated encoding cost of the code length code histogram. */
+		bits += float64(18 + 2*max_depth)
+
+		/* Add the entropy of the code length code histogram. */
+		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
+	}
+
+	return bits
+}
+
+/* populationCostDistance returns a bit-cost estimate for |histogram|.
+
+   An empty histogram and a histogram with a single used symbol both cost
+   kOneSymbolHistogramCost. Histograms with two, three or four used symbols
+   are costed with closed-form expressions built from the fixed
+   k*SymbolHistogramCost constants and the symbol counts. For five or more
+   used symbols the result is the sum of
+     - count * (log2(total) - log2(count)) over all used symbols,
+     - 3 bits for every zero-repeat code emitted for interior runs of zeros,
+     - 18 + 2*max_depth for the code length code histogram, and
+     - the bitsEntropy of that code length code histogram. */
+func populationCostDistance(histogram *histogramDistance) float64 {
+	var data_size uint = histogramDataSizeDistance()
+	var count int = 0
+	/* Indexes of the first (up to five) symbols with a non-zero count. */
+	var s [5]uint
+	var bits float64 = 0.0
+	var i uint
+	if histogram.total_count_ == 0 {
+		return kOneSymbolHistogramCost
+	}
+
+	/* Count used symbols, stopping as soon as a fifth one is seen: from five
+	   upwards the exact number no longer matters. */
+	for i = 0; i < data_size; i++ {
+		if histogram.data_[i] > 0 {
+			s[count] = i
+			count++
+			if count > 4 {
+				break
+			}
+		}
+	}
+
+	if count == 1 {
+		return kOneSymbolHistogramCost
+	}
+
+	if count == 2 {
+		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
+	}
+
+	if count == 3 {
+		var histo0 uint32 = histogram.data_[s[0]]
+		var histo1 uint32 = histogram.data_[s[1]]
+		var histo2 uint32 = histogram.data_[s[2]]
+		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
+		/* Two bits per symbol, minus one bit for each occurrence of the most
+		   frequent symbol. */
+		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
+	}
+
+	if count == 4 {
+		var histo [4]uint32
+		var h23 uint32
+		var histomax uint32
+		for i = 0; i < 4; i++ {
+			histo[i] = histogram.data_[s[i]]
+		}
+
+		/* Sort */
+		/* (descending order: histo[0] ends up holding the largest count) */
+		for i = 0; i < 4; i++ {
+			var j uint
+			for j = i + 1; j < 4; j++ {
+				if histo[j] > histo[i] {
+					var tmp uint32 = histo[j]
+					histo[j] = histo[i]
+					histo[i] = tmp
+				}
+			}
+		}
+
+		/* h23 is the combined count of the two least frequent symbols. */
+		h23 = histo[2] + histo[3]
+		histomax = brotli_max_uint32_t(h23, histo[0])
+		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
+	}
+	{
+		var max_depth uint = 1
+		var depth_histo = [codeLengthCodes]uint32{0}
+		/* In this loop we compute the entropy of the histogram and simultaneously
+		   build a simplified histogram of the code length codes where we use the
+		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
+
+		var log2total float64 = fastLog2(histogram.total_count_)
+		/* No post statement: each branch advances i itself (by one symbol or by
+		   a whole run of zeros). */
+		for i = 0; i < data_size; {
+			if histogram.data_[i] > 0 {
+				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
+				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
+				   = log2(total_count) - log2(count(symbol)) */
+
+				var depth uint = uint(log2p + 0.5)
+				/* Approximate the bit depth by round(-log2(P(symbol))) */
+				bits += float64(histogram.data_[i]) * log2p
+
+				if depth > 15 {
+					depth = 15
+				}
+
+				if depth > max_depth {
+					max_depth = depth
+				}
+
+				depth_histo[depth]++
+				i++
+			} else {
+				var reps uint32 = 1
+				/* Compute the run length of zeros and add the appropriate number of 0
+				   and 17 code length codes to the code length code histogram. */
+
+				var k uint
+				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
+					reps++
+				}
+
+				i += uint(reps)
+				if i == data_size {
+					/* Don't add any cost for the last zero run, since these are encoded
+					   only implicitly. */
+					break
+				}
+
+				if reps < 3 {
+					depth_histo[0] += reps
+				} else {
+					/* One repeat code is counted per iteration; the remaining run
+					   length is divided by 8 each time. */
+					reps -= 2
+					for reps > 0 {
+						depth_histo[repeatZeroCodeLength]++
+
+						/* Add the 3 extra bits for the 17 code length code. */
+						bits += 3
+
+						reps >>= 3
+					}
+				}
+			}
+		}
+
+		/* Add the estimated encoding cost of the code length code histogram. */
+		bits += float64(18 + 2*max_depth)
+
+		/* Add the entropy of the code length code histogram. */
+		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
+	}
+
+	return bits
+}
@@ -0,0 +1,266 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Bit reading helpers */
+
+/* Evaluates to 4 (8 >> 1).
+   NOTE(review): presumably the byte count of a "short" accumulator refill,
+   i.e. half of an 8-byte register; it is not referenced in the visible part
+   of this file, so confirm against its users. */
+const shortFillBitWindowRead = (8 >> 1)
+
+/* kBitMask[n] has the n lowest bits set, for n in [0, 32]; it is used to keep
+   only the low n bits of a value read from the accumulator. */
+var kBitMask = [33]uint32{
+	0x00000000,
+	0x00000001,
+	0x00000003,
+	0x00000007,
+	0x0000000F,
+	0x0000001F,
+	0x0000003F,
+	0x0000007F,
+	0x000000FF,
+	0x000001FF,
+	0x000003FF,
+	0x000007FF,
+	0x00000FFF,
+	0x00001FFF,
+	0x00003FFF,
+	0x00007FFF,
+	0x0000FFFF,
+	0x0001FFFF,
+	0x0003FFFF,
+	0x0007FFFF,
+	0x000FFFFF,
+	0x001FFFFF,
+	0x003FFFFF,
+	0x007FFFFF,
+	0x00FFFFFF,
+	0x01FFFFFF,
+	0x03FFFFFF,
+	0x07FFFFFF,
+	0x0FFFFFFF,
+	0x1FFFFFFF,
+	0x3FFFFFFF,
+	0x7FFFFFFF,
+	0xFFFFFFFF,
+}
+
+/* bitMask returns a mask with the |n| lowest bits set, looked up in kBitMask.
+   |n| must be at most 32; a larger value indexes past the 33-entry table and
+   panics. */
+func bitMask(n uint32) uint32 {
+	return kBitMask[n]
+}
+
+/* bitReader reads bits, least-significant first, from a byte slice through a
+   64-bit accumulator. */
+type bitReader struct {
+	val_      uint64 /* accumulator; unread bits start at bit position bit_pos_ */
+	bit_pos_  uint32 /* number of accumulator bits already consumed (64 = empty) */
+	input     []byte /* source bytes */
+	input_len uint   /* index in input at which reading stops */
+	byte_pos  uint   /* index in input of the next byte to load */
+}
+
+/* bitReaderState is a snapshot of a bitReader. It has exactly the same fields
+   as bitReader and is filled by bitReaderSaveState and applied by
+   bitReaderRestoreState. */
+type bitReaderState struct {
+	val_      uint64 /* saved accumulator */
+	bit_pos_  uint32 /* saved count of consumed accumulator bits */
+	input     []byte /* saved source slice */
+	input_len uint   /* saved end index */
+	byte_pos  uint   /* saved index of the next byte to load */
+}
+
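+/* NOTE: the two descriptions below do not describe the function that follows
+   them. They appear to be left over from declarations that were moved; the
+   matching functions in this file are initBitReader and warmupBitReader.
+
+   initBitReader: Initializes the BrotliBitReader fields.
+
+   warmupBitReader: Ensures that accumulator is not empty.
+   May consume up to sizeof(brotli_reg_t) - 1 bytes of input.
+   Returns false if data is required but there is no input available.
+   For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned
+   reading. */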
+/* bitReaderSaveState copies every field of |from| into |to|. The input slice
+   header is copied, not the bytes it refers to. */
+func bitReaderSaveState(from *bitReader, to *bitReaderState) {
+	/* bitReader and bitReaderState declare identical fields, so a struct
+	   conversion copies all of them in one assignment. */
+	*to = bitReaderState(*from)
+}
+
+/* bitReaderRestoreState overwrites every field of |to| with the values
+   previously captured in |from|. */
+func bitReaderRestoreState(to *bitReader, from *bitReaderState) {
+	/* bitReaderState and bitReader declare identical fields, so a struct
+	   conversion copies all of them in one assignment. */
+	*to = bitReader(*from)
+}
+
+/* getAvailableBits returns how many bits of the 64-bit accumulator have not
+   been consumed yet, i.e. 64 - bit_pos_. */
+func getAvailableBits(br *bitReader) uint32 {
+	return 64 - br.bit_pos_
+}
+
+/* Returns amount of unread bytes the bit reader still has buffered from the
+   BrotliInput, including whole bytes in br->val_. */
+func getRemainingBytes(br *bitReader) uint {
+	return uint(uint32(br.input_len-br.byte_pos) + (getAvailableBits(br) >> 3))
+}
+
+/* Checks if there is at least |num| bytes left in the input ring-buffer
+   (excluding the bits remaining in br->val_). */
+func checkInputAmount(br *bitReader, num uint) bool {
+	return br.input_len-br.byte_pos >= num
+}
+
+/* Guarantees that there are at least |n_bits| + 1 bits in accumulator.
+   Precondition: accumulator contains at least 1 bit.
+   |n_bits| should be in the range [1..24] for regular build. For portable
+   non-64-bit little-endian build only 16 bits are safe to request. */
+func fillBitWindow(br *bitReader, n_bits uint32) {
+	if br.bit_pos_ >= 32 {
+		br.val_ >>= 32
+		br.bit_pos_ ^= 32 /* here same as -= 32 because of the if condition */
+		br.val_ |= (uint64(binary.LittleEndian.Uint32(br.input[br.byte_pos:]))) << 32
+		br.byte_pos += 4
+	}
+}
+
+/* Mostly like BrotliFillBitWindow, but guarantees only 16 bits and reads no
+   more than BROTLI_SHORT_FILL_BIT_WINDOW_READ bytes of input.
+
+   In this port it simply forwards to fillBitWindow, which ignores its bit
+   count argument, so both functions behave identically. */
+func fillBitWindow16(br *bitReader) {
+	fillBitWindow(br, 17)
+}
+
+/* Tries to pull one byte of input to accumulator.
+   Returns false if there is no input available. */
+func pullByte(br *bitReader) bool {
+	if br.byte_pos == br.input_len {
+		return false
+	}
+
+	br.val_ >>= 8
+	br.val_ |= (uint64(br.input[br.byte_pos])) << 56
+	br.bit_pos_ -= 8
+	br.byte_pos++
+	return true
+}
+
+/* Returns currently available bits.
+   The number of valid bits could be calculated by BrotliGetAvailableBits.
+
+   The accumulator is shifted right by bit_pos_, so the next unread bit ends
+   up in bit 0 of the result; the reader itself is not modified. */
+func getBitsUnmasked(br *bitReader) uint64 {
+	return br.val_ >> br.bit_pos_
+}
+
+/* Like BrotliGetBits, but does not mask the result.
+   The result contains at least 16 valid bits. */
+func get16BitsUnmasked(br *bitReader) uint32 {
+	fillBitWindow(br, 16)
+	return uint32(getBitsUnmasked(br))
+}
+
+/* Returns the specified number of bits from |br| without advancing bit
+   position. */
+func getBits(br *bitReader, n_bits uint32) uint32 {
+	fillBitWindow(br, n_bits)
+	return uint32(getBitsUnmasked(br)) & bitMask(n_bits)
+}
+
+/* Tries to peek the specified amount of bits. Returns false, if there
+   is not enough input. */
+func safeGetBits(br *bitReader, n_bits uint32, val *uint32) bool {
+	for getAvailableBits(br) < n_bits {
+		if !pullByte(br) {
+			return false
+		}
+	}
+
+	*val = uint32(getBitsUnmasked(br)) & bitMask(n_bits)
+	return true
+}
+
+/* Advances the bit pos by |n_bits|.
+   No check is made that the accumulator actually holds that many unread
+   bits; callers are expected to have ensured it. */
+func dropBits(br *bitReader, n_bits uint32) {
+	br.bit_pos_ += n_bits
+}
+
+/* bitReaderUnload hands the whole unread bytes held in the accumulator back
+   to the input: byte_pos is moved back by that many bytes, the accumulator
+   is shifted left by the same number of bits and bit_pos_ is advanced to
+   match, so only a partial byte (fewer than 8 bits) stays buffered. */
+func bitReaderUnload(br *bitReader) {
+	var whole_bytes uint32 = getAvailableBits(br) >> 3
+	var returned_bits uint32 = whole_bytes << 3
+
+	br.byte_pos -= uint(whole_bytes)
+	br.bit_pos_ += returned_bits
+
+	if returned_bits != 64 {
+		br.val_ <<= returned_bits
+	} else {
+		/* The whole accumulator is handed back. */
+		br.val_ = 0
+	}
+}
+
+/* Reads the specified number of bits from |br| and advances the bit pos.
+   Precondition: accumulator MUST contain at least |n_bits|. */
+func takeBits(br *bitReader, n_bits uint32, val *uint32) {
+	*val = uint32(getBitsUnmasked(br)) & bitMask(n_bits)
+	dropBits(br, n_bits)
+}
+
+/* Refills the accumulator if needed, then returns the next |n_bits| bits of
+   |br| and advances the bit pos past them.
+   Assumes that there is enough input to perform BrotliFillBitWindow. */
+func readBits(br *bitReader, n_bits uint32) uint32 {
+	fillBitWindow(br, n_bits)
+
+	/* Same steps as takeBits: mask the window, then consume the bits. */
+	var result uint32 = uint32(getBitsUnmasked(br)) & bitMask(n_bits)
+	dropBits(br, n_bits)
+	return result
+}
+
+/* Reads |n_bits| bits into |*val| and advances past them, pulling input one
+   byte at a time until enough bits are buffered. Returns false, without
+   writing |*val| or consuming bits, when the input runs out first.
+   |n_bits| MUST be positive. */
+func safeReadBits(br *bitReader, n_bits uint32, val *uint32) bool {
+	for {
+		if getAvailableBits(br) >= n_bits {
+			break
+		}
+
+		if !pullByte(br) {
+			return false
+		}
+	}
+
+	takeBits(br, n_bits, val)
+	return true
+}
+
+/* Skips the bits remaining in the current partially read byte, if any, and
+   reports whether all skipped bits were zero. Returns true without touching
+   the reader when the number of available bits is already a multiple of 8. */
+func bitReaderJumpToByteBoundary(br *bitReader) bool {
+	var padding_len uint32 = getAvailableBits(br) & 0x7
+	if padding_len == 0 {
+		return true
+	}
+
+	var padding uint32
+	takeBits(br, padding_len, &padding)
+	return padding == 0
+}
+
+/* Copies remaining input bytes stored in the bit reader to the output. Value
+   |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be
+   warmed up again after this. */
+func copyBytes(dest []byte, br *bitReader, num uint) {
+	for getAvailableBits(br) >= 8 && num > 0 {
+		dest[0] = byte(getBitsUnmasked(br))
+		dropBits(br, 8)
+		dest = dest[1:]
+		num--
+	}
+
+	copy(dest, br.input[br.byte_pos:][:num])
+	br.byte_pos += num
+}
+
+/* initBitReader resets the accumulator to empty: val_ is cleared and bit_pos_
+   is set to 64, so getAvailableBits reports 0. The input, input_len and
+   byte_pos fields are left untouched. */
+func initBitReader(br *bitReader) {
+	br.val_ = 0
+	br.bit_pos_ = 64
+}
+
+/* warmupBitReader makes sure the accumulator is not empty, pulling one input
+   byte if it is. Returns false only when the accumulator is empty and no
+   input byte is available. */
+func warmupBitReader(br *bitReader) bool {
+	/* Fixing alignment after unaligned BrotliFillWindow would result accumulator
+	   overflow. If unalignment is caused by BrotliSafeReadBits, then there is
+	   enough space in accumulator to fix alignment. */
+	return getAvailableBits(br) != 0 || pullByte(br)
+}
@@ -0,0 +1,56 @@
+package brotli
+
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Write bits into a byte array. */
+
+// bitWriter packs bits, least-significant bit first, and appends the
+// resulting bytes to dst.
+type bitWriter struct {
+	dst []byte
+
+	// Pending output that has not reached dst yet: the low nbits bits of bits.
+	bits  uint64
+	nbits uint
+}
+
+// writeBits queues the low nb bits of b behind the bits already pending and,
+// once at least 32 bits are pending, appends the oldest 32 of them to dst as
+// four little-endian bytes.
+func (w *bitWriter) writeBits(nb uint, b uint64) {
+	w.bits |= b << w.nbits
+	w.nbits += nb
+	if w.nbits < 32 {
+		return
+	}
+
+	low := uint32(w.bits)
+	w.bits >>= 32
+	w.nbits -= 32
+	w.dst = append(w.dst, byte(low), byte(low>>8), byte(low>>16), byte(low>>24))
+}
+
+// writeSingleBit queues one bit: 1 for true, 0 for false.
+func (w *bitWriter) writeSingleBit(bit bool) {
+	var value uint64
+	if bit {
+		value = 1
+	}
+	w.writeBits(1, value)
+}
+
+// jumpToByteBoundary flushes every pending bit to dst, padding the final
+// partial byte with zero bits, and leaves the writer with nothing pending.
+func (w *bitWriter) jumpToByteBoundary() {
+	out := w.dst
+	for w.nbits != 0 {
+		out = append(out, byte(w.bits))
+		w.bits >>= 8
+		if w.nbits <= 8 {
+			// Last (possibly partial) byte; stop instead of underflowing.
+			w.nbits = 0
+		} else {
+			w.nbits -= 8
+		}
+	}
+	w.dst = out
+	w.bits = 0
+}
@@ -0,0 +1,144 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Block split point selection utilities. */
+
+/* blockSplit holds the result of splitting one symbol stream into blocks.
+   initBlockSplit resets it: the counters are zeroed and both slices are
+   truncated to length 0. */
+type blockSplit struct {
+	num_types          uint     /* NOTE(review): presumably the number of distinct block types — confirm */
+	num_blocks         uint     /* NOTE(review): presumably the number of blocks — confirm */
+	types              []byte   /* NOTE(review): presumably one block type id per block — confirm */
+	lengths            []uint32 /* NOTE(review): presumably one length per block — confirm */
+	types_alloc_size   uint     /* zeroed by initBlockSplit */
+	lengths_alloc_size uint     /* zeroed by initBlockSplit */
+}
+
+/* Tuning parameters for block splitting. splitBlock passes the literal set to
+   splitByteVectorLiteral, the command set to splitByteVectorCommand, and
+   kSymbolsPerDistanceHistogram, kMaxCommandHistograms, kCommandStrideLength
+   and kDistanceBlockSwitchCost to splitByteVectorDistance. */
+const (
+	kMaxLiteralHistograms        uint    = 100
+	kMaxCommandHistograms        uint    = 50 /* also used for distances */
+	kLiteralBlockSwitchCost      float64 = 28.1
+	kCommandBlockSwitchCost      float64 = 13.5
+	kDistanceBlockSwitchCost     float64 = 14.6
+	kLiteralStrideLength         uint    = 70
+	kCommandStrideLength         uint    = 40 /* also used for distances */
+	kSymbolsPerLiteralHistogram  uint    = 544
+	kSymbolsPerCommandHistogram  uint    = 530
+	kSymbolsPerDistanceHistogram uint    = 544
+	kMinLengthForBlockSplitting  uint    = 128
+	kIterMulForRefining          uint    = 2   /* refinement iterations: kIterMulForRefining*length/stride + kMinItersForRefining */
+	kMinItersForRefining         uint    = 100 /* constant term of the refinement iteration count */
+)
+
+/* countLiterals returns the sum of insert_len_ over all commands in |cmds|. */
+func countLiterals(cmds []command) uint {
+	var literal_count uint
+	for idx := 0; idx < len(cmds); idx++ {
+		literal_count += uint(cmds[idx].insert_len_)
+	}
+
+	return literal_count
+}
+
+/* copyLiteralsToByteArray gathers the inserted bytes of every command in
+   |cmds| into |literals|, back to back. |data| is read as a ring buffer of
+   mask+1 bytes starting at offset & mask: an insert that crosses the end of
+   the buffer is copied in two pieces, and after each command the read
+   position skips that command's copy length. */
+func copyLiteralsToByteArray(cmds []command, data []byte, offset uint, mask uint, literals []byte) {
+	var out uint = 0
+	var src uint = offset & mask
+	for i := range cmds {
+		var cmd *command = &cmds[i]
+		var remaining uint = uint(cmd.insert_len_)
+		if src+remaining > mask {
+			/* The insert wraps: copy the part up to the end of the buffer first. */
+			var head uint = mask + 1 - src
+			copy(literals[out:], data[src:][:head])
+			out += head
+			remaining -= head
+			src = 0
+		}
+
+		if remaining > 0 {
+			copy(literals[out:], data[src:][:remaining])
+			out += remaining
+		}
+
+		/* Step over the inserted bytes and the copied bytes that follow them. */
+		src = uint((uint32(src+remaining) + commandCopyLen(cmd)) & uint32(mask))
+	}
+}
+
+func myRand(seed *uint32) uint32 {
+	/* Initial seed should be 7. In this case, loop length is (1 << 29). */
+	*seed *= 16807
+
+	return *seed
+}
+
+/* bitCost returns fastLog2(count), except that a zero count yields the fixed
+   value -2.0. */
+func bitCost(count uint) float64 {
+	if count != 0 {
+		return fastLog2(count)
+	}
+
+	return -2.0
+}
+
+/* Number of block histograms that clusterBlocksCommand combines in one
+   batch. */
+const histogramsPerBatch = 64
+
+/* Used by clusterBlocksCommand to size its initial cluster arrays:
+   clustersPerBatch entries are reserved per batch of histogramsPerBatch
+   blocks. */
+const clustersPerBatch = 16
+
+/* initBlockSplit resets |self| to an empty split: all counters become zero
+   and both slices are truncated to length 0 (their backing arrays are kept). */
+func initBlockSplit(self *blockSplit) {
+	self.types = self.types[:0]
+	self.types_alloc_size = 0
+
+	self.lengths = self.lengths[:0]
+	self.lengths_alloc_size = 0
+
+	self.num_types = 0
+	self.num_blocks = 0
+}
+
+/* splitBlock computes three independent block splits for the commands in
+   |cmds|:
+     - |literal_split| from the inserted literal bytes, gathered from the
+       ring buffer |data| (position |pos|, mask |mask|);
+     - |insert_and_copy_split| from each command's cmd_prefix_;
+     - |dist_split| from the distance prefixes of those commands that have a
+       non-zero copy length and a cmd_prefix_ of at least 128. */
+func splitBlock(cmds []command, data []byte, pos uint, mask uint, params *encoderParams, literal_split *blockSplit, insert_and_copy_split *blockSplit, dist_split *blockSplit) {
+	/* Literals: create a continuous array of literals and split it.
+	   Literal histograms have alphabet size 256. */
+	var num_literals uint = countLiterals(cmds)
+	var literal_bytes []byte = make([]byte, num_literals)
+	copyLiteralsToByteArray(cmds, data, pos, mask, literal_bytes)
+	splitByteVectorLiteral(literal_bytes, num_literals, kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, kLiteralStrideLength, kLiteralBlockSwitchCost, params, literal_split)
+
+	/* Commands: collect the prefix code of every command and split that. */
+	var cmd_codes []uint16 = make([]uint16, len(cmds))
+	for i := range cmds {
+		cmd_codes[i] = cmds[i].cmd_prefix_
+	}
+
+	splitByteVectorCommand(cmd_codes, kSymbolsPerCommandHistogram, kMaxCommandHistograms, kCommandStrideLength, kCommandBlockSwitchCost, params, insert_and_copy_split)
+
+	/* TODO: reuse for distances? */
+
+	/* Distances: create a continuous array of distance prefixes, keeping only
+	   the low 10 bits of dist_prefix_, and split it. */
+	var dist_codes []uint16 = make([]uint16, len(cmds))
+	var num_dist_codes uint = 0
+	for i := range cmds {
+		var cmd *command = &cmds[i]
+		if commandCopyLen(cmd) == 0 || cmd.cmd_prefix_ < 128 {
+			continue
+		}
+
+		dist_codes[num_dist_codes] = cmd.dist_prefix_ & 0x3FF
+		num_dist_codes++
+	}
+
+	splitByteVectorDistance(dist_codes, num_dist_codes, kSymbolsPerDistanceHistogram, kMaxCommandHistograms, kCommandStrideLength, kDistanceBlockSwitchCost, params, dist_split)
+}
@@ -0,0 +1,434 @@
+package brotli
+
+import "math"
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* initialEntropyCodesCommand seeds |num_histograms| histograms, each from one
+   window of |stride| symbols of |data|. Window i starts at
+   length*i/num_histograms, shifted (for i > 0) by a pseudo-random offset
+   smaller than length/num_histograms, and is pulled back to
+   length-stride-1 when it would reach the end of the data.
+   NOTE(review): assumes num_histograms <= length and stride < length;
+   otherwise the modulo or the subtraction misbehaves — confirm that callers
+   guarantee this. */
+func initialEntropyCodesCommand(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramCommand) {
+	var rng_state uint32 = 7
+	var span uint = length / num_histograms
+	clearHistogramsCommand(histograms, num_histograms)
+
+	var h uint
+	for h = 0; h < num_histograms; h++ {
+		var start uint = length * h / num_histograms
+		if h != 0 {
+			start += uint(myRand(&rng_state) % uint32(span))
+		}
+
+		if start+stride >= length {
+			start = length - stride - 1
+		}
+
+		histogramAddVectorCommand(&histograms[h], data[start:], stride)
+	}
+}
+
+/* randomSampleCommand adds one window of |data| to |sample|. When |stride| is
+   smaller than |length| the window holds |stride| symbols and starts at a
+   pseudo-random position in [0, length-stride], advancing |*seed|; otherwise
+   the whole data[0..length) is added and |*seed| is left unchanged. */
+func randomSampleCommand(seed *uint32, data []uint16, length uint, stride uint, sample *histogramCommand) {
+	var start uint = 0
+	if stride < length {
+		start = uint(myRand(seed) % uint32(length-stride+1))
+	} else {
+		stride = length
+	}
+
+	histogramAddVectorCommand(sample, data[start:], stride)
+}
+
+/* refineEntropyCodesCommand adds random samples of |data| to the histograms
+   in round-robin order. The number of samples is
+   kIterMulForRefining*length/stride + kMinItersForRefining, rounded up to a
+   multiple of |num_histograms| so that every histogram receives the same
+   number of samples. */
+func refineEntropyCodesCommand(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramCommand) {
+	var rng_state uint32 = 7
+	var total_iters uint = kIterMulForRefining*length/stride + kMinItersForRefining
+	total_iters = ((total_iters + num_histograms - 1) / num_histograms) * num_histograms
+
+	var n uint
+	for n = 0; n < total_iters; n++ {
+		var sample histogramCommand
+		histogramClearCommand(&sample)
+		randomSampleCommand(&rng_state, data, length, stride, &sample)
+		histogramAddHistogramCommand(&histograms[n%num_histograms], &sample)
+	}
+}
+
+/* Assigns a block id from the range [0, num_histograms) to each data element
+   in data[0..length) and fills in block_id[0..length) with the assigned values.
+   Returns the number of blocks, i.e. one plus the number of block switches.
+
+   Scratch buffers supplied by the caller (all are overwritten):
+     insert_cost   — at least data_size*num_histograms entries; entry
+                     [symbol*num_histograms + k] becomes the cost of coding
+                     |symbol| with histogram k.
+     cost          — at least num_histograms entries.
+     switch_signal — at least length*bitmaplen bytes, where bitmaplen is
+                     (num_histograms+7)/8; one bit per (position, histogram).
+   With num_histograms <= 1 every element gets block id 0 and 1 is returned
+   without touching the scratch buffers.
+   NOTE(review): with more than one histogram, length is assumed to be at
+   least 1 (length - 1 is computed below) — confirm against callers. */
+func findBlocksCommand(data []uint16, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramCommand, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
+	var data_size uint = histogramDataSizeCommand()
+	var bitmaplen uint = (num_histograms + 7) >> 3
+	var num_blocks uint = 1
+	var i uint
+	var j uint
+	assert(num_histograms <= 256)
+	if num_histograms <= 1 {
+		for i = 0; i < length; i++ {
+			block_id[i] = 0
+		}
+
+		return 1
+	}
+
+	for i := 0; i < int(data_size*num_histograms); i++ {
+		insert_cost[i] = 0
+	}
+	/* Temporarily keep log2(total count) of each histogram in row 0. */
+	for i = 0; i < num_histograms; i++ {
+		insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
+	}
+
+	/* Fill the table from the last symbol down to symbol 0: row 0 holds the
+	   per-histogram totals read by every row, so it must be overwritten last. */
+	for i = data_size; i != 0; {
+		i--
+		for j = 0; j < num_histograms; j++ {
+			insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i]))
+		}
+	}
+
+	for i := 0; i < int(num_histograms); i++ {
+		cost[i] = 0
+	}
+	for i := 0; i < int(length*bitmaplen); i++ {
+		switch_signal[i] = 0
+	}
+
+	/* After each iteration of this loop, cost[k] will contain the difference
+	   between the minimum cost of arriving at the current byte position using
+	   entropy code k, and the minimum cost of arriving at the current byte
+	   position. This difference is capped at the block switch cost, and if it
+	   reaches block switch cost, it means that when we trace back from the last
+	   position, we need to switch here. */
+	for i = 0; i < length; i++ {
+		var byte_ix uint = i
+		var ix uint = byte_ix * bitmaplen
+		var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms
+		var min_cost float64 = 1e99
+		var block_switch_cost float64 = block_switch_bitcost
+		var k uint
+		for k = 0; k < num_histograms; k++ {
+			/* We are coding the symbol in data[byte_ix] with entropy code k. */
+			cost[k] += insert_cost[insert_cost_ix+k]
+
+			if cost[k] < min_cost {
+				min_cost = cost[k]
+				block_id[byte_ix] = byte(k)
+			}
+		}
+
+		/* More blocks for the beginning. */
+		/* (the switch cost is scaled by a factor growing from 0.77 towards 0.84
+		   over the first 2000 positions) */
+		if byte_ix < 2000 {
+			block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
+		}
+
+		for k = 0; k < num_histograms; k++ {
+			cost[k] -= min_cost
+			if cost[k] >= block_switch_cost {
+				/* Cap the cost and record in the bitmap that entropy code k
+				   should be switched away from at this position. */
+				var mask byte = byte(1 << (k & 7))
+				cost[k] = block_switch_cost
+				assert(k>>3 < bitmaplen)
+				switch_signal[ix+(k>>3)] |= mask
+			}
+		}
+	}
+	/* Trace back from the last position and switch at the marked places. */
+	{
+		var byte_ix uint = length - 1
+		var ix uint = byte_ix * bitmaplen
+		var cur_id byte = block_id[byte_ix]
+		for byte_ix > 0 {
+			var mask byte = byte(1 << (cur_id & 7))
+			assert(uint(cur_id)>>3 < bitmaplen)
+			byte_ix--
+			ix -= bitmaplen
+			if switch_signal[ix+uint(cur_id>>3)]&mask != 0 {
+				if cur_id != block_id[byte_ix] {
+					cur_id = block_id[byte_ix]
+					num_blocks++
+				}
+			}
+
+			block_id[byte_ix] = cur_id
+		}
+	}
+
+	return num_blocks
+}
+
+/* Marks a new_id entry whose block id has not been seen yet; 256 cannot
+   collide with a real id because ids are bytes. */
+var remapBlockIdsCommand_kInvalidId uint16 = 256
+
+/* remapBlockIdsCommand renumbers the ids in block_ids[0..length) in order of
+   first appearance, so that the ids in use become 0, 1, 2, ... without gaps.
+   |new_id| is scratch space with at least |num_histograms| entries; on
+   return it maps each old id to its new id (ids that never occur keep the
+   invalid marker). Returns the number of distinct ids found. */
+func remapBlockIdsCommand(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
+	var assigned uint16 = 0
+	var pos uint
+
+	for pos = 0; pos < num_histograms; pos++ {
+		new_id[pos] = remapBlockIdsCommand_kInvalidId
+	}
+
+	/* First pass: hand out new ids in order of first appearance. */
+	for pos = 0; pos < length; pos++ {
+		assert(uint(block_ids[pos]) < num_histograms)
+		if new_id[block_ids[pos]] != remapBlockIdsCommand_kInvalidId {
+			continue
+		}
+
+		new_id[block_ids[pos]] = assigned
+		assigned++
+	}
+
+	/* Second pass: rewrite every id through the mapping. */
+	for pos = 0; pos < length; pos++ {
+		block_ids[pos] = byte(new_id[block_ids[pos]])
+		assert(uint(block_ids[pos]) < num_histograms)
+	}
+
+	assert(uint(assigned) <= num_histograms)
+	return uint(assigned)
+}
+
+/* buildBlockHistogramsCommand clears the first |num_histograms| histograms
+   and then adds every symbol data[i], for i in [0, length), to the histogram
+   selected by block_ids[i]. */
+func buildBlockHistogramsCommand(data []uint16, length uint, block_ids []byte, num_histograms uint, histograms []histogramCommand) {
+	clearHistogramsCommand(histograms, num_histograms)
+
+	var pos uint
+	for pos = 0; pos < length; pos++ {
+		var target *histogramCommand = &histograms[block_ids[pos]]
+		histogramAddCommand(target, uint(data[pos]))
+	}
+}
+
+/* Initial value of every new_index entry in clusterBlocksCommand. */
+var clusterBlocksCommand_kInvalidIndex uint32 = math.MaxUint32
+
+func clusterBlocksCommand(data []uint16, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
+	var histogram_symbols []uint32 = make([]uint32, num_blocks)
+	var block_lengths []uint32 = make([]uint32, num_blocks)
+	var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
+	var all_histograms_size uint = 0
+	var all_histograms_capacity uint = expected_num_clusters
+	var all_histograms []histogramCommand = make([]histogramCommand, all_histograms_capacity)
+	var cluster_size_size uint = 0
+	var cluster_size_capacity uint = expected_num_clusters
+	var cluster_size []uint32 = make([]uint32, cluster_size_capacity)
+	var num_clusters uint = 0
+	var histograms []histogramCommand = make([]histogramCommand, brotli_min_size_t(num_blocks, histogramsPerBatch))
+	var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
+	var pairs_capacity uint = max_num_pairs + 1
+	var pairs []histogramPair = make([]histogramPair, pairs_capacity)
+	var pos uint = 0
+	var clusters []uint32
+	var num_final_clusters uint
+	var new_index []uint32
+	var i uint
+	var sizes = [histogramsPerBatch]uint32{0}
+	var new_clusters = [histogramsPerBatch]uint32{0}
+	var symbols = [histogramsPerBatch]uint32{0}
+	var remap = [histogramsPerBatch]uint32{0}
+
+	for i := 0; i < int(num_blocks); i++ {
+		block_lengths[i] = 0
+	}
+	{
+		var block_idx uint = 0
+		for i = 0; i < length; i++ {
+			assert(block_idx < num_blocks)
+			block_lengths[block_idx]++
+			if i+1 == length || block_ids[i] != block_ids[i+1] {
+				block_idx++
+			}
+		}
+
+		assert(block_idx == num_blocks)
+	}
+
+	for i = 0; i < num_blocks; i += histogramsPerBatch {
+		var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
+		var num_new_clusters uint
+		var j uint
+		for j = 0; j < num_to_combine; j++ {
+			var k uint
+			histogramClearCommand(&histograms[j])
+			for k = 0; uint32(k) < block_lengths[i+j]; k++ {
+				histogramAddCommand(&histograms[j], uint(data[pos]))
+				pos++
+			}
+
+			histograms[j].bit_cost_ = populationCostCommand(&histograms[j])
+			new_clusters[j] = uint32(j)
+			symbols[j] = uint32(j)
+			sizes[j] = 1
+		}
+
+		num_new_clusters = histogramCombineCommand(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
+		if all_histograms_capacity < (all_histograms_size + num_new_clusters) {
+			var _new_size uint
+			if all_histograms_capacity == 0 {
+				_new_size = all_histograms_size + num_new_clusters
+			} else {
+				_new_size = all_histograms_capacity
+			}
+			var new_array []histogramCommand
+			for _new_size < (all_histograms_size + num_new_clusters) {
+				_new_size *= 2
+			}
+			new_array = make([]histogramCommand, _new_size)
+			if all_histograms_capacity != 0 {
+				copy(new_array, all_histograms[:all_histograms_capacity])
+			}
+
+			all_histograms = new_array
+			all_histograms_capacity = _new_size
+		}
+
+		brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
+		for j = 0; j < num_new_clusters; j++ {
+			all_histograms[all_histograms_size] = histograms[new_clusters[j]]
+			all_histograms_size++
+			cluster_size[cluster_size_size] = sizes[new_clusters[j]]
+			cluster_size_size++
+			remap[new_clusters[j]] = uint32(j)
+		}
+
+		for j = 0; j < num_to_combine; j++ {
+			histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]]
+		}
+
+		num_clusters += num_new_clusters
+		assert(num_clusters == cluster_size_size)
+		assert(num_clusters == all_histograms_size)
+	}
+
+	histograms = nil
+
+	max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters)
+	if pairs_capacity < max_num_pairs+1 {
+		pairs = nil
+		pairs = make([]histogramPair, (max_num_pairs + 1))
+	}
+
+	clusters = make([]uint32, num_clusters)
+	for i = 0; i < num_clusters; i++ {
+		clusters[i] = uint32(i)
+	}
+
+	num_final_clusters = histogramCombineCommand(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
+	pairs = nil
+	cluster_size = nil
+
+	new_index = make([]uint32, num_clusters)
+	for i = 0; i < num_clusters; i++ {
+		new_index[i] = clusterBlocksCommand_kInvalidIndex
+	}
+	pos = 0
+	{
+		var next_index uint32 = 0
+		for i = 0; i < num_blocks; i++ {
+			var histo histogramCommand
+			var j uint
+			var best_out uint32
+			var best_bits float64
+			histogramClearCommand(&histo)
+			for j = 0; uint32(j) < block_lengths[i]; j++ {
+				histogramAddCommand(&histo, uint(data[pos]))
+				pos++
+			}
+
+			if i == 0 {
+				best_out = histogram_symbols[0]
+			} else {
+				best_out = histogram_symbols[i-1]
+			}
+			best_bits = histogramBitCostDistanceCommand(&histo, &all_histograms[best_out])
+			for j = 0; j < num_final_clusters; j++ {
+				var cur_bits float64 = histogramBitCostDistanceCommand(&histo, &all_histograms[clusters[j]])
+				if cur_bits < best_bits {
+					best_bits = cur_bits
+					best_out = clusters[j]
+				}
+			}
+
+			histogram_symbols[i] = best_out
+			if new_index[best_out] == clusterBlocksCommand_kInvalidIndex {
+				new_index[best_out] = next_index
+				next_index++
+			}
+		}
+	}
+
+	clusters = nil
+	all_histograms = nil
+	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
+	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
+	{
+		var cur_length uint32 = 0
+		var block_idx uint = 0
+		var max_type byte = 0
+		for i = 0; i < num_blocks; i++ {
+			cur_length += block_lengths[i]
+			if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
+				var id byte = byte(new_index[histogram_symbols[i]])
+				split.types[block_idx] = id
+				split.lengths[block_idx] = cur_length
+				max_type = brotli_max_uint8_t(max_type, id)
+				cur_length = 0
+				block_idx++
+			}
+		}
+
+		split.num_blocks = block_idx
+		split.num_types = uint(max_type) + 1
+	}
+
+	new_index = nil
+	block_lengths = nil
+	histogram_symbols = nil
+}
+
+/* splitByteVectorCommand splits the command symbols in data into blocks and
+   stores the resulting block types and lengths in split.
+   literals_per_histogram and max_histograms bound the number of initial
+   entropy codes, sampling_stride_length is the sample size used to seed and
+   refine them, and block_switch_cost is forwarded to findBlocksCommand.
+   Empty input only sets split.num_types; input shorter than
+   kMinLengthForBlockSplitting is appended to split as one block of type 0. */
+func splitByteVectorCommand(data []uint16, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
+	var length uint = uint(len(data))
+	var data_size uint = histogramDataSizeCommand()
+
+	/* One entropy code per literals_per_histogram symbols, capped at max_histograms. */
+	var num_histograms uint = length/literals_per_histogram + 1
+	if max_histograms < num_histograms {
+		num_histograms = max_histograms
+	}
+
+	if length == 0 {
+		split.num_types = 1
+		return
+	}
+
+	if length < kMinLengthForBlockSplitting {
+		/* Short input: append a single block of type 0 covering all of it. */
+		var last uint = split.num_blocks
+		brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, last+1)
+		brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, last+1)
+		split.num_types = 1
+		split.types[last] = 0
+		split.lengths[last] = uint32(length)
+		split.num_blocks = last + 1
+		return
+	}
+
+	codes := make([]histogramCommand, num_histograms)
+
+	/* Find good entropy codes. */
+	initialEntropyCodesCommand(data, length, sampling_stride_length, num_histograms, codes)
+	refineEntropyCodesCommand(data, length, sampling_stride_length, num_histograms, codes)
+
+	/* Scratch buffers for findBlocksCommand, sized for the initial code count. */
+	ids := make([]byte, length)
+	bitmap_bytes := (num_histograms + 7) >> 3
+	symbol_costs := make([]float64, data_size*num_histograms)
+	code_costs := make([]float64, num_histograms)
+	switches := make([]byte, length*bitmap_bytes)
+	translation := make([]uint16, num_histograms)
+
+	var remaining uint = 10
+	if params.quality < hqZopflificationQuality {
+		remaining = 3
+	}
+
+	/* Find a good path through the symbols with the good entropy codes. */
+	var num_blocks uint = 0
+	for ; remaining > 0; remaining-- {
+		num_blocks = findBlocksCommand(data, length, block_switch_cost, num_histograms, codes, symbol_costs, code_costs, switches, ids)
+		num_histograms = remapBlockIdsCommand(ids, length, translation, num_histograms)
+		buildBlockHistogramsCommand(data, length, ids, num_histograms, codes)
+	}
+
+	clusterBlocksCommand(data, length, num_blocks, ids, split)
+}
@@ -0,0 +1,433 @@
+package brotli
+
+import "math"
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* initialEntropyCodesDistance seeds histograms[0..num_histograms) with one
+   sample of stride symbols each. Sample i starts at length*i/num_histograms;
+   every sample but the first is shifted by a pseudo-random offset below
+   length/num_histograms drawn from myRand with a fixed seed of 7, and a start
+   with start+stride >= length is moved to length-stride-1. */
+func initialEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
+	var rng_state uint32 = 7
+	var span uint = length / num_histograms
+	clearHistogramsDistance(histograms, num_histograms)
+	for h := uint(0); h < num_histograms; h++ {
+		start := length * h / num_histograms
+		if h > 0 {
+			start += uint(myRand(&rng_state) % uint32(span))
+		}
+
+		/* Keep the whole sample inside data[0..length). */
+		if start+stride >= length {
+			start = length - stride - 1
+		}
+
+		histogramAddVectorDistance(&histograms[h], data[start:], stride)
+	}
+}
+
+func randomSampleDistance(seed *uint32, data []uint16, length uint, stride uint, sample *histogramDistance) {
+	var pos uint = 0
+	if stride >= length {
+		stride = length
+	} else {
+		pos = uint(myRand(seed) % uint32(length-stride+1))
+	}
+
+	histogramAddVectorDistance(sample, data[pos:], stride)
+}
+
+/* refineEntropyCodesDistance adds further random samples of stride symbols to
+   the histograms in round-robin order. The number of samples is
+   kIterMulForRefining*length/stride + kMinItersForRefining, rounded up to a
+   multiple of num_histograms so that every histogram receives the same
+   number of samples. The random sequence starts from a fixed seed of 7. */
+func refineEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
+	var total uint = kIterMulForRefining*length/stride + kMinItersForRefining
+	var rng_state uint32 = 7
+	total = (total + num_histograms - 1) / num_histograms * num_histograms
+	for n := uint(0); n < total; n++ {
+		var sample histogramDistance
+		histogramClearDistance(&sample)
+		randomSampleDistance(&rng_state, data, length, stride, &sample)
+		histogramAddHistogramDistance(&histograms[n%num_histograms], &sample)
+	}
+}
+
+/* findBlocksDistance assigns a block id from the range [0, num_histograms) to
+   each data element in data[0..length) and fills in block_id[0..length) with
+   the assigned values.
+   block_switch_bitcost is the cap applied to the relative cost of each entropy
+   code; for the first 2000 positions it is scaled by a factor growing from
+   0.77 to 0.84. insert_cost, cost and switch_signal are scratch buffers that
+   are overwritten here: the loops below write insert_cost[0..data_size*
+   num_histograms), cost[0..num_histograms) and switch_signal[0..length*
+   bitmaplen). When num_histograms > 1 the trace-back computes length-1, so
+   length must be at least 1 in that case.
+   Returns the number of blocks, i.e. one plus the number of block switches. */
+func findBlocksDistance(data []uint16, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramDistance, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
+	var data_size uint = histogramDataSizeDistance()
+	var bitmaplen uint = (num_histograms + 7) >> 3 // bytes per position in switch_signal: one bit per entropy code
+	var num_blocks uint = 1
+	var i uint
+	var j uint
+	assert(num_histograms <= 256) // block ids are stored in a byte
+	if num_histograms <= 1 { // a single code: everything is one block with id 0
+		for i = 0; i < length; i++ {
+			block_id[i] = 0
+		}
+
+		return 1
+	}
+
+	for i := 0; i < int(data_size*num_histograms); i++ {
+		insert_cost[i] = 0
+	}
+	for i = 0; i < num_histograms; i++ { // row 0 temporarily holds fastLog2 of each histogram's total count
+		insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
+	}
+
+	for i = data_size; i != 0; { // walk symbols from last to first so row 0 is still intact when the other rows read it
+		i--
+		for j = 0; j < num_histograms; j++ {
+			insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i])) // layout: insert_cost[symbol*num_histograms+code]
+		}
+	}
+
+	for i := 0; i < int(num_histograms); i++ {
+		cost[i] = 0
+	}
+	for i := 0; i < int(length*bitmaplen); i++ {
+		switch_signal[i] = 0
+	}
+
+	/* After each iteration of this loop, cost[k] will contain the difference
+	   between the minimum cost of arriving at the current byte position using
+	   entropy code k, and the minimum cost of arriving at the current byte
+	   position. This difference is capped at the block switch cost, and if it
+	   reaches block switch cost, it means that when we trace back from the last
+	   position, we need to switch here. */
+	for i = 0; i < length; i++ {
+		var byte_ix uint = i
+		var ix uint = byte_ix * bitmaplen // start of this position's bitmap in switch_signal
+		var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms // start of this symbol's row in insert_cost
+		var min_cost float64 = 1e99
+		var block_switch_cost float64 = block_switch_bitcost
+		var k uint
+		for k = 0; k < num_histograms; k++ {
+			/* We are coding the symbol in data[byte_ix] with entropy code k. */
+			cost[k] += insert_cost[insert_cost_ix+k]
+
+			if cost[k] < min_cost {
+				min_cost = cost[k]
+				block_id[byte_ix] = byte(k) // provisionally the cheapest code at this position
+			}
+		}
+
+		/* More blocks for the beginning: the switch cost is reduced for the
+		   first 2000 positions. */
+		if byte_ix < 2000 {
+			block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
+		}
+
+		for k = 0; k < num_histograms; k++ {
+			cost[k] -= min_cost // make costs relative to the cheapest code
+			if cost[k] >= block_switch_cost {
+				var mask byte = byte(1 << (k & 7))
+				cost[k] = block_switch_cost
+				assert(k>>3 < bitmaplen)
+				switch_signal[ix+(k>>3)] |= mask
+				/* Bit k of this position now records that code k hit the cap here. */
+			}
+		}
+	}
+	{ // Trace back from the last position and switch at the marked places.
+		var byte_ix uint = length - 1
+		var ix uint = byte_ix * bitmaplen
+		var cur_id byte = block_id[byte_ix]
+		for byte_ix > 0 {
+			var mask byte = byte(1 << (cur_id & 7))
+			assert(uint(cur_id)>>3 < bitmaplen)
+			byte_ix--
+			ix -= bitmaplen
+			if switch_signal[ix+uint(cur_id>>3)]&mask != 0 { // the current code was capped at this earlier position
+				if cur_id != block_id[byte_ix] {
+					cur_id = block_id[byte_ix] // continue with the code that was cheapest there
+					num_blocks++
+				}
+			}
+
+			block_id[byte_ix] = cur_id
+		}
+	}
+
+	return num_blocks
+}
+
+var remapBlockIdsDistance_kInvalidId uint16 = 256
+
+/* remapBlockIdsDistance renumbers the ids in block_ids[0..length) so that they
+   are consecutive and ordered by first appearance. new_id[0..num_histograms)
+   is overwritten and used as the old-to-new translation table.
+   Returns the number of distinct ids found. */
+func remapBlockIdsDistance(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
+	var assigned uint16 = 0
+
+	/* Mark every old id as not seen yet. */
+	for h := uint(0); h < num_histograms; h++ {
+		new_id[h] = remapBlockIdsDistance_kInvalidId
+	}
+
+	/* Hand out new ids in order of first appearance. */
+	for pos := uint(0); pos < length; pos++ {
+		old := block_ids[pos]
+		assert(uint(old) < num_histograms)
+		if new_id[old] != remapBlockIdsDistance_kInvalidId {
+			continue
+		}
+		new_id[old] = assigned
+		assigned++
+	}
+
+	/* Rewrite the ids in place through the table. */
+	for pos := uint(0); pos < length; pos++ {
+		translated := byte(new_id[block_ids[pos]])
+		block_ids[pos] = translated
+		assert(uint(translated) < num_histograms)
+	}
+
+	assert(uint(assigned) <= num_histograms)
+	return uint(assigned)
+}
+
+func buildBlockHistogramsDistance(data []uint16, length uint, block_ids []byte, num_histograms uint, histograms []histogramDistance) {
+	var i uint
+	clearHistogramsDistance(histograms, num_histograms)
+	for i = 0; i < length; i++ {
+		histogramAddDistance(&histograms[block_ids[i]], uint(data[i]))
+	}
+}
+
+var clusterBlocksDistance_kInvalidIndex uint32 = math.MaxUint32 // marks a cluster that has no output type index yet
+/* clusterBlocksDistance merges the num_blocks runs of equal ids in
+   block_ids[0..length) into block types and writes the result to split
+   (types, lengths, num_blocks, num_types).
+   Steps: (1) measure the length of every run; (2) build one histogram per run
+   and combine them batch by batch with histogramCombineDistance, passing
+   histogramsPerBatch; (3) run histogramCombineDistance once more over all
+   batch clusters, passing maxNumberOfBlockTypes; (4) re-assign every run to
+   the candidate cluster with the lowest histogramBitCostDistanceDistance and
+   number the chosen clusters in order of first use; (5) merge neighbouring
+   runs that ended up with the same cluster.
+   num_blocks must equal the number of runs in block_ids (asserted below). */
+func clusterBlocksDistance(data []uint16, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
+	var histogram_symbols []uint32 = make([]uint32, num_blocks) // cluster index assigned to each run
+	var block_lengths []uint32 = make([]uint32, num_blocks) // number of symbols in each run
+	var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
+	var all_histograms_size uint = 0
+	var all_histograms_capacity uint = expected_num_clusters
+	var all_histograms []histogramDistance = make([]histogramDistance, all_histograms_capacity) // clusters collected from all batches
+	var cluster_size_size uint = 0
+	var cluster_size_capacity uint = expected_num_clusters
+	var cluster_size []uint32 = make([]uint32, cluster_size_capacity) // sizes value of each collected cluster
+	var num_clusters uint = 0
+	var histograms []histogramDistance = make([]histogramDistance, brotli_min_size_t(num_blocks, histogramsPerBatch)) // per-batch working set, reused
+	var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
+	var pairs_capacity uint = max_num_pairs + 1
+	var pairs []histogramPair = make([]histogramPair, pairs_capacity)
+	var pos uint = 0 // read cursor into data
+	var clusters []uint32
+	var num_final_clusters uint
+	var new_index []uint32
+	var i uint
+	var sizes = [histogramsPerBatch]uint32{0}
+	var new_clusters = [histogramsPerBatch]uint32{0}
+	var symbols = [histogramsPerBatch]uint32{0}
+	var remap = [histogramsPerBatch]uint32{0}
+
+	for i := 0; i < int(num_blocks); i++ {
+		block_lengths[i] = 0
+	}
+	{ // Step 1: count the symbols of every run of equal block ids.
+		var block_idx uint = 0
+		for i = 0; i < length; i++ {
+			assert(block_idx < num_blocks)
+			block_lengths[block_idx]++
+			if i+1 == length || block_ids[i] != block_ids[i+1] {
+				block_idx++
+			}
+		}
+
+		assert(block_idx == num_blocks)
+	}
+
+	for i = 0; i < num_blocks; i += histogramsPerBatch { // Step 2: one batch of up to histogramsPerBatch runs per iteration
+		var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
+		var num_new_clusters uint
+		var j uint
+		for j = 0; j < num_to_combine; j++ {
+			var k uint
+			histogramClearDistance(&histograms[j])
+			for k = 0; uint32(k) < block_lengths[i+j]; k++ {
+				histogramAddDistance(&histograms[j], uint(data[pos]))
+				pos++
+			}
+
+			histograms[j].bit_cost_ = populationCostDistance(&histograms[j])
+			new_clusters[j] = uint32(j) // every run starts as its own cluster of size 1
+			symbols[j] = uint32(j)
+			sizes[j] = 1
+		}
+
+		num_new_clusters = histogramCombineDistance(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
+		if all_histograms_capacity < (all_histograms_size + num_new_clusters) { // grow all_histograms by doubling until the new clusters fit
+			var _new_size uint
+			if all_histograms_capacity == 0 {
+				_new_size = all_histograms_size + num_new_clusters
+			} else {
+				_new_size = all_histograms_capacity
+			}
+			var new_array []histogramDistance
+			for _new_size < (all_histograms_size + num_new_clusters) {
+				_new_size *= 2
+			}
+			new_array = make([]histogramDistance, _new_size)
+			if all_histograms_capacity != 0 {
+				copy(new_array, all_histograms[:all_histograms_capacity])
+			}
+
+			all_histograms = new_array
+			all_histograms_capacity = _new_size
+		}
+
+		brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
+		for j = 0; j < num_new_clusters; j++ { // append this batch's clusters to the global lists
+			all_histograms[all_histograms_size] = histograms[new_clusters[j]]
+			all_histograms_size++
+			cluster_size[cluster_size_size] = sizes[new_clusters[j]]
+			cluster_size_size++
+			remap[new_clusters[j]] = uint32(j) // batch-local cluster id -> position within this batch's appended clusters
+		}
+
+		for j = 0; j < num_to_combine; j++ {
+			histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]] // global cluster index of run i+j
+		}
+
+		num_clusters += num_new_clusters
+		assert(num_clusters == cluster_size_size)
+		assert(num_clusters == all_histograms_size)
+	}
+
+	histograms = nil
+
+	max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters) // Step 3: pair limit for the combine over all clusters
+	if pairs_capacity < max_num_pairs+1 {
+		pairs = nil
+		pairs = make([]histogramPair, (max_num_pairs + 1))
+	}
+
+	clusters = make([]uint32, num_clusters)
+	for i = 0; i < num_clusters; i++ {
+		clusters[i] = uint32(i)
+	}
+
+	num_final_clusters = histogramCombineDistance(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
+	pairs = nil
+	cluster_size = nil
+
+	new_index = make([]uint32, num_clusters)
+	for i = 0; i < num_clusters; i++ {
+		new_index[i] = clusterBlocksDistance_kInvalidIndex
+	}
+	pos = 0 // rewind: the data is scanned a second time below
+	{ // Step 4: pick the cheapest candidate cluster for every run.
+		var next_index uint32 = 0
+		for i = 0; i < num_blocks; i++ {
+			var histo histogramDistance
+			var j uint
+			var best_out uint32
+			var best_bits float64
+			histogramClearDistance(&histo)
+			for j = 0; uint32(j) < block_lengths[i]; j++ {
+				histogramAddDistance(&histo, uint(data[pos]))
+				pos++
+			}
+
+			if i == 0 { // initial candidate: the previous run's cluster, or the run's own cluster for the first run
+				best_out = histogram_symbols[0]
+			} else {
+				best_out = histogram_symbols[i-1]
+			}
+			best_bits = histogramBitCostDistanceDistance(&histo, &all_histograms[best_out])
+			for j = 0; j < num_final_clusters; j++ { // a later cluster only wins when strictly cheaper
+				var cur_bits float64 = histogramBitCostDistanceDistance(&histo, &all_histograms[clusters[j]])
+				if cur_bits < best_bits {
+					best_bits = cur_bits
+					best_out = clusters[j]
+				}
+			}
+
+			histogram_symbols[i] = best_out
+			if new_index[best_out] == clusterBlocksDistance_kInvalidIndex { // number clusters in order of first use
+				new_index[best_out] = next_index
+				next_index++
+			}
+		}
+	}
+
+	clusters = nil
+	all_histograms = nil
+	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
+	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
+	{ // Step 5: emit one output block per maximal group of neighbouring runs with the same cluster.
+		var cur_length uint32 = 0
+		var block_idx uint = 0
+		var max_type byte = 0
+		for i = 0; i < num_blocks; i++ {
+			cur_length += block_lengths[i]
+			if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
+				var id byte = byte(new_index[histogram_symbols[i]]) // NOTE(review): truncates to 8 bits; presumably the type count never exceeds 256 - confirm
+				split.types[block_idx] = id
+				split.lengths[block_idx] = cur_length
+				max_type = brotli_max_uint8_t(max_type, id)
+				cur_length = 0
+				block_idx++
+			}
+		}
+
+		split.num_blocks = block_idx
+		split.num_types = uint(max_type) + 1
+	}
+
+	new_index = nil
+	block_lengths = nil
+	histogram_symbols = nil
+}
+
+/* splitByteVectorDistance splits the distance symbols data[0..length) into
+   blocks and stores the resulting block types and lengths in split.
+   literals_per_histogram and max_histograms bound the number of initial
+   entropy codes, sampling_stride_length is the sample size used to seed and
+   refine them, and block_switch_cost is forwarded to findBlocksDistance.
+   Empty input only sets split.num_types; input shorter than
+   kMinLengthForBlockSplitting is appended to split as one block of type 0. */
+func splitByteVectorDistance(data []uint16, length uint, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
+	var data_size uint = histogramDataSizeDistance()
+
+	/* One entropy code per literals_per_histogram symbols, capped at max_histograms. */
+	var num_histograms uint = length/literals_per_histogram + 1
+	if max_histograms < num_histograms {
+		num_histograms = max_histograms
+	}
+
+	if length == 0 {
+		split.num_types = 1
+		return
+	}
+
+	if length < kMinLengthForBlockSplitting {
+		/* Short input: append a single block of type 0 covering all of it. */
+		var last uint = split.num_blocks
+		brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, last+1)
+		brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, last+1)
+		split.num_types = 1
+		split.types[last] = 0
+		split.lengths[last] = uint32(length)
+		split.num_blocks = last + 1
+		return
+	}
+
+	codes := make([]histogramDistance, num_histograms)
+
+	/* Find good entropy codes. */
+	initialEntropyCodesDistance(data, length, sampling_stride_length, num_histograms, codes)
+	refineEntropyCodesDistance(data, length, sampling_stride_length, num_histograms, codes)
+
+	/* Scratch buffers for findBlocksDistance, sized for the initial code count. */
+	ids := make([]byte, length)
+	bitmap_bytes := (num_histograms + 7) >> 3
+	symbol_costs := make([]float64, data_size*num_histograms)
+	code_costs := make([]float64, num_histograms)
+	switches := make([]byte, length*bitmap_bytes)
+	translation := make([]uint16, num_histograms)
+
+	var remaining uint = 10
+	if params.quality < hqZopflificationQuality {
+		remaining = 3
+	}
+
+	/* Find a good path through the symbols with the good entropy codes. */
+	var num_blocks uint = 0
+	for ; remaining > 0; remaining-- {
+		num_blocks = findBlocksDistance(data, length, block_switch_cost, num_histograms, codes, symbol_costs, code_costs, switches, ids)
+		num_histograms = remapBlockIdsDistance(ids, length, translation, num_histograms)
+		buildBlockHistogramsDistance(data, length, ids, num_histograms, codes)
+	}
+
+	clusterBlocksDistance(data, length, num_blocks, ids, split)
+}
@@ -0,0 +1,433 @@
+package brotli
+
+import "math"
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* initialEntropyCodesLiteral seeds histograms[0..num_histograms) with one
+   sample of stride bytes each. Sample i starts at length*i/num_histograms;
+   every sample but the first is shifted by a pseudo-random offset below
+   length/num_histograms drawn from myRand with a fixed seed of 7, and a start
+   with start+stride >= length is moved to length-stride-1. */
+func initialEntropyCodesLiteral(data []byte, length uint, stride uint, num_histograms uint, histograms []histogramLiteral) {
+	var rng_state uint32 = 7
+	var span uint = length / num_histograms
+	clearHistogramsLiteral(histograms, num_histograms)
+	for h := uint(0); h < num_histograms; h++ {
+		start := length * h / num_histograms
+		if h > 0 {
+			start += uint(myRand(&rng_state) % uint32(span))
+		}
+
+		/* Keep the whole sample inside data[0..length). */
+		if start+stride >= length {
+			start = length - stride - 1
+		}
+
+		histogramAddVectorLiteral(&histograms[h], data[start:], stride)
+	}
+}
+
+func randomSampleLiteral(seed *uint32, data []byte, length uint, stride uint, sample *histogramLiteral) {
+	var pos uint = 0
+	if stride >= length {
+		stride = length
+	} else {
+		pos = uint(myRand(seed) % uint32(length-stride+1))
+	}
+
+	histogramAddVectorLiteral(sample, data[pos:], stride)
+}
+
+/* refineEntropyCodesLiteral adds further random samples of stride bytes to the
+   histograms in round-robin order. The number of samples is
+   kIterMulForRefining*length/stride + kMinItersForRefining, rounded up to a
+   multiple of num_histograms so that every histogram receives the same
+   number of samples. The random sequence starts from a fixed seed of 7. */
+func refineEntropyCodesLiteral(data []byte, length uint, stride uint, num_histograms uint, histograms []histogramLiteral) {
+	var total uint = kIterMulForRefining*length/stride + kMinItersForRefining
+	var rng_state uint32 = 7
+	total = (total + num_histograms - 1) / num_histograms * num_histograms
+	for n := uint(0); n < total; n++ {
+		var sample histogramLiteral
+		histogramClearLiteral(&sample)
+		randomSampleLiteral(&rng_state, data, length, stride, &sample)
+		histogramAddHistogramLiteral(&histograms[n%num_histograms], &sample)
+	}
+}
+
+/* findBlocksLiteral assigns a block id from the range [0, num_histograms) to
+   each data element in data[0..length) and fills in block_id[0..length) with
+   the assigned values.
+   block_switch_bitcost is the cap applied to the relative cost of each entropy
+   code; for the first 2000 positions it is scaled by a factor growing from
+   0.77 to 0.84. insert_cost, cost and switch_signal are scratch buffers that
+   are overwritten here: the loops below write insert_cost[0..data_size*
+   num_histograms), cost[0..num_histograms) and switch_signal[0..length*
+   bitmaplen). When num_histograms > 1 the trace-back computes length-1, so
+   length must be at least 1 in that case.
+   Returns the number of blocks, i.e. one plus the number of block switches. */
+func findBlocksLiteral(data []byte, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramLiteral, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
+	var data_size uint = histogramDataSizeLiteral()
+	var bitmaplen uint = (num_histograms + 7) >> 3 // bytes per position in switch_signal: one bit per entropy code
+	var num_blocks uint = 1
+	var i uint
+	var j uint
+	assert(num_histograms <= 256) // block ids are stored in a byte
+	if num_histograms <= 1 { // a single code: everything is one block with id 0
+		for i = 0; i < length; i++ {
+			block_id[i] = 0
+		}
+
+		return 1
+	}
+
+	for i := 0; i < int(data_size*num_histograms); i++ {
+		insert_cost[i] = 0
+	}
+	for i = 0; i < num_histograms; i++ { // row 0 temporarily holds fastLog2 of each histogram's total count
+		insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
+	}
+
+	for i = data_size; i != 0; { // walk symbols from last to first so row 0 is still intact when the other rows read it
+		i--
+		for j = 0; j < num_histograms; j++ {
+			insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i])) // layout: insert_cost[symbol*num_histograms+code]
+		}
+	}
+
+	for i := 0; i < int(num_histograms); i++ {
+		cost[i] = 0
+	}
+	for i := 0; i < int(length*bitmaplen); i++ {
+		switch_signal[i] = 0
+	}
+
+	/* After each iteration of this loop, cost[k] will contain the difference
+	   between the minimum cost of arriving at the current byte position using
+	   entropy code k, and the minimum cost of arriving at the current byte
+	   position. This difference is capped at the block switch cost, and if it
+	   reaches block switch cost, it means that when we trace back from the last
+	   position, we need to switch here. */
+	for i = 0; i < length; i++ {
+		var byte_ix uint = i
+		var ix uint = byte_ix * bitmaplen // start of this position's bitmap in switch_signal
+		var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms // start of this symbol's row in insert_cost
+		var min_cost float64 = 1e99
+		var block_switch_cost float64 = block_switch_bitcost
+		var k uint
+		for k = 0; k < num_histograms; k++ {
+			/* We are coding the symbol in data[byte_ix] with entropy code k. */
+			cost[k] += insert_cost[insert_cost_ix+k]
+
+			if cost[k] < min_cost {
+				min_cost = cost[k]
+				block_id[byte_ix] = byte(k) // provisionally the cheapest code at this position
+			}
+		}
+
+		/* More blocks for the beginning: the switch cost is reduced for the
+		   first 2000 positions. */
+		if byte_ix < 2000 {
+			block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
+		}
+
+		for k = 0; k < num_histograms; k++ {
+			cost[k] -= min_cost // make costs relative to the cheapest code
+			if cost[k] >= block_switch_cost {
+				var mask byte = byte(1 << (k & 7))
+				cost[k] = block_switch_cost
+				assert(k>>3 < bitmaplen)
+				switch_signal[ix+(k>>3)] |= mask
+				/* Bit k of this position now records that code k hit the cap here. */
+			}
+		}
+	}
+	{ // Trace back from the last position and switch at the marked places.
+		var byte_ix uint = length - 1
+		var ix uint = byte_ix * bitmaplen
+		var cur_id byte = block_id[byte_ix]
+		for byte_ix > 0 {
+			var mask byte = byte(1 << (cur_id & 7))
+			assert(uint(cur_id)>>3 < bitmaplen)
+			byte_ix--
+			ix -= bitmaplen
+			if switch_signal[ix+uint(cur_id>>3)]&mask != 0 { // the current code was capped at this earlier position
+				if cur_id != block_id[byte_ix] {
+					cur_id = block_id[byte_ix] // continue with the code that was cheapest there
+					num_blocks++
+				}
+			}
+
+			block_id[byte_ix] = cur_id
+		}
+	}
+
+	return num_blocks
+}
+
+var remapBlockIdsLiteral_kInvalidId uint16 = 256
+
+/* remapBlockIdsLiteral renumbers the ids in block_ids[0..length) so that they
+   are consecutive and ordered by first appearance. new_id[0..num_histograms)
+   is overwritten and used as the old-to-new translation table.
+   Returns the number of distinct ids found. */
+func remapBlockIdsLiteral(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
+	var assigned uint16 = 0
+
+	/* Mark every old id as not seen yet. */
+	for h := uint(0); h < num_histograms; h++ {
+		new_id[h] = remapBlockIdsLiteral_kInvalidId
+	}
+
+	/* Hand out new ids in order of first appearance. */
+	for pos := uint(0); pos < length; pos++ {
+		old := block_ids[pos]
+		assert(uint(old) < num_histograms)
+		if new_id[old] != remapBlockIdsLiteral_kInvalidId {
+			continue
+		}
+		new_id[old] = assigned
+		assigned++
+	}
+
+	/* Rewrite the ids in place through the table. */
+	for pos := uint(0); pos < length; pos++ {
+		translated := byte(new_id[block_ids[pos]])
+		block_ids[pos] = translated
+		assert(uint(translated) < num_histograms)
+	}
+
+	assert(uint(assigned) <= num_histograms)
+	return uint(assigned)
+}
+
+func buildBlockHistogramsLiteral(data []byte, length uint, block_ids []byte, num_histograms uint, histograms []histogramLiteral) {
+	var i uint
+	clearHistogramsLiteral(histograms, num_histograms)
+	for i = 0; i < length; i++ {
+		histogramAddLiteral(&histograms[block_ids[i]], uint(data[i]))
+	}
+}
+
+var clusterBlocksLiteral_kInvalidIndex uint32 = math.MaxUint32 // marks a cluster that has no output type index yet
+/* clusterBlocksLiteral merges the num_blocks runs of equal ids in
+   block_ids[0..length) into block types and writes the result to split
+   (types, lengths, num_blocks, num_types).
+   Steps: (1) measure the length of every run; (2) build one histogram per run
+   and combine them batch by batch with histogramCombineLiteral, passing
+   histogramsPerBatch; (3) run histogramCombineLiteral once more over all
+   batch clusters, passing maxNumberOfBlockTypes; (4) re-assign every run to
+   the candidate cluster with the lowest histogramBitCostDistanceLiteral and
+   number the chosen clusters in order of first use; (5) merge neighbouring
+   runs that ended up with the same cluster.
+   num_blocks must equal the number of runs in block_ids (asserted below). */
+func clusterBlocksLiteral(data []byte, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
+	var histogram_symbols []uint32 = make([]uint32, num_blocks) // cluster index assigned to each run
+	var block_lengths []uint32 = make([]uint32, num_blocks) // number of bytes in each run
+	var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
+	var all_histograms_size uint = 0
+	var all_histograms_capacity uint = expected_num_clusters
+	var all_histograms []histogramLiteral = make([]histogramLiteral, all_histograms_capacity) // clusters collected from all batches
+	var cluster_size_size uint = 0
+	var cluster_size_capacity uint = expected_num_clusters
+	var cluster_size []uint32 = make([]uint32, cluster_size_capacity) // sizes value of each collected cluster
+	var num_clusters uint = 0
+	var histograms []histogramLiteral = make([]histogramLiteral, brotli_min_size_t(num_blocks, histogramsPerBatch)) // per-batch working set, reused
+	var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
+	var pairs_capacity uint = max_num_pairs + 1
+	var pairs []histogramPair = make([]histogramPair, pairs_capacity)
+	var pos uint = 0 // read cursor into data
+	var clusters []uint32
+	var num_final_clusters uint
+	var new_index []uint32
+	var i uint
+	var sizes = [histogramsPerBatch]uint32{0}
+	var new_clusters = [histogramsPerBatch]uint32{0}
+	var symbols = [histogramsPerBatch]uint32{0}
+	var remap = [histogramsPerBatch]uint32{0}
+
+	for i := 0; i < int(num_blocks); i++ {
+		block_lengths[i] = 0
+	}
+	{ // Step 1: count the bytes of every run of equal block ids.
+		var block_idx uint = 0
+		for i = 0; i < length; i++ {
+			assert(block_idx < num_blocks)
+			block_lengths[block_idx]++
+			if i+1 == length || block_ids[i] != block_ids[i+1] {
+				block_idx++
+			}
+		}
+
+		assert(block_idx == num_blocks)
+	}
+
+	for i = 0; i < num_blocks; i += histogramsPerBatch { // Step 2: one batch of up to histogramsPerBatch runs per iteration
+		var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
+		var num_new_clusters uint
+		var j uint
+		for j = 0; j < num_to_combine; j++ {
+			var k uint
+			histogramClearLiteral(&histograms[j])
+			for k = 0; uint32(k) < block_lengths[i+j]; k++ {
+				histogramAddLiteral(&histograms[j], uint(data[pos]))
+				pos++
+			}
+
+			histograms[j].bit_cost_ = populationCostLiteral(&histograms[j])
+			new_clusters[j] = uint32(j) // every run starts as its own cluster of size 1
+			symbols[j] = uint32(j)
+			sizes[j] = 1
+		}
+
+		num_new_clusters = histogramCombineLiteral(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
+		if all_histograms_capacity < (all_histograms_size + num_new_clusters) { // grow all_histograms by doubling until the new clusters fit
+			var _new_size uint
+			if all_histograms_capacity == 0 {
+				_new_size = all_histograms_size + num_new_clusters
+			} else {
+				_new_size = all_histograms_capacity
+			}
+			var new_array []histogramLiteral
+			for _new_size < (all_histograms_size + num_new_clusters) {
+				_new_size *= 2
+			}
+			new_array = make([]histogramLiteral, _new_size)
+			if all_histograms_capacity != 0 {
+				copy(new_array, all_histograms[:all_histograms_capacity])
+			}
+
+			all_histograms = new_array
+			all_histograms_capacity = _new_size
+		}
+
+		brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
+		for j = 0; j < num_new_clusters; j++ { // append this batch's clusters to the global lists
+			all_histograms[all_histograms_size] = histograms[new_clusters[j]]
+			all_histograms_size++
+			cluster_size[cluster_size_size] = sizes[new_clusters[j]]
+			cluster_size_size++
+			remap[new_clusters[j]] = uint32(j) // batch-local cluster id -> position within this batch's appended clusters
+		}
+
+		for j = 0; j < num_to_combine; j++ {
+			histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]] // global cluster index of run i+j
+		}
+
+		num_clusters += num_new_clusters
+		assert(num_clusters == cluster_size_size)
+		assert(num_clusters == all_histograms_size)
+	}
+
+	histograms = nil
+
+	max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters) // Step 3: pair limit for the combine over all clusters
+	if pairs_capacity < max_num_pairs+1 {
+		pairs = nil
+		pairs = make([]histogramPair, (max_num_pairs + 1))
+	}
+
+	clusters = make([]uint32, num_clusters)
+	for i = 0; i < num_clusters; i++ {
+		clusters[i] = uint32(i)
+	}
+
+	num_final_clusters = histogramCombineLiteral(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
+	pairs = nil
+	cluster_size = nil
+
+	new_index = make([]uint32, num_clusters)
+	for i = 0; i < num_clusters; i++ {
+		new_index[i] = clusterBlocksLiteral_kInvalidIndex
+	}
+	pos = 0 // rewind: the data is scanned a second time below
+	{ // Step 4: pick the cheapest candidate cluster for every run.
+		var next_index uint32 = 0
+		for i = 0; i < num_blocks; i++ {
+			var histo histogramLiteral
+			var j uint
+			var best_out uint32
+			var best_bits float64
+			histogramClearLiteral(&histo)
+			for j = 0; uint32(j) < block_lengths[i]; j++ {
+				histogramAddLiteral(&histo, uint(data[pos]))
+				pos++
+			}
+
+			if i == 0 { // initial candidate: the previous run's cluster, or the run's own cluster for the first run
+				best_out = histogram_symbols[0]
+			} else {
+				best_out = histogram_symbols[i-1]
+			}
+			best_bits = histogramBitCostDistanceLiteral(&histo, &all_histograms[best_out])
+			for j = 0; j < num_final_clusters; j++ { // a later cluster only wins when strictly cheaper
+				var cur_bits float64 = histogramBitCostDistanceLiteral(&histo, &all_histograms[clusters[j]])
+				if cur_bits < best_bits {
+					best_bits = cur_bits
+					best_out = clusters[j]
+				}
+			}
+
+			histogram_symbols[i] = best_out
+			if new_index[best_out] == clusterBlocksLiteral_kInvalidIndex { // number clusters in order of first use
+				new_index[best_out] = next_index
+				next_index++
+			}
+		}
+	}
+
+	clusters = nil
+	all_histograms = nil
+	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
+	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
+	{ // Step 5: emit one output block per maximal group of neighbouring runs with the same cluster.
+		var cur_length uint32 = 0
+		var block_idx uint = 0
+		var max_type byte = 0
+		for i = 0; i < num_blocks; i++ {
+			cur_length += block_lengths[i]
+			if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
+				var id byte = byte(new_index[histogram_symbols[i]]) // NOTE(review): truncates to 8 bits; presumably the type count never exceeds 256 - confirm
+				split.types[block_idx] = id
+				split.lengths[block_idx] = cur_length
+				max_type = brotli_max_uint8_t(max_type, id)
+				cur_length = 0
+				block_idx++
+			}
+		}
+
+		split.num_blocks = block_idx
+		split.num_types = uint(max_type) + 1
+	}
+
+	new_index = nil
+	block_lengths = nil
+	histogram_symbols = nil
+}
+
+/* splitByteVectorLiteral splits the literal bytes data[0..length) into blocks
+   and stores the resulting block types and lengths in split.
+   literals_per_histogram and max_histograms bound the number of initial
+   entropy codes, sampling_stride_length is the sample size used to seed and
+   refine them, and block_switch_cost is forwarded to findBlocksLiteral.
+   Empty input only sets split.num_types; input shorter than
+   kMinLengthForBlockSplitting is appended to split as one block of type 0. */
+func splitByteVectorLiteral(data []byte, length uint, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
+	var data_size uint = histogramDataSizeLiteral()
+
+	/* One entropy code per literals_per_histogram bytes, capped at max_histograms. */
+	var num_histograms uint = length/literals_per_histogram + 1
+	if max_histograms < num_histograms {
+		num_histograms = max_histograms
+	}
+
+	if length == 0 {
+		split.num_types = 1
+		return
+	}
+
+	if length < kMinLengthForBlockSplitting {
+		/* Short input: append a single block of type 0 covering all of it. */
+		var last uint = split.num_blocks
+		brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, last+1)
+		brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, last+1)
+		split.num_types = 1
+		split.types[last] = 0
+		split.lengths[last] = uint32(length)
+		split.num_blocks = last + 1
+		return
+	}
+
+	codes := make([]histogramLiteral, num_histograms)
+
+	/* Find good entropy codes. */
+	initialEntropyCodesLiteral(data, length, sampling_stride_length, num_histograms, codes)
+	refineEntropyCodesLiteral(data, length, sampling_stride_length, num_histograms, codes)
+
+	/* Scratch buffers for findBlocksLiteral, sized for the initial code count. */
+	ids := make([]byte, length)
+	bitmap_bytes := (num_histograms + 7) >> 3
+	symbol_costs := make([]float64, data_size*num_histograms)
+	code_costs := make([]float64, num_histograms)
+	switches := make([]byte, length*bitmap_bytes)
+	translation := make([]uint16, num_histograms)
+
+	var remaining uint = 10
+	if params.quality < hqZopflificationQuality {
+		remaining = 3
+	}
+
+	/* Find a good path through literals with the good entropy codes. */
+	var num_blocks uint = 0
+	for ; remaining > 0; remaining-- {
+		num_blocks = findBlocksLiteral(data, length, block_switch_cost, num_histograms, codes, symbol_costs, code_costs, switches, ids)
+		num_histograms = remapBlockIdsLiteral(ids, length, translation, num_histograms)
+		buildBlockHistogramsLiteral(data, length, ids, num_histograms, codes)
+	}
+
+	clusterBlocksLiteral(data, length, num_blocks, ids, split)
+}
@@ -0,0 +1,30 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for clustering similar histograms together. */
+
+/* histogramPair is one candidate merge of two histogram clusters, as queued
+   by the compareAndPushToQueue* functions. */
+type histogramPair struct {
+	/* Indexes of the two histograms; the queueing code stores them with
+	   idx1 < idx2. */
+	idx1       uint32
+	idx2       uint32
+	/* Bit cost of the combined histogram. */
+	cost_combo float64
+	/* cost_combo minus both individual bit costs, plus half of
+	   clusterCostDiff of the two cluster sizes. */
+	cost_diff  float64
+}
+
+/* Queue ordering: reports whether p1 ranks below p2. The pair with the larger
+   cost_diff ranks lower; when cost_diff is equal, the pair with the wider
+   index gap (idx2 - idx1) ranks lower. */
+func histogramPairIsLess(p1 *histogramPair, p2 *histogramPair) bool {
+	if p1.cost_diff == p2.cost_diff {
+		return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1)
+	}
+
+	return p1.cost_diff > p2.cost_diff
+}
+
+/* Returns entropy reduction of the context map when we combine two clusters. */
+func clusterCostDiff(size_a uint, size_b uint) float64 {
+	var size_c uint = size_a + size_b
+	return float64(size_a)*fastLog2(size_a) + float64(size_b)*fastLog2(size_b) - float64(size_c)*fastLog2(size_c)
+}
@@ -0,0 +1,164 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Evaluates merging out[idx1] with out[idx2] and, when the merge is good
+   enough, records it in the pairs queue (at most max_num_pairs entries,
+   *num_pairs currently used). The best pair is always kept at pairs[0].
+   A pair is accepted unconditionally when either histogram is empty;
+   otherwise its combined cost must beat a threshold derived from the current
+   best pair. */
+func compareAndPushToQueueCommand(out []histogramCommand, cluster_size []uint32, idx1 uint32, idx2 uint32, max_num_pairs uint, pairs []histogramPair, num_pairs *uint) {
+	if idx1 == idx2 {
+		return
+	}
+
+	/* Store the pair with the smaller index first. */
+	if idx2 < idx1 {
+		idx1, idx2 = idx2, idx1
+	}
+
+	var cand histogramPair
+	cand.idx1 = idx1
+	cand.idx2 = idx2
+	cand.cost_diff = 0.5 * clusterCostDiff(uint(cluster_size[idx1]), uint(cluster_size[idx2]))
+	cand.cost_diff -= out[idx1].bit_cost_
+	cand.cost_diff -= out[idx2].bit_cost_
+
+	switch {
+	case out[idx1].total_count_ == 0:
+		cand.cost_combo = out[idx2].bit_cost_
+	case out[idx2].total_count_ == 0:
+		cand.cost_combo = out[idx1].bit_cost_
+	default:
+		var threshold float64 = 1e99
+		if *num_pairs != 0 {
+			threshold = brotli_max_double(0.0, pairs[0].cost_diff)
+		}
+
+		merged := out[idx1]
+		histogramAddHistogramCommand(&merged, &out[idx2])
+		merged_cost := populationCostCommand(&merged)
+		if !(merged_cost < threshold-cand.cost_diff) {
+			/* Not better than what is already queued. */
+			return
+		}
+
+		cand.cost_combo = merged_cost
+	}
+
+	cand.cost_diff += cand.cost_combo
+
+	used := *num_pairs
+	if used > 0 && histogramPairIsLess(&pairs[0], &cand) {
+		/* New best pair: move the old front to the tail if there is room. */
+		if used < max_num_pairs {
+			pairs[used] = pairs[0]
+			*num_pairs = used + 1
+		}
+
+		pairs[0] = cand
+	} else if used < max_num_pairs {
+		pairs[used] = cand
+		*num_pairs = used + 1
+	}
+}
+
+/* Greedily merges histogram clusters in place and returns how many remain.
+
+   out           histograms indexed by cluster id; the surviving histogram of
+                 a merge absorbs the other one and gets its bit_cost_ updated.
+   cluster_size  per-cluster sizes, summed on merge.
+   symbols       entries of symbols[0..symbols_size) equal to a merged-away
+                 id are rewritten to the surviving id.
+   clusters      clusters[0..num_clusters) lists the live cluster ids; a
+                 merged-away id is removed by shifting the tail down.
+   pairs         scratch queue of candidate merges, best candidate at
+                 pairs[0]; at most max_num_pairs entries are kept.
+
+   Merging runs in two stages: first only while the best pair has a negative
+   cost_diff, then (threshold raised to 1e99) until no more than max_clusters
+   clusters are left. */
+func histogramCombineCommand(out []histogramCommand, cluster_size []uint32, symbols []uint32, clusters []uint32, pairs []histogramPair, num_clusters uint, symbols_size uint, max_clusters uint, max_num_pairs uint) uint {
+	var cost_diff_threshold float64 = 0.0
+	var min_cluster_size uint = 1
+	var num_pairs uint = 0
+	{
+		/* We maintain a vector of histogram pairs, with the property that the pair
+		   with the maximum bit cost reduction is the first. */
+		var idx1 uint
+		for idx1 = 0; idx1 < num_clusters; idx1++ {
+			var idx2 uint
+			for idx2 = idx1 + 1; idx2 < num_clusters; idx2++ {
+				compareAndPushToQueueCommand(out, cluster_size, clusters[idx1], clusters[idx2], max_num_pairs, pairs[0:], &num_pairs)
+			}
+		}
+	}
+
+	for num_clusters > min_cluster_size {
+		var best_idx1 uint32
+		var best_idx2 uint32
+		var i uint
+		if pairs[0].cost_diff >= cost_diff_threshold {
+			/* No pair passes the current threshold: switch to the second
+			   stage and re-test the loop condition against max_clusters. */
+			cost_diff_threshold = 1e99
+			min_cluster_size = max_clusters
+			continue
+		}
+
+		/* Take the best pair from the top of heap. */
+		best_idx1 = pairs[0].idx1
+
+		best_idx2 = pairs[0].idx2
+		histogramAddHistogramCommand(&out[best_idx1], &out[best_idx2])
+		out[best_idx1].bit_cost_ = pairs[0].cost_combo
+		cluster_size[best_idx1] += cluster_size[best_idx2]
+		/* Relabel every symbol that pointed at the absorbed cluster. */
+		for i = 0; i < symbols_size; i++ {
+			if symbols[i] == best_idx2 {
+				symbols[i] = best_idx1
+			}
+		}
+
+		/* Drop best_idx2 from the live list by shifting the tail down one slot. */
+		for i = 0; i < num_clusters; i++ {
+			if clusters[i] == best_idx2 {
+				copy(clusters[i:], clusters[i+1:][:num_clusters-i-1])
+				break
+			}
+		}
+
+		num_clusters--
+		{
+			/* Remove pairs intersecting the just combined best pair. */
+			var copy_to_idx uint = 0
+			for i = 0; i < num_pairs; i++ {
+				var p *histogramPair = &pairs[i]
+				if p.idx1 == best_idx1 || p.idx2 == best_idx1 || p.idx1 == best_idx2 || p.idx2 == best_idx2 {
+					/* Remove invalid pair from the queue. */
+					continue
+				}
+
+				/* Compact survivors towards the front while keeping the best
+				   surviving pair at pairs[0]. */
+				if histogramPairIsLess(&pairs[0], p) {
+					/* Replace the top of the queue if needed. */
+					var front histogramPair = pairs[0]
+					pairs[0] = *p
+					pairs[copy_to_idx] = front
+				} else {
+					pairs[copy_to_idx] = *p
+				}
+
+				copy_to_idx++
+			}
+
+			num_pairs = copy_to_idx
+		}
+
+		/* Push new pairs formed with the combined histogram to the heap. */
+		for i = 0; i < num_clusters; i++ {
+			compareAndPushToQueueCommand(out, cluster_size, best_idx1, clusters[i], max_num_pairs, pairs[0:], &num_pairs)
+		}
+	}
+
+	return num_clusters
+}
+
+/* What is the bit cost of moving histogram from cur_symbol to candidate. */
+func histogramBitCostDistanceCommand(histogram *histogramCommand, candidate *histogramCommand) float64 {
+	if histogram.total_count_ == 0 {
+		return 0.0
+	} else {
+		var tmp histogramCommand = *histogram
+		histogramAddHistogramCommand(&tmp, candidate)
+		return populationCostCommand(&tmp) - candidate.bit_cost_
+	}
+}
@@ -0,0 +1,326 @@
+package brotli
+
+import "math"
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Evaluates merging out[idx1] with out[idx2] and, when the merge is good
+   enough, records it in the pairs queue (at most max_num_pairs entries,
+   *num_pairs currently used). The best pair is always kept at pairs[0].
+   A pair is accepted unconditionally when either histogram is empty;
+   otherwise its combined cost must beat a threshold derived from the current
+   best pair. */
+func compareAndPushToQueueDistance(out []histogramDistance, cluster_size []uint32, idx1 uint32, idx2 uint32, max_num_pairs uint, pairs []histogramPair, num_pairs *uint) {
+	if idx1 == idx2 {
+		return
+	}
+
+	/* Store the pair with the smaller index first. */
+	if idx2 < idx1 {
+		idx1, idx2 = idx2, idx1
+	}
+
+	var cand histogramPair
+	cand.idx1 = idx1
+	cand.idx2 = idx2
+	cand.cost_diff = 0.5 * clusterCostDiff(uint(cluster_size[idx1]), uint(cluster_size[idx2]))
+	cand.cost_diff -= out[idx1].bit_cost_
+	cand.cost_diff -= out[idx2].bit_cost_
+
+	switch {
+	case out[idx1].total_count_ == 0:
+		cand.cost_combo = out[idx2].bit_cost_
+	case out[idx2].total_count_ == 0:
+		cand.cost_combo = out[idx1].bit_cost_
+	default:
+		var threshold float64 = 1e99
+		if *num_pairs != 0 {
+			threshold = brotli_max_double(0.0, pairs[0].cost_diff)
+		}
+
+		merged := out[idx1]
+		histogramAddHistogramDistance(&merged, &out[idx2])
+		merged_cost := populationCostDistance(&merged)
+		if !(merged_cost < threshold-cand.cost_diff) {
+			/* Not better than what is already queued. */
+			return
+		}
+
+		cand.cost_combo = merged_cost
+	}
+
+	cand.cost_diff += cand.cost_combo
+
+	used := *num_pairs
+	if used > 0 && histogramPairIsLess(&pairs[0], &cand) {
+		/* New best pair: move the old front to the tail if there is room. */
+		if used < max_num_pairs {
+			pairs[used] = pairs[0]
+			*num_pairs = used + 1
+		}
+
+		pairs[0] = cand
+	} else if used < max_num_pairs {
+		pairs[used] = cand
+		*num_pairs = used + 1
+	}
+}
+
+/* Greedily merges histogram clusters in place and returns how many remain.
+
+   out           histograms indexed by cluster id; the surviving histogram of
+                 a merge absorbs the other one and gets its bit_cost_ updated.
+   cluster_size  per-cluster sizes, summed on merge.
+   symbols       entries of symbols[0..symbols_size) equal to a merged-away
+                 id are rewritten to the surviving id.
+   clusters      clusters[0..num_clusters) lists the live cluster ids; a
+                 merged-away id is removed by shifting the tail down.
+   pairs         scratch queue of candidate merges, best candidate at
+                 pairs[0]; at most max_num_pairs entries are kept.
+
+   Merging runs in two stages: first only while the best pair has a negative
+   cost_diff, then (threshold raised to 1e99) until no more than max_clusters
+   clusters are left. */
+func histogramCombineDistance(out []histogramDistance, cluster_size []uint32, symbols []uint32, clusters []uint32, pairs []histogramPair, num_clusters uint, symbols_size uint, max_clusters uint, max_num_pairs uint) uint {
+	var cost_diff_threshold float64 = 0.0
+	var min_cluster_size uint = 1
+	var num_pairs uint = 0
+	{
+		/* We maintain a vector of histogram pairs, with the property that the pair
+		   with the maximum bit cost reduction is the first. */
+		var idx1 uint
+		for idx1 = 0; idx1 < num_clusters; idx1++ {
+			var idx2 uint
+			for idx2 = idx1 + 1; idx2 < num_clusters; idx2++ {
+				compareAndPushToQueueDistance(out, cluster_size, clusters[idx1], clusters[idx2], max_num_pairs, pairs[0:], &num_pairs)
+			}
+		}
+	}
+
+	for num_clusters > min_cluster_size {
+		var best_idx1 uint32
+		var best_idx2 uint32
+		var i uint
+		if pairs[0].cost_diff >= cost_diff_threshold {
+			/* No pair passes the current threshold: switch to the second
+			   stage and re-test the loop condition against max_clusters. */
+			cost_diff_threshold = 1e99
+			min_cluster_size = max_clusters
+			continue
+		}
+
+		/* Take the best pair from the top of heap. */
+		best_idx1 = pairs[0].idx1
+
+		best_idx2 = pairs[0].idx2
+		histogramAddHistogramDistance(&out[best_idx1], &out[best_idx2])
+		out[best_idx1].bit_cost_ = pairs[0].cost_combo
+		cluster_size[best_idx1] += cluster_size[best_idx2]
+		/* Relabel every symbol that pointed at the absorbed cluster. */
+		for i = 0; i < symbols_size; i++ {
+			if symbols[i] == best_idx2 {
+				symbols[i] = best_idx1
+			}
+		}
+
+		/* Drop best_idx2 from the live list by shifting the tail down one slot. */
+		for i = 0; i < num_clusters; i++ {
+			if clusters[i] == best_idx2 {
+				copy(clusters[i:], clusters[i+1:][:num_clusters-i-1])
+				break
+			}
+		}
+
+		num_clusters--
+		{
+			/* Remove pairs intersecting the just combined best pair. */
+			var copy_to_idx uint = 0
+			for i = 0; i < num_pairs; i++ {
+				var p *histogramPair = &pairs[i]
+				if p.idx1 == best_idx1 || p.idx2 == best_idx1 || p.idx1 == best_idx2 || p.idx2 == best_idx2 {
+					/* Remove invalid pair from the queue. */
+					continue
+				}
+
+				/* Compact survivors towards the front while keeping the best
+				   surviving pair at pairs[0]. */
+				if histogramPairIsLess(&pairs[0], p) {
+					/* Replace the top of the queue if needed. */
+					var front histogramPair = pairs[0]
+					pairs[0] = *p
+					pairs[copy_to_idx] = front
+				} else {
+					pairs[copy_to_idx] = *p
+				}
+
+				copy_to_idx++
+			}
+
+			num_pairs = copy_to_idx
+		}
+
+		/* Push new pairs formed with the combined histogram to the heap. */
+		for i = 0; i < num_clusters; i++ {
+			compareAndPushToQueueDistance(out, cluster_size, best_idx1, clusters[i], max_num_pairs, pairs[0:], &num_pairs)
+		}
+	}
+
+	return num_clusters
+}
+
+/* What is the bit cost of moving histogram from cur_symbol to candidate. */
+func histogramBitCostDistanceDistance(histogram *histogramDistance, candidate *histogramDistance) float64 {
+	if histogram.total_count_ == 0 {
+		return 0.0
+	} else {
+		var tmp histogramDistance = *histogram
+		histogramAddHistogramDistance(&tmp, candidate)
+		return populationCostDistance(&tmp) - candidate.bit_cost_
+	}
+}
+
+/* Find the best 'out' histogram for each of the 'in' histograms.
+   When called, clusters[0..num_clusters) contains the unique values from
+   symbols[0..in_size), but this property is not preserved in this function.
+   Note: we assume that out[]->bit_cost_ is already up-to-date. */
+func histogramRemapDistance(in []histogramDistance, in_size uint, clusters []uint32, num_clusters uint, out []histogramDistance, symbols []uint32) {
+	var i uint
+	for i = 0; i < in_size; i++ {
+		var best_out uint32
+		if i == 0 {
+			best_out = symbols[0]
+		} else {
+			best_out = symbols[i-1]
+		}
+		var best_bits float64 = histogramBitCostDistanceDistance(&in[i], &out[best_out])
+		var j uint
+		for j = 0; j < num_clusters; j++ {
+			var cur_bits float64 = histogramBitCostDistanceDistance(&in[i], &out[clusters[j]])
+			if cur_bits < best_bits {
+				best_bits = cur_bits
+				best_out = clusters[j]
+			}
+		}
+
+		symbols[i] = best_out
+	}
+
+	/* Recompute each out based on raw and symbols. */
+	for i = 0; i < num_clusters; i++ {
+		histogramClearDistance(&out[clusters[i]])
+	}
+
+	for i = 0; i < in_size; i++ {
+		histogramAddHistogramDistance(&out[symbols[i]], &in[i])
+	}
+}
+
+/* Marks a slot of the renumbering table that has not been assigned yet. */
+var histogramReindexDistance_kInvalidIndex uint32 = math.MaxUint32
+
+/* Renumbers the values in symbols[0..length) to 0..N-1 in order of first
+   appearance, where N is the number of distinct values, and moves the
+   histograms in 'out' so that out'[symbols'[i]] == out[symbols[i]] for every
+   0 <= i < length. Returns N. */
+func histogramReindexDistance(out []histogramDistance, symbols []uint32, length uint) uint {
+	remap := make([]uint32, length)
+	for k := range remap {
+		remap[k] = histogramReindexDistance_kInvalidIndex
+	}
+
+	/* Number the distinct symbols in order of first appearance. */
+	var assigned uint32 = 0
+	for k := uint(0); k < length; k++ {
+		if remap[symbols[k]] == histogramReindexDistance_kInvalidIndex {
+			remap[symbols[k]] = assigned
+			assigned++
+		}
+	}
+
+	/* TODO: by using idea of "cycle-sort" we can avoid allocation of
+	   the scratch slice and reduce the number of copying by the factor of 2. */
+	reordered := make([]histogramDistance, assigned)
+
+	var placed uint32 = 0
+	for k := uint(0); k < length; k++ {
+		target := remap[symbols[k]]
+		if target == placed {
+			/* First occurrence of this symbol: capture its histogram. */
+			reordered[placed] = out[symbols[k]]
+			placed++
+		}
+
+		symbols[k] = target
+	}
+
+	for k := uint32(0); k < placed; k++ {
+		out[k] = reordered[k]
+	}
+
+	return uint(placed)
+}
+
+/* Clusters the in_size histograms of 'in'. The clustered histograms are
+   written to 'out', their count to *out_size, and histogram_symbols[i]
+   receives the index in 'out' that input i was mapped to. max_histograms is
+   forwarded to histogramCombineDistance as the cluster limit. */
+func clusterHistogramsDistance(in []histogramDistance, in_size uint, max_histograms uint, out []histogramDistance, out_size *uint, histogram_symbols []uint32) {
+	const batch uint = 64
+
+	sizes := make([]uint32, in_size)
+	cluster_ids := make([]uint32, in_size)
+	queue_cap := batch * batch / 2
+	queue := make([]histogramPair, queue_cap+1)
+	var live uint = 0
+
+	/* Every input starts out as its own cluster of size one. */
+	for k := uint(0); k < in_size; k++ {
+		sizes[k] = 1
+		out[k] = in[k]
+		out[k].bit_cost_ = populationCostDistance(&in[k])
+		histogram_symbols[k] = uint32(k)
+	}
+
+	/* First pass: combine within batches of up to 64 inputs, all pairs allowed. */
+	for start := uint(0); start < in_size; start += batch {
+		count := brotli_min_size_t(in_size-start, batch)
+		for j := uint(0); j < count; j++ {
+			cluster_ids[live+j] = uint32(start + j)
+		}
+
+		kept := histogramCombineDistance(out, sizes, histogram_symbols[start:], cluster_ids[live:], queue, count, count, max_histograms, queue_cap)
+		live += kept
+	}
+
+	/* Second pass: the number of queued pairs is limited. After the limit is
+	   reached, only the best pair keeps being tracked. */
+	pair_limit := brotli_min_size_t(64*live, (live/2)*live)
+	if need := pair_limit + 1; queue_cap < need {
+		/* Grow the queue by doubling until it can hold pair_limit+1 entries. */
+		grown := queue_cap
+		for grown < need {
+			grown *= 2
+		}
+
+		bigger := make([]histogramPair, grown)
+		copy(bigger, queue[:queue_cap])
+		queue = bigger
+	}
+
+	/* Collapse similar histograms. */
+	live = histogramCombineDistance(out, sizes, histogram_symbols, cluster_ids, queue, live, in_size, max_histograms, pair_limit)
+
+	/* Find the optimal map from original histograms to the final ones. */
+	histogramRemapDistance(in, in_size, cluster_ids, live, out, histogram_symbols)
+
+	/* Convert the context map to a canonical form. */
+	*out_size = histogramReindexDistance(out, histogram_symbols, in_size)
+}
@@ -0,0 +1,326 @@
+package brotli
+
+import "math"
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Evaluates merging out[idx1] with out[idx2] and, when the merge is good
+   enough, records it in the pairs queue (at most max_num_pairs entries,
+   *num_pairs currently used). The best pair is always kept at pairs[0].
+   A pair is accepted unconditionally when either histogram is empty;
+   otherwise its combined cost must beat a threshold derived from the current
+   best pair. */
+func compareAndPushToQueueLiteral(out []histogramLiteral, cluster_size []uint32, idx1 uint32, idx2 uint32, max_num_pairs uint, pairs []histogramPair, num_pairs *uint) {
+	if idx1 == idx2 {
+		return
+	}
+
+	/* Store the pair with the smaller index first. */
+	if idx2 < idx1 {
+		idx1, idx2 = idx2, idx1
+	}
+
+	var cand histogramPair
+	cand.idx1 = idx1
+	cand.idx2 = idx2
+	cand.cost_diff = 0.5 * clusterCostDiff(uint(cluster_size[idx1]), uint(cluster_size[idx2]))
+	cand.cost_diff -= out[idx1].bit_cost_
+	cand.cost_diff -= out[idx2].bit_cost_
+
+	switch {
+	case out[idx1].total_count_ == 0:
+		cand.cost_combo = out[idx2].bit_cost_
+	case out[idx2].total_count_ == 0:
+		cand.cost_combo = out[idx1].bit_cost_
+	default:
+		var threshold float64 = 1e99
+		if *num_pairs != 0 {
+			threshold = brotli_max_double(0.0, pairs[0].cost_diff)
+		}
+
+		merged := out[idx1]
+		histogramAddHistogramLiteral(&merged, &out[idx2])
+		merged_cost := populationCostLiteral(&merged)
+		if !(merged_cost < threshold-cand.cost_diff) {
+			/* Not better than what is already queued. */
+			return
+		}
+
+		cand.cost_combo = merged_cost
+	}
+
+	cand.cost_diff += cand.cost_combo
+
+	used := *num_pairs
+	if used > 0 && histogramPairIsLess(&pairs[0], &cand) {
+		/* New best pair: move the old front to the tail if there is room. */
+		if used < max_num_pairs {
+			pairs[used] = pairs[0]
+			*num_pairs = used + 1
+		}
+
+		pairs[0] = cand
+	} else if used < max_num_pairs {
+		pairs[used] = cand
+		*num_pairs = used + 1
+	}
+}
+
+/* Greedily merges histogram clusters in place and returns how many remain.
+
+   out           histograms indexed by cluster id; the surviving histogram of
+                 a merge absorbs the other one and gets its bit_cost_ updated.
+   cluster_size  per-cluster sizes, summed on merge.
+   symbols       entries of symbols[0..symbols_size) equal to a merged-away
+                 id are rewritten to the surviving id.
+   clusters      clusters[0..num_clusters) lists the live cluster ids; a
+                 merged-away id is removed by shifting the tail down.
+   pairs         scratch queue of candidate merges, best candidate at
+                 pairs[0]; at most max_num_pairs entries are kept.
+
+   Merging runs in two stages: first only while the best pair has a negative
+   cost_diff, then (threshold raised to 1e99) until no more than max_clusters
+   clusters are left. */
+func histogramCombineLiteral(out []histogramLiteral, cluster_size []uint32, symbols []uint32, clusters []uint32, pairs []histogramPair, num_clusters uint, symbols_size uint, max_clusters uint, max_num_pairs uint) uint {
+	var cost_diff_threshold float64 = 0.0
+	var min_cluster_size uint = 1
+	var num_pairs uint = 0
+	{
+		/* We maintain a vector of histogram pairs, with the property that the pair
+		   with the maximum bit cost reduction is the first. */
+		var idx1 uint
+		for idx1 = 0; idx1 < num_clusters; idx1++ {
+			var idx2 uint
+			for idx2 = idx1 + 1; idx2 < num_clusters; idx2++ {
+				compareAndPushToQueueLiteral(out, cluster_size, clusters[idx1], clusters[idx2], max_num_pairs, pairs[0:], &num_pairs)
+			}
+		}
+	}
+
+	for num_clusters > min_cluster_size {
+		var best_idx1 uint32
+		var best_idx2 uint32
+		var i uint
+		if pairs[0].cost_diff >= cost_diff_threshold {
+			/* No pair passes the current threshold: switch to the second
+			   stage and re-test the loop condition against max_clusters. */
+			cost_diff_threshold = 1e99
+			min_cluster_size = max_clusters
+			continue
+		}
+
+		/* Take the best pair from the top of heap. */
+		best_idx1 = pairs[0].idx1
+
+		best_idx2 = pairs[0].idx2
+		histogramAddHistogramLiteral(&out[best_idx1], &out[best_idx2])
+		out[best_idx1].bit_cost_ = pairs[0].cost_combo
+		cluster_size[best_idx1] += cluster_size[best_idx2]
+		/* Relabel every symbol that pointed at the absorbed cluster. */
+		for i = 0; i < symbols_size; i++ {
+			if symbols[i] == best_idx2 {
+				symbols[i] = best_idx1
+			}
+		}
+
+		/* Drop best_idx2 from the live list by shifting the tail down one slot. */
+		for i = 0; i < num_clusters; i++ {
+			if clusters[i] == best_idx2 {
+				copy(clusters[i:], clusters[i+1:][:num_clusters-i-1])
+				break
+			}
+		}
+
+		num_clusters--
+		{
+			/* Remove pairs intersecting the just combined best pair. */
+			var copy_to_idx uint = 0
+			for i = 0; i < num_pairs; i++ {
+				var p *histogramPair = &pairs[i]
+				if p.idx1 == best_idx1 || p.idx2 == best_idx1 || p.idx1 == best_idx2 || p.idx2 == best_idx2 {
+					/* Remove invalid pair from the queue. */
+					continue
+				}
+
+				/* Compact survivors towards the front while keeping the best
+				   surviving pair at pairs[0]. */
+				if histogramPairIsLess(&pairs[0], p) {
+					/* Replace the top of the queue if needed. */
+					var front histogramPair = pairs[0]
+					pairs[0] = *p
+					pairs[copy_to_idx] = front
+				} else {
+					pairs[copy_to_idx] = *p
+				}
+
+				copy_to_idx++
+			}
+
+			num_pairs = copy_to_idx
+		}
+
+		/* Push new pairs formed with the combined histogram to the heap. */
+		for i = 0; i < num_clusters; i++ {
+			compareAndPushToQueueLiteral(out, cluster_size, best_idx1, clusters[i], max_num_pairs, pairs[0:], &num_pairs)
+		}
+	}
+
+	return num_clusters
+}
+
+/* What is the bit cost of moving histogram from cur_symbol to candidate. */
+func histogramBitCostDistanceLiteral(histogram *histogramLiteral, candidate *histogramLiteral) float64 {
+	if histogram.total_count_ == 0 {
+		return 0.0
+	} else {
+		var tmp histogramLiteral = *histogram
+		histogramAddHistogramLiteral(&tmp, candidate)
+		return populationCostLiteral(&tmp) - candidate.bit_cost_
+	}
+}
+
+/* Find the best 'out' histogram for each of the 'in' histograms.
+   When called, clusters[0..num_clusters) contains the unique values from
+   symbols[0..in_size), but this property is not preserved in this function.
+   Note: we assume that out[]->bit_cost_ is already up-to-date. */
+func histogramRemapLiteral(in []histogramLiteral, in_size uint, clusters []uint32, num_clusters uint, out []histogramLiteral, symbols []uint32) {
+	var i uint
+	for i = 0; i < in_size; i++ {
+		var best_out uint32
+		if i == 0 {
+			best_out = symbols[0]
+		} else {
+			best_out = symbols[i-1]
+		}
+		var best_bits float64 = histogramBitCostDistanceLiteral(&in[i], &out[best_out])
+		var j uint
+		for j = 0; j < num_clusters; j++ {
+			var cur_bits float64 = histogramBitCostDistanceLiteral(&in[i], &out[clusters[j]])
+			if cur_bits < best_bits {
+				best_bits = cur_bits
+				best_out = clusters[j]
+			}
+		}
+
+		symbols[i] = best_out
+	}
+
+	/* Recompute each out based on raw and symbols. */
+	for i = 0; i < num_clusters; i++ {
+		histogramClearLiteral(&out[clusters[i]])
+	}
+
+	for i = 0; i < in_size; i++ {
+		histogramAddHistogramLiteral(&out[symbols[i]], &in[i])
+	}
+}
+
+/* Marks a slot of the renumbering table that has not been assigned yet. */
+var histogramReindexLiteral_kInvalidIndex uint32 = math.MaxUint32
+
+/* Renumbers the values in symbols[0..length) to 0..N-1 in order of first
+   appearance, where N is the number of distinct values, and moves the
+   histograms in 'out' so that out'[symbols'[i]] == out[symbols[i]] for every
+   0 <= i < length. Returns N. */
+func histogramReindexLiteral(out []histogramLiteral, symbols []uint32, length uint) uint {
+	remap := make([]uint32, length)
+	for k := range remap {
+		remap[k] = histogramReindexLiteral_kInvalidIndex
+	}
+
+	/* Number the distinct symbols in order of first appearance. */
+	var assigned uint32 = 0
+	for k := uint(0); k < length; k++ {
+		if remap[symbols[k]] == histogramReindexLiteral_kInvalidIndex {
+			remap[symbols[k]] = assigned
+			assigned++
+		}
+	}
+
+	/* TODO: by using idea of "cycle-sort" we can avoid allocation of
+	   the scratch slice and reduce the number of copying by the factor of 2. */
+	reordered := make([]histogramLiteral, assigned)
+
+	var placed uint32 = 0
+	for k := uint(0); k < length; k++ {
+		target := remap[symbols[k]]
+		if target == placed {
+			/* First occurrence of this symbol: capture its histogram. */
+			reordered[placed] = out[symbols[k]]
+			placed++
+		}
+
+		symbols[k] = target
+	}
+
+	for k := uint32(0); k < placed; k++ {
+		out[k] = reordered[k]
+	}
+
+	return uint(placed)
+}
+
+/* Clusters the in_size histograms of 'in'. The clustered histograms are
+   written to 'out', their count to *out_size, and histogram_symbols[i]
+   receives the index in 'out' that input i was mapped to. max_histograms is
+   forwarded to histogramCombineLiteral as the cluster limit. */
+func clusterHistogramsLiteral(in []histogramLiteral, in_size uint, max_histograms uint, out []histogramLiteral, out_size *uint, histogram_symbols []uint32) {
+	const batch uint = 64
+
+	sizes := make([]uint32, in_size)
+	cluster_ids := make([]uint32, in_size)
+	queue_cap := batch * batch / 2
+	queue := make([]histogramPair, queue_cap+1)
+	var live uint = 0
+
+	/* Every input starts out as its own cluster of size one. */
+	for k := uint(0); k < in_size; k++ {
+		sizes[k] = 1
+		out[k] = in[k]
+		out[k].bit_cost_ = populationCostLiteral(&in[k])
+		histogram_symbols[k] = uint32(k)
+	}
+
+	/* First pass: combine within batches of up to 64 inputs, all pairs allowed. */
+	for start := uint(0); start < in_size; start += batch {
+		count := brotli_min_size_t(in_size-start, batch)
+		for j := uint(0); j < count; j++ {
+			cluster_ids[live+j] = uint32(start + j)
+		}
+
+		kept := histogramCombineLiteral(out, sizes, histogram_symbols[start:], cluster_ids[live:], queue, count, count, max_histograms, queue_cap)
+		live += kept
+	}
+
+	/* Second pass: the number of queued pairs is limited. After the limit is
+	   reached, only the best pair keeps being tracked. */
+	pair_limit := brotli_min_size_t(64*live, (live/2)*live)
+	if need := pair_limit + 1; queue_cap < need {
+		/* Grow the queue by doubling until it can hold pair_limit+1 entries. */
+		grown := queue_cap
+		for grown < need {
+			grown *= 2
+		}
+
+		bigger := make([]histogramPair, grown)
+		copy(bigger, queue[:queue_cap])
+		queue = bigger
+	}
+
+	/* Collapse similar histograms. */
+	live = histogramCombineLiteral(out, sizes, histogram_symbols, cluster_ids, queue, live, in_size, max_histograms, pair_limit)
+
+	/* Find the optimal map from original histograms to the final ones. */
+	histogramRemapLiteral(in, in_size, cluster_ids, live, out, histogram_symbols)
+
+	/* Convert the context map to a canonical form. */
+	*out_size = histogramReindexLiteral(out, histogram_symbols, in_size)
+}
@@ -0,0 +1,254 @@
+package brotli
+
+/* Smallest insert length covered by each insert length code (the index is
+   the value returned by getInsertLengthCode). */
+var kInsBase = []uint32{
+	0, 1, 2, 3, 4, 5, 6, 8,
+	10, 14, 18, 26, 34, 50, 66, 98,
+	130, 194, 322, 578, 1090, 2114, 6210, 22594,
+}
+
+/* Per insert length code, the bit width of the range it covers:
+   kInsBase[i+1] - kInsBase[i] == 1 << kInsExtra[i] for every i < 23. */
+var kInsExtra = []uint32{
+	0, 0, 0, 0, 0, 0, 1, 1,
+	2, 2, 3, 3, 4, 4, 5, 5,
+	6, 7, 8, 9, 10, 12, 14, 24,
+}
+
+/* Smallest copy length covered by each copy length code (the index is the
+   value returned by getCopyLengthCode). */
+var kCopyBase = []uint32{
+	2, 3, 4, 5, 6, 7, 8, 9,
+	10, 12, 14, 18, 22, 30, 38, 54,
+	70, 102, 134, 198, 326, 582, 1094, 2118,
+}
+
+/* Per copy length code, the bit width of the range it covers:
+   kCopyBase[i+1] - kCopyBase[i] == 1 << kCopyExtra[i] for every i < 23. */
+var kCopyExtra = []uint32{
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 2, 2, 3, 3, 4, 4,
+	5, 5, 6, 7, 8, 9, 10, 24,
+}
+
+/* Maps an insert length to its insert length code (0..23). The range
+   boundaries match the entries of kInsBase. */
+func getInsertLengthCode(insertlen uint) uint16 {
+	switch {
+	case insertlen < 6:
+		return uint16(insertlen)
+	case insertlen < 130:
+		var width uint32 = log2FloorNonZero(insertlen-2) - 1
+		return uint16((width << 1) + uint32((insertlen-2)>>width) + 2)
+	case insertlen < 2114:
+		return uint16(log2FloorNonZero(insertlen-66) + 10)
+	case insertlen < 6210:
+		return 21
+	case insertlen < 22594:
+		return 22
+	default:
+		return 23
+	}
+}
+
+/* Maps a copy length to its copy length code (0..23). The range boundaries
+   match the entries of kCopyBase.
+   NOTE(review): copylen - 2 wraps around for copylen < 2; presumably callers
+   never pass such lengths - confirm. */
+func getCopyLengthCode(copylen uint) uint16 {
+	switch {
+	case copylen < 10:
+		return uint16(copylen - 2)
+	case copylen < 134:
+		var width uint32 = log2FloorNonZero(copylen-6) - 1
+		return uint16((width << 1) + uint32((copylen-6)>>width) + 4)
+	case copylen < 2118:
+		return uint16(log2FloorNonZero(copylen-70) + 12)
+	default:
+		return 23
+	}
+}
+
+/* Combines an insert length code and a copy length code into a single
+   command prefix code. The low three bits of each code always form the low
+   six bits of the result; the remaining bits select the cell of the
+   insert/copy range table. */
+func combineLengthCodes(inscode uint16, copycode uint16, use_last_distance bool) uint16 {
+	var low6 uint16 = uint16((inscode&0x7)<<3 | copycode&0x7)
+
+	if use_last_distance && inscode < 8 && copycode < 16 {
+		/* Codes that reuse the last distance: 0..63 for copy codes below 8,
+		   64..127 for copy codes 8..15. */
+		if copycode >= 8 {
+			return low6 | 64
+		}
+
+		return low6
+	}
+
+	/* Specification: 5 Encoding of ... (last table) */
+	/* cell = 2 * index, where index is in range [0..8] */
+	var cell uint32 = 2 * ((uint32(copycode) >> 3) + 3*(uint32(inscode)>>3))
+
+	/* All values in specification are K * 64,
+	   where   K = [2, 3, 6, 4, 5, 8, 7, 9, 10],
+	       i + 1 = [1, 2, 3, 4, 5, 6, 7, 8,  9],
+	   K - i - 1 = [1, 1, 3, 0, 0, 2, 0, 1,  2] = D.
+	   All values in D require only 2 bits to encode.
+	   Magic constant is shifted 6 bits left, to avoid final multiplication. */
+	cell = (cell << 5) + 0x40 + ((0x520D40 >> cell) & 0xC0)
+
+	return uint16(cell | uint32(low6))
+}
+
+func getLengthCode(insertlen uint, copylen uint, use_last_distance bool, code *uint16) {
+	var inscode uint16 = getInsertLengthCode(insertlen)
+	var copycode uint16 = getCopyLengthCode(copylen)
+	*code = combineLengthCodes(inscode, copycode, use_last_distance)
+}
+
+/* Returns the kInsBase entry for the given insert length code. */
+func getInsertBase(inscode uint16) uint32 {
+	return kInsBase[inscode]
+}
+
+/* Returns the kInsExtra entry for the given insert length code. */
+func getInsertExtra(inscode uint16) uint32 {
+	return kInsExtra[inscode]
+}
+
+/* Returns the kCopyBase entry for the given copy length code. */
+func getCopyBase(copycode uint16) uint32 {
+	return kCopyBase[copycode]
+}
+
+/* Returns the kCopyExtra entry for the given copy length code. */
+func getCopyExtra(copycode uint16) uint32 {
+	return kCopyExtra[copycode]
+}
+
+/* command is one insert-and-copy command. */
+type command struct {
+	/* Insert length. */
+	insert_len_  uint32
+	/* Low 25 bits: copy length. Upper 7 bits: signed delta between the copy
+	   length code and the copy length (see commandCopyLenCode). */
+	copy_len_    uint32
+	/* Distance extra bits, filled in by prefixEncodeCopyDistance. */
+	dist_extra_  uint32
+	/* Combined insert/copy length code, filled in by getLengthCode. */
+	cmd_prefix_  uint16
+	/* Low 10 bits: distance code. Bits above that are read back as the
+	   extra-bit count by commandRestoreDistanceCode. */
+	dist_prefix_ uint16
+}
+
+/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
+func makeCommand(dist *distanceParams, insertlen uint, copylen uint, copylen_code_delta int, distance_code uint) (cmd command) {
+	/* Don't rely on signed int representation, use honest casts. */
+	var delta uint32 = uint32(byte(int8(copylen_code_delta)))
+	cmd.insert_len_ = uint32(insertlen)
+	cmd.copy_len_ = uint32(uint32(copylen) | delta<<25)
+
+	/* The distance prefix and extra bits are stored in this Command as if
+	   npostfix and ndirect were 0, they are only recomputed later after the
+	   clustering if needed. */
+	prefixEncodeCopyDistance(distance_code, uint(dist.num_direct_distance_codes), uint(dist.distance_postfix_bits), &cmd.dist_prefix_, &cmd.dist_extra_)
+	getLengthCode(insertlen, uint(int(copylen)+copylen_code_delta), (cmd.dist_prefix_&0x3FF == 0), &cmd.cmd_prefix_)
+
+	return cmd
+}
+
+/* Builds a command that only inserts insertlen literals: the copy length is
+   zero and the length code is chosen as for a copy of length 4. */
+func makeInsertCommand(insertlen uint) (cmd command) {
+	cmd = command{
+		insert_len_:  uint32(insertlen),
+		copy_len_:    4 << 25,
+		dist_extra_:  0,
+		dist_prefix_: numDistanceShortCodes,
+	}
+	getLengthCode(insertlen, 4, false, &cmd.cmd_prefix_)
+	return cmd
+}
+
+/* Recovers the distance code from the stored distance prefix and extra
+   bits. Short and direct codes are stored verbatim in the low 10 bits of
+   dist_prefix_; larger codes are rebuilt from the prefix, the extra-bit count
+   in the upper bits of dist_prefix_, and dist_extra_. */
+func commandRestoreDistanceCode(self *command, dist *distanceParams) uint32 {
+	var dcode uint32 = uint32(self.dist_prefix_) & 0x3FF
+	if dcode < numDistanceShortCodes+dist.num_direct_distance_codes {
+		return dcode
+	}
+
+	var nbits uint32 = uint32(self.dist_prefix_) >> 10
+	var postfix_mask uint32 = (1 << dist.distance_postfix_bits) - 1
+	var rel uint32 = dcode - dist.num_direct_distance_codes - numDistanceShortCodes
+	var hcode uint32 = rel >> dist.distance_postfix_bits
+	var lcode uint32 = rel & postfix_mask
+	var offset uint32 = ((2 + (hcode & 1)) << nbits) - 4
+	return ((offset + self.dist_extra_) << dist.distance_postfix_bits) + lcode + dist.num_direct_distance_codes + numDistanceShortCodes
+}
+
+func commandDistanceContext(self *command) uint32 {
+	var r uint32 = uint32(self.cmd_prefix_) >> 6
+	var c uint32 = uint32(self.cmd_prefix_) & 7
+	if (r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2) {
+		return c
+	}
+
+	return 3
+}
+
+func commandCopyLen(self *command) uint32 {
+	return self.copy_len_ & 0x1FFFFFF
+}
+
+func commandCopyLenCode(self *command) uint32 {
+	var modifier uint32 = self.copy_len_ >> 25
+	var delta int32 = int32(int8(byte(modifier | (modifier&0x40)<<1)))
+	return uint32(int32(self.copy_len_&0x1FFFFFF) + delta)
+}
@@ -0,0 +1,834 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses one-pass processing: when we find a backward
+   match, we immediately emit the corresponding command and literal codes to
+   the bit stream.
+
+   Adapted from the CompressFragment() function in
+   https://github.com/google/snappy/blob/master/snappy.cc */
+
+/* Largest backward distance accepted for a match in this file: a candidate
+   that lies farther than this behind "ip" is rejected.
+   262128 == (1 << 18) - 16. */
+const maxDistance_compress_fragment = 262128
+
+/* hash5 hashes the first five bytes of "p". Eight bytes are loaded
+   little-endian and shifted left by 24 bits so that only the five lowest
+   bytes influence the product; the top (64 - shift) bits of the product are
+   returned. "p" must hold at least eight bytes. */
+func hash5(p []byte, shift uint) uint32 {
+	var word uint64 = binary.LittleEndian.Uint64(p)
+	var product uint64 = (word << 24) * uint64(kHashMul32)
+	return uint32(product >> shift)
+}
+
+/* hashBytesAtOffset5 computes the same hash as hash5 for the five bytes that
+   start "offset" bytes into the already-loaded little-endian word "v".
+   REQUIRES: 0 <= offset <= 3, so that five bytes remain in the word. */
+func hashBytesAtOffset5(v uint64, offset int, shift uint) uint32 {
+	assert(offset >= 0)
+	assert(offset <= 3)
+
+	var window uint64 = v >> uint(8*offset)
+	var product uint64 = (window << 24) * uint64(kHashMul32)
+	return uint32(product >> shift)
+}
+
+/* isMatch5 reports whether the first five bytes of "p1" and "p2" are equal.
+   The fifth byte is only inspected when the first four already match. */
+func isMatch5(p1 []byte, p2 []byte) bool {
+	if binary.LittleEndian.Uint32(p1) != binary.LittleEndian.Uint32(p2) {
+		return false
+	}
+
+	return p1[4] == p2[4]
+}
+
+/* Builds a literal prefix code into "depths" and "bits" from the byte
+   statistics of the first "input_size" bytes of "input", and stores that code
+   into the bit stream.
+
+   The code is derived from the pre-LZ77 input, so it only approximates the
+   statistics of the literals that are eventually emitted. Inputs shorter than
+   1 << 15 bytes are counted in full; longer inputs are sampled at every 29th
+   byte, and because a sample cannot prove a symbol is absent every symbol then
+   receives an extra count of 1 so that no literal ends up with depth 0.
+   In both modes the first 11 occurrences of a symbol are counted three times
+   to account for the balancing effect of the LZ77 phase (frequent symbols are
+   more likely to end up in backward references than as literals).
+
+   Returns the estimated compression ratio, in millibytes per character, of
+   encoding the input with the generated code. */
+func buildAndStoreLiteralPrefixCode(input []byte, input_size uint, depths []byte, bits []uint16, storage_ix *uint, storage []byte) uint {
+	var counts [256]uint32
+
+	/* Exhaustive counting by default; sparse sampling for long inputs. */
+	var stride uint = 1
+	var floor uint32 = 0
+	if input_size >= 1<<15 {
+		stride = 29
+		floor = 1
+	}
+
+	for pos := uint(0); pos < input_size; pos += stride {
+		counts[input[pos]]++
+	}
+
+	/* Number of bytes actually looked at (equals input_size when stride is 1). */
+	var total uint = (input_size + stride - 1) / stride
+	for sym := range counts {
+		var boost uint32 = floor + 2*brotli_min_uint32_t(counts[sym], 11)
+		counts[sym] += boost
+		total += uint(boost)
+	}
+
+	buildAndStoreHuffmanTreeFast(counts[:], total, /* max_bits = */
+		8, depths, bits, storage_ix, storage)
+
+	var weighted uint = 0
+	for sym := range counts {
+		if counts[sym] != 0 {
+			weighted += uint(counts[sym] * uint32(depths[sym]))
+		}
+	}
+
+	/* Estimated encoding ratio, millibytes per symbol. */
+	return (weighted * 125) / total
+}
+
+/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
+   "bits" based on "histogram" and stores it into the bit stream.
+
+   "histogram", "depth" and "bits" are indexed 0..63 for the command symbols
+   and 64..127 for the distance symbols. The resulting code lengths are written
+   to "storage" at bit position "*storage_ix", first for the full command
+   alphabet (numCommandSymbols entries) and then for the 64 distance symbols. */
+func buildAndStoreCommandPrefixCode1(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
+	var tree [129]huffmanTree
+	var cmd_depth = [numCommandSymbols]byte{0}
+	/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
+
+	var cmd_bits [64]uint16
+
+	/* Code lengths for the command half, then for the distance half; the same
+	   scratch "tree" is reused for both calls. */
+	createHuffmanTree(histogram, 64, 15, tree[:], depth)
+	createHuffmanTree(histogram[64:], 64, 14, tree[:], depth[64:])
+
+	/* We have to jump through a few hoops here in order to compute
+	   the command bits because the symbols are in a different order than in
+	   the full alphabet. This looks complicated, but having the symbols
+	   in this order in the command bits saves a few branches in the Emit*
+	   functions. */
+	copy(cmd_depth[:], depth[:24])
+
+	copy(cmd_depth[24:][:], depth[40:][:8])
+	copy(cmd_depth[32:][:], depth[24:][:8])
+	copy(cmd_depth[40:][:], depth[48:][:8])
+	copy(cmd_depth[48:][:], depth[32:][:8])
+	copy(cmd_depth[56:][:], depth[56:][:8])
+	convertBitDepthsToSymbols(cmd_depth[:], 64, cmd_bits[:])
+	/* Undo the permutation above so that "bits" is indexed like "depth". */
+	copy(bits, cmd_bits[:24])
+	copy(bits[24:], cmd_bits[32:][:8])
+	copy(bits[32:], cmd_bits[48:][:8])
+	copy(bits[40:], cmd_bits[24:][:8])
+	copy(bits[48:], cmd_bits[40:][:8])
+	copy(bits[56:], cmd_bits[56:][:8])
+	convertBitDepthsToSymbols(depth[64:], 64, bits[64:])
+	{
+		/* Create the bit length array for the full command alphabet. */
+		var i uint
+		/* NOTE: "i :=" below declares a separate int that shadows the uint "i"
+		   above for the duration of this loop only; the outer "i" is the one
+		   used by the 8-iteration loop further down. */
+		for i := 0; i < int(64); i++ {
+			cmd_depth[i] = 0
+		} /* only 64 first values were used */
+		copy(cmd_depth[:], depth[:8])
+		copy(cmd_depth[64:][:], depth[8:][:8])
+		copy(cmd_depth[128:][:], depth[16:][:8])
+		copy(cmd_depth[192:][:], depth[24:][:8])
+		copy(cmd_depth[384:][:], depth[32:][:8])
+		/* These three groups are scattered with a stride of 8 rather than being
+		   copied as contiguous runs. */
+		for i = 0; i < 8; i++ {
+			cmd_depth[128+8*i] = depth[40+i]
+			cmd_depth[256+8*i] = depth[48+i]
+			cmd_depth[448+8*i] = depth[56+i]
+		}
+
+		storeHuffmanTree(cmd_depth[:], numCommandSymbols, tree[:], storage_ix, storage)
+	}
+
+	storeHuffmanTree(depth[64:], 64, tree[:], storage_ix, storage)
+}
+
+/* Writes the prefix symbol and extra bits for an insert length to the bit
+   stream and counts the symbol in "histo".
+   REQUIRES: insertlen < 6210 */
+func emitInsertLen1(insertlen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
+	switch {
+	case insertlen < 6:
+		/* One symbol per length, no extra bits. */
+		var sym uint = insertlen + 40
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		histo[sym]++
+
+	case insertlen < 130:
+		/* Two symbols per power of two. */
+		var rest uint = insertlen - 2
+		var extra uint32 = log2FloorNonZero(rest) - 1
+		var top uint = rest >> extra
+		var sym uint = uint((extra << 1) + uint32(top) + 42)
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		writeBits(uint(extra), uint64(rest)-(uint64(top)<<extra), storage_ix, storage)
+		histo[sym]++
+
+	case insertlen < 2114:
+		/* One symbol per power of two. */
+		var rest uint = insertlen - 66
+		var extra uint32 = log2FloorNonZero(rest)
+		var sym uint = uint(extra + 50)
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		writeBits(uint(extra), uint64(rest)-(uint64(1)<<extra), storage_ix, storage)
+		histo[sym]++
+
+	default:
+		/* Symbol 61 with 12 extra bits. */
+		writeBits(uint(depth[61]), uint64(bits[61]), storage_ix, storage)
+		writeBits(12, uint64(insertlen)-2114, storage_ix, storage)
+		histo[61]++
+	}
+}
+
+func emitLongInsertLen(insertlen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
+	if insertlen < 22594 {
+		writeBits(uint(depth[62]), uint64(bits[62]), storage_ix, storage)
+		writeBits(14, uint64(insertlen)-6210, storage_ix, storage)
+		histo[62]++
+	} else {
+		writeBits(uint(depth[63]), uint64(bits[63]), storage_ix, storage)
+		writeBits(24, uint64(insertlen)-22594, storage_ix, storage)
+		histo[63]++
+	}
+}
+
+/* Writes the prefix symbol and extra bits for a copy length to the bit stream
+   and counts the symbol in "histo". Used for copies that are followed by an
+   explicitly coded distance. */
+func emitCopyLen1(copylen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
+	switch {
+	case copylen < 10:
+		/* One symbol per length, no extra bits. */
+		var sym uint = copylen + 14
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		histo[sym]++
+
+	case copylen < 134:
+		/* Two symbols per power of two. */
+		var rest uint = copylen - 6
+		var extra uint32 = log2FloorNonZero(rest) - 1
+		var top uint = rest >> extra
+		var sym uint = uint((extra << 1) + uint32(top) + 20)
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		writeBits(uint(extra), uint64(rest)-(uint64(top)<<extra), storage_ix, storage)
+		histo[sym]++
+
+	case copylen < 2118:
+		/* One symbol per power of two. */
+		var rest uint = copylen - 70
+		var extra uint32 = log2FloorNonZero(rest)
+		var sym uint = uint(extra + 28)
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		writeBits(uint(extra), uint64(rest)-(uint64(1)<<extra), storage_ix, storage)
+		histo[sym]++
+
+	default:
+		/* Symbol 39 with 24 extra bits. */
+		writeBits(uint(depth[39]), uint64(bits[39]), storage_ix, storage)
+		writeBits(24, uint64(copylen)-2118, storage_ix, storage)
+		histo[39]++
+	}
+}
+
+/* Writes the prefix symbol and extra bits for a copy length to the bit stream
+   and counts the emitted symbols in "histo". For copy lengths of 72 and above
+   an additional symbol 64 is written after the length and counted as well. */
+func emitCopyLenLastDistance1(copylen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
+	switch {
+	case copylen < 12:
+		/* One symbol per length, no extra bits. */
+		var sym uint = copylen - 4
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		histo[sym]++
+
+	case copylen < 72:
+		/* Two symbols per power of two. */
+		var rest uint = copylen - 8
+		var extra uint32 = log2FloorNonZero(rest) - 1
+		var top uint = rest >> extra
+		var sym uint = uint((extra << 1) + uint32(top) + 4)
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		writeBits(uint(extra), uint64(rest)-(uint64(top)<<extra), storage_ix, storage)
+		histo[sym]++
+
+	case copylen < 136:
+		/* Five extra bits, followed by symbol 64. */
+		var rest uint = copylen - 8
+		var sym uint = (rest >> 5) + 30
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		writeBits(5, uint64(rest)&31, storage_ix, storage)
+		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
+		histo[sym]++
+		histo[64]++
+
+	case copylen < 2120:
+		/* One symbol per power of two, followed by symbol 64. */
+		var rest uint = copylen - 72
+		var extra uint32 = log2FloorNonZero(rest)
+		var sym uint = uint(extra + 28)
+		writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+		writeBits(uint(extra), uint64(rest)-(uint64(1)<<extra), storage_ix, storage)
+		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
+		histo[sym]++
+		histo[64]++
+
+	default:
+		/* Symbol 39 with 24 extra bits, followed by symbol 64. */
+		writeBits(uint(depth[39]), uint64(bits[39]), storage_ix, storage)
+		writeBits(24, uint64(copylen)-2120, storage_ix, storage)
+		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
+		histo[39]++
+		histo[64]++
+	}
+}
+
+/* Writes the prefix symbol and extra bits for a backward distance to the bit
+   stream and counts the symbol in "histo". Distance symbols start at index 80
+   of "depth", "bits" and "histo". */
+func emitDistance1(distance uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
+	var shifted uint = distance + 3
+	var extra uint32 = log2FloorNonZero(shifted) - 1
+	/* The bit just below the leading one selects between the two symbols that
+	   share the same number of extra bits. */
+	var half uint = (shifted >> extra) & 1
+	var base uint = (2 + half) << extra
+	var sym uint = uint(2*(extra-1) + uint32(half) + 80)
+
+	writeBits(uint(depth[sym]), uint64(bits[sym]), storage_ix, storage)
+	writeBits(uint(extra), uint64(shifted)-uint64(base), storage_ix, storage)
+	histo[sym]++
+}
+
+func emitLiterals(input []byte, len uint, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
+	var j uint
+	for j = 0; j < len; j++ {
+		var lit byte = input[j]
+		writeBits(uint(depth[lit]), uint64(bits[lit]), storage_ix, storage)
+	}
+}
+
+/* Writes a meta-block header for a non-last meta-block of "len" bytes: the
+   ISLAST bit (0), a 2-bit field holding the nibble count minus 4, the length
+   minus one in 4, 5 or 6 nibbles, and finally the ISUNCOMPRESSED bit.
+   REQUIRES: len <= 1 << 24. */
+func storeMetaBlockHeader1(len uint, is_uncompressed bool, storage_ix *uint, storage []byte) {
+	/* ISLAST */
+	writeBits(1, 0, storage_ix, storage)
+
+	/* Smallest number of nibbles that can hold len - 1. */
+	var nibbles uint
+	switch {
+	case len <= 1<<16:
+		nibbles = 4
+	case len <= 1<<20:
+		nibbles = 5
+	default:
+		nibbles = 6
+	}
+
+	writeBits(2, uint64(nibbles)-4, storage_ix, storage)
+	writeBits(nibbles*4, uint64(len)-1, storage_ix, storage)
+
+	/* ISUNCOMPRESSED */
+	writeSingleBit(is_uncompressed, storage_ix, storage)
+}
+
+/* Overwrites "n_bits" bits of "array", starting at bit position "pos", with
+   the low "n_bits" bits of "bits" (least significant bit first). Bits outside
+   that range are left untouched. The update proceeds one byte at a time. */
+func updateBits(n_bits uint, bits uint32, pos uint, array []byte) {
+	for n_bits > 0 {
+		var idx uint = pos >> 3
+		/* Bits of this byte below the write position stay as they are. */
+		var keep_low uint = pos & 7
+		var take uint = brotli_min_size_t(n_bits, 8-keep_low)
+
+		var below uint32 = (1 << keep_low) - 1
+		var above uint32 = ^((1 << (keep_low + take)) - 1)
+		var kept uint32 = uint32(array[idx]) & (above | below)
+		var fresh uint32 = bits & ((1 << take) - 1)
+		array[idx] = byte(fresh<<keep_low | kept)
+
+		n_bits -= take
+		bits >>= take
+		pos += take
+	}
+}
+
+/* Moves the write position back to bit "new_storage_ix". In the byte that
+   contains the new position, all bits at or above it are cleared so that
+   later writes can OR their data in; other bytes are not modified. */
+func rewindBitPosition1(new_storage_ix uint, storage_ix *uint, storage []byte) {
+	var byte_ix uint = new_storage_ix >> 3
+	var keep uint = new_storage_ix & 7
+	storage[byte_ix] &= byte((uint(1) << keep) - 1)
+	*storage_ix = new_storage_ix
+}
+
+var shouldMergeBlock_kSampleRate uint = 43
+
+/* Decides whether a block of "len" bytes starting at "data" should be folded
+   into the current meta-block, i.e. coded with the literal code lengths in
+   "depths". Every shouldMergeBlock_kSampleRate-th byte is sampled into a
+   histogram; a score is then computed from the sample size, the sampled counts
+   and the existing depths, and the block is merged when that score is
+   non-negative. */
+func shouldMergeBlock(data []byte, len uint, depths []byte) bool {
+	var counts [256]uint
+	for pos := uint(0); pos < len; pos += shouldMergeBlock_kSampleRate {
+		counts[data[pos]]++
+	}
+
+	/* Number of samples taken above. */
+	var samples uint = (len + shouldMergeBlock_kSampleRate - 1) / shouldMergeBlock_kSampleRate
+	var score float64 = (fastLog2(samples)+0.5)*float64(samples) + 200
+	for sym := range counts {
+		score -= float64(counts[sym]) * (float64(depths[sym]) + fastLog2(counts[sym]))
+	}
+
+	return score >= 0.0
+}
+
+/* Reports whether the current meta-block should be stored uncompressed.
+   "metablock_start" and "next_emit" must be slices of the same backing array;
+   the difference of their capacities is the number of bytes of the meta-block
+   already covered. Uncompressed mode is chosen only when that number times 50
+   does not exceed "insertlen" and "literal_ratio" is above 980. */
+func shouldUseUncompressedMode(metablock_start []byte, next_emit []byte, insertlen uint, literal_ratio uint) bool {
+	var covered uint = uint(cap(metablock_start) - cap(next_emit))
+	return covered*50 <= insertlen && literal_ratio > 980
+}
+
+/* Replaces everything written since bit position "storage_ix_start" with a
+   single uncompressed meta-block holding the bytes between "begin" and "end".
+   "begin" and "end" must be slices of the same backing array; the payload
+   length is the difference of their capacities. The payload is stored
+   byte-aligned, and the byte following it is zeroed for subsequent writes. */
+func emitUncompressedMetaBlock1(begin []byte, end []byte, storage_ix_start uint, storage_ix *uint, storage []byte) {
+	var size uint = uint(cap(begin) - cap(end))
+
+	rewindBitPosition1(storage_ix_start, storage_ix, storage)
+	storeMetaBlockHeader1(size, true, storage_ix, storage)
+
+	/* Round the bit position up to the next byte boundary. */
+	*storage_ix = (*storage_ix + 7) &^ 7
+	copy(storage[*storage_ix>>3:], begin[:size])
+	*storage_ix += size << 3
+	storage[*storage_ix>>3] = 0
+}
+
+/* Initial contents of the 128-entry command/distance histogram.
+   compressFragmentFastImpl copies this table into its "cmd_histo" array each
+   time it starts emitting commands, so entries that are 1 here begin with a
+   non-zero count and entries that are 0 begin at zero. */
+var kCmdHistoSeed = [128]uint32{
+	0,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	0,
+	0,
+	0,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	0,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	1,
+	0,
+	0,
+	0,
+	0,
+}
+
+/* Block sizes used by compressFragmentFastImpl.
+   kFirstBlockSize (3 << 15 bytes) caps the first block of every meta-block;
+   kMergeBlockSize (1 << 16 bytes) caps each further block that is considered
+   for merging into the meta-block being written. */
+var compressFragmentFastImpl_kFirstBlockSize uint = 3 << 15
+var compressFragmentFastImpl_kMergeBlockSize uint = 1 << 16
+
+/* compressFragmentFastImpl compresses the first "input_size" bytes of "in" into
+   one or more meta-blocks written to "storage" at bit position "*storage_ix".
+
+   "table" is the hash table of candidate positions, indexed by a 5-byte hash
+   reduced to "table_bits" bits. "cmd_depth"/"cmd_bits" hold the command and
+   distance prefix code used for the first meta-block, and
+   "cmd_code"/"*cmd_code_numbits" hold its already-serialized form, which is
+   copied into the output verbatim. Later meta-blocks use a code rebuilt from
+   the statistics gathered in "cmd_histo".
+
+   The function is a state machine driven by three labels:
+     emit_commands  - scan the current block and emit commands;
+     emit_remainder - the block is exhausted: either extend the meta-block with
+                      the next block or flush the trailing literals;
+     next_block     - start a new meta-block if input remains.
+
+   When "is_last" is false, the command prefix code is rebuilt from the final
+   histogram and its serialized form is stored back into "cmd_code" and
+   "*cmd_code_numbits" before returning. */
+func compressFragmentFastImpl(in []byte, input_size uint, is_last bool, table []int, table_bits uint, cmd_depth []byte, cmd_bits []uint16, cmd_code_numbits *uint, cmd_code []byte, storage_ix *uint, storage []byte) {
+	var cmd_histo [128]uint32
+	var ip_end int
+	var next_emit int = 0
+	var base_ip int = 0
+	var input int = 0
+	const kInputMarginBytes uint = windowGap
+	const kMinMatchLen uint = 5
+	var metablock_start int = input
+	var block_size uint = brotli_min_size_t(input_size, compressFragmentFastImpl_kFirstBlockSize)
+	var total_block_size uint = block_size
+	/* +3 skips the ISLAST bit and the 2-bit nibble count that
+	   storeMetaBlockHeader1 writes ahead of the length field. */
+	var mlen_storage_ix uint = *storage_ix + 3
+	var lit_depth [256]byte
+	var lit_bits [256]uint16
+	var literal_ratio uint
+	var ip int
+	var last_distance int
+	var shift uint = 64 - table_bits
+
+	/* "next_emit" is a pointer to the first byte that is not covered by a
+	   previous copy. Bytes between "next_emit" and the start of the next copy or
+	   the end of the input will be emitted as literal bytes. */
+
+	/* Save the start of the first block for position and distance computations.
+	 */
+
+	/* Save the bit position of the MLEN field of the meta-block header, so that
+	   we can update it later if we decide to extend this meta-block. */
+	storeMetaBlockHeader1(block_size, false, storage_ix, storage)
+
+	/* No block splits, no contexts. */
+	writeBits(13, 0, storage_ix, storage)
+
+	literal_ratio = buildAndStoreLiteralPrefixCode(in[input:], block_size, lit_depth[:], lit_bits[:], storage_ix, storage)
+	{
+		/* Store the pre-compressed command and distance prefix codes. */
+		var i uint
+		for i = 0; i+7 < *cmd_code_numbits; i += 8 {
+			writeBits(8, uint64(cmd_code[i>>3]), storage_ix, storage)
+		}
+	}
+
+	/* Remaining (*cmd_code_numbits & 7) bits of the serialized code. */
+	writeBits(*cmd_code_numbits&7, uint64(cmd_code[*cmd_code_numbits>>3]), storage_ix, storage)
+
+	/* Initialize the command and distance histograms. We will gather
+	   statistics of command and distance codes during the processing
+	   of this block and use it to update the command and distance
+	   prefix codes for the next block. */
+emit_commands:
+	copy(cmd_histo[:], kCmdHistoSeed[:])
+
+	/* "ip" is the input pointer. */
+	ip = input
+
+	/* With -1, the first "repeat last distance" probe below looks at ip + 1,
+	   which the "candidate < ip" test always rejects. */
+	last_distance = -1
+	ip_end = int(uint(input) + block_size)
+
+	if block_size >= kInputMarginBytes {
+		var len_limit uint = brotli_min_size_t(block_size-kMinMatchLen, input_size-kInputMarginBytes)
+		var ip_limit int = int(uint(input) + len_limit)
+		/* For the last block, we need to keep a 16 bytes margin so that we can be
+		   sure that all distances are at most window size - 16.
+		   For all other blocks, we only need to keep a margin of 5 bytes so that
+		   we don't go over the block size with a copy. */
+
+		var next_hash uint32
+		ip++
+		for next_hash = hash5(in[ip:], shift); ; {
+			var skip uint32 = 32
+			var next_ip int = ip
+			/* Step 1: Scan forward in the input looking for a 5-byte-long match.
+			   If we get close to exhausting the input then goto emit_remainder.
+
+			   Heuristic match skipping: If 32 bytes are scanned with no matches
+			   found, start looking only at every other byte. If 32 more bytes are
+			   scanned, look at every third byte, etc.. When a match is found,
+			   immediately go back to looking at every byte. This is a small loss
+			   (~5% performance, ~0.1% density) for compressible data due to more
+			   bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+			   win since the compressor quickly "realizes" the data is incompressible
+			   and doesn't bother looking for matches everywhere.
+
+			   The "skip" variable keeps track of how many bytes there are since the
+			   last match; dividing it by 32 (i.e. right-shifting by five) gives the
+			   number of bytes to move ahead for each iteration. */
+
+			var candidate int
+			assert(next_emit < ip)
+
+		trawl:
+			for {
+				var hash uint32 = next_hash
+				var bytes_between_hash_lookups uint32 = skip >> 5
+				skip++
+				assert(hash == hash5(in[next_ip:], shift))
+				ip = next_ip
+				next_ip = int(uint32(ip) + bytes_between_hash_lookups)
+				if next_ip > ip_limit {
+					goto emit_remainder
+				}
+
+				next_hash = hash5(in[next_ip:], shift)
+				/* First try the position at the previously used distance. */
+				candidate = ip - last_distance
+				if isMatch5(in[ip:], in[candidate:]) {
+					if candidate < ip {
+						table[hash] = int(ip - base_ip)
+						break
+					}
+				}
+
+				/* Otherwise fall back to the hash table entry. */
+				candidate = base_ip + table[hash]
+				assert(candidate >= base_ip)
+				assert(candidate < ip)
+
+				table[hash] = int(ip - base_ip)
+				if isMatch5(in[ip:], in[candidate:]) {
+					break
+				}
+			}
+
+			/* Check copy distance. If candidate is not feasible, continue search.
+			   Checking is done outside of hot loop to reduce overhead. */
+			if ip-candidate > maxDistance_compress_fragment {
+				goto trawl
+			}
+
+			/* Step 2: Emit the found match together with the literal bytes from
+			   "next_emit" to the bit stream, and then see if we can find a next match
+			   immediately afterwards. Repeat until we find no match for the input
+			   without emitting some literal bytes. */
+			{
+				var base int = ip
+				/* > 0 */
+				var matched uint = 5 + findMatchLengthWithLimit(in[candidate+5:], in[ip+5:], uint(ip_end-ip)-5)
+				var distance int = int(base - candidate)
+				/* We have a 5-byte match at ip, and we need to emit bytes in
+				   [next_emit, ip). */
+
+				var insert uint = uint(base - next_emit)
+				ip += int(matched)
+				if insert < 6210 {
+					emitInsertLen1(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
+				} else if shouldUseUncompressedMode(in[metablock_start:], in[next_emit:], insert, literal_ratio) {
+					/* Discard what was written for this meta-block, store the
+					   bytes up to the match uncompressed, and restart with a
+					   fresh meta-block at the match position. */
+					emitUncompressedMetaBlock1(in[metablock_start:], in[base:], mlen_storage_ix-3, storage_ix, storage)
+					input_size -= uint(base - input)
+					input = base
+					next_emit = input
+					goto next_block
+				} else {
+					emitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
+				}
+
+				emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
+				if distance == last_distance {
+					/* Same distance as the previous copy: emit symbol 64. */
+					writeBits(uint(cmd_depth[64]), uint64(cmd_bits[64]), storage_ix, storage)
+					cmd_histo[64]++
+				} else {
+					emitDistance1(uint(distance), cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
+					last_distance = distance
+				}
+
+				emitCopyLenLastDistance1(matched, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
+
+				next_emit = ip
+				if ip >= ip_limit {
+					goto emit_remainder
+				}
+
+				/* We could immediately start working at ip now, but to improve
+				   compression we first update "table" with the hashes of some positions
+				   within the last copy. */
+				{
+					var input_bytes uint64 = binary.LittleEndian.Uint64(in[ip-3:])
+					var prev_hash uint32 = hashBytesAtOffset5(input_bytes, 0, shift)
+					var cur_hash uint32 = hashBytesAtOffset5(input_bytes, 3, shift)
+					table[prev_hash] = int(ip - base_ip - 3)
+					prev_hash = hashBytesAtOffset5(input_bytes, 1, shift)
+					table[prev_hash] = int(ip - base_ip - 2)
+					prev_hash = hashBytesAtOffset5(input_bytes, 2, shift)
+					table[prev_hash] = int(ip - base_ip - 1)
+
+					candidate = base_ip + table[cur_hash]
+					table[cur_hash] = int(ip - base_ip)
+				}
+			}
+
+			for isMatch5(in[ip:], in[candidate:]) {
+				var base int = ip
+				/* We have a 5-byte match at ip, and no need to emit any literal bytes
+				   prior to ip. */
+
+				var matched uint = 5 + findMatchLengthWithLimit(in[candidate+5:], in[ip+5:], uint(ip_end-ip)-5)
+				if ip-candidate > maxDistance_compress_fragment {
+					break
+				}
+				ip += int(matched)
+				last_distance = int(base - candidate) /* > 0 */
+				emitCopyLen1(matched, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
+				emitDistance1(uint(last_distance), cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
+
+				next_emit = ip
+				if ip >= ip_limit {
+					goto emit_remainder
+				}
+
+				/* We could immediately start working at ip now, but to improve
+				   compression we first update "table" with the hashes of some positions
+				   within the last copy. */
+				{
+					var input_bytes uint64 = binary.LittleEndian.Uint64(in[ip-3:])
+					var prev_hash uint32 = hashBytesAtOffset5(input_bytes, 0, shift)
+					var cur_hash uint32 = hashBytesAtOffset5(input_bytes, 3, shift)
+					table[prev_hash] = int(ip - base_ip - 3)
+					prev_hash = hashBytesAtOffset5(input_bytes, 1, shift)
+					table[prev_hash] = int(ip - base_ip - 2)
+					prev_hash = hashBytesAtOffset5(input_bytes, 2, shift)
+					table[prev_hash] = int(ip - base_ip - 1)
+
+					candidate = base_ip + table[cur_hash]
+					table[cur_hash] = int(ip - base_ip)
+				}
+			}
+
+			ip++
+			next_hash = hash5(in[ip:], shift)
+		}
+	}
+
+emit_remainder:
+	assert(next_emit <= ip_end)
+	input += int(block_size)
+	input_size -= block_size
+	block_size = brotli_min_size_t(input_size, compressFragmentFastImpl_kMergeBlockSize)
+
+	/* Decide if we want to continue this meta-block instead of emitting the
+	   last insert-only command. */
+	if input_size > 0 && total_block_size+block_size <= 1<<20 && shouldMergeBlock(in[input:], block_size, lit_depth[:]) {
+		assert(total_block_size > 1<<16)
+
+		/* Update the size of the current meta-block and continue emitting commands.
+		   We can do this because the current size and the new size both have 5
+		   nibbles. */
+		total_block_size += block_size
+
+		updateBits(20, uint32(total_block_size-1), mlen_storage_ix, storage)
+		goto emit_commands
+	}
+
+	/* Emit the remaining bytes as literals. */
+	if next_emit < ip_end {
+		var insert uint = uint(ip_end - next_emit)
+		if insert < 6210 {
+			emitInsertLen1(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
+			emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
+		} else if shouldUseUncompressedMode(in[metablock_start:], in[next_emit:], insert, literal_ratio) {
+			emitUncompressedMetaBlock1(in[metablock_start:], in[ip_end:], mlen_storage_ix-3, storage_ix, storage)
+		} else {
+			emitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
+			emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
+		}
+	}
+
+	next_emit = ip_end
+
+	/* If we have more data, write a new meta-block header and prefix codes and
+	   then continue emitting commands. */
+next_block:
+	if input_size > 0 {
+		metablock_start = input
+		block_size = brotli_min_size_t(input_size, compressFragmentFastImpl_kFirstBlockSize)
+		total_block_size = block_size
+
+		/* Save the bit position of the MLEN field of the meta-block header, so that
+		   we can update it later if we decide to extend this meta-block. */
+		mlen_storage_ix = *storage_ix + 3
+
+		storeMetaBlockHeader1(block_size, false, storage_ix, storage)
+
+		/* No block splits, no contexts. */
+		writeBits(13, 0, storage_ix, storage)
+
+		literal_ratio = buildAndStoreLiteralPrefixCode(in[input:], block_size, lit_depth[:], lit_bits[:], storage_ix, storage)
+		/* Unlike the first meta-block, the command code is rebuilt from the
+		   histogram gathered so far and written directly to the output. */
+		buildAndStoreCommandPrefixCode1(cmd_histo[:], cmd_depth, cmd_bits, storage_ix, storage)
+		goto emit_commands
+	}
+
+	if !is_last {
+		/* If this is not the last block, update the command and distance prefix
+		   codes for the next block and store the compressed forms. */
+		cmd_code[0] = 0
+
+		*cmd_code_numbits = 0
+		buildAndStoreCommandPrefixCode1(cmd_histo[:], cmd_depth, cmd_bits, cmd_code_numbits, cmd_code)
+	}
+}
+
+/* Compresses "input" string to the "*storage" buffer as one or more complete
+   meta-blocks, and updates the "*storage_ix" bit position.
+
+   If "is_last" is 1, emits an additional empty last meta-block.
+
+   "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
+   (see comment in encode.h) used for the encoding of this input fragment.
+   If "is_last" is 0, they are updated to reflect the statistics
+   of this input fragment, to be used for the encoding of the next fragment.
+
+   "*cmd_code_numbits" is the number of bits of the compressed representation
+   of the command and distance prefix codes, and "cmd_code" is an array of
+   at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
+   command and distance prefix codes. If "is_last" is 0, these are also
+   updated to represent the updated "cmd_depth" and "cmd_bits".
+
+   REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
+   REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
+   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+   REQUIRES: "table_size" is an odd (9, 11, 13, 15) power of two
+   OUTPUT: maximal copy distance <= |input_size|
+   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
+func compressFragmentFast(input []byte, input_size uint, is_last bool, table []int, table_size uint, cmd_depth []byte, cmd_bits []uint16, cmd_code_numbits *uint, cmd_code []byte, storage_ix *uint, storage []byte) {
+	/* Remember where this fragment starts so it can be rewritten as a single
+	   uncompressed meta-block if compression does not pay off. */
+	var start_ix uint = *storage_ix
+	var table_bits uint = uint(log2FloorNonZero(table_size))
+
+	/* Appends an empty last meta-block and pads to the next byte boundary. */
+	finish := func() {
+		writeBits(1, 1, storage_ix, storage) /* islast */
+		writeBits(1, 1, storage_ix, storage) /* isempty */
+		*storage_ix = (*storage_ix + 7) &^ 7
+	}
+
+	if input_size == 0 {
+		assert(is_last)
+		finish()
+		return
+	}
+
+	compressFragmentFastImpl(input, input_size, is_last, table, table_bits, cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage)
+
+	/* If output is larger than single uncompressed block, rewrite it. */
+	if used := *storage_ix - start_ix; used > 31+(input_size<<3) {
+		emitUncompressedMetaBlock1(input, input[input_size:], start_ix, storage_ix, storage)
+	}
+
+	if is_last {
+		finish()
+	}
+}
@@ -0,0 +1,773 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses two-pass processing: in the first pass we save
+   the found backward matches and literal bytes into a buffer, and in the
+   second pass we emit them into the bit stream using prefix codes built based
+   on the actual command and literal byte histograms. */
+
+/* Upper bound (1 << 17 = 131072 bytes) on the amount of input that
+   compressFragmentTwoPassImpl hands to createCommands per meta-block. */
+const kCompressFragmentTwoPassBlockSize uint = 1 << 17
+
+func hash1(p []byte, shift uint, length uint) uint32 {
+	var h uint64 = (binary.LittleEndian.Uint64(p) << ((8 - length) * 8)) * uint64(kHashMul32)
+	return uint32(h >> shift)
+}
+
+func hashBytesAtOffset(v uint64, offset uint, shift uint, length uint) uint32 {
+	assert(offset <= 8-length)
+	{
+		var h uint64 = ((v >> (8 * offset)) << ((8 - length) * 8)) * uint64(kHashMul32)
+		return uint32(h >> shift)
+	}
+}
+
+/* isMatch1 reports whether "p1" and "p2" start with the same bytes: the first
+   4 bytes when "length" is 4, otherwise the first 6 bytes. */
+func isMatch1(p1 []byte, p2 []byte, length uint) bool {
+	head1, head2 := binary.LittleEndian.Uint32(p1), binary.LittleEndian.Uint32(p2)
+	switch {
+	case head1 != head2:
+		return false
+	case length == 4:
+		return true
+	default:
+		/* Any other length is treated as 6: compare the two remaining bytes. */
+		return p1[4] == p2[4] && p1[5] == p2[5]
+	}
+}
+
+/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
+   "bits" based on "histogram" and stores it into the bit stream.
+
+   "histogram", "depth" and "bits" each hold 128 entries: the first 64 belong
+   to the command code, the last 64 to the distance code. */
+func buildAndStoreCommandPrefixCode(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
+	/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
+	var tree [129]huffmanTree
+	var full_depth [numCommandSymbols]byte
+	var packed_bits [64]uint16
+
+	createHuffmanTree(histogram, 64, 15, tree[:], depth)
+	createHuffmanTree(histogram[64:], 64, 14, tree[:], depth[64:])
+
+	/* We have to jump through a few hoops here in order to compute
+	   the command bits because the symbols are in a different order than in
+	   the full alphabet. This looks complicated, but having the symbols
+	   in this order in the command bits saves a few branches in the Emit*
+	   functions. */
+	copy(full_depth[:], depth[24:][:24])
+	copy(full_depth[24:], depth[:8])
+	copy(full_depth[32:], depth[48:][:8])
+	copy(full_depth[40:], depth[8:][:8])
+	copy(full_depth[48:], depth[56:][:8])
+	copy(full_depth[56:], depth[16:][:8])
+	convertBitDepthsToSymbols(full_depth[:], 64, packed_bits[:])
+
+	/* Scatter the computed codes back into the caller's symbol order. */
+	copy(bits, packed_bits[24:][:8])
+	copy(bits[8:], packed_bits[40:][:8])
+	copy(bits[16:], packed_bits[56:][:8])
+	copy(bits[24:], packed_bits[:24])
+	copy(bits[48:], packed_bits[32:][:8])
+	copy(bits[56:], packed_bits[48:][:8])
+	convertBitDepthsToSymbols(depth[64:], 64, bits[64:])
+
+	/* Create the bit length array for the full command alphabet. Only the
+	   first 64 entries of the scratch array were used above, so only those
+	   need clearing. */
+	for k := range full_depth[:64] {
+		full_depth[k] = 0
+	}
+	copy(full_depth[:], depth[24:][:8])
+	copy(full_depth[64:], depth[32:][:8])
+	copy(full_depth[128:], depth[40:][:8])
+	copy(full_depth[192:], depth[48:][:8])
+	copy(full_depth[384:], depth[56:][:8])
+	/* The strided writes must follow the copies: index 128 is written by both
+	   and the strided value wins. */
+	for k := 0; k < 8; k++ {
+		full_depth[128+8*k] = depth[k]
+		full_depth[256+8*k] = depth[8+k]
+		full_depth[448+8*k] = depth[16+k]
+	}
+
+	storeHuffmanTree(full_depth[:], numCommandSymbols, tree[:], storage_ix, storage)
+	storeHuffmanTree(depth[64:], 64, tree[:], storage_ix, storage)
+}
+
+/* emitInsertLen appends one command word describing an insert (literal run)
+   of "insertlen" bytes: the code (0..23) goes into the low 8 bits and the
+   extra-bits value into the bits above. "*commands" is advanced past the
+   written word. */
+func emitInsertLen(insertlen uint32, commands *[]uint32) {
+	var word uint32
+	switch {
+	case insertlen < 6:
+		/* Lengths 0..5 are their own code and carry no extra bits. */
+		word = insertlen
+	case insertlen < 130:
+		tail := insertlen - 2
+		nbits := log2FloorNonZero(uint(tail)) - 1
+		prefix := tail >> nbits
+		word = ((nbits << 1) + prefix + 2) | (tail-(prefix<<nbits))<<8
+	case insertlen < 2114:
+		tail := insertlen - 66
+		nbits := log2FloorNonZero(uint(tail))
+		word = (nbits + 10) | (tail-(1<<nbits))<<8
+	case insertlen < 6210:
+		word = 21 | (insertlen-2114)<<8
+	case insertlen < 22594:
+		word = 22 | (insertlen-6210)<<8
+	default:
+		word = 23 | (insertlen-22594)<<8
+	}
+
+	(*commands)[0] = word
+	*commands = (*commands)[1:]
+}
+
+/* emitCopyLen appends one command word describing a copy of "copylen" bytes
+   whose distance is emitted separately: code in the low 8 bits, extra-bits
+   value above. "*commands" is advanced past the written word. */
+func emitCopyLen(copylen uint, commands *[]uint32) {
+	var word uint
+	switch {
+	case copylen < 10:
+		word = copylen + 38
+	case copylen < 134:
+		tail := copylen - 6
+		nbits := uint(log2FloorNonZero(tail) - 1)
+		prefix := tail >> nbits
+		word = ((nbits << 1) + prefix + 44) | (tail-(prefix<<nbits))<<8
+	case copylen < 2118:
+		tail := copylen - 70
+		nbits := uint(log2FloorNonZero(tail))
+		word = (nbits + 52) | (tail-(uint(1)<<nbits))<<8
+	default:
+		word = 63 | (copylen-2118)<<8
+	}
+
+	(*commands)[0] = uint32(word)
+	*commands = (*commands)[1:]
+}
+
+/* emitCopyLenLastDistance appends the copy-length command word used by
+   createCommands for a match that follows an insert. Lengths below 72 take a
+   single word; longer lengths are followed by an extra word holding code 64,
+   the same code createCommands writes when a match reuses the previous
+   distance. "*commands" is advanced past every written word. */
+func emitCopyLenLastDistance(copylen uint, commands *[]uint32) {
+	var word uint
+	trailing_64 := true
+	switch {
+	case copylen < 12:
+		word = copylen + 20
+		trailing_64 = false
+	case copylen < 72:
+		tail := copylen - 8
+		nbits := uint(log2FloorNonZero(tail) - 1)
+		prefix := tail >> nbits
+		word = ((nbits << 1) + prefix + 28) | (tail-(prefix<<nbits))<<8
+		trailing_64 = false
+	case copylen < 136:
+		tail := copylen - 8
+		word = ((tail >> 5) + 54) | (tail&31)<<8
+	case copylen < 2120:
+		tail := copylen - 72
+		nbits := uint(log2FloorNonZero(tail))
+		word = (nbits + 52) | (tail-(uint(1)<<nbits))<<8
+	default:
+		word = 63 | (copylen-2120)<<8
+	}
+
+	(*commands)[0] = uint32(word)
+	*commands = (*commands)[1:]
+	if trailing_64 {
+		(*commands)[0] = 64
+		*commands = (*commands)[1:]
+	}
+}
+
+func emitDistance(distance uint32, commands *[]uint32) {
+	var d uint32 = distance + 3
+	var nbits uint32 = log2FloorNonZero(uint(d)) - 1
+	var prefix uint32 = (d >> nbits) & 1
+	var offset uint32 = (2 + prefix) << nbits
+	var distcode uint32 = 2*(nbits-1) + prefix + 80
+	var extra uint32 = d - offset
+	(*commands)[0] = distcode | extra<<8
+	*commands = (*commands)[1:]
+}
+
+/* REQUIRES: len <= 1 << 24. */
+/* storeMetaBlockHeader writes the header of a non-final meta-block covering
+   "len" input bytes: ISLAST = 0, the nibble count minus 4 (2 bits), len - 1
+   in that many nibbles, and finally the ISUNCOMPRESSED bit. */
+func storeMetaBlockHeader(len uint, is_uncompressed bool, storage_ix *uint, storage []byte) {
+	/* Pick the smallest nibble count (4, 5 or 6) able to hold len - 1. */
+	var nibbles uint
+	switch {
+	case len <= 1<<16:
+		nibbles = 4
+	case len <= 1<<20:
+		nibbles = 5
+	default:
+		nibbles = 6
+	}
+
+	writeBits(1, 0, storage_ix, storage) /* ISLAST */
+	writeBits(2, uint64(nibbles)-4, storage_ix, storage)
+	writeBits(nibbles*4, uint64(len)-1, storage_ix, storage)
+	writeSingleBit(is_uncompressed, storage_ix, storage) /* ISUNCOMPRESSED */
+}
+
+/* storeMetaBlockHeaderBW is the bitWriter counterpart of storeMetaBlockHeader:
+   it writes ISLAST = 0, the nibble count minus 4 (2 bits), len - 1 in that
+   many nibbles, and the ISUNCOMPRESSED bit to "bw". */
+func storeMetaBlockHeaderBW(len uint, is_uncompressed bool, bw *bitWriter) {
+	/* Pick the smallest nibble count (4, 5 or 6) able to hold len - 1. */
+	var nibbles uint
+	switch {
+	case len <= 1<<16:
+		nibbles = 4
+	case len <= 1<<20:
+		nibbles = 5
+	default:
+		nibbles = 6
+	}
+
+	bw.writeBits(1, 0) /* ISLAST */
+	bw.writeBits(2, uint64(nibbles)-4)
+	bw.writeBits(nibbles*4, uint64(len)-1)
+	bw.writeSingleBit(is_uncompressed) /* ISUNCOMPRESSED */
+}
+
+/*
+createCommands scans the first "block_size" bytes of "input" for backward
+matches of at least "min_match" (4 or 6) bytes and records the result as
+command words in "*commands" and literal bytes in "*literals". Both slices are
+advanced past what was written, so the caller can derive the counts from the
+change in capacity.
+
+"base_ip_ptr" is the start of the whole fragment; "table" maps hashes to
+positions relative to it. "input_size" is the number of bytes left in the
+fragment, including this block.
+
+Positions ("ip", "candidate", ...) are measured from the start of "input";
+"base_ip" is the (non-positive) position of "base_ip_ptr" on that scale. This
+relies on "input" being a tail sub-slice of "base_ip_ptr", which is how
+compressFragmentTwoPassImpl calls it.
+*/
+func createCommands(input []byte, block_size uint, input_size uint, base_ip_ptr []byte, table []int, table_bits uint, min_match uint, literals *[]byte, commands *[]uint32) {
+	/* "ip" is the input pointer. */
+	var ip int = 0
+	var shift uint = 64 - table_bits
+	var ip_end int = int(block_size)
+	var base_ip int = -cap(base_ip_ptr) + cap(input)
+	var next_emit int = 0
+	var last_distance int = -1
+
+	const kInputMarginBytes uint = windowGap
+
+	/* "next_emit" is a pointer to the first byte that is not covered by a
+	   previous copy. Bytes between "next_emit" and the start of the next copy or
+	   the end of the input will be emitted as literal bytes. */
+	if block_size >= kInputMarginBytes {
+		/* For the last block, we need to keep a 16 bytes margin so that we can be
+		   sure that all distances are at most window size - 16.
+		   For all other blocks, we only need to keep a margin of 5 bytes so that
+		   we don't go over the block size with a copy. */
+		var len_limit uint = brotli_min_size_t(block_size-min_match, input_size-kInputMarginBytes)
+		var ip_limit int = int(len_limit)
+
+		var next_hash uint32
+		ip++
+		for next_hash = hash1(input[ip:], shift, min_match); ; {
+			/* Step 1: Scan forward in the input looking for a match of
+			   "min_match" bytes. If we get close to exhausting the input then
+			   goto emit_remainder.
+
+			   Heuristic match skipping: If 32 bytes are scanned with no matches
+			   found, start looking only at every other byte. If 32 more bytes are
+			   scanned, look at every third byte, etc.. When a match is found,
+			   immediately go back to looking at every byte. This is a small loss
+			   (~5% performance, ~0.1% density) for compressible data due to more
+			   bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+			   win since the compressor quickly "realizes" the data is incompressible
+			   and doesn't bother looking for matches everywhere.
+
+			   The "skip" variable keeps track of how many bytes there are since the
+			   last match; dividing it by 32 (ie. right-shifting by five) gives the
+			   number of bytes to move ahead for each iteration. */
+			var skip uint32 = 32
+			var next_ip int = ip
+
+			var candidate int
+
+			assert(next_emit < ip)
+
+		trawl:
+			for {
+				var hash uint32 = next_hash
+				var bytes_between_hash_lookups uint32 = skip >> 5
+				skip++
+				ip = next_ip
+				assert(hash == hash1(input[ip:], shift, min_match))
+				next_ip = int(uint32(ip) + bytes_between_hash_lookups)
+				if next_ip > ip_limit {
+					goto emit_remainder
+				}
+
+				next_hash = hash1(input[next_ip:], shift, min_match)
+
+				/* First try the previous distance. While "last_distance" is still
+				   -1 the candidate lies ahead of "ip" and is rejected below. */
+				candidate = ip - last_distance
+				if isMatch1(input[ip:], base_ip_ptr[candidate-base_ip:], min_match) {
+					if candidate < ip {
+						table[hash] = int(ip - base_ip)
+						break
+					}
+				}
+
+				/* Otherwise try the position last stored under this hash. */
+				candidate = base_ip + table[hash]
+				assert(candidate >= base_ip)
+				assert(candidate < ip)
+
+				table[hash] = int(ip - base_ip)
+				if isMatch1(input[ip:], base_ip_ptr[candidate-base_ip:], min_match) {
+					break
+				}
+			}
+
+			/* Check copy distance. If candidate is not feasible, continue search.
+			   Checking is done outside of hot loop to reduce overhead. */
+			if ip-candidate > maxDistance_compress_fragment {
+				goto trawl
+			}
+
+			/* Step 2: Emit the found match together with the literal bytes from
+			   "next_emit", and then see if we can find a next match immediately
+			   afterwards. Repeat until we find no match for the input
+			   without emitting some literal bytes. */
+			{
+				var base int = ip
+				var matched uint = min_match + findMatchLengthWithLimit(base_ip_ptr[uint(candidate-base_ip)+min_match:], input[uint(ip)+min_match:], uint(ip_end-ip)-min_match)
+				var distance int = int(base - candidate) /* > 0 */
+
+				/* We have a match of at least "min_match" bytes at ip, and we
+				   need to emit bytes in [next_emit, ip). */
+				var insert int = int(base - next_emit)
+				ip += int(matched)
+				emitInsertLen(uint32(insert), commands)
+				copy(*literals, input[next_emit:][:uint(insert)])
+				*literals = (*literals)[insert:]
+				if distance == last_distance {
+					/* Code 64: reuse the previous distance. */
+					(*commands)[0] = 64
+					*commands = (*commands)[1:]
+				} else {
+					emitDistance(uint32(distance), commands)
+					last_distance = distance
+				}
+
+				emitCopyLenLastDistance(matched, commands)
+
+				next_emit = ip
+				if ip >= ip_limit {
+					goto emit_remainder
+				}
+				{
+					/* We could immediately start working at ip now, but to improve
+					   compression we first update "table" with the hashes of some
+					   positions within the last copy. */
+					var input_bytes uint64
+					var cur_hash uint32
+
+					var prev_hash uint32
+					if min_match == 4 {
+						/* One 8-byte load at ip-3 covers positions ip-3..ip. */
+						input_bytes = binary.LittleEndian.Uint64(input[ip-3:])
+						cur_hash = hashBytesAtOffset(input_bytes, 3, shift, min_match)
+						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 3)
+						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 2)
+						/* Offset 2 is the window starting at ip-1. Hashing offset 0
+						   here would file position ip-1 under the hash of ip-3. */
+						prev_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 1)
+					} else {
+						/* Two loads are needed: 6-byte windows allow at most
+						   offsets 0..2 inside one 8-byte word. */
+						input_bytes = binary.LittleEndian.Uint64(input[ip-5:])
+						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 5)
+						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 4)
+						prev_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 3)
+						input_bytes = binary.LittleEndian.Uint64(input[ip-2:])
+						cur_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
+						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 2)
+						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 1)
+					}
+
+					candidate = base_ip + table[cur_hash]
+					table[cur_hash] = int(ip - base_ip)
+				}
+			}
+
+			for ip-candidate <= maxDistance_compress_fragment && isMatch1(input[ip:], base_ip_ptr[candidate-base_ip:], min_match) {
+				var base int = ip
+
+				/* We have a match of at least "min_match" bytes at ip, and no
+				   need to emit any literal bytes prior to ip. */
+				var matched uint = min_match + findMatchLengthWithLimit(base_ip_ptr[uint(candidate-base_ip)+min_match:], input[uint(ip)+min_match:], uint(ip_end-ip)-min_match)
+				ip += int(matched)
+				last_distance = int(base - candidate) /* > 0 */
+				emitCopyLen(matched, commands)
+				emitDistance(uint32(last_distance), commands)
+
+				next_emit = ip
+				if ip >= ip_limit {
+					goto emit_remainder
+				}
+				{
+					/* We could immediately start working at ip now, but to improve
+					   compression we first update "table" with the hashes of some
+					   positions within the last copy. */
+					var input_bytes uint64
+					var cur_hash uint32
+
+					var prev_hash uint32
+					if min_match == 4 {
+						input_bytes = binary.LittleEndian.Uint64(input[ip-3:])
+						cur_hash = hashBytesAtOffset(input_bytes, 3, shift, min_match)
+						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 3)
+						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 2)
+						prev_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 1)
+					} else {
+						input_bytes = binary.LittleEndian.Uint64(input[ip-5:])
+						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 5)
+						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 4)
+						prev_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 3)
+						input_bytes = binary.LittleEndian.Uint64(input[ip-2:])
+						cur_hash = hashBytesAtOffset(input_bytes, 2, shift, min_match)
+						prev_hash = hashBytesAtOffset(input_bytes, 0, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 2)
+						prev_hash = hashBytesAtOffset(input_bytes, 1, shift, min_match)
+						table[prev_hash] = int(ip - base_ip - 1)
+					}
+
+					candidate = base_ip + table[cur_hash]
+					table[cur_hash] = int(ip - base_ip)
+				}
+			}
+
+			ip++
+			next_hash = hash1(input[ip:], shift, min_match)
+		}
+	}
+
+emit_remainder:
+	assert(next_emit <= ip_end)
+
+	/* Emit the remaining bytes as literals. */
+	if next_emit < ip_end {
+		var insert uint32 = uint32(ip_end - next_emit)
+		emitInsertLen(insert, commands)
+		copy(*literals, input[next_emit:][:insert])
+		*literals = (*literals)[insert:]
+	}
+}
+
+/* Number of extra bits that follow each of the 128 command codes written by
+   storeCommands, indexed by code (the low 8 bits of a command word). */
+var storeCommands_kNumExtraBits = [128]uint32{
+	/* 0..23: insert length codes (see emitInsertLen). */
+	0, 0, 0, 0, 0, 0, 1, 1,
+	2, 2, 3, 3, 4, 4, 5, 5,
+	6, 7, 8, 9, 10, 12, 14, 24,
+	/* 24..39 */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 2, 2, 3, 3, 4, 4,
+	/* 40..63 */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 2, 2, 3, 3, 4, 4,
+	5, 5, 6, 7, 8, 9, 10, 24,
+	/* 64..79 */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	/* 80..127: distance codes (see emitDistance). */
+	1, 1, 2, 2, 3, 3, 4, 4,
+	5, 5, 6, 6, 7, 7, 8, 8,
+	9, 9, 10, 10, 11, 11, 12, 12,
+	13, 13, 14, 14, 15, 15, 16, 16,
+	17, 17, 18, 18, 19, 19, 20, 20,
+	21, 21, 22, 22, 23, 23, 24, 24,
+}
+/* Smallest insert length represented by each of the 24 insert codes.
+   storeCommands adds the extra-bits value to recover the literal count. */
+var storeCommands_kInsertOffset = [24]uint32{
+	0, 1, 2, 3, 4, 5, 6, 8,
+	10, 14, 18, 26, 34, 50, 66, 98,
+	130, 194, 322, 578, 1090, 2114, 6210, 22594,
+}
+
+/* storeCommands writes one meta-block body: it builds and stores a prefix code
+   for the "num_literals" literal bytes and one for the "num_commands" command
+   words, then emits every command followed by its extra bits and, for insert
+   codes (code < 24), the literals that the insert covers. */
+func storeCommands(literals []byte, num_literals uint, commands []uint32, num_commands uint, storage_ix *uint, storage []byte) {
+	var (
+		lit_depths [256]byte
+		lit_bits   [256]uint16
+		lit_histo  [256]uint32
+		cmd_depths [128]byte
+		cmd_bits   [128]uint16
+		cmd_histo  [128]uint32
+	)
+
+	for k := uint(0); k < num_literals; k++ {
+		lit_histo[literals[k]]++
+	}
+
+	buildAndStoreHuffmanTreeFast(lit_histo[:], num_literals, /* max_bits = */
+		8, lit_depths[:], lit_bits[:], storage_ix, storage)
+
+	for k := uint(0); k < num_commands; k++ {
+		code := commands[k] & 0xFF
+		assert(code < 128)
+		cmd_histo[code]++
+	}
+
+	/* These four codes are always counted once more than they occur.
+	   NOTE(review): presumably so each of them is guaranteed a code word --
+	   confirm against the reference encoder. */
+	for _, code := range [...]int{1, 2, 64, 84} {
+		cmd_histo[code]++
+	}
+	buildAndStoreCommandPrefixCode(cmd_histo[:], cmd_depths[:], cmd_bits[:], storage_ix, storage)
+
+	for k := uint(0); k < num_commands; k++ {
+		code := commands[k] & 0xFF
+		extra := commands[k] >> 8
+		assert(code < 128)
+		writeBits(uint(cmd_depths[code]), uint64(cmd_bits[code]), storage_ix, storage)
+		writeBits(uint(storeCommands_kNumExtraBits[code]), uint64(extra), storage_ix, storage)
+		if code >= 24 {
+			continue
+		}
+
+		/* Insert command: consume and emit the literals it covers. */
+		for left := storeCommands_kInsertOffset[code] + extra; left > 0; left-- {
+			lit := literals[0]
+			writeBits(uint(lit_depths[lit]), uint64(lit_bits[lit]), storage_ix, storage)
+			literals = literals[1:]
+		}
+	}
+}
+
+/* Acceptable loss for uncompressible speedup is 2% */
+const minRatio = 0.98
+
+/* shouldCompress looks at every sampleRate-th input byte. */
+const sampleRate = 43
+
+/* shouldCompress decides whether a block is worth entropy coding. It says yes
+   when fewer than minRatio of the bytes ended up as literals; otherwise it
+   estimates the literal entropy from a sample of the input and says yes only
+   if that estimate stays below minRatio of the raw size. */
+func shouldCompress(input []byte, input_size uint, num_literals uint) bool {
+	corpus_size := float64(input_size)
+	if float64(num_literals) < minRatio*corpus_size {
+		return true
+	}
+
+	var sampled [256]uint32
+	for pos := uint(0); pos < input_size; pos += sampleRate {
+		sampled[input[pos]]++
+	}
+
+	/* The bit budget is scaled down by sampleRate because only one byte in
+	   sampleRate is counted. */
+	budget := corpus_size * 8 * minRatio / sampleRate
+	return bitsEntropy(sampled[:], 256) < budget
+}
+
+func rewindBitPosition(new_storage_ix uint, storage_ix *uint, storage []byte) {
+	var bitpos uint = new_storage_ix & 7
+	var mask uint = (1 << bitpos) - 1
+	storage[new_storage_ix>>3] &= byte(mask)
+	*storage_ix = new_storage_ix
+}
+
+/* emitUncompressedMetaBlock stores the first "input_size" bytes of "input"
+   verbatim: an uncompressed meta-block header, padding to a byte boundary, the
+   raw bytes, and a zeroed byte at the new write position. */
+func emitUncompressedMetaBlock(input []byte, input_size uint, storage_ix *uint, storage []byte) {
+	storeMetaBlockHeader(input_size, true, storage_ix, storage)
+
+	/* Raw bytes must start on a byte boundary. */
+	pos := (*storage_ix + 7) &^ 7
+	*storage_ix = pos
+	copy(storage[pos>>3:], input[:input_size])
+
+	pos += input_size << 3
+	*storage_ix = pos
+
+	/* Clear the next byte so subsequent bit writes start from zero. */
+	storage[pos>>3] = 0
+}
+
+func compressFragmentTwoPassImpl(input []byte, input_size uint, is_last bool, command_buf []uint32, literal_buf []byte, table []int, table_bits uint, min_match uint, storage_ix *uint, storage []byte) {
+	/* Save the start of the first block for position and distance computations.
+	 */
+	var base_ip []byte = input
+
+	for input_size > 0 {
+		var block_size uint = brotli_min_size_t(input_size, kCompressFragmentTwoPassBlockSize)
+		var commands []uint32 = command_buf
+		var literals []byte = literal_buf
+		var num_literals uint
+		createCommands(input, block_size, input_size, base_ip, table, table_bits, min_match, &literals, &commands)
+		num_literals = uint(-cap(literals) + cap(literal_buf))
+		if shouldCompress(input, block_size, num_literals) {
+			var num_commands uint = uint(-cap(commands) + cap(command_buf))
+			storeMetaBlockHeader(block_size, false, storage_ix, storage)
+
+			/* No block splits, no contexts. */
+			writeBits(13, 0, storage_ix, storage)
+
+			storeCommands(literal_buf, num_literals, command_buf, num_commands, storage_ix, storage)
+		} else {
+			/* Since we did not find many backward references and the entropy of
+			   the data is close to 8 bits, we can simply emit an uncompressed block.
+			   This makes compression speed of uncompressible data about 3x faster. */
+			emitUncompressedMetaBlock(input, block_size, storage_ix, storage)
+		}
+
+		input = input[block_size:]
+		input_size -= block_size
+	}
+}
+
+/*
+Compresses "input" string to the "*storage" buffer as one or more complete
+
+	meta-blocks, and updates the "*storage_ix" bit position.
+
+	If "is_last" is 1, emits an additional empty last meta-block.
+
+	REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
+	REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
+	REQUIRES: "command_buf" and "literal_buf" point to at least
+	           kCompressFragmentTwoPassBlockSize long arrays.
+	REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+	REQUIRES: "table_size" is a power of two
+	OUTPUT: maximal copy distance <= |input_size|
+	OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18)
+*/
+func compressFragmentTwoPass(input []byte, input_size uint, is_last bool, command_buf []uint32, literal_buf []byte, table []int, table_size uint, storage_ix *uint, storage []byte) {
+	var initial_storage_ix uint = *storage_ix
+	var table_bits uint = uint(log2FloorNonZero(table_size))
+	var min_match uint
+	if table_bits <= 15 {
+		min_match = 4
+	} else {
+		min_match = 6
+	}
+	compressFragmentTwoPassImpl(input, input_size, is_last, command_buf, literal_buf, table, table_bits, min_match, storage_ix, storage)
+
+	/* If output is larger than single uncompressed block, rewrite it. */
+	if *storage_ix-initial_storage_ix > 31+(input_size<<3) {
+		rewindBitPosition(initial_storage_ix, storage_ix, storage)
+		emitUncompressedMetaBlock(input, input_size, storage_ix, storage)
+	}
+
+	if is_last {
+		writeBits(1, 1, storage_ix, storage) /* islast */
+		writeBits(1, 1, storage_ix, storage) /* isempty */
+		*storage_ix = (*storage_ix + 7) &^ 7
+	}
+}
@@ -0,0 +1,77 @@
+package brotli
+
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Specification: 7.3. Encoding of the context map */
+const contextMapMaxRle = 16
+
+/* Specification: 2. Compressed representation overview */
+const maxNumberOfBlockTypes = 256
+
+/* Specification: 3.3. Alphabet sizes: insert-and-copy length */
+const numLiteralSymbols = 256
+
+const numCommandSymbols = 704
+
+const numBlockLenSymbols = 26
+
+/* 256 + 16 = 272 */
+const maxContextMapSymbols = (maxNumberOfBlockTypes + contextMapMaxRle)
+
+/* 256 + 2 = 258 */
+const maxBlockTypeSymbols = (maxNumberOfBlockTypes + 2)
+
+/* Specification: 3.5. Complex prefix codes */
+const repeatPreviousCodeLength = 16
+
+const repeatZeroCodeLength = 17
+
+/* 17 + 1 = 18 */
+const codeLengthCodes = (repeatZeroCodeLength + 1)
+
+/* "code length of 8 is repeated" */
+const initialRepeatedCodeLength = 8
+
+/* "Large Window Brotli" */
+const largeMaxDistanceBits = 62
+
+const largeMinWbits = 10
+
+const largeMaxWbits = 30
+
+/* Specification: 4. Encoding of distances */
+const numDistanceShortCodes = 16
+
+const maxNpostfix = 3
+
+const maxNdirect = 120
+
+const maxDistanceBits = 24
+
+/* distanceAlphabetSize returns the number of distance symbols for the given
+   parameters: the short codes, the NDIRECT direct codes, and
+   MAXNBITS << (NPOSTFIX + 1) further codes. */
+func distanceAlphabetSize(NPOSTFIX uint, NDIRECT uint, MAXNBITS uint) uint {
+	long_codes := uint(MAXNBITS << (NPOSTFIX + 1))
+	return numDistanceShortCodes + NDIRECT + long_codes
+}
+
+/* numDistanceSymbols == 1128, which is what distanceAlphabetSize returns for
+   (maxNpostfix, maxNdirect, largeMaxDistanceBits): 16 + 120 + (62 << 4). */
+const numDistanceSymbols = 1128
+
+/* 0x3FFFFFC == (1 << 26) - 4 */
+const maxDistance = 0x3FFFFFC
+
+/* 0x7FFFFFFC == (1 << 31) - 4 */
+const maxAllowedDistance = 0x7FFFFFFC
+
+/* 7.1. Context modes and context ID lookup for literals */
+/* "context IDs for literals are in the range of 0..63" */
+const literalContextBits = 6
+
+/* 7.2. Context ID for distances */
+const distanceContextBits = 2
+
+/* 9.1. Format of the Stream Header */
+/* Number of slack bytes for window size. Don't confuse
+   with BROTLI_NUM_DISTANCE_SHORT_CODES. maxBackwardLimit subtracts it from
+   the window size. */
+const windowGap = 16
+
+/* maxBackwardLimit returns the largest backward distance for a window of
+   2^W bytes: the window size minus windowGap. */
+func maxBackwardLimit(W uint) uint {
+	window := uint(1) << W
+	return window - windowGap
+}
@@ -0,0 +1,168 @@
+package brotli
+
+import "github.com/andybalholm/brotli/matchfinder"
+
+// An Encoder implements the matchfinder.Encoder interface, writing in Brotli format.
+type Encoder struct {
+	// wroteHeader is set once Encode has written the 4-bit stream header.
+	wroteHeader bool
+	// bw accumulates the output bits; Encode points bw.dst at its dst argument.
+	bw bitWriter
+	// distCache holds the distance code chosen for each match during the
+	// first (histogram) pass of Encode so the second pass can reuse it.
+	distCache []distanceCode
+}
+
+// Reset discards any buffered bits and makes the next call to Encode start a
+// new stream by writing the header again. The distCache scratch buffer is kept
+// for reuse.
+func (e *Encoder) Reset() {
+	e.bw, e.wroteHeader = bitWriter{}, false
+}
+
+// Encode appends the Brotli encoding of src, described by matches, to dst and
+// returns the extended slice. The first call (or the first after Reset) also
+// writes the stream header. If lastBlock is true, an empty last meta-block is
+// appended and the output is padded to a byte boundary; otherwise trailing
+// bits that do not yet fill a byte stay buffered in the Encoder for the next
+// call.
+//
+// The block is written as a single compressed meta-block with no block splits
+// and no context modelling, using prefix codes built from this block's own
+// literal, command and distance histograms.
+func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, lastBlock bool) []byte {
+	e.bw.dst = dst
+	if !e.wroteHeader {
+		e.bw.writeBits(4, 15)
+		e.wroteHeader = true
+	}
+
+	// A meta-block header stores len(src)-1, so an empty src cannot be written
+	// as a regular meta-block. Emit only the stream terminator, if requested.
+	if len(src) == 0 {
+		if lastBlock {
+			e.bw.writeBits(2, 3) // islast + isempty
+			e.bw.jumpToByteBoundary()
+		}
+		return e.bw.dst
+	}
+
+	var literalHisto [256]uint32
+	var commandHisto [704]uint32
+	var distanceHisto [64]uint32
+	literalCount := 0
+	commandCount := 0
+	distanceCount := 0
+
+	if len(e.distCache) < len(matches) {
+		e.distCache = make([]distanceCode, len(matches))
+	}
+
+	// first pass: build the histograms
+	pos := 0
+
+	// d is the ring buffer of the last 4 distances.
+	d := [4]int{-10, -10, -10, -10}
+	for i, m := range matches {
+		if m.Unmatched > 0 {
+			for _, c := range src[pos : pos+m.Unmatched] {
+				literalHisto[c]++
+			}
+			literalCount += m.Unmatched
+		}
+
+		insertCode := getInsertLengthCode(uint(m.Unmatched))
+		copyCode := getCopyLengthCode(uint(m.Length))
+		if m.Length == 0 {
+			// If the stream ends with unmatched bytes, we need a dummy copy length.
+			copyCode = 2
+		}
+		command := combineLengthCodes(insertCode, copyCode, false)
+		commandHisto[command]++
+		commandCount++
+
+		// Commands below 128 carry no explicit distance.
+		if command >= 128 && m.Length != 0 {
+			var distCode distanceCode
+			switch m.Distance {
+			case d[3]:
+				distCode.code = 0
+			case d[2]:
+				distCode.code = 1
+			case d[1]:
+				distCode.code = 2
+			case d[0]:
+				distCode.code = 3
+			case d[3] - 1:
+				distCode.code = 4
+			case d[3] + 1:
+				distCode.code = 5
+			case d[3] - 2:
+				distCode.code = 6
+			case d[3] + 2:
+				distCode.code = 7
+			case d[3] - 3:
+				distCode.code = 8
+			case d[3] + 3:
+				distCode.code = 9
+
+				// In my testing, codes 10–15 actually reduced the compression ratio.
+
+			default:
+				distCode = getDistanceCode(m.Distance)
+			}
+			// Remember the choice so the second pass emits exactly what was counted.
+			e.distCache[i] = distCode
+			distanceHisto[distCode.code]++
+			distanceCount++
+			// Code 0 repeats the most recent distance, so the ring is unchanged.
+			if distCode.code != 0 {
+				d[0], d[1], d[2], d[3] = d[1], d[2], d[3], m.Distance
+			}
+		}
+
+		pos += m.Unmatched + m.Length
+	}
+
+	storeMetaBlockHeaderBW(uint(len(src)), false, &e.bw)
+	// No block splits, no contexts.
+	e.bw.writeBits(13, 0)
+
+	var literalDepths [256]byte
+	var literalBits [256]uint16
+	buildAndStoreHuffmanTreeFastBW(literalHisto[:], uint(literalCount), 8, literalDepths[:], literalBits[:], &e.bw)
+
+	var commandDepths [704]byte
+	var commandBits [704]uint16
+	buildAndStoreHuffmanTreeFastBW(commandHisto[:], uint(commandCount), 10, commandDepths[:], commandBits[:], &e.bw)
+
+	var distanceDepths [64]byte
+	var distanceBits [64]uint16
+	buildAndStoreHuffmanTreeFastBW(distanceHisto[:], uint(distanceCount), 6, distanceDepths[:], distanceBits[:], &e.bw)
+
+	// second pass: write the commands, their extra bits, literals and distances
+	pos = 0
+	for i, m := range matches {
+		insertCode := getInsertLengthCode(uint(m.Unmatched))
+		copyCode := getCopyLengthCode(uint(m.Length))
+		if m.Length == 0 {
+			// If the stream ends with unmatched bytes, we need a dummy copy length.
+			copyCode = 2
+		}
+		command := combineLengthCodes(insertCode, copyCode, false)
+		e.bw.writeBits(uint(commandDepths[command]), uint64(commandBits[command]))
+		if kInsExtra[insertCode] > 0 {
+			e.bw.writeBits(uint(kInsExtra[insertCode]), uint64(m.Unmatched)-uint64(kInsBase[insertCode]))
+		}
+		if kCopyExtra[copyCode] > 0 {
+			e.bw.writeBits(uint(kCopyExtra[copyCode]), uint64(m.Length)-uint64(kCopyBase[copyCode]))
+		}
+
+		if m.Unmatched > 0 {
+			for _, c := range src[pos : pos+m.Unmatched] {
+				e.bw.writeBits(uint(literalDepths[c]), uint64(literalBits[c]))
+			}
+		}
+
+		if command >= 128 && m.Length != 0 {
+			distCode := e.distCache[i]
+			e.bw.writeBits(uint(distanceDepths[distCode.code]), uint64(distanceBits[distCode.code]))
+			if distCode.nExtra > 0 {
+				e.bw.writeBits(distCode.nExtra, distCode.extraBits)
+			}
+		}
+
+		pos += m.Unmatched + m.Length
+	}
+
+	if lastBlock {
+		e.bw.writeBits(2, 3) // islast + isempty
+		e.bw.jumpToByteBoundary()
+	}
+	return e.bw.dst
+}
+
+// distanceCode is one encoded backward distance as written by Encoder.Encode.
+type distanceCode struct {
+	// code is the distance symbol; it indexes the 64-entry distance histogram.
+	code int
+	// nExtra is the number of extra bits that follow the symbol (0 for none).
+	nExtra uint
+	// extraBits is the value written in those nExtra bits.
+	extraBits uint64
+}
+
+// getDistanceCode encodes an explicit backward distance as a distance symbol
+// (16 or above) plus its extra bits.
+func getDistanceCode(distance int) distanceCode {
+	shifted := distance + 3
+	nbits := log2FloorNonZero(uint(shifted)) - 1
+	prefix := (shifted >> nbits) & 1
+
+	return distanceCode{
+		code:      int(2*(nbits-1)) + prefix + 16,
+		nExtra:    uint(nbits),
+		extraBits: uint64(shifted - ((2 + prefix) << nbits)),
+	}
+}
@@ -0,0 +1,22 @@
+package brotli
+
+/* Dictionary data (words and transforms) for 1 possible context.
+   initEncoderDictionary fills every field from the package's static
+   dictionary: "words" from getDictionary(), the three slices from the
+   kStaticDictionary* tables and the cutoff fields from the kCutoffTransforms*
+   values. */
+type encoderDictionary struct {
+	words                 *dictionary
+	cutoffTransformsCount uint32
+	cutoffTransforms      uint64
+	hash_table            []uint16
+	buckets               []uint16
+	dict_words            []dictWord
+}
+
+/* initEncoderDictionary points every field of "dict" at the package's built-in
+   static dictionary data. */
+func initEncoderDictionary(dict *encoderDictionary) {
+	*dict = encoderDictionary{
+		words:                 getDictionary(),
+		hash_table:            kStaticDictionaryHash[:],
+		buckets:               kStaticDictionaryBuckets[:],
+		dict_words:            kStaticDictionaryWords[:],
+		cutoffTransformsCount: kCutoffTransformsCount,
+		cutoffTransforms:      kCutoffTransforms,
+	}
+}
@@ -0,0 +1,592 @@
+package brotli
+
+import "math"
+
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Entropy encoding (Huffman) utilities. */
+
+/* A node of a Huffman tree.
+
+   Leaves are created with index_left_ == -1 and carry their symbol in
+   index_right_or_value_; internal nodes store the pool indices of their two
+   children (setDepth treats index_left_ >= 0 as "internal node"). */
+type huffmanTree struct {
+	/* Population count of the leaf, or the sum of both children's counts. */
+	total_count_          uint32
+	/* Pool index of the left child, or -1 for a leaf. */
+	index_left_           int16
+	/* Pool index of the right child (internal node) or the symbol (leaf). */
+	index_right_or_value_ int16
+}
+
+/* Overwrites *self with a node holding the given count and child/value
+   indices. */
+func initHuffmanTree(self *huffmanTree, count uint32, left int16, right int16) {
+	*self = huffmanTree{
+		total_count_:          count,
+		index_left_:           left,
+		index_right_or_value_: right,
+	}
+}
+
+/* Ordering predicate used by sortHuffmanTreeItems: reports whether the first
+   node must be placed before the second. */
+type huffmanTreeComparator func(huffmanTree, huffmanTree) bool
+
+/* Gap sequence for the Shell sort, largest gap first. */
+var sortHuffmanTreeItems_gaps = []uint{132, 57, 23, 10, 4, 1}
+
+/* Input size optimized sort of items[0:n] in place.
+
+   Fewer than 13 items are handled by a plain insertion sort; anything larger
+   goes through a Shell sort that skips the two largest gaps when n < 57. */
+func sortHuffmanTreeItems(items []huffmanTree, n uint, comparator huffmanTreeComparator) {
+	if n < 13 {
+		/* Insertion sort. */
+		for i := uint(1); i < n; i++ {
+			pending := items[i]
+			slot := i
+			for slot > 0 && comparator(pending, items[slot-1]) {
+				items[slot] = items[slot-1]
+				slot--
+			}
+			items[slot] = pending
+		}
+		return
+	}
+
+	/* Shell sort. */
+	firstGap := 0
+	if n < 57 {
+		firstGap = 2
+	}
+	for _, gap := range sortHuffmanTreeItems_gaps[firstGap:] {
+		for i := gap; i < n; i++ {
+			pending := items[i]
+			slot := i
+			for slot >= gap && comparator(pending, items[slot-gap]) {
+				items[slot] = items[slot-gap]
+				slot -= gap
+			}
+			items[slot] = pending
+		}
+	}
+}
+
+/* Assigns a bit depth to every leaf below pool[p0], storing it in
+   depth[symbol]. The tree is walked iteratively: stack[level] remembers the
+   right child that still has to be visited at that level, or -1 if none.
+
+   Returns true if assignment of depths succeeded, otherwise false. false is
+   returned as soon as a node would lie deeper than max_depth; depth may have
+   been partially written by then. */
+func setDepth(p0 int, pool []huffmanTree, depth []byte, max_depth int) bool {
+	var stack [16]int
+	var level int = 0
+	var p int = p0
+	/* stack has 16 slots, so levels 0..15 are the most it can track. */
+	assert(max_depth <= 15)
+	stack[0] = -1
+	for {
+		if pool[p].index_left_ >= 0 {
+			/* Internal node: remember the right child, descend to the left. */
+			level++
+			if level > max_depth {
+				return false
+			}
+			stack[level] = int(pool[p].index_right_or_value_)
+			p = int(pool[p].index_left_)
+			continue
+		} else {
+			/* Leaf: index_right_or_value_ is the symbol. */
+			depth[pool[p].index_right_or_value_] = byte(level)
+		}
+
+		/* Climb until a level with a pending right child is found. */
+		for level >= 0 && stack[level] == -1 {
+			level--
+		}
+		if level < 0 {
+			return true
+		}
+		p = stack[level]
+		stack[level] = -1
+	}
+}
+
+/* Comparator for the root nodes: least popular first. Nodes with equal counts
+   are ordered by descending index_right_or_value_. */
+func sortHuffmanTree(v0, v1 huffmanTree) bool {
+	if v0.total_count_ == v1.total_count_ {
+		return v0.index_right_or_value_ > v1.index_right_or_value_
+	}
+
+	return v0.total_count_ < v1.total_count_
+}
+
+/* This function will create a Huffman tree.
+
+   The catch here is that the tree cannot be arbitrarily deep.
+   Brotli specifies a maximum depth of 15 bits for "code trees"
+   and 7 bits for "code length code trees."
+
+   count_limit is the value that is to be faked as the minimum value
+   and this minimum value is raised until the tree matches the
+   maximum length requirement.
+
+   This algorithm is not of excellent performance for very long data blocks,
+   especially when population counts are longer than 2**tree_limit, but
+   we are not planning to use this with extremely long blocks.
+
+   Parameters:
+     data        population counts, indexed by symbol; only data[0:length] is read.
+     length      number of symbols.
+     tree_limit  maximum allowed code length, passed to setDepth as max_depth.
+     tree        scratch node pool; indices up to 2n are written, where n is the
+                 number of non-zero counts.
+     depth       output: depth[symbol] receives the code length of each symbol
+                 that has a non-zero count. Other entries are not touched.
+
+   NOTE(review): if every count is zero, n is 0 and "k = n - 1" below wraps
+   around; callers presumably guarantee at least one non-zero count - confirm.
+
+   See http://en.wikipedia.org/wiki/Huffman_coding */
+func createHuffmanTree(data []uint32, length uint, tree_limit int, tree []huffmanTree, depth []byte) {
+	var count_limit uint32
+	var sentinel huffmanTree
+	initHuffmanTree(&sentinel, math.MaxUint32, -1, -1)
+
+	/* For block sizes below 64 kB, we never need to do a second iteration
+	   of this loop. Probably all of our block sizes will be smaller than
+	   that, so this loop is mostly of academic interest. If we actually
+	   would need this, we would be better off with the Katajainen algorithm. */
+	for count_limit = 1; ; count_limit *= 2 {
+		var n uint = 0
+		var i uint
+		var j uint
+		var k uint
+		/* Collect one leaf per non-zero count, walking the symbols from the
+		   highest index down to 0. */
+		for i = length; i != 0; {
+			i--
+			if data[i] != 0 {
+				var count uint32 = brotli_max_uint32_t(data[i], count_limit)
+				initHuffmanTree(&tree[n], count, -1, int16(i))
+				n++
+			}
+		}
+
+		if n == 1 {
+			depth[tree[0].index_right_or_value_] = 1 /* Only one element. */
+			break
+		}
+
+		sortHuffmanTreeItems(tree, n, huffmanTreeComparator(sortHuffmanTree))
+
+		/* The nodes are:
+		   [0, n): the sorted leaf nodes that we start with.
+		   [n]: we add a sentinel here.
+		   [n + 1, 2n): new parent nodes are added here, starting from
+		                (n+1). These are naturally in ascending order.
+		   [2n]: we add a sentinel at the end as well.
+		   There will be (2n+1) elements at the end. */
+		tree[n] = sentinel
+
+		tree[n+1] = sentinel
+
+		i = 0     /* Points to the next leaf node. */
+		j = n + 1 /* Points to the next non-leaf node. */
+		/* n - 1 merges: each one takes the two cheapest remaining nodes, leaf
+		   or parent. The sentinels carry the maximum count, so an exhausted
+		   queue never wins the comparison. */
+		for k = n - 1; k != 0; k-- {
+			var left uint
+			var right uint
+			if tree[i].total_count_ <= tree[j].total_count_ {
+				left = i
+				i++
+			} else {
+				left = j
+				j++
+			}
+
+			if tree[i].total_count_ <= tree[j].total_count_ {
+				right = i
+				i++
+			} else {
+				right = j
+				j++
+			}
+			{
+				/* The sentinel node becomes the parent node. */
+				var j_end uint = 2*n - k
+				tree[j_end].total_count_ = tree[left].total_count_ + tree[right].total_count_
+				tree[j_end].index_left_ = int16(left)
+				tree[j_end].index_right_or_value_ = int16(right)
+
+				/* Add back the last sentinel node. */
+				tree[j_end+1] = sentinel
+			}
+		}
+
+		/* The root is the last parent created, at index 2n - 1. */
+		if setDepth(int(2*n-1), tree[0:], depth, tree_limit) {
+			/* The tree fits in tree_limit bits, so we are done. Had it not
+			   fit, the next iteration would retry with count_limit doubled,
+			   i.e. with the lowest values faked to be larger. */
+			break
+		}
+	}
+}
+
+func reverse(v []byte, start uint, end uint) {
+	end--
+	for start < end {
+		var tmp byte = v[start]
+		v[start] = v[end]
+		v[end] = tmp
+		start++
+		end--
+	}
+}
+
+func writeHuffmanTreeRepetitions(previous_value byte, value byte, repetitions uint, tree_size *uint, tree []byte, extra_bits_data []byte) {
+	assert(repetitions > 0)
+	if previous_value != value {
+		tree[*tree_size] = value
+		extra_bits_data[*tree_size] = 0
+		(*tree_size)++
+		repetitions--
+	}
+
+	if repetitions == 7 {
+		tree[*tree_size] = value
+		extra_bits_data[*tree_size] = 0
+		(*tree_size)++
+		repetitions--
+	}
+
+	if repetitions < 3 {
+		var i uint
+		for i = 0; i < repetitions; i++ {
+			tree[*tree_size] = value
+			extra_bits_data[*tree_size] = 0
+			(*tree_size)++
+		}
+	} else {
+		var start uint = *tree_size
+		repetitions -= 3
+		for {
+			tree[*tree_size] = repeatPreviousCodeLength
+			extra_bits_data[*tree_size] = byte(repetitions & 0x3)
+			(*tree_size)++
+			repetitions >>= 2
+			if repetitions == 0 {
+				break
+			}
+
+			repetitions--
+		}
+
+		reverse(tree, start, *tree_size)
+		reverse(extra_bits_data, start, *tree_size)
+	}
+}
+
+func writeHuffmanTreeRepetitionsZeros(repetitions uint, tree_size *uint, tree []byte, extra_bits_data []byte) {
+	if repetitions == 11 {
+		tree[*tree_size] = 0
+		extra_bits_data[*tree_size] = 0
+		(*tree_size)++
+		repetitions--
+	}
+
+	if repetitions < 3 {
+		var i uint
+		for i = 0; i < repetitions; i++ {
+			tree[*tree_size] = 0
+			extra_bits_data[*tree_size] = 0
+			(*tree_size)++
+		}
+	} else {
+		var start uint = *tree_size
+		repetitions -= 3
+		for {
+			tree[*tree_size] = repeatZeroCodeLength
+			extra_bits_data[*tree_size] = byte(repetitions & 0x7)
+			(*tree_size)++
+			repetitions >>= 3
+			if repetitions == 0 {
+				break
+			}
+
+			repetitions--
+		}
+
+		reverse(tree, start, *tree_size)
+		reverse(extra_bits_data, start, *tree_size)
+	}
+}
+
+/* Change the population counts in a way that the consequent
+   Huffman tree compression, especially its RLE-part will be more
+   likely to compress this data more efficiently.
+
+   length contains the size of the histogram.
+   counts contains the population counts; it is modified in place.
+   good_for_rle is a scratch buffer of at least length size.
+
+   The function returns without touching counts when the histogram has
+   fewer than 16 non-zero entries, and stops after the small gap-filling
+   step when it has fewer than 28. */
+func optimizeHuffmanCountsForRLE(length uint, counts []uint32, good_for_rle []byte) {
+	var nonzero_count uint = 0
+	var stride uint
+	var limit uint
+	var sum uint
+	var streak_limit uint = 1240
+	var i uint
+	/* Let's make the Huffman code more compatible with RLE encoding. */
+	for i = 0; i < length; i++ {
+		if counts[i] != 0 {
+			nonzero_count++
+		}
+	}
+
+	if nonzero_count < 16 {
+		return
+	}
+
+	/* Drop trailing zeros from consideration. */
+	for length != 0 && counts[length-1] == 0 {
+		length--
+	}
+
+	if length == 0 {
+		return /* All zeros. */
+	}
+
+	/* Now counts[0..length - 1] does not have trailing zeros. */
+	{
+		var nonzeros uint = 0
+		var smallest_nonzero uint32 = 1 << 30
+		for i = 0; i < length; i++ {
+			if counts[i] != 0 {
+				nonzeros++
+				if smallest_nonzero > counts[i] {
+					smallest_nonzero = counts[i]
+				}
+			}
+		}
+
+		if nonzeros < 5 {
+			/* Small histogram will model it well. */
+			return
+		}
+
+		if smallest_nonzero < 4 {
+			var zeros uint = length - nonzeros
+			if zeros < 6 {
+				/* Fill isolated single zeros that sit between two non-zero
+				   counts. */
+				for i = 1; i < length-1; i++ {
+					if counts[i-1] != 0 && counts[i] == 0 && counts[i+1] != 0 {
+						counts[i] = 1
+					}
+				}
+			}
+		}
+
+		if nonzeros < 28 {
+			return
+		}
+	}
+
+	/* 2) Let's mark all population counts that already can be encoded
+	   with an RLE code. */
+	/* Note: this loop declares its own int i, shadowing the outer uint i. */
+	for i := 0; i < int(length); i++ {
+		good_for_rle[i] = 0
+	}
+	{
+		var symbol uint32 = counts[0]
+		/* Let's not spoil any of the existing good RLE codes.
+		   Mark any run of 0's of length 5 or more as good_for_rle.
+		   Mark any run of equal non-0's of length 7 or more as good_for_rle. */
+
+		var step uint = 0
+		/* i runs one past the end so that the final run is flushed too. */
+		for i = 0; i <= length; i++ {
+			if i == length || counts[i] != symbol {
+				if (symbol == 0 && step >= 5) || (symbol != 0 && step >= 7) {
+					var k uint
+					for k = 0; k < step; k++ {
+						good_for_rle[i-k-1] = 1
+					}
+				}
+
+				step = 1
+				if i != length {
+					symbol = counts[i]
+				}
+			} else {
+				step++
+			}
+		}
+	}
+
+	/* 3) Let's replace those population counts that lead to more RLE codes.
+	   Math here is in 24.8 fixed point representation. */
+	stride = 0
+
+	limit = uint(256*(counts[0]+counts[1]+counts[2])/3 + 420)
+	sum = 0
+	for i = 0; i <= length; i++ {
+		/* The current stride ends at the end of the data, next to a protected
+		   (good_for_rle) entry, or when 256*counts[i] is not within
+		   streak_limit of limit (the unsigned subtraction wraps, so one
+		   comparison covers both directions). */
+		if i == length || good_for_rle[i] != 0 || (i != 0 && good_for_rle[i-1] != 0) || (256*counts[i]-uint32(limit)+uint32(streak_limit)) >= uint32(2*streak_limit) {
+			if stride >= 4 || (stride >= 3 && sum == 0) {
+				var k uint
+				/* Rounded average of the counts in the stride. */
+				var count uint = (sum + stride/2) / stride
+				/* The stride must end, collapse what we have, if we have enough (4). */
+				if count == 0 {
+					count = 1
+				}
+
+				if sum == 0 {
+					/* Don't make an all zeros stride to be upgraded to ones. */
+					count = 0
+				}
+
+				for k = 0; k < stride; k++ {
+					/* We don't want to change value at counts[i],
+					   that is already belonging to the next stride. Thus - 1. */
+					counts[i-k-1] = uint32(count)
+				}
+			}
+
+			stride = 0
+			sum = 0
+			if i < length-2 {
+				/* All interesting strides have a count of at least 4, */
+				/* at least when non-zeros. */
+				limit = uint(256*(counts[i]+counts[i+1]+counts[i+2])/3 + 420)
+			} else if i < length {
+				limit = uint(256 * counts[i])
+			} else {
+				limit = 0
+			}
+		}
+
+		stride++
+		if i != length {
+			sum += uint(counts[i])
+			/* From 4 elements on, limit tracks the running average of the
+			   stride, in 24.8 fixed point. */
+			if stride >= 4 {
+				limit = (256*sum + stride/2) / stride
+			}
+
+			if stride == 4 {
+				limit += 120
+			}
+		}
+	}
+}
+
+/* Scans depth[0:length] run by run and decides whether run-length coding is
+   worthwhile, separately for zero and for non-zero code lengths.
+
+   Only runs of at least 3 zeros, or at least 4 equal non-zero values, are
+   counted. RLE is enabled for a class when the total length of its counted
+   runs exceeds twice (number of counted runs + 1). */
+func decideOverRLEUse(depth []byte, length uint, use_rle_for_non_zero *bool, use_rle_for_zero *bool) {
+	var zeroTotal, nonZeroTotal uint
+	zeroRuns, nonZeroRuns := uint(1), uint(1)
+
+	for start := uint(0); start < length; {
+		symbol := depth[start]
+		end := start + 1
+		for end < length && depth[end] == symbol {
+			end++
+		}
+		run := end - start
+
+		switch {
+		case symbol == 0 && run >= 3:
+			zeroTotal += run
+			zeroRuns++
+		case symbol != 0 && run >= 4:
+			nonZeroTotal += run
+			nonZeroRuns++
+		}
+
+		start = end
+	}
+
+	*use_rle_for_non_zero = nonZeroTotal > 2*nonZeroRuns
+	*use_rle_for_zero = zeroTotal > 2*zeroRuns
+}
+
+/* Write a Huffman tree from bit depths into the bit-stream representation
+   of a Huffman tree. The generated Huffman tree is to be compressed once
+   more using a Huffman tree */
+func writeHuffmanTree(depth []byte, length uint, tree_size *uint, tree []byte, extra_bits_data []byte) {
+	var previous_value byte = initialRepeatedCodeLength
+	var i uint
+	var use_rle_for_non_zero bool = false
+	var use_rle_for_zero bool = false
+	var new_length uint = length
+	/* Throw away trailing zeros. */
+	for i = 0; i < length; i++ {
+		if depth[length-i-1] == 0 {
+			new_length--
+		} else {
+			break
+		}
+	}
+
+	/* First gather statistics on if it is a good idea to do RLE. */
+	if length > 50 {
+		/* Find RLE coding for longer codes.
+		   Shorter codes seem not to benefit from RLE. */
+		decideOverRLEUse(depth, new_length, &use_rle_for_non_zero, &use_rle_for_zero)
+	}
+
+	/* Actual RLE coding. */
+	for i = 0; i < new_length; {
+		var value byte = depth[i]
+		var reps uint = 1
+		if (value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero) {
+			var k uint
+			for k = i + 1; k < new_length && depth[k] == value; k++ {
+				reps++
+			}
+		}
+
+		if value == 0 {
+			writeHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data)
+		} else {
+			writeHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree, extra_bits_data)
+			previous_value = value
+		}
+
+		i += reps
+	}
+}
+
+/* reverseBits_kLut[x] is the 4-bit value x with its bit order reversed. */
+var reverseBits_kLut = [16]uint{
+	0x00, 0x08, 0x04, 0x0C,
+	0x02, 0x0A, 0x06, 0x0E,
+	0x01, 0x09, 0x05, 0x0D,
+	0x03, 0x0B, 0x07, 0x0F,
+}
+
+func reverseBits(num_bits uint, bits uint16) uint16 {
+	var retval uint = reverseBits_kLut[bits&0x0F]
+	var i uint
+	for i = 4; i < num_bits; i += 4 {
+		retval <<= 4
+		bits = uint16(bits >> 4)
+		retval |= reverseBits_kLut[bits&0x0F]
+	}
+
+	retval >>= ((0 - num_bits) & 0x03)
+	return uint16(retval)
+}
+
+/* 0..15 are values for bits */
+const maxHuffmanBits = 16
+
+/* Get the actual bit values for a tree of bit depths.
+
+   depth[0:len] holds the code length of each symbol; bits[i] receives the
+   bit-reversed code of every symbol i whose depth is non-zero. Entries for
+   depth 0 are left untouched. */
+func convertBitDepthsToSymbols(depth []byte, len uint, bits []uint16) {
+	/* In Brotli, all bit depths are [1..15]
+	   0 bit depth means that the symbol does not exist. */
+	var lengthCounts [maxHuffmanBits]uint16
+	var nextCode [maxHuffmanBits]uint16
+
+	/* Histogram of code lengths. */
+	for sym := uint(0); sym < len; sym++ {
+		lengthCounts[depth[sym]]++
+	}
+	lengthCounts[0] = 0
+
+	/* First code of every length. */
+	code := 0
+	nextCode[0] = 0
+	for length := 1; length < maxHuffmanBits; length++ {
+		code = (code + int(lengthCounts[length-1])) << 1
+		nextCode[length] = uint16(code)
+	}
+
+	/* Hand out consecutive codes within each length, in symbol order. */
+	for sym := uint(0); sym < len; sym++ {
+		d := depth[sym]
+		if d == 0 {
+			continue
+		}
+		bits[sym] = reverseBits(uint(d), nextCode[d])
+		nextCode[d]++
+	}
+}
@@ -0,0 +1,290 @@
+package brotli
+
+import (
+	"math"
+	"math/bits"
+)
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for fast computation of logarithms. */
+
+/* Returns floor(log2(n)) for n > 0, i.e. the index of the highest set bit.
+   For n == 0 the subtraction wraps and the result is the maximum uint32. */
+func log2FloorNonZero(n uint) uint32 {
+	highestBit := bits.Len(n) - 1
+	return uint32(highestBit)
+}
+
+/* A lookup table for small values of log2(int) to be used in entropy
+   computation.
+
+   ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
+var kLog2Table = []float32{
+	0.0000000000000000,
+	0.0000000000000000,
+	1.0000000000000000,
+	1.5849625007211563,
+	2.0000000000000000,
+	2.3219280948873622,
+	2.5849625007211561,
+	2.8073549220576042,
+	3.0000000000000000,
+	3.1699250014423126,
+	3.3219280948873626,
+	3.4594316186372978,
+	3.5849625007211565,
+	3.7004397181410922,
+	3.8073549220576037,
+	3.9068905956085187,
+	4.0000000000000000,
+	4.0874628412503400,
+	4.1699250014423122,
+	4.2479275134435852,
+	4.3219280948873626,
+	4.3923174227787607,
+	4.4594316186372973,
+	4.5235619560570131,
+	4.5849625007211570,
+	4.6438561897747244,
+	4.7004397181410926,
+	4.7548875021634691,
+	4.8073549220576037,
+	4.8579809951275728,
+	4.9068905956085187,
+	4.9541963103868758,
+	5.0000000000000000,
+	5.0443941193584534,
+	5.0874628412503400,
+	5.1292830169449664,
+	5.1699250014423122,
+	5.2094533656289501,
+	5.2479275134435852,
+	5.2854022188622487,
+	5.3219280948873626,
+	5.3575520046180838,
+	5.3923174227787607,
+	5.4262647547020979,
+	5.4594316186372973,
+	5.4918530963296748,
+	5.5235619560570131,
+	5.5545888516776376,
+	5.5849625007211570,
+	5.6147098441152083,
+	5.6438561897747244,
+	5.6724253419714961,
+	5.7004397181410926,
+	5.7279204545631996,
+	5.7548875021634691,
+	5.7813597135246599,
+	5.8073549220576046,
+	5.8328900141647422,
+	5.8579809951275719,
+	5.8826430493618416,
+	5.9068905956085187,
+	5.9307373375628867,
+	5.9541963103868758,
+	5.9772799234999168,
+	6.0000000000000000,
+	6.0223678130284544,
+	6.0443941193584534,
+	6.0660891904577721,
+	6.0874628412503400,
+	6.1085244567781700,
+	6.1292830169449672,
+	6.1497471195046822,
+	6.1699250014423122,
+	6.1898245588800176,
+	6.2094533656289510,
+	6.2288186904958804,
+	6.2479275134435861,
+	6.2667865406949019,
+	6.2854022188622487,
+	6.3037807481771031,
+	6.3219280948873617,
+	6.3398500028846252,
+	6.3575520046180847,
+	6.3750394313469254,
+	6.3923174227787598,
+	6.4093909361377026,
+	6.4262647547020979,
+	6.4429434958487288,
+	6.4594316186372982,
+	6.4757334309663976,
+	6.4918530963296748,
+	6.5077946401986964,
+	6.5235619560570131,
+	6.5391588111080319,
+	6.5545888516776376,
+	6.5698556083309478,
+	6.5849625007211561,
+	6.5999128421871278,
+	6.6147098441152092,
+	6.6293566200796095,
+	6.6438561897747253,
+	6.6582114827517955,
+	6.6724253419714952,
+	6.6865005271832185,
+	6.7004397181410917,
+	6.7142455176661224,
+	6.7279204545631988,
+	6.7414669864011465,
+	6.7548875021634691,
+	6.7681843247769260,
+	6.7813597135246599,
+	6.7944158663501062,
+	6.8073549220576037,
+	6.8201789624151887,
+	6.8328900141647422,
+	6.8454900509443757,
+	6.8579809951275719,
+	6.8703647195834048,
+	6.8826430493618416,
+	6.8948177633079437,
+	6.9068905956085187,
+	6.9188632372745955,
+	6.9307373375628867,
+	6.9425145053392399,
+	6.9541963103868758,
+	6.9657842846620879,
+	6.9772799234999168,
+	6.9886846867721664,
+	7.0000000000000000,
+	7.0112272554232540,
+	7.0223678130284544,
+	7.0334230015374501,
+	7.0443941193584534,
+	7.0552824355011898,
+	7.0660891904577721,
+	7.0768155970508317,
+	7.0874628412503400,
+	7.0980320829605272,
+	7.1085244567781700,
+	7.1189410727235076,
+	7.1292830169449664,
+	7.1395513523987937,
+	7.1497471195046822,
+	7.1598713367783891,
+	7.1699250014423130,
+	7.1799090900149345,
+	7.1898245588800176,
+	7.1996723448363644,
+	7.2094533656289492,
+	7.2191685204621621,
+	7.2288186904958804,
+	7.2384047393250794,
+	7.2479275134435861,
+	7.2573878426926521,
+	7.2667865406949019,
+	7.2761244052742384,
+	7.2854022188622487,
+	7.2946207488916270,
+	7.3037807481771031,
+	7.3128829552843557,
+	7.3219280948873617,
+	7.3309168781146177,
+	7.3398500028846243,
+	7.3487281542310781,
+	7.3575520046180847,
+	7.3663222142458151,
+	7.3750394313469254,
+	7.3837042924740528,
+	7.3923174227787607,
+	7.4008794362821844,
+	7.4093909361377026,
+	7.4178525148858991,
+	7.4262647547020979,
+	7.4346282276367255,
+	7.4429434958487288,
+	7.4512111118323299,
+	7.4594316186372973,
+	7.4676055500829976,
+	7.4757334309663976,
+	7.4838157772642564,
+	7.4918530963296748,
+	7.4998458870832057,
+	7.5077946401986964,
+	7.5156998382840436,
+	7.5235619560570131,
+	7.5313814605163119,
+	7.5391588111080319,
+	7.5468944598876373,
+	7.5545888516776376,
+	7.5622424242210728,
+	7.5698556083309478,
+	7.5774288280357487,
+	7.5849625007211561,
+	7.5924570372680806,
+	7.5999128421871278,
+	7.6073303137496113,
+	7.6147098441152075,
+	7.6220518194563764,
+	7.6293566200796095,
+	7.6366246205436488,
+	7.6438561897747244,
+	7.6510516911789290,
+	7.6582114827517955,
+	7.6653359171851765,
+	7.6724253419714952,
+	7.6794800995054464,
+	7.6865005271832185,
+	7.6934869574993252,
+	7.7004397181410926,
+	7.7073591320808825,
+	7.7142455176661224,
+	7.7210991887071856,
+	7.7279204545631996,
+	7.7347096202258392,
+	7.7414669864011465,
+	7.7481928495894596,
+	7.7548875021634691,
+	7.7615512324444795,
+	7.7681843247769260,
+	7.7747870596011737,
+	7.7813597135246608,
+	7.7879025593914317,
+	7.7944158663501062,
+	7.8008998999203047,
+	7.8073549220576037,
+	7.8137811912170374,
+	7.8201789624151887,
+	7.8265484872909159,
+	7.8328900141647422,
+	7.8392037880969445,
+	7.8454900509443757,
+	7.8517490414160571,
+	7.8579809951275719,
+	7.8641861446542798,
+	7.8703647195834048,
+	7.8765169465650002,
+	7.8826430493618425,
+	7.8887432488982601,
+	7.8948177633079446,
+	7.9008668079807496,
+	7.9068905956085187,
+	7.9128893362299619,
+	7.9188632372745955,
+	7.9248125036057813,
+	7.9307373375628867,
+	7.9366379390025719,
+	7.9425145053392399,
+	7.9483672315846778,
+	7.9541963103868758,
+	7.9600019320680806,
+	7.9657842846620870,
+	7.9715435539507720,
+	7.9772799234999168,
+	7.9829935746943104,
+	7.9886846867721664,
+	7.9943534368588578,
+}
+
+/* Faster logarithm for small integers, with the property of log2(0) == 0.
+   Values covered by kLog2Table are looked up; larger ones go to math.Log2. */
+func fastLog2(v uint) float64 {
+	if v >= uint(len(kLog2Table)) {
+		return math.Log2(float64(v))
+	}
+
+	return float64(kLog2Table[v])
+}
@@ -0,0 +1,45 @@
+package brotli
+
+import (
+	"encoding/binary"
+	"math/bits"
+	"runtime"
+)
+
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find maximal matching prefixes of strings.
+
+   Returns the number of leading bytes, at most limit, that s1 and s2 have in
+   common. Both slices must hold at least limit bytes; otherwise the function
+   panics with an index out of range error.
+
+   A limit of 0 returns 0 without touching the slices. Without that guard the
+   bounds-check hint below would compute limit-1 on an unsigned value and
+   panic. */
+func findMatchLengthWithLimit(s1 []byte, s2 []byte, limit uint) uint {
+	if limit == 0 {
+		return 0
+	}
+
+	var matched uint = 0
+	_, _ = s1[limit-1], s2[limit-1] // bounds check
+	switch runtime.GOARCH {
+	case "amd64":
+		// Compare 8 bytes at a time.
+		for matched+8 <= limit {
+			w1 := binary.LittleEndian.Uint64(s1[matched:])
+			w2 := binary.LittleEndian.Uint64(s2[matched:])
+			if w1 != w2 {
+				// The lowest set bit of the XOR lies in the first differing
+				// byte; dividing its position by 8 gives that byte's offset.
+				return matched + uint(bits.TrailingZeros64(w1^w2)>>3)
+			}
+			matched += 8
+		}
+	case "386":
+		// Compare 4 bytes at a time.
+		for matched+4 <= limit {
+			w1 := binary.LittleEndian.Uint32(s1[matched:])
+			w2 := binary.LittleEndian.Uint32(s2[matched:])
+			if w1 != w2 {
+				return matched + uint(bits.TrailingZeros32(w1^w2)>>3)
+			}
+			matched += 4
+		}
+	}
+	// Byte-wise tail; also the whole comparison on other architectures.
+	for matched < limit && s1[matched] == s2[matched] {
+		matched++
+	}
+	return matched
+}
@@ -0,0 +1,287 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Returns 4, the number of input bytes consumed by hashBytesH10 (it reads
+   one little-endian 32-bit word). */
+func (*h10) HashTypeLength() uint {
+	return 4
+}
+
+/* Returns 128, which matches the 128-byte sequence length that Store and
+   storeAndFindMatchesH10 require to be available at a stored position. */
+func (*h10) StoreLookahead() uint {
+	return 128
+}
+
+/* Hashes the first 4 bytes of data (read as a little-endian word) to a
+   17-bit bucket index. */
+func hashBytesH10(data []byte) uint32 {
+	const bucketBits = 17
+
+	mixed := binary.LittleEndian.Uint32(data) * kHashMul32
+
+	/* The higher bits contain more mixture from the multiplication,
+	   so we take our results from there. */
+	return mixed >> (32 - bucketBits)
+}
+
+/* A (forgetful) hash table where each hash bucket contains a binary tree of
+   sequences whose first 4 bytes share the same hash code.
+   Each sequence is 128 long and is identified by its starting
+   position in the input data. The binary tree is sorted by the lexicographic
+   order of the sequences, and it is also a max-heap with respect to the
+   starting positions. */
+type h10 struct {
+	hasherCommon
+	/* (1 << lgwin) - 1, set by Initialize; masks a position into the forest. */
+	window_mask_ uint
+	/* Root position of each bucket's tree; one entry per 17-bit hash value
+	   produced by hashBytesH10. */
+	buckets_     [1 << 17]uint32
+	/* Marker stored in empty buckets and in child slots that have no node. */
+	invalid_pos_ uint32
+	/* Child links, two per window position: the left child of pos lives at
+	   2*(pos & window_mask_), the right child in the slot after it. */
+	forest       []uint32
+}
+
+func (h *h10) Initialize(params *encoderParams) {
+	h.window_mask_ = (1 << params.lgwin) - 1
+	h.invalid_pos_ = uint32(0 - h.window_mask_)
+	var num_nodes uint = uint(1) << params.lgwin
+	h.forest = make([]uint32, 2*num_nodes)
+}
+
+func (h *h10) Prepare(one_shot bool, input_size uint, data []byte) {
+	var invalid_pos uint32 = h.invalid_pos_
+	var i uint32
+	for i = 0; i < 1<<17; i++ {
+		h.buckets_[i] = invalid_pos
+	}
+}
+
+/* Forest index of the left-child slot belonging to position pos. */
+func leftChildIndexH10(self *h10, pos uint) uint {
+	slot := pos & self.window_mask_
+	return 2 * slot
+}
+
+/* Forest index of the right-child slot belonging to position pos; it is the
+   slot directly after the left-child one. */
+func rightChildIndexH10(self *h10, pos uint) uint {
+	slot := pos & self.window_mask_
+	return 2*slot + 1
+}
+
+/* Stores the hash of the next 4 bytes and in a single tree-traversal, the
+   hash bucket's binary tree is searched for matches and is re-rooted at the
+   current position.
+
+   If less than 128 data is available, the hash bucket of the
+   current position is searched for matches, but the state of the hash table
+   is not changed, since we can not know the final sorting order of the
+   current (incomplete) sequence.
+
+   This function must be called with increasing cur_ix positions.
+
+   When matches is non-nil, every match longer than *best_len is appended to
+   it and *best_len is raised accordingly; the remaining (unused) tail of
+   matches is returned. Store-only callers pass nil for both best_len and
+   matches, in which case nothing is reported. */
+func storeAndFindMatchesH10(self *h10, data []byte, cur_ix uint, ring_buffer_mask uint, max_length uint, max_backward uint, best_len *uint, matches []backwardMatch) []backwardMatch {
+	var cur_ix_masked uint = cur_ix & ring_buffer_mask
+	var max_comp_len uint = brotli_min_size_t(max_length, 128)
+	var should_reroot_tree bool = (max_length >= 128)
+	var key uint32 = hashBytesH10(data[cur_ix_masked:])
+	var forest []uint32 = self.forest
+	var prev_ix uint = uint(self.buckets_[key])
+	/* The forest index of the rightmost node of the left subtree of the new
+	   root, updated as we traverse and re-root the tree of the hash bucket. */
+	var node_left uint = leftChildIndexH10(self, cur_ix)
+	/* The forest index of the leftmost node of the right subtree of the new
+	   root, updated as we traverse and re-root the tree of the hash bucket. */
+	var node_right uint = rightChildIndexH10(self, cur_ix)
+	/* The match length of the rightmost node of the left subtree of the new
+	   root, updated as we traverse and re-root the tree of the hash bucket. */
+	var best_len_left uint = 0
+	/* The match length of the leftmost node of the right subtree of the new
+	   root, updated as we traverse and re-root the tree of the hash bucket. */
+	var best_len_right uint = 0
+	var depth_remaining uint
+	if should_reroot_tree {
+		self.buckets_[key] = uint32(cur_ix)
+	}
+
+	/* Descend at most 64 nodes. */
+	for depth_remaining = 64; ; depth_remaining-- {
+		var backward uint = cur_ix - prev_ix
+		var prev_ix_masked uint = prev_ix & ring_buffer_mask
+		if backward == 0 || backward > max_backward || depth_remaining == 0 {
+			/* Ran off the tree, left the window or exhausted the depth
+			   budget: terminate both open subtrees. */
+			if should_reroot_tree {
+				forest[node_left] = self.invalid_pos_
+				forest[node_right] = self.invalid_pos_
+			}
+
+			break
+		}
+		{
+			/* The first min(best_len_left, best_len_right) bytes are taken as
+			   already matching, so comparison resumes after them. */
+			var cur_len uint = brotli_min_size_t(best_len_left, best_len_right)
+			var len uint
+			assert(cur_len <= 128)
+			len = cur_len + findMatchLengthWithLimit(data[cur_ix_masked+cur_len:], data[prev_ix_masked+cur_len:], max_length-cur_len)
+			if matches != nil && len > *best_len {
+				*best_len = uint(len)
+				initBackwardMatch(&matches[0], backward, uint(len))
+				matches = matches[1:]
+			}
+
+			if len >= max_comp_len {
+				/* Full-length match: the current position takes over the
+				   children of the matched node. */
+				if should_reroot_tree {
+					forest[node_left] = forest[leftChildIndexH10(self, prev_ix)]
+					forest[node_right] = forest[rightChildIndexH10(self, prev_ix)]
+				}
+
+				break
+			}
+
+			if data[cur_ix_masked+len] > data[prev_ix_masked+len] {
+				/* prev sorts before cur: it joins the left subtree and the
+				   search continues in its right child. */
+				best_len_left = uint(len)
+				if should_reroot_tree {
+					forest[node_left] = uint32(prev_ix)
+				}
+
+				node_left = rightChildIndexH10(self, prev_ix)
+				prev_ix = uint(forest[node_left])
+			} else {
+				/* prev sorts after cur: it joins the right subtree and the
+				   search continues in its left child. */
+				best_len_right = uint(len)
+				if should_reroot_tree {
+					forest[node_right] = uint32(prev_ix)
+				}
+
+				node_right = leftChildIndexH10(self, prev_ix)
+				prev_ix = uint(forest[node_right])
+			}
+		}
+	}
+
+	return matches
+}
+
+/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
+   length of max_length and stores the position cur_ix in the hash table.
+
+   Returns the number of matches found, and stores the found
+   matches in matches[0] to matches[count - 1]. The matches will be
+   sorted by strictly increasing length and (non-strictly) increasing
+   distance.
+
+   Three sources are consulted in order: a linear scan over the most recent
+   positions for short matches, the binary tree of the hash bucket, and the
+   static dictionary. */
+func findAllMatchesH10(handle *h10, dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, cur_ix uint, max_length uint, max_backward uint, gap uint, params *encoderParams, matches []backwardMatch) uint {
+	var orig_matches []backwardMatch = matches
+	var cur_ix_masked uint = cur_ix & ring_buffer_mask
+	var best_len uint = 1
+	/* How far back the linear scan looks: 16 positions, or 64 at
+	   hqZopflificationQuality. */
+	var short_match_max_backward uint
+	if params.quality != hqZopflificationQuality {
+		short_match_max_backward = 16
+	} else {
+		short_match_max_backward = 64
+	}
+	var stop uint = cur_ix - short_match_max_backward
+	var dict_matches [maxStaticDictionaryMatchLen + 1]uint32
+	var i uint
+	/* The subtraction above wraps when cur_ix is small; clamp to 0. */
+	if cur_ix < short_match_max_backward {
+		stop = 0
+	}
+	/* Linear scan; it stops as soon as a match longer than 2 is found. */
+	for i = cur_ix - 1; i > stop && best_len <= 2; i-- {
+		var prev_ix uint = i
+		var backward uint = cur_ix - prev_ix
+		if backward > max_backward {
+			break
+		}
+
+		prev_ix &= ring_buffer_mask
+		/* Cheap reject: the first two bytes must agree. */
+		if data[cur_ix_masked] != data[prev_ix] || data[cur_ix_masked+1] != data[prev_ix+1] {
+			continue
+		}
+		{
+			var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+			if len > best_len {
+				best_len = uint(len)
+				initBackwardMatch(&matches[0], backward, uint(len))
+				matches = matches[1:]
+			}
+		}
+	}
+
+	/* Tree search, which also stores cur_ix; skipped when the scan already
+	   found a match of the maximum length. */
+	if best_len < max_length {
+		matches = storeAndFindMatchesH10(handle, data, cur_ix, ring_buffer_mask, max_length, max_backward, &best_len, matches)
+	}
+
+	for i = 0; i <= maxStaticDictionaryMatchLen; i++ {
+		dict_matches[i] = kInvalidMatch
+	}
+	{
+		/* Only dictionary matches of length 4 or more that beat the best
+		   match so far are considered. */
+		var minlen uint = brotli_max_size_t(4, best_len+1)
+		if findAllStaticDictionaryMatches(dictionary, data[cur_ix_masked:], minlen, max_length, dict_matches[0:]) {
+			var maxlen uint = brotli_min_size_t(maxStaticDictionaryMatchLen, max_length)
+			var l uint
+			for l = minlen; l <= maxlen; l++ {
+				var dict_id uint32 = dict_matches[l]
+				if dict_id < kInvalidMatch {
+					/* dict_id is split into its upper bits (dict_id >> 5),
+					   which feed the distance, and its low 5 bits, which are
+					   handed to initDictionaryBackwardMatch separately. */
+					var distance uint = max_backward + gap + uint(dict_id>>5) + 1
+					if distance <= params.dist.max_distance {
+						initDictionaryBackwardMatch(&matches[0], distance, l, uint(dict_id&31))
+						matches = matches[1:]
+					}
+				}
+			}
+		}
+	}
+
+	/* matches was advanced once per stored match, so the capacity lost
+	   relative to orig_matches is the match count. */
+	return uint(-cap(matches) + cap(orig_matches))
+}
+
+/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
+   current sequence, without returning any matches.
+   REQUIRES: ix + 128 <= end-of-current-block */
+func (h *h10) Store(data []byte, mask uint, ix uint) {
+	var max_backward uint = h.window_mask_ - windowGap + 1
+	/* Maximum distance is window size - 16, see section 9.1. of the spec. */
+	storeAndFindMatchesH10(h, data, ix, mask, 128, max_backward, nil, nil)
+}
+
+func (h *h10) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
+	var i uint = ix_start
+	var j uint = ix_start
+	if ix_start+63 <= ix_end {
+		i = ix_end - 63
+	}
+
+	if ix_start+512 <= i {
+		for ; j < i; j += 8 {
+			h.Store(data, mask, j)
+		}
+	}
+
+	for ; i < ix_end; i++ {
+		h.Store(data, mask, i)
+	}
+}
+
+func (h *h10) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
+	if num_bytes >= h.HashTypeLength()-1 && position >= 128 {
+		var i_start uint = position - 128 + 1
+		var i_end uint = brotli_min_size_t(position, i_start+num_bytes)
+		/* Store the last `128 - 1` positions in the hasher.
+		   These could not be calculated before, since they require knowledge
+		   of both the previous and the current block. */
+
+		var i uint
+		for i = i_start; i < i_end; i++ {
+			/* Maximum distance is window size - 16, see section 9.1. of the spec.
+			   Furthermore, we have to make sure that we don't look further back
+			   from the start of the next block than the window size, otherwise we
+			   could access already overwritten areas of the ring-buffer. */
+			var max_backward uint = h.window_mask_ - brotli_max_size_t(windowGap-1, position-i)
+
+			/* We know that i + 128 <= position + num_bytes, i.e. the
+			   end of the current block and that we have at least
+			   128 tail in the ring-buffer. */
+			storeAndFindMatchesH10(h, ringbuffer, i, ringbuffer_mask, 128, max_backward, nil, nil)
+		}
+	}
+}
+
+/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH
+   NOTE(review): presumably 64 for the short-match scan in findAllMatchesH10
+   plus the 64-node depth budget of storeAndFindMatchesH10 - confirm. */
+const maxNumMatchesH10 = 128
+
+/* Not implemented for h10: always panics with "unimplemented". Matches for
+   this hasher are obtained through findAllMatchesH10 instead. */
+func (*h10) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
+	panic("unimplemented")
+}
+
+/* Not implemented for h10: always panics with "unimplemented". */
+func (*h10) PrepareDistanceCache(distance_cache []int) {
+	panic("unimplemented")
+}
@@ -0,0 +1,214 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (bucket_size_) to a ring buffer of
+   fixed size (block_size_). The ring buffer contains the last block_size_
+   index positions of the given hash key in the compressed data.
+
+   HashTypeLength reports 4 for h5. StitchToPreviousBlock compares the size
+   of a new block against HashTypeLength()-1 before it stores the positions
+   at the seam. */
+func (*h5) HashTypeLength() uint {
+	return 4
+}
+
+func (*h5) StoreLookahead() uint {
+	return 4 /* equals the 4 bytes hashBytesH5 reads; presumably the bytes that must be readable at a stored position -- TODO confirm against callers */
+}
+
+/* HashBytes is the function that chooses the bucket to place the address in. */
+func hashBytesH5(data []byte, shift int) uint32 {
+	var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
+
+	/* The higher bits contain more mixture from the multiplication,
+	   so we take our results from there. */
+	return uint32(h >> uint(shift))
+}
+
+type h5 struct {
+	hasherCommon
+	bucket_size_ uint     /* number of buckets: 1 << params.bucket_bits */
+	block_size_  uint     /* slots per bucket: 1 << params.block_bits */
+	hash_shift_  int      /* 32 - params.bucket_bits; right shift applied by hashBytesH5 */
+	block_mask_  uint32   /* block_size_ - 1; wraps a counter into a slot index */
+	num          []uint16 /* per-bucket counter of stored positions (16-bit, wraps) */
+	buckets      []uint32 /* bucket_size_ * block_size_ stored positions, grouped by bucket */
+}
+
+/* Initialize derives the table geometry from the hasher parameters that are
+   already stored in h.params (the params argument itself is not read) and
+   allocates the counter and bucket arrays. */
+func (h *h5) Initialize(params *encoderParams) {
+	bucket_bits := h.params.bucket_bits
+	block_bits := h.params.block_bits
+
+	h.hash_shift_ = 32 - bucket_bits
+	h.bucket_size_ = uint(1) << uint(bucket_bits)
+	h.block_size_ = uint(1) << uint(block_bits)
+	h.block_mask_ = uint32(h.block_size_ - 1)
+
+	h.num = make([]uint16, h.bucket_size_)
+	h.buckets = make([]uint32, h.block_size_*h.bucket_size_)
+}
+
+func (h *h5) Prepare(one_shot bool, input_size uint, data []byte) {
+	var num []uint16 = h.num
+	var partial_prepare_threshold uint = h.bucket_size_ >> 6
+	/* Partial preparation is 100 times slower (per socket). */
+	if one_shot && input_size <= partial_prepare_threshold {
+		var i uint
+		for i = 0; i < input_size; i++ {
+			var key uint32 = hashBytesH5(data[i:], h.hash_shift_)
+			num[key] = 0
+		}
+	} else {
+		for i := 0; i < int(h.bucket_size_); i++ {
+			num[i] = 0
+		}
+	}
+}
+
+/* Look at 4 bytes at &data[ix & mask].
+   Compute a hash from these, and store the value of ix at that position. */
+func (h *h5) Store(data []byte, mask uint, ix uint) {
+	var num []uint16 = h.num
+	var key uint32 = hashBytesH5(data[ix&mask:], h.hash_shift_)
+	var minor_ix uint = uint(num[key]) & uint(h.block_mask_)
+	var offset uint = minor_ix + uint(key<<uint(h.params.block_bits))
+	h.buckets[offset] = uint32(ix)
+	num[key]++
+}
+
+func (h *h5) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
+	var i uint
+	for i = ix_start; i < ix_end; i++ {
+		h.Store(data, mask, i)
+	}
+}
+
+/* StitchToPreviousBlock stores the three positions directly in front of
+   position. Their hashes could not be computed before, since they need
+   bytes from both the previous and the current block.
+
+   Nothing is stored when the new block has fewer than HashTypeLength()-1
+   bytes or when fewer than 3 bytes precede position. */
+func (h *h5) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
+	if num_bytes < h.HashTypeLength()-1 || position < 3 {
+		return
+	}
+
+	for back := uint(3); back >= 1; back-- {
+		h.Store(ringbuffer, ringbuffer_mask, position-back)
+	}
+}
+
+func (h *h5) PrepareDistanceCache(distance_cache []int) {
+	prepareDistanceCache(distance_cache, h.params.num_last_distances_to_check) /* fills the derived entries (index 4 and up) that FindLongestMatch tries first */
+}
+
+/* Find a longest backward match of &data[cur_ix] up to the length of
+   max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: PrepareDistanceCache must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke PrepareDistanceCache once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found.
+   |out|->len and |out|->len_code_delta are reset to 0 on entry.
+
+   The search runs in three stages: the first num_last_distances_to_check
+   entries of distance_cache, then the positions stored in the bucket of the
+   current hash key, and finally the static dictionary, which is consulted
+   only when neither earlier stage changed |out|->score. */
+func (h *h5) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
+	var num []uint16 = h.num
+	var buckets []uint32 = h.buckets
+	var cur_ix_masked uint = cur_ix & ring_buffer_mask
+	var min_score uint = out.score /* score on entry; compared at the end to detect "nothing better found" */
+	var best_score uint = out.score
+	var best_len uint = out.len
+	var i uint
+	var bucket []uint32
+	/* Don't accept a short copy from far away. */
+	out.len = 0
+
+	out.len_code_delta = 0
+
+	/* Try last distance first. */
+	for i = 0; i < uint(h.params.num_last_distances_to_check); i++ {
+		var backward uint = uint(distance_cache[i])
+		var prev_ix uint = uint(cur_ix - backward)
+		if prev_ix >= cur_ix { /* backward is zero, or the unsigned subtraction wrapped around */
+			continue
+		}
+
+		if backward > max_backward {
+			continue
+		}
+
+		prev_ix &= ring_buffer_mask
+
+		if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
+			continue /* cheap reject: out of the ring-buffer range, or the byte at offset best_len differs */
+		}
+		{
+			var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+			if len >= 3 || (len == 2 && i < 2) {
+				/* Comparing for >= 2 does not change the semantics, but just saves for
+				   a few unnecessary binary logarithms in backward reference score,
+				   since we are not interested in such short matches. */
+				var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
+				if best_score < score {
+					if i != 0 {
+						score -= backwardReferencePenaltyUsingLastDistance(i)
+					}
+					if best_score < score {
+						best_score = score
+						best_len = uint(len)
+						out.len = best_len
+						out.distance = backward
+						out.score = best_score
+					}
+				}
+			}
+		}
+	}
+	{
+		var key uint32 = hashBytesH5(data[cur_ix_masked:], h.hash_shift_)
+		bucket = buckets[key<<uint(h.params.block_bits):]
+		var down uint
+		if uint(num[key]) > h.block_size_ {
+			down = uint(num[key]) - h.block_size_
+		} else {
+			down = 0
+		}
+		for i = uint(num[key]); i > down; { /* walk the bucket from the most recently stored slot backwards */
+			var prev_ix uint
+			i--
+			prev_ix = uint(bucket[uint32(i)&h.block_mask_])
+			var backward uint = cur_ix - prev_ix
+			if backward > max_backward {
+				break /* presumably older slots hold even smaller positions, so the rest is out of range too */
+			}
+
+			prev_ix &= ring_buffer_mask
+			if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
+				continue
+			}
+			{
+				var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+				if len >= 4 {
+					/* Comparing for >= 3 does not change the semantics, but just saves
+					   for a few unnecessary binary logarithms in backward reference
+					   score, since we are not interested in such short matches. */
+					var score uint = backwardReferenceScore(uint(len), backward)
+					if best_score < score {
+						best_score = score
+						best_len = uint(len)
+						out.len = best_len
+						out.distance = backward
+						out.score = best_score
+					}
+				}
+			}
+		}
+
+		bucket[uint32(num[key])&h.block_mask_] = uint32(cur_ix) /* record cur_ix in the same slot Store would use */
+		num[key]++
+	}
+
+	if min_score == out.score { /* no stage above improved the score */
+		searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
+	}
+}
@@ -0,0 +1,216 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (bucket_size_) to a ring buffer of
+   fixed size (block_size_). The ring buffer contains the last block_size_
+   index positions of the given hash key in the compressed data.
+
+   HashTypeLength reports 8 for h6, matching the 8 bytes hashBytesH6 reads.
+   StitchToPreviousBlock compares the size of a new block against
+   HashTypeLength()-1 before it stores the positions at the seam. */
+func (*h6) HashTypeLength() uint {
+	return 8
+}
+
+func (*h6) StoreLookahead() uint {
+	return 8 /* equals the 8 bytes hashBytesH6 reads; presumably the bytes that must be readable at a stored position -- TODO confirm against callers */
+}
+
+/* HashBytes is the function that chooses the bucket to place the address in. */
+func hashBytesH6(data []byte, mask uint64, shift int) uint32 {
+	var h uint64 = (binary.LittleEndian.Uint64(data) & mask) * kHashMul64Long
+
+	/* The higher bits contain more mixture from the multiplication,
+	   so we take our results from there. */
+	return uint32(h >> uint(shift))
+}
+
+type h6 struct {
+	hasherCommon
+	bucket_size_ uint     /* number of buckets: 1 << params.bucket_bits */
+	block_size_  uint     /* slots per bucket: 1 << params.block_bits */
+	hash_shift_  int      /* 64 - params.bucket_bits; right shift applied by hashBytesH6 */
+	hash_mask_   uint64   /* keeps the low 8*params.hash_len bits of the 8 bytes read */
+	block_mask_  uint32   /* block_size_ - 1; wraps a counter into a slot index */
+	num          []uint16 /* per-bucket counter of stored positions (16-bit, wraps) */
+	buckets      []uint32 /* bucket_size_ * block_size_ stored positions, grouped by bucket */
+}
+
+/* Initialize derives the table geometry and the hash mask from the hasher
+   parameters that are already stored in h.params (the params argument
+   itself is not read) and allocates the counter and bucket arrays. */
+func (h *h6) Initialize(params *encoderParams) {
+	bucket_bits := h.params.bucket_bits
+	block_bits := h.params.block_bits
+
+	h.hash_shift_ = 64 - bucket_bits
+	h.hash_mask_ = (^(uint64(0))) >> uint(64-8*h.params.hash_len)
+	h.bucket_size_ = uint(1) << uint(bucket_bits)
+	h.block_size_ = uint(1) << uint(block_bits)
+	h.block_mask_ = uint32(h.block_size_ - 1)
+
+	h.num = make([]uint16, h.bucket_size_)
+	h.buckets = make([]uint32, h.block_size_*h.bucket_size_)
+}
+
+func (h *h6) Prepare(one_shot bool, input_size uint, data []byte) {
+	var num []uint16 = h.num
+	var partial_prepare_threshold uint = h.bucket_size_ >> 6
+	/* Partial preparation is 100 times slower (per socket). */
+	if one_shot && input_size <= partial_prepare_threshold {
+		var i uint
+		for i = 0; i < input_size; i++ {
+			var key uint32 = hashBytesH6(data[i:], h.hash_mask_, h.hash_shift_)
+			num[key] = 0
+		}
+	} else {
+		for i := 0; i < int(h.bucket_size_); i++ {
+			num[i] = 0
+		}
+	}
+}
+
+/* Look at 4 bytes at &data[ix & mask].
+   Compute a hash from these, and store the value of ix at that position. */
+func (h *h6) Store(data []byte, mask uint, ix uint) {
+	var num []uint16 = h.num
+	var key uint32 = hashBytesH6(data[ix&mask:], h.hash_mask_, h.hash_shift_)
+	var minor_ix uint = uint(num[key]) & uint(h.block_mask_)
+	var offset uint = minor_ix + uint(key<<uint(h.params.block_bits))
+	h.buckets[offset] = uint32(ix)
+	num[key]++
+}
+
+func (h *h6) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
+	var i uint
+	for i = ix_start; i < ix_end; i++ {
+		h.Store(data, mask, i)
+	}
+}
+
+/* StitchToPreviousBlock stores the three positions directly in front of
+   position. Their hashes could not be computed before, since they need
+   bytes from both the previous and the current block.
+
+   Nothing is stored when the new block has fewer than HashTypeLength()-1
+   bytes or when fewer than 3 bytes precede position. */
+func (h *h6) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
+	if num_bytes < h.HashTypeLength()-1 || position < 3 {
+		return
+	}
+
+	for back := uint(3); back >= 1; back-- {
+		h.Store(ringbuffer, ringbuffer_mask, position-back)
+	}
+}
+
+func (h *h6) PrepareDistanceCache(distance_cache []int) {
+	prepareDistanceCache(distance_cache, h.params.num_last_distances_to_check) /* fills the derived entries (index 4 and up) that FindLongestMatch tries first */
+}
+
+/* Find a longest backward match of &data[cur_ix] up to the length of
+   max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: PrepareDistanceCache must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke PrepareDistanceCache once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found.
+   |out|->len and |out|->len_code_delta are reset to 0 on entry.
+
+   The search runs in three stages: the first num_last_distances_to_check
+   entries of distance_cache, then the positions stored in the bucket of the
+   current hash key, and finally the static dictionary, which is consulted
+   only when neither earlier stage changed |out|->score. */
+func (h *h6) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
+	var num []uint16 = h.num
+	var buckets []uint32 = h.buckets
+	var cur_ix_masked uint = cur_ix & ring_buffer_mask
+	var min_score uint = out.score /* score on entry; compared at the end to detect "nothing better found" */
+	var best_score uint = out.score
+	var best_len uint = out.len
+	var i uint
+	var bucket []uint32
+	/* Don't accept a short copy from far away. */
+	out.len = 0
+
+	out.len_code_delta = 0
+
+	/* Try last distance first. */
+	for i = 0; i < uint(h.params.num_last_distances_to_check); i++ {
+		var backward uint = uint(distance_cache[i])
+		var prev_ix uint = uint(cur_ix - backward)
+		if prev_ix >= cur_ix { /* backward is zero, or the unsigned subtraction wrapped around */
+			continue
+		}
+
+		if backward > max_backward {
+			continue
+		}
+
+		prev_ix &= ring_buffer_mask
+
+		if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
+			continue /* cheap reject: out of the ring-buffer range, or the byte at offset best_len differs */
+		}
+		{
+			var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+			if len >= 3 || (len == 2 && i < 2) {
+				/* Comparing for >= 2 does not change the semantics, but just saves for
+				   a few unnecessary binary logarithms in backward reference score,
+				   since we are not interested in such short matches. */
+				var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
+				if best_score < score {
+					if i != 0 {
+						score -= backwardReferencePenaltyUsingLastDistance(i)
+					}
+					if best_score < score {
+						best_score = score
+						best_len = uint(len)
+						out.len = best_len
+						out.distance = backward
+						out.score = best_score
+					}
+				}
+			}
+		}
+	}
+	{
+		var key uint32 = hashBytesH6(data[cur_ix_masked:], h.hash_mask_, h.hash_shift_)
+		bucket = buckets[key<<uint(h.params.block_bits):]
+		var down uint
+		if uint(num[key]) > h.block_size_ {
+			down = uint(num[key]) - h.block_size_
+		} else {
+			down = 0
+		}
+		for i = uint(num[key]); i > down; { /* walk the bucket from the most recently stored slot backwards */
+			var prev_ix uint
+			i--
+			prev_ix = uint(bucket[uint32(i)&h.block_mask_])
+			var backward uint = cur_ix - prev_ix
+			if backward > max_backward {
+				break /* presumably older slots hold even smaller positions, so the rest is out of range too */
+			}
+
+			prev_ix &= ring_buffer_mask
+			if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
+				continue
+			}
+			{
+				var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+				if len >= 4 {
+					/* Comparing for >= 3 does not change the semantics, but just saves
+					   for a few unnecessary binary logarithms in backward reference
+					   score, since we are not interested in such short matches. */
+					var score uint = backwardReferenceScore(uint(len), backward)
+					if best_score < score {
+						best_score = score
+						best_len = uint(len)
+						out.len = best_len
+						out.distance = backward
+						out.score = best_score
+					}
+				}
+			}
+		}
+
+		bucket[uint32(num[key])&h.block_mask_] = uint32(cur_ix) /* record cur_ix in the same slot Store would use */
+		num[key]++
+	}
+
+	if min_score == out.score { /* no stage above improved the score */
+		searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
+	}
+}
@@ -0,0 +1,342 @@
+package brotli
+
+import (
+	"encoding/binary"
+	"fmt"
+)
+
+type hasherCommon struct {
+	params           hasherParams /* copy of encoderParams.hasher; hasherSetup assigns it before Initialize */
+	is_prepared_     bool         /* set by hasherSetup after Prepare has run; cleared by hasherReset */
+	dict_num_lookups uint         /* static-dictionary probes made by searchInStaticDictionary */
+	dict_num_matches uint         /* probes for which testStaticDictionaryItem accepted a match */
+}
+
+func (h *hasherCommon) Common() *hasherCommon {
+	return h /* hashers embed hasherCommon, so this method satisfies hasherHandle.Common for them */
+}
+
+type hasherHandle interface {
+	Common() *hasherCommon                                                                                                                                                                                                      /* shared bookkeeping state of the hasher */
+	Initialize(params *encoderParams)                                                                                                                                                                                           /* called by hasherSetup right after the hasher is created */
+	Prepare(one_shot bool, input_size uint, data []byte)                                                                                                                                                                        /* called by hasherSetup whenever is_prepared_ is false */
+	StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint)                                                                                                                               /* called by initOrStitchToPreviousBlock after setup */
+	HashTypeLength() uint
+	StoreLookahead() uint
+	PrepareDistanceCache(distance_cache []int)
+	FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) /* result is written to out */
+	StoreRange(data []byte, mask uint, ix_start uint, ix_end uint)                                                                                                                                                              /* positions in [ix_start, ix_end) */
+	Store(data []byte, mask uint, ix uint)
+}
+
+const kCutoffTransformsCount uint32 = 10
+
+/*   0,  12,   27,    23,    42,    63,    56,    48,    59,    64 */
+/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
+/* The second addend of each entry above is packed below as a 6-bit field,
+   lowest bits first; testStaticDictionaryItem extracts such a field with
+   (cutoffTransforms >> (cut*6)) & 0x3F from the dictionary's copy
+   (presumably initialised from this constant -- TODO confirm). */
+const kCutoffTransforms uint64 = 0x071B520ADA2D3200
+
+type hasherSearchResult struct {
+	len            uint /* number of matching bytes of the best match */
+	distance       uint /* backward distance of the best match */
+	score          uint /* score of the best match; only replaced by a higher (or, for dictionary items, not lower) score */
+	len_code_delta int  /* dictionary word length minus matched length; 0 for non-dictionary matches */
+}
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * There is no effort to ensure that it is a prime, the oddity is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+const kHashMul32 uint32 = 0x1E35A7BD
+
+const kHashMul64 uint64 = 0x1E35A7BD1E35A7BD /* kHashMul32 repeated in both halves; used by hashLongestMatchQuickly.HashBytes */
+
+const kHashMul64Long uint64 = 0x1FE35A7BD3579BD3 /* used by hashBytesH6 */
+
+/* hash14 maps the first 4 bytes of data to a 14-bit value: the bytes are
+   read as a little-endian uint32, multiplied (with wrap-around) by
+   kHashMul32, and the top 14 bits of the product are returned.
+   data must hold at least 4 bytes. */
+func hash14(data []byte) uint32 {
+	const hash_bits = 14
+	product := binary.LittleEndian.Uint32(data) * kHashMul32
+
+	/* The multiplication mixes best into the higher bits, so those are the
+	   ones that are kept. */
+	return product >> (32 - hash_bits)
+}
+
+func prepareDistanceCache(distance_cache []int, num_distances int) {
+	if num_distances > 4 {
+		var last_distance int = distance_cache[0]
+		distance_cache[4] = last_distance - 1
+		distance_cache[5] = last_distance + 1
+		distance_cache[6] = last_distance - 2
+		distance_cache[7] = last_distance + 2
+		distance_cache[8] = last_distance - 3
+		distance_cache[9] = last_distance + 3
+		if num_distances > 10 {
+			var next_last_distance int = distance_cache[1]
+			distance_cache[10] = next_last_distance - 1
+			distance_cache[11] = next_last_distance + 1
+			distance_cache[12] = next_last_distance - 2
+			distance_cache[13] = next_last_distance + 2
+			distance_cache[14] = next_last_distance - 3
+			distance_cache[15] = next_last_distance + 3
+		}
+	}
+}
+
+const literalByteScore = 135 /* score credited per matched byte */
+
+const distanceBitPenalty = 30 /* score deducted per bit of log2(distance) in backwardReferenceScore */
+
+/* Score must be positive after applying maximal penalty. */
+const scoreBase = (distanceBitPenalty * 8 * 8)
+
+/* backwardReferenceScore rates a backward reference of copy_length bytes at
+   distance backward_reference_offset.
+
+   Usually the longest backward reference is chosen; this score allows for
+   exceptions to that rule. A reference that is further away is usually
+   coded with more bits, which is approximated here by log2(distance). The
+   score is scoreBase plus literalByteScore per copied byte, minus
+   distanceBitPenalty per bit of floor(log2(distance)).
+
+   It is used to sometimes discard a longer backward reference when it is
+   not much longer and the bit cost for encoding it is more than the saved
+   literals.
+
+   backward_reference_offset MUST be positive. */
+func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint {
+	gain := literalByteScore * uint(copy_length)
+	distance_bits := uint(log2FloorNonZero(backward_reference_offset))
+	return scoreBase + gain - distanceBitPenalty*distance_bits
+}
+
+func backwardReferenceScoreUsingLastDistance(copy_length uint) uint {
+	return literalByteScore*uint(copy_length) + scoreBase + 15
+}
+
+func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint {
+	return uint(39) + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE)
+}
+
+func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool {
+	var len uint
+	var word_idx uint
+	var offset uint
+	var matchlen uint
+	var backward uint
+	var score uint
+	len = item & 0x1F
+	word_idx = item >> 5
+	offset = uint(dictionary.words.offsets_by_length[len]) + len*word_idx
+	if len > max_length {
+		return false
+	}
+
+	matchlen = findMatchLengthWithLimit(data, dictionary.words.data[offset:], uint(len))
+	if matchlen+uint(dictionary.cutoffTransformsCount) <= len || matchlen == 0 {
+		return false
+	}
+	{
+		var cut uint = len - matchlen
+		var transform_id uint = (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F)
+		backward = max_backward + 1 + word_idx + (transform_id << dictionary.words.size_bits_by_length[len])
+	}
+
+	if backward > max_distance {
+		return false
+	}
+
+	score = backwardReferenceScore(matchlen, backward)
+	if score < out.score {
+		return false
+	}
+
+	out.len = matchlen
+	out.len_code_delta = int(len) - int(matchlen)
+	out.distance = backward
+	out.score = score
+	return true
+}
+
+func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) {
+	var key uint
+	var i uint
+	var self *hasherCommon = handle.Common()
+	if self.dict_num_matches < self.dict_num_lookups>>7 {
+		return
+	}
+
+	key = uint(hash14(data) << 1)
+	for i = 0; ; (func() { i++; key++ })() {
+		var tmp uint
+		if shallow {
+			tmp = 1
+		} else {
+			tmp = 2
+		}
+		if i >= tmp {
+			break
+		}
+		var item uint = uint(dictionary.hash_table[key])
+		self.dict_num_lookups++
+		if item != 0 {
+			var item_matches bool = testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out)
+			if item_matches {
+				self.dict_num_matches++
+			}
+		}
+	}
+}
+
+type backwardMatch struct {
+	distance        uint32 /* backward distance of the match */
+	length_and_code uint32 /* match length in the bits above the low 5; low 5 bits hold a length code, 0 meaning "same as length" */
+}
+
+/* initBackwardMatch fills self with distance dist and length len. The low
+   5 bits of length_and_code stay 0, so the length code equals the length. */
+func initBackwardMatch(self *backwardMatch, dist uint, len uint) {
+	packed := uint32(len << 5)
+	self.distance = uint32(dist)
+	self.length_and_code = packed
+}
+
+func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) {
+	self.distance = uint32(dist)
+	var tmp uint
+	if len == len_code {
+		tmp = 0
+	} else {
+		tmp = len_code
+	}
+	self.length_and_code = uint32(len<<5 | tmp)
+}
+
+func backwardMatchLength(self *backwardMatch) uint {
+	return uint(self.length_and_code >> 5) /* the length sits above the 5-bit length code */
+}
+
+func backwardMatchLengthCode(self *backwardMatch) uint {
+	var code uint = uint(self.length_and_code) & 31
+	if code != 0 {
+		return code
+	} else {
+		return backwardMatchLength(self)
+	}
+}
+
+/* hasherReset marks the hasher as not prepared, so that the next
+   hasherSetup runs Prepare again. A nil handle is ignored. */
+func hasherReset(handle hasherHandle) {
+	if handle != nil {
+		handle.Common().is_prepared_ = false
+	}
+}
+
+func newHasher(typ int) hasherHandle { /* builds an uninitialised hasher for the given type number; panics for unknown numbers */
+	switch typ {
+	case 2:
+		return &hashLongestMatchQuickly{
+			bucketBits:    16,
+			bucketSweep:   1,
+			hashLen:       5,
+			useDictionary: true,
+		}
+	case 3:
+		return &hashLongestMatchQuickly{
+			bucketBits:    16,
+			bucketSweep:   2,
+			hashLen:       5,
+			useDictionary: false,
+		}
+	case 4:
+		return &hashLongestMatchQuickly{
+			bucketBits:    17,
+			bucketSweep:   4,
+			hashLen:       5,
+			useDictionary: true,
+		}
+	case 5:
+		return new(h5)
+	case 6:
+		return new(h6)
+	case 10:
+		return new(h10)
+	case 35: /* hasher 3 combined with a rolling hash */
+		return &hashComposite{
+			ha: newHasher(3),
+			hb: &hashRolling{jump: 4},
+		}
+	case 40:
+		return &hashForgetfulChain{
+			bucketBits:              15,
+			numBanks:                1,
+			bankBits:                16,
+			numLastDistancesToCheck: 4,
+		}
+	case 41:
+		return &hashForgetfulChain{
+			bucketBits:              15,
+			numBanks:                1,
+			bankBits:                16,
+			numLastDistancesToCheck: 10,
+		}
+	case 42:
+		return &hashForgetfulChain{
+			bucketBits:              15,
+			numBanks:                512,
+			bankBits:                9,
+			numLastDistancesToCheck: 16,
+		}
+	case 54:
+		return &hashLongestMatchQuickly{
+			bucketBits:    20,
+			bucketSweep:   4,
+			hashLen:       7,
+			useDictionary: false,
+		}
+	case 55: /* hasher 54 combined with a rolling hash */
+		return &hashComposite{
+			ha: newHasher(54),
+			hb: &hashRolling{jump: 4},
+		}
+	case 65: /* hasher 6 combined with a rolling hash */
+		return &hashComposite{
+			ha: newHasher(6),
+			hb: &hashRolling{jump: 1},
+		}
+	}
+
+	panic(fmt.Sprintf("unknown hasher type: %d", typ)) /* reached only when no case above returned */
+}
+
+func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) {
+	var self hasherHandle = nil
+	var common *hasherCommon = nil
+	var one_shot bool = (position == 0 && is_last)
+	if *handle == nil {
+		chooseHasher(params, &params.hasher)
+		self = newHasher(params.hasher.type_)
+
+		*handle = self
+		common = self.Common()
+		common.params = params.hasher
+		self.Initialize(params)
+	}
+
+	self = *handle
+	common = self.Common()
+	if !common.is_prepared_ {
+		self.Prepare(one_shot, input_size, data)
+
+		if position == 0 {
+			common.dict_num_lookups = 0
+			common.dict_num_matches = 0
+		}
+
+		common.is_prepared_ = true
+	}
+}
+
+func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) {
+	var self hasherHandle
+	hasherSetup(handle, params, data, position, input_size, is_last)
+	self = *handle
+	self.StitchToPreviousBlock(input_size, position, data, mask)
+}
@@ -0,0 +1,93 @@
+package brotli
+
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+func (h *hashComposite) HashTypeLength() uint {
+	var a uint = h.ha.HashTypeLength()
+	var b uint = h.hb.HashTypeLength()
+	if a > b {
+		return a
+	} else {
+		return b
+	}
+}
+
+func (h *hashComposite) StoreLookahead() uint {
+	var a uint = h.ha.StoreLookahead()
+	var b uint = h.hb.StoreLookahead()
+	if a > b {
+		return a
+	} else {
+		return b
+	}
+}
+
+/* Composite hasher: This hasher allows to combine two other hashers, HASHER_A
+   and HASHER_B. Both sub-hashers are supplied when the composite is built;
+   every operation is forwarded to ha first and then to hb. */
+type hashComposite struct {
+	hasherCommon
+	ha     hasherHandle
+	hb     hasherHandle
+	params *encoderParams
+
+	/* True once ha and hb have received their parameters and had Initialize
+	   called by Prepare; cleared again by Initialize. */
+	sub_initialized bool
+}
+
+/* Initialize remembers params and schedules the sub-hashers for
+   initialization on the next Prepare. */
+func (h *hashComposite) Initialize(params *encoderParams) {
+	h.params = params
+	h.sub_initialized = false
+}
+
+/* Prepare initializes both sub-hashers on first use and then prepares them.
+
+   Initialization of the sub-hashers is deferred to this method (with params
+   remembered by Initialize). It is tracked with sub_initialized rather than
+   by testing ha for nil: ha and hb are already set when the composite is
+   constructed, so a nil test never triggers and the sub-hashers would be
+   prepared and used without ever having been initialized.
+
+   Each sub-hasher gets a copy of params.hasher and cleared bookkeeping
+   state before its Initialize is called, ha before hb. */
+func (h *hashComposite) Prepare(one_shot bool, input_size uint, data []byte) {
+	if !h.sub_initialized {
+		for _, sub := range []hasherHandle{h.ha, h.hb} {
+			common := sub.Common()
+			common.params = h.params.hasher
+			common.is_prepared_ = false
+			common.dict_num_lookups = 0
+			common.dict_num_matches = 0
+			sub.Initialize(h.params)
+		}
+		h.sub_initialized = true
+	}
+
+	h.ha.Prepare(one_shot, input_size, data)
+	h.hb.Prepare(one_shot, input_size, data)
+}
+
+func (h *hashComposite) Store(data []byte, mask uint, ix uint) {
+	h.ha.Store(data, mask, ix) /* forwarded unchanged to both sub-hashers, ha first */
+	h.hb.Store(data, mask, ix)
+}
+
+func (h *hashComposite) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
+	h.ha.StoreRange(data, mask, ix_start, ix_end) /* forwarded unchanged to both sub-hashers, ha first */
+	h.hb.StoreRange(data, mask, ix_start, ix_end)
+}
+
+func (h *hashComposite) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
+	h.ha.StitchToPreviousBlock(num_bytes, position, ringbuffer, ring_buffer_mask) /* forwarded unchanged to both sub-hashers, ha first */
+	h.hb.StitchToPreviousBlock(num_bytes, position, ringbuffer, ring_buffer_mask)
+}
+
+func (h *hashComposite) PrepareDistanceCache(distance_cache []int) {
+	h.ha.PrepareDistanceCache(distance_cache) /* both sub-hashers operate on the same distance_cache slice */
+	h.hb.PrepareDistanceCache(distance_cache)
+}
+
+func (h *hashComposite) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
+	h.ha.FindLongestMatch(dictionary, data, ring_buffer_mask, distance_cache, cur_ix, max_length, max_backward, gap, max_distance, out)
+	h.hb.FindLongestMatch(dictionary, data, ring_buffer_mask, distance_cache, cur_ix, max_length, max_backward, gap, max_distance, out) /* hb starts from whatever ha left in out; presumably it only replaces it with a better match -- TODO confirm for hashRolling */
+}
@@ -0,0 +1,252 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+func (*hashForgetfulChain) HashTypeLength() uint {
+	return 4 /* HashBytes reads 4 bytes per position */
+}
+
+func (*hashForgetfulChain) StoreLookahead() uint {
+	return 4 /* equals the 4 bytes HashBytes reads; presumably the bytes that must be readable at a stored position -- TODO confirm against callers */
+}
+
+/* HashBytes is the function that chooses the bucket to place the address in.*/
+func (h *hashForgetfulChain) HashBytes(data []byte) uint {
+	var hash uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
+
+	/* The higher bits contain more mixture from the multiplication,
+	   so we take our results from there. */
+	return uint(hash >> (32 - h.bucketBits))
+}
+
+type slot struct {
+	delta uint16 /* distance from this node's position to the position previously stored for the same key, capped at 0xFFFF */
+	next  uint16 /* bank index of the node that was the chain head before this one */
+}
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   Hashes are stored in chains which are bucketed to groups. Group of chains
+   share a storage "bank". When more than "bank size" chain nodes are added,
+   oldest nodes are replaced; this way several chains may share a tail.
+
+   The first four fields after hasherCommon are configuration and are
+   filled in by newHasher; the remaining fields are working state that
+   Initialize allocates and Prepare resets. */
+type hashForgetfulChain struct {
+	hasherCommon
+
+	bucketBits              uint /* the hash has bucketBits bits; addr and head have 1 << bucketBits entries */
+	numBanks                uint /* number of banks; the bank of a key is key & (numBanks - 1) */
+	bankBits                uint /* each bank has 1 << bankBits slots */
+	numLastDistancesToCheck int  /* distance cache entries tried by FindLongestMatch */
+
+	addr          []uint32    /* per key: the position stored most recently */
+	head          []uint16    /* per key: bank index of the newest chain node */
+	tiny_hash     [65536]byte /* low byte of the key, indexed by the low 16 bits of the position */
+	banks         [][]slot    /* chain node storage, one slice per bank */
+	free_slot_idx []uint16    /* per bank: running counter that selects the next slot to (re)use */
+	max_hops      uint        /* maximum number of chain nodes visited by one search */
+}
+
+func (h *hashForgetfulChain) Initialize(params *encoderParams) {
+	var q uint
+	if params.quality > 6 {
+		q = 7
+	} else {
+		q = 8
+	}
+	h.max_hops = q << uint(params.quality-4)
+
+	bankSize := 1 << h.bankBits
+	bucketSize := 1 << h.bucketBits
+
+	h.addr = make([]uint32, bucketSize)
+	h.head = make([]uint16, bucketSize)
+	h.banks = make([][]slot, h.numBanks)
+	for i := range h.banks {
+		h.banks[i] = make([]slot, bankSize)
+	}
+	h.free_slot_idx = make([]uint16, h.numBanks)
+}
+
+func (h *hashForgetfulChain) Prepare(one_shot bool, input_size uint, data []byte) {
+	var partial_prepare_threshold uint = (1 << h.bucketBits) >> 6
+	/* Partial preparation is 100 times slower (per socket). */
+	if one_shot && input_size <= partial_prepare_threshold {
+		var i uint
+		for i = 0; i < input_size; i++ {
+			var bucket uint = h.HashBytes(data[i:])
+
+			/* See InitEmpty comment. */
+			h.addr[bucket] = 0xCCCCCCCC
+
+			h.head[bucket] = 0xCCCC
+		}
+	} else {
+		/* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
+		   processed by hasher never reaches 3GB + 64M; this makes all new chains
+		   to be terminated after the first node. */
+		for i := range h.addr {
+			h.addr[i] = 0xCCCCCCCC
+		}
+
+		for i := range h.head {
+			h.head[i] = 0
+		}
+	}
+
+	h.tiny_hash = [65536]byte{}
+	for i := range h.free_slot_idx {
+		h.free_slot_idx[i] = 0
+	}
+}
+
+/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
+   node to corresponding chain; also update tiny_hash for current position. */
+func (h *hashForgetfulChain) Store(data []byte, mask uint, ix uint) {
+	var key uint = h.HashBytes(data[ix&mask:])
+	var bank uint = key & (h.numBanks - 1)
+	idx := uint(h.free_slot_idx[bank]) & ((1 << h.bankBits) - 1)
+	h.free_slot_idx[bank]++
+	var delta uint = ix - uint(h.addr[key])
+	h.tiny_hash[uint16(ix)] = byte(key)
+	if delta > 0xFFFF {
+		delta = 0xFFFF
+	}
+	h.banks[bank][idx].delta = uint16(delta)
+	h.banks[bank][idx].next = h.head[key]
+	h.addr[key] = uint32(ix)
+	h.head[key] = uint16(idx)
+}
+
+func (h *hashForgetfulChain) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
+	var i uint
+	for i = ix_start; i < ix_end; i++ {
+		h.Store(data, mask, i)
+	}
+}
+
+/* StitchToPreviousBlock stores the three positions directly in front of
+   position. Their hashes could not be computed before, since they need
+   bytes from both the previous and the current block.
+
+   Nothing is stored when the new block has fewer than HashTypeLength()-1
+   bytes or when fewer than 3 bytes precede position. */
+func (h *hashForgetfulChain) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
+	if num_bytes < h.HashTypeLength()-1 || position < 3 {
+		return
+	}
+
+	for back := uint(3); back >= 1; back-- {
+		h.Store(ringbuffer, ring_buffer_mask, position-back)
+	}
+}
+
+func (h *hashForgetfulChain) PrepareDistanceCache(distance_cache []int) {
+	prepareDistanceCache(distance_cache, h.numLastDistancesToCheck) /* fills the derived entries (index 4 and up) that FindLongestMatch tries first */
+}
+
+/* Find a longest backward match of &data[cur_ix] up to the length of
+   max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: PrepareDistanceCache must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke PrepareDistanceCache once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found.
+   |out|->len and |out|->len_code_delta are reset to 0 on entry.
+
+   The search runs in three stages: the first numLastDistancesToCheck
+   entries of distance_cache, then up to max_hops nodes of the chain of the
+   current hash key, and finally the static dictionary, which is consulted
+   only when neither earlier stage changed |out|->score. */
+func (h *hashForgetfulChain) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
+	var cur_ix_masked uint = cur_ix & ring_buffer_mask
+	var min_score uint = out.score /* score on entry; compared at the end to detect "nothing better found" */
+	var best_score uint = out.score
+	var best_len uint = out.len
+	var key uint = h.HashBytes(data[cur_ix_masked:])
+	var tiny_hash byte = byte(key)
+	/* Don't accept a short copy from far away. */
+	out.len = 0
+
+	out.len_code_delta = 0
+
+	/* Try last distance first. */
+	for i := 0; i < h.numLastDistancesToCheck; i++ {
+		var backward uint = uint(distance_cache[i])
+		var prev_ix uint = (cur_ix - backward)
+
+		/* For distance code 0 we want to consider 2-byte matches. */
+		if i > 0 && h.tiny_hash[uint16(prev_ix)] != tiny_hash {
+			continue /* the byte recorded for that position differs from the current key's low byte */
+		}
+		if prev_ix >= cur_ix || backward > max_backward { /* zero or wrapped distance, or too far back */
+			continue
+		}
+
+		prev_ix &= ring_buffer_mask
+		{
+			var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+			if len >= 2 {
+				var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
+				if best_score < score {
+					if i != 0 {
+						score -= backwardReferencePenaltyUsingLastDistance(uint(i))
+					}
+					if best_score < score {
+						best_score = score
+						best_len = uint(len)
+						out.len = best_len
+						out.distance = backward
+						out.score = best_score
+					}
+				}
+			}
+		}
+	}
+	{
+		var bank uint = key & (h.numBanks - 1)
+		var backward uint = 0
+		var hops uint = h.max_hops
+		var delta uint = cur_ix - uint(h.addr[key])
+		var slot uint = uint(h.head[key])
+		for {
+			tmp6 := hops /* post-decrement test: the loop body runs at most max_hops times */
+			hops--
+			if tmp6 == 0 {
+				break
+			}
+			var prev_ix uint
+			var last uint = slot
+			backward += delta /* distances accumulate along the chain */
+			if backward > max_backward {
+				break
+			}
+			prev_ix = (cur_ix - backward) & ring_buffer_mask
+			slot = uint(h.banks[bank][last].next)
+			delta = uint(h.banks[bank][last].delta)
+			if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
+				continue /* cheap reject: out of the ring-buffer range, or the byte at offset best_len differs */
+			}
+			{
+				var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+				if len >= 4 {
+					/* Comparing for >= 3 does not change the semantics, but just saves
+					   for a few unnecessary binary logarithms in backward reference
+					   score, since we are not interested in such short matches. */
+					var score uint = backwardReferenceScore(uint(len), backward)
+					if best_score < score {
+						best_score = score
+						best_len = uint(len)
+						out.len = best_len
+						out.distance = backward
+						out.score = best_score
+					}
+				}
+			}
+		}
+
+		h.Store(data, ring_buffer_mask, cur_ix)
+	}
+
+	if out.score == min_score { /* no stage above improved the score */
+		searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
+	}
+}
@@ -0,0 +1,214 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
+   a little faster (0.5% - 1%) and it compresses 0.15% better on small text
+   and HTML inputs. */
+
+/* HashTypeLength always reports 8 for this hasher.
+   NOTE(review): presumably this is the number of input bytes a hash needs
+   (HashBytes loads one 64-bit word) - confirm against the hasher interface. */
+func (*hashLongestMatchQuickly) HashTypeLength() uint {
+	return 8
+}
+
+/* StoreLookahead always reports 8 for this hasher.
+   NOTE(review): presumably the number of bytes that must be readable at a
+   position before Store may be called on it - confirm against the callers. */
+func (*hashLongestMatchQuickly) StoreLookahead() uint {
+	return 8
+}
+
+/* HashBytes is the function that chooses the bucket to place
+   the address in. The HashLongestMatch and hashLongestMatchQuickly
+   classes have separate, different implementations of hashing. */
+func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 {
+	var hash uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*h.hashLen)) * kHashMul64)
+
+	/* The higher bits contain more mixture from the multiplication,
+	   so we take our results from there. */
+	return uint32(hash >> (64 - h.bucketBits))
+}
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   Initialize allocates (1 << bucketBits) + bucketSweep entries. Starting
+   from the index returned by HashBytes, bucketSweep consecutive entries
+   are used to store positions for a key. */
+type hashLongestMatchQuickly struct {
+	hasherCommon
+
+	/* bucketBits:    number of bits in a hash key (see HashBytes).
+	   bucketSweep:   number of consecutive slots that belong to one key.
+	   hashLen:       number of input bytes that feed the hash.
+	   useDictionary: whether FindLongestMatch may fall back to the static
+	                  dictionary. */
+	bucketBits    uint
+	bucketSweep   int
+	hashLen       uint
+	useDictionary bool
+
+	/* Stored positions, truncated to 32 bits. */
+	buckets []uint32
+}
+
+/* Initialize allocates the bucket table: (1 << bucketBits) entries plus
+   bucketSweep extra ones, so that a sweep starting at the highest possible
+   key still stays inside the slice. params is not read by this hasher. */
+func (h *hashLongestMatchQuickly) Initialize(params *encoderParams) {
+	h.buckets = make([]uint32, 1<<h.bucketBits+h.bucketSweep)
+}
+
+/* Prepare resets the bucket table before new input is hashed.
+   For a small one-shot input only the slots that the input's own positions
+   hash to are zeroed; in every other case the whole table is zeroed. */
+func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) {
+	var partialLimit uint = (4 << h.bucketBits) >> 7
+
+	if !one_shot || input_size > partialLimit {
+		/* Clearing the table is not strictly necessary, but leftover
+		   entries would make the compression result stochastic (though
+		   still correct), because stale values can accidentally point at
+		   good backward references. */
+		for slot := range h.buckets {
+			h.buckets[slot] = 0
+		}
+		return
+	}
+
+	/* Partial preparation is much slower per entry than a bulk clear, so
+	   it is only used when the input is small compared to the table. */
+	for pos := uint(0); pos < input_size; pos++ {
+		first := h.HashBytes(data[pos:])
+		for k := 0; k < h.bucketSweep; k++ {
+			h.buckets[first+uint32(k)] = 0
+		}
+	}
+}
+
+/* Look at 5 bytes at &data[ix & mask].
+   Compute a hash from these, and store the value somewhere within
+   [ix .. ix+3]. */
+func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) {
+	var key uint32 = h.HashBytes(data[ix&mask:])
+	var off uint32 = uint32(ix>>3) % uint32(h.bucketSweep)
+	/* Wiggle the value with the bucket sweep range. */
+	h.buckets[key+off] = uint32(ix)
+}
+
+func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
+	var i uint
+	for i = ix_start; i < ix_end; i++ {
+		h.Store(data, mask, i)
+	}
+}
+
+/* StitchToPreviousBlock stores the hashes of the last three positions
+   before |position|. They could not be computed when the previous block was
+   written, because they need bytes of both the previous and the current
+   block. Nothing is stored when fewer than HashTypeLength()-1 new bytes
+   are available or when position is below 3. */
+func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
+	if num_bytes < h.HashTypeLength()-1 || position < 3 {
+		return
+	}
+
+	for back := uint(3); back > 0; back-- {
+		h.Store(ringbuffer, ringbuffer_mask, position-back)
+	}
+}
+
+/* PrepareDistanceCache is a no-op: this hasher only reads
+   distance_cache[0] (in FindLongestMatch) and needs no derived entries. */
+func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) {
+}
+
+/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
+   up to the length of max_length and stores the position cur_ix in the
+   hash table.
+
+   Candidates are tried in this order: the most recent distance
+   (distance_cache[0]), then the position(s) stored in the bucket slots of
+   the current hash key, and finally - only when useDictionary is set and
+   the score is still the one passed in - the static dictionary.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found. */
+func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
+	var best_len_in uint = out.len
+	var cur_ix_masked uint = cur_ix & ring_buffer_mask
+	var key uint32 = h.HashBytes(data[cur_ix_masked:])
+	/* The byte just past the best length found so far: a candidate can only
+	   beat the current best if it agrees on this byte, so it is used as a
+	   cheap pre-filter before the full comparison. */
+	var compare_char int = int(data[cur_ix_masked+best_len_in])
+	var min_score uint = out.score
+	var best_score uint = out.score
+	var best_len uint = best_len_in
+	var cached_backward uint = uint(distance_cache[0])
+	var prev_ix uint = cur_ix - cached_backward
+	var bucket []uint32
+	out.len_code_delta = 0
+	/* Unsigned arithmetic: this is false when the cached distance is zero
+	   or larger than cur_ix (the subtraction wrapped around). */
+	if prev_ix < cur_ix {
+		prev_ix &= uint(uint32(ring_buffer_mask))
+		if compare_char == int(data[prev_ix+best_len]) {
+			var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+			if len >= 4 {
+				var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
+				if best_score < score {
+					best_score = score
+					best_len = uint(len)
+					out.len = uint(len)
+					out.distance = cached_backward
+					out.score = best_score
+					compare_char = int(data[cur_ix_masked+best_len])
+					/* With a single slot per key, a last-distance hit ends
+					   the search: record the position and return without
+					   looking at the bucket or the dictionary. */
+					if h.bucketSweep == 1 {
+						h.buckets[key] = uint32(cur_ix)
+						return
+					}
+				}
+			}
+		}
+	}
+
+	if h.bucketSweep == 1 {
+		var backward uint
+		var len uint
+
+		/* Only one to look for, don't bother to prepare for a loop. */
+		prev_ix = uint(h.buckets[key])
+
+		/* The slot is overwritten right away, so the early returns below
+		   leave the table already updated. */
+		h.buckets[key] = uint32(cur_ix)
+		backward = cur_ix - prev_ix
+		prev_ix &= uint(uint32(ring_buffer_mask))
+		if compare_char != int(data[prev_ix+best_len_in]) {
+			return
+		}
+
+		if backward == 0 || backward > max_backward {
+			return
+		}
+
+		len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+		if len >= 4 {
+			var score uint = backwardReferenceScore(uint(len), backward)
+			if best_score < score {
+				out.len = uint(len)
+				out.distance = backward
+				out.score = score
+				return
+			}
+		}
+	} else {
+		bucket = h.buckets[key:]
+		var i int
+		prev_ix = uint(bucket[0])
+		bucket = bucket[1:]
+		/* The post statement is a function literal (an artifact of the
+		   mechanical translation from C): after every iteration, including
+		   those ended by continue, it advances i and loads the next slot of
+		   the sweep into prev_ix. */
+		for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
+			var backward uint = cur_ix - prev_ix
+			var len uint
+			prev_ix &= uint(uint32(ring_buffer_mask))
+			if compare_char != int(data[prev_ix+best_len]) {
+				continue
+			}
+
+			if backward == 0 || backward > max_backward {
+				continue
+			}
+
+			len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
+			if len >= 4 {
+				var score uint = backwardReferenceScore(uint(len), backward)
+				if best_score < score {
+					best_score = score
+					best_len = uint(len)
+					out.len = best_len
+					out.distance = backward
+					out.score = score
+					compare_char = int(data[cur_ix_masked+best_len])
+				}
+			}
+		}
+	}
+
+	/* Nothing above improved the incoming score: try the static dictionary. */
+	if h.useDictionary && min_score == out.score {
+		searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
+	}
+
+	/* Same slot selection as Store. */
+	h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix)
+}
@@ -0,0 +1,168 @@
+package brotli
+
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* NOTE: this hasher does not search in the dictionary. It is used as
+   backup-hasher, the main hasher already searches in it. */
+
+const kRollingHashMul32 uint32 = 69069
+
+const kInvalidPosHashRolling uint32 = 0xffffffff
+
+/* HashTypeLength reports 4. This hasher actually uses a longer forward
+   length, but returning a higher value here would hurt compression by the
+   main hasher when both are combined in a composite hasher. The hasher
+   checks the available forward length itself instead (see the max_length
+   test in FindLongestMatch). */
+func (*hashRolling) HashTypeLength() uint {
+	return 4
+}
+
+/* StoreLookahead reports 4, the same value as HashTypeLength. */
+func (*hashRolling) StoreLookahead() uint {
+	return 4
+}
+
+/* HashByte computes a code from a single byte. A lookup table of 256 values
+   could be used, but simply adding 1 works about as well. The result is
+   never zero, so a zero byte still contributes to the rolling hash. */
+func (*hashRolling) HashByte(b byte) uint32 {
+	return uint32(b) + 1
+}
+
+/* HashRollingFunctionInitial folds one more byte into the hash while the
+   window is still being filled: factor*state + HashByte(add), computed
+   with 32-bit wraparound. */
+func (h *hashRolling) HashRollingFunctionInitial(state uint32, add byte, factor uint32) uint32 {
+	next := factor * state
+	next += h.HashByte(add)
+	return next
+}
+
+/* HashRollingFunction slides the window by one step: the hash is scaled by
+   factor, the code of the incoming byte |add| is added and the code of the
+   outgoing byte |rem|, weighted by factor_remove, is subtracted. All
+   arithmetic wraps around at 32 bits. */
+func (h *hashRolling) HashRollingFunction(state uint32, add byte, rem byte, factor uint32, factor_remove uint32) uint32 {
+	incoming := h.HashByte(add)
+	outgoing := factor_remove * h.HashByte(rem)
+	return factor*state + incoming - outgoing
+}
+
+/* Rolling hash for long distance long string matches. Stores one position
+   per bucket, bucket key is computed over a long region (a window of 32
+   bytes, sampled every |jump| bytes). */
+type hashRolling struct {
+	hasherCommon
+
+	/* Step between hashed positions. It is also used as a bit mask
+	   (jump-1), which presumably requires a power of two - confirm where
+	   the field is set. */
+	jump int
+
+	/* state:         current rolling hash value.
+	   table:         last position seen for each stored hash code.
+	   next_ix:       first position not yet fed into the rolling hash.
+	   factor:        multiplier applied to the state at every step.
+	   factor_remove: weight of the byte that leaves the window. */
+	state         uint32
+	table         []uint32
+	next_ix       uint
+	factor        uint32
+	factor_remove uint32
+}
+
+func (h *hashRolling) Initialize(params *encoderParams) {
+	h.state = 0
+	h.next_ix = 0
+
+	h.factor = kRollingHashMul32
+
+	/* Compute the factor of the oldest byte to remove: factor**steps modulo
+	   0xffffffff (the multiplications rely on 32-bit overflow) */
+	h.factor_remove = 1
+
+	for i := 0; i < 32; i += h.jump {
+		h.factor_remove *= h.factor
+	}
+
+	h.table = make([]uint32, 16777216)
+	for i := 0; i < 16777216; i++ {
+		h.table[i] = kInvalidPosHashRolling
+	}
+}
+
+/* Prepare seeds the rolling hash with the first 32-byte window of data,
+   sampling every |jump| bytes. When fewer than 32 bytes are available the
+   hasher cannot be used and the state is left untouched. one_shot is not
+   read. */
+func (h *hashRolling) Prepare(one_shot bool, input_size uint, data []byte) {
+	if input_size >= 32 {
+		h.state = 0
+		for off := 0; off < 32; off += h.jump {
+			h.state = h.HashRollingFunctionInitial(h.state, data[off], h.factor)
+		}
+	}
+}
+
+/* Store is a no-op: this hasher fills its table lazily inside
+   FindLongestMatch instead of on explicit store requests. */
+func (*hashRolling) Store(data []byte, mask uint, ix uint) {
+}
+
+/* StoreRange is a no-op, like Store. */
+func (*hashRolling) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
+}
+
+func (h *hashRolling) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) {
+	var position_masked uint
+	/* In this case we must re-initialize the hasher from scratch from the
+	   current position. */
+
+	var available uint = num_bytes
+	if position&uint(h.jump-1) != 0 {
+		var diff uint = uint(h.jump) - (position & uint(h.jump-1))
+		if diff > available {
+			available = 0
+		} else {
+			available = available - diff
+		}
+		position += diff
+	}
+
+	position_masked = position & ring_buffer_mask
+
+	/* wrapping around ringbuffer not handled. */
+	if available > ring_buffer_mask-position_masked {
+		available = ring_buffer_mask - position_masked
+	}
+
+	h.Prepare(false, available, ringbuffer[position&ring_buffer_mask:])
+	h.next_ix = position
+}
+
+/* PrepareDistanceCache is a no-op: this hasher does not read the
+   distance cache. */
+func (*hashRolling) PrepareDistanceCache(distance_cache []int) {
+}
+
+/* FindLongestMatch advances the rolling hash from next_ix up to cur_ix,
+   recording positions in the table on the way, and reports a match at
+   cur_ix through |out| when the table holds an earlier position with the
+   same hash code that yields a longer and better scoring match.
+
+   The call returns without doing anything when cur_ix has any of the low
+   (jump-1) bits set or when max_length is below 32. dictionary,
+   distance_cache, gap and max_distance are not read. */
+func (h *hashRolling) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
+	var cur_ix_masked uint = cur_ix & ring_buffer_mask
+	var pos uint = h.next_ix
+
+	if cur_ix&uint(h.jump-1) != 0 {
+		return
+	}
+
+	/* Not enough lookahead */
+	if max_length < 32 {
+		return
+	}
+
+	for pos = h.next_ix; pos <= cur_ix; pos += uint(h.jump) {
+		/* Keep the low 30 bits of the state (16777216 * 64 == 1 << 30).
+		   Only codes below 16777216 index the table, so one hash value in
+		   64 is stored. */
+		var code uint32 = h.state & ((16777216 * 64) - 1)
+		var rem byte = data[pos&ring_buffer_mask]
+		var add byte = data[(pos+32)&ring_buffer_mask]
+		var found_ix uint = uint(kInvalidPosHashRolling)
+
+		/* Slide the window: |rem| leaves, |add| (32 bytes ahead) enters. */
+		h.state = h.HashRollingFunction(h.state, add, rem, h.factor, h.factor_remove)
+
+		if code < 16777216 {
+			found_ix = uint(h.table[code])
+			h.table[code] = uint32(pos)
+			/* A match is only evaluated for the position being queried;
+			   earlier positions merely update the table. */
+			if pos == cur_ix && uint32(found_ix) != kInvalidPosHashRolling {
+				/* The cast to 32-bit makes backward distances up to 4GB work even
+				   if cur_ix is above 4GB, despite using 32-bit values in the table. */
+				var backward uint = uint(uint32(cur_ix - found_ix))
+				if backward <= max_backward {
+					var found_ix_masked uint = found_ix & ring_buffer_mask
+					var len uint = findMatchLengthWithLimit(data[found_ix_masked:], data[cur_ix_masked:], max_length)
+					if len >= 4 && len > out.len {
+						var score uint = backwardReferenceScore(uint(len), backward)
+						if score > out.score {
+							out.len = uint(len)
+							out.distance = backward
+							out.score = score
+							out.len_code_delta = 0
+						}
+					}
+				}
+			}
+		}
+	}
+
+	h.next_ix = cur_ix + uint(h.jump)
+}
@@ -0,0 +1,226 @@
+package brotli
+
+import "math"
+
+/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
+const numHistogramDistanceSymbols = 544
+
+/* histogramLiteral counts how often each literal symbol occurs.
+   total_count_ is the sum of all counts; bit_cost_ is a cost value that the
+   clear function resets to math.MaxFloat64. */
+type histogramLiteral struct {
+	data_        [numLiteralSymbols]uint32
+	total_count_ uint
+	bit_cost_    float64
+}
+
+/* histogramClearLiteral zeroes every count and the total, and sets the bit
+   cost to math.MaxFloat64. */
+func histogramClearLiteral(self *histogramLiteral) {
+	*self = histogramLiteral{bit_cost_: math.MaxFloat64}
+}
+
+/* clearHistogramsLiteral clears the first |length| histograms of array. */
+func clearHistogramsLiteral(array []histogramLiteral, length uint) {
+	for i := uint(0); i < length; i++ {
+		histogramClearLiteral(&array[i])
+	}
+}
+
+/* histogramAddLiteral counts one occurrence of symbol val. */
+func histogramAddLiteral(self *histogramLiteral, val uint) {
+	self.total_count_++
+	self.data_[val]++
+}
+
+/* histogramAddVectorLiteral counts the first n symbols of p. */
+func histogramAddVectorLiteral(self *histogramLiteral, p []byte, n uint) {
+	self.total_count_ += n
+	for i := uint(0); i < n; i++ {
+		self.data_[p[i]]++
+	}
+}
+
+/* histogramAddHistogramLiteral adds all counts of v to self. */
+func histogramAddHistogramLiteral(self *histogramLiteral, v *histogramLiteral) {
+	self.total_count_ += v.total_count_
+	for sym := range self.data_ {
+		self.data_[sym] += v.data_[sym]
+	}
+}
+
+/* histogramDataSizeLiteral returns the number of symbols in the histogram. */
+func histogramDataSizeLiteral() uint {
+	return numLiteralSymbols
+}
+
+/* histogramCommand counts how often each command symbol occurs.
+   total_count_ is the sum of all counts; bit_cost_ is a cost value that the
+   clear function resets to math.MaxFloat64. */
+type histogramCommand struct {
+	data_        [numCommandSymbols]uint32
+	total_count_ uint
+	bit_cost_    float64
+}
+
+/* histogramClearCommand zeroes every count and the total, and sets the bit
+   cost to math.MaxFloat64. */
+func histogramClearCommand(self *histogramCommand) {
+	*self = histogramCommand{bit_cost_: math.MaxFloat64}
+}
+
+/* clearHistogramsCommand clears the first |length| histograms of array. */
+func clearHistogramsCommand(array []histogramCommand, length uint) {
+	for i := uint(0); i < length; i++ {
+		histogramClearCommand(&array[i])
+	}
+}
+
+/* histogramAddCommand counts one occurrence of symbol val. */
+func histogramAddCommand(self *histogramCommand, val uint) {
+	self.total_count_++
+	self.data_[val]++
+}
+
+/* histogramAddVectorCommand counts the first n symbols of p. */
+func histogramAddVectorCommand(self *histogramCommand, p []uint16, n uint) {
+	self.total_count_ += n
+	for i := uint(0); i < n; i++ {
+		self.data_[p[i]]++
+	}
+}
+
+/* histogramAddHistogramCommand adds all counts of v to self. */
+func histogramAddHistogramCommand(self *histogramCommand, v *histogramCommand) {
+	self.total_count_ += v.total_count_
+	for sym := range self.data_ {
+		self.data_[sym] += v.data_[sym]
+	}
+}
+
+/* histogramDataSizeCommand returns the number of symbols in the histogram. */
+func histogramDataSizeCommand() uint {
+	return numCommandSymbols
+}
+
+/* histogramDistance counts how often each distance symbol occurs.
+   total_count_ is the sum of all counts; bit_cost_ is a cost value that the
+   clear function resets to math.MaxFloat64. */
+type histogramDistance struct {
+	data_        [numDistanceSymbols]uint32
+	total_count_ uint
+	bit_cost_    float64
+}
+
+/* histogramClearDistance zeroes every count and the total, and sets the bit
+   cost to math.MaxFloat64. */
+func histogramClearDistance(self *histogramDistance) {
+	*self = histogramDistance{bit_cost_: math.MaxFloat64}
+}
+
+/* clearHistogramsDistance clears the first |length| histograms of array. */
+func clearHistogramsDistance(array []histogramDistance, length uint) {
+	for i := uint(0); i < length; i++ {
+		histogramClearDistance(&array[i])
+	}
+}
+
+/* histogramAddDistance counts one occurrence of symbol val. */
+func histogramAddDistance(self *histogramDistance, val uint) {
+	self.total_count_++
+	self.data_[val]++
+}
+
+/* histogramAddVectorDistance counts the first n symbols of p. */
+func histogramAddVectorDistance(self *histogramDistance, p []uint16, n uint) {
+	self.total_count_ += n
+	for i := uint(0); i < n; i++ {
+		self.data_[p[i]]++
+	}
+}
+
+/* histogramAddHistogramDistance adds all counts of v to self. */
+func histogramAddHistogramDistance(self *histogramDistance, v *histogramDistance) {
+	self.total_count_ += v.total_count_
+	for sym := range self.data_ {
+		self.data_[sym] += v.data_[sym]
+	}
+}
+
+/* histogramDataSizeDistance returns the number of symbols in the histogram. */
+func histogramDataSizeDistance() uint {
+	return numDistanceSymbols
+}
+
+/* blockSplitIterator walks a blockSplit one symbol at a time.
+   idx_ is the index of the current block, type_ its block type and length_
+   the number of symbols still left in it. */
+type blockSplitIterator struct {
+	split_  *blockSplit
+	idx_    uint
+	type_   uint
+	length_ uint
+}
+
+/* initBlockSplitIterator points the iterator at the first block of split.
+   The remaining length is the first block's length, or 0 when the split
+   has no blocks. */
+func initBlockSplitIterator(self *blockSplitIterator, split *blockSplit) {
+	self.split_, self.idx_, self.type_ = split, 0, 0
+
+	var first uint
+	if len(split.lengths) > 0 {
+		first = uint(split.lengths[0])
+	}
+	self.length_ = first
+}
+
+/* blockSplitIteratorNext consumes one symbol. When the current block is
+   exhausted it first moves on to the next block and loads that block's
+   type and length. */
+func blockSplitIteratorNext(self *blockSplitIterator) {
+	if self.length_ == 0 {
+		next := self.idx_ + 1
+		self.idx_ = next
+		self.type_ = uint(self.split_.types[next])
+		self.length_ = uint(self.split_.lengths[next])
+	}
+
+	self.length_--
+}
+
+/* buildHistogramsWithContext replays cmds over the ring buffer and fills the
+   three histogram sets: one entry per command in insert_and_copy_histograms,
+   one per inserted literal in literal_histograms and one per explicit copy
+   distance in copy_dist_histograms. The histogram to update is selected by
+   the block type that the matching block split assigns to the symbol and,
+   for literals (when context_modes is non-nil) and distances, by a context
+   value as well. start_pos is the ring buffer position of the first
+   literal; prev_byte and prev_byte2 are the two bytes before it. */
+func buildHistogramsWithContext(cmds []command, literal_split *blockSplit, insert_and_copy_split *blockSplit, dist_split *blockSplit, ringbuffer []byte, start_pos uint, mask uint, prev_byte byte, prev_byte2 byte, context_modes []int, literal_histograms []histogramLiteral, insert_and_copy_histograms []histogramCommand, copy_dist_histograms []histogramDistance) {
+	var pos uint = start_pos
+	var literal_it blockSplitIterator
+	var insert_and_copy_it blockSplitIterator
+	var dist_it blockSplitIterator
+
+	initBlockSplitIterator(&literal_it, literal_split)
+	initBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split)
+	initBlockSplitIterator(&dist_it, dist_split)
+	for i := range cmds {
+		var cmd *command = &cmds[i]
+		var j uint
+		blockSplitIteratorNext(&insert_and_copy_it)
+		histogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_], uint(cmd.cmd_prefix_))
+
+		/* TODO: unwrap iterator blocks. */
+		for j = uint(cmd.insert_len_); j != 0; j-- {
+			var context uint
+			blockSplitIteratorNext(&literal_it)
+			context = literal_it.type_
+			if context_modes != nil {
+				/* Combine the block type with the context derived from the
+				   two preceding bytes. */
+				var lut contextLUT = getContextLUT(context_modes[context])
+				context = (context << literalContextBits) + uint(getContext(prev_byte, prev_byte2, lut))
+			}
+
+			histogramAddLiteral(&literal_histograms[context], uint(ringbuffer[pos&mask]))
+			prev_byte2 = prev_byte
+			prev_byte = ringbuffer[pos&mask]
+			pos++
+		}
+
+		/* Skip the copied bytes and reload the two bytes before the next
+		   literal from the ring buffer. */
+		pos += uint(commandCopyLen(cmd))
+		if commandCopyLen(cmd) != 0 {
+			prev_byte2 = ringbuffer[(pos-2)&mask]
+			prev_byte = ringbuffer[(pos-1)&mask]
+			/* NOTE(review): presumably command prefixes below 128 carry no
+			   explicit distance symbol - confirm against the command
+			   encoding. */
+			if cmd.cmd_prefix_ >= 128 {
+				var context uint
+				blockSplitIteratorNext(&dist_it)
+				context = uint(uint32(dist_it.type_<<distanceContextBits) + commandDistanceContext(cmd))
+				/* Only the low 10 bits of dist_prefix_ are the symbol. */
+				histogramAddDistance(&copy_dist_histograms[context], uint(cmd.dist_prefix_)&0x3FF)
+			}
+		}
+	}
+}
@@ -0,0 +1,184 @@
+package brotli
+
+import (
+	"compress/gzip"
+	"io"
+	"net/http"
+	"strings"
+)
+
+// HTTPCompressor chooses a compression method (brotli, gzip, or none) based on
+// the Accept-Encoding header, sets the Content-Encoding header, and returns a
+// WriteCloser that implements that compression. The Close method must be called
+// before the current HTTP handler returns.
+//
+// The response is always marked as varying on Accept-Encoding. A Vary value
+// that the handler set earlier is kept and Accept-Encoding is added to it,
+// unless it is already listed or the existing value is "*".
+func HTTPCompressor(w http.ResponseWriter, r *http.Request) io.WriteCloser {
+	// The chosen encoding depends on Accept-Encoding, so caches must key on
+	// it even when the handler already varies on something else.
+	hdr := w.Header()
+	listed := false
+	for _, line := range hdr["Vary"] {
+		for _, field := range strings.Split(line, ",") {
+			field = strings.TrimSpace(field)
+			if field == "*" || strings.EqualFold(field, "Accept-Encoding") {
+				listed = true
+			}
+		}
+	}
+	if !listed {
+		if hdr.Get("Vary") == "" {
+			hdr.Set("Vary", "Accept-Encoding")
+		} else {
+			hdr.Add("Vary", "Accept-Encoding")
+		}
+	}
+
+	encoding := negotiateContentEncoding(r, []string{"br", "gzip"})
+	switch encoding {
+	case "br":
+		w.Header().Set("Content-Encoding", "br")
+		return NewWriterV2(w, DefaultCompression)
+	case "gzip":
+		w.Header().Set("Content-Encoding", "gzip")
+		return gzip.NewWriter(w)
+	}
+	// "identity" or no acceptable encoding: pass the data through unchanged.
+	return nopCloser{w}
+}
+
+// negotiateContentEncoding returns the best offered content encoding for the
+// request's Accept-Encoding header. If two offers match with equal weight,
+// the offer earlier in the list is preferred. If the header matches none of
+// the offers (or is absent), "identity" is returned. If the best match has a
+// quality of zero, "" is returned.
+func negotiateContentEncoding(r *http.Request, offers []string) string {
+	accepted := parseAccept(r.Header, "Accept-Encoding")
+
+	chosen, chosenQ := "identity", -1.0
+	for _, candidate := range offers {
+		for _, a := range accepted {
+			if a.Value != "*" && a.Value != candidate {
+				continue
+			}
+			// Strictly greater, so earlier offers win ties.
+			if a.Q > chosenQ {
+				chosenQ, chosen = a.Q, candidate
+			}
+		}
+	}
+
+	if chosenQ == 0 {
+		return ""
+	}
+	return chosen
+}
+
+// acceptSpec describes one item of an Accept* header: the item's token
+// (Value) and its quality (Q). parseAccept sets Q to 1.0 when the item has
+// no q parameter.
+type acceptSpec struct {
+	Value string
+	Q     float64
+}
+
+// parseAccept parses Accept* headers.
+//
+// Every value stored under key is read as a comma-separated list of items
+// of the form "token" or "token;q=quality". An item without a q parameter
+// gets quality 1.0. Parsing of a header value stops at the first item that
+// is malformed (empty token, a parameter other than q, or an invalid
+// quality); that item is dropped, the items before it are kept, and parsing
+// continues with the next header value.
+//
+// key indexes the header map directly, so it must already be in canonical
+// form.
+func parseAccept(header http.Header, key string) (specs []acceptSpec) {
+loop:
+	for _, s := range header[key] {
+		for {
+			var spec acceptSpec
+			spec.Value, s = expectTokenSlash(s)
+			if spec.Value == "" {
+				continue loop
+			}
+			spec.Q = 1.0
+			s = skipSpace(s)
+			if strings.HasPrefix(s, ";") {
+				s = skipSpace(s[1:])
+				if !strings.HasPrefix(s, "q=") {
+					continue loop
+				}
+				spec.Q, s = expectQuality(s[2:])
+				// expectQuality reports a malformed quality as -1.
+				if spec.Q < 0.0 {
+					continue loop
+				}
+			}
+			specs = append(specs, spec)
+			s = skipSpace(s)
+			// Anything other than a comma ends this header value.
+			if !strings.HasPrefix(s, ",") {
+				continue loop
+			}
+			s = skipSpace(s[1:])
+		}
+	}
+	return
+}
+
+// skipSpace returns s without its leading bytes that octetTypes marks as
+// space.
+func skipSpace(s string) (rest string) {
+	n := 0
+	for n < len(s) && octetTypes[s[n]]&isSpace != 0 {
+		n++
+	}
+	return s[n:]
+}
+
+// expectTokenSlash splits s after its longest prefix that consists only of
+// token characters and '/'. token is that prefix (possibly empty) and rest
+// is the remainder of s.
+func expectTokenSlash(s string) (token, rest string) {
+	end := 0
+	for end < len(s) {
+		c := s[end]
+		if c != '/' && octetTypes[c]&isToken == 0 {
+			break
+		}
+		end++
+	}
+	return s[:end], s[end:]
+}
+
+func expectQuality(s string) (q float64, rest string) {
+	switch {
+	case len(s) == 0:
+		return -1, ""
+	case s[0] == '0':
+		q = 0
+	case s[0] == '1':
+		q = 1
+	default:
+		return -1, ""
+	}
+	s = s[1:]
+	if !strings.HasPrefix(s, ".") {
+		return q, s
+	}
+	s = s[1:]
+	i := 0
+	n := 0
+	d := 1
+	for ; i < len(s); i++ {
+		b := s[i]
+		if b < '0' || b > '9' {
+			break
+		}
+		n = n*10 + int(b) - '0'
+		d *= 10
+	}
+	return q + float64(n)/float64(d), s[i:]
+}
+
+// octetTypes classifies every byte value with the RFC 2616 character
+// classes that the Accept* parser needs. It is filled in by init.
+var octetTypes [256]octetType
+
+// octetType is a bit set of character classes.
+type octetType byte
+
+const (
+	isToken octetType = 1 << iota // byte may be part of a token
+	isSpace                       // byte is SP, HT, CR or LF
+)
+
+func init() {
+	// Relevant definitions from RFC 2616:
+	//
+	// CHAR       = <any US-ASCII character (octets 0 - 127)>
+	// CTL        = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
+	// SP         = <US-ASCII SP, space (32)>
+	// HT         = <US-ASCII HT, horizontal-tab (9)>
+	// separators = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\" | <">
+	//              | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HT
+	// token      = 1*<any CHAR except CTLs or separators>
+	const (
+		separators = " \t\"(),/:;<=>?@[]\\{}"
+		whitespace = " \t\r\n"
+	)
+
+	for c := range octetTypes {
+		var flags octetType
+		if strings.ContainsRune(whitespace, rune(c)) {
+			flags |= isSpace
+		}
+		// A CHAR that is not a CTL is an octet in 32..126.
+		visible := c > 31 && c < 127
+		if visible && !strings.ContainsRune(separators, rune(c)) {
+			flags |= isToken
+		}
+		octetTypes[c] = flags
+	}
+}
@@ -0,0 +1,653 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+const huffmanMaxCodeLength = 15
+
+/* Maximum possible Huffman table size for an alphabet size of (index * 32),
+   max code length 15 and root table bits 8. */
+var kMaxHuffmanTableSize = []uint16{
+	256,
+	402,
+	436,
+	468,
+	500,
+	534,
+	566,
+	598,
+	630,
+	662,
+	694,
+	726,
+	758,
+	790,
+	822,
+	854,
+	886,
+	920,
+	952,
+	984,
+	1016,
+	1048,
+	1080,
+	1112,
+	1144,
+	1176,
+	1208,
+	1240,
+	1272,
+	1304,
+	1336,
+	1368,
+	1400,
+	1432,
+	1464,
+	1496,
+	1528,
+}
+
+/* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */
+const huffmanMaxSize26 = 396
+
+/* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */
+const huffmanMaxSize258 = 632
+
+/* BROTLI_MAX_CONTEXT_MAP_SYMBOLS == 272 */
+const huffmanMaxSize272 = 646
+
+const huffmanMaxCodeLengthCodeLength = 5
+
+/* huffmanCode is one entry of a Huffman lookup table.
+   Do not create this struct directly - use the constructHuffmanCode
+   constructor below! */
+type huffmanCode struct {
+	bits  byte
+	value uint16
+}
+
+/* constructHuffmanCode returns a table entry with the given fields. */
+func constructHuffmanCode(bits byte, value uint16) huffmanCode {
+	return huffmanCode{bits: bits, value: value}
+}
+
+/* Builds Huffman lookup table assuming code lengths are in symbol order. */
+
+/* Builds Huffman lookup table assuming code lengths are in symbol order.
+   Returns size of resulting table. */
+
+/* Builds a simple Huffman table. The |num_symbols| parameter is to be
+   interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
+   2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
+   4 means 4 symbols with lengths [1, 2, 3, 3]. */
+
+/* Contains a collection of Huffman trees with the same alphabet size.
+   max_symbol is needed due to simple codes, since log2(alphabet_size) could
+   be greater than log2(max_symbol).
+   NOTE(review): htrees presumably holds one table per tree, each a
+   sub-slice of the shared codes storage, with num_htrees entries - confirm
+   where the group is initialized. */
+type huffmanTreeGroup struct {
+	htrees        [][]huffmanCode
+	codes         []huffmanCode
+	alphabet_size uint16
+	max_symbol    uint16
+	num_htrees    uint16
+}
+
+const reverseBitsMax = 8
+
+const reverseBitsBase = 0
+
+var kReverseBits = [1 << reverseBitsMax]byte{
+	0x00,
+	0x80,
+	0x40,
+	0xC0,
+	0x20,
+	0xA0,
+	0x60,
+	0xE0,
+	0x10,
+	0x90,
+	0x50,
+	0xD0,
+	0x30,
+	0xB0,
+	0x70,
+	0xF0,
+	0x08,
+	0x88,
+	0x48,
+	0xC8,
+	0x28,
+	0xA8,
+	0x68,
+	0xE8,
+	0x18,
+	0x98,
+	0x58,
+	0xD8,
+	0x38,
+	0xB8,
+	0x78,
+	0xF8,
+	0x04,
+	0x84,
+	0x44,
+	0xC4,
+	0x24,
+	0xA4,
+	0x64,
+	0xE4,
+	0x14,
+	0x94,
+	0x54,
+	0xD4,
+	0x34,
+	0xB4,
+	0x74,
+	0xF4,
+	0x0C,
+	0x8C,
+	0x4C,
+	0xCC,
+	0x2C,
+	0xAC,
+	0x6C,
+	0xEC,
+	0x1C,
+	0x9C,
+	0x5C,
+	0xDC,
+	0x3C,
+	0xBC,
+	0x7C,
+	0xFC,
+	0x02,
+	0x82,
+	0x42,
+	0xC2,
+	0x22,
+	0xA2,
+	0x62,
+	0xE2,
+	0x12,
+	0x92,
+	0x52,
+	0xD2,
+	0x32,
+	0xB2,
+	0x72,
+	0xF2,
+	0x0A,
+	0x8A,
+	0x4A,
+	0xCA,
+	0x2A,
+	0xAA,
+	0x6A,
+	0xEA,
+	0x1A,
+	0x9A,
+	0x5A,
+	0xDA,
+	0x3A,
+	0xBA,
+	0x7A,
+	0xFA,
+	0x06,
+	0x86,
+	0x46,
+	0xC6,
+	0x26,
+	0xA6,
+	0x66,
+	0xE6,
+	0x16,
+	0x96,
+	0x56,
+	0xD6,
+	0x36,
+	0xB6,
+	0x76,
+	0xF6,
+	0x0E,
+	0x8E,
+	0x4E,
+	0xCE,
+	0x2E,
+	0xAE,
+	0x6E,
+	0xEE,
+	0x1E,
+	0x9E,
+	0x5E,
+	0xDE,
+	0x3E,
+	0xBE,
+	0x7E,
+	0xFE,
+	0x01,
+	0x81,
+	0x41,
+	0xC1,
+	0x21,
+	0xA1,
+	0x61,
+	0xE1,
+	0x11,
+	0x91,
+	0x51,
+	0xD1,
+	0x31,
+	0xB1,
+	0x71,
+	0xF1,
+	0x09,
+	0x89,
+	0x49,
+	0xC9,
+	0x29,
+	0xA9,
+	0x69,
+	0xE9,
+	0x19,
+	0x99,
+	0x59,
+	0xD9,
+	0x39,
+	0xB9,
+	0x79,
+	0xF9,
+	0x05,
+	0x85,
+	0x45,
+	0xC5,
+	0x25,
+	0xA5,
+	0x65,
+	0xE5,
+	0x15,
+	0x95,
+	0x55,
+	0xD5,
+	0x35,
+	0xB5,
+	0x75,
+	0xF5,
+	0x0D,
+	0x8D,
+	0x4D,
+	0xCD,
+	0x2D,
+	0xAD,
+	0x6D,
+	0xED,
+	0x1D,
+	0x9D,
+	0x5D,
+	0xDD,
+	0x3D,
+	0xBD,
+	0x7D,
+	0xFD,
+	0x03,
+	0x83,
+	0x43,
+	0xC3,
+	0x23,
+	0xA3,
+	0x63,
+	0xE3,
+	0x13,
+	0x93,
+	0x53,
+	0xD3,
+	0x33,
+	0xB3,
+	0x73,
+	0xF3,
+	0x0B,
+	0x8B,
+	0x4B,
+	0xCB,
+	0x2B,
+	0xAB,
+	0x6B,
+	0xEB,
+	0x1B,
+	0x9B,
+	0x5B,
+	0xDB,
+	0x3B,
+	0xBB,
+	0x7B,
+	0xFB,
+	0x07,
+	0x87,
+	0x47,
+	0xC7,
+	0x27,
+	0xA7,
+	0x67,
+	0xE7,
+	0x17,
+	0x97,
+	0x57,
+	0xD7,
+	0x37,
+	0xB7,
+	0x77,
+	0xF7,
+	0x0F,
+	0x8F,
+	0x4F,
+	0xCF,
+	0x2F,
+	0xAF,
+	0x6F,
+	0xEF,
+	0x1F,
+	0x9F,
+	0x5F,
+	0xDF,
+	0x3F,
+	0xBF,
+	0x7F,
+	0xFF,
+}
+
+const reverseBitsLowest = (uint64(1) << (reverseBitsMax - 1 + reverseBitsBase))
+
+/* Returns reverse(num >> reverseBitsBase, reverseBitsMax),
+   where reverse(value, len) is the bit-wise reversal of the len least
+   significant bits of value.
+   This is a lookup in the 256-entry kReverseBits table, so num must be
+   below 256; larger values make the index expression panic. */
+func reverseBits8(num uint64) uint64 {
+	return uint64(kReverseBits[num])
+}
+
+/* Stores code in table[end-step], table[end-2*step], ..., table[0]
+   (table[end] itself is not written).
+   Assumes that end is an integer multiple of step. At least one entry is
+   always written, because the bound is only checked after the store. */
+func replicateValue(table []huffmanCode, step int, end int, code huffmanCode) {
+	for pos := end - step; ; pos -= step {
+		table[pos] = code
+		if pos <= 0 {
+			break
+		}
+	}
+}
+
+/* Returns the table width of the next 2nd level table. |count| is the histogram
+   of bit lengths for the remaining symbols, |len| is the code length of the
+   next processed symbol. */
+func nextTableBitSize(count []uint16, len int, root_bits int) int {
+	var left int = 1 << uint(len-root_bits)
+	for len < huffmanMaxCodeLength {
+		left -= int(count[len])
+		if left <= 0 {
+			break
+		}
+		len++
+		left <<= 1
+	}
+
+	return len - root_bits
+}
+
+/* buildCodeLengthsHuffmanTable fills |table| with the lookup table for the
+   code length alphabet. |code_lengths| holds the code length of each of the
+   codeLengthCodes symbols and |count| the number of symbols per code length.
+   Exactly 1 << huffmanMaxCodeLengthCodeLength table entries are written. */
+func buildCodeLengthsHuffmanTable(table []huffmanCode, code_lengths []byte, count []uint16) {
+	var code huffmanCode /* current table entry */
+	var symbol int       /* symbol index in original or sorted table */
+	var key uint64       /* prefix code */
+	var key_step uint64  /* prefix code addend */
+	var step int         /* step size to replicate values in current table */
+	var table_size int   /* size of current table */
+	var sorted [codeLengthCodes]int /* symbols sorted by code length */
+	var offset [huffmanMaxCodeLengthCodeLength + 1]int /* offsets in sorted table for each length */
+	var bits int
+	var bits_count int
+	assert(huffmanMaxCodeLengthCodeLength <= reverseBitsMax)
+
+	/* Generate offsets into sorted symbol table by code length. */
+	symbol = -1
+
+	bits = 1
+	var i int
+	for i = 0; i < huffmanMaxCodeLengthCodeLength; i++ {
+		symbol += int(count[bits])
+		offset[bits] = symbol
+		bits++
+	}
+
+	/* Symbols with code length 0 are placed after all other symbols. */
+	offset[0] = codeLengthCodes - 1
+
+	/* Sort symbols by length, by symbol order within each length. */
+	symbol = codeLengthCodes
+
+	/* The symbols are walked backwards in groups of 6 and the loop only
+	   tests for 0 between groups, which presumably relies on
+	   codeLengthCodes being a multiple of 6 - confirm its definition. */
+	for {
+		var i int
+		for i = 0; i < 6; i++ {
+			symbol--
+			sorted[offset[code_lengths[symbol]]] = symbol
+			offset[code_lengths[symbol]]--
+		}
+		if symbol == 0 {
+			break
+		}
+	}
+
+	table_size = 1 << huffmanMaxCodeLengthCodeLength
+
+	/* Special case: all symbols but one have 0 code length.
+	   offset[0] started at codeLengthCodes - 1 and was decremented once per
+	   zero-length symbol, so it is 0 exactly in that case. */
+	if offset[0] == 0 {
+		code = constructHuffmanCode(0, uint16(sorted[0]))
+		for key = 0; key < uint64(table_size); key++ {
+			table[key] = code
+		}
+
+		return
+	}
+
+	/* Fill in table. */
+	key = 0
+
+	key_step = reverseBitsLowest
+	symbol = 0
+	bits = 1
+	step = 2
+	for {
+		for bits_count = int(count[bits]); bits_count != 0; bits_count-- {
+			code = constructHuffmanCode(byte(bits), uint16(sorted[symbol]))
+			symbol++
+			replicateValue(table[reverseBits8(key):], step, table_size, code)
+			key += key_step
+		}
+
+		/* Codes that are one bit longer repeat half as often in the table. */
+		step <<= 1
+		key_step >>= 1
+		bits++
+		if bits > huffmanMaxCodeLengthCodeLength {
+			break
+		}
+	}
+}
+
+/* buildHuffmanTable builds a two-level Huffman lookup table in |root_table|:
+   a root table of 1 << root_bits entries, followed by the 2nd level tables
+   for codes longer than root_bits. |count| is the number of symbols per code
+   length; its entries for lengths above root_bits are counted down to zero
+   while the 2nd level tables are filled. Returns the total number of table
+   entries used.
+   NOTE(review): symbol_lists appears to hold one linked list of symbols per
+   code length, with the list heads addressed by the negative index
+   (length - (huffmanMaxCodeLength + 1)) and 0xFFFF marking an empty list -
+   confirm against symbolList / symbolListGet. */
+func buildHuffmanTable(root_table []huffmanCode, root_bits int, symbol_lists symbolList, count []uint16) uint32 {
+	var code huffmanCode     /* current table entry */
+	var table []huffmanCode  /* next available space in table */
+	var len int              /* current code length */
+	var symbol int           /* symbol index in original or sorted table */
+	var key uint64           /* prefix code */
+	var key_step uint64      /* prefix code addend */
+	var sub_key uint64       /* 2nd level table prefix code */
+	var sub_key_step uint64  /* 2nd level table prefix code addend */
+	var step int             /* step size to replicate values in current table */
+	var table_bits int       /* key length of current table */
+	var table_size int       /* size of current table */
+	var total_size int       /* sum of root table size and 2nd level table sizes */
+	var max_length int = -1
+	var bits int
+	var bits_count int
+
+	assert(root_bits <= reverseBitsMax)
+	assert(huffmanMaxCodeLength-root_bits <= reverseBitsMax)
+
+	/* Find the longest code length in use: walk down from index -1 while
+	   the list is empty, then convert the index back to a length. */
+	for symbolListGet(symbol_lists, max_length) == 0xFFFF {
+		max_length--
+	}
+	max_length += huffmanMaxCodeLength + 1
+
+	table = root_table
+	table_bits = root_bits
+	table_size = 1 << uint(table_bits)
+	total_size = table_size
+
+	/* Fill in the root table. Reduce the table size if possible,
+	   and create the repetitions by copying. */
+	if table_bits > max_length {
+		table_bits = max_length
+		table_size = 1 << uint(table_bits)
+	}
+
+	key = 0
+	key_step = reverseBitsLowest
+	bits = 1
+	step = 2
+	for {
+		symbol = bits - (huffmanMaxCodeLength + 1)
+		for bits_count = int(count[bits]); bits_count != 0; bits_count-- {
+			symbol = int(symbolListGet(symbol_lists, symbol))
+			code = constructHuffmanCode(byte(bits), uint16(symbol))
+			replicateValue(table[reverseBits8(key):], step, table_size, code)
+			key += key_step
+		}
+
+		step <<= 1
+		key_step >>= 1
+		bits++
+		if bits > table_bits {
+			break
+		}
+	}
+
+	/* If root_bits != table_bits then replicate to fill the remaining slots. */
+	for total_size != table_size {
+		copy(table[table_size:], table[:uint(table_size)])
+		table_size <<= 1
+	}
+
+	/* Fill in 2nd level tables and add pointers to root table. */
+	key_step = reverseBitsLowest >> uint(root_bits-1)
+
+	/* sub_key == reverseBitsLowest << 1 means that the current 2nd level
+	   table is full (or that none has been started yet). */
+	sub_key = reverseBitsLowest << 1
+	sub_key_step = reverseBitsLowest
+	len = root_bits + 1
+	step = 2
+	for ; len <= max_length; len++ {
+		symbol = len - (huffmanMaxCodeLength + 1)
+		for ; count[len] != 0; count[len]-- {
+			if sub_key == reverseBitsLowest<<1 {
+				/* Start a new 2nd level table right after the previous one. */
+				table = table[table_size:]
+				table_bits = nextTableBitSize(count, int(len), root_bits)
+				table_size = 1 << uint(table_bits)
+				total_size += table_size
+				sub_key = reverseBits8(key)
+				key += key_step
+				/* The root entry stores the offset of the new table relative
+				   to that root entry. The difference of the slice capacities
+				   is the distance of |table| from the start of root_table. */
+				root_table[sub_key] = constructHuffmanCode(byte(table_bits+root_bits), uint16(uint64(uint(-cap(table)+cap(root_table)))-sub_key))
+				sub_key = 0
+			}
+
+			symbol = int(symbolListGet(symbol_lists, symbol))
+			code = constructHuffmanCode(byte(len-root_bits), uint16(symbol))
+			replicateValue(table[reverseBits8(sub_key):], step, table_size, code)
+			sub_key += sub_key_step
+		}
+
+		step <<= 1
+		sub_key_step >>= 1
+	}
+
+	return uint32(total_size)
+}
+
+/* Builds a simple Huffman table with 1 << root_bits entries and returns that
+   size. The |num_symbols| parameter is to be interpreted as follows:
+   0 means 1 symbol, 1 means 2 symbols, 2 means 3 symbols, 3 means 4 symbols
+   with lengths [2, 2, 2, 2], 4 means 4 symbols with lengths [1, 2, 3, 3].
+   |val| holds the symbols; for 3 and 4 it is reordered in place. Any other
+   value of num_symbols leaves table[0] as it was and only replicates it. */
+func buildSimpleHuffmanTable(table []huffmanCode, root_bits int, val []uint16, num_symbols uint32) uint32 {
+	var table_size uint32 = 1
+	var goal_size uint32 = 1 << uint(root_bits)
+	switch num_symbols {
+	case 0:
+		table[0] = constructHuffmanCode(0, val[0])
+
+	case 1:
+		/* Two 1-bit codes; the smaller symbol gets the first entry. */
+		if val[1] > val[0] {
+			table[0] = constructHuffmanCode(1, val[0])
+			table[1] = constructHuffmanCode(1, val[1])
+		} else {
+			table[0] = constructHuffmanCode(1, val[1])
+			table[1] = constructHuffmanCode(1, val[0])
+		}
+
+		table_size = 2
+
+	case 2:
+		/* val[0] has a 1-bit code and fills the even entries; the other two
+		   symbols have 2-bit codes, ordered by symbol value. */
+		table[0] = constructHuffmanCode(1, val[0])
+		table[2] = constructHuffmanCode(1, val[0])
+		if val[2] > val[1] {
+			table[1] = constructHuffmanCode(2, val[1])
+			table[3] = constructHuffmanCode(2, val[2])
+		} else {
+			table[1] = constructHuffmanCode(2, val[2])
+			table[3] = constructHuffmanCode(2, val[1])
+		}
+
+		table_size = 4
+
+	case 3:
+		var i int
+		var k int
+		/* Sort the four symbols in ascending order. */
+		for i = 0; i < 3; i++ {
+			for k = i + 1; k < 4; k++ {
+				if val[k] < val[i] {
+					var t uint16 = val[k]
+					val[k] = val[i]
+					val[i] = t
+				}
+			}
+		}
+
+		/* The table is indexed by bit-reversed codes, hence the 0, 2, 1, 3
+		   order of the entries. */
+		table[0] = constructHuffmanCode(2, val[0])
+		table[2] = constructHuffmanCode(2, val[1])
+		table[1] = constructHuffmanCode(2, val[2])
+		table[3] = constructHuffmanCode(2, val[3])
+		table_size = 4
+
+	case 4:
+		/* Order the two 3-bit symbols by value. */
+		if val[3] < val[2] {
+			var t uint16 = val[3]
+			val[3] = val[2]
+			val[2] = t
+		}
+
+		table[0] = constructHuffmanCode(1, val[0])
+		table[1] = constructHuffmanCode(2, val[1])
+		table[2] = constructHuffmanCode(1, val[0])
+		table[3] = constructHuffmanCode(3, val[2])
+		table[4] = constructHuffmanCode(1, val[0])
+		table[5] = constructHuffmanCode(2, val[1])
+		table[6] = constructHuffmanCode(1, val[0])
+		table[7] = constructHuffmanCode(3, val[3])
+		table_size = 8
+	}
+
+	/* Double the filled part until the table has goal_size entries. */
+	for table_size != goal_size {
+		copy(table[table_size:], table[:uint(table_size)])
+		table_size <<= 1
+	}
+
+	return goal_size
+}
@@ -0,0 +1,182 @@
+package brotli
+
+/*
+utf8Position reports which position of a UTF-8 sequence the byte after c
+occupies, given c and the byte before it (last): 0 for the first byte of a
+character, 1 for the second byte, 2 for the third. Non-zero results are passed
+through brotli_min_size_t together with clamp.
+*/
+func utf8Position(last uint, c uint, clamp uint) uint {
+	switch {
+	case c < 128:
+		/* Next one is the 'Byte 1' again. */
+		return 0
+	case c >= 192:
+		/* Next one is the 'Byte 2' of utf-8 encoding. */
+		return brotli_min_size_t(1, clamp)
+	case last < 0xE0:
+		/* Decided over the last byte: completed two or three byte coding. */
+		return 0
+	default:
+		/* Next one is the 'Byte 3' of utf-8 encoding. */
+		return brotli_min_size_t(2, clamp)
+	}
+}
+
+/*
+decideMultiByteStatsLevel scans len bytes of data starting at pos (indices
+are wrapped with mask) and returns the UTF-8 modelling level to use: 0 when
+fewer than 25 bytes fall in sequence positions 1 or 2, otherwise 1.
+*/
+func decideMultiByteStatsLevel(pos uint, len uint, mask uint, data []byte) uint {
+	var counts [3]uint
+	var prev uint
+	for off := uint(0); off < len; off++ {
+		cur := uint(data[(pos+off)&mask])
+		counts[utf8Position(prev, cur, 2)]++
+		prev = cur
+	}
+
+	/* The level should be 2 when counts[2] >= 500, but 1 compresses better,
+	   so level 2 is never selected. */
+	if counts[1]+counts[2] < 25 {
+		return 0
+	}
+
+	return 1
+}
+
+/*
+estimateBitCostsForLiteralsUTF8 writes into cost[i], for every i in [0, len),
+an estimated bit cost for the literal data[(pos+i)&mask].
+
+The estimate comes from up to three byte histograms, one per UTF-8 sequence
+position as classified by utf8Position, maintained over a sliding window of
+about window_half (495) bytes on either side of the current byte. cost must
+have at least len elements.
+*/
+func estimateBitCostsForLiteralsUTF8(pos uint, len uint, mask uint, data []byte, cost []float32) {
+	var max_utf8 uint = decideMultiByteStatsLevel(pos, uint(len), mask, data)
+	/* Bootstrap histograms. */
+	var histogram = [3][256]uint{[256]uint{0}}
+	var window_half uint = 495
+	var in_window uint = brotli_min_size_t(window_half, uint(len))
+	/* Number of bytes currently counted in each of the three histograms. */
+	var in_window_utf8 = [3]uint{0}
+	/* max_utf8 is 0 (normal ASCII single byte modeling),
+	   1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
+
+	var i uint
+	{
+		/* Seed the histograms with the first in_window bytes. */
+		var last_c uint = 0
+		var utf8_pos uint = 0
+		for i = 0; i < in_window; i++ {
+			var c uint = uint(data[(pos+i)&mask])
+			histogram[utf8_pos][c]++
+			in_window_utf8[utf8_pos]++
+			utf8_pos = utf8Position(last_c, c, max_utf8)
+			last_c = c
+		}
+	}
+
+	/* Compute bit costs with sliding window. */
+	for i = 0; i < len; i++ {
+		if i >= window_half {
+			/* c and last_c are the two bytes preceding the byte that leaves
+			   the window; they are taken as 0 when they would lie before pos. */
+			var c uint
+			var last_c uint
+			if i < window_half+1 {
+				c = 0
+			} else {
+				c = uint(data[(pos+i-window_half-1)&mask])
+			}
+			if i < window_half+2 {
+				last_c = 0
+			} else {
+				last_c = uint(data[(pos+i-window_half-2)&mask])
+			}
+			/* Remove a byte in the past. */
+
+			var utf8_pos2 uint = utf8Position(last_c, c, max_utf8)
+			histogram[utf8_pos2][data[(pos+i-window_half)&mask]]--
+			in_window_utf8[utf8_pos2]--
+		}
+
+		if i+window_half < len {
+			/* c and last_c are the two bytes preceding the byte that enters
+			   the window. */
+			var c uint = uint(data[(pos+i+window_half-1)&mask])
+			var last_c uint = uint(data[(pos+i+window_half-2)&mask])
+			/* Add a byte in the future. */
+
+			var utf8_pos2 uint = utf8Position(last_c, c, max_utf8)
+			histogram[utf8_pos2][data[(pos+i+window_half)&mask]]++
+			in_window_utf8[utf8_pos2]++
+		}
+		{
+			/* Classify the current byte from its two predecessors (0 when
+			   they would lie before pos) and price it against the matching
+			   histogram. */
+			var c uint
+			var last_c uint
+			if i < 1 {
+				c = 0
+			} else {
+				c = uint(data[(pos+i-1)&mask])
+			}
+			if i < 2 {
+				last_c = 0
+			} else {
+				last_c = uint(data[(pos+i-2)&mask])
+			}
+			var utf8_pos uint = utf8Position(last_c, c, max_utf8)
+			var masked_pos uint = (pos + i) & mask
+			var histo uint = histogram[utf8_pos][data[masked_pos]]
+			var lit_cost float64
+			/* Treat an empty bucket as a count of one. */
+			if histo == 0 {
+				histo = 1
+			}
+
+			lit_cost = fastLog2(in_window_utf8[utf8_pos]) - fastLog2(histo)
+			lit_cost += 0.02905
+			/* Costs below one bit are moved halfway towards one bit. */
+			if lit_cost < 1.0 {
+				lit_cost *= 0.5
+				lit_cost += 0.5
+			}
+
+			/* Make the first bytes more expensive -- seems to help, not sure why.
+			   Perhaps because the entropy source is changing its properties
+			   rapidly in the beginning of the file, perhaps because the beginning
+			   of the data is a statistical "anomaly". */
+			if i < 2000 {
+				/* The surcharge grows from 0.35 at i == 0 towards 0.7. */
+				lit_cost += 0.7 - (float64(2000-i) / 2000.0 * 0.35)
+			}
+
+			cost[i] = float32(lit_cost)
+		}
+	}
+}
+
+/*
+estimateBitCostsForLiterals writes into cost[i], for every i in [0, len), an
+estimated bit cost for the literal data[(pos+i)&mask].
+
+Input that isMostlyUTF8 accepts is delegated to
+estimateBitCostsForLiteralsUTF8. Everything else is priced against a single
+byte histogram kept over a sliding window of window_half (2000) bytes on
+either side of the current byte.
+*/
+func estimateBitCostsForLiterals(pos uint, len uint, mask uint, data []byte, cost []float32) {
+	if isMostlyUTF8(data, pos, mask, uint(len), kMinUTF8Ratio) {
+		estimateBitCostsForLiteralsUTF8(pos, uint(len), mask, data, cost)
+		return
+	}
+
+	var window_half uint = 2000
+	var histogram [256]uint
+	var in_window uint = brotli_min_size_t(window_half, uint(len))
+
+	/* Bootstrap histogram. */
+	for k := uint(0); k < in_window; k++ {
+		histogram[data[(pos+k)&mask]]++
+	}
+
+	/* Compute bit costs with sliding window. */
+	for i := uint(0); i < len; i++ {
+		if i >= window_half {
+			/* Remove a byte in the past. */
+			histogram[data[(pos+i-window_half)&mask]]--
+			in_window--
+		}
+
+		if i+window_half < len {
+			/* Add a byte in the future. */
+			histogram[data[(pos+i+window_half)&mask]]++
+			in_window++
+		}
+
+		/* Treat an empty bucket as a count of one. */
+		histo := histogram[data[(pos+i)&mask]]
+		if histo == 0 {
+			histo = 1
+		}
+
+		var lit_cost float64 = fastLog2(in_window) - fastLog2(histo)
+		lit_cost += 0.029
+		/* Costs below one bit are moved halfway towards one bit. */
+		if lit_cost < 1.0 {
+			lit_cost *= 0.5
+			lit_cost += 0.5
+		}
+
+		cost[i] = float32(lit_cost)
+	}
+}
@@ -0,0 +1,45 @@
+package matchfinder
+
+// An absoluteMatch is like a Match, but it stores indexes into the byte
+// stream instead of lengths. The zero value is used by M4 to mean "no match".
+type absoluteMatch struct {
+	// Start is the index of the first byte.
+	Start int
+
+	// End is the index of the byte after the last byte
+	// (so that End - Start = Length).
+	End int
+
+	// Match is the index of the previous data that matches
+	// (Start - Match = Distance).
+	Match int
+}
+
+// A matchEmitter collects the matches produced by a MatchFinder, converting
+// absolute positions into the relative form stored in Match.
+type matchEmitter struct {
+	// Dst is the destination slice that Matches are added to.
+	Dst []Match
+
+	// NextEmit is the index of the next byte to emit.
+	NextEmit int
+}
+
+// emit appends m to Dst, counting the bytes between NextEmit and m.Start as
+// unmatched, and advances NextEmit to the end of m.
+func (e *matchEmitter) emit(m absoluteMatch) {
+	rel := Match{
+		Unmatched: m.Start - e.NextEmit,
+		Length:    m.End - m.Start,
+		Distance:  m.Start - m.Match,
+	}
+	e.Dst = append(e.Dst, rel)
+	e.NextEmit = m.End
+}
+
+// trim caps the end of m at maxEnd and emits the result, unless that leaves
+// fewer than minLength bytes, in which case m is dropped.
+func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) {
+	end := m.End
+	if end > maxEnd {
+		end = maxEnd
+	}
+	if end-m.Start < minLength {
+		return
+	}
+	m.End = end
+	e.emit(m)
+}
@@ -0,0 +1,169 @@
+package matchfinder
+
+import (
+	"encoding/binary"
+)
+
+// M0 is an implementation of the MatchFinder interface based
+// on the algorithm used by snappy, but modified to be more like the algorithm
+// used by compression level 0 of the brotli reference implementation.
+//
+// It has a maximum block size of 65536 bytes.
+type M0 struct {
+	// Lazy turns on "lazy matching," for higher compression but less speed.
+	Lazy bool
+
+	// MaxDistance, if nonzero, makes FindMatches skip candidates that lie
+	// more than this many bytes back.
+	MaxDistance int
+	// MaxLength, if nonzero, is the length that longer matches are cut to.
+	MaxLength   int
+}
+
+// Reset does nothing; FindMatches keeps no state between calls.
+func (M0) Reset() {}
+
+// Parameters of the M0 hash table.
+const (
+	// m0HashLen is the number of low-order input bytes that M0.hash keeps.
+	m0HashLen = 5
+
+	// m0TableBits is the number of bits in a table index.
+	m0TableBits = 14
+	m0TableSize = 1 << m0TableBits
+	// NOTE(review): m0Shift is not referenced by the code visible in this
+	// file; hash shifts by 64 - m0TableBits instead. Confirm before removing.
+	m0Shift     = 32 - m0TableBits
+	// m0TableMask is redundant, but helps the compiler eliminate bounds
+	// checks.
+	m0TableMask = m0TableSize - 1
+)
+
+func (m M0) hash(data uint64) uint64 {
+	hash := (data << (64 - 8*m0HashLen)) * hashMul64
+	return hash >> (64 - m0TableBits)
+}
+
+// FindMatches looks for matches in src, appends them to dst, and returns dst.
+// src must not be longer than 65536 bytes.
+//
+// Inputs shorter than minNonLiteralBlockSize (17 bytes) are returned as a
+// single all-literal Match. It panics with "block too long" when src is
+// longer than 65536 bytes. The hash table is local to each call, so matches
+// never refer to data from earlier calls.
+func (m M0) FindMatches(dst []Match, src []byte) []Match {
+	const inputMargin = 16 - 1
+	const minNonLiteralBlockSize = 1 + 1 + inputMargin
+
+	if len(src) < minNonLiteralBlockSize {
+		dst = append(dst, Match{
+			Unmatched: len(src),
+		})
+		return dst
+	}
+	if len(src) > 65536 {
+		panic("block too long")
+	}
+
+	// table maps a hash to a position in src. Positions fit in a uint16
+	// because src is at most 65536 bytes long; an unused entry reads as
+	// position 0.
+	var table [m0TableSize]uint16
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := len(src) - inputMargin
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := 0
+
+	// The encoded form must start with a literal, as there are no previous
+	// bytes to copy, so we start looking for hash matches at s == 1.
+	s := 1
+	nextHash := m.hash(binary.LittleEndian.Uint64(src[s:]))
+
+	for {
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned (or skipped), look at every third byte, etc.. When a match
+		// is found, immediately go back to looking at every byte. This is a
+		// small loss (~5% performance, ~0.1% density) for compressible data
+		// due to more bookkeeping, but for non-compressible data (such as
+		// JPEG) it's a huge win since the compressor quickly "realizes" the
+		// data is incompressible and doesn't bother looking for matches
+		// everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip := 32
+
+		nextS := s
+		candidate := 0
+		for {
+			s = nextS
+			bytesBetweenHashLookups := skip >> 5
+			nextS = s + bytesBetweenHashLookups
+			skip += bytesBetweenHashLookups
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			// Look up the previous position with this hash, then record s
+			// in its place.
+			candidate = int(table[nextHash&m0TableMask])
+			table[nextHash&m0TableMask] = uint16(s)
+			nextHash = m.hash(binary.LittleEndian.Uint64(src[nextS:]))
+			if m.MaxDistance != 0 && s-candidate > m.MaxDistance {
+				continue
+			}
+			// A hash hit only counts if the first four bytes really match.
+			if binary.LittleEndian.Uint32(src[s:]) == binary.LittleEndian.Uint32(src[candidate:]) {
+				break
+			}
+		}
+
+		// Invariant: we have a 4-byte match at s.
+		base := s
+		s = extendMatch(src, candidate+4, s+4)
+
+		origBase := base
+		if m.Lazy && base+1 < sLimit {
+			// Lazy matching: also try a match starting one byte later, and
+			// switch to it only if it is more than one byte longer.
+			newBase := base + 1
+			h := m.hash(binary.LittleEndian.Uint64(src[newBase:]))
+			newCandidate := int(table[h&m0TableMask])
+			table[h&m0TableMask] = uint16(newBase)
+			okDistance := true
+			if m.MaxDistance != 0 && newBase-newCandidate > m.MaxDistance {
+				okDistance = false
+			}
+			if okDistance && binary.LittleEndian.Uint32(src[newBase:]) == binary.LittleEndian.Uint32(src[newCandidate:]) {
+				newS := extendMatch(src, newCandidate+4, newBase+4)
+				if newS-newBase > s-base+1 {
+					s = newS
+					base = newBase
+					candidate = newCandidate
+				}
+			}
+		}
+
+		// Cut the match to MaxLength when a limit is configured.
+		if m.MaxLength != 0 && s-base > m.MaxLength {
+			s = base + m.MaxLength
+		}
+		dst = append(dst, Match{
+			Unmatched: base - nextEmit,
+			Length:    s - base,
+			Distance:  base - candidate,
+		})
+		nextEmit = s
+		if s >= sLimit {
+			goto emitRemainder
+		}
+
+		if m.Lazy {
+			// If lazy matching is enabled, we update the hash table for
+			// every byte in the match.
+			for i := origBase + 2; i < s-1; i++ {
+				x := binary.LittleEndian.Uint64(src[i:])
+				table[m.hash(x)&m0TableMask] = uint16(i)
+			}
+		}
+
+		// We could immediately start working at s now, but to improve
+		// compression we first update the hash table at s-1 and at s.
+		x := binary.LittleEndian.Uint64(src[s-1:])
+		prevHash := m.hash(x >> 0)
+		table[prevHash&m0TableMask] = uint16(s - 1)
+		nextHash = m.hash(x >> 8)
+	}
+
+emitRemainder:
+	// Whatever follows the last match is emitted as a trailing literal run.
+	if nextEmit < len(src) {
+		dst = append(dst, Match{
+			Unmatched: len(src) - nextEmit,
+		})
+	}
+	return dst
+}
@@ -0,0 +1,297 @@
+package matchfinder
+
+import (
+	"encoding/binary"
+	"math/bits"
+	"runtime"
+)
+
+// M4 is an implementation of the MatchFinder
+// interface that uses a hash table to find matches,
+// optional match chains,
+// and the advanced parsing technique from
+// https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html.
+//
+// Configuration fields left at zero are replaced by their defaults the first
+// time FindMatches is called.
+type M4 struct {
+	// MaxDistance is the maximum distance (in bytes) to look back for
+	// a match. The default is 65535.
+	MaxDistance int
+
+	// MinLength is the length of the shortest match to return.
+	// The default is 4.
+	MinLength int
+
+	// HashLen is the number of bytes to use to calculate the hashes.
+	// The maximum is 8 and the default is 6.
+	HashLen int
+
+	// TableBits is the number of bits in the hash table indexes.
+	// The default is 17 (128K entries).
+	TableBits int
+
+	// ChainLength is how many entries to search on the "match chain" of older
+	// locations with the same hash as the current location.
+	ChainLength int
+
+	// DistanceBitCost is used when comparing two matches to see
+	// which is better. The comparison is primarily based on the length
+	// of the matches, but it can also take the distance into account,
+	// in terms of the number of bits needed to represent the distance.
+	// One byte of length is given a score of 256, so 32 (256/8) would
+	// be a reasonable first guess for the value of one bit.
+	// (The default is 0, which bases the comparison solely on length.)
+	DistanceBitCost int
+
+	// table maps a hash value to the most recent position in history that
+	// produced it; 0 is treated as "no entry".
+	table []uint32
+	// chain holds, for a position in history, the distance back to the
+	// previous position with the same hash (0 when none was recorded). It is
+	// only maintained when ChainLength > 0.
+	chain []uint16
+
+	// history is the data seen since the last Reset, trimmed to MaxDistance
+	// bytes once it grows past twice that.
+	history []byte
+}
+
+// Reset forgets all previously seen data: the history and chain are emptied
+// (keeping their capacity) and every hash table entry is cleared.
+func (q *M4) Reset() {
+	q.chain = q.chain[:0]
+	q.history = q.history[:0]
+	for slot := range q.table {
+		q.table[slot] = 0
+	}
+}
+
+// score rates a match for comparison with other matches: 256 points per byte
+// of length, plus DistanceBitCost points for every leading zero bit of the
+// 32-bit distance (so shorter distances score higher).
+func (q *M4) score(m absoluteMatch) int {
+	length := m.End - m.Start
+	distance := uint32(m.Start - m.Match)
+	return length*256 + bits.LeadingZeros32(distance)*q.DistanceBitCost
+}
+
+// FindMatches looks for matches in src, appends them to dst, and returns dst.
+//
+// Zero-valued configuration fields are first replaced by their defaults
+// (MaxDistance 65535, MinLength 4, HashLen 6, TableBits 17). src is appended
+// to q.history, so matches may refer to data from earlier calls until Reset
+// is called. Up to three pending matches are held back so that overlapping
+// candidates can be compared before anything is emitted.
+func (q *M4) FindMatches(dst []Match, src []byte) []Match {
+	if q.MaxDistance == 0 {
+		q.MaxDistance = 65535
+	}
+	if q.MinLength == 0 {
+		q.MinLength = 4
+	}
+	if q.HashLen == 0 {
+		q.HashLen = 6
+	}
+	if q.TableBits == 0 {
+		q.TableBits = 17
+	}
+	if len(q.table) < 1<<q.TableBits {
+		q.table = make([]uint32, 1<<q.TableBits)
+	}
+
+	e := matchEmitter{Dst: dst}
+
+	if len(q.history) > q.MaxDistance*2 {
+		// Trim down the history buffer.
+		delta := len(q.history) - q.MaxDistance
+		copy(q.history, q.history[delta:])
+		q.history = q.history[:q.MaxDistance]
+		if q.ChainLength > 0 {
+			// NOTE(review): the chain is truncated but, unlike history, its
+			// entries are not shifted down by delta. Chain candidates are
+			// verified byte-for-byte before use, so this looks like a
+			// match-quality issue rather than a correctness one; confirm.
+			q.chain = q.chain[:q.MaxDistance]
+		}
+
+		// Rebase the table positions; entries that fall off the front
+		// become 0, which means "no entry".
+		for i, v := range q.table {
+			newV := int(v) - delta
+			if newV < 0 {
+				newV = 0
+			}
+			q.table[i] = uint32(newV)
+		}
+	}
+
+	// Append src to the history buffer.
+	e.NextEmit = len(q.history)
+	q.history = append(q.history, src...)
+	if q.ChainLength > 0 {
+		q.chain = append(q.chain, make([]uint16, len(src))...)
+	}
+	// From here on src is the whole history and i indexes into it.
+	src = q.history
+
+	// matches stores the matches that have been found but not emitted,
+	// in reverse order. (matches[0] is the most recent one.)
+	var matches [3]absoluteMatch
+	// The loop stops 7 bytes early because each step reads 8 bytes at i.
+	for i := e.NextEmit; i < len(src)-7; i++ {
+		if matches[0] != (absoluteMatch{}) && i >= matches[0].End {
+			// We have found some matches, and we're far enough along that we probably
+			// won't find overlapping matches, so we might as well emit them.
+			if matches[1] != (absoluteMatch{}) {
+				e.trim(matches[1], matches[0].Start, q.MinLength)
+			}
+			e.emit(matches[0])
+			matches = [3]absoluteMatch{}
+		}
+
+		// Calculate and store the hash.
+		h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - q.TableBits)
+		candidate := int(q.table[h])
+		q.table[h] = uint32(i)
+		if q.ChainLength > 0 && candidate != 0 {
+			// Record the distance to the previous position with this hash,
+			// if it fits in the chain's uint16.
+			delta := i - candidate
+			if delta < 1<<16 {
+				q.chain[i] = uint16(delta)
+			}
+		}
+
+		// Inside the current match, only one position near its end is
+		// searched.
+		if i < matches[0].End && i != matches[0].End+2-q.HashLen {
+			continue
+		}
+		if candidate == 0 || i-candidate > q.MaxDistance {
+			continue
+		}
+
+		// Look for a match.
+		var currentMatch absoluteMatch
+
+		// A candidate at the same distance as the pending match is skipped.
+		if i-candidate != matches[0].Start-matches[0].Match {
+			if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
+				m := extendMatch2(src, i, candidate, e.NextEmit)
+				if m.End-m.Start > q.MinLength {
+					currentMatch = m
+				}
+			}
+		}
+
+		// Walk up to ChainLength older positions with the same hash and keep
+		// the best-scoring match.
+		for j := 0; j < q.ChainLength; j++ {
+			delta := q.chain[candidate]
+			if delta == 0 {
+				break
+			}
+			candidate -= int(delta)
+			if candidate <= 0 || i-candidate > q.MaxDistance {
+				break
+			}
+			if i-candidate != matches[0].Start-matches[0].Match {
+				if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
+					m := extendMatch2(src, i, candidate, e.NextEmit)
+					if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
+						currentMatch = m
+					}
+				}
+			}
+		}
+
+		if currentMatch.End-currentMatch.Start < q.MinLength {
+			continue
+		}
+
+		overlapPenalty := 0
+		if matches[0] != (absoluteMatch{}) {
+			overlapPenalty = 275
+			if currentMatch.Start <= matches[1].End {
+				// This match would completely replace the previous match,
+				// so there is no penalty for overlap.
+				overlapPenalty = 0
+			}
+		}
+
+		// Keep the new match only if it beats the pending one by more than
+		// the penalty.
+		if q.score(currentMatch) <= q.score(matches[0])+overlapPenalty {
+			continue
+		}
+
+		// Push the new match onto the front of the pending list.
+		matches = [3]absoluteMatch{
+			currentMatch,
+			matches[0],
+			matches[1],
+		}
+
+		if matches[2] == (absoluteMatch{}) {
+			continue
+		}
+
+		// We have three matches, so it's time to emit one and/or eliminate one.
+		switch {
+		case matches[0].Start < matches[2].End:
+			// The first and third matches overlap; discard the one in between.
+			matches = [3]absoluteMatch{
+				matches[0],
+				matches[2],
+				absoluteMatch{},
+			}
+
+		case matches[0].Start < matches[2].End+q.MinLength:
+			// The first and third matches don't overlap, but there's no room for
+			// another match between them. Emit the first match and discard the second.
+			e.emit(matches[2])
+			matches = [3]absoluteMatch{
+				matches[0],
+				absoluteMatch{},
+				absoluteMatch{},
+			}
+
+		default:
+			// Emit the first match, shortening it if necessary to avoid overlap with the second.
+			e.trim(matches[2], matches[1].Start, q.MinLength)
+			matches[2] = absoluteMatch{}
+		}
+	}
+
+	// We've found all the matches now; emit the remaining ones.
+	if matches[1] != (absoluteMatch{}) {
+		e.trim(matches[1], matches[0].Start, q.MinLength)
+	}
+	if matches[0] != (absoluteMatch{}) {
+		e.emit(matches[0])
+	}
+
+	// Whatever follows the last match is emitted as a trailing literal run.
+	dst = e.Dst
+	if e.NextEmit < len(src) {
+		dst = append(dst, Match{
+			Unmatched: len(src) - e.NextEmit,
+		})
+	}
+
+	return dst
+}
+
+// hashMul64 is the multiplier of the multiplicative hashes used by M0 and M4.
+const hashMul64 = 0x1E35A7BD1E35A7BD
+
+// extendMatch returns the largest k such that k <= len(src) and that
+// src[i:i+k-j] and src[j:k] have the same contents.
+//
+// It assumes that:
+//
+//	0 <= i && i < j && j <= len(src)
+//
+// On amd64 and 386 the comparison runs a machine word at a time while enough
+// input remains; the tail, and every other architecture, is compared byte by
+// byte.
+func extendMatch(src []byte, i, j int) int {
+	n := len(src)
+
+	if runtime.GOARCH == "amd64" {
+		// Compare 8 bytes per step while more than 8 bytes remain after j.
+		for ; j+8 < n; i, j = i+8, j+8 {
+			diff := binary.LittleEndian.Uint64(src[i:]) ^ binary.LittleEndian.Uint64(src[j:])
+			if diff != 0 {
+				// The words were loaded little-endian, so the lowest set bit
+				// of the XOR lies in the first differing byte; the shift by 3
+				// converts a bit index to a byte index.
+				return j + bits.TrailingZeros64(diff)>>3
+			}
+		}
+	} else if runtime.GOARCH == "386" {
+		// On a 32-bit CPU, do the same 4 bytes at a time.
+		for ; j+4 < n; i, j = i+4, j+4 {
+			diff := binary.LittleEndian.Uint32(src[i:]) ^ binary.LittleEndian.Uint32(src[j:])
+			if diff != 0 {
+				return j + bits.TrailingZeros32(diff)>>3
+			}
+		}
+	}
+
+	// Finish (or, on other architectures, do everything) one byte at a time.
+	for j < n {
+		if src[i] != src[j] {
+			break
+		}
+		i++
+		j++
+	}
+	return j
+}
+
+// extendMatch2 grows a 4-byte match at src[start] and src[candidate] in both
+// directions: forward as far as the data keeps matching, and backward while
+// the preceding bytes match, but with Start no lower than min and Match no
+// lower than 0.
+func extendMatch2(src []byte, start, candidate, min int) absoluteMatch {
+	m := absoluteMatch{
+		Start: start,
+		End:   extendMatch(src, candidate+4, start+4),
+		Match: candidate,
+	}
+	for m.Start > min && m.Match > 0 && src[m.Start-1] == src[m.Match-1] {
+		m.Start--
+		m.Match--
+	}
+	return m
+}
@@ -0,0 +1,103 @@
+// Package matchfinder defines reusable components for data compression.
+//
+// Many compression libraries have two main parts:
+//   - Something that looks for repeated sequences of bytes
+//   - An encoder for the compressed data format (often an entropy coder)
+//
+// Although these are logically two separate steps, the implementations are
+// usually closely tied together. You can't use flate's matcher with snappy's
+// encoder, for example. This package defines interfaces and an intermediate
+// representation to allow mixing and matching compression components.
+package matchfinder
+
+import "io"
+
+// A Match is the basic unit of LZ77 compression: a run of Unmatched literal
+// bytes followed by a copy of Length bytes from Distance bytes back.
+type Match struct {
+	Unmatched int // the number of unmatched bytes since the previous match
+	Length    int // the number of bytes in the matched string; it may be 0 at the end of the input
+	Distance  int // how far back in the stream to copy from
+}
+
+// A MatchFinder performs the LZ77 stage of compression, looking for matches.
+type MatchFinder interface {
+	// FindMatches looks for matches in src, appends them to dst, and returns dst.
+	// Writer passes each block to FindMatches exactly once, in stream order.
+	FindMatches(dst []Match, src []byte) []Match
+
+	// Reset clears any internal state, preparing the MatchFinder to be used with
+	// a new stream.
+	Reset()
+}
+
+// An Encoder encodes the data in its final format.
+type Encoder interface {
+	// Encode appends the encoded format of src to dst, using the match
+	// information from matches. Writer sets lastBlock to true only for the
+	// block it writes from Close.
+	Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte
+
+	// Reset clears any internal state, preparing the Encoder to be used with
+	// a new stream.
+	Reset()
+}
+
+// A Writer uses MatchFinder and Encoder to write compressed data to Dest.
+type Writer struct {
+	Dest        io.Writer
+	MatchFinder MatchFinder
+	Encoder     Encoder
+
+	// BlockSize is the number of bytes to compress at a time. If it is zero,
+	// each Write operation will be treated as one block.
+	BlockSize int
+
+	// err is the result of the most recent write to Dest; once it is
+	// non-nil, Write returns it without doing any work.
+	err     error
+	// inBuf holds input that has not yet filled a whole block (used only
+	// when BlockSize is nonzero).
+	inBuf   []byte
+	// outBuf and matches are scratch buffers reused from block to block.
+	outBuf  []byte
+	matches []Match
+}
+
+func (w *Writer) Write(p []byte) (n int, err error) {
+	if w.err != nil {
+		return 0, w.err
+	}
+
+	if w.BlockSize == 0 {
+		return w.writeBlock(p, false)
+	}
+
+	w.inBuf = append(w.inBuf, p...)
+	var pos int
+	for pos = 0; pos+w.BlockSize <= len(w.inBuf) && w.err == nil; pos += w.BlockSize {
+		w.writeBlock(w.inBuf[pos:pos+w.BlockSize], false)
+	}
+	if pos > 0 {
+		n := copy(w.inBuf, w.inBuf[pos:])
+		w.inBuf = w.inBuf[:n]
+	}
+
+	return len(p), w.err
+}
+
+// writeBlock runs p through the MatchFinder and the Encoder as one block and
+// writes the encoded bytes to Dest. It stores the result of that write in
+// w.err and returns len(p) together with it.
+func (w *Writer) writeBlock(p []byte, lastBlock bool) (n int, err error) {
+	w.matches = w.MatchFinder.FindMatches(w.matches[:0], p)
+	w.outBuf = w.Encoder.Encode(w.outBuf[:0], p, w.matches, lastBlock)
+	_, w.err = w.Dest.Write(w.outBuf)
+	return len(p), w.err
+}
+
+// Close encodes any buffered input as the final block, writes it to Dest, and
+// returns the resulting error. It does not close Dest.
+//
+// If an earlier write already failed, Close returns that error without
+// writing anything: writeBlock overwrites w.err, so another write could hide
+// the failure and would append a final block to a stream that is already
+// incomplete.
+func (w *Writer) Close() error {
+	if w.err != nil {
+		return w.err
+	}
+	w.writeBlock(w.inBuf, true)
+	w.inBuf = w.inBuf[:0]
+	return w.err
+}
+
+// Reset prepares w for a new stream written to newDest: the MatchFinder and
+// Encoder are reset, any stored error is cleared, and the internal buffers
+// are emptied while keeping their capacity.
+func (w *Writer) Reset(newDest io.Writer) {
+	w.MatchFinder.Reset()
+	w.Encoder.Reset()
+
+	w.err = nil
+	w.inBuf, w.outBuf, w.matches = w.inBuf[:0], w.outBuf[:0], w.matches[:0]
+	w.Dest = newDest
+}
@@ -0,0 +1,53 @@
+package matchfinder
+
+import "fmt"
+
+// A TextEncoder is an Encoder that produces a human-readable representation of
+// the LZ77 compression. Matches are replaced with <Length,Distance> symbols.
+type TextEncoder struct{}
+
+// Reset does nothing; a TextEncoder has no state.
+func (t TextEncoder) Reset() {}
+
+// Encode appends the text form of src to dst and returns it: unmatched bytes
+// are copied through, each match of nonzero length becomes a
+// "<Length,Distance>" marker, and any bytes of src not covered by matches are
+// copied at the end. lastBlock is ignored.
+func (t TextEncoder) Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte {
+	consumed := 0
+	for k := range matches {
+		m := matches[k]
+		if n := m.Unmatched; n > 0 {
+			dst = append(dst, src[consumed:consumed+n]...)
+			consumed += n
+		}
+		if m.Length > 0 {
+			dst = append(dst, fmt.Sprintf("<%d,%d>", m.Length, m.Distance)...)
+			consumed += m.Length
+		}
+	}
+
+	if consumed < len(src) {
+		dst = append(dst, src[consumed:]...)
+	}
+	return dst
+}
+
+// A NoMatchFinder implements MatchFinder, but doesn't find any matches.
+// It can be used to implement the equivalent of the standard library flate package's
+// HuffmanOnly setting.
+type NoMatchFinder struct{}
+
+// Reset does nothing; a NoMatchFinder has no state.
+func (n NoMatchFinder) Reset() {}
+
+// FindMatches appends a single Match that marks all of src as unmatched and
+// returns the extended dst.
+func (n NoMatchFinder) FindMatches(dst []Match, src []byte) []Match {
+	literalsOnly := Match{Unmatched: len(src)}
+	return append(dst, literalsOnly)
+}
+
+// AutoReset wraps a MatchFinder that can return references to data in previous
+// blocks, and calls Reset before each block. It is useful for (e.g.) using a
+// snappy Encoder with a MatchFinder designed for flate. (Snappy doesn't
+// support references between blocks.)
+type AutoReset struct {
+	MatchFinder
+}
+
+// FindMatches resets the wrapped MatchFinder and then lets it search src, so
+// every block is matched in isolation.
+func (a AutoReset) FindMatches(dst []Match, src []byte) []Match {
+	inner := a.MatchFinder
+	inner.Reset()
+	return inner.FindMatches(dst, src)
+}
@@ -0,0 +1,66 @@
+package brotli
+
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/*
+Dynamically grows array capacity to at least the requested size
+T: data type
+A: array
+C: capacity
+R: requested size
+*/
+func brotli_ensure_capacity_uint8_t(a *[]byte, c *uint, r uint) {
+	if *c < r {
+		var new_size uint = *c
+		if new_size == 0 {
+			new_size = r
+		}
+
+		for new_size < r {
+			new_size *= 2
+		}
+
+		if cap(*a) < int(new_size) {
+			var new_array []byte = make([]byte, new_size)
+			if *c != 0 {
+				copy(new_array, (*a)[:*c])
+			}
+
+			*a = new_array
+		} else {
+			*a = (*a)[:new_size]
+		}
+
+		*c = new_size
+	}
+}
+
+func brotli_ensure_capacity_uint32_t(a *[]uint32, c *uint, r uint) {
+	var new_array []uint32
+	if *c < r {
+		var new_size uint = *c
+		if new_size == 0 {
+			new_size = r
+		}
+
+		for new_size < r {
+			new_size *= 2
+		}
+
+		if cap(*a) < int(new_size) {
+			new_array = make([]uint32, new_size)
+			if *c != 0 {
+				copy(new_array, (*a)[:*c])
+			}
+
+			*a = new_array
+		} else {
+			*a = (*a)[:new_size]
+		}
+		*c = new_size
+	}
+}
@@ -0,0 +1,574 @@
+package brotli
+
+import (
+	"sync"
+)
+
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Algorithms for distributing the literals and commands of a metablock between
+   block types and contexts. */
+
+/* metaBlockSplit collects the results of splitting one meta-block: a block
+   split per category (literal, command, distance), the literal and distance
+   context maps, and the histograms for each category.
+
+   Every slice has a companion *_size field that the builder functions in
+   this file set to the number of entries in use. The slices are kept when an
+   instance is recycled through metaBlockPool, so their capacity may exceed
+   the corresponding size. */
+type metaBlockSplit struct {
+	literal_split             blockSplit
+	command_split             blockSplit
+	distance_split            blockSplit
+	literal_context_map       []uint32
+	literal_context_map_size  uint
+	distance_context_map      []uint32
+	distance_context_map_size uint
+	literal_histograms        []histogramLiteral
+	literal_histograms_size   uint
+	command_histograms        []histogramCommand
+	command_histograms_size   uint
+	distance_histograms       []histogramDistance
+	distance_histograms_size  uint
+}
+
+/* metaBlockPool recycles metaBlockSplit values so that their slices can be
+   reused by later meta-blocks. */
+var metaBlockPool sync.Pool
+
+/* getMetaBlockSplit returns a metaBlockSplit ready for use: a recycled one
+   from metaBlockPool when available, otherwise a newly allocated one.
+
+   A recycled value has its three block splits re-initialized, its context
+   map sizes zeroed, and its context-map and histogram slices truncated to
+   length zero (capacity is retained). */
+func getMetaBlockSplit() *metaBlockSplit {
+	recycled, _ := metaBlockPool.Get().(*metaBlockSplit)
+	if recycled == nil {
+		/* Pool was empty (or held something unexpected): start fresh. */
+		return &metaBlockSplit{}
+	}
+
+	initBlockSplit(&recycled.literal_split)
+	initBlockSplit(&recycled.command_split)
+	initBlockSplit(&recycled.distance_split)
+
+	recycled.literal_context_map, recycled.literal_context_map_size = recycled.literal_context_map[:0], 0
+	recycled.distance_context_map, recycled.distance_context_map_size = recycled.distance_context_map[:0], 0
+
+	recycled.literal_histograms = recycled.literal_histograms[:0]
+	recycled.command_histograms = recycled.command_histograms[:0]
+	recycled.distance_histograms = recycled.distance_histograms[:0]
+
+	return recycled
+}
+
+/* freeMetaBlockSplit hands mb back to metaBlockPool for later reuse. */
+func freeMetaBlockSplit(mb *metaBlockSplit) {
+	metaBlockPool.Put(mb)
+}
+
+/* initDistanceParams records npostfix and ndirect in params.dist and derives
+   the matching distance alphabet size and maximum distance. When
+   params.large_window is set, both derived values are recomputed for the
+   large-window distance bit count. */
+func initDistanceParams(params *encoderParams, npostfix uint32, ndirect uint32) {
+	dist := &params.dist
+	dist.distance_postfix_bits = npostfix
+	dist.num_direct_distance_codes = ndirect
+
+	/* Values for the regular (non-large) window. */
+	alphabetSize := uint32(distanceAlphabetSize(uint(npostfix), uint(ndirect), maxDistanceBits))
+	var maxDistance uint32 = ndirect + (1 << (maxDistanceBits + npostfix + 2)) - (1 << (npostfix + 2))
+
+	if params.large_window {
+		bound := [maxNpostfix + 1]uint32{0, 4, 12, 28}
+		var postfix uint32 = 1 << npostfix
+		alphabetSize = uint32(distanceAlphabetSize(uint(npostfix), uint(ndirect), largeMaxDistanceBits))
+
+		/* The maximum distance is set so that no distance symbol used can encode
+		   a distance larger than BROTLI_MAX_ALLOWED_DISTANCE with all
+		   its extra bits set. */
+		limit := bound[npostfix]
+		switch {
+		case ndirect < limit:
+			maxDistance = maxAllowedDistance - (limit - ndirect)
+		case ndirect >= limit+postfix:
+			maxDistance = (3 << 29) - 4 + (ndirect - limit)
+		default:
+			maxDistance = maxAllowedDistance
+		}
+	}
+
+	dist.alphabet_size = alphabetSize
+	dist.max_distance = uint(maxDistance)
+}
+
+func recomputeDistancePrefixes(cmds []command, orig_params *distanceParams, new_params *distanceParams) {
+	if orig_params.distance_postfix_bits == new_params.distance_postfix_bits && orig_params.num_direct_distance_codes == new_params.num_direct_distance_codes {
+		return
+	}
+
+	for i := range cmds {
+		var cmd *command = &cmds[i]
+		if commandCopyLen(cmd) != 0 && cmd.cmd_prefix_ >= 128 {
+			prefixEncodeCopyDistance(uint(commandRestoreDistanceCode(cmd, orig_params)), uint(new_params.num_direct_distance_codes), uint(new_params.distance_postfix_bits), &cmd.dist_prefix_, &cmd.dist_extra_)
+		}
+	}
+}
+
+/* computeDistanceCost estimates the cost of coding the distances of cmds
+   under new_params, given that they are currently encoded with orig_params.
+
+   Only commands with a non-zero copy length and cmd_prefix_ >= 128
+   contribute. Their distance prefixes (low 10 bits) are collected in a
+   histogram, and the value in the upper bits of each prefix is summed as
+   extra bits.
+
+   On success *cost receives populationCostDistance of the histogram plus the
+   extra bits, and true is returned. If the parameter sets differ and a
+   restored distance code exceeds new_params.max_distance, false is returned
+   and *cost is left untouched. */
+func computeDistanceCost(cmds []command, orig_params *distanceParams, new_params *distanceParams, cost *float64) bool {
+	sameParams := orig_params.distance_postfix_bits == new_params.distance_postfix_bits &&
+		orig_params.num_direct_distance_codes == new_params.num_direct_distance_codes
+
+	var (
+		prefix    uint16
+		extra     uint32
+		extraBits float64
+		histo     histogramDistance
+	)
+	histogramClearDistance(&histo)
+
+	for idx := range cmds {
+		c := &cmds[idx]
+		if commandCopyLen(c) == 0 || c.cmd_prefix_ < 128 {
+			continue
+		}
+
+		if sameParams {
+			/* Nothing to re-encode: reuse the stored prefix. */
+			prefix = c.dist_prefix_
+		} else {
+			code := commandRestoreDistanceCode(c, orig_params)
+			if code > uint32(new_params.max_distance) {
+				return false
+			}
+
+			prefixEncodeCopyDistance(uint(code), uint(new_params.num_direct_distance_codes), uint(new_params.distance_postfix_bits), &prefix, &extra)
+		}
+
+		histogramAddDistance(&histo, uint(prefix)&0x3FF)
+		extraBits += float64(prefix >> 10)
+	}
+
+	*cost = populationCostDistance(&histo) + extraBits
+	return true
+}
+
+/* Upper bound on the number of histograms handed to the clustering calls in
+   buildMetaBlock. Histogram ids need to fit in one byte. */
+var buildMetaBlock_kMaxNumberOfHistograms uint = 256
+
+/* buildMetaBlock fills mb with block splits, context maps and clustered
+   histograms for the commands in cmds.
+
+   Steps, in order:
+   (1) search over (npostfix, ndirect) distance parameters for the cheapest
+       distance coding according to computeDistanceCost, storing the winner
+       in params.dist;
+   (2) re-encode the distance prefixes of cmds for the chosen parameters;
+   (3) split literals, commands and distances into blocks (splitBlock);
+   (4) build per-context histograms and cluster the literal and distance
+       histograms into mb, together with their context maps.
+
+   params.dist is modified, and cmds is modified in place by step (2). */
+func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParams, prev_byte byte, prev_byte2 byte, cmds []command, literal_context_mode int, mb *metaBlockSplit) {
+	var distance_histograms []histogramDistance
+	var literal_histograms []histogramLiteral
+	var literal_context_modes []int = nil
+	var literal_histograms_size uint
+	var distance_histograms_size uint
+	var i uint
+	var literal_context_multiplier uint = 1
+	var npostfix uint32
+	var ndirect_msb uint32 = 0
+	var check_orig bool = true
+	var best_dist_cost float64 = 1e99
+	var orig_params encoderParams = *params
+
+	var new_params encoderParams = *params
+
+	/* For each npostfix, keep increasing ndirect_msb while the distance cost
+	   does not get worse. ndirect_msb is deliberately not reset between
+	   npostfix values: it is stepped back and halved below. */
+	for npostfix = 0; npostfix <= maxNpostfix; npostfix++ {
+		for ; ndirect_msb < 16; ndirect_msb++ {
+			var ndirect uint32 = ndirect_msb << npostfix
+			var skip bool
+			var dist_cost float64
+			initDistanceParams(&new_params, npostfix, ndirect)
+			if npostfix == orig_params.dist.distance_postfix_bits && ndirect == orig_params.dist.num_direct_distance_codes {
+				/* The original parameters are part of the search; no need to
+				   evaluate them separately afterwards. */
+				check_orig = false
+			}
+
+			skip = !computeDistanceCost(cmds, &orig_params.dist, &new_params.dist, &dist_cost)
+			if skip || (dist_cost > best_dist_cost) {
+				break
+			}
+
+			best_dist_cost = dist_cost
+			params.dist = new_params.dist
+		}
+
+		/* Step back to the last ndirect_msb that was tried before the break,
+		   then halve it so the next (larger) npostfix starts its scan at a
+		   comparable ndirect value. */
+		if ndirect_msb > 0 {
+			ndirect_msb--
+		}
+		ndirect_msb /= 2
+	}
+
+	/* The search never visited the original parameters: fall back to them if
+	   they are cheaper than the best candidate found. */
+	if check_orig {
+		var dist_cost float64
+		computeDistanceCost(cmds, &orig_params.dist, &orig_params.dist, &dist_cost)
+		if dist_cost < best_dist_cost {
+			/* NB: currently unused; uncomment when more param tuning is added. */
+			/* best_dist_cost = dist_cost; */
+			params.dist = orig_params.dist
+		}
+	}
+
+	/* Re-encode cmds' distance prefixes for the parameters chosen above. */
+	recomputeDistancePrefixes(cmds, &orig_params.dist, &params.dist)
+
+	splitBlock(cmds, ringbuffer, pos, mask, params, &mb.literal_split, &mb.command_split, &mb.distance_split)
+
+	/* With literal context modeling enabled, every literal block type gets
+	   1 << literalContextBits histograms and uses literal_context_mode;
+	   otherwise one histogram per type and a nil mode slice. */
+	if !params.disable_literal_context_modeling {
+		literal_context_multiplier = 1 << literalContextBits
+		literal_context_modes = make([]int, (mb.literal_split.num_types))
+		for i = 0; i < mb.literal_split.num_types; i++ {
+			literal_context_modes[i] = literal_context_mode
+		}
+	}
+
+	/* Temporary, unclustered histograms for literals and distances. */
+	literal_histograms_size = mb.literal_split.num_types * literal_context_multiplier
+	literal_histograms = make([]histogramLiteral, literal_histograms_size)
+	clearHistogramsLiteral(literal_histograms, literal_histograms_size)
+
+	distance_histograms_size = mb.distance_split.num_types << distanceContextBits
+	distance_histograms = make([]histogramDistance, distance_histograms_size)
+	clearHistogramsDistance(distance_histograms, distance_histograms_size)
+
+	/* Command histograms are written directly into mb, reusing its backing
+	   array when the capacity is sufficient. */
+	mb.command_histograms_size = mb.command_split.num_types
+	if cap(mb.command_histograms) < int(mb.command_histograms_size) {
+		mb.command_histograms = make([]histogramCommand, (mb.command_histograms_size))
+	} else {
+		mb.command_histograms = mb.command_histograms[:mb.command_histograms_size]
+	}
+	clearHistogramsCommand(mb.command_histograms, mb.command_histograms_size)
+
+	buildHistogramsWithContext(cmds, &mb.literal_split, &mb.command_split, &mb.distance_split, ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes, literal_histograms, mb.command_histograms, distance_histograms)
+	literal_context_modes = nil
+
+	/* The literal context map always has 1 << literalContextBits entries per
+	   block type, even when context modeling is disabled (see below). */
+	mb.literal_context_map_size = mb.literal_split.num_types << literalContextBits
+	if cap(mb.literal_context_map) < int(mb.literal_context_map_size) {
+		mb.literal_context_map = make([]uint32, (mb.literal_context_map_size))
+	} else {
+		mb.literal_context_map = mb.literal_context_map[:mb.literal_context_map_size]
+	}
+
+	mb.literal_histograms_size = mb.literal_context_map_size
+	if cap(mb.literal_histograms) < int(mb.literal_histograms_size) {
+		mb.literal_histograms = make([]histogramLiteral, (mb.literal_histograms_size))
+	} else {
+		mb.literal_histograms = mb.literal_histograms[:mb.literal_histograms_size]
+	}
+
+	clusterHistogramsLiteral(literal_histograms, literal_histograms_size, buildMetaBlock_kMaxNumberOfHistograms, mb.literal_histograms, &mb.literal_histograms_size, mb.literal_context_map)
+	literal_histograms = nil
+
+	if params.disable_literal_context_modeling {
+		/* Distribute assignment to all contexts. Iterating i downwards lets
+		   entry i be read before the expanded range for a smaller i
+		   overwrites it. */
+		for i = mb.literal_split.num_types; i != 0; {
+			var j uint = 0
+			i--
+			for ; j < 1<<literalContextBits; j++ {
+				mb.literal_context_map[(i<<literalContextBits)+j] = mb.literal_context_map[i]
+			}
+		}
+	}
+
+	/* Same buffer-reuse pattern for the distance context map and the
+	   clustered distance histograms. */
+	mb.distance_context_map_size = mb.distance_split.num_types << distanceContextBits
+	if cap(mb.distance_context_map) < int(mb.distance_context_map_size) {
+		mb.distance_context_map = make([]uint32, (mb.distance_context_map_size))
+	} else {
+		mb.distance_context_map = mb.distance_context_map[:mb.distance_context_map_size]
+	}
+
+	mb.distance_histograms_size = mb.distance_context_map_size
+	if cap(mb.distance_histograms) < int(mb.distance_histograms_size) {
+		mb.distance_histograms = make([]histogramDistance, (mb.distance_histograms_size))
+	} else {
+		mb.distance_histograms = mb.distance_histograms[:mb.distance_histograms_size]
+	}
+
+	/* mb.distance_context_map_size equals distance_histograms_size here (both
+	   are num_types << distanceContextBits). */
+	clusterHistogramsDistance(distance_histograms, mb.distance_context_map_size, buildMetaBlock_kMaxNumberOfHistograms, mb.distance_histograms, &mb.distance_histograms_size, mb.distance_context_map)
+	distance_histograms = nil
+}
+
+/* Largest num_contexts accepted by initContextBlockSplitter; it also sizes
+   the per-context entropy arrays of contextBlockSplitter. */
+const maxStaticContexts = 13
+
+/* Greedy block splitter for one block category (literal, command or distance).
+   Gathers histograms for all context buckets.
+
+   Fields, as used by the functions in this file:
+   alphabet_size_     - alphabet size passed to bitsEntropy
+   num_contexts_      - number of histograms kept per block type
+   max_block_types_   - upper bound on split_.num_types
+   min_block_size_    - minimum length assigned to an emitted block
+   split_threshold_   - entropy increase both merges must exceed before a
+                        new block type is created
+   num_blocks_        - number of blocks emitted so far
+   split_             - block split being filled in
+   histograms_        - histograms, num_contexts_ per block type
+   histograms_size_   - caller-owned count of histograms in use
+   target_block_size_ - block_size_ at which the current block is finished
+   block_size_        - symbols added to the current block so far
+   curr_histogram_ix_ - index of the first histogram of the current block
+   last_histogram_ix_ - first histogram index of the last two block types
+   last_entropy_      - per-context entropies of the last two block types
+   merge_last_count_  - consecutive merges into the last block */
+type contextBlockSplitter struct {
+	alphabet_size_     uint
+	num_contexts_      uint
+	max_block_types_   uint
+	min_block_size_    uint
+	split_threshold_   float64
+	num_blocks_        uint
+	split_             *blockSplit
+	histograms_        []histogramLiteral
+	histograms_size_   *uint
+	target_block_size_ uint
+	block_size_        uint
+	curr_histogram_ix_ uint
+	last_histogram_ix_ [2]uint
+	last_entropy_      [2 * maxStaticContexts]float64
+	merge_last_count_  uint
+}
+
+/* initContextBlockSplitter prepares self for greedily splitting num_symbols
+   symbols into blocks, tracking num_contexts histograms per block type.
+
+   split receives the block types and lengths; its types/lengths arrays are
+   grown to hold the maximum possible number of blocks. *histograms_size is
+   set to the number of histograms allocated, and *histograms is re-sliced
+   or reallocated to that length. histograms may be nil, in which case the
+   splitter owns a private histogram slice that is not written back to the
+   caller. histograms_size must not be nil.
+
+   num_contexts must be in 1..maxStaticContexts and min_block_size must be
+   non-zero (both are used as divisors). */
+func initContextBlockSplitter(self *contextBlockSplitter, alphabet_size uint, num_contexts uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramLiteral, histograms_size *uint) {
+	var max_num_blocks uint = num_symbols/min_block_size + 1
+	var max_num_types uint
+	var storage []histogramLiteral
+	assert(num_contexts <= maxStaticContexts)
+
+	self.alphabet_size_ = alphabet_size
+	self.num_contexts_ = num_contexts
+	self.max_block_types_ = maxNumberOfBlockTypes / num_contexts
+	self.min_block_size_ = min_block_size
+	self.split_threshold_ = split_threshold
+	self.num_blocks_ = 0
+	self.split_ = split
+	self.histograms_size_ = histograms_size
+	self.target_block_size_ = min_block_size
+	self.block_size_ = 0
+	self.curr_histogram_ix_ = 0
+	self.merge_last_count_ = 0
+
+	/* We have to allocate one more histogram than the maximum number of block
+	   types for the current histogram when the meta-block is too big. */
+	max_num_types = brotli_min_size_t(max_num_blocks, self.max_block_types_+1)
+
+	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
+	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
+	split.num_blocks = max_num_blocks
+	*histograms_size = max_num_types * num_contexts
+
+	/* Work on a local slice so that a nil histograms pointer is never
+	   dereferenced; the result is written back only when there is somewhere
+	   to write it. */
+	if histograms != nil {
+		storage = *histograms
+	}
+	if cap(storage) < int(*histograms_size) {
+		storage = make([]histogramLiteral, (*histograms_size))
+	} else {
+		storage = storage[:*histograms_size]
+	}
+	if histograms != nil {
+		*histograms = storage
+	}
+	self.histograms_ = storage
+
+	/* Clear only current histogram. */
+	clearHistogramsLiteral(self.histograms_[0:], num_contexts)
+
+	self.last_histogram_ix_[1] = 0
+	self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
+}
+
+/* Finishes the current block. The very first call always creates the first
+   block; after that, a non-empty current block does one of three things:
+   (1) emits the current block with a new block type;
+   (2) emits the current block with the type of the second last block;
+   (3) merges the current block with the last block.
+
+   The choice is based on how much the entropy grows, summed over all
+   contexts, when the current histograms are merged into those of the last
+   and second last block types. When is_final is true, the final histogram
+   and block counts are additionally written to *histograms_size_ and
+   split_.num_blocks. */
+func contextBlockSplitterFinishBlock(self *contextBlockSplitter, is_final bool) {
+	var split *blockSplit = self.split_
+	var num_contexts uint = self.num_contexts_
+	var last_entropy []float64 = self.last_entropy_[:]
+	var histograms []histogramLiteral = self.histograms_
+
+	/* A block is never recorded as shorter than min_block_size_. */
+	if self.block_size_ < self.min_block_size_ {
+		self.block_size_ = self.min_block_size_
+	}
+
+	if self.num_blocks_ == 0 {
+		var i uint
+
+		/* Create first block. */
+		split.lengths[0] = uint32(self.block_size_)
+
+		split.types[0] = 0
+
+		/* Both "last" and "second last" entropies start out as the first
+		   block's entropies. */
+		for i = 0; i < num_contexts; i++ {
+			last_entropy[i] = bitsEntropy(histograms[i].data_[:], self.alphabet_size_)
+			last_entropy[num_contexts+i] = last_entropy[i]
+		}
+
+		self.num_blocks_++
+		split.num_types++
+		self.curr_histogram_ix_ += num_contexts
+		if self.curr_histogram_ix_ < *self.histograms_size_ {
+			clearHistogramsLiteral(self.histograms_[self.curr_histogram_ix_:], self.num_contexts_)
+		}
+
+		self.block_size_ = 0
+	} else if self.block_size_ > 0 {
+		var entropy [maxStaticContexts]float64
+		var combined_histo []histogramLiteral = make([]histogramLiteral, (2 * num_contexts))
+		var combined_entropy [2 * maxStaticContexts]float64
+		var diff = [2]float64{0.0}
+		/* Try merging the set of histograms for the current block type with the
+		   respective set of histograms for the last and second last block types.
+		   Decide over the split based on the total reduction of entropy across
+		   all contexts. */
+
+		/* Index layout: j == 0 is the last block type, j == 1 the second last;
+		   entries for j are stored at offset j*num_contexts. */
+		var i uint
+		for i = 0; i < num_contexts; i++ {
+			var curr_histo_ix uint = self.curr_histogram_ix_ + i
+			var j uint
+			entropy[i] = bitsEntropy(histograms[curr_histo_ix].data_[:], self.alphabet_size_)
+			for j = 0; j < 2; j++ {
+				var jx uint = j*num_contexts + i
+				var last_histogram_ix uint = self.last_histogram_ix_[j] + i
+				combined_histo[jx] = histograms[curr_histo_ix]
+				histogramAddHistogramLiteral(&combined_histo[jx], &histograms[last_histogram_ix])
+				combined_entropy[jx] = bitsEntropy(combined_histo[jx].data_[0:], self.alphabet_size_)
+				diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx]
+			}
+		}
+
+		if split.num_types < self.max_block_types_ && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
+			/* Create new block. */
+			split.lengths[self.num_blocks_] = uint32(self.block_size_)
+
+			split.types[self.num_blocks_] = byte(split.num_types)
+			self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
+			self.last_histogram_ix_[0] = split.num_types * num_contexts
+			for i = 0; i < num_contexts; i++ {
+				last_entropy[num_contexts+i] = last_entropy[i]
+				last_entropy[i] = entropy[i]
+			}
+
+			self.num_blocks_++
+			split.num_types++
+			self.curr_histogram_ix_ += num_contexts
+			if self.curr_histogram_ix_ < *self.histograms_size_ {
+				clearHistogramsLiteral(self.histograms_[self.curr_histogram_ix_:], self.num_contexts_)
+			}
+
+			self.block_size_ = 0
+			self.merge_last_count_ = 0
+			self.target_block_size_ = self.min_block_size_
+		} else if diff[1] < diff[0]-20.0 {
+			split.lengths[self.num_blocks_] = uint32(self.block_size_)
+			/* NOTE(review): indexing num_blocks_-2 relies on at least two
+			   blocks having been emitted when this branch is taken - confirm. */
+			split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
+			/* Combine this block with second last block. */
+
+			/* The second last type becomes the last one, and vice versa. */
+			var tmp uint = self.last_histogram_ix_[0]
+			self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
+			self.last_histogram_ix_[1] = tmp
+			for i = 0; i < num_contexts; i++ {
+				histograms[self.last_histogram_ix_[0]+i] = combined_histo[num_contexts+i]
+				last_entropy[num_contexts+i] = last_entropy[i]
+				last_entropy[i] = combined_entropy[num_contexts+i]
+				histogramClearLiteral(&histograms[self.curr_histogram_ix_+i])
+			}
+
+			self.num_blocks_++
+			self.block_size_ = 0
+			self.merge_last_count_ = 0
+			self.target_block_size_ = self.min_block_size_
+		} else {
+			/* Combine this block with last block. */
+			split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
+
+			for i = 0; i < num_contexts; i++ {
+				histograms[self.last_histogram_ix_[0]+i] = combined_histo[i]
+				last_entropy[i] = combined_entropy[i]
+				if split.num_types == 1 {
+					last_entropy[num_contexts+i] = last_entropy[i]
+				}
+
+				histogramClearLiteral(&histograms[self.curr_histogram_ix_+i])
+			}
+
+			self.block_size_ = 0
+			self.merge_last_count_++
+			/* After repeated merges, wait for more symbols before the next
+			   split decision. */
+			if self.merge_last_count_ > 1 {
+				self.target_block_size_ += self.min_block_size_
+			}
+		}
+
+		combined_histo = nil
+	}
+
+	if is_final {
+		*self.histograms_size_ = split.num_types * num_contexts
+		split.num_blocks = self.num_blocks_
+	}
+}
+
+/* Records symbol in the histogram of the current block type for the given
+   context. Once the current block has grown to the target size, the block is
+   finished (non-final), which decides whether it is split off or merged. */
+func contextBlockSplitterAddSymbol(self *contextBlockSplitter, symbol uint, context uint) {
+	slot := self.curr_histogram_ix_ + context
+	histogramAddLiteral(&self.histograms_[slot], symbol)
+
+	self.block_size_++
+	if self.block_size_ != self.target_block_size_ {
+		return
+	}
+
+	contextBlockSplitterFinishBlock(self /* is_final = */, false)
+}
+
+/* mapStaticContexts fills mb.literal_context_map from a static context map.
+   The map gets 1 << literalContextBits entries per literal block type; entry
+   j of block type t is set to t*num_contexts + static_context_map[j]. The
+   existing backing array of the map is reused when it is large enough. */
+func mapStaticContexts(num_contexts uint, static_context_map []uint32, mb *metaBlockSplit) {
+	numTypes := mb.literal_split.num_types
+	var perType uint = 1 << literalContextBits
+
+	mb.literal_context_map_size = numTypes << literalContextBits
+	if int(mb.literal_context_map_size) <= cap(mb.literal_context_map) {
+		mb.literal_context_map = mb.literal_context_map[:mb.literal_context_map_size]
+	} else {
+		mb.literal_context_map = make([]uint32, (mb.literal_context_map_size))
+	}
+
+	for t := uint(0); t < numTypes; t++ {
+		base := t << literalContextBits
+		offset := uint32(t * num_contexts)
+		for j := uint(0); j < perType; j++ {
+			mb.literal_context_map[base+j] = offset + static_context_map[j]
+		}
+	}
+}
+
+/* buildMetaBlockGreedyInternal walks commands once and feeds their symbols to
+   greedy block splitters, filling the literal, command and distance splits
+   and histograms of mb.
+
+   With num_contexts == 1 literals go through a plain literal splitter;
+   otherwise each literal's context is looked up via literal_context_lut and
+   static_context_map and fed to a context-aware splitter, after which
+   mb.literal_context_map is derived from static_context_map.
+
+   pos/mask address ringbuffer; prev_byte and prev_byte2 are the two bytes
+   preceding pos and are tracked as the walk advances. */
+func buildMetaBlockGreedyInternal(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, mb *metaBlockSplit) {
+	var (
+		plainLiterals   blockSplitterLiteral
+		contextLiterals contextBlockSplitter
+		cmdSplitter     blockSplitterCommand
+		distSplitter    blockSplitterDistance
+	)
+	singleContext := num_contexts == 1
+
+	/* Total number of literals, needed to size the literal splitter. */
+	var literalCount uint
+	for i := range commands {
+		literalCount += uint(commands[i].insert_len_)
+	}
+
+	if singleContext {
+		initBlockSplitterLiteral(&plainLiterals, 256, 512, 400.0, literalCount, &mb.literal_split, &mb.literal_histograms, &mb.literal_histograms_size)
+	} else {
+		initContextBlockSplitter(&contextLiterals, 256, num_contexts, 512, 400.0, literalCount, &mb.literal_split, &mb.literal_histograms, &mb.literal_histograms_size)
+	}
+
+	commandCount := uint(len(commands))
+	initBlockSplitterCommand(&cmdSplitter, numCommandSymbols, 1024, 500.0, commandCount, &mb.command_split, &mb.command_histograms, &mb.command_histograms_size)
+	initBlockSplitterDistance(&distSplitter, 64, 512, 100.0, commandCount, &mb.distance_split, &mb.distance_histograms, &mb.distance_histograms_size)
+
+	for _, cmd := range commands {
+		blockSplitterAddSymbolCommand(&cmdSplitter, uint(cmd.cmd_prefix_))
+
+		/* Inserted literals. */
+		for remaining := uint(cmd.insert_len_); remaining != 0; remaining-- {
+			literal := ringbuffer[pos&mask]
+			if singleContext {
+				blockSplitterAddSymbolLiteral(&plainLiterals, uint(literal))
+			} else {
+				context := uint(getContext(prev_byte, prev_byte2, literal_context_lut))
+				contextBlockSplitterAddSymbol(&contextLiterals, uint(literal), uint(static_context_map[context]))
+			}
+
+			prev_byte2, prev_byte = prev_byte, literal
+			pos++
+		}
+
+		/* Copied bytes: skip over them and refresh the two previous bytes. */
+		copyLen := commandCopyLen(&cmd)
+		pos += uint(copyLen)
+		if copyLen == 0 {
+			continue
+		}
+
+		prev_byte2 = ringbuffer[(pos-2)&mask]
+		prev_byte = ringbuffer[(pos-1)&mask]
+		if cmd.cmd_prefix_ >= 128 {
+			blockSplitterAddSymbolDistance(&distSplitter, uint(cmd.dist_prefix_)&0x3FF)
+		}
+	}
+
+	/* Flush all splitters with is_final = true. */
+	if singleContext {
+		blockSplitterFinishBlockLiteral(&plainLiterals, true)
+	} else {
+		contextBlockSplitterFinishBlock(&contextLiterals, true)
+	}
+
+	blockSplitterFinishBlockCommand(&cmdSplitter, true)
+	blockSplitterFinishBlockDistance(&distSplitter, true)
+
+	if num_contexts > 1 {
+		mapStaticContexts(num_contexts, static_context_map, mb)
+	}
+}
+
+/* buildMetaBlockGreedy builds mb with the greedy block splitters. When
+   num_contexts is 1 the static context map is dropped (nil is passed on);
+   otherwise all arguments are forwarded unchanged to
+   buildMetaBlockGreedyInternal. */
+func buildMetaBlockGreedy(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, mb *metaBlockSplit) {
+	contexts, contextMap := num_contexts, static_context_map
+	if num_contexts == 1 {
+		contexts, contextMap = 1, nil
+	}
+
+	buildMetaBlockGreedyInternal(ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_lut, contexts, contextMap, commands, mb)
+}
+
+/* optimizeHistograms runs optimizeHuffmanCountsForRLE over every literal,
+   command and distance histogram in use in mb. The alphabet sizes passed are
+   256 for literals, numCommandSymbols for commands and num_distance_codes
+   for distances. One scratch buffer is shared by all calls. */
+func optimizeHistograms(num_distance_codes uint32, mb *metaBlockSplit) {
+	var good_for_rle [numCommandSymbols]byte
+	scratch := good_for_rle[:]
+
+	for lit := uint(0); lit < mb.literal_histograms_size; lit++ {
+		optimizeHuffmanCountsForRLE(256, mb.literal_histograms[lit].data_[:], scratch)
+	}
+
+	for cmd := uint(0); cmd < mb.command_histograms_size; cmd++ {
+		optimizeHuffmanCountsForRLE(numCommandSymbols, mb.command_histograms[cmd].data_[:], scratch)
+	}
+
+	distanceAlphabet := uint(num_distance_codes)
+	for dist := uint(0); dist < mb.distance_histograms_size; dist++ {
+		optimizeHuffmanCountsForRLE(distanceAlphabet, mb.distance_histograms[dist].data_[:], scratch)
+	}
+}
@@ -0,0 +1,165 @@
+package brotli
+
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Greedy block splitter for the command block category.
+
+   Fields, as used by the functions in this file:
+   alphabet_size_     - alphabet size passed to bitsEntropy
+   min_block_size_    - minimum length assigned to an emitted block
+   split_threshold_   - entropy increase both merges must exceed before a
+                        new block type is created
+   num_blocks_        - number of blocks emitted so far
+   split_             - block split being filled in
+   histograms_        - one histogram per block type, plus the current one
+   histograms_size_   - caller-owned count of histograms in use
+   target_block_size_ - block_size_ at which the current block is finished
+   block_size_        - symbols added to the current block so far
+   curr_histogram_ix_ - index of the histogram collecting the current block
+   last_histogram_ix_ - histogram indices of the last two block types
+   last_entropy_      - entropies of the last two block types
+   merge_last_count_  - consecutive merges into the last block */
+type blockSplitterCommand struct {
+	alphabet_size_     uint
+	min_block_size_    uint
+	split_threshold_   float64
+	num_blocks_        uint
+	split_             *blockSplit
+	histograms_        []histogramCommand
+	histograms_size_   *uint
+	target_block_size_ uint
+	block_size_        uint
+	curr_histogram_ix_ uint
+	last_histogram_ix_ [2]uint
+	last_entropy_      [2]float64
+	merge_last_count_  uint
+}
+
+/* initBlockSplitterCommand prepares self for greedily splitting num_symbols
+   command symbols into blocks.
+
+   split receives the block types and lengths; its types/lengths arrays are
+   grown to hold the maximum possible number of blocks. *histograms_size is
+   set to the number of histograms allocated, and *histograms is re-sliced
+   or reallocated to that length. histograms may be nil, in which case the
+   splitter owns a private histogram slice that is not written back to the
+   caller. histograms_size must not be nil, and min_block_size must be
+   non-zero (it is used as a divisor). */
+func initBlockSplitterCommand(self *blockSplitterCommand, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramCommand, histograms_size *uint) {
+	var max_num_blocks uint = num_symbols/min_block_size + 1
+
+	/* We have to allocate one more histogram than the maximum number of block
+	   types for the current histogram when the meta-block is too big. */
+	var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
+	var storage []histogramCommand
+
+	self.alphabet_size_ = alphabet_size
+
+	self.min_block_size_ = min_block_size
+	self.split_threshold_ = split_threshold
+	self.num_blocks_ = 0
+	self.split_ = split
+	self.histograms_size_ = histograms_size
+	self.target_block_size_ = min_block_size
+	self.block_size_ = 0
+	self.curr_histogram_ix_ = 0
+	self.merge_last_count_ = 0
+	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
+	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
+	self.split_.num_blocks = max_num_blocks
+	*histograms_size = max_num_types
+
+	/* Work on a local slice so that a nil histograms pointer is never
+	   dereferenced; the result is written back only when there is somewhere
+	   to write it. */
+	if histograms != nil {
+		storage = *histograms
+	}
+	if cap(storage) < int(*histograms_size) {
+		storage = make([]histogramCommand, (*histograms_size))
+	} else {
+		storage = storage[:*histograms_size]
+	}
+	if histograms != nil {
+		*histograms = storage
+	}
+	self.histograms_ = storage
+
+	/* Clear only current histogram. */
+	histogramClearCommand(&self.histograms_[0])
+
+	self.last_histogram_ix_[1] = 0
+	self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
+}
+
+/* Finishes the current block. The very first call always creates the first
+   block; after that, a non-empty current block does one of three things:
+   (1) emits the current block with a new block type;
+   (2) emits the current block with the type of the second last block;
+   (3) merges the current block with the last block.
+
+   The choice is based on how much the entropy grows when the current
+   histogram is merged into that of the last and second last block types.
+   When is_final is true, the final histogram and block counts are
+   additionally written to *histograms_size_ and split_.num_blocks. */
+func blockSplitterFinishBlockCommand(self *blockSplitterCommand, is_final bool) {
+	var split *blockSplit = self.split_
+	var last_entropy []float64 = self.last_entropy_[:]
+	var histograms []histogramCommand = self.histograms_
+	/* A block is never recorded as shorter than min_block_size_. */
+	self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
+	if self.num_blocks_ == 0 {
+		/* Create first block. */
+		split.lengths[0] = uint32(self.block_size_)
+
+		split.types[0] = 0
+		last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
+		last_entropy[1] = last_entropy[0]
+		self.num_blocks_++
+		split.num_types++
+		self.curr_histogram_ix_++
+		if self.curr_histogram_ix_ < *self.histograms_size_ {
+			histogramClearCommand(&histograms[self.curr_histogram_ix_])
+		}
+		self.block_size_ = 0
+	} else if self.block_size_ > 0 {
+		var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
+		var combined_histo [2]histogramCommand
+		var combined_entropy [2]float64
+		var diff [2]float64
+		var j uint
+		/* j == 0: merge with the last block type; j == 1: with the second
+		   last. diff[j] is the entropy increase caused by that merge. */
+		for j = 0; j < 2; j++ {
+			var last_histogram_ix uint = self.last_histogram_ix_[j]
+			combined_histo[j] = histograms[self.curr_histogram_ix_]
+			histogramAddHistogramCommand(&combined_histo[j], &histograms[last_histogram_ix])
+			combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
+			diff[j] = combined_entropy[j] - entropy - last_entropy[j]
+		}
+
+		if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
+			/* Create new block. */
+			split.lengths[self.num_blocks_] = uint32(self.block_size_)
+
+			split.types[self.num_blocks_] = byte(split.num_types)
+			self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
+			self.last_histogram_ix_[0] = uint(byte(split.num_types))
+			last_entropy[1] = last_entropy[0]
+			last_entropy[0] = entropy
+			self.num_blocks_++
+			split.num_types++
+			self.curr_histogram_ix_++
+			if self.curr_histogram_ix_ < *self.histograms_size_ {
+				histogramClearCommand(&histograms[self.curr_histogram_ix_])
+			}
+			self.block_size_ = 0
+			self.merge_last_count_ = 0
+			self.target_block_size_ = self.min_block_size_
+		} else if diff[1] < diff[0]-20.0 {
+			split.lengths[self.num_blocks_] = uint32(self.block_size_)
+			/* NOTE(review): indexing num_blocks_-2 relies on at least two
+			   blocks having been emitted when this branch is taken - confirm. */
+			split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
+			/* Combine this block with second last block. */
+
+			/* The second last type becomes the last one, and vice versa. */
+			var tmp uint = self.last_histogram_ix_[0]
+			self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
+			self.last_histogram_ix_[1] = tmp
+			histograms[self.last_histogram_ix_[0]] = combined_histo[1]
+			last_entropy[1] = last_entropy[0]
+			last_entropy[0] = combined_entropy[1]
+			self.num_blocks_++
+			self.block_size_ = 0
+			histogramClearCommand(&histograms[self.curr_histogram_ix_])
+			self.merge_last_count_ = 0
+			self.target_block_size_ = self.min_block_size_
+		} else {
+			/* Combine this block with last block. */
+			split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
+
+			histograms[self.last_histogram_ix_[0]] = combined_histo[0]
+			last_entropy[0] = combined_entropy[0]
+			if split.num_types == 1 {
+				last_entropy[1] = last_entropy[0]
+			}
+
+			self.block_size_ = 0
+			histogramClearCommand(&histograms[self.curr_histogram_ix_])
+			self.merge_last_count_++
+			/* After repeated merges, wait for more symbols before the next
+			   split decision. */
+			if self.merge_last_count_ > 1 {
+				self.target_block_size_ += self.min_block_size_
+			}
+		}
+	}
+
+	if is_final {
+		*self.histograms_size_ = split.num_types
+		split.num_blocks = self.num_blocks_
+	}
+}
+
+/* Records symbol in the histogram of the current block. Once the current
+   block has grown to the target size, the block is finished (non-final),
+   which decides whether it is split off or merged. */
+func blockSplitterAddSymbolCommand(self *blockSplitterCommand, symbol uint) {
+	current := &self.histograms_[self.curr_histogram_ix_]
+	histogramAddCommand(current, symbol)
+
+	self.block_size_++
+	if self.block_size_ != self.target_block_size_ {
+		return
+	}
+
+	blockSplitterFinishBlockCommand(self /* is_final = */, false)
+}
@@ -0,0 +1,165 @@
+package brotli
+
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Greedy block splitter for the distance block category.
+
+   Fields, as used by the functions in this file:
+   alphabet_size_     - alphabet size passed to bitsEntropy
+   min_block_size_    - minimum length assigned to an emitted block
+   split_threshold_   - entropy increase both merges must exceed before a
+                        new block type is created
+   num_blocks_        - number of blocks emitted so far
+   split_             - block split being filled in
+   histograms_        - one histogram per block type, plus the current one
+   histograms_size_   - caller-owned count of histograms in use
+   target_block_size_ - block_size_ at which the current block is finished
+   block_size_        - symbols added to the current block so far
+   curr_histogram_ix_ - index of the histogram collecting the current block
+   last_histogram_ix_ - histogram indices of the last two block types
+   last_entropy_      - entropies of the last two block types
+   merge_last_count_  - consecutive merges into the last block */
+type blockSplitterDistance struct {
+	alphabet_size_     uint
+	min_block_size_    uint
+	split_threshold_   float64
+	num_blocks_        uint
+	split_             *blockSplit
+	histograms_        []histogramDistance
+	histograms_size_   *uint
+	target_block_size_ uint
+	block_size_        uint
+	curr_histogram_ix_ uint
+	last_histogram_ix_ [2]uint
+	last_entropy_      [2]float64
+	merge_last_count_  uint
+}
+
+/* initBlockSplitterDistance prepares self for greedily splitting num_symbols
+   distance symbols into blocks.
+
+   split receives the block types and lengths; its types/lengths arrays are
+   grown to hold the maximum possible number of blocks. *histograms_size is
+   set to the number of histograms allocated, and *histograms is re-sliced
+   or reallocated to that length. histograms may be nil, in which case the
+   splitter owns a private histogram slice that is not written back to the
+   caller. histograms_size must not be nil, and min_block_size must be
+   non-zero (it is used as a divisor). */
+func initBlockSplitterDistance(self *blockSplitterDistance, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramDistance, histograms_size *uint) {
+	var max_num_blocks uint = num_symbols/min_block_size + 1
+
+	/* We have to allocate one more histogram than the maximum number of block
+	   types for the current histogram when the meta-block is too big. */
+	var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
+	var storage []histogramDistance
+
+	self.alphabet_size_ = alphabet_size
+
+	self.min_block_size_ = min_block_size
+	self.split_threshold_ = split_threshold
+	self.num_blocks_ = 0
+	self.split_ = split
+	self.histograms_size_ = histograms_size
+	self.target_block_size_ = min_block_size
+	self.block_size_ = 0
+	self.curr_histogram_ix_ = 0
+	self.merge_last_count_ = 0
+	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
+	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
+	self.split_.num_blocks = max_num_blocks
+	*histograms_size = max_num_types
+
+	/* Work on a local slice so that a nil histograms pointer is never
+	   dereferenced; the result is written back only when there is somewhere
+	   to write it. */
+	if histograms != nil {
+		storage = *histograms
+	}
+	if cap(storage) < int(*histograms_size) {
+		storage = make([]histogramDistance, *histograms_size)
+	} else {
+		storage = storage[:*histograms_size]
+	}
+	if histograms != nil {
+		*histograms = storage
+	}
+	self.histograms_ = storage
+
+	/* Clear only current histogram. */
+	histogramClearDistance(&self.histograms_[0])
+
+	self.last_histogram_ix_[1] = 0
+	self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
+}
+
+/* Finishes the current block. The very first call always creates the first
+   block; after that, a non-empty current block does one of three things:
+   (1) emits the current block with a new block type;
+   (2) emits the current block with the type of the second last block;
+   (3) merges the current block with the last block.
+
+   The choice is based on how much the entropy grows when the current
+   histogram is merged into that of the last and second last block types.
+   When is_final is true, the final histogram and block counts are
+   additionally written to *histograms_size_ and split_.num_blocks. */
+func blockSplitterFinishBlockDistance(self *blockSplitterDistance, is_final bool) {
+	var split *blockSplit = self.split_
+	var last_entropy []float64 = self.last_entropy_[:]
+	var histograms []histogramDistance = self.histograms_
+	/* A block is never recorded as shorter than min_block_size_. */
+	self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
+	if self.num_blocks_ == 0 {
+		/* Create first block. */
+		split.lengths[0] = uint32(self.block_size_)
+
+		split.types[0] = 0
+		last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
+		last_entropy[1] = last_entropy[0]
+		self.num_blocks_++
+		split.num_types++
+		self.curr_histogram_ix_++
+		if self.curr_histogram_ix_ < *self.histograms_size_ {
+			histogramClearDistance(&histograms[self.curr_histogram_ix_])
+		}
+		self.block_size_ = 0
+	} else if self.block_size_ > 0 {
+		var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
+		var combined_histo [2]histogramDistance
+		var combined_entropy [2]float64
+		var diff [2]float64
+		var j uint
+		/* j == 0: merge with the last block type; j == 1: with the second
+		   last. diff[j] is the entropy increase caused by that merge. */
+		for j = 0; j < 2; j++ {
+			var last_histogram_ix uint = self.last_histogram_ix_[j]
+			combined_histo[j] = histograms[self.curr_histogram_ix_]
+			histogramAddHistogramDistance(&combined_histo[j], &histograms[last_histogram_ix])
+			combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
+			diff[j] = combined_entropy[j] - entropy - last_entropy[j]
+		}
+
+		if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
+			/* Create new block. */
+			split.lengths[self.num_blocks_] = uint32(self.block_size_)
+
+			split.types[self.num_blocks_] = byte(split.num_types)
+			self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
+			self.last_histogram_ix_[0] = uint(byte(split.num_types))
+			last_entropy[1] = last_entropy[0]
+			last_entropy[0] = entropy
+			self.num_blocks_++
+			split.num_types++
+			self.curr_histogram_ix_++
+			if self.curr_histogram_ix_ < *self.histograms_size_ {
+				histogramClearDistance(&histograms[self.curr_histogram_ix_])
+			}
+			self.block_size_ = 0
+			self.merge_last_count_ = 0
+			self.target_block_size_ = self.min_block_size_
+		} else if diff[1] < diff[0]-20.0 {
+			split.lengths[self.num_blocks_] = uint32(self.block_size_)
+			/* NOTE(review): indexing num_blocks_-2 relies on at least two
+			   blocks having been emitted when this branch is taken - confirm. */
+			split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
+			/* Combine this block with second last block. */
+
+			/* The second last type becomes the last one, and vice versa. */
+			var tmp uint = self.last_histogram_ix_[0]
+			self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
+			self.last_histogram_ix_[1] = tmp
+			histograms[self.last_histogram_ix_[0]] = combined_histo[1]
+			last_entropy[1] = last_entropy[0]
+			last_entropy[0] = combined_entropy[1]
+			self.num_blocks_++
+			self.block_size_ = 0
+			histogramClearDistance(&histograms[self.curr_histogram_ix_])
+			self.merge_last_count_ = 0
+			self.target_block_size_ = self.min_block_size_
+		} else {
+			/* Combine this block with last block. */
+			split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
+
+			histograms[self.last_histogram_ix_[0]] = combined_histo[0]
+			last_entropy[0] = combined_entropy[0]
+			if split.num_types == 1 {
+				last_entropy[1] = last_entropy[0]
+			}
+
+			self.block_size_ = 0
+			histogramClearDistance(&histograms[self.curr_histogram_ix_])
+			self.merge_last_count_++
+			/* After repeated merges, wait for more symbols before the next
+			   split decision. */
+			if self.merge_last_count_ > 1 {
+				self.target_block_size_ += self.min_block_size_
+			}
+		}
+	}
+
+	if is_final {
+		*self.histograms_size_ = split.num_types
+		split.num_blocks = self.num_blocks_
+	}
+}
+
+/* Records symbol in the histogram of the current block. Once the current
+   block has grown to the target size, the block is finished (non-final),
+   which decides whether it is split off or merged. */
+func blockSplitterAddSymbolDistance(self *blockSplitterDistance, symbol uint) {
+	current := &self.histograms_[self.curr_histogram_ix_]
+	histogramAddDistance(current, symbol)
+
+	self.block_size_++
+	if self.block_size_ != self.target_block_size_ {
+		return
+	}
+
+	blockSplitterFinishBlockDistance(self /* is_final = */, false)
+}
@@ -0,0 +1,165 @@
+package brotli
+
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Greedy block splitter for one block category (literal, command or distance).
+   This variant works on histogramLiteral histograms. */
+type blockSplitterLiteral struct {
+	/* Symbol count handed to bitsEntropy. */
+	alphabet_size_     uint
+	/* Lower bound applied to every recorded block length; also the initial
+	   value and the growth step of target_block_size_. */
+	min_block_size_    uint
+	/* Both merge costs must exceed this before a new block type is created. */
+	split_threshold_   float64
+	/* Number of blocks emitted so far. */
+	num_blocks_        uint
+	/* Receives the block types, block lengths and the type/block counts. */
+	split_             *blockSplit
+	/* Histograms indexed by block type, plus the one currently being filled. */
+	histograms_        []histogramLiteral
+	/* Caller-owned histogram count: set to the allocated count by the init
+	   function and to split.num_types when the final block is finished. */
+	histograms_size_   *uint
+	/* Block length at which adding a symbol triggers the finish-block step. */
+	target_block_size_ uint
+	/* Symbols added to the current block since the last finish-block step. */
+	block_size_        uint
+	/* Index into histograms_ of the histogram currently being filled. */
+	curr_histogram_ix_ uint
+	/* Histogram indices of the two merge candidates: [0] is the last block,
+	   [1] the second last block. */
+	last_histogram_ix_ [2]uint
+	/* Entropies (as computed by bitsEntropy) matching last_histogram_ix_. */
+	last_entropy_      [2]float64
+	/* Number of consecutive merges into the last block. */
+	merge_last_count_  uint
+}
+
+/* Prepares self for splitting a stream of num_symbols literal symbols.
+
+   alphabet_size is the symbol count later handed to bitsEntropy,
+   min_block_size is the initial target block length (and its growth step) and
+   split_threshold is the merge cost both candidates must exceed before a new
+   block type is created. min_block_size must be non-zero.
+
+   split receives the result; its types and lengths buffers are grown to hold
+   num_symbols/min_block_size + 1 entries and split.num_blocks is set to that
+   upper bound until the final block is finished.
+
+   *histograms is resized to min(max blocks, maxNumberOfBlockTypes+1) entries,
+   reusing its backing array when the capacity suffices, and *histograms_size
+   is set to that count. histograms may be nil, in which case the histograms
+   are allocated privately for self. Only the first histogram is cleared here. */
+func initBlockSplitterLiteral(self *blockSplitterLiteral, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramLiteral, histograms_size *uint) {
+	var max_num_blocks uint = num_symbols/min_block_size + 1
+	/* We have to allocate one more histogram than the maximum number of block
+	   types for the current histogram when the meta-block is too big. */
+	var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
+
+	self.alphabet_size_ = alphabet_size
+	self.min_block_size_ = min_block_size
+	self.split_threshold_ = split_threshold
+	self.num_blocks_ = 0
+	self.split_ = split
+	self.histograms_size_ = histograms_size
+	self.target_block_size_ = min_block_size
+	self.block_size_ = 0
+	self.curr_histogram_ix_ = 0
+	self.merge_last_count_ = 0
+	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
+	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
+	split.num_blocks = max_num_blocks
+	*histograms_size = max_num_types
+	if histograms == nil {
+		/* There is no caller slice to reuse or to publish the result through;
+		   storing through the nil pointer would panic, so keep the
+		   histograms private to the splitter. */
+		self.histograms_ = make([]histogramLiteral, max_num_types)
+	} else {
+		if cap(*histograms) < int(max_num_types) {
+			*histograms = make([]histogramLiteral, max_num_types)
+		} else {
+			*histograms = (*histograms)[:max_num_types]
+		}
+		self.histograms_ = *histograms
+	}
+
+	/* Clear only current histogram. */
+	histogramClearLiteral(&self.histograms_[0])
+
+	/* Both merge candidates start out as histogram 0. */
+	self.last_histogram_ix_[0] = 0
+	self.last_histogram_ix_[1] = 0
+}
+
+/* Closes the block accumulated so far and records the outcome in self.split_.
+   Does either of three things:
+   (1) emits the current block with a new block type;
+   (2) emits the current block with the type of the second last block;
+   (3) merges the current block with the last block.
+   The very first block is always emitted with type 0. For later blocks the
+   choice depends on how much bitsEntropy grows when the current histogram is
+   added to the histogram of the last and of the second last block.
+   When is_final is true, the resulting number of block types is stored in
+   *self.histograms_size_ and the number of blocks in split.num_blocks. */
+func blockSplitterFinishBlockLiteral(self *blockSplitterLiteral, is_final bool) {
+	var split *blockSplit = self.split_
+	/* Slice over the array in self: writes below update self.last_entropy_. */
+	var last_entropy []float64 = self.last_entropy_[:]
+	var histograms []histogramLiteral = self.histograms_
+	/* The block is accounted for with at least min_block_size_ symbols. */
+	self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
+	if self.num_blocks_ == 0 {
+		/* Create first block. */
+		split.lengths[0] = uint32(self.block_size_)
+
+		split.types[0] = 0
+		last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
+		last_entropy[1] = last_entropy[0]
+		self.num_blocks_++
+		split.num_types++
+		self.curr_histogram_ix_++
+		/* Prepare the next histogram, if one is left. */
+		if self.curr_histogram_ix_ < *self.histograms_size_ {
+			histogramClearLiteral(&histograms[self.curr_histogram_ix_])
+		}
+		self.block_size_ = 0
+	} else if self.block_size_ > 0 {
+		var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
+		var combined_histo [2]histogramLiteral
+		var combined_entropy [2]float64
+		var diff [2]float64
+		var j uint
+		/* For each candidate (j = 0: last block, j = 1: second last block)
+		   build the merged histogram and compute diff[j], the entropy of the
+		   merged histogram minus the entropies of its two parts. */
+		for j = 0; j < 2; j++ {
+			var last_histogram_ix uint = self.last_histogram_ix_[j]
+			combined_histo[j] = histograms[self.curr_histogram_ix_]
+			histogramAddHistogramLiteral(&combined_histo[j], &histograms[last_histogram_ix])
+			combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
+			diff[j] = combined_entropy[j] - entropy - last_entropy[j]
+		}
+
+		if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
+			/* Create new block. */
+			split.lengths[self.num_blocks_] = uint32(self.block_size_)
+
+			split.types[self.num_blocks_] = byte(split.num_types)
+			/* The previous last block becomes the second last candidate and
+			   the new type becomes the last one. */
+			self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
+			self.last_histogram_ix_[0] = uint(byte(split.num_types))
+			last_entropy[1] = last_entropy[0]
+			last_entropy[0] = entropy
+			self.num_blocks_++
+			split.num_types++
+			self.curr_histogram_ix_++
+			/* Prepare the next histogram, if one is left. */
+			if self.curr_histogram_ix_ < *self.histograms_size_ {
+				histogramClearLiteral(&histograms[self.curr_histogram_ix_])
+			}
+			self.block_size_ = 0
+			self.merge_last_count_ = 0
+			self.target_block_size_ = self.min_block_size_
+		} else if diff[1] < diff[0]-20.0 {
+			/* Merging with the second last block is cheaper than merging with
+			   the last block by more than 20.0. */
+			split.lengths[self.num_blocks_] = uint32(self.block_size_)
+			split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
+			/* Combine this block with second last block. */
+
+			/* Swap the two candidates: the second last type is now the last. */
+			var tmp uint = self.last_histogram_ix_[0]
+			self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
+			self.last_histogram_ix_[1] = tmp
+			histograms[self.last_histogram_ix_[0]] = combined_histo[1]
+			last_entropy[1] = last_entropy[0]
+			last_entropy[0] = combined_entropy[1]
+			self.num_blocks_++
+			self.block_size_ = 0
+			/* No new type was created, so the current histogram is reused. */
+			histogramClearLiteral(&histograms[self.curr_histogram_ix_])
+			self.merge_last_count_ = 0
+			self.target_block_size_ = self.min_block_size_
+		} else {
+			/* Combine this block with last block. */
+			split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
+
+			histograms[self.last_histogram_ix_[0]] = combined_histo[0]
+			last_entropy[0] = combined_entropy[0]
+			/* With a single block type, keep both entropy slots equal. */
+			if split.num_types == 1 {
+				last_entropy[1] = last_entropy[0]
+			}
+
+			self.block_size_ = 0
+			histogramClearLiteral(&histograms[self.curr_histogram_ix_])
+			/* From the second consecutive merge on, raise the target block
+			   size by min_block_size_ each time. */
+			self.merge_last_count_++
+			if self.merge_last_count_ > 1 {
+				self.target_block_size_ += self.min_block_size_
+			}
+		}
+	}
+
+	if is_final {
+		/* Publish the final counts to the caller. */
+		*self.histograms_size_ = split.num_types
+		split.num_blocks = self.num_blocks_
+	}
+}
+
+/* Records one symbol in the histogram of the block that is currently being
+   built. Once that block has grown to exactly target_block_size_ symbols the
+   split/merge decision is taken by blockSplitterFinishBlockLiteral. */
+func blockSplitterAddSymbolLiteral(self *blockSplitterLiteral, symbol uint) {
+	current := &self.histograms_[self.curr_histogram_ix_]
+	histogramAddLiteral(current, symbol)
+	self.block_size_++
+	if self.block_size_ != self.target_block_size_ {
+		return
+	}
+	blockSplitterFinishBlockLiteral(self, false /* is_final */)
+}
@@ -0,0 +1,37 @@
+package brotli
+
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Parameters for the Brotli encoder with chosen quality levels. */
+
+/* Integer settings of the hasher; stored in encoderParams.hasher.
+   NOTE(review): the meaning of the individual fields is not visible in this
+   file - the names suggest a hasher variant selector followed by its table
+   geometry; confirm against the code that fills and reads them. */
+type hasherParams struct {
+	type_                       int
+	bucket_bits                 int
+	block_bits                  int
+	hash_len                    int
+	num_last_distances_to_check int
+}
+
+/* Settings of the distance coding; stored in encoderParams.dist.
+   NOTE(review): field meanings are not established by this file. The first
+   two presumably correspond to the postfix-bit and direct-code counts used
+   when distances are turned into prefix codes - confirm against the callers. */
+type distanceParams struct {
+	distance_postfix_bits     uint32
+	num_direct_distance_codes uint32
+	alphabet_size             uint32
+	max_distance              uint
+}
+
+/* Encoding parameters: the complete set of encoder settings, grouping the
+   scalar options with the hasher, distance and dictionary settings.
+   NOTE(review): the valid ranges and units of the scalar fields (mode,
+   quality, lgwin, lgblock, size_hint) are not visible here - confirm against
+   the code that validates them. */
+type encoderParams struct {
+	mode                             int
+	quality                          int
+	lgwin                            uint
+	lgblock                          int
+	size_hint                        uint
+	disable_literal_context_modeling bool
+	large_window                     bool
+	/* Hasher settings. */
+	hasher                           hasherParams
+	/* Distance coding settings. */
+	dist                             distanceParams
+	dictionary                       encoderDictionary
+}
@@ -0,0 +1,103 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Returns a when a < b and b otherwise (so b is returned when either argument
+   is NaN). */
+func brotli_min_double(a float64, b float64) float64 {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+/* Returns a when a > b and b otherwise (so b is returned when either argument
+   is NaN). */
+func brotli_max_double(a float64, b float64) float64 {
+	if a > b {
+		return a
+	}
+	return b
+}
+
+/* Returns a when a < b and b otherwise (so b is returned when either argument
+   is NaN). */
+func brotli_min_float(a float32, b float32) float32 {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+/* Returns a when a > b and b otherwise (so b is returned when either argument
+   is NaN). */
+func brotli_max_float(a float32, b float32) float32 {
+	if a > b {
+		return a
+	}
+	return b
+}
+
+/* Returns the smaller of the two signed integers. */
+func brotli_min_int(a int, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+/* Returns the larger of the two signed integers. */
+func brotli_max_int(a int, b int) int {
+	if a > b {
+		return a
+	}
+	return b
+}
+
+/* Returns the smaller of the two unsigned integers. */
+func brotli_min_size_t(a uint, b uint) uint {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+/* Returns the larger of the two unsigned integers. */
+func brotli_max_size_t(a uint, b uint) uint {
+	if a > b {
+		return a
+	}
+	return b
+}
+
+/* Returns the smaller of the two 32-bit unsigned integers. */
+func brotli_min_uint32_t(a uint32, b uint32) uint32 {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+/* Returns the larger of the two 32-bit unsigned integers. */
+func brotli_max_uint32_t(a uint32, b uint32) uint32 {
+	if a > b {
+		return a
+	}
+	return b
+}
+
+/* Returns the smaller of the two bytes. */
+func brotli_min_uint8_t(a byte, b byte) byte {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+/* Returns the larger of the two bytes. */
+func brotli_max_uint8_t(a byte, b byte) byte {
+	if a > b {
+		return a
+	}
+	return b
+}
@@ -0,0 +1,30 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for encoding integers into prefix codes, the number of extra
+   bits, and the actual values of the extra bits. */
+
+/* Here distance_code is an intermediate code, i.e. one of the special codes or
+   the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
+func prefixEncodeCopyDistance(distance_code uint, num_direct_codes uint, postfix_bits uint, code *uint16, extra_bits *uint32) {
+	if distance_code < numDistanceShortCodes+num_direct_codes {
+		*code = uint16(distance_code)
+		*extra_bits = 0
+		return
+	} else {
+		var dist uint = (uint(1) << (postfix_bits + 2)) + (distance_code - numDistanceShortCodes - num_direct_codes)
+		var bucket uint = uint(log2FloorNonZero(dist) - 1)
+		var postfix_mask uint = (1 << postfix_bits) - 1
+		var postfix uint = dist & postfix_mask
+		var prefix uint = (dist >> bucket) & 1
+		var offset uint = (2 + prefix) << bucket
+		var nbits uint = bucket - postfix_bits
+		*code = uint16(nbits<<10 | (numDistanceShortCodes + num_direct_codes + ((2*(nbits-1) + prefix) << postfix_bits) + postfix))
+		*extra_bits = uint32((dist - offset) >> postfix_bits)
+	}
+}
@@ -0,0 +1,723 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Element type of the kCmdLut table.
+   NOTE(review): field meanings are inferred from their names only - each
+   entry appears to give, for one command symbol, the extra-bit counts and
+   base offsets of the insert and copy lengths plus a distance code and a
+   context value; confirm against the code that reads kCmdLut. */
+type cmdLutElement struct {
+	insert_len_extra_bits byte
+	copy_len_extra_bits   byte
+	distance_code         int8
+	context               byte
+	insert_len_offset     uint16
+	copy_len_offset       uint16
+}
+
+var kCmdLut = [numCommandSymbols]cmdLutElement{
+	cmdLutElement{0x00, 0x00, 0, 0x00, 0x0000, 0x0002},
+	cmdLutElement{0x00, 0x00, 0, 0x01, 0x0000, 0x0003},
+	cmdLutElement{0x00, 0x00, 0, 0x02, 0x0000, 0x0004},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0005},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0006},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0007},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0008},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0000, 0x0009},
+	cmdLutElement{0x00, 0x00, 0, 0x00, 0x0001, 0x0002},
+	cmdLutElement{0x00, 0x00, 0, 0x01, 0x0001, 0x0003},
+	cmdLutElement{0x00, 0x00, 0, 0x02, 0x0001, 0x0004},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0005},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0006},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0007},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0008},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0001, 0x0009},
+	cmdLutElement{0x00, 0x00, 0, 0x00, 0x0002, 0x0002},
+	cmdLutElement{0x00, 0x00, 0, 0x01, 0x0002, 0x0003},
+	cmdLutElement{0x00, 0x00, 0, 0x02, 0x0002, 0x0004},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0005},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0006},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0007},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0008},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0002, 0x0009},
+	cmdLutElement{0x00, 0x00, 0, 0x00, 0x0003, 0x0002},
+	cmdLutElement{0x00, 0x00, 0, 0x01, 0x0003, 0x0003},
+	cmdLutElement{0x00, 0x00, 0, 0x02, 0x0003, 0x0004},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0005},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0006},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0007},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0008},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0003, 0x0009},
+	cmdLutElement{0x00, 0x00, 0, 0x00, 0x0004, 0x0002},
+	cmdLutElement{0x00, 0x00, 0, 0x01, 0x0004, 0x0003},
+	cmdLutElement{0x00, 0x00, 0, 0x02, 0x0004, 0x0004},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0005},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0006},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0007},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0008},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0004, 0x0009},
+	cmdLutElement{0x00, 0x00, 0, 0x00, 0x0005, 0x0002},
+	cmdLutElement{0x00, 0x00, 0, 0x01, 0x0005, 0x0003},
+	cmdLutElement{0x00, 0x00, 0, 0x02, 0x0005, 0x0004},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0005},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0006},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0007},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0008},
+	cmdLutElement{0x00, 0x00, 0, 0x03, 0x0005, 0x0009},
+	cmdLutElement{0x01, 0x00, 0, 0x00, 0x0006, 0x0002},
+	cmdLutElement{0x01, 0x00, 0, 0x01, 0x0006, 0x0003},
+	cmdLutElement{0x01, 0x00, 0, 0x02, 0x0006, 0x0004},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0005},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0006},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0007},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0008},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0006, 0x0009},
+	cmdLutElement{0x01, 0x00, 0, 0x00, 0x0008, 0x0002},
+	cmdLutElement{0x01, 0x00, 0, 0x01, 0x0008, 0x0003},
+	cmdLutElement{0x01, 0x00, 0, 0x02, 0x0008, 0x0004},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0005},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0006},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0007},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0008},
+	cmdLutElement{0x01, 0x00, 0, 0x03, 0x0008, 0x0009},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0000, 0x000a},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0000, 0x000c},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0000, 0x000e},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0000, 0x0012},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0000, 0x0016},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0000, 0x001e},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0000, 0x0026},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0000, 0x0036},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0001, 0x000a},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0001, 0x000c},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0001, 0x000e},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0001, 0x0012},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0001, 0x0016},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0001, 0x001e},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0001, 0x0026},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0001, 0x0036},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0002, 0x000a},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0002, 0x000c},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0002, 0x000e},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0002, 0x0012},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0002, 0x0016},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0002, 0x001e},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0002, 0x0026},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0002, 0x0036},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0003, 0x000a},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0003, 0x000c},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0003, 0x000e},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0003, 0x0012},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0003, 0x0016},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0003, 0x001e},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0003, 0x0026},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0003, 0x0036},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0004, 0x000a},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0004, 0x000c},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0004, 0x000e},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0004, 0x0012},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0004, 0x0016},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0004, 0x001e},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0004, 0x0026},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0004, 0x0036},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0005, 0x000a},
+	cmdLutElement{0x00, 0x01, 0, 0x03, 0x0005, 0x000c},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0005, 0x000e},
+	cmdLutElement{0x00, 0x02, 0, 0x03, 0x0005, 0x0012},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0005, 0x0016},
+	cmdLutElement{0x00, 0x03, 0, 0x03, 0x0005, 0x001e},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0005, 0x0026},
+	cmdLutElement{0x00, 0x04, 0, 0x03, 0x0005, 0x0036},
+	cmdLutElement{0x01, 0x01, 0, 0x03, 0x0006, 0x000a},
+	cmdLutElement{0x01, 0x01, 0, 0x03, 0x0006, 0x000c},
+	cmdLutElement{0x01, 0x02, 0, 0x03, 0x0006, 0x000e},
+	cmdLutElement{0x01, 0x02, 0, 0x03, 0x0006, 0x0012},
+	cmdLutElement{0x01, 0x03, 0, 0x03, 0x0006, 0x0016},
+	cmdLutElement{0x01, 0x03, 0, 0x03, 0x0006, 0x001e},
+	cmdLutElement{0x01, 0x04, 0, 0x03, 0x0006, 0x0026},
+	cmdLutElement{0x01, 0x04, 0, 0x03, 0x0006, 0x0036},
+	cmdLutElement{0x01, 0x01, 0, 0x03, 0x0008, 0x000a},
+	cmdLutElement{0x01, 0x01, 0, 0x03, 0x0008, 0x000c},
+	cmdLutElement{0x01, 0x02, 0, 0x03, 0x0008, 0x000e},
+	cmdLutElement{0x01, 0x02, 0, 0x03, 0x0008, 0x0012},
+	cmdLutElement{0x01, 0x03, 0, 0x03, 0x0008, 0x0016},
+	cmdLutElement{0x01, 0x03, 0, 0x03, 0x0008, 0x001e},
+	cmdLutElement{0x01, 0x04, 0, 0x03, 0x0008, 0x0026},
+	cmdLutElement{0x01, 0x04, 0, 0x03, 0x0008, 0x0036},
+	cmdLutElement{0x00, 0x00, -1, 0x00, 0x0000, 0x0002},
+	cmdLutElement{0x00, 0x00, -1, 0x01, 0x0000, 0x0003},
+	cmdLutElement{0x00, 0x00, -1, 0x02, 0x0000, 0x0004},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0005},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0006},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0007},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0008},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0000, 0x0009},
+	cmdLutElement{0x00, 0x00, -1, 0x00, 0x0001, 0x0002},
+	cmdLutElement{0x00, 0x00, -1, 0x01, 0x0001, 0x0003},
+	cmdLutElement{0x00, 0x00, -1, 0x02, 0x0001, 0x0004},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0005},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0006},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0007},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0008},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0001, 0x0009},
+	cmdLutElement{0x00, 0x00, -1, 0x00, 0x0002, 0x0002},
+	cmdLutElement{0x00, 0x00, -1, 0x01, 0x0002, 0x0003},
+	cmdLutElement{0x00, 0x00, -1, 0x02, 0x0002, 0x0004},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0005},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0006},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0007},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0008},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0002, 0x0009},
+	cmdLutElement{0x00, 0x00, -1, 0x00, 0x0003, 0x0002},
+	cmdLutElement{0x00, 0x00, -1, 0x01, 0x0003, 0x0003},
+	cmdLutElement{0x00, 0x00, -1, 0x02, 0x0003, 0x0004},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0005},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0006},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0007},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0008},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0003, 0x0009},
+	cmdLutElement{0x00, 0x00, -1, 0x00, 0x0004, 0x0002},
+	cmdLutElement{0x00, 0x00, -1, 0x01, 0x0004, 0x0003},
+	cmdLutElement{0x00, 0x00, -1, 0x02, 0x0004, 0x0004},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0005},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0006},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0007},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0008},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0004, 0x0009},
+	cmdLutElement{0x00, 0x00, -1, 0x00, 0x0005, 0x0002},
+	cmdLutElement{0x00, 0x00, -1, 0x01, 0x0005, 0x0003},
+	cmdLutElement{0x00, 0x00, -1, 0x02, 0x0005, 0x0004},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0005},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0006},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0007},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0008},
+	cmdLutElement{0x00, 0x00, -1, 0x03, 0x0005, 0x0009},
+	cmdLutElement{0x01, 0x00, -1, 0x00, 0x0006, 0x0002},
+	cmdLutElement{0x01, 0x00, -1, 0x01, 0x0006, 0x0003},
+	cmdLutElement{0x01, 0x00, -1, 0x02, 0x0006, 0x0004},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0005},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0006},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0007},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0008},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0006, 0x0009},
+	cmdLutElement{0x01, 0x00, -1, 0x00, 0x0008, 0x0002},
+	cmdLutElement{0x01, 0x00, -1, 0x01, 0x0008, 0x0003},
+	cmdLutElement{0x01, 0x00, -1, 0x02, 0x0008, 0x0004},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0005},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0006},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0007},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0008},
+	cmdLutElement{0x01, 0x00, -1, 0x03, 0x0008, 0x0009},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0000, 0x000a},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0000, 0x000c},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0000, 0x000e},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0000, 0x0012},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0000, 0x0016},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0000, 0x001e},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0000, 0x0026},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0000, 0x0036},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0001, 0x000a},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0001, 0x000c},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0001, 0x000e},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0001, 0x0012},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0001, 0x0016},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0001, 0x001e},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0001, 0x0026},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0001, 0x0036},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0002, 0x000a},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0002, 0x000c},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0002, 0x000e},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0002, 0x0012},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0002, 0x0016},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0002, 0x001e},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0002, 0x0026},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0002, 0x0036},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0003, 0x000a},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0003, 0x000c},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0003, 0x000e},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0003, 0x0012},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0003, 0x0016},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0003, 0x001e},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0003, 0x0026},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0003, 0x0036},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0004, 0x000a},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0004, 0x000c},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0004, 0x000e},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0004, 0x0012},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0004, 0x0016},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0004, 0x001e},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0004, 0x0026},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0004, 0x0036},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0005, 0x000a},
+	cmdLutElement{0x00, 0x01, -1, 0x03, 0x0005, 0x000c},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0005, 0x000e},
+	cmdLutElement{0x00, 0x02, -1, 0x03, 0x0005, 0x0012},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0005, 0x0016},
+	cmdLutElement{0x00, 0x03, -1, 0x03, 0x0005, 0x001e},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0005, 0x0026},
+	cmdLutElement{0x00, 0x04, -1, 0x03, 0x0005, 0x0036},
+	cmdLutElement{0x01, 0x01, -1, 0x03, 0x0006, 0x000a},
+	cmdLutElement{0x01, 0x01, -1, 0x03, 0x0006, 0x000c},
+	cmdLutElement{0x01, 0x02, -1, 0x03, 0x0006, 0x000e},
+	cmdLutElement{0x01, 0x02, -1, 0x03, 0x0006, 0x0012},
+	cmdLutElement{0x01, 0x03, -1, 0x03, 0x0006, 0x0016},
+	cmdLutElement{0x01, 0x03, -1, 0x03, 0x0006, 0x001e},
+	cmdLutElement{0x01, 0x04, -1, 0x03, 0x0006, 0x0026},
+	cmdLutElement{0x01, 0x04, -1, 0x03, 0x0006, 0x0036},
+	cmdLutElement{0x01, 0x01, -1, 0x03, 0x0008, 0x000a},
+	cmdLutElement{0x01, 0x01, -1, 0x03, 0x0008, 0x000c},
+	cmdLutElement{0x01, 0x02, -1, 0x03, 0x0008, 0x000e},
+	cmdLutElement{0x01, 0x02, -1, 0x03, 0x0008, 0x0012},
+	cmdLutElement{0x01, 0x03, -1, 0x03, 0x0008, 0x0016},
+	cmdLutElement{0x01, 0x03, -1, 0x03, 0x0008, 0x001e},
+	cmdLutElement{0x01, 0x04, -1, 0x03, 0x0008, 0x0026},
+	cmdLutElement{0x01, 0x04, -1, 0x03, 0x0008, 0x0036},
+	cmdLutElement{0x02, 0x00, -1, 0x00, 0x000a, 0x0002},
+	cmdLutElement{0x02, 0x00, -1, 0x01, 0x000a, 0x0003},
+	cmdLutElement{0x02, 0x00, -1, 0x02, 0x000a, 0x0004},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0005},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0006},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0007},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0008},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000a, 0x0009},
+	cmdLutElement{0x02, 0x00, -1, 0x00, 0x000e, 0x0002},
+	cmdLutElement{0x02, 0x00, -1, 0x01, 0x000e, 0x0003},
+	cmdLutElement{0x02, 0x00, -1, 0x02, 0x000e, 0x0004},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0005},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0006},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0007},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0008},
+	cmdLutElement{0x02, 0x00, -1, 0x03, 0x000e, 0x0009},
+	cmdLutElement{0x03, 0x00, -1, 0x00, 0x0012, 0x0002},
+	cmdLutElement{0x03, 0x00, -1, 0x01, 0x0012, 0x0003},
+	cmdLutElement{0x03, 0x00, -1, 0x02, 0x0012, 0x0004},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0005},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0006},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0007},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0008},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x0012, 0x0009},
+	cmdLutElement{0x03, 0x00, -1, 0x00, 0x001a, 0x0002},
+	cmdLutElement{0x03, 0x00, -1, 0x01, 0x001a, 0x0003},
+	cmdLutElement{0x03, 0x00, -1, 0x02, 0x001a, 0x0004},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0005},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0006},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0007},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0008},
+	cmdLutElement{0x03, 0x00, -1, 0x03, 0x001a, 0x0009},
+	cmdLutElement{0x04, 0x00, -1, 0x00, 0x0022, 0x0002},
+	cmdLutElement{0x04, 0x00, -1, 0x01, 0x0022, 0x0003},
+	cmdLutElement{0x04, 0x00, -1, 0x02, 0x0022, 0x0004},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0005},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0006},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0007},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0008},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0022, 0x0009},
+	cmdLutElement{0x04, 0x00, -1, 0x00, 0x0032, 0x0002},
+	cmdLutElement{0x04, 0x00, -1, 0x01, 0x0032, 0x0003},
+	cmdLutElement{0x04, 0x00, -1, 0x02, 0x0032, 0x0004},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0005},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0006},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0007},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0008},
+	cmdLutElement{0x04, 0x00, -1, 0x03, 0x0032, 0x0009},
+	cmdLutElement{0x05, 0x00, -1, 0x00, 0x0042, 0x0002},
+	cmdLutElement{0x05, 0x00, -1, 0x01, 0x0042, 0x0003},
+	cmdLutElement{0x05, 0x00, -1, 0x02, 0x0042, 0x0004},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0005},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0006},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0007},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0008},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0042, 0x0009},
+	cmdLutElement{0x05, 0x00, -1, 0x00, 0x0062, 0x0002},
+	cmdLutElement{0x05, 0x00, -1, 0x01, 0x0062, 0x0003},
+	cmdLutElement{0x05, 0x00, -1, 0x02, 0x0062, 0x0004},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0005},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0006},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0007},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0008},
+	cmdLutElement{0x05, 0x00, -1, 0x03, 0x0062, 0x0009},
+	cmdLutElement{0x02, 0x01, -1, 0x03, 0x000a, 0x000a},
+	cmdLutElement{0x02, 0x01, -1, 0x03, 0x000a, 0x000c},
+	cmdLutElement{0x02, 0x02, -1, 0x03, 0x000a, 0x000e},
+	cmdLutElement{0x02, 0x02, -1, 0x03, 0x000a, 0x0012},
+	cmdLutElement{0x02, 0x03, -1, 0x03, 0x000a, 0x0016},
+	cmdLutElement{0x02, 0x03, -1, 0x03, 0x000a, 0x001e},
+	cmdLutElement{0x02, 0x04, -1, 0x03, 0x000a, 0x0026},
+	cmdLutElement{0x02, 0x04, -1, 0x03, 0x000a, 0x0036},
+	cmdLutElement{0x02, 0x01, -1, 0x03, 0x000e, 0x000a},
+	cmdLutElement{0x02, 0x01, -1, 0x03, 0x000e, 0x000c},
+	cmdLutElement{0x02, 0x02, -1, 0x03, 0x000e, 0x000e},
+	cmdLutElement{0x02, 0x02, -1, 0x03, 0x000e, 0x0012},
+	cmdLutElement{0x02, 0x03, -1, 0x03, 0x000e, 0x0016},
+	cmdLutElement{0x02, 0x03, -1, 0x03, 0x000e, 0x001e},
+	cmdLutElement{0x02, 0x04, -1, 0x03, 0x000e, 0x0026},
+	cmdLutElement{0x02, 0x04, -1, 0x03, 0x000e, 0x0036},
+	cmdLutElement{0x03, 0x01, -1, 0x03, 0x0012, 0x000a},
+	cmdLutElement{0x03, 0x01, -1, 0x03, 0x0012, 0x000c},
+	cmdLutElement{0x03, 0x02, -1, 0x03, 0x0012, 0x000e},
+	cmdLutElement{0x03, 0x02, -1, 0x03, 0x0012, 0x0012},
+	cmdLutElement{0x03, 0x03, -1, 0x03, 0x0012, 0x0016},
+	cmdLutElement{0x03, 0x03, -1, 0x03, 0x0012, 0x001e},
+	cmdLutElement{0x03, 0x04, -1, 0x03, 0x0012, 0x0026},
+	cmdLutElement{0x03, 0x04, -1, 0x03, 0x0012, 0x0036},
+	cmdLutElement{0x03, 0x01, -1, 0x03, 0x001a, 0x000a},
+	cmdLutElement{0x03, 0x01, -1, 0x03, 0x001a, 0x000c},
+	cmdLutElement{0x03, 0x02, -1, 0x03, 0x001a, 0x000e},
+	cmdLutElement{0x03, 0x02, -1, 0x03, 0x001a, 0x0012},
+	cmdLutElement{0x03, 0x03, -1, 0x03, 0x001a, 0x0016},
+	cmdLutElement{0x03, 0x03, -1, 0x03, 0x001a, 0x001e},
+	cmdLutElement{0x03, 0x04, -1, 0x03, 0x001a, 0x0026},
+	cmdLutElement{0x03, 0x04, -1, 0x03, 0x001a, 0x0036},
+	cmdLutElement{0x04, 0x01, -1, 0x03, 0x0022, 0x000a},
+	cmdLutElement{0x04, 0x01, -1, 0x03, 0x0022, 0x000c},
+	cmdLutElement{0x04, 0x02, -1, 0x03, 0x0022, 0x000e},
+	cmdLutElement{0x04, 0x02, -1, 0x03, 0x0022, 0x0012},
+	cmdLutElement{0x04, 0x03, -1, 0x03, 0x0022, 0x0016},
+	cmdLutElement{0x04, 0x03, -1, 0x03, 0x0022, 0x001e},
+	cmdLutElement{0x04, 0x04, -1, 0x03, 0x0022, 0x0026},
+	cmdLutElement{0x04, 0x04, -1, 0x03, 0x0022, 0x0036},
+	cmdLutElement{0x04, 0x01, -1, 0x03, 0x0032, 0x000a},
+	cmdLutElement{0x04, 0x01, -1, 0x03, 0x0032, 0x000c},
+	cmdLutElement{0x04, 0x02, -1, 0x03, 0x0032, 0x000e},
+	cmdLutElement{0x04, 0x02, -1, 0x03, 0x0032, 0x0012},
+	cmdLutElement{0x04, 0x03, -1, 0x03, 0x0032, 0x0016},
+	cmdLutElement{0x04, 0x03, -1, 0x03, 0x0032, 0x001e},
+	cmdLutElement{0x04, 0x04, -1, 0x03, 0x0032, 0x0026},
+	cmdLutElement{0x04, 0x04, -1, 0x03, 0x0032, 0x0036},
+	cmdLutElement{0x05, 0x01, -1, 0x03, 0x0042, 0x000a},
+	cmdLutElement{0x05, 0x01, -1, 0x03, 0x0042, 0x000c},
+	cmdLutElement{0x05, 0x02, -1, 0x03, 0x0042, 0x000e},
+	cmdLutElement{0x05, 0x02, -1, 0x03, 0x0042, 0x0012},
+	cmdLutElement{0x05, 0x03, -1, 0x03, 0x0042, 0x0016},
+	cmdLutElement{0x05, 0x03, -1, 0x03, 0x0042, 0x001e},
+	cmdLutElement{0x05, 0x04, -1, 0x03, 0x0042, 0x0026},
+	cmdLutElement{0x05, 0x04, -1, 0x03, 0x0042, 0x0036},
+	cmdLutElement{0x05, 0x01, -1, 0x03, 0x0062, 0x000a},
+	cmdLutElement{0x05, 0x01, -1, 0x03, 0x0062, 0x000c},
+	cmdLutElement{0x05, 0x02, -1, 0x03, 0x0062, 0x000e},
+	cmdLutElement{0x05, 0x02, -1, 0x03, 0x0062, 0x0012},
+	cmdLutElement{0x05, 0x03, -1, 0x03, 0x0062, 0x0016},
+	cmdLutElement{0x05, 0x03, -1, 0x03, 0x0062, 0x001e},
+	cmdLutElement{0x05, 0x04, -1, 0x03, 0x0062, 0x0026},
+	cmdLutElement{0x05, 0x04, -1, 0x03, 0x0062, 0x0036},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0000, 0x0046},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0000, 0x0066},
+	cmdLutElement{0x00, 0x06, -1, 0x03, 0x0000, 0x0086},
+	cmdLutElement{0x00, 0x07, -1, 0x03, 0x0000, 0x00c6},
+	cmdLutElement{0x00, 0x08, -1, 0x03, 0x0000, 0x0146},
+	cmdLutElement{0x00, 0x09, -1, 0x03, 0x0000, 0x0246},
+	cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0000, 0x0446},
+	cmdLutElement{0x00, 0x18, -1, 0x03, 0x0000, 0x0846},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0001, 0x0046},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0001, 0x0066},
+	cmdLutElement{0x00, 0x06, -1, 0x03, 0x0001, 0x0086},
+	cmdLutElement{0x00, 0x07, -1, 0x03, 0x0001, 0x00c6},
+	cmdLutElement{0x00, 0x08, -1, 0x03, 0x0001, 0x0146},
+	cmdLutElement{0x00, 0x09, -1, 0x03, 0x0001, 0x0246},
+	cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0001, 0x0446},
+	cmdLutElement{0x00, 0x18, -1, 0x03, 0x0001, 0x0846},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0002, 0x0046},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0002, 0x0066},
+	cmdLutElement{0x00, 0x06, -1, 0x03, 0x0002, 0x0086},
+	cmdLutElement{0x00, 0x07, -1, 0x03, 0x0002, 0x00c6},
+	cmdLutElement{0x00, 0x08, -1, 0x03, 0x0002, 0x0146},
+	cmdLutElement{0x00, 0x09, -1, 0x03, 0x0002, 0x0246},
+	cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0002, 0x0446},
+	cmdLutElement{0x00, 0x18, -1, 0x03, 0x0002, 0x0846},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0003, 0x0046},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0003, 0x0066},
+	cmdLutElement{0x00, 0x06, -1, 0x03, 0x0003, 0x0086},
+	cmdLutElement{0x00, 0x07, -1, 0x03, 0x0003, 0x00c6},
+	cmdLutElement{0x00, 0x08, -1, 0x03, 0x0003, 0x0146},
+	cmdLutElement{0x00, 0x09, -1, 0x03, 0x0003, 0x0246},
+	cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0003, 0x0446},
+	cmdLutElement{0x00, 0x18, -1, 0x03, 0x0003, 0x0846},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0004, 0x0046},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0004, 0x0066},
+	cmdLutElement{0x00, 0x06, -1, 0x03, 0x0004, 0x0086},
+	cmdLutElement{0x00, 0x07, -1, 0x03, 0x0004, 0x00c6},
+	cmdLutElement{0x00, 0x08, -1, 0x03, 0x0004, 0x0146},
+	cmdLutElement{0x00, 0x09, -1, 0x03, 0x0004, 0x0246},
+	cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0004, 0x0446},
+	cmdLutElement{0x00, 0x18, -1, 0x03, 0x0004, 0x0846},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0005, 0x0046},
+	cmdLutElement{0x00, 0x05, -1, 0x03, 0x0005, 0x0066},
+	cmdLutElement{0x00, 0x06, -1, 0x03, 0x0005, 0x0086},
+	cmdLutElement{0x00, 0x07, -1, 0x03, 0x0005, 0x00c6},
+	cmdLutElement{0x00, 0x08, -1, 0x03, 0x0005, 0x0146},
+	cmdLutElement{0x00, 0x09, -1, 0x03, 0x0005, 0x0246},
+	cmdLutElement{0x00, 0x0a, -1, 0x03, 0x0005, 0x0446},
+	cmdLutElement{0x00, 0x18, -1, 0x03, 0x0005, 0x0846},
+	cmdLutElement{0x01, 0x05, -1, 0x03, 0x0006, 0x0046},
+	cmdLutElement{0x01, 0x05, -1, 0x03, 0x0006, 0x0066},
+	cmdLutElement{0x01, 0x06, -1, 0x03, 0x0006, 0x0086},
+	cmdLutElement{0x01, 0x07, -1, 0x03, 0x0006, 0x00c6},
+	cmdLutElement{0x01, 0x08, -1, 0x03, 0x0006, 0x0146},
+	cmdLutElement{0x01, 0x09, -1, 0x03, 0x0006, 0x0246},
+	cmdLutElement{0x01, 0x0a, -1, 0x03, 0x0006, 0x0446},
+	cmdLutElement{0x01, 0x18, -1, 0x03, 0x0006, 0x0846},
+	cmdLutElement{0x01, 0x05, -1, 0x03, 0x0008, 0x0046},
+	cmdLutElement{0x01, 0x05, -1, 0x03, 0x0008, 0x0066},
+	cmdLutElement{0x01, 0x06, -1, 0x03, 0x0008, 0x0086},
+	cmdLutElement{0x01, 0x07, -1, 0x03, 0x0008, 0x00c6},
+	cmdLutElement{0x01, 0x08, -1, 0x03, 0x0008, 0x0146},
+	cmdLutElement{0x01, 0x09, -1, 0x03, 0x0008, 0x0246},
+	cmdLutElement{0x01, 0x0a, -1, 0x03, 0x0008, 0x0446},
+	cmdLutElement{0x01, 0x18, -1, 0x03, 0x0008, 0x0846},
+	cmdLutElement{0x06, 0x00, -1, 0x00, 0x0082, 0x0002},
+	cmdLutElement{0x06, 0x00, -1, 0x01, 0x0082, 0x0003},
+	cmdLutElement{0x06, 0x00, -1, 0x02, 0x0082, 0x0004},
+	cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0005},
+	cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0006},
+	cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0007},
+	cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0008},
+	cmdLutElement{0x06, 0x00, -1, 0x03, 0x0082, 0x0009},
+	cmdLutElement{0x07, 0x00, -1, 0x00, 0x00c2, 0x0002},
+	cmdLutElement{0x07, 0x00, -1, 0x01, 0x00c2, 0x0003},
+	cmdLutElement{0x07, 0x00, -1, 0x02, 0x00c2, 0x0004},
+	cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0005},
+	cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0006},
+	cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0007},
+	cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0008},
+	cmdLutElement{0x07, 0x00, -1, 0x03, 0x00c2, 0x0009},
+	cmdLutElement{0x08, 0x00, -1, 0x00, 0x0142, 0x0002},
+	cmdLutElement{0x08, 0x00, -1, 0x01, 0x0142, 0x0003},
+	cmdLutElement{0x08, 0x00, -1, 0x02, 0x0142, 0x0004},
+	cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0005},
+	cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0006},
+	cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0007},
+	cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0008},
+	cmdLutElement{0x08, 0x00, -1, 0x03, 0x0142, 0x0009},
+	cmdLutElement{0x09, 0x00, -1, 0x00, 0x0242, 0x0002},
+	cmdLutElement{0x09, 0x00, -1, 0x01, 0x0242, 0x0003},
+	cmdLutElement{0x09, 0x00, -1, 0x02, 0x0242, 0x0004},
+	cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0005},
+	cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0006},
+	cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0007},
+	cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0008},
+	cmdLutElement{0x09, 0x00, -1, 0x03, 0x0242, 0x0009},
+	cmdLutElement{0x0a, 0x00, -1, 0x00, 0x0442, 0x0002},
+	cmdLutElement{0x0a, 0x00, -1, 0x01, 0x0442, 0x0003},
+	cmdLutElement{0x0a, 0x00, -1, 0x02, 0x0442, 0x0004},
+	cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0005},
+	cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0006},
+	cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0007},
+	cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0008},
+	cmdLutElement{0x0a, 0x00, -1, 0x03, 0x0442, 0x0009},
+	cmdLutElement{0x0c, 0x00, -1, 0x00, 0x0842, 0x0002},
+	cmdLutElement{0x0c, 0x00, -1, 0x01, 0x0842, 0x0003},
+	cmdLutElement{0x0c, 0x00, -1, 0x02, 0x0842, 0x0004},
+	cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0005},
+	cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0006},
+	cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0007},
+	cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0008},
+	cmdLutElement{0x0c, 0x00, -1, 0x03, 0x0842, 0x0009},
+	cmdLutElement{0x0e, 0x00, -1, 0x00, 0x1842, 0x0002},
+	cmdLutElement{0x0e, 0x00, -1, 0x01, 0x1842, 0x0003},
+	cmdLutElement{0x0e, 0x00, -1, 0x02, 0x1842, 0x0004},
+	cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0005},
+	cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0006},
+	cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0007},
+	cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0008},
+	cmdLutElement{0x0e, 0x00, -1, 0x03, 0x1842, 0x0009},
+	cmdLutElement{0x18, 0x00, -1, 0x00, 0x5842, 0x0002},
+	cmdLutElement{0x18, 0x00, -1, 0x01, 0x5842, 0x0003},
+	cmdLutElement{0x18, 0x00, -1, 0x02, 0x5842, 0x0004},
+	cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0005},
+	cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0006},
+	cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0007},
+	cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0008},
+	cmdLutElement{0x18, 0x00, -1, 0x03, 0x5842, 0x0009},
+	cmdLutElement{0x02, 0x05, -1, 0x03, 0x000a, 0x0046},
+	cmdLutElement{0x02, 0x05, -1, 0x03, 0x000a, 0x0066},
+	cmdLutElement{0x02, 0x06, -1, 0x03, 0x000a, 0x0086},
+	cmdLutElement{0x02, 0x07, -1, 0x03, 0x000a, 0x00c6},
+	cmdLutElement{0x02, 0x08, -1, 0x03, 0x000a, 0x0146},
+	cmdLutElement{0x02, 0x09, -1, 0x03, 0x000a, 0x0246},
+	cmdLutElement{0x02, 0x0a, -1, 0x03, 0x000a, 0x0446},
+	cmdLutElement{0x02, 0x18, -1, 0x03, 0x000a, 0x0846},
+	cmdLutElement{0x02, 0x05, -1, 0x03, 0x000e, 0x0046},
+	cmdLutElement{0x02, 0x05, -1, 0x03, 0x000e, 0x0066},
+	cmdLutElement{0x02, 0x06, -1, 0x03, 0x000e, 0x0086},
+	cmdLutElement{0x02, 0x07, -1, 0x03, 0x000e, 0x00c6},
+	cmdLutElement{0x02, 0x08, -1, 0x03, 0x000e, 0x0146},
+	cmdLutElement{0x02, 0x09, -1, 0x03, 0x000e, 0x0246},
+	cmdLutElement{0x02, 0x0a, -1, 0x03, 0x000e, 0x0446},
+	cmdLutElement{0x02, 0x18, -1, 0x03, 0x000e, 0x0846},
+	cmdLutElement{0x03, 0x05, -1, 0x03, 0x0012, 0x0046},
+	cmdLutElement{0x03, 0x05, -1, 0x03, 0x0012, 0x0066},
+	cmdLutElement{0x03, 0x06, -1, 0x03, 0x0012, 0x0086},
+	cmdLutElement{0x03, 0x07, -1, 0x03, 0x0012, 0x00c6},
+	cmdLutElement{0x03, 0x08, -1, 0x03, 0x0012, 0x0146},
+	cmdLutElement{0x03, 0x09, -1, 0x03, 0x0012, 0x0246},
+	cmdLutElement{0x03, 0x0a, -1, 0x03, 0x0012, 0x0446},
+	cmdLutElement{0x03, 0x18, -1, 0x03, 0x0012, 0x0846},
+	cmdLutElement{0x03, 0x05, -1, 0x03, 0x001a, 0x0046},
+	cmdLutElement{0x03, 0x05, -1, 0x03, 0x001a, 0x0066},
+	cmdLutElement{0x03, 0x06, -1, 0x03, 0x001a, 0x0086},
+	cmdLutElement{0x03, 0x07, -1, 0x03, 0x001a, 0x00c6},
+	cmdLutElement{0x03, 0x08, -1, 0x03, 0x001a, 0x0146},
+	cmdLutElement{0x03, 0x09, -1, 0x03, 0x001a, 0x0246},
+	cmdLutElement{0x03, 0x0a, -1, 0x03, 0x001a, 0x0446},
+	cmdLutElement{0x03, 0x18, -1, 0x03, 0x001a, 0x0846},
+	cmdLutElement{0x04, 0x05, -1, 0x03, 0x0022, 0x0046},
+	cmdLutElement{0x04, 0x05, -1, 0x03, 0x0022, 0x0066},
+	cmdLutElement{0x04, 0x06, -1, 0x03, 0x0022, 0x0086},
+	cmdLutElement{0x04, 0x07, -1, 0x03, 0x0022, 0x00c6},
+	cmdLutElement{0x04, 0x08, -1, 0x03, 0x0022, 0x0146},
+	cmdLutElement{0x04, 0x09, -1, 0x03, 0x0022, 0x0246},
+	cmdLutElement{0x04, 0x0a, -1, 0x03, 0x0022, 0x0446},
+	cmdLutElement{0x04, 0x18, -1, 0x03, 0x0022, 0x0846},
+	cmdLutElement{0x04, 0x05, -1, 0x03, 0x0032, 0x0046},
+	cmdLutElement{0x04, 0x05, -1, 0x03, 0x0032, 0x0066},
+	cmdLutElement{0x04, 0x06, -1, 0x03, 0x0032, 0x0086},
+	cmdLutElement{0x04, 0x07, -1, 0x03, 0x0032, 0x00c6},
+	cmdLutElement{0x04, 0x08, -1, 0x03, 0x0032, 0x0146},
+	cmdLutElement{0x04, 0x09, -1, 0x03, 0x0032, 0x0246},
+	cmdLutElement{0x04, 0x0a, -1, 0x03, 0x0032, 0x0446},
+	cmdLutElement{0x04, 0x18, -1, 0x03, 0x0032, 0x0846},
+	cmdLutElement{0x05, 0x05, -1, 0x03, 0x0042, 0x0046},
+	cmdLutElement{0x05, 0x05, -1, 0x03, 0x0042, 0x0066},
+	cmdLutElement{0x05, 0x06, -1, 0x03, 0x0042, 0x0086},
+	cmdLutElement{0x05, 0x07, -1, 0x03, 0x0042, 0x00c6},
+	cmdLutElement{0x05, 0x08, -1, 0x03, 0x0042, 0x0146},
+	cmdLutElement{0x05, 0x09, -1, 0x03, 0x0042, 0x0246},
+	cmdLutElement{0x05, 0x0a, -1, 0x03, 0x0042, 0x0446},
+	cmdLutElement{0x05, 0x18, -1, 0x03, 0x0042, 0x0846},
+	cmdLutElement{0x05, 0x05, -1, 0x03, 0x0062, 0x0046},
+	cmdLutElement{0x05, 0x05, -1, 0x03, 0x0062, 0x0066},
+	cmdLutElement{0x05, 0x06, -1, 0x03, 0x0062, 0x0086},
+	cmdLutElement{0x05, 0x07, -1, 0x03, 0x0062, 0x00c6},
+	cmdLutElement{0x05, 0x08, -1, 0x03, 0x0062, 0x0146},
+	cmdLutElement{0x05, 0x09, -1, 0x03, 0x0062, 0x0246},
+	cmdLutElement{0x05, 0x0a, -1, 0x03, 0x0062, 0x0446},
+	cmdLutElement{0x05, 0x18, -1, 0x03, 0x0062, 0x0846},
+	cmdLutElement{0x06, 0x01, -1, 0x03, 0x0082, 0x000a},
+	cmdLutElement{0x06, 0x01, -1, 0x03, 0x0082, 0x000c},
+	cmdLutElement{0x06, 0x02, -1, 0x03, 0x0082, 0x000e},
+	cmdLutElement{0x06, 0x02, -1, 0x03, 0x0082, 0x0012},
+	cmdLutElement{0x06, 0x03, -1, 0x03, 0x0082, 0x0016},
+	cmdLutElement{0x06, 0x03, -1, 0x03, 0x0082, 0x001e},
+	cmdLutElement{0x06, 0x04, -1, 0x03, 0x0082, 0x0026},
+	cmdLutElement{0x06, 0x04, -1, 0x03, 0x0082, 0x0036},
+	cmdLutElement{0x07, 0x01, -1, 0x03, 0x00c2, 0x000a},
+	cmdLutElement{0x07, 0x01, -1, 0x03, 0x00c2, 0x000c},
+	cmdLutElement{0x07, 0x02, -1, 0x03, 0x00c2, 0x000e},
+	cmdLutElement{0x07, 0x02, -1, 0x03, 0x00c2, 0x0012},
+	cmdLutElement{0x07, 0x03, -1, 0x03, 0x00c2, 0x0016},
+	cmdLutElement{0x07, 0x03, -1, 0x03, 0x00c2, 0x001e},
+	cmdLutElement{0x07, 0x04, -1, 0x03, 0x00c2, 0x0026},
+	cmdLutElement{0x07, 0x04, -1, 0x03, 0x00c2, 0x0036},
+	cmdLutElement{0x08, 0x01, -1, 0x03, 0x0142, 0x000a},
+	cmdLutElement{0x08, 0x01, -1, 0x03, 0x0142, 0x000c},
+	cmdLutElement{0x08, 0x02, -1, 0x03, 0x0142, 0x000e},
+	cmdLutElement{0x08, 0x02, -1, 0x03, 0x0142, 0x0012},
+	cmdLutElement{0x08, 0x03, -1, 0x03, 0x0142, 0x0016},
+	cmdLutElement{0x08, 0x03, -1, 0x03, 0x0142, 0x001e},
+	cmdLutElement{0x08, 0x04, -1, 0x03, 0x0142, 0x0026},
+	cmdLutElement{0x08, 0x04, -1, 0x03, 0x0142, 0x0036},
+	cmdLutElement{0x09, 0x01, -1, 0x03, 0x0242, 0x000a},
+	cmdLutElement{0x09, 0x01, -1, 0x03, 0x0242, 0x000c},
+	cmdLutElement{0x09, 0x02, -1, 0x03, 0x0242, 0x000e},
+	cmdLutElement{0x09, 0x02, -1, 0x03, 0x0242, 0x0012},
+	cmdLutElement{0x09, 0x03, -1, 0x03, 0x0242, 0x0016},
+	cmdLutElement{0x09, 0x03, -1, 0x03, 0x0242, 0x001e},
+	cmdLutElement{0x09, 0x04, -1, 0x03, 0x0242, 0x0026},
+	cmdLutElement{0x09, 0x04, -1, 0x03, 0x0242, 0x0036},
+	cmdLutElement{0x0a, 0x01, -1, 0x03, 0x0442, 0x000a},
+	cmdLutElement{0x0a, 0x01, -1, 0x03, 0x0442, 0x000c},
+	cmdLutElement{0x0a, 0x02, -1, 0x03, 0x0442, 0x000e},
+	cmdLutElement{0x0a, 0x02, -1, 0x03, 0x0442, 0x0012},
+	cmdLutElement{0x0a, 0x03, -1, 0x03, 0x0442, 0x0016},
+	cmdLutElement{0x0a, 0x03, -1, 0x03, 0x0442, 0x001e},
+	cmdLutElement{0x0a, 0x04, -1, 0x03, 0x0442, 0x0026},
+	cmdLutElement{0x0a, 0x04, -1, 0x03, 0x0442, 0x0036},
+	cmdLutElement{0x0c, 0x01, -1, 0x03, 0x0842, 0x000a},
+	cmdLutElement{0x0c, 0x01, -1, 0x03, 0x0842, 0x000c},
+	cmdLutElement{0x0c, 0x02, -1, 0x03, 0x0842, 0x000e},
+	cmdLutElement{0x0c, 0x02, -1, 0x03, 0x0842, 0x0012},
+	cmdLutElement{0x0c, 0x03, -1, 0x03, 0x0842, 0x0016},
+	cmdLutElement{0x0c, 0x03, -1, 0x03, 0x0842, 0x001e},
+	cmdLutElement{0x0c, 0x04, -1, 0x03, 0x0842, 0x0026},
+	cmdLutElement{0x0c, 0x04, -1, 0x03, 0x0842, 0x0036},
+	cmdLutElement{0x0e, 0x01, -1, 0x03, 0x1842, 0x000a},
+	cmdLutElement{0x0e, 0x01, -1, 0x03, 0x1842, 0x000c},
+	cmdLutElement{0x0e, 0x02, -1, 0x03, 0x1842, 0x000e},
+	cmdLutElement{0x0e, 0x02, -1, 0x03, 0x1842, 0x0012},
+	cmdLutElement{0x0e, 0x03, -1, 0x03, 0x1842, 0x0016},
+	cmdLutElement{0x0e, 0x03, -1, 0x03, 0x1842, 0x001e},
+	cmdLutElement{0x0e, 0x04, -1, 0x03, 0x1842, 0x0026},
+	cmdLutElement{0x0e, 0x04, -1, 0x03, 0x1842, 0x0036},
+	cmdLutElement{0x18, 0x01, -1, 0x03, 0x5842, 0x000a},
+	cmdLutElement{0x18, 0x01, -1, 0x03, 0x5842, 0x000c},
+	cmdLutElement{0x18, 0x02, -1, 0x03, 0x5842, 0x000e},
+	cmdLutElement{0x18, 0x02, -1, 0x03, 0x5842, 0x0012},
+	cmdLutElement{0x18, 0x03, -1, 0x03, 0x5842, 0x0016},
+	cmdLutElement{0x18, 0x03, -1, 0x03, 0x5842, 0x001e},
+	cmdLutElement{0x18, 0x04, -1, 0x03, 0x5842, 0x0026},
+	cmdLutElement{0x18, 0x04, -1, 0x03, 0x5842, 0x0036},
+	cmdLutElement{0x06, 0x05, -1, 0x03, 0x0082, 0x0046},
+	cmdLutElement{0x06, 0x05, -1, 0x03, 0x0082, 0x0066},
+	cmdLutElement{0x06, 0x06, -1, 0x03, 0x0082, 0x0086},
+	cmdLutElement{0x06, 0x07, -1, 0x03, 0x0082, 0x00c6},
+	cmdLutElement{0x06, 0x08, -1, 0x03, 0x0082, 0x0146},
+	cmdLutElement{0x06, 0x09, -1, 0x03, 0x0082, 0x0246},
+	cmdLutElement{0x06, 0x0a, -1, 0x03, 0x0082, 0x0446},
+	cmdLutElement{0x06, 0x18, -1, 0x03, 0x0082, 0x0846},
+	cmdLutElement{0x07, 0x05, -1, 0x03, 0x00c2, 0x0046},
+	cmdLutElement{0x07, 0x05, -1, 0x03, 0x00c2, 0x0066},
+	cmdLutElement{0x07, 0x06, -1, 0x03, 0x00c2, 0x0086},
+	cmdLutElement{0x07, 0x07, -1, 0x03, 0x00c2, 0x00c6},
+	cmdLutElement{0x07, 0x08, -1, 0x03, 0x00c2, 0x0146},
+	cmdLutElement{0x07, 0x09, -1, 0x03, 0x00c2, 0x0246},
+	cmdLutElement{0x07, 0x0a, -1, 0x03, 0x00c2, 0x0446},
+	cmdLutElement{0x07, 0x18, -1, 0x03, 0x00c2, 0x0846},
+	cmdLutElement{0x08, 0x05, -1, 0x03, 0x0142, 0x0046},
+	cmdLutElement{0x08, 0x05, -1, 0x03, 0x0142, 0x0066},
+	cmdLutElement{0x08, 0x06, -1, 0x03, 0x0142, 0x0086},
+	cmdLutElement{0x08, 0x07, -1, 0x03, 0x0142, 0x00c6},
+	cmdLutElement{0x08, 0x08, -1, 0x03, 0x0142, 0x0146},
+	cmdLutElement{0x08, 0x09, -1, 0x03, 0x0142, 0x0246},
+	cmdLutElement{0x08, 0x0a, -1, 0x03, 0x0142, 0x0446},
+	cmdLutElement{0x08, 0x18, -1, 0x03, 0x0142, 0x0846},
+	cmdLutElement{0x09, 0x05, -1, 0x03, 0x0242, 0x0046},
+	cmdLutElement{0x09, 0x05, -1, 0x03, 0x0242, 0x0066},
+	cmdLutElement{0x09, 0x06, -1, 0x03, 0x0242, 0x0086},
+	cmdLutElement{0x09, 0x07, -1, 0x03, 0x0242, 0x00c6},
+	cmdLutElement{0x09, 0x08, -1, 0x03, 0x0242, 0x0146},
+	cmdLutElement{0x09, 0x09, -1, 0x03, 0x0242, 0x0246},
+	cmdLutElement{0x09, 0x0a, -1, 0x03, 0x0242, 0x0446},
+	cmdLutElement{0x09, 0x18, -1, 0x03, 0x0242, 0x0846},
+	cmdLutElement{0x0a, 0x05, -1, 0x03, 0x0442, 0x0046},
+	cmdLutElement{0x0a, 0x05, -1, 0x03, 0x0442, 0x0066},
+	cmdLutElement{0x0a, 0x06, -1, 0x03, 0x0442, 0x0086},
+	cmdLutElement{0x0a, 0x07, -1, 0x03, 0x0442, 0x00c6},
+	cmdLutElement{0x0a, 0x08, -1, 0x03, 0x0442, 0x0146},
+	cmdLutElement{0x0a, 0x09, -1, 0x03, 0x0442, 0x0246},
+	cmdLutElement{0x0a, 0x0a, -1, 0x03, 0x0442, 0x0446},
+	cmdLutElement{0x0a, 0x18, -1, 0x03, 0x0442, 0x0846},
+	cmdLutElement{0x0c, 0x05, -1, 0x03, 0x0842, 0x0046},
+	cmdLutElement{0x0c, 0x05, -1, 0x03, 0x0842, 0x0066},
+	cmdLutElement{0x0c, 0x06, -1, 0x03, 0x0842, 0x0086},
+	cmdLutElement{0x0c, 0x07, -1, 0x03, 0x0842, 0x00c6},
+	cmdLutElement{0x0c, 0x08, -1, 0x03, 0x0842, 0x0146},
+	cmdLutElement{0x0c, 0x09, -1, 0x03, 0x0842, 0x0246},
+	cmdLutElement{0x0c, 0x0a, -1, 0x03, 0x0842, 0x0446},
+	cmdLutElement{0x0c, 0x18, -1, 0x03, 0x0842, 0x0846},
+	cmdLutElement{0x0e, 0x05, -1, 0x03, 0x1842, 0x0046},
+	cmdLutElement{0x0e, 0x05, -1, 0x03, 0x1842, 0x0066},
+	cmdLutElement{0x0e, 0x06, -1, 0x03, 0x1842, 0x0086},
+	cmdLutElement{0x0e, 0x07, -1, 0x03, 0x1842, 0x00c6},
+	cmdLutElement{0x0e, 0x08, -1, 0x03, 0x1842, 0x0146},
+	cmdLutElement{0x0e, 0x09, -1, 0x03, 0x1842, 0x0246},
+	cmdLutElement{0x0e, 0x0a, -1, 0x03, 0x1842, 0x0446},
+	cmdLutElement{0x0e, 0x18, -1, 0x03, 0x1842, 0x0846},
+	cmdLutElement{0x18, 0x05, -1, 0x03, 0x5842, 0x0046},
+	cmdLutElement{0x18, 0x05, -1, 0x03, 0x5842, 0x0066},
+	cmdLutElement{0x18, 0x06, -1, 0x03, 0x5842, 0x0086},
+	cmdLutElement{0x18, 0x07, -1, 0x03, 0x5842, 0x00c6},
+	cmdLutElement{0x18, 0x08, -1, 0x03, 0x5842, 0x0146},
+	cmdLutElement{0x18, 0x09, -1, 0x03, 0x5842, 0x0246},
+	cmdLutElement{0x18, 0x0a, -1, 0x03, 0x5842, 0x0446},
+	cmdLutElement{0x18, 0x18, -1, 0x03, 0x5842, 0x0846},
+}
@@ -0,0 +1,196 @@
+package brotli
+
+const fastOnePassCompressionQuality = 0 // quality 0: maxHashTableSize returns 1<<15 for it
+
+const fastTwoPassCompressionQuality = 1 // quality 1: like quality 0, computeLgBlock uses lgwin as lgblock
+
+const zopflificationQuality = 10
+
+const hqZopflificationQuality = 11
+
+const maxQualityForStaticEntropyCodes = 2 // sanitizeParams clears large_window at or below this quality
+
+const minQualityForBlockSplit = 4 // qualities 2..3 (below this) get a fixed lgblock of 14 in computeLgBlock
+
+const minQualityForNonzeroDistanceParams = 4
+
+const minQualityForOptimizeHistograms = 4
+
+const minQualityForExtensiveReferenceSearch = 5
+
+const minQualityForContextModeling = 5
+
+const minQualityForHqContextModeling = 7
+
+const minQualityForHqBlockSplitting = 10
+
+/* For quality below minQualityForBlockSplit there is no block splitting,
+   so we buffer at most this many literals and commands. */
+const maxNumDelayedSymbols = 0x2FFF
+
+/* maxHashTableSize gives the hash-table size for the fast qualities:
+   1<<15 for the one-pass quality (0) and 1<<17 for any other value. */
+func maxHashTableSize(quality int) uint {
+	if quality == fastOnePassCompressionQuality {
+		return 1 << 15
+	}
+	return 1 << 17
+}
+
+/* The maximum length for which the zopflification uses distinct distances. */
+const maxZopfliLenQuality10 = 150 // returned by maxZopfliLen when quality <= 10
+
+const maxZopfliLenQuality11 = 325 // returned by maxZopfliLen when quality > 10
+
+/* Do not thoroughly search when a long copy is found. */
+const longCopyQuickStep = 16384 // NOTE(review): not referenced in this file; presumably a copy-length threshold — confirm at the use site
+
+/* maxZopfliLen picks the zopfli length cap: maxZopfliLenQuality10 for
+   quality <= 10, maxZopfliLenQuality11 above that. */
+func maxZopfliLen(params *encoderParams) uint {
+	if params.quality > 10 {
+		return maxZopfliLenQuality11
+	}
+	return maxZopfliLenQuality10
+}
+
+/* Number of best candidates to evaluate to expand Zopfli chain:
+   a single candidate up to quality 10, five above it. */
+func maxZopfliCandidates(params *encoderParams) uint {
+	if params.quality > 10 {
+		return 5
+	}
+	return 1
+}
+
+/* sanitizeParams clamps quality and lgwin into their supported ranges and
+   turns large_window off for the static-entropy-code qualities. */
+func sanitizeParams(params *encoderParams) {
+	params.quality = brotli_min_int(maxQuality, brotli_max_int(minQuality, params.quality))
+	if params.quality <= maxQualityForStaticEntropyCodes {
+		params.large_window = false
+	}
+
+	/* The upper window bound depends on whether large windows are allowed. */
+	var upper int = maxWindowBits
+	if params.large_window {
+		upper = largeMaxWindowBits
+	}
+
+	switch {
+	case params.lgwin < minWindowBits:
+		params.lgwin = minWindowBits
+	case params.lgwin > uint(upper):
+		params.lgwin = uint(upper)
+	}
+}
+
+/* Returns optimized lg_block value. */
+func computeLgBlock(params *encoderParams) int {
+	var lgblock int = params.lgblock
+	if params.quality == fastOnePassCompressionQuality || params.quality == fastTwoPassCompressionQuality {
+		lgblock = int(params.lgwin)
+	} else if params.quality < minQualityForBlockSplit {
+		lgblock = 14
+	} else if lgblock == 0 {
+		lgblock = 16
+		if params.quality >= 9 && params.lgwin > uint(lgblock) {
+			lgblock = brotli_min_int(18, int(params.lgwin))
+		}
+	} else {
+		lgblock = brotli_min_int(maxInputBlockBits, brotli_max_int(minInputBlockBits, lgblock))
+	}
+
+	return lgblock
+}
+
+/* Returns log2 of the size of main ring buffer area.
+   The result is one bit more than the larger of lgwin and lgblock:
+   lgwin + 1 so that a newly added block fits completely while lgwin bits
+   stay available, and lgblock + 1 because the copy tail length needs to be
+   smaller than the ring-buffer size. */
+func computeRbBits(params *encoderParams) int {
+	return brotli_max_int(int(params.lgwin), params.lgblock) + 1
+}
+
+func maxMetablockSize(params *encoderParams) uint {
+	var bits int = brotli_min_int(computeRbBits(params), maxInputBlockBits)
+	return uint(1) << uint(bits)
+}
+
+/* When the backward-reference search has not seen a match for a long time,
+   some match lookups can be skipped. Unsuccessful lookups are very
+   expensive, so this heuristic speeds up compression quite a lot.
+   At first 8 byte strides are taken and every second byte is put to the
+   hasher; after 4x more literals the stride is 16 bytes and every 4-th byte
+   is put to the hasher. Applied only to qualities 2 to 9.
+   Returns 64 below quality 9 and 512 otherwise. */
+func literalSpreeLengthForSparseSearch(params *encoderParams) uint {
+	if params.quality >= 9 {
+		return 512
+	}
+	return 64
+}
+
+/* chooseHasher fills hparams with the hasher type selected by quality,
+   lgwin and size_hint. Types 5 and 6 additionally get block_bits,
+   bucket_bits and num_last_distances_to_check (type 6 also hash_len). */
+func chooseHasher(params *encoderParams, hparams *hasherParams) {
+	q := params.quality
+	bigInput := params.size_hint >= 1<<20
+
+	switch {
+	case q > 9:
+		hparams.type_ = 10
+	case q == 4 && bigInput:
+		hparams.type_ = 54
+	case q < 5:
+		hparams.type_ = q
+	case params.lgwin <= 16:
+		switch {
+		case q < 7:
+			hparams.type_ = 40
+		case q < 9:
+			hparams.type_ = 41
+		default:
+			hparams.type_ = 42
+		}
+	default:
+		if bigInput && params.lgwin >= 19 {
+			hparams.type_ = 6
+			hparams.bucket_bits = 15
+			hparams.hash_len = 5
+		} else {
+			hparams.type_ = 5
+			if q < 7 {
+				hparams.bucket_bits = 14
+			} else {
+				hparams.bucket_bits = 15
+			}
+		}
+		hparams.block_bits = q - 1
+		switch {
+		case q < 7:
+			hparams.num_last_distances_to_check = 4
+		case q < 9:
+			hparams.num_last_distances_to_check = 10
+		default:
+			hparams.num_last_distances_to_check = 16
+		}
+	}
+
+	if params.lgwin > 24 {
+		/* Large window brotli swaps some hashers. Qualities <= 2 are left
+		   alone (too fast for large window), and so are qualities >= 10
+		   (their hasher already works well with large window). The swaps:
+		   H3 --> H35: for quality 3.
+		   H54 --> H55: for quality 4 with size hint >= 1MB.
+		   H6 --> H65: for qualities 5, 6, 7, 8, 9. */
+		switch hparams.type_ {
+		case 3:
+			hparams.type_ = 35
+		case 54:
+			hparams.type_ = 55
+		case 6:
+			hparams.type_ = 65
+		}
+	}
+}
@@ -0,0 +1,108 @@
+package brotli
+
+import (
+	"errors"
+	"io"
+)
+
+type decodeError int // decoder error code; Read builds it from decoderGetErrorCode
+// Error formats the code via decoderErrorString, prefixed with "brotli: ".
+func (err decodeError) Error() string {
+	return "brotli: " + string(decoderErrorString(int(err)))
+}
+
+var errExcessiveInput = errors.New("brotli: excessive input") // Read: decoding succeeded but undecoded input remains
+var errInvalidState = errors.New("brotli: invalid state") // Read: decoder wants more input while r.in is not empty
+
+// readBufSize is the size of the scratch buffer that Reset allocates for
+// reads from the source. NOTE(review): the earlier wording cited round-trips
+// between C and Go, which looks stale — confirm. It equals the size used in io.Copy.
+const readBufSize = 32 * 1024
+
+// NewReader returns a newly allocated Reader that has been Reset onto src.
+func NewReader(src io.Reader) *Reader {
+	dec := &Reader{}
+	dec.Reset(src)
+	return dec
+}
+
+// Reset discards the Reader's state and makes it equivalent to a Reader
+// freshly returned by NewReader, but reading from src instead.
+// This permits reusing a Reader rather than allocating a new one.
+// The returned error is always nil.
+func (r *Reader) Reset(src io.Reader) error {
+	if r.error_code < 0 {
+		// There was an unrecoverable error, leaving the Reader's state
+		// undefined. Clear out everything but the buffer.
+		*r = Reader{buf: r.buf}
+	}
+	// NOTE(review): r.in is not cleared here; unless decoderStateInit does it, input buffered from the previous source stays queued — confirm.
+	decoderStateInit(r)
+	r.src = src
+	if r.buf == nil { // allocate the scratch buffer only once; later Resets reuse it
+		r.buf = make([]byte, readBufSize)
+	}
+	return nil
+}
+// Read implements io.Reader: it decompresses into p and refills the input from r.src whenever the decoder needs more.
+func (r *Reader) Read(p []byte) (n int, err error) {
+	if !decoderHasMoreOutput(r) && len(r.in) == 0 {
+		m, readErr := r.src.Read(r.buf)
+		if m == 0 {
+			// If readErr is `nil`, we just proxy underlying stream behavior.
+			return 0, readErr
+		}
+		r.in = r.buf[:m] // when m > 0, readErr is dropped here and not returned by this call
+	}
+
+	if len(p) == 0 {
+		return 0, nil
+	}
+
+	for {
+		var written uint
+		in_len := uint(len(r.in))
+		out_len := uint(len(p))
+		in_remaining := in_len
+		out_remaining := out_len
+		result := decoderDecompressStream(r, &in_remaining, &r.in, &out_remaining, &p) // presumably advances r.in and p past what was consumed/produced — confirm in the decoder
+		written = out_len - out_remaining
+		n = int(written) // bytes produced by this decoder call
+
+		switch result {
+		case decoderResultSuccess:
+			if len(r.in) > 0 { // stream finished but input bytes are left over
+				return n, errExcessiveInput
+			}
+			return n, nil
+		case decoderResultError:
+			return n, decodeError(decoderGetErrorCode(r))
+		case decoderResultNeedsMoreOutput:
+			if n == 0 { // output is pending yet nothing could be written into p
+				return 0, io.ErrShortBuffer
+			}
+			return n, nil
+		case decoderNeedsMoreInput: // no return: fall out of the switch and refill r.in below
+		}
+
+		if len(r.in) != 0 { // a decoder asking for more input is expected to have consumed all of r.in
+			return 0, errInvalidState
+		}
+
+		// Calling r.src.Read may block. Don't block if we have data to return.
+		if n > 0 {
+			return n, nil
+		}
+
+		// Top off the buffer.
+		encN, err := r.src.Read(r.buf) // this err shadows the named result inside the loop body
+		if encN == 0 {
+			// Not enough data to complete decoding.
+			if err == io.EOF {
+				return 0, io.ErrUnexpectedEOF
+			}
+			return 0, err
+		}
+		r.in = r.buf[:encN]
+	}
+}
@@ -0,0 +1,134 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* A ringBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
+   data in a circular manner: writing a byte writes it to:
+     `position() % (1 << window_bits)'.
+   For convenience, the ringBuffer array contains another copy of the
+   first `1 << tail_bits' bytes:
+     buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
+   and another copy of the last two bytes:
+     buffer_[-1] == buffer_[(1 << window_bits) - 1] and
+     buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
+type ringBuffer struct {
+	size_       uint32 // 1 << window_bits, set by ringBufferSetup
+	mask_       uint32 // size_ - 1; ANDed with pos_ to get the write offset
+	tail_size_  uint32 // 1 << tail_bits (tail_bits is params.lgblock)
+	total_size_ uint32 // size_ + tail_size_
+	cur_size_   uint32 // buffer length passed to the last ringBufferInitBuffer call
+	pos_        uint32 // write position in the low 31 bits; bit 31 marks "not the first lap"
+	data_       []byte // backing storage: 2 leading bytes, the buffer, then the slack bytes
+	buffer_     []byte // data_[2:], so data_[0] and data_[1] act as buffer_[-2] and buffer_[-1]
+}
+
+func ringBufferInit(rb *ringBuffer) { // resets only the write position; sizes come from ringBufferSetup
+	rb.pos_ = 0
+}
+
+func ringBufferSetup(params *encoderParams, rb *ringBuffer) {
+	var window_bits int = computeRbBits(params)
+	var tail_bits int = params.lgblock
+	*(*uint32)(&rb.size_) = 1 << uint(window_bits)
+	*(*uint32)(&rb.mask_) = (1 << uint(window_bits)) - 1
+	*(*uint32)(&rb.tail_size_) = 1 << uint(tail_bits)
+	*(*uint32)(&rb.total_size_) = rb.size_ + rb.tail_size_
+}
+
+const kSlackForEightByteHashingEverywhere uint = 7 // zeroed bytes kept after the buffer by ringBufferInitBuffer
+
+/* (Re)allocates data_ to hold buflen bytes plus two leading bytes and
+   kSlackForEightByteHashingEverywhere trailing bytes; zeroes both margins. */
+func ringBufferInitBuffer(buflen uint32, rb *ringBuffer) {
+	const slack = uint32(kSlackForEightByteHashingEverywhere)
+	need := 2 + int(buflen) + int(slack)
+	old := rb.data_
+	var grown []byte
+	if cap(old) >= need {
+		grown = old[:need]
+	} else {
+		grown = make([]byte, need)
+	}
+	if old != nil {
+		copy(grown, old[:2+rb.cur_size_+slack])
+	}
+
+	rb.data_ = grown
+	rb.cur_size_ = buflen
+	rb.buffer_ = grown[2:]
+	grown[0], grown[1] = 0, 0
+	for k := uint32(0); k < slack; k++ {
+		rb.buffer_[buflen+k] = 0
+	}
+}
+
+func ringBufferWriteTail(bytes []byte, n uint, rb *ringBuffer) {
+	var masked_pos uint = uint(rb.pos_ & rb.mask_)
+	if uint32(masked_pos) < rb.tail_size_ {
+		/* Just fill the tail buffer with the beginning data. */
+		var p uint = uint(rb.size_ + uint32(masked_pos))
+		copy(rb.buffer_[p:], bytes[:brotli_min_size_t(n, uint(rb.tail_size_-uint32(masked_pos)))])
+	}
+}
+
+/* Push the first n bytes of bytes into the ring buffer and advance pos_. */
+func ringBufferWrite(bytes []byte, n uint, rb *ringBuffer) {
+	if rb.pos_ == 0 && uint32(n) < rb.tail_size_ {
+		/* Special case for the first write: to process the first block, we don't
+		   need to allocate the whole ring-buffer and we don't need the tail
+		   either. However, we do this memory usage optimization only if the
+		   first write is less than the tail size, which is also the input block
+		   size, otherwise it is likely that other blocks will follow and we
+		   will need to reallocate to the full size anyway. */
+		rb.pos_ = uint32(n)
+
+		ringBufferInitBuffer(rb.pos_, rb) // buffer sized to exactly this first write
+		copy(rb.buffer_, bytes[:n])
+		return
+	}
+
+	if rb.cur_size_ < rb.total_size_ {
+		/* Lazily allocate the full buffer. */
+		ringBufferInitBuffer(rb.total_size_, rb)
+
+		/* Initialize the last two bytes to zero, so that we don't have to worry
+		   later when we copy the last two bytes to the first two positions. */
+		rb.buffer_[rb.size_-2] = 0
+
+		rb.buffer_[rb.size_-1] = 0
+	}
+	{
+		var masked_pos uint = uint(rb.pos_ & rb.mask_) // write offset inside the ring
+
+		/* The length of the writes is limited so that we do not need to worry
+		   about a write wrapping around more than once (the split below handles one wrap). */
+		ringBufferWriteTail(bytes, n, rb)
+
+		if uint32(masked_pos+n) <= rb.size_ {
+			/* A single write fits. */
+			copy(rb.buffer_[masked_pos:], bytes[:n])
+		} else {
+			/* Split into two writes.
+			   Copy into the end of the buffer, including the tail buffer. */
+			copy(rb.buffer_[masked_pos:], bytes[:brotli_min_size_t(n, uint(rb.total_size_-uint32(masked_pos)))])
+
+			/* Copy into the beginning of the buffer */
+			copy(rb.buffer_, bytes[rb.size_-uint32(masked_pos):][:uint32(n)-(rb.size_-uint32(masked_pos))])
+		}
+	}
+	{
+		var not_first_lap bool = rb.pos_&(1<<31) != 0 // bit 31 of pos_ is the lap flag
+		var rb_pos_mask uint32 = (1 << 31) - 1
+		rb.data_[0] = rb.buffer_[rb.size_-2] // mirror the last two ring bytes into buffer_[-2], buffer_[-1]
+		rb.data_[1] = rb.buffer_[rb.size_-1]
+		rb.pos_ = (rb.pos_ & rb_pos_mask) + uint32(uint32(n)&rb_pos_mask) // the sum may carry into bit 31, setting the lap flag
+		if not_first_lap {
+			/* Wrap, but preserve not-a-first-lap feature. */
+			rb.pos_ |= 1 << 31
+		}
+	}
+}
@@ -0,0 +1,294 @@
+package brotli
+
+import "io"
+
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Brotli state for partial streaming decoding. */
+/* Values of Reader.state: the top-level position of the decoder state
+   machine. decoderStateInit starts it at stateUninited. */
+const (
+	stateUninited = iota
+	stateLargeWindowBits
+	stateInitialize
+	stateMetablockBegin
+	stateMetablockHeader
+	stateMetablockHeader2
+	stateContextModes
+	stateCommandBegin
+	stateCommandInner
+	stateCommandPostDecodeLiterals
+	stateCommandPostWrapCopy
+	stateUncompressed
+	stateMetadata
+	stateCommandInnerWrite
+	stateMetablockDone
+	stateCommandPostWrite1
+	stateCommandPostWrite2
+	stateHuffmanCode0
+	stateHuffmanCode1
+	stateHuffmanCode2
+	stateHuffmanCode3
+	stateContextMap1
+	stateContextMap2
+	stateTreeGroup
+	stateDone
+)
+
+/* Values of Reader.substate_metablock_header. */
+const (
+	stateMetablockHeaderNone = iota
+	stateMetablockHeaderEmpty
+	stateMetablockHeaderNibbles
+	stateMetablockHeaderSize
+	stateMetablockHeaderUncompressed
+	stateMetablockHeaderReserved
+	stateMetablockHeaderBytes
+	stateMetablockHeaderMetadata
+)
+
+/* Values of Reader.substate_uncompressed. */
+const (
+	stateUncompressedNone = iota
+	stateUncompressedWrite
+)
+
+/* Values of Reader.substate_tree_group. */
+const (
+	stateTreeGroupNone = iota
+	stateTreeGroupLoop
+)
+
+/* Values of Reader.substate_context_map. */
+const (
+	stateContextMapNone = iota
+	stateContextMapReadPrefix
+	stateContextMapHuffman
+	stateContextMapDecode
+	stateContextMapTransform
+)
+
+/* Values of Reader.substate_huffman. */
+const (
+	stateHuffmanNone = iota
+	stateHuffmanSimpleSize
+	stateHuffmanSimpleRead
+	stateHuffmanSimpleBuild
+	stateHuffmanComplex
+	stateHuffmanLengthSymbols
+)
+
+/* Values of Reader.substate_decode_uint8. */
+const (
+	stateDecodeUint8None = iota
+	stateDecodeUint8Short
+	stateDecodeUint8Long
+)
+
+/* Values of Reader.substate_read_block_length. */
+const (
+	stateReadBlockLengthNone = iota
+	stateReadBlockLengthSuffix
+)
+
+/* Reader holds the brotli decoder state for partial (streaming) decoding.
+   Besides the input source and its buffers it carries the state-machine
+   position (state plus the substate_* fields), the bit reader, the ring
+   buffer bookkeeping and the Huffman tables for the current meta-block.
+   decoderStateInit puts a Reader into its initial state and
+   decoderStateMetablockBegin resets the per-meta-block fields. */
+type Reader struct {
+	src io.Reader
+	buf []byte // scratch space for reading from src
+	in  []byte // current chunk to decode; usually aliases buf
+
+	state        int
+	loop_counter int
+	br           bitReader
+	buffer       struct {
+		u64 uint64
+		u8  [8]byte
+	}
+	buffer_length               uint32
+	pos                         int
+	max_backward_distance       int
+	max_distance                int
+	ringbuffer_size             int
+	ringbuffer_mask             int
+	dist_rb_idx                 int
+	dist_rb                     [4]int
+	error_code                  int
+	sub_loop_counter            uint32
+	ringbuffer                  []byte
+	ringbuffer_end              []byte
+	htree_command               []huffmanCode
+	context_lookup              []byte
+	context_map_slice           []byte
+	dist_context_map_slice      []byte
+	literal_hgroup              huffmanTreeGroup
+	insert_copy_hgroup          huffmanTreeGroup
+	distance_hgroup             huffmanTreeGroup
+	block_type_trees            []huffmanCode
+	block_len_trees             []huffmanCode
+	trivial_literal_context     int
+	distance_context            int
+	meta_block_remaining_len    int
+	block_length_index          uint32
+	block_length                [3]uint32
+	num_block_types             [3]uint32
+	block_type_rb               [6]uint32
+	distance_postfix_bits       uint32
+	num_direct_distance_codes   uint32
+	distance_postfix_mask       int
+	num_dist_htrees             uint32
+	dist_context_map            []byte
+	literal_htree               []huffmanCode
+	dist_htree_index            byte
+	repeat_code_len             uint32
+	prev_code_len               uint32
+	copy_length                 int
+	distance_code               int
+	rb_roundtrips               uint
+	partial_pos_out             uint
+	symbol                      uint32
+	repeat                      uint32
+	space                       uint32
+	table                       [32]huffmanCode
+	symbol_lists                symbolList
+	symbols_lists_array         [huffmanMaxCodeLength + 1 + numCommandSymbols]uint16
+	next_symbol                 [32]int
+	code_length_code_lengths    [codeLengthCodes]byte
+	code_length_histo           [16]uint16
+	htree_index                 int
+	next                        []huffmanCode
+	context_index               uint32
+	max_run_length_prefix       uint32
+	code                        uint32
+	context_map_table           [huffmanMaxSize272]huffmanCode
+	substate_metablock_header   int
+	substate_tree_group         int
+	substate_context_map        int
+	substate_uncompressed       int
+	substate_huffman            int
+	substate_decode_uint8       int
+	substate_read_block_length  int
+	is_last_metablock           uint
+	is_uncompressed             uint
+	is_metadata                 uint
+	should_wrap_ringbuffer      uint
+	canny_ringbuffer_allocation uint
+	large_window                bool
+	size_nibbles                uint
+	window_bits                 uint32
+	new_ringbuffer_size         int
+	num_literal_htrees          uint32
+	context_map                 []byte
+	context_modes               []byte
+	dictionary                  *dictionary
+	transforms                  *transforms
+	trivial_literal_contexts    [8]uint32
+}
+
+/* decoderStateInit puts s into the initial decoder state: no error, a fresh
+   bit reader, every state and substate at its first value, counters and
+   ring buffer sizes zeroed, all per-meta-block tables dropped, and the
+   distance ring buffer seeded with 16, 15, 11, 4. It also wires up the
+   symbol list storage and the shared dictionary / transforms.
+   Always returns true. */
+func decoderStateInit(s *Reader) bool {
+	s.error_code = 0 /* BROTLI_DECODER_NO_ERROR */
+	initBitReader(&s.br)
+
+	/* State machine position. */
+	s.state = stateUninited
+	s.substate_metablock_header = stateMetablockHeaderNone
+	s.substate_tree_group = stateTreeGroupNone
+	s.substate_context_map = stateContextMapNone
+	s.substate_uncompressed = stateUncompressedNone
+	s.substate_huffman = stateHuffmanNone
+	s.substate_decode_uint8 = stateDecodeUint8None
+	s.substate_read_block_length = stateReadBlockLengthNone
+
+	/* Counters and output positions. */
+	s.buffer_length, s.sub_loop_counter = 0, 0
+	s.loop_counter, s.pos = 0, 0
+	s.rb_roundtrips, s.partial_pos_out = 0, 0
+
+	/* Ring buffer and window geometry. */
+	s.ringbuffer_size, s.new_ringbuffer_size, s.ringbuffer_mask = 0, 0, 0
+	s.window_bits = 0
+	s.max_distance = 0
+	s.large_window = false
+
+	/* Tables that are rebuilt for every meta-block. */
+	s.block_type_trees, s.block_len_trees = nil, nil
+	s.context_map, s.context_modes, s.dist_context_map = nil, nil, nil
+	s.context_map_slice, s.dist_context_map_slice = nil, nil
+	s.literal_hgroup.codes, s.literal_hgroup.htrees = nil, nil
+	s.insert_copy_hgroup.codes, s.insert_copy_hgroup.htrees = nil, nil
+	s.distance_hgroup.codes, s.distance_hgroup.htrees = nil, nil
+
+	/* Flags; canny_ringbuffer_allocation is the only one that starts set. */
+	s.is_last_metablock, s.is_uncompressed, s.is_metadata = 0, 0, 0
+	s.should_wrap_ringbuffer = 0
+	s.canny_ringbuffer_allocation = 1
+
+	/* Initial contents of the distance ring buffer. */
+	s.dist_rb = [4]int{16, 15, 11, 4}
+	s.dist_rb_idx = 0
+
+	/* The symbol lists live in the embedded array, addressed with an offset
+	   of huffmanMaxCodeLength + 1. */
+	s.symbol_lists.storage = s.symbols_lists_array[:]
+	s.symbol_lists.offset = huffmanMaxCodeLength + 1
+
+	s.dictionary = getDictionary()
+	s.transforms = getTransforms()
+
+	return true
+}
+
+/* decoderStateMetablockBegin resets the fields of s that are scoped to a
+   single meta-block: remaining length, block lengths / counts / type ring
+   buffer for the three block categories, context maps and Huffman groups. */
+func decoderStateMetablockBegin(s *Reader) {
+	s.meta_block_remaining_len = 0
+
+	/* Each of the three block categories starts with a single block type and
+	   a block length of 1 << 24. */
+	for i := range s.block_length {
+		s.block_length[i] = 1 << 24
+		s.num_block_types[i] = 1
+	}
+	s.block_type_rb = [6]uint32{1, 0, 1, 0, 1, 0}
+
+	/* Drop everything that was built for the previous meta-block. */
+	s.context_map, s.context_map_slice = nil, nil
+	s.dist_context_map, s.dist_context_map_slice = nil, nil
+	s.context_modes, s.context_lookup = nil, nil
+	s.literal_htree = nil
+	s.dist_htree_index = 0
+
+	s.literal_hgroup.codes, s.literal_hgroup.htrees = nil, nil
+	s.insert_copy_hgroup.codes, s.insert_copy_hgroup.htrees = nil, nil
+	s.distance_hgroup.codes, s.distance_hgroup.htrees = nil, nil
+}
+
+/* decoderStateCleanupAfterMetablock releases the context tables and the
+   htrees slices of the three Huffman groups once a meta-block is finished.
+   The groups' codes slices are left in place. */
+func decoderStateCleanupAfterMetablock(s *Reader) {
+	s.context_modes, s.context_map, s.dist_context_map = nil, nil, nil
+
+	s.literal_hgroup.htrees = nil
+	s.insert_copy_hgroup.htrees = nil
+	s.distance_hgroup.htrees = nil
+}
+
+func decoderHuffmanTreeGroupInit(s *Reader, group *huffmanTreeGroup, alphabet_size uint32, max_symbol uint32, ntrees uint32) bool {
+	var max_table_size uint = uint(kMaxHuffmanTableSize[(alphabet_size+31)>>5])
+	group.alphabet_size = uint16(alphabet_size)
+	group.max_symbol = uint16(max_symbol)
+	group.num_htrees = uint16(ntrees)
+	group.htrees = make([][]huffmanCode, ntrees)
+	group.codes = make([]huffmanCode, (uint(ntrees) * max_table_size))
+	return !(group.codes == nil)
+}
@@ -0,0 +1,662 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Model of the static dictionary used when searching for matches. */
+
+/* Upper bound on the length of a static dictionary match.
+   NOTE(review): presumably the longest dictionary word plus the longest
+   prefix/suffix a transform can add - confirm against the callers that size
+   the matches slice. */
+const maxStaticDictionaryMatchLen = 37
+
+/* Sentinel match value (28 one-bits). addMatch keeps the minimum per slot,
+   so a slot holding this value is replaced by any smaller encoded match.
+   NOTE(review): assumes callers pre-fill matches with it - confirm. */
+const kInvalidMatch uint32 = 0xFFFFFFF
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+func hash(data []byte) uint32 {
+	var h uint32 = binary.LittleEndian.Uint32(data) * kDictHashMul32
+
+	/* The higher bits contain more mixture from the multiplication,
+	   so we take our results from there. */
+	return h >> uint(32-kDictNumBits)
+}
+
+func addMatch(distance uint, len uint, len_code uint, matches []uint32) {
+	var match uint32 = uint32((distance << 5) + len_code)
+	matches[len] = brotli_min_uint32_t(matches[len], match)
+}
+
+func dictMatchLength(dict *dictionary, data []byte, id uint, len uint, maxlen uint) uint {
+	var offset uint = uint(dict.offsets_by_length[len]) + len*id
+	return findMatchLengthWithLimit(dict.data[offset:], data, brotli_min_size_t(uint(len), maxlen))
+}
+
+func isMatch(d *dictionary, w dictWord, data []byte, max_length uint) bool {
+	if uint(w.len) > max_length {
+		return false
+	} else {
+		var offset uint = uint(d.offsets_by_length[w.len]) + uint(w.len)*uint(w.idx)
+		var dict []byte = d.data[offset:]
+		if w.transform == 0 {
+			/* Match against base dictionary word. */
+			return findMatchLengthWithLimit(dict, data, uint(w.len)) == uint(w.len)
+		} else if w.transform == 10 {
+			/* Match against uppercase first transform.
+			   Note that there are only ASCII uppercase words in the lookup table. */
+			return dict[0] >= 'a' && dict[0] <= 'z' && (dict[0]^32) == data[0] && findMatchLengthWithLimit(dict[1:], data[1:], uint(w.len)-1) == uint(w.len-1)
+		} else {
+			/* Match against uppercase all transform.
+			   Note that there are only ASCII uppercase words in the lookup table. */
+			var i uint
+			for i = 0; i < uint(w.len); i++ {
+				if dict[i] >= 'a' && dict[i] <= 'z' {
+					if (dict[i] ^ 32) != data[i] {
+						return false
+					}
+				} else {
+					if dict[i] != data[i] {
+						return false
+					}
+				}
+			}
+
+			return true
+		}
+	}
+}
+
+/* findAllStaticDictionaryMatches searches the static dictionary for words
+   (optionally transformed) that match the beginning of data and records
+   each hit through addMatch.
+
+   The function probes four hash buckets: the word starting at data[0], and
+   the words that follow one of the recognised prefixes (" " or ".";
+   "e ", "s ", ", " or "\xC2\xA0"; " the " or ".com/"). For each candidate
+   word it tests the identity / uppercase forms and then a fixed list of
+   suffixes; the numeric factors multiplied by n in the addMatch calls
+   select the transform.
+
+   min_length is only used as the lower bound for the "omit last N"
+   transforms. max_length is the limit passed to the word comparisons and
+   checked before suffix bytes are read. matches is indexed by match length
+   (see addMatch).
+
+   Returns has_found_match, which is set whenever a match is recorded. */
+func findAllStaticDictionaryMatches(dict *encoderDictionary, data []byte, min_length uint, max_length uint, matches []uint32) bool {
+	var has_found_match bool = false
+	{
+		/* Walk the hash bucket of the word starting at data[0]. A bucket
+		   offset of 0 means the bucket is empty. */
+		var offset uint = uint(dict.buckets[hash(data)])
+		var end bool = offset == 0
+		for !end {
+			w := dict.dict_words[offset]
+			offset++
+			/* The low 5 bits of w.len hold the word length; the 0x80 bit marks
+			   the last entry of the bucket. */
+			var l uint = uint(w.len) & 0x1F
+			var n uint = uint(1) << dict.words.size_bits_by_length[l]
+			var id uint = uint(w.idx)
+			end = !(w.len&0x80 == 0)
+			w.len = byte(l)
+			if w.transform == 0 {
+				var matchlen uint = dictMatchLength(dict.words, data, id, l, max_length)
+				var s []byte
+				var minlen uint
+				var maxlen uint
+				var len uint
+
+				/* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
+				if matchlen == l {
+					addMatch(id, l, l, matches)
+					has_found_match = true
+				}
+
+				/* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
+				   "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
+				if matchlen >= l-1 {
+					addMatch(id+12*n, l-1, l, matches)
+					if l+2 < max_length && data[l-1] == 'i' && data[l] == 'n' && data[l+1] == 'g' && data[l+2] == ' ' {
+						addMatch(id+49*n, l+3, l, matches)
+					}
+
+					has_found_match = true
+				}
+
+				/* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
+				minlen = min_length
+
+				if l > 9 {
+					minlen = brotli_max_size_t(minlen, l-9)
+				}
+				maxlen = brotli_min_size_t(matchlen, l-2)
+				for len = minlen; len <= maxlen; len++ {
+					var cut uint = l - len
+					var transform_id uint = (cut << 2) + uint((dict.cutoffTransforms>>(cut*6))&0x3F)
+					addMatch(id+transform_id*n, uint(len), l, matches)
+					has_found_match = true
+				}
+
+				/* The suffix checks below read up to s[6], so they need the full
+				   word to have matched and at least 7 more bytes within max_length. */
+				if matchlen < l || l+6 >= max_length {
+					continue
+				}
+
+				s = data[l:]
+
+				/* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
+				if s[0] == ' ' {
+					addMatch(id+n, l+1, l, matches)
+					if s[1] == 'a' {
+						if s[2] == ' ' {
+							addMatch(id+28*n, l+3, l, matches)
+						} else if s[2] == 's' {
+							if s[3] == ' ' {
+								addMatch(id+46*n, l+4, l, matches)
+							}
+						} else if s[2] == 't' {
+							if s[3] == ' ' {
+								addMatch(id+60*n, l+4, l, matches)
+							}
+						} else if s[2] == 'n' {
+							if s[3] == 'd' && s[4] == ' ' {
+								addMatch(id+10*n, l+5, l, matches)
+							}
+						}
+					} else if s[1] == 'b' {
+						if s[2] == 'y' && s[3] == ' ' {
+							addMatch(id+38*n, l+4, l, matches)
+						}
+					} else if s[1] == 'i' {
+						if s[2] == 'n' {
+							if s[3] == ' ' {
+								addMatch(id+16*n, l+4, l, matches)
+							}
+						} else if s[2] == 's' {
+							if s[3] == ' ' {
+								addMatch(id+47*n, l+4, l, matches)
+							}
+						}
+					} else if s[1] == 'f' {
+						if s[2] == 'o' {
+							if s[3] == 'r' && s[4] == ' ' {
+								addMatch(id+25*n, l+5, l, matches)
+							}
+						} else if s[2] == 'r' {
+							if s[3] == 'o' && s[4] == 'm' && s[5] == ' ' {
+								addMatch(id+37*n, l+6, l, matches)
+							}
+						}
+					} else if s[1] == 'o' {
+						if s[2] == 'f' {
+							if s[3] == ' ' {
+								addMatch(id+8*n, l+4, l, matches)
+							}
+						} else if s[2] == 'n' {
+							if s[3] == ' ' {
+								addMatch(id+45*n, l+4, l, matches)
+							}
+						}
+					} else if s[1] == 'n' {
+						if s[2] == 'o' && s[3] == 't' && s[4] == ' ' {
+							addMatch(id+80*n, l+5, l, matches)
+						}
+					} else if s[1] == 't' {
+						if s[2] == 'h' {
+							if s[3] == 'e' {
+								if s[4] == ' ' {
+									addMatch(id+5*n, l+5, l, matches)
+								}
+							} else if s[3] == 'a' {
+								if s[4] == 't' && s[5] == ' ' {
+									addMatch(id+29*n, l+6, l, matches)
+								}
+							}
+						} else if s[2] == 'o' {
+							if s[3] == ' ' {
+								addMatch(id+17*n, l+4, l, matches)
+							}
+						}
+					} else if s[1] == 'w' {
+						if s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ' {
+							addMatch(id+35*n, l+6, l, matches)
+						}
+					}
+				} else if s[0] == '"' {
+					addMatch(id+19*n, l+1, l, matches)
+					if s[1] == '>' {
+						addMatch(id+21*n, l+2, l, matches)
+					}
+				} else if s[0] == '.' {
+					addMatch(id+20*n, l+1, l, matches)
+					if s[1] == ' ' {
+						addMatch(id+31*n, l+2, l, matches)
+						if s[2] == 'T' && s[3] == 'h' {
+							if s[4] == 'e' {
+								if s[5] == ' ' {
+									addMatch(id+43*n, l+6, l, matches)
+								}
+							} else if s[4] == 'i' {
+								if s[5] == 's' && s[6] == ' ' {
+									addMatch(id+75*n, l+7, l, matches)
+								}
+							}
+						}
+					}
+				} else if s[0] == ',' {
+					addMatch(id+76*n, l+1, l, matches)
+					if s[1] == ' ' {
+						addMatch(id+14*n, l+2, l, matches)
+					}
+				} else if s[0] == '\n' {
+					addMatch(id+22*n, l+1, l, matches)
+					if s[1] == '\t' {
+						addMatch(id+50*n, l+2, l, matches)
+					}
+				} else if s[0] == ']' {
+					addMatch(id+24*n, l+1, l, matches)
+				} else if s[0] == '\'' {
+					addMatch(id+36*n, l+1, l, matches)
+				} else if s[0] == ':' {
+					addMatch(id+51*n, l+1, l, matches)
+				} else if s[0] == '(' {
+					addMatch(id+57*n, l+1, l, matches)
+				} else if s[0] == '=' {
+					if s[1] == '"' {
+						addMatch(id+70*n, l+2, l, matches)
+					} else if s[1] == '\'' {
+						addMatch(id+86*n, l+2, l, matches)
+					}
+				} else if s[0] == 'a' {
+					if s[1] == 'l' && s[2] == ' ' {
+						addMatch(id+84*n, l+3, l, matches)
+					}
+				} else if s[0] == 'e' {
+					if s[1] == 'd' {
+						if s[2] == ' ' {
+							addMatch(id+53*n, l+3, l, matches)
+						}
+					} else if s[1] == 'r' {
+						if s[2] == ' ' {
+							addMatch(id+82*n, l+3, l, matches)
+						}
+					} else if s[1] == 's' {
+						if s[2] == 't' && s[3] == ' ' {
+							addMatch(id+95*n, l+4, l, matches)
+						}
+					}
+				} else if s[0] == 'f' {
+					if s[1] == 'u' && s[2] == 'l' && s[3] == ' ' {
+						addMatch(id+90*n, l+4, l, matches)
+					}
+				} else if s[0] == 'i' {
+					if s[1] == 'v' {
+						if s[2] == 'e' && s[3] == ' ' {
+							addMatch(id+92*n, l+4, l, matches)
+						}
+					} else if s[1] == 'z' {
+						if s[2] == 'e' && s[3] == ' ' {
+							addMatch(id+100*n, l+4, l, matches)
+						}
+					}
+				} else if s[0] == 'l' {
+					if s[1] == 'e' {
+						if s[2] == 's' && s[3] == 's' && s[4] == ' ' {
+							addMatch(id+93*n, l+5, l, matches)
+						}
+					} else if s[1] == 'y' {
+						if s[2] == ' ' {
+							addMatch(id+61*n, l+3, l, matches)
+						}
+					}
+				} else if s[0] == 'o' {
+					if s[1] == 'u' && s[2] == 's' && s[3] == ' ' {
+						addMatch(id+106*n, l+4, l, matches)
+					}
+				}
+			} else {
+				var is_all_caps bool = (w.transform != transformUppercaseFirst)
+				/* is_all_caps is false for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+				   true otherwise (i.e. for BROTLI_TRANSFORM_UPPERCASE_ALL). */
+
+				var s []byte
+				if !isMatch(dict.words, w, data, max_length) {
+					continue
+				}
+
+				/* Transform "" + kUppercase{First,All} + "" */
+				var tmp int
+				if is_all_caps {
+					tmp = 44
+				} else {
+					tmp = 9
+				}
+				addMatch(id+uint(tmp)*n, l, l, matches)
+
+				has_found_match = true
+				if l+1 >= max_length {
+					continue
+				}
+
+				/* Transforms "" + kUppercase{First,All} + <suffix> */
+				s = data[l:]
+
+				if s[0] == ' ' {
+					var tmp int
+					if is_all_caps {
+						tmp = 68
+					} else {
+						tmp = 4
+					}
+					addMatch(id+uint(tmp)*n, l+1, l, matches)
+				} else if s[0] == '"' {
+					var tmp int
+					if is_all_caps {
+						tmp = 87
+					} else {
+						tmp = 66
+					}
+					addMatch(id+uint(tmp)*n, l+1, l, matches)
+					if s[1] == '>' {
+						var tmp int
+						if is_all_caps {
+							tmp = 97
+						} else {
+							tmp = 69
+						}
+						addMatch(id+uint(tmp)*n, l+2, l, matches)
+					}
+				} else if s[0] == '.' {
+					var tmp int
+					if is_all_caps {
+						tmp = 101
+					} else {
+						tmp = 79
+					}
+					addMatch(id+uint(tmp)*n, l+1, l, matches)
+					if s[1] == ' ' {
+						var tmp int
+						if is_all_caps {
+							tmp = 114
+						} else {
+							tmp = 88
+						}
+						addMatch(id+uint(tmp)*n, l+2, l, matches)
+					}
+				} else if s[0] == ',' {
+					var tmp int
+					if is_all_caps {
+						tmp = 112
+					} else {
+						tmp = 99
+					}
+					addMatch(id+uint(tmp)*n, l+1, l, matches)
+					if s[1] == ' ' {
+						var tmp int
+						if is_all_caps {
+							tmp = 107
+						} else {
+							tmp = 58
+						}
+						addMatch(id+uint(tmp)*n, l+2, l, matches)
+					}
+				} else if s[0] == '\'' {
+					var tmp int
+					if is_all_caps {
+						tmp = 94
+					} else {
+						tmp = 74
+					}
+					addMatch(id+uint(tmp)*n, l+1, l, matches)
+				} else if s[0] == '(' {
+					var tmp int
+					if is_all_caps {
+						tmp = 113
+					} else {
+						tmp = 78
+					}
+					addMatch(id+uint(tmp)*n, l+1, l, matches)
+				} else if s[0] == '=' {
+					if s[1] == '"' {
+						var tmp int
+						if is_all_caps {
+							tmp = 105
+						} else {
+							tmp = 104
+						}
+						addMatch(id+uint(tmp)*n, l+2, l, matches)
+					} else if s[1] == '\'' {
+						var tmp int
+						if is_all_caps {
+							tmp = 116
+						} else {
+							tmp = 108
+						}
+						addMatch(id+uint(tmp)*n, l+2, l, matches)
+					}
+				}
+			}
+		}
+	}
+
+	/* Transforms with prefixes " " and "." */
+	if max_length >= 5 && (data[0] == ' ' || data[0] == '.') {
+		var is_space bool = (data[0] == ' ')
+		var offset uint = uint(dict.buckets[hash(data[1:])])
+		var end bool = offset == 0
+		for !end {
+			w := dict.dict_words[offset]
+			offset++
+			var l uint = uint(w.len) & 0x1F
+			var n uint = uint(1) << dict.words.size_bits_by_length[l]
+			var id uint = uint(w.idx)
+			end = !(w.len&0x80 == 0)
+			w.len = byte(l)
+			if w.transform == 0 {
+				var s []byte
+				if !isMatch(dict.words, w, data[1:], max_length-1) {
+					continue
+				}
+
+				/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
+				   "." + BROTLI_TRANSFORM_IDENTITY + "" */
+				var tmp int
+				if is_space {
+					tmp = 6
+				} else {
+					tmp = 32
+				}
+				addMatch(id+uint(tmp)*n, l+1, l, matches)
+
+				has_found_match = true
+				if l+2 >= max_length {
+					continue
+				}
+
+				/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
+				   "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
+				*/
+				s = data[l+1:]
+
+				if s[0] == ' ' {
+					var tmp int
+					if is_space {
+						tmp = 2
+					} else {
+						tmp = 77
+					}
+					addMatch(id+uint(tmp)*n, l+2, l, matches)
+				} else if s[0] == '(' {
+					var tmp int
+					if is_space {
+						tmp = 89
+					} else {
+						tmp = 67
+					}
+					addMatch(id+uint(tmp)*n, l+2, l, matches)
+				} else if is_space {
+					if s[0] == ',' {
+						addMatch(id+103*n, l+2, l, matches)
+						if s[1] == ' ' {
+							addMatch(id+33*n, l+3, l, matches)
+						}
+					} else if s[0] == '.' {
+						addMatch(id+71*n, l+2, l, matches)
+						if s[1] == ' ' {
+							addMatch(id+52*n, l+3, l, matches)
+						}
+					} else if s[0] == '=' {
+						if s[1] == '"' {
+							addMatch(id+81*n, l+3, l, matches)
+						} else if s[1] == '\'' {
+							addMatch(id+98*n, l+3, l, matches)
+						}
+					}
+				}
+			} else if is_space {
+				var is_all_caps bool = (w.transform != transformUppercaseFirst)
+				/* is_all_caps is false for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+				   true otherwise (i.e. for BROTLI_TRANSFORM_UPPERCASE_ALL). */
+
+				var s []byte
+				if !isMatch(dict.words, w, data[1:], max_length-1) {
+					continue
+				}
+
+				/* Transforms " " + kUppercase{First,All} + "" */
+				var tmp int
+				if is_all_caps {
+					tmp = 85
+				} else {
+					tmp = 30
+				}
+				addMatch(id+uint(tmp)*n, l+1, l, matches)
+
+				has_found_match = true
+				if l+2 >= max_length {
+					continue
+				}
+
+				/* Transforms " " + kUppercase{First,All} + <suffix> */
+				s = data[l+1:]
+
+				if s[0] == ' ' {
+					var tmp int
+					if is_all_caps {
+						tmp = 83
+					} else {
+						tmp = 15
+					}
+					addMatch(id+uint(tmp)*n, l+2, l, matches)
+				} else if s[0] == ',' {
+					if !is_all_caps {
+						addMatch(id+109*n, l+2, l, matches)
+					}
+
+					if s[1] == ' ' {
+						var tmp int
+						if is_all_caps {
+							tmp = 111
+						} else {
+							tmp = 65
+						}
+						addMatch(id+uint(tmp)*n, l+3, l, matches)
+					}
+				} else if s[0] == '.' {
+					var tmp int
+					if is_all_caps {
+						tmp = 115
+					} else {
+						tmp = 96
+					}
+					addMatch(id+uint(tmp)*n, l+2, l, matches)
+					if s[1] == ' ' {
+						var tmp int
+						if is_all_caps {
+							tmp = 117
+						} else {
+							tmp = 91
+						}
+						addMatch(id+uint(tmp)*n, l+3, l, matches)
+					}
+				} else if s[0] == '=' {
+					if s[1] == '"' {
+						var tmp int
+						if is_all_caps {
+							tmp = 110
+						} else {
+							tmp = 118
+						}
+						addMatch(id+uint(tmp)*n, l+3, l, matches)
+					} else if s[1] == '\'' {
+						var tmp int
+						if is_all_caps {
+							tmp = 119
+						} else {
+							tmp = 120
+						}
+						addMatch(id+uint(tmp)*n, l+3, l, matches)
+					}
+				}
+			}
+		}
+	}
+
+	if max_length >= 6 {
+		/* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
+		if (data[1] == ' ' && (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || (data[0] == 0xC2 && data[1] == 0xA0) {
+			var offset uint = uint(dict.buckets[hash(data[2:])])
+			var end bool = offset == 0
+			for !end {
+				w := dict.dict_words[offset]
+				offset++
+				var l uint = uint(w.len) & 0x1F
+				var n uint = uint(1) << dict.words.size_bits_by_length[l]
+				var id uint = uint(w.idx)
+				end = !(w.len&0x80 == 0)
+				w.len = byte(l)
+				if w.transform == 0 && isMatch(dict.words, w, data[2:], max_length-2) {
+					if data[0] == 0xC2 {
+						addMatch(id+102*n, l+2, l, matches)
+						has_found_match = true
+					} else if l+2 < max_length && data[l+2] == ' ' {
+						/* t selects the transform for the ", " (13), "e " (18)
+						   or "s " (7) prefix, each followed by a " " suffix. */
+						var t uint = 13
+						if data[0] == 'e' {
+							t = 18
+						} else if data[0] == 's' {
+							t = 7
+						}
+						addMatch(id+t*n, l+3, l, matches)
+						has_found_match = true
+					}
+				}
+			}
+		}
+	}
+
+	if max_length >= 9 {
+		/* Transforms with prefixes " the " and ".com/" */
+		if (data[0] == ' ' && data[1] == 't' && data[2] == 'h' && data[3] == 'e' && data[4] == ' ') || (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && data[3] == 'm' && data[4] == '/') {
+			var offset uint = uint(dict.buckets[hash(data[5:])])
+			var end bool = offset == 0
+			for !end {
+				w := dict.dict_words[offset]
+				offset++
+				var l uint = uint(w.len) & 0x1F
+				var n uint = uint(1) << dict.words.size_bits_by_length[l]
+				var id uint = uint(w.idx)
+				end = !(w.len&0x80 == 0)
+				w.len = byte(l)
+				if w.transform == 0 && isMatch(dict.words, w, data[5:], max_length-5) {
+					var tmp int
+					if data[0] == ' ' {
+						tmp = 41
+					} else {
+						tmp = 72
+					}
+					addMatch(id+uint(tmp)*n, l+5, l, matches)
+					has_found_match = true
+					if l+5 < max_length {
+						var s []byte = data[l+5:]
+						if data[0] == ' ' {
+							/* " the " + word + " of " and " the " + word + " of the " */
+							if l+8 < max_length && s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ' {
+								addMatch(id+62*n, l+9, l, matches)
+								if l+12 < max_length && s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ' {
+									addMatch(id+73*n, l+13, l, matches)
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return has_found_match
+}
@@ -0,0 +1,22 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+/* symbolList is a view into a uint16 slice whose logical index 0 sits at
+   storage[offset]; symbolListGet and symbolListPut add offset to every
+   index, so negative logical indices are usable as long as i+offset >= 0. */
+type symbolList struct {
+	storage []uint16
+	offset  int
+}
+
+/* symbolListGet returns the element at logical index i, i.e.
+   sl.storage[i+sl.offset]. */
+func symbolListGet(sl symbolList, i int) uint16 {
+	slot := i + sl.offset
+	return sl.storage[slot]
+}
+
+/* symbolListPut stores val at logical index i, i.e. in
+   sl.storage[i+sl.offset]. sl is passed by value, but the write goes to the
+   backing array shared through the slice. */
+func symbolListPut(sl symbolList, i int, val uint16) {
+	slot := i + sl.offset
+	sl.storage[slot] = val
+}
@@ -0,0 +1,641 @@
+package brotli
+
+/* Transform type codes, as stored in the middle byte of each triplet of
+   kTransformsData. */
+const (
+	transformIdentity       = 0
+	transformOmitLast1      = 1
+	transformOmitLast2      = 2
+	transformOmitLast3      = 3
+	transformOmitLast4      = 4
+	transformOmitLast5      = 5
+	transformOmitLast6      = 6
+	transformOmitLast7      = 7
+	transformOmitLast8      = 8
+	transformOmitLast9      = 9
+	transformUppercaseFirst = 10
+	transformUppercaseAll   = 11
+	transformOmitFirst1     = 12
+	transformOmitFirst2     = 13
+	transformOmitFirst3     = 14
+	transformOmitFirst4     = 15
+	transformOmitFirst5     = 16
+	transformOmitFirst6     = 17
+	transformOmitFirst7     = 18
+	transformOmitFirst8     = 19
+	transformOmitFirst9     = 20
+	transformShiftFirst     = 21
+	/* "22 + iota - 22" is just iota, which is 22 on this line. The expression
+	   is written that way so that the next line, which repeats it implicitly,
+	   evaluates to 23. */
+	transformShiftAll       = 22 + iota - 22
+	numTransformTypes
+)
+
+/* Largest "omit last N" transform; sizes transforms.cutOffTransforms. */
+const transformsMaxCutOff = transformOmitLast9
+
+/* transforms describes a set of word transforms.
+
+   prefix_suffix is a packed table of prefix/suffix strings and
+   prefix_suffix_map gives, per string id, the offset of that string inside
+   prefix_suffix. transforms holds three bytes per transform: prefix id,
+   transform type, suffix id (see transformPrefixId, transformType and
+   transformSuffixId). num_transforms is the number of such triplets and
+   prefix_suffix_size the byte length of prefix_suffix.
+
+   NOTE(review): params presumably carries per-transform parameters for the
+   shift transforms, and cutOffTransforms the transform index for each
+   "omit last N" count - neither is read in this part of the file; confirm. */
+type transforms struct {
+	prefix_suffix_size uint16
+	prefix_suffix      []byte
+	prefix_suffix_map  []uint16
+	num_transforms     uint32
+	transforms         []byte
+	params             []byte
+	cutOffTransforms   [transformsMaxCutOff + 1]int16
+}
+
+/* transformPrefixId returns the prefix string id of transform I: the first
+   byte of its triplet in t.transforms. */
+func transformPrefixId(t *transforms, I int) byte {
+	triplet := I * 3
+	return t.transforms[triplet]
+}
+
+/* transformType returns the transform type code of transform I: the second
+   byte of its triplet in t.transforms. */
+func transformType(t *transforms, I int) byte {
+	triplet := I * 3
+	return t.transforms[triplet+1]
+}
+
+/* transformSuffixId returns the suffix string id of transform I: the third
+   byte of its triplet in t.transforms. */
+func transformSuffixId(t *transforms, I int) byte {
+	triplet := I * 3
+	return t.transforms[triplet+2]
+}
+
+/* transformPrefix returns t.prefix_suffix sliced from the start of the
+   prefix entry of transform I. The slice runs to the end of the table; it
+   is not trimmed to the entry. */
+func transformPrefix(t *transforms, I int) []byte {
+	id := transformPrefixId(t, I)
+	start := t.prefix_suffix_map[id]
+	return t.prefix_suffix[start:]
+}
+
+/* transformSuffix returns t.prefix_suffix sliced from the start of the
+   suffix entry of transform I. The slice runs to the end of the table; it
+   is not trimmed to the entry. */
+func transformSuffix(t *transforms, I int) []byte {
+	id := transformSuffixId(t, I)
+	start := t.prefix_suffix_map[id]
+	return t.prefix_suffix[start:]
+}
+
+/* RFC 7932 transforms string data.
+
+   Packed table of the prefix/suffix strings used by the transforms. Each
+   entry is one length byte (written as an octal escape) followed by that
+   many bytes of text, e.g. "\001 " is the 1-byte string " " and
+   "\010 of the " is the 8-byte string " of the ". The final "\000" is the
+   empty string. kPrefixSuffixMap holds the offset of every entry. */
+const kPrefixSuffix string = "\001 \002, \010 of the \004 of \002s \001.\005 and \004 " + "in \001\"\004 to \002\">\001\n\002. \001]\005 for \003 a \006 " + "that \001'\006 with \006 from \004 by \001(\006. T" + "he \004 on \004 as \004 is \004ing \002\n\t\001:\003ed " + "\002=\"\004 at \003ly \001,\002='\005.com/\007. This \005" + " not \003er \003al \004ful \004ive \005less \004es" + "t \004ize \002\xc2\xa0\004ous \005 the \002e \000"
+
+/* kPrefixSuffixMap lists, for each of the 50 prefix/suffix string ids, the
+   byte offset of that entry inside the packed prefix/suffix table. Ten
+   offsets per row: row r covers ids 10*r .. 10*r+9. */
+var kPrefixSuffixMap = [50]uint16{
+	0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
+	0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
+	0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
+	0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
+	0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8,
+}
+
+/* RFC 7932 transforms.
+
+   One row per transform: prefix string id, transform type, suffix string id.
+   String id 49 is the empty string. Rows are in transform-index order, so
+   row k is the triplet read by transformPrefixId / transformType /
+   transformSuffixId for I == k. */
+var kTransformsData = []byte{
+	/* transforms 0 - 9 */
+	49, transformIdentity, 49,
+	49, transformIdentity, 0,
+	0, transformIdentity, 0,
+	49, transformOmitFirst1, 49,
+	49, transformUppercaseFirst, 0,
+	49, transformIdentity, 47,
+	0, transformIdentity, 49,
+	4, transformIdentity, 0,
+	49, transformIdentity, 3,
+	49, transformUppercaseFirst, 49,
+
+	/* transforms 10 - 19 */
+	49, transformIdentity, 6,
+	49, transformOmitFirst2, 49,
+	49, transformOmitLast1, 49,
+	1, transformIdentity, 0,
+	49, transformIdentity, 1,
+	0, transformUppercaseFirst, 0,
+	49, transformIdentity, 7,
+	49, transformIdentity, 9,
+	48, transformIdentity, 0,
+	49, transformIdentity, 8,
+
+	/* transforms 20 - 29 */
+	49, transformIdentity, 5,
+	49, transformIdentity, 10,
+	49, transformIdentity, 11,
+	49, transformOmitLast3, 49,
+	49, transformIdentity, 13,
+	49, transformIdentity, 14,
+	49, transformOmitFirst3, 49,
+	49, transformOmitLast2, 49,
+	49, transformIdentity, 15,
+	49, transformIdentity, 16,
+
+	/* transforms 30 - 39 */
+	0, transformUppercaseFirst, 49,
+	49, transformIdentity, 12,
+	5, transformIdentity, 49,
+	0, transformIdentity, 1,
+	49, transformOmitFirst4, 49,
+	49, transformIdentity, 18,
+	49, transformIdentity, 17,
+	49, transformIdentity, 19,
+	49, transformIdentity, 20,
+	49, transformOmitFirst5, 49,
+
+	/* transforms 40 - 49 */
+	49, transformOmitFirst6, 49,
+	47, transformIdentity, 49,
+	49, transformOmitLast4, 49,
+	49, transformIdentity, 22,
+	49, transformUppercaseAll, 49,
+	49, transformIdentity, 23,
+	49, transformIdentity, 24,
+	49, transformIdentity, 25,
+	49, transformOmitLast7, 49,
+	49, transformOmitLast1, 26,
+
+	/* transforms 50 - 59 */
+	49, transformIdentity, 27,
+	49, transformIdentity, 28,
+	0, transformIdentity, 12,
+	49, transformIdentity, 29,
+	49, transformOmitFirst9, 49,
+	49, transformOmitFirst7, 49,
+	49, transformOmitLast6, 49,
+	49, transformIdentity, 21,
+	49, transformUppercaseFirst, 1,
+	49, transformOmitLast8, 49,
+
+	/* transforms 60 - 69 */
+	49, transformIdentity, 31,
+	49, transformIdentity, 32,
+	47, transformIdentity, 3,
+	49, transformOmitLast5, 49,
+	49, transformOmitLast9, 49,
+	0, transformUppercaseFirst, 1,
+	49, transformUppercaseFirst, 8,
+	5, transformIdentity, 21,
+	49, transformUppercaseAll, 0,
+	49, transformUppercaseFirst, 10,
+
+	/* transforms 70 - 79 */
+	49, transformIdentity, 30,
+	0, transformIdentity, 5,
+	35, transformIdentity, 49,
+	47, transformIdentity, 2,
+	49, transformUppercaseFirst, 17,
+	49, transformIdentity, 36,
+	49, transformIdentity, 33,
+	5, transformIdentity, 0,
+	49, transformUppercaseFirst, 21,
+	49, transformUppercaseFirst, 5,
+
+	/* transforms 80 - 89 */
+	49, transformIdentity, 37,
+	0, transformIdentity, 30,
+	49, transformIdentity, 38,
+	0, transformUppercaseAll, 0,
+	49, transformIdentity, 39,
+	0, transformUppercaseAll, 49,
+	49, transformIdentity, 34,
+	49, transformUppercaseAll, 8,
+	49, transformUppercaseFirst, 12,
+	0, transformIdentity, 21,
+
+	/* transforms 90 - 99 */
+	49, transformIdentity, 40,
+	0, transformUppercaseFirst, 12,
+	49, transformIdentity, 41,
+	49, transformIdentity, 42,
+	49, transformUppercaseAll, 17,
+	49, transformIdentity, 43,
+	0, transformUppercaseFirst, 5,
+	49, transformUppercaseAll, 10,
+	0, transformIdentity, 34,
+	49, transformUppercaseFirst, 33,
+
+	/* transforms 100 - 109 */
+	49, transformIdentity, 44,
+	49, transformUppercaseAll, 5,
+	45, transformIdentity, 49,
+	0, transformIdentity, 33,
+	49, transformUppercaseFirst, 30,
+	49, transformUppercaseAll, 30,
+	49, transformIdentity, 46,
+	49, transformUppercaseAll, 1,
+	49, transformUppercaseFirst, 34,
+	0, transformUppercaseFirst, 33,
+
+	/* transforms 110 - 120 */
+	0, transformUppercaseAll, 30,
+	0, transformUppercaseAll, 1,
+	49, transformUppercaseAll, 33,
+	49, transformUppercaseAll, 21,
+	49, transformUppercaseAll, 12,
+	0, transformUppercaseAll, 5,
+	49, transformUppercaseAll, 34,
+	0, transformUppercaseAll, 12,
+	0, transformUppercaseFirst, 30,
+	0, transformUppercaseAll, 34,
+	0, transformUppercaseFirst, 34,
+}
+
+// kBrotliTransforms is the package-level transforms table handed out by
+// getTransforms. It is built with a positional (unkeyed) literal, so the order
+// of the values below must match the field order of the transforms struct.
+// NOTE(review): 217 and 121 look like the prefix/suffix blob size and the
+// number of transforms - confirm against the transforms struct definition.
+var kBrotliTransforms = transforms{
+	217,
+	[]byte(kPrefixSuffix),
+	kPrefixSuffixMap[:],
+	121,
+	kTransformsData,
+	nil, /* no extra parameters */
+	[transformsMaxCutOff + 1]int16{0, 12, 27, 23, 42, 63, 56, 48, 59, 64},
+}
+
+/* getTransforms returns a pointer to the shared package-level transforms
+   table (kBrotliTransforms). Every caller gets the same instance, not a
+   copy. */
+func getTransforms() *transforms {
+	table := &kBrotliTransforms
+	return table
+}
+
+/* toUpperCase applies the simplified "uppercase" transform, in place, to the
+   character that starts at p[0] and returns how many bytes that character is
+   taken to occupy (1, 2 or 3). The width is decided from the lead byte only:
+
+     - lead < 0xC0: one byte; ASCII 'a'..'z' is folded to 'A'..'Z', anything
+       else is left untouched.
+     - lead < 0xE0: two bytes; bit 5 of the second byte is flipped.
+     - otherwise:   three bytes; the third byte is XORed with 5.
+
+   p must be long enough for the width implied by its lead byte. */
+func toUpperCase(p []byte) int {
+	lead := p[0]
+	switch {
+	case lead < 0xC0:
+		if 'a' <= lead && lead <= 'z' {
+			p[0] = lead ^ 32
+		}
+		return 1
+	case lead < 0xE0:
+		/* An overly simplified uppercasing model for UTF-8. */
+		p[1] ^= 32
+		return 2
+	default:
+		/* An arbitrary transform for three byte characters. */
+		p[2] ^= 5
+		return 3
+	}
+}
+
+/* shiftTransform adds a signed shift to the code point of the UTF-8 sequence
+   that starts at word[0], rewriting the payload bits in place, and returns
+   the number of bytes the sequence occupies so the caller can advance.
+
+   parameter is a 16-bit value: the low 15 bits are the magnitude and bit 15
+   acts as the sign, folded into a 24-bit scalar by the "limited sign
+   extension" below. word_len is the number of bytes still available in word;
+   a multi-byte sequence that would run past it is left untouched (the 2-byte
+   case then reports 1 byte, the 3- and 4-byte cases report word_len).
+
+   Only the payload bits are replaced: the lead-byte marker is rewritten and
+   the top two bits of each trailing byte are preserved. Continuation bytes
+   (0x80..0xBF) and lead bytes >= 0xF8 are skipped one byte at a time. */
+func shiftTransform(word []byte, word_len int, parameter uint16) int {
+	/* Limited sign extension: scalar < (1 << 24). */
+	var scalar uint32 = (uint32(parameter) & 0x7FFF) + (0x1000000 - (uint32(parameter) & 0x8000))
+	if word[0] < 0x80 {
+		/* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
+		scalar += uint32(word[0])
+
+		word[0] = byte(scalar & 0x7F)
+		return 1
+	} else if word[0] < 0xC0 {
+		/* Continuation / 10AAAAAA. */
+		return 1
+	} else if word[0] < 0xE0 {
+		/* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
+		if word_len < 2 {
+			return 1
+		}
+		/* Widen to uint32 BEFORE shifting: a byte-typed shift by 6 would drop
+		   the upper three payload bits of the lead byte. */
+		scalar += uint32(word[1]&0x3F) | uint32(word[0]&0x1F)<<6
+		word[0] = byte(0xC0 | (scalar>>6)&0x1F)
+		word[1] = byte(uint32(word[1]&0xC0) | scalar&0x3F)
+		return 2
+	} else if word[0] < 0xF0 {
+		/* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
+		if word_len < 3 {
+			return word_len
+		}
+		scalar += uint32(word[2])&0x3F | uint32(word[1]&0x3F)<<6 | uint32(word[0]&0x0F)<<12
+		word[0] = byte(0xE0 | (scalar>>12)&0x0F)
+		word[1] = byte(uint32(word[1]&0xC0) | (scalar>>6)&0x3F)
+		word[2] = byte(uint32(word[2]&0xC0) | scalar&0x3F)
+		return 3
+	} else if word[0] < 0xF8 {
+		/* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
+		if word_len < 4 {
+			return word_len
+		}
+		scalar += uint32(word[3])&0x3F | uint32(word[2]&0x3F)<<6 | uint32(word[1]&0x3F)<<12 | uint32(word[0]&0x07)<<18
+		word[0] = byte(0xF0 | (scalar>>18)&0x07)
+		word[1] = byte(uint32(word[1]&0xC0) | (scalar>>12)&0x3F)
+		word[2] = byte(uint32(word[2]&0xC0) | (scalar>>6)&0x3F)
+		word[3] = byte(uint32(word[3]&0xC0) | scalar&0x3F)
+		return 4
+	}
+
+	/* Lead byte >= 0xF8 is not a valid UTF-8 start; skip it. */
+	return 1
+}
+
+func transformDictionaryWord(dst []byte, word []byte, len int, trans *transforms, transform_idx int) int {
+	var idx int = 0
+	var prefix []byte = transformPrefix(trans, transform_idx)
+	var type_ byte = transformType(trans, transform_idx)
+	var suffix []byte = transformSuffix(trans, transform_idx)
+	{
+		var prefix_len int = int(prefix[0])
+		prefix = prefix[1:]
+		for {
+			tmp1 := prefix_len
+			prefix_len--
+			if tmp1 == 0 {
+				break
+			}
+			dst[idx] = prefix[0]
+			idx++
+			prefix = prefix[1:]
+		}
+	}
+	{
+		var t int = int(type_)
+		var i int = 0
+		if t <= transformOmitLast9 {
+			len -= t
+		} else if t >= transformOmitFirst1 && t <= transformOmitFirst9 {
+			var skip int = t - (transformOmitFirst1 - 1)
+			word = word[skip:]
+			len -= skip
+		}
+
+		for i < len {
+			dst[idx] = word[i]
+			idx++
+			i++
+		}
+		if t == transformUppercaseFirst {
+			toUpperCase(dst[idx-len:])
+		} else if t == transformUppercaseAll {
+			var uppercase []byte = dst
+			uppercase = uppercase[idx-len:]
+			for len > 0 {
+				var step int = toUpperCase(uppercase)
+				uppercase = uppercase[step:]
+				len -= step
+			}
+		} else if t == transformShiftFirst {
+			var param uint16 = uint16(trans.params[transform_idx*2]) + uint16(trans.params[transform_idx*2+1])<<8
+			shiftTransform(dst[idx-len:], int(len), param)
+		} else if t == transformShiftAll {
+			var param uint16 = uint16(trans.params[transform_idx*2]) + uint16(trans.params[transform_idx*2+1])<<8
+			var shift []byte = dst
+			shift = shift[idx-len:]
+			for len > 0 {
+				var step int = shiftTransform(shift, int(len), param)
+				shift = shift[step:]
+				len -= step
+			}
+		}
+	}
+	{
+		var suffix_len int = int(suffix[0])
+		suffix = suffix[1:]
+		for {
+			tmp2 := suffix_len
+			suffix_len--
+			if tmp2 == 0 {
+				break
+			}
+			dst[idx] = suffix[0]
+			idx++
+			suffix = suffix[1:]
+		}
+		return idx
+	}
+}
@@ -0,0 +1,70 @@
+package brotli
+
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Heuristics for deciding about the UTF8-ness of strings. */
+
+const kMinUTF8Ratio float64 = 0.75
+
+/* parseAsUTF8 decodes one symbol from the start of input and returns the
+   number of bytes it consumed. size is the number of bytes of input that may
+   be examined.
+
+   On a well-formed, shortest-form sequence of 1 to 4 bytes, *symbol receives
+   the code point and the sequence length is returned. A NUL byte, an
+   overlong encoding, a truncated sequence, a 4-byte value above 0x10FFFF or
+   any other lead byte is treated as invalid: *symbol is set to
+   0x110000 | input[0] (a value above the Unicode code space) and 1 is
+   returned. */
+func parseAsUTF8(symbol *int, input []byte, size uint) uint {
+	lead := int(input[0])
+
+	switch {
+	case lead&0x80 == 0:
+		/* ASCII; 0 is not accepted and falls through to the invalid case. */
+		if lead > 0 {
+			*symbol = lead
+			return 1
+		}
+
+	case lead&0xE0 == 0xC0:
+		/* 2-byte UTF8 */
+		if size > 1 && input[1]&0xC0 == 0x80 {
+			cp := (lead&0x1F)<<6 | int(input[1])&0x3F
+			if cp > 0x7F {
+				*symbol = cp
+				return 2
+			}
+		}
+
+	case lead&0xF0 == 0xE0:
+		/* 3-byte UTF8 */
+		if size > 2 && input[1]&0xC0 == 0x80 && input[2]&0xC0 == 0x80 {
+			cp := (lead&0x0F)<<12 | (int(input[1])&0x3F)<<6 | int(input[2])&0x3F
+			if cp > 0x7FF {
+				*symbol = cp
+				return 3
+			}
+		}
+
+	case lead&0xF8 == 0xF0:
+		/* 4-byte UTF8 */
+		if size > 3 && input[1]&0xC0 == 0x80 && input[2]&0xC0 == 0x80 && input[3]&0xC0 == 0x80 {
+			cp := (lead&0x07)<<18 | (int(input[1])&0x3F)<<12 | (int(input[2])&0x3F)<<6 | int(input[3])&0x3F
+			if cp > 0xFFFF && cp <= 0x10FFFF {
+				*symbol = cp
+				return 4
+			}
+		}
+	}
+
+	/* Not UTF8, emit a special symbol above the UTF8-code space */
+	*symbol = 0x110000 | lead
+	return 1
+}
+
+/* isMostlyUTF8 reports whether more than min_fraction of the length bytes
+   starting at pos are covered by symbols that parseAsUTF8 accepts as valid
+   UTF-8. Each symbol is read from data[(pos+i)&mask:], i.e. the start offset
+   is wrapped with mask before slicing. */
+func isMostlyUTF8(data []byte, pos uint, mask uint, length uint, min_fraction float64) bool {
+	var utf8Bytes uint
+
+	for off := uint(0); off < length; {
+		var sym int
+		consumed := parseAsUTF8(&sym, data[(pos+off)&mask:], length-off)
+		off += consumed
+
+		/* Symbols at or above 0x110000 mark bytes that failed to decode. */
+		if sym < 0x110000 {
+			utf8Bytes += consumed
+		}
+	}
+
+	return float64(utf8Bytes) > min_fraction*float64(length)
+}
@@ -0,0 +1,7 @@
+package brotli
+
+// assert panics with the message "assertion failure" when cond is false and
+// does nothing otherwise.
+func assert(cond bool) {
+	if cond {
+		return
+	}
+	panic("assertion failure")
+}
@@ -0,0 +1,52 @@
+package brotli
+
+import "encoding/binary"
+
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Write bits into a byte array. */
+
+/* writeBits appends bits to a bit stream stored in array, filling bytes at
+   increasing addresses and, within a byte, least-significant bit first.
+   *pos is the current bit position; it is advanced by n_bits.
+
+   Example: assume 3 bits (Rs below) have been written already:
+
+   BYTE-0     BYTE+1       BYTE+2
+
+   0000 0RRR    0000 0000    0000 0000
+
+   Up to 5 more bits fit in BYTE-0 by shifting them left by 3 and OR'ing
+   them in; for longer values the remaining high bits land in BYTE+1,
+   BYTE+2, etc.
+
+   Up to 56 bits can be written per call: 7 bits may already be occupied in
+   the current byte, and at least 1 bit is needed to initialize the stream
+   ahead (with 7 bits pending, writing 57 bits would make the next write
+   read a byte that was never initialized).
+
+   Note that the value is not masked to n_bits, and that a full 8-byte
+   little-endian word is stored starting at byte *pos>>3, so array needs at
+   least 8 bytes from that offset. Only the current byte's existing content
+   is merged in; the following 7 bytes are overwritten. */
+func writeBits(n_bits uint, bits uint64, pos *uint, array []byte) {
+	bitPos := *pos
+	tail := array[bitPos>>3:]
+	merged := uint64(tail[0]) | bits<<(bitPos&7)
+	binary.LittleEndian.PutUint64(tail, merged)
+	*pos = bitPos + n_bits
+}
+
+func writeSingleBit(bit bool, pos *uint, array []byte) {
+	if bit {
+		writeBits(1, 1, pos, array)
+	} else {
+		writeBits(1, 0, pos, array)
+	}
+}
+
+/* writeBitsPrepareStorage zeroes the byte at bit position pos so that
+   subsequent writeBits calls OR into a clean byte. pos must be byte-aligned
+   (a multiple of 8); this is enforced with assert. */
+func writeBitsPrepareStorage(pos uint, array []byte) {
+	byteAligned := pos&7 == 0
+	assert(byteAligned)
+
+	byteIndex := pos >> 3
+	array[byteIndex] = 0
+}
@@ -0,0 +1,162 @@
+package brotli
+
+import (
+	"errors"
+	"io"
+
+	"github.com/andybalholm/brotli/matchfinder"
+)
+
+const (
+	BestSpeed          = 0
+	BestCompression    = 11
+	DefaultCompression = 6
+)
+
+// WriterOptions configures Writer.
+type WriterOptions struct {
+	// Quality controls the compression-speed vs compression-density trade-offs.
+	// The higher the quality, the slower the compression. Range is 0 to 11
+	// (BestSpeed to BestCompression).
+	Quality int
+	// LGWin is the base 2 logarithm of the sliding window size.
+	// Range is 10 to 24. 0 indicates automatic configuration based on Quality.
+	LGWin int
+}
+
+var (
+	errEncode       = errors.New("brotli: encode error")
+	errWriterClosed = errors.New("brotli: Writer is closed")
+)
+
+// Writes to the returned writer are compressed and written to dst.
+// It is the caller's responsibility to call Close on the Writer when done.
+// Writes may be buffered and not flushed until Close.
+func NewWriter(dst io.Writer) *Writer {
+	return NewWriterLevel(dst, DefaultCompression)
+}
+
+// NewWriterLevel is like NewWriter but lets the caller pick the compression
+// level instead of assuming DefaultCompression.
+// The level can be DefaultCompression or any integer value between
+// BestSpeed and BestCompression inclusive. It is passed on as the Quality
+// option; LGWin is left at its zero value.
+func NewWriterLevel(dst io.Writer, level int) *Writer {
+	opts := WriterOptions{Quality: level}
+	return NewWriterOptions(dst, opts)
+}
+
+// NewWriterOptions is like NewWriter but specifies WriterOptions.
+// The options are stored on the Writer and applied by Reset, which is also
+// what attaches dst.
+func NewWriterOptions(dst io.Writer, options WriterOptions) *Writer {
+	w := &Writer{options: options}
+	w.Reset(dst)
+	return w
+}
+
+// Reset discards the Writer's state and makes it equivalent to a freshly
+// constructed Writer with the same options, but writing to dst instead.
+// This permits reusing a Writer rather than allocating a new one.
+//
+// The encoder state is re-initialised first; the stored Quality is then
+// applied, and LGWin is applied only when it is positive (otherwise the
+// value left by the encoder initialisation is kept).
+func (w *Writer) Reset(dst io.Writer) {
+	encoderInitState(w)
+
+	w.dst, w.err = dst, nil
+
+	w.params.quality = w.options.Quality
+	if lgwin := w.options.LGWin; lgwin > 0 {
+		w.params.lgwin = uint(lgwin)
+	}
+}
+
+// writeChunk feeds p to the encoder with the given stream operation and
+// returns how many bytes of p were consumed.
+//
+// It fails immediately with errWriterClosed when the Writer has no
+// destination, or with the sticky w.err when an earlier error was recorded.
+// Otherwise it calls encoderCompressStream repeatedly until all of p has
+// been consumed or w.err becomes non-nil; if the encoder reports failure,
+// errEncode is returned together with the bytes consumed so far.
+func (w *Writer) writeChunk(p []byte, op int) (n int, err error) {
+	switch {
+	case w.dst == nil:
+		return 0, errWriterClosed
+	case w.err != nil:
+		return 0, w.err
+	}
+
+	for {
+		remaining := uint(len(p))
+		cursor := p
+		ok := encoderCompressStream(w, op, &remaining, &cursor)
+
+		// The encoder reports what is left; the difference is what it took.
+		consumed := len(p) - int(remaining)
+		p = p[consumed:]
+		n += consumed
+
+		if !ok {
+			return n, errEncode
+		}
+		if len(p) > 0 && w.err == nil {
+			continue
+		}
+		return n, w.err
+	}
+}
+
+// Flush outputs encoded data for all input provided to Write. The resulting
+// output can be decoded to match all input before Flush, but the stream is
+// not complete until Close has been called.
+// Flush has a negative impact on compression.
+func (w *Writer) Flush() error {
+	_, flushErr := w.writeChunk(nil, operationFlush)
+	return flushErr
+}
+
+// Close flushes remaining data to the decorated writer and finishes the
+// stream. Afterwards the Writer has no destination, so further writes (and a
+// second Close) fail with the "Writer is closed" error until Reset is called.
+func (w *Writer) Close() error {
+	// An already-closed stream is detected and reported by writeChunk.
+	_, closeErr := w.writeChunk(nil, operationFinish)
+
+	// Dropping the destination is what marks the Writer as closed.
+	w.dst = nil
+	return closeErr
+}
+
+// Write implements io.Writer by handing p to the encoder. Flush or Close
+// must be called to ensure that the encoded bytes are actually flushed to
+// the underlying Writer.
+func (w *Writer) Write(p []byte) (n int, err error) {
+	n, err = w.writeChunk(p, operationProcess)
+	return n, err
+}
+
+// nopCloser wraps an io.Writer and adds a Close method that does nothing.
+type nopCloser struct {
+	io.Writer
+}
+
+// Close is a no-op and always returns nil.
+func (nopCloser) Close() error {
+	return nil
+}
+
+// NewWriterV2 is like NewWriterLevel, but it uses the new implementation
+// based on the matchfinder package. It currently supports up to level 7;
+// if a higher level is specified, level 7 will be used.
+//
+// Levels below 2 use matchfinder.M0 (lazy matching only at level 1). Levels
+// 2 and up use matchfinder.M4, with a chain length that grows with the level
+// and a hash length of 5 from level 6 upwards (6 below that).
+func NewWriterV2(dst io.Writer, level int) *matchfinder.Writer {
+	var finder matchfinder.MatchFinder
+
+	if level >= 2 {
+		// Chain lengths for levels 2 through 6; every higher level uses 64.
+		chainByLevel := [...]int{0, 1, 2, 4, 8}
+		chain := 64
+		if level <= 6 {
+			chain = chainByLevel[level-2]
+		}
+
+		hashLen := 6
+		if level >= 6 {
+			hashLen = 5
+		}
+
+		finder = &matchfinder.M4{
+			MaxDistance:     1 << 20,
+			ChainLength:     chain,
+			HashLen:         hashLen,
+			DistanceBitCost: 57,
+		}
+	} else {
+		finder = matchfinder.M0{Lazy: level == 1}
+	}
+
+	return &matchfinder.Writer{
+		Dest:        dst,
+		MatchFinder: finder,
+		Encoder:     &Encoder{},
+		BlockSize:   1 << 16,
+	}
+}
@@ -0,0 +1,54 @@
+*.o
+*.swp
+*.swm
+*.swn
+*.a
+*.so
+_obj
+_test
+*.[568vq]
+[568vq].out
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+_testmain.go
+*.exe
+*.exe~
+*.test
+*.prof
+*.rar
+*.zip
+*.gz
+*.psd
+*.bmd
+*.cfg
+*.pptx
+*.log
+*nohup.out
+*settings.pyc
+*.sublime-project
+*.sublime-workspace
+.DS_Store
+/.idea/
+/.vscode/
+/output/
+/vendor/
+/Gopkg.lock
+/Gopkg.toml
+coverage.html
+coverage.out
+coverage.xml
+junit.xml
+*.profile
+*.svg
+*.out
+ast/test.out
+ast/bench.sh
+
+!testdata/*.json.gz
+fuzz/testdata
+*__debug_bin*
+*pprof
+*coverage.txt
@@ -0,0 +1,6 @@
+[submodule "cloudwego"]
+	path = tools/asm2asm
+	url = https://github.com/cloudwego/asm2asm.git
+[submodule "tools/simde"]
+	path = tools/simde
+	url = https://github.com/simd-everywhere/simde.git
@@ -0,0 +1,24 @@
+header:
+  license:
+    spdx-id: Apache-2.0
+    copyright-owner: ByteDance Inc.
+
+  paths:
+    - '**/*.go'
+    - '**/*.s'
+
+  paths-ignore:
+    - 'ast/asm.s'                                   # empty file
+    - 'decoder/asm.s'                               # empty file
+    - 'encoder/asm.s'                               # empty file
+    - 'internal/caching/asm.s'                      # empty file
+    - 'internal/jit/asm.s'                          # empty file
+    - 'internal/native/avx/native_amd64.s'          # auto-generated by asm2asm
+    - 'internal/native/avx/native_subr_amd64.go'    # auto-generated by asm2asm
+    - 'internal/native/avx2/native_amd64.s'         # auto-generated by asm2asm
+    - 'internal/native/avx2/native_subr_amd64.go'   # auto-generated by asm2asm
+    - 'internal/resolver/asm.s'                     # empty file
+    - 'internal/rt/asm.s'                           # empty file
+    - 'internal/loader/asm.s'                       # empty file
+
+  comment: on-failure
@@ -0,0 +1,128 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, religion, or sexual identity
+and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the
+  overall community
+
+Examples of unacceptable behavior include:
+
+* The use of sexualized language or imagery, and sexual attention or
+  advances of any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email
+  address, without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+wudi.daniel@bytedance.com.
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series
+of actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or
+permanent ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within
+the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.0, available at
+https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
+
+Community Impact Guidelines were inspired by [Mozilla's code of conduct
+enforcement ladder](https://github.com/mozilla/diversity).
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see the FAQ at
+https://www.contributor-covenant.org/faq. Translations are available at
+https://www.contributor-covenant.org/translations.
@@ -0,0 +1,63 @@
+# How to Contribute
+
+## Your First Pull Request
+We use GitHub for our codebase. You can start by reading [How To Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests).
+
+## Without Semantic Versioning
+We keep the stable code in branch `main`, like `golang.org/x`. Development is based on branch `develop`. We promise **Forward Compatibility** by adding a new package directory with a `v2`/`v3` suffix when the code has breaking changes.
+
+## Branch Organization
+We use [git-flow](https://nvie.com/posts/a-successful-git-branching-model/) as our branch organization, also known as [FDD](https://en.wikipedia.org/wiki/Feature-driven_development).
+
+
+## Bugs
+### 1. How to Find Known Issues
+We are using [Github Issues](https://github.com/bytedance/sonic/issues) for our public bugs. We keep a close eye on this and try to make it clear when we have an internal fix in progress. Before filing a new task, try to make sure your problem doesn’t already exist.
+
+### 2. Reporting New Issues
+Providing reduced test code is the recommended way to report issues. It can be placed in:
+- The issue itself
+- [Golang Playground](https://play.golang.org/)
+
+### 3. Security Bugs
+Please do not report security bugs in public issues. Contact us by [Support Email](mailto:sonic@bytedance.com) instead.
+
+## How to Get in Touch
+- [Email](mailto:wudi.daniel@bytedance.com)
+
+## Submit a Pull Request
+Before you submit your Pull Request (PR) consider the following guidelines:
+1. Search [GitHub](https://github.com/bytedance/sonic/pulls) for an open or closed PR that relates to your submission. You don't want to duplicate existing efforts.
+2. Be sure that an issue describes the problem you're fixing, or documents the design for the feature you'd like to add. Discussing the design upfront helps to ensure that we're ready to accept your work.
+3. [Fork](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo) the bytedance/sonic repo.
+4. In your forked repository, make your changes in a new git branch:
+    ```
+    git checkout -b bugfix/security_bug develop
+    ```
+5. Create your patch, including appropriate test cases.
+6. Follow our [Style Guides](#code-style-guides).
+7. Commit your changes using a descriptive commit message that follows [AngularJS Git Commit Message Conventions](https://docs.google.com/document/d/1QrDFcIiPjSLDn3EL15IJygNPiHORgU1_OOAqWjiDU5Y/edit).
+   Adherence to these conventions is necessary because release notes will be automatically generated from these messages.
+8. Push your branch to GitHub:
+    ```
+    git push origin bugfix/security_bug
+    ```
+9. In GitHub, send a pull request to `sonic:main`
+
+Note: you must use one of `optimize/feature/bugfix/doc/ci/test/refactor` following a slash(`/`) as the branch prefix.
+
+Your PR title and commit message should follow https://www.conventionalcommits.org/.
+
+## Contribution Prerequisites
+- Our development environment keeps up with [Go Official](https://golang.org/project/).
+- You need to fully check your code with lint tools before submitting your pull request: [gofmt](https://golang.org/pkg/cmd/gofmt/) & [golangci-lint](https://github.com/golangci/golangci-lint).
+- You are familiar with [GitHub](https://github.com).
+- You may also need to be familiar with [Actions](https://github.com/features/actions) (our default workflow tool).
+
+## Code Style Guides
+See [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments).
+
+Good resources:
+- [Effective Go](https://golang.org/doc/effective_go)
+- [Pingcap General advice](https://pingcap.github.io/style-guide/general.html)
+- [Uber Go Style Guide](https://github.com/uber-go/guide/blob/master/style.md)
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
@@ -0,0 +1,487 @@
+# Sonic
+
+English | [中文](README_ZH_CN.md)
+
+A blazingly fast JSON serializing & deserializing library, accelerated by JIT (just-in-time compiling) and SIMD (single-instruction-multiple-data).
+
+## Requirement
+
+- Go: 1.17~1.23
+- OS: Linux / MacOS / Windows
+- CPU: AMD64 / ARM64 (requires Go 1.20 or above)
+
+## Features
+
+- Runtime object binding without code generation
+- Complete APIs for JSON value manipulation
+- Fast, fast, fast!
+
+## APIs
+
+see [go.dev](https://pkg.go.dev/github.com/bytedance/sonic)
+
+## Benchmarks
+
+For **all sizes** of json and **all scenarios** of usage, **Sonic performs best**.
+
+- [Medium](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13KB, 300+ key, 6 layers)
+
+```powershell
+goversion: 1.17.1
+goos: darwin
+goarch: amd64
+cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
+BenchmarkEncoder_Generic_Sonic-16                      32393 ns/op         402.40 MB/s       11965 B/op          4 allocs/op
+BenchmarkEncoder_Generic_Sonic_Fast-16                 21668 ns/op         601.57 MB/s       10940 B/op          4 allocs/op
+BenchmarkEncoder_Generic_JsonIter-16                   42168 ns/op         309.12 MB/s       14345 B/op        115 allocs/op
+BenchmarkEncoder_Generic_GoJson-16                     65189 ns/op         199.96 MB/s       23261 B/op         16 allocs/op
+BenchmarkEncoder_Generic_StdLib-16                    106322 ns/op         122.60 MB/s       49136 B/op        789 allocs/op
+BenchmarkEncoder_Binding_Sonic-16                       6269 ns/op        2079.26 MB/s       14173 B/op          4 allocs/op
+BenchmarkEncoder_Binding_Sonic_Fast-16                  5281 ns/op        2468.16 MB/s       12322 B/op          4 allocs/op
+BenchmarkEncoder_Binding_JsonIter-16                   20056 ns/op         649.93 MB/s        9488 B/op          2 allocs/op
+BenchmarkEncoder_Binding_GoJson-16                      8311 ns/op        1568.32 MB/s        9481 B/op          1 allocs/op
+BenchmarkEncoder_Binding_StdLib-16                     16448 ns/op         792.52 MB/s        9479 B/op          1 allocs/op
+BenchmarkEncoder_Parallel_Generic_Sonic-16              6681 ns/op        1950.93 MB/s       12738 B/op          4 allocs/op
+BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16         4179 ns/op        3118.99 MB/s       10757 B/op          4 allocs/op
+BenchmarkEncoder_Parallel_Generic_JsonIter-16           9861 ns/op        1321.84 MB/s       14362 B/op        115 allocs/op
+BenchmarkEncoder_Parallel_Generic_GoJson-16            18850 ns/op         691.52 MB/s       23278 B/op         16 allocs/op
+BenchmarkEncoder_Parallel_Generic_StdLib-16            45902 ns/op         283.97 MB/s       49174 B/op        789 allocs/op
+BenchmarkEncoder_Parallel_Binding_Sonic-16              1480 ns/op        8810.09 MB/s       13049 B/op          4 allocs/op
+BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16         1209 ns/op        10785.23 MB/s      11546 B/op          4 allocs/op
+BenchmarkEncoder_Parallel_Binding_JsonIter-16           6170 ns/op        2112.58 MB/s        9504 B/op          2 allocs/op
+BenchmarkEncoder_Parallel_Binding_GoJson-16             3321 ns/op        3925.52 MB/s        9496 B/op          1 allocs/op
+BenchmarkEncoder_Parallel_Binding_StdLib-16             3739 ns/op        3486.49 MB/s        9480 B/op          1 allocs/op
+
+BenchmarkDecoder_Generic_Sonic-16                      66812 ns/op         195.10 MB/s       57602 B/op        723 allocs/op
+BenchmarkDecoder_Generic_Sonic_Fast-16                 54523 ns/op         239.07 MB/s       49786 B/op        313 allocs/op
+BenchmarkDecoder_Generic_StdLib-16                    124260 ns/op         104.90 MB/s       50869 B/op        772 allocs/op
+BenchmarkDecoder_Generic_JsonIter-16                   91274 ns/op         142.81 MB/s       55782 B/op       1068 allocs/op
+BenchmarkDecoder_Generic_GoJson-16                     88569 ns/op         147.17 MB/s       66367 B/op        973 allocs/op
+BenchmarkDecoder_Binding_Sonic-16                      32557 ns/op         400.38 MB/s       28302 B/op        137 allocs/op
+BenchmarkDecoder_Binding_Sonic_Fast-16                 28649 ns/op         455.00 MB/s       24999 B/op         34 allocs/op
+BenchmarkDecoder_Binding_StdLib-16                    111437 ns/op         116.97 MB/s       10576 B/op        208 allocs/op
+BenchmarkDecoder_Binding_JsonIter-16                   35090 ns/op         371.48 MB/s       14673 B/op        385 allocs/op
+BenchmarkDecoder_Binding_GoJson-16                     28738 ns/op         453.59 MB/s       22039 B/op         49 allocs/op
+BenchmarkDecoder_Parallel_Generic_Sonic-16             12321 ns/op        1057.91 MB/s       57233 B/op        723 allocs/op
+BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16        10644 ns/op        1224.64 MB/s       49362 B/op        313 allocs/op
+BenchmarkDecoder_Parallel_Generic_StdLib-16            57587 ns/op         226.35 MB/s       50874 B/op        772 allocs/op
+BenchmarkDecoder_Parallel_Generic_JsonIter-16          38666 ns/op         337.12 MB/s       55789 B/op       1068 allocs/op
+BenchmarkDecoder_Parallel_Generic_GoJson-16            30259 ns/op         430.79 MB/s       66370 B/op        974 allocs/op
+BenchmarkDecoder_Parallel_Binding_Sonic-16              5965 ns/op        2185.28 MB/s       27747 B/op        137 allocs/op
+BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16         5170 ns/op        2521.31 MB/s       24715 B/op         34 allocs/op
+BenchmarkDecoder_Parallel_Binding_StdLib-16            27582 ns/op         472.58 MB/s       10576 B/op        208 allocs/op
+BenchmarkDecoder_Parallel_Binding_JsonIter-16          13571 ns/op         960.51 MB/s       14685 B/op        385 allocs/op
+BenchmarkDecoder_Parallel_Binding_GoJson-16            10031 ns/op        1299.51 MB/s       22111 B/op         49 allocs/op
+
+BenchmarkGetOne_Sonic-16                                3276 ns/op        3975.78 MB/s          24 B/op          1 allocs/op
+BenchmarkGetOne_Gjson-16                                9431 ns/op        1380.81 MB/s           0 B/op          0 allocs/op
+BenchmarkGetOne_Jsoniter-16                            51178 ns/op         254.46 MB/s       27936 B/op        647 allocs/op
+BenchmarkGetOne_Parallel_Sonic-16                      216.7 ns/op       60098.95 MB/s          24 B/op          1 allocs/op
+BenchmarkGetOne_Parallel_Gjson-16                       1076 ns/op        12098.62 MB/s          0 B/op          0 allocs/op
+BenchmarkGetOne_Parallel_Jsoniter-16                   17741 ns/op         734.06 MB/s       27945 B/op        647 allocs/op
+BenchmarkSetOne_Sonic-16                               9571 ns/op         1360.61 MB/s        1584 B/op         17 allocs/op
+BenchmarkSetOne_Sjson-16                               36456 ns/op         357.22 MB/s       52180 B/op          9 allocs/op
+BenchmarkSetOne_Jsoniter-16                            79475 ns/op         163.86 MB/s       45862 B/op        964 allocs/op
+BenchmarkSetOne_Parallel_Sonic-16                      850.9 ns/op       15305.31 MB/s        1584 B/op         17 allocs/op
+BenchmarkSetOne_Parallel_Sjson-16                      18194 ns/op         715.77 MB/s       52247 B/op          9 allocs/op
+BenchmarkSetOne_Parallel_Jsoniter-16                   33560 ns/op         388.05 MB/s       45892 B/op        964 allocs/op
+BenchmarkLoadNode/LoadAll()-16                         11384 ns/op        1143.93 MB/s        6307 B/op         25 allocs/op
+BenchmarkLoadNode_Parallel/LoadAll()-16                 5493 ns/op        2370.68 MB/s        7145 B/op         25 allocs/op
+BenchmarkLoadNode/Interface()-16                       17722 ns/op         734.85 MB/s       13323 B/op         88 allocs/op
+BenchmarkLoadNode_Parallel/Interface()-16              10330 ns/op        1260.70 MB/s       15178 B/op         88 allocs/op
+```
+
+- [Small](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 keys, 3 layers)
+![small benchmarks](./docs/imgs/bench-small.png)
+- [Large](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635KB, 10000+ key, 6 layers)
+![large benchmarks](./docs/imgs/bench-large.png)
+
+See [bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) for benchmark codes.
+
+## How it works
+
+See [INTRODUCTION.md](./docs/INTRODUCTION.md).
+
+## Usage
+
+### Marshal/Unmarshal
+
+Default behaviors are mostly consistent with `encoding/json`, except HTML escaping form (see [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) and `SortKeys` feature (optional support see [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys)) that is **NOT** in conformity to [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259).
+
+ ```go
+import "github.com/bytedance/sonic"
+
+var data YourSchema
+// Marshal
+output, err := sonic.Marshal(&data)
+// Unmarshal
+err := sonic.Unmarshal(output, &data)
+ ```
+
+### Streaming IO
+
+Sonic supports decoding JSON from `io.Reader` or encoding objects into `io.Writer`, aiming at handling multiple values as well as reducing memory consumption.
+
+- encoder
+
+```go
+var o1 = map[string]interface{}{
+    "a": "b",
+}
+var o2 = 1
+var w = bytes.NewBuffer(nil)
+var enc = sonic.ConfigDefault.NewEncoder(w)
+enc.Encode(o1)
+enc.Encode(o2)
+fmt.Println(w.String())
+// Output:
+// {"a":"b"}
+// 1
+```
+
+- decoder
+
+```go
+var o =  map[string]interface{}{}
+var r = strings.NewReader(`{"a":"b"}{"1":"2"}`)
+var dec = sonic.ConfigDefault.NewDecoder(r)
+dec.Decode(&o)
+dec.Decode(&o)
+fmt.Printf("%+v", o)
+// Output:
+// map[1:2 a:b]
+```
+
+### Use Number/Use Int64
+
+ ```go
+import "github.com/bytedance/sonic/decoder"
+
+var input = `1`
+var data interface{}
+
+// default float64
+dc := decoder.NewDecoder(input)
+dc.Decode(&data) // data == float64(1)
+// use json.Number
+dc = decoder.NewDecoder(input)
+dc.UseNumber()
+dc.Decode(&data) // data == json.Number("1")
+// use int64
+dc = decoder.NewDecoder(input)
+dc.UseInt64()
+dc.Decode(&data) // data == int64(1)
+
+root, err := sonic.GetFromString(input)
+// Get json.Number
+jn := root.Number()
+jm := root.InterfaceUseNumber().(json.Number) // jn == jm
+// Get float64
+fn := root.Float64()
+fm := root.Interface().(float64) // fn == fm
+ ```
+
+### Sort Keys
+
+On account of the performance loss from sorting (roughly 10%), sonic doesn't enable this feature by default. If your component depends on it to work (like [zstd](https://github.com/facebook/zstd)), use it like this:
+
+```go
+import "github.com/bytedance/sonic"
+import "github.com/bytedance/sonic/encoder"
+
+// Binding map only
+m := map[string]interface{}{}
+v, err := encoder.Encode(m, encoder.SortMapKeys)
+
+// Or ast.Node.SortKeys() before marshal
+root, err := sonic.Get(JSON)
+err = root.SortKeys()
+```
+
+### Escape HTML
+
+On account of the performance loss (roughly 15%), sonic doesn't enable this feature by default. You can use `encoder.EscapeHTML` option to open this feature (align with `encoding/json.HTMLEscape`).
+
+```go
+import "github.com/bytedance/sonic"
+
+v := map[string]string{"&&":"<>"}
+ret, err := Encode(v, EscapeHTML) // ret == `{"\u0026\u0026":"\u003c\u003e"}`
+```
+
+### Compact Format
+
+Sonic encodes primitive objects (struct/map...) as compact-format JSON by default, except when marshaling `json.RawMessage` or `json.Marshaler`: sonic validates their output JSON but does **NOT** compact it, for performance reasons. We provide the option `encoder.CompactMarshaler` to add the compacting step.
+
+### Print Error
+
+If there is invalid syntax in the input JSON, sonic will return `decoder.SyntaxError`, which supports pretty-printing of the error position.
+
+```go
+import "github.com/bytedance/sonic"
+import "github.com/bytedance/sonic/decoder"
+
+var data interface{}
+err := sonic.UnmarshalString("[[[}]]", &data)
+if err != nil {
+    /* One line by default */
+    println(err.Error()) // "Syntax error at index 3: invalid char\n\n\t[[[}]]\n\t...^..\n"
+    /* Pretty print */
+    if e, ok := err.(decoder.SyntaxError); ok {
+        /*Syntax error at index 3: invalid char
+
+            [[[}]]
+            ...^..
+        */
+        print(e.Description())
+    } else if me, ok := err.(*decoder.MismatchTypeError); ok {
+        // decoder.MismatchTypeError is new to Sonic v1.6.0
+        print(me.Description())
+    }
+}
+```
+
+#### Mismatched Types [Sonic v1.6.0]
+
+If there is a **mismatch-typed** value for a given key, sonic will report `decoder.MismatchTypeError` (if there are many, it reports the last one), but it will still skip the wrong value and keep decoding the rest of the JSON.
+
+```go
+import "github.com/bytedance/sonic"
+import "github.com/bytedance/sonic/decoder"
+
+var data = struct{
+    A int
+    B int
+}{}
+err := UnmarshalString(`{"A":"1","B":1}`, &data)
+println(err.Error())    // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n"
+fmt.Printf("%+v", data) // {A:0 B:1}
+```
+
+### Ast.Node
+
+Sonic/ast.Node is a completely self-contained AST for JSON. It implements serialization and deserialization both and provides robust APIs for obtaining and modification of generic data.
+
+#### Get/Index
+
+Search partial JSON by given paths, which must be non-negative integer or string, or nil
+
+```go
+import "github.com/bytedance/sonic"
+
+input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`)
+
+// no path, returns entire json
+root, err := sonic.Get(input)
+raw := root.Raw() // == string(input)
+
+// multiple paths
+root, err := sonic.Get(input, "key1", 1, "key2")
+sub := root.Get("key3").Index(2).Int64() // == 3
+```
+
+**Tip**: since `Index()` uses an offset to locate data, which is much faster than scanning like `Get()`, we suggest you use it as much as possible. Sonic also provides another API, `IndexOrGet()`, which uses the offset underneath while also ensuring that the key is matched.
+
+#### SearchOption
+`Searcher` provides some options for user to meet different needs:
+```go
+opts := ast.SearchOption{ CopyReturn: true ... }
+val, err := sonic.GetWithOptions(JSON, opts, "key")
+```
+- CopyReturn
+Instructs the searcher to copy the result JSON string instead of referencing the input. This can help to reduce memory usage if you cache the results.
+- ConcurrentRead
+Since `ast.Node` uses a `Lazy-Load` design, it doesn't support concurrent reads by default. If you want to read it concurrently, please specify this option.
+- ValidateJSON
+Instructs the searcher to validate the entire JSON. This option is enabled by default, which slows down the search a little.
+
+
+#### Set/Unset
+
+Modify the json content by Set()/Unset()
+
+```go
+import "github.com/bytedance/sonic"
+
+// Set
+exist, err := root.Set("key4", NewBool(true)) // exist == false
+alias1 := root.Get("key4")
+println(alias1.Valid()) // true
+alias2 := root.Index(1)
+println(alias1 == alias2) // true
+
+// Unset
+exist, err := root.UnsetByIndex(1) // exist == true
+println(root.Get("key4").Check()) // "value not exist"
+```
+#### Serialize
+
+To encode `ast.Node` as JSON, use `MarshalJSON()` or `json.Marshal()` (MUST pass the node's pointer)
+
+```go
+import (
+    "encoding/json"
+    "github.com/bytedance/sonic"
+)
+
+buf, err := root.MarshalJSON()
+println(string(buf))                // {"key1":[{},{"key2":{"key3":[1,2,3]}}]}
+exp, err := json.Marshal(&root)     // WARN: use pointer
+println(string(buf) == string(exp)) // true
+```
+
+#### APIs
+
+- validation: `Check()`, `Error()`, `Valid()`, `Exist()`
+- searching: `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()`
+- go-type casting: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()`
+- go-type packing: `NewRaw()`, `NewNumber()`, `NewNull()`, `NewBool()`, `NewString()`, `NewObject()`, `NewArray()`
+- iteration: `Values()`, `Properties()`, `ForEach()`, `SortKeys()`
+- modification: `Set()`, `SetByIndex()`, `Add()`
+
+### Ast.Visitor
+
+Sonic provides an advanced API for fully parsing JSON into non-standard types (neither `struct` nor `map[string]interface{}`) without using any intermediate representation (`ast.Node` or `interface{}`). For example, you might have the following types which are like `interface{}` but actually not `interface{}`:
+
+```go
+type UserNode interface {}
+
+// the following types implement the UserNode interface.
+type (
+    UserNull    struct{}
+    UserBool    struct{ Value bool }
+    UserInt64   struct{ Value int64 }
+    UserFloat64 struct{ Value float64 }
+    UserString  struct{ Value string }
+    UserObject  struct{ Value map[string]UserNode }
+    UserArray   struct{ Value []UserNode }
+)
+```
+
+Sonic provides the following API to return **the preorder traversal of a JSON AST**. The `ast.Visitor` is a SAX style interface which is used in some C++ JSON library. You should implement `ast.Visitor` by yourself and pass it to `ast.Preorder()` method. In your visitor you can make your custom types to represent JSON values. There may be an O(n) space container (such as stack) in your visitor to record the object / array hierarchy.
+
+```go
+func Preorder(str string, visitor Visitor, opts *VisitorOptions) error
+
+type Visitor interface {
+    OnNull() error
+    OnBool(v bool) error
+    OnString(v string) error
+    OnInt64(v int64, n json.Number) error
+    OnFloat64(v float64, n json.Number) error
+    OnObjectBegin(capacity int) error
+    OnObjectKey(key string) error
+    OnObjectEnd() error
+    OnArrayBegin(capacity int) error
+    OnArrayEnd() error
+}
+```
+
+See [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) for detailed usage. We also implement a demo visitor for `UserNode` in [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go).
+
+## Compatibility
+
+Sonic **DOES NOT** ensure to support all environments, due to the difficulty of developing high-performance codes. For developers who use sonic to build their applications in different environments, we have the following suggestions:
+
+- Developing on **Mac M1**: Make sure you have Rosetta 2 installed on your machine, and set `GOARCH=amd64` when building your application. Rosetta 2 can automatically translate x86 binaries to arm64 binaries and run x86 applications on Mac M1.
+- Developing on **Linux arm64**: You can install qemu and use the `qemu-x86_64 -cpu max` command to convert x86 binaries to arm64 binaries for applications built with sonic. The qemu can achieve a similar transfer effect to Rosetta 2 on Mac M1.
+
+For developers who want to use sonic on Linux arm64 without qemu, or those who want to handle JSON strictly consistent with `encoding/json`, we provide some compatible APIs as `sonic.API`
+
+- `ConfigDefault`: the sonic's default config (`EscapeHTML=false`,`SortKeys=false`...) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options like `SortKeys=false` will be invalid.
+- `ConfigStd`: the std-compatible config (`EscapeHTML=true`,`SortKeys=true`...) to run on sonic-supporting environment. It will fall back to `encoding/json`.
+- `ConfigFastest`: the fastest config (`NoQuoteTextMarshaler=true`) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options will be invalid.
+
+## Tips
+
+### Pretouch
+
+Since Sonic uses [golang-asm](https://github.com/twitchyliquid64/golang-asm) as a JIT assembler, which is NOT very suitable for runtime compiling, first-hit running of a huge schema may cause request-timeout or even process-OOM. For better stability, we advise **using `Pretouch()` for huge-schema or compact-memory applications** before `Marshal()/Unmarshal()`.
+
+```go
+import (
+    "reflect"
+    "github.com/bytedance/sonic"
+    "github.com/bytedance/sonic/option"
+)
+
+func init() {
+    var v HugeStruct
+
+    // For most large types (nesting depth <= option.DefaultMaxInlineDepth)
+    err := sonic.Pretouch(reflect.TypeOf(v))
+
+    // with more CompileOption...
+    err := sonic.Pretouch(reflect.TypeOf(v),
+        // If the type is too deep nesting (nesting depth > option.DefaultMaxInlineDepth),
+        // you can set compile recursive loops in Pretouch for better stability in JIT.
+        option.WithCompileRecursiveDepth(loop),
+        // For a large nested struct, try to set a smaller depth to reduce compiling time.
+        option.WithCompileMaxInlineDepth(depth),
+    )
+}
+```
+
+### Copy string
+
+When decoding **string values without any escaped characters**, sonic references them from the origin JSON buffer instead of mallocing a new buffer to copy. This helps a lot for CPU performance but may leave the whole JSON buffer in memory as long as the decoded objects are being used. In practice, we found the extra memory introduced by referring JSON buffer is usually 20% ~ 80% of decoded objects. Once an application holds these objects for a long time (for example, cache the decoded objects for reusing), its in-use memory on the server may go up.
+
+- `Config.CopyString`/`decoder.CopyString()`: We provide the option for `Decode()` / `Unmarshal()` users to choose not to reference the JSON buffer, which may cause a decline in CPU performance to some degree.
+
+- `GetFromStringNoCopy()`: For memory safety, `sonic.Get()` / `sonic.GetFromString()` now copies return JSON. If users want to get json more quickly and not care about memory usage, you can use `GetFromStringNoCopy()` to return a JSON directly referenced from source.
+
+### Pass string or []byte?
+
+For alignment to `encoding/json`, we provide API to pass `[]byte` as an argument, but the string-to-bytes copy is conducted at the same time considering safety, which may lose performance when the origin JSON is huge. Therefore, you can use `UnmarshalString()` and `GetFromString()` to pass a string, as long as your origin data is a string or **nocopy-cast** is safe for your []byte. We also provide API `MarshalString()` for convenient **nocopy-cast** of encoded JSON []byte, which is safe since sonic's output bytes is always duplicated and unique.
+
+### Accelerate `encoding.TextMarshaler`
+
+To ensure data security, sonic.Encoder quotes and escapes string values from `encoding.TextMarshaler` interfaces by default, which may degrade performance much if most of your data is in form of them. We provide `encoder.NoQuoteTextMarshaler` to skip these operations, which means you **MUST** ensure their output string escaped and quoted following [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259).
+
+### Better performance for generic data
+
+In **fully-parsed** scenario, `Unmarshal()` performs better than `Get()`+`Node.Interface()`. But if you only have a part of the schema for specific json, you can combine `Get()` and `Unmarshal()` together:
+
+```go
+import "github.com/bytedance/sonic"
+
+node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user")
+var user User // your partial schema...
+err = sonic.UnmarshalString(node.Raw(), &user)
+```
+
+Even if you don't have any schema, use `ast.Node` as the container of generic values instead of `map` or `interface`:
+
+```go
+import "github.com/bytedance/sonic"
+
+root, err := sonic.GetFromString(_TwitterJson)
+user := root.GetByPath("statuses", 3, "user")  // === root.Get("statuses").Index(3).Get("user")
+err = user.Check()
+
+// err = user.LoadAll() // only call this when you want to use 'user' concurrently...
+go someFunc(user)
+```
+
+Why? Because `ast.Node` stores its children using `array`:
+
+- `Array`'s performance is **much better** than `Map` when Inserting (Deserialize) and Scanning (Serialize) data;
+- **Hashing** (`map[x]`) is not as efficient as **Indexing** (`array[x]`), which `ast.Node` can conduct on **both array and object**;
+- Using `Interface()`/`Map()` means Sonic must parse all the underlying values, while `ast.Node` can parse them **on demand**.
+
+**CAUTION:** `ast.Node` **DOESN'T** ensure concurrent security directly, due to its **lazy-load** design. However, you can call `Node.Load()`/`Node.LoadAll()` to achieve that, which may bring performance reduction while it still works faster than converting to `map` or `interface{}`
+
+### Ast.Node or Ast.Visitor?
+
+For generic data, `ast.Node` should be enough for your needs in most cases.
+
+However, `ast.Node` is designed for partially processing a JSON string. It has some special designs such as lazy-load which might not be suitable for directly parsing the whole JSON string like `Unmarshal()`. Although `ast.Node` is better than `map` or `interface{}`, it's also a kind of intermediate representation after all if your final types are customized and you have to convert the above types to your custom types after parsing.
+
+For better performance, in the previous case `ast.Visitor` will be the better choice. It performs JSON decoding like `Unmarshal()` and you can directly use your final types to represent a JSON AST without any intermediate representations.
+
+But `ast.Visitor` is not a very handy API. You might need to write a lot of code to implement your visitor and carefully maintain the tree hierarchy during decoding. Please read the comments in [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) carefully if you decide to use this API.
+
+### Buffer Size
+Sonic uses memory pools in many places, such as `encoder.Encode` and `ast.Node.MarshalJSON`, to improve performance, which may produce more (in-use) memory usage when the server's load is high. See [issue 614](https://github.com/bytedance/sonic/issues/614). Therefore, we introduce some options to let users control the behavior of the memory pool. See the [option](https://pkg.go.dev/github.com/bytedance/sonic@v1.11.9/option#pkg-variables) package.
+
+## Community
+
+Sonic is a subproject of [CloudWeGo](https://www.cloudwego.io/). We are committed to building a cloud native ecosystem.
@@ -0,0 +1,485 @@
+# Sonic
+
+[English](README.md) | 中文
+
+一个速度奇快的 JSON 序列化/反序列化库，由 JIT （即时编译）和 SIMD （单指令流多数据流）加速。
+
+## 依赖
+
+- Go: 1.17~1.23
+- OS: Linux / MacOS / Windows
+- CPU: AMD64 / ARM64（需要 Go1.20 以上）
+
+## 接口
+
+详见 [go.dev](https://pkg.go.dev/github.com/bytedance/sonic)
+
+## 特色
+
+- 运行时对象绑定，无需代码生成
+- 完备的 JSON 操作 API
+- 快，更快，还要更快！
+
+## 基准测试
+
+对于**所有大小**的 json 和**所有使用场景**， **Sonic 表现均为最佳**。
+
+- [中型](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13kB, 300+ 键, 6 层)
+
+```powershell
+goversion: 1.17.1
+goos: darwin
+goarch: amd64
+cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
+BenchmarkEncoder_Generic_Sonic-16                      32393 ns/op         402.40 MB/s       11965 B/op          4 allocs/op
+BenchmarkEncoder_Generic_Sonic_Fast-16                 21668 ns/op         601.57 MB/s       10940 B/op          4 allocs/op
+BenchmarkEncoder_Generic_JsonIter-16                   42168 ns/op         309.12 MB/s       14345 B/op        115 allocs/op
+BenchmarkEncoder_Generic_GoJson-16                     65189 ns/op         199.96 MB/s       23261 B/op         16 allocs/op
+BenchmarkEncoder_Generic_StdLib-16                    106322 ns/op         122.60 MB/s       49136 B/op        789 allocs/op
+BenchmarkEncoder_Binding_Sonic-16                       6269 ns/op        2079.26 MB/s       14173 B/op          4 allocs/op
+BenchmarkEncoder_Binding_Sonic_Fast-16                  5281 ns/op        2468.16 MB/s       12322 B/op          4 allocs/op
+BenchmarkEncoder_Binding_JsonIter-16                   20056 ns/op         649.93 MB/s        9488 B/op          2 allocs/op
+BenchmarkEncoder_Binding_GoJson-16                      8311 ns/op        1568.32 MB/s        9481 B/op          1 allocs/op
+BenchmarkEncoder_Binding_StdLib-16                     16448 ns/op         792.52 MB/s        9479 B/op          1 allocs/op
+BenchmarkEncoder_Parallel_Generic_Sonic-16              6681 ns/op        1950.93 MB/s       12738 B/op          4 allocs/op
+BenchmarkEncoder_Parallel_Generic_Sonic_Fast-16         4179 ns/op        3118.99 MB/s       10757 B/op          4 allocs/op
+BenchmarkEncoder_Parallel_Generic_JsonIter-16           9861 ns/op        1321.84 MB/s       14362 B/op        115 allocs/op
+BenchmarkEncoder_Parallel_Generic_GoJson-16            18850 ns/op         691.52 MB/s       23278 B/op         16 allocs/op
+BenchmarkEncoder_Parallel_Generic_StdLib-16            45902 ns/op         283.97 MB/s       49174 B/op        789 allocs/op
+BenchmarkEncoder_Parallel_Binding_Sonic-16              1480 ns/op        8810.09 MB/s       13049 B/op          4 allocs/op
+BenchmarkEncoder_Parallel_Binding_Sonic_Fast-16         1209 ns/op        10785.23 MB/s      11546 B/op          4 allocs/op
+BenchmarkEncoder_Parallel_Binding_JsonIter-16           6170 ns/op        2112.58 MB/s        9504 B/op          2 allocs/op
+BenchmarkEncoder_Parallel_Binding_GoJson-16             3321 ns/op        3925.52 MB/s        9496 B/op          1 allocs/op
+BenchmarkEncoder_Parallel_Binding_StdLib-16             3739 ns/op        3486.49 MB/s        9480 B/op          1 allocs/op
+
+BenchmarkDecoder_Generic_Sonic-16                      66812 ns/op         195.10 MB/s       57602 B/op        723 allocs/op
+BenchmarkDecoder_Generic_Sonic_Fast-16                 54523 ns/op         239.07 MB/s       49786 B/op        313 allocs/op
+BenchmarkDecoder_Generic_StdLib-16                    124260 ns/op         104.90 MB/s       50869 B/op        772 allocs/op
+BenchmarkDecoder_Generic_JsonIter-16                   91274 ns/op         142.81 MB/s       55782 B/op       1068 allocs/op
+BenchmarkDecoder_Generic_GoJson-16                     88569 ns/op         147.17 MB/s       66367 B/op        973 allocs/op
+BenchmarkDecoder_Binding_Sonic-16                      32557 ns/op         400.38 MB/s       28302 B/op        137 allocs/op
+BenchmarkDecoder_Binding_Sonic_Fast-16                 28649 ns/op         455.00 MB/s       24999 B/op         34 allocs/op
+BenchmarkDecoder_Binding_StdLib-16                    111437 ns/op         116.97 MB/s       10576 B/op        208 allocs/op
+BenchmarkDecoder_Binding_JsonIter-16                   35090 ns/op         371.48 MB/s       14673 B/op        385 allocs/op
+BenchmarkDecoder_Binding_GoJson-16                     28738 ns/op         453.59 MB/s       22039 B/op         49 allocs/op
+BenchmarkDecoder_Parallel_Generic_Sonic-16             12321 ns/op        1057.91 MB/s       57233 B/op        723 allocs/op
+BenchmarkDecoder_Parallel_Generic_Sonic_Fast-16        10644 ns/op        1224.64 MB/s       49362 B/op        313 allocs/op
+BenchmarkDecoder_Parallel_Generic_StdLib-16            57587 ns/op         226.35 MB/s       50874 B/op        772 allocs/op
+BenchmarkDecoder_Parallel_Generic_JsonIter-16          38666 ns/op         337.12 MB/s       55789 B/op       1068 allocs/op
+BenchmarkDecoder_Parallel_Generic_GoJson-16            30259 ns/op         430.79 MB/s       66370 B/op        974 allocs/op
+BenchmarkDecoder_Parallel_Binding_Sonic-16              5965 ns/op        2185.28 MB/s       27747 B/op        137 allocs/op
+BenchmarkDecoder_Parallel_Binding_Sonic_Fast-16         5170 ns/op        2521.31 MB/s       24715 B/op         34 allocs/op
+BenchmarkDecoder_Parallel_Binding_StdLib-16            27582 ns/op         472.58 MB/s       10576 B/op        208 allocs/op
+BenchmarkDecoder_Parallel_Binding_JsonIter-16          13571 ns/op         960.51 MB/s       14685 B/op        385 allocs/op
+BenchmarkDecoder_Parallel_Binding_GoJson-16            10031 ns/op        1299.51 MB/s       22111 B/op         49 allocs/op
+
+BenchmarkGetOne_Sonic-16                                3276 ns/op        3975.78 MB/s          24 B/op          1 allocs/op
+BenchmarkGetOne_Gjson-16                                9431 ns/op        1380.81 MB/s           0 B/op          0 allocs/op
+BenchmarkGetOne_Jsoniter-16                            51178 ns/op         254.46 MB/s       27936 B/op        647 allocs/op
+BenchmarkGetOne_Parallel_Sonic-16                      216.7 ns/op       60098.95 MB/s          24 B/op          1 allocs/op
+BenchmarkGetOne_Parallel_Gjson-16                       1076 ns/op        12098.62 MB/s          0 B/op          0 allocs/op
+BenchmarkGetOne_Parallel_Jsoniter-16                   17741 ns/op         734.06 MB/s       27945 B/op        647 allocs/op
+BenchmarkSetOne_Sonic-16                               9571 ns/op         1360.61 MB/s        1584 B/op         17 allocs/op
+BenchmarkSetOne_Sjson-16                               36456 ns/op         357.22 MB/s       52180 B/op          9 allocs/op
+BenchmarkSetOne_Jsoniter-16                            79475 ns/op         163.86 MB/s       45862 B/op        964 allocs/op
+BenchmarkSetOne_Parallel_Sonic-16                      850.9 ns/op       15305.31 MB/s        1584 B/op         17 allocs/op
+BenchmarkSetOne_Parallel_Sjson-16                      18194 ns/op         715.77 MB/s       52247 B/op          9 allocs/op
+BenchmarkSetOne_Parallel_Jsoniter-16                   33560 ns/op         388.05 MB/s       45892 B/op        964 allocs/op
+BenchmarkLoadNode/LoadAll()-16                         11384 ns/op        1143.93 MB/s        6307 B/op         25 allocs/op
+BenchmarkLoadNode_Parallel/LoadAll()-16                 5493 ns/op        2370.68 MB/s        7145 B/op         25 allocs/op
+BenchmarkLoadNode/Interface()-16                       17722 ns/op         734.85 MB/s       13323 B/op         88 allocs/op
+BenchmarkLoadNode_Parallel/Interface()-16              10330 ns/op        1260.70 MB/s       15178 B/op         88 allocs/op
+```
+
+- [小型](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 个键, 3 层)
+![small benchmarks](./docs/imgs/bench-small.png)
+- [大型](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635kB, 10000+ 个键, 6 层)
+![large benchmarks](./docs/imgs/bench-large.png)
+
+要查看基准测试代码，请参阅 [bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) 。
+
+## 工作原理
+
+请参阅 [INTRODUCTION_ZH_CN.md](./docs/INTRODUCTION_ZH_CN.md).
+
+## 使用方式
+
+### 序列化/反序列化
+
+默认的行为基本上与 `encoding/json` 相一致，除了 HTML 转义形式（参见 [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) 和 `SortKeys` 功能（参见 [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys)）**没有**遵循 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 。
+
+ ```go
+import "github.com/bytedance/sonic"
+
+var data YourSchema
+// Marshal
+output, err := sonic.Marshal(&data)
+// Unmarshal
+err := sonic.Unmarshal(output, &data)
+ ```
+
+### 流式输入输出
+
+Sonic 支持解码 `io.Reader` 中输入的 json，或将对象编码为 json 后输出至 `io.Writer`，以处理多个值并减少内存消耗。
+
+- 编码器
+
+```go
+var o1 = map[string]interface{}{
+    "a": "b",
+}
+var o2 = 1
+var w = bytes.NewBuffer(nil)
+var enc = sonic.ConfigDefault.NewEncoder(w)
+enc.Encode(o1)
+enc.Encode(o2)
+fmt.Println(w.String())
+// Output:
+// {"a":"b"}
+// 1
+```
+
+- 解码器
+
+```go
+var o =  map[string]interface{}{}
+var r = strings.NewReader(`{"a":"b"}{"1":"2"}`)
+var dec = sonic.ConfigDefault.NewDecoder(r)
+dec.Decode(&o)
+dec.Decode(&o)
+fmt.Printf("%+v", o)
+// Output:
+// map[1:2 a:b]
+```
+
+### 使用 `Number` / `int64`
+
+```go
+import "github.com/bytedance/sonic/decoder"
+
+var input = `1`
+var data interface{}
+
+// default float64
+dc := decoder.NewDecoder(input)
+dc.Decode(&data) // data == float64(1)
+// use json.Number
+dc = decoder.NewDecoder(input)
+dc.UseNumber()
+dc.Decode(&data) // data == json.Number("1")
+// use int64
+dc = decoder.NewDecoder(input)
+dc.UseInt64()
+dc.Decode(&data) // data == int64(1)
+
+root, err := sonic.GetFromString(input)
+// Get json.Number
+jn := root.Number()
+jm := root.InterfaceUseNumber().(json.Number) // jn == jm
+// Get float64
+fn := root.Float64()
+fm := root.Interface().(float64) // fn == fm
+ ```
+
+### 对键排序
+
+考虑到排序带来的性能损失（约 10% ）， sonic 默认不会启用这个功能。如果你的组件依赖这个行为（如 [zstd](https://github.com/facebook/zstd)) ，可以仿照下面的例子：
+
+```go
+import "github.com/bytedance/sonic"
+import "github.com/bytedance/sonic/encoder"
+
+// Binding map only
+m := map[string]interface{}{}
+v, err := encoder.Encode(m, encoder.SortMapKeys)
+
+// Or ast.Node.SortKeys() before marshal
+root, err := sonic.Get(JSON)
+err = root.SortKeys()
+```
+
+### HTML 转义
+
+考虑到性能损失（约15%）， sonic 默认不会启用这个功能。你可以使用 `encoder.EscapeHTML` 选项来开启（与 `encoding/json.HTMLEscape` 行为一致）。
+
+```go
+import "github.com/bytedance/sonic/encoder"
+
+v := map[string]string{"&&":"<>"}
+ret, err := encoder.Encode(v, encoder.EscapeHTML) // ret == `{"\u0026\u0026":"\u003c\u003e"}`
+```
+
+### 紧凑格式
+
+Sonic 默认将基本类型（ `struct` ， `map` 等）编码为紧凑格式的 JSON ，除非使用 `json.RawMessage` 或 `json.Marshaler` 进行编码： sonic 确保输出的 JSON 合法，但出于性能考虑，**不会**加工成紧凑格式。我们提供选项 `encoder.CompactMarshaler` 来添加此过程。
+
+### 打印错误
+
+如果输入的 JSON 存在无效的语法，sonic 将返回 `decoder.SyntaxError`，该错误支持错误位置的美化输出。
+
+```go
+import "github.com/bytedance/sonic"
+import "github.com/bytedance/sonic/decoder"
+
+var data interface{}
+err := sonic.UnmarshalString("[[[}]]", &data)
+if err != nil {
+    /* One line by default */
+    println(err.Error()) // "Syntax error at index 3: invalid char\n\n\t[[[}]]\n\t...^..\n"
+    /* Pretty print */
+    if e, ok := err.(decoder.SyntaxError); ok {
+        /*Syntax error at index 3: invalid char
+
+            [[[}]]
+            ...^..
+        */
+        print(e.Description())
+    } else if me, ok := err.(*decoder.MismatchTypeError); ok {
+        // decoder.MismatchTypeError is new to Sonic v1.6.0
+        print(me.Description())
+    }
+}
+```
+
+#### 类型不匹配 [Sonic v1.6.0]
+
+如果给定键中存在**类型不匹配**的值， sonic 会抛出 `decoder.MismatchTypeError` （如果有多个，只会报告最后一个），但仍会跳过错误的值并解码下一个 JSON 。
+
+```go
+import "github.com/bytedance/sonic"
+import "github.com/bytedance/sonic/decoder"
+
+var data = struct{
+    A int
+    B int
+}{}
+err := sonic.UnmarshalString(`{"A":"1","B":1}`, &data)
+println(err.Error())    // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n"
+fmt.Printf("%+v", data) // {A:0 B:1}
+```
+
+### `Ast.Node`
+
+Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化，并提供了获取和修改JSON数据的鲁棒的 API。
+
+#### 查找/索引
+
+通过给定的路径搜索 JSON 片段，路径必须为非负整数，字符串或 `nil` 。
+
+```go
+import "github.com/bytedance/sonic"
+
+input := []byte(`{"key1":[{},{"key2":{"key3":[1,2,3]}}]}`)
+
+// no path, returns entire json
+root, err := sonic.Get(input)
+raw := root.Raw() // == string(input)
+
+// multiple paths
+root, err := sonic.Get(input, "key1", 1, "key2")
+sub := root.Get("key3").Index(2).Int64() // == 3
+```
+
+**注意**：由于 `Index()` 使用偏移量来定位数据，比使用扫描的 `Get()` 要快的多，建议尽可能的使用 `Index` 。 Sonic 也提供了另一个 API， `IndexOrGet()` ，以偏移量为基础并且也确保键的匹配。
+
+#### 查找选项
+`ast.Searcher`提供了一些选项，以满足用户的不同需求:
+```
+opts := ast.SearchOptions{CopyReturn: true, ...}
+val, err := sonic.GetWithOptions(JSON, opts, "key")
+```
+- CopyReturn
+指示搜索器复制结果JSON字符串，而不是从输入引用。如果用户缓存结果，这有助于减少内存使用
+- ConcurrentRead
+因为`ast.Node`使用`Lazy-Load`设计，默认不支持并发读取。如果您想同时读取，请指定它。
+- ValidateJSON
+指示搜索器来验证整个JSON。默认情况下启用该选项, 但是对于查找速度有一定影响。
+
+#### 修改
+
+使用 `Set()` / `Unset()` 修改 json 的内容
+
+```go
+import "github.com/bytedance/sonic"
+
+// Set
+exist, err := root.Set("key4", NewBool(true)) // exist == false
+alias1 := root.Get("key4")
+println(alias1.Valid()) // true
+alias2 := root.Index(1)
+println(alias1 == alias2) // true
+
+// Unset
+exist, err := root.UnsetByIndex(1) // exist == true
+println(root.Get("key4").Check()) // "value not exist"
+```
+
+#### 序列化
+
+要将 `ast.Node` 编码为 json ，使用 `MarshalJSON()` 或者 `json.Marshal()` （必须传递指向节点的指针）
+
+```go
+import (
+    "encoding/json"
+    "github.com/bytedance/sonic"
+)
+
+buf, err := root.MarshalJSON()
+println(string(buf))                // {"key1":[{},{"key2":{"key3":[1,2,3]}}]}
+exp, err := json.Marshal(&root)     // WARN: use pointer
+println(string(buf) == string(exp)) // true
+```
+
+#### APIs
+
+- 合法性检查： `Check()`, `Error()`, `Valid()`, `Exist()`
+- 索引： `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()`
+- 转换至 go 内置类型： `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()`
+- go 类型打包： `NewRaw()`, `NewNumber()`, `NewNull()`, `NewBool()`, `NewString()`, `NewObject()`, `NewArray()`
+- 迭代： `Values()`, `Properties()`, `ForEach()`, `SortKeys()`
+- 修改： `Set()`, `SetByIndex()`, `Add()`
+
+### `Ast.Visitor`
+
+Sonic 提供了一个高级的 API 用于直接全量解析 JSON 到非标准容器里 (既不是 `struct` 也不是 `map[string]interface{}`) 且不需要借助任何中间表示 (`ast.Node` 或 `interface{}`)。举个例子，你可能定义了下述的类型，它们看起来像 `interface{}`，但实际上并不是：
+
+```go
+type UserNode interface {}
+
+// the following types implement the UserNode interface.
+type (
+    UserNull    struct{}
+    UserBool    struct{ Value bool }
+    UserInt64   struct{ Value int64 }
+    UserFloat64 struct{ Value float64 }
+    UserString  struct{ Value string }
+    UserObject  struct{ Value map[string]UserNode }
+    UserArray   struct{ Value []UserNode }
+)
+```
+
+Sonic 提供了下述的 API 来返回 **“对 JSON AST 的前序遍历”**。`ast.Visitor` 是一个 SAX 风格的接口，这在某些 C++ 的 JSON 解析库中被使用到。你需要自己实现一个 `ast.Visitor`，将它传递给 `ast.Preorder()` 方法。在你的实现中你可以使用自定义的类型来表示 JSON 的值。在你的 `ast.Visitor` 中，可能需要有一个 O(n) 空间复杂度的容器（比如说栈）来记录 object / array 的层级。
+
+```go
+func Preorder(str string, visitor Visitor, opts *VisitorOptions) error
+
+type Visitor interface {
+    OnNull() error
+    OnBool(v bool) error
+    OnString(v string) error
+    OnInt64(v int64, n json.Number) error
+    OnFloat64(v float64, n json.Number) error
+    OnObjectBegin(capacity int) error
+    OnObjectKey(key string) error
+    OnObjectEnd() error
+    OnArrayBegin(capacity int) error
+    OnArrayEnd() error
+}
+```
+
+详细用法参看 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go)，我们还为 `UserNode` 实现了一个示例 `ast.Visitor`，你可以在 [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go) 中找到它。
+
+## 兼容性
+
+由于开发高性能代码的困难性， Sonic **不**保证对所有环境的支持。对于在不同环境中使用 Sonic 构建应用程序的开发者，我们有以下建议：
+
+- 在 **Mac M1** 上开发：确保在您的计算机上安装了 Rosetta 2，并在构建时设置 `GOARCH=amd64` 。 Rosetta 2 可以自动将 x86 二进制文件转换为 arm64 二进制文件，并在 Mac M1 上运行 x86 应用程序。
+- 在 **Linux arm64** 上开发：您可以安装 qemu 并使用 `qemu-x86_64 -cpu max` 命令来将 x86 二进制文件转换为 arm64 二进制文件。qemu可以实现与Mac M1上的Rosetta 2类似的转换效果。
+
+对于希望在不使用 qemu 下使用 sonic 的开发者，或者希望处理 JSON 时与 `encoding/json` 严格保持一致的开发者，我们在 `sonic.API` 中提供了一些兼容性 API：
+
+- `ConfigDefault`: 在支持 sonic 的环境下 sonic 的默认配置（`EscapeHTML=false`，`SortKeys=false`等）。行为与具有相应配置的 `encoding/json` 一致，一些选项，如 `SortKeys=false` 将无效。
+- `ConfigStd`: 在支持 sonic 的环境下与标准库兼容的配置（`EscapeHTML=true`，`SortKeys=true`等）。行为与 `encoding/json` 一致。
+- `ConfigFastest`: 在支持 sonic 的环境下运行最快的配置（`NoQuoteTextMarshaler=true`）。行为与具有相应配置的 `encoding/json` 一致，某些选项将无效。
+
+## 注意事项
+
+### 预热
+
+由于 Sonic 使用 [golang-asm](https://github.com/twitchyliquid64/golang-asm) 作为 JIT 汇编器，这个库并不适用于运行时编译，第一次运行一个大型模式可能会导致请求超时甚至进程内存溢出。为了更好地稳定性，我们建议在运行大型模式或在内存有限的应用中，在使用 `Marshal()/Unmarshal()` 前运行 `Pretouch()`。
+
+```go
+import (
+    "reflect"
+    "github.com/bytedance/sonic"
+    "github.com/bytedance/sonic/option"
+)
+
+func init() {
+    var v HugeStruct
+
+    // For most large types (nesting depth <= option.DefaultMaxInlineDepth)
+    err := sonic.Pretouch(reflect.TypeOf(v))
+
+    // with more CompileOption...
+    err := sonic.Pretouch(reflect.TypeOf(v),
+        // If the type is too deep nesting (nesting depth > option.DefaultMaxInlineDepth),
+        // you can set compile recursive loops in Pretouch for better stability in JIT.
+        option.WithCompileRecursiveDepth(loop),
+        // For a large nested struct, try to set a smaller depth to reduce compiling time.
+        option.WithCompileMaxInlineDepth(depth),
+    )
+}
+```
+
+### 拷贝字符串
+
+当解码 **没有转义字符的字符串**时， sonic 会从原始的 JSON 缓冲区内引用而不是复制到新的一个缓冲区中。这对 CPU 的性能方面很有帮助，但是可能因此在解码后对象仍在使用的时候将整个 JSON 缓冲区保留在内存中。实践中我们发现，通过引用 JSON 缓冲区引入的额外内存通常是解码后对象的 20% 至 80% ，一旦应用长期保留这些对象（如缓存以备重用），服务器所使用的内存可能会增加。我们提供了选项 `decoder.CopyString()` 供用户选择，不引用 JSON 缓冲区。这可能在一定程度上降低 CPU 性能。
+
+### 传递字符串还是字节数组？
+
+为了和 `encoding/json` 保持一致，我们提供了传递 `[]byte` 作为参数的 API ，但考虑到安全性，字符串到字节的复制是同时进行的，这在原始 JSON 非常大时可能会导致性能损失。因此，你可以使用 `UnmarshalString()` 和 `GetFromString()` 来传递字符串，只要你的原始数据是字符串，或**零拷贝类型转换**对于你的字节数组是安全的。我们也提供了 `MarshalString()` 的 API ，以便对编码的 JSON 字节数组进行**零拷贝类型转换**，因为 sonic 输出的字节始终是重复并且唯一的，所以这样是安全的。
+
+### 加速 `encoding.TextMarshaler`
+
+为了保证数据安全性， `sonic.Encoder` 默认会对来自 `encoding.TextMarshaler` 接口的字符串进行引用和转义，如果大部分数据都是这种形式那可能会导致很大的性能损失。我们提供了 `encoder.NoQuoteTextMarshaler` 选项来跳过这些操作，但你**必须**保证他们的输出字符串依照 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 进行了转义和引用。
+
+### 泛型的性能优化
+
+在 **完全解析**的场景下， `Unmarshal()` 表现得比 `Get()`+`Node.Interface()` 更好。但是如果你只有特定 JSON 的部分模式，你可以将 `Get()` 和 `Unmarshal()` 结合使用：
+
+```go
+import "github.com/bytedance/sonic"
+
+node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user")
+var user User // your partial schema...
+err = sonic.UnmarshalString(node.Raw(), &user)
+```
+
+甚至如果你没有任何模式，可以用 `ast.Node` 代替 `map` 或 `interface` 作为泛型的容器：
+
+```go
+import "github.com/bytedance/sonic"
+
+root, err := sonic.GetFromString(_TwitterJson)
+user := root.GetByPath("statuses", 3, "user")  // === root.Get("statuses").Index(3).Get("user")
+err = user.Check()
+
+// err = user.LoadAll() // only call this when you want to use 'user' concurrently...
+go someFunc(user)
+```
+
+为什么？因为 `ast.Node` 使用 `array` 来存储其子节点：
+
+- 在插入（反序列化）和扫描（序列化）数据时，`Array` 的性能比 `Map` **好得多**；
+- **哈希**（`map[x]`）的效率不如**索引**（`array[x]`）高效，而 `ast.Node` 可以在数组和对象上使用索引；
+- 使用 `Interface()` / `Map()` 意味着 sonic 必须解析所有的底层值，而 `ast.Node` 可以**按需解析**它们。
+
+**注意**：由于 `ast.Node` 的惰性加载设计，其**不能**直接保证并发安全性，但你可以调用 `Node.Load()` / `Node.LoadAll()` 来实现并发安全。尽管可能会带来性能损失，但仍比转换成 `map` 或 `interface{}` 更为高效。
+
+### 使用 `ast.Node` 还是 `ast.Visitor`？
+
+对于泛型数据的解析，`ast.Node` 在大多数场景上应该能够满足你的需求。
+
+然而，`ast.Node` 是一种针对部分解析 JSON 而设计的泛型容器，它包含一些特殊设计，比如惰性加载，如果你希望像 `Unmarshal()` 那样直接解析整个 JSON，这些设计可能并不合适。尽管 `ast.Node` 相较于 `map` 或 `interface{}` 来说是更好的一种泛型容器，但它毕竟也是一种中间表示，如果你的最终类型是自定义的，你还得在解析完成后将上述类型转化成你自定义的类型。
+
+在上述场景中，如果想要有更极致的性能，`ast.Visitor` 会是更好的选择。它采用和 `Unmarshal()` 类似的形式解析 JSON，并且你可以直接使用你的最终类型去表示 JSON AST，而不需要经过额外的任何中间表示。
+
+但是，`ast.Visitor` 并不是一个很易用的 API。你可能需要写大量的代码去实现自己的 `ast.Visitor`，并且需要在解析过程中仔细维护树的层级。如果你决定要使用这个 API，请先仔细阅读 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) 中的注释。
+
+### 缓冲区大小
+Sonic在许多地方使用内存池，如`encoder.Encode`, `ast.Node.MarshalJSON`等来提高性能，这可能会在服务器负载高时产生更多的内存使用(in-use)。参见[issue 614](https://github.com/bytedance/sonic/issues/614)。因此，我们引入了一些选项来让用户配置内存池的行为。参见[option](https://pkg.go.dev/github.com/bytedance/sonic@v1.11.9/option#pkg-variables)包。
+
+## 社区
+
+Sonic 是 [CloudWeGo](https://www.cloudwego.io/) 下的一个子项目。我们致力于构建云原生生态系统。
@@ -0,0 +1,242 @@
+/*
+ * Copyright 2021 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package sonic
+
+import (
+    `io`
+
+    `github.com/bytedance/sonic/ast`
+    `github.com/bytedance/sonic/internal/rt`
+)
+
+const (
+    // UseStdJSON is the APIKind value (0) meaning the fallback implementation
+    // (encoding/json) is in use.
+	UseStdJSON = iota
+    // UseSonicJSON is the APIKind value (1) meaning the real sonic
+    // implementation is in use.
+	UseSonicJSON
+)
+
+// APIKind is the kind of API in use: UseStdJSON (0) is std json,
+// UseSonicJSON (1) is sonic. Its value is taken from apiKind, which is
+// declared outside this file.
+const APIKind = apiKind
+
+// Config is a combination of sonic/encoder.Options and sonic/decoder.Options.
+// All fields are booleans; the zero value leaves every option switched off.
+type Config struct {
+    // EscapeHTML tells the encoder to escape all HTML characters
+    // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
+    // WARNING: This hurts performance A LOT, USE WITH CARE.
+    EscapeHTML                    bool
+
+    // SortMapKeys tells the encoder that the keys of a map need to be sorted
+    // before serializing into JSON.
+    // WARNING: This hurts performance A LOT, USE WITH CARE.
+    SortMapKeys                   bool
+
+    // CompactMarshaler tells the encoder that the output JSON from json.Marshaler
+    // is always compact and needs no validation.
+    CompactMarshaler              bool
+
+    // NoQuoteTextMarshaler tells the encoder that the output text from encoding.TextMarshaler
+    // is always an escaped string and needs no quoting.
+    NoQuoteTextMarshaler          bool
+
+    // NoNullSliceOrMap tells the encoder that all empty Arrays or Objects are encoded as '[]' or '{}',
+    // instead of 'null'.
+    NoNullSliceOrMap              bool
+
+    // UseInt64 tells the decoder to unmarshal an integer into an interface{} as an
+    // int64 instead of as a float64.
+    UseInt64                      bool
+
+    // UseNumber tells the decoder to unmarshal a number into an interface{} as a
+    // json.Number instead of as a float64.
+    UseNumber                     bool
+
+    // UseUnicodeErrors tells the decoder to return an error when encountering invalid
+    // UTF-8 escape sequences.
+    UseUnicodeErrors              bool
+
+    // DisallowUnknownFields tells the decoder to return an error when the destination
+    // is a struct and the input contains object keys which do not match any
+    // non-ignored, exported fields in the destination.
+    DisallowUnknownFields         bool
+
+    // CopyString tells the decoder to decode string values by copying instead of referring.
+    CopyString                    bool
+
+    // ValidateString tells the decoder and encoder to validate string values: the decoder will
+    // return errors when there are unescaped control chars (\u0000-\u001f) in a JSON string value.
+    ValidateString                bool
+
+    // NoValidateJSONMarshaler tells the encoder not to validate the output string
+    // after encoding the JSONMarshaler to JSON.
+    NoValidateJSONMarshaler       bool
+    
+    // NoEncoderNewline tells the encoder not to add a newline after every message.
+    NoEncoderNewline bool
+
+    // EncodeNullForInfOrNan encodes an Infinity or NaN float as `null`, instead of returning an error.
+    EncodeNullForInfOrNan bool
+}
+ 
+// Predefined configurations. Each one is a Config literal passed through
+// Froze() (declared outside this file); the package-level helpers in this file
+// call methods on ConfigDefault.
+var (
+    // ConfigDefault is the default config of APIs, aiming at efficiency and safety.
+    // It is the zero-value Config: no option is enabled.
+    ConfigDefault = Config{}.Froze()
+ 
+    // ConfigStd is the standard config of APIs, aiming at being compatible with encoding/json.
+    // It enables EscapeHTML, SortMapKeys, CompactMarshaler, CopyString and ValidateString.
+    ConfigStd = Config{
+        EscapeHTML : true,
+        SortMapKeys: true,
+        CompactMarshaler: true,
+        CopyString : true,
+        ValidateString : true,
+    }.Froze()
+ 
+    // ConfigFastest is the fastest config of APIs, aiming at speed.
+    // It enables NoQuoteTextMarshaler and NoValidateJSONMarshaler.
+    ConfigFastest = Config{
+        NoQuoteTextMarshaler: true,
+        NoValidateJSONMarshaler: true,
+    }.Froze()
+)
+ 
+ 
+// API is a binding of a specific config.
+// This interface is inspired by github.com/json-iterator/go,
+// and has the same behaviors under an equivalent config.
+type API interface {
+    // MarshalToString returns the JSON encoding of v as a string.
+    MarshalToString(v interface{}) (string, error)
+    // Marshal returns the JSON encoding of v as bytes.
+    Marshal(v interface{}) ([]byte, error)
+    // MarshalIndent returns the JSON encoding bytes with indent and prefix.
+    MarshalIndent(v interface{}, prefix, indent string) ([]byte, error)
+    // UnmarshalFromString parses the JSON-encoded string and stores the result in the value pointed to by v.
+    UnmarshalFromString(str string, v interface{}) error
+    // Unmarshal parses the JSON-encoded bytes and stores the result in the value pointed to by v.
+    Unmarshal(data []byte, v interface{}) error
+    // NewEncoder creates an Encoder holding writer.
+    NewEncoder(writer io.Writer) Encoder
+    // NewDecoder creates a Decoder holding reader.
+    NewDecoder(reader io.Reader) Decoder
+    // Valid validates the JSON-encoded bytes and reports whether they are valid.
+    Valid(data []byte) bool
+}
+
+// Encoder encodes JSON into an io.Writer.
+type Encoder interface {
+    // Encode writes the JSON encoding of val to the stream, followed by a newline character.
+    Encode(val interface{}) error
+    // SetEscapeHTML specifies whether problematic HTML characters
+    // should be escaped inside JSON quoted strings.
+    // The default behavior is NOT to escape.
+    SetEscapeHTML(on bool)
+    // SetIndent instructs the encoder to format each subsequent encoded value
+    // as if indented by the package-level function Indent(dst, src, prefix, indent).
+    // Calling SetIndent("", "") disables indentation.
+    SetIndent(prefix, indent string)
+}
+
+// Decoder decodes JSON from an io.Reader.
+type Decoder interface {
+    // Decode reads the next JSON-encoded value from its input and stores it in the value pointed to by val.
+    Decode(val interface{}) error
+    // Buffered returns a reader of the data remaining in the Decoder's buffer.
+    // The reader is valid until the next call to Decode.
+    Buffered() io.Reader
+    // DisallowUnknownFields causes the Decoder to return an error when the destination is a struct
+    // and the input contains object keys which do not match any non-ignored, exported fields in the destination.
+    DisallowUnknownFields()
+    // More reports whether there is another element in the current array or object being parsed.
+    More() bool
+    // UseNumber causes the Decoder to unmarshal a number into an interface{} as a Number instead of as a float64.
+    UseNumber()
+}
+
+// Marshal returns the JSON encoding of val as bytes.
+// It delegates to ConfigDefault.Marshal.
+func Marshal(val interface{}) ([]byte, error) {
+    return ConfigDefault.Marshal(val)
+}
+
+// MarshalIndent is like Marshal but applies Indent to format the output.
+// Each JSON element in the output will begin on a new line beginning with prefix
+// followed by one or more copies of indent according to the indentation nesting.
+// It delegates to ConfigDefault.MarshalIndent.
+func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) {
+    return ConfigDefault.MarshalIndent(v, prefix, indent)
+}
+
+// MarshalString returns the JSON encoding of val as a string.
+// It delegates to ConfigDefault.MarshalToString.
+func MarshalString(val interface{}) (string, error) {
+    return ConfigDefault.MarshalToString(val)
+}
+
+// Unmarshal parses the JSON-encoded data in buf and stores the result in the value pointed to by val.
+// It delegates to ConfigDefault.Unmarshal.
+// NOTICE: This API copies the given buffer by default;
+// if you want to pass JSON more efficiently, use UnmarshalString instead.
+func Unmarshal(buf []byte, val interface{}) error {
+    return ConfigDefault.Unmarshal(buf, val)
+}
+
+// UnmarshalString is like Unmarshal, except buf is a string.
+// It delegates to ConfigDefault.UnmarshalFromString.
+func UnmarshalString(buf string, val interface{}) error {
+    return ConfigDefault.UnmarshalFromString(buf, val)
+}
+
+// Get searches src for the given path and returns an ast.Node
+// representing the located part of the JSON.
+//
+// Each path arg must be an integer or a string:
+//     - Integer is target index(>=0), means searching current node as array.
+//     - String is target key, means searching current node as object.
+//
+//
+// Notice: It expects the src json to be **Well-formed** and **Immutable** when calling,
+// otherwise it may return unexpected results.
+// Considering memory safety, the returned JSON is **Copied** from the input:
+// Get passes rt.Mem2Str(src) to GetCopyFromString.
+func Get(src []byte, path ...interface{}) (ast.Node, error) {
+    return GetCopyFromString(rt.Mem2Str(src), path...)
+}
+
+// GetWithOptions searches src for the given path, using the supplied
+// ast.SearchOptions. It creates an ast.Searcher over rt.Mem2Str(src),
+// assigns opts to the searcher's SearchOptions field, and returns the
+// result of GetByPath(path...).
+func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) {
+    s := ast.NewSearcher(rt.Mem2Str(src))
+    s.SearchOptions = opts
+    return s.GetByPath(path...)
+}
+
+// GetFromString is the same as Get except that src is a string.
+//
+// WARNING: The returned JSON is **Referenced** from the input.
+// Caching or long-time holding the returned node may cause OOM.
+// If your src is big, consider using GetCopyFromString().
+func GetFromString(src string, path ...interface{}) (ast.Node, error) {
+    return ast.NewSearcher(src).GetByPath(path...)
+}
+
+// GetCopyFromString is the same as Get except that src is a string.
+// It returns the result of GetByPathCopy on a new ast.Searcher over src.
+func GetCopyFromString(src string, path ...interface{}) (ast.Node, error) {
+    return ast.NewSearcher(src).GetByPathCopy(path...)
+}
+
+// Valid reports whether data is a valid JSON encoding.
+// It delegates to ConfigDefault.Valid.
+func Valid(data []byte) bool {
+    return ConfigDefault.Valid(data)
+}
+
+// ValidString reports whether data is a valid JSON encoding.
+// It is like Valid, except data is a string: it passes rt.Str2Mem(data)
+// to ConfigDefault.Valid.
+func ValidString(data string) bool {
+    return ConfigDefault.Valid(rt.Str2Mem(data))
+}
@@ -0,0 +1,135 @@
+//go:build (amd64 && go1.17 && !go1.24) || (arm64 && go1.20 && !go1.24)
+// +build amd64,go1.17,!go1.24 arm64,go1.20,!go1.24
+
+/*
+ * Copyright 2022 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ast
+
+import (
+    `runtime`
+    `unsafe`
+
+    `github.com/bytedance/sonic/encoder`
+    `github.com/bytedance/sonic/internal/native`
+    `github.com/bytedance/sonic/internal/native/types`
+    `github.com/bytedance/sonic/internal/rt`
+    uq `github.com/bytedance/sonic/unquote`
+    `github.com/bytedance/sonic/utf8`
+)
+
+// typeByte is the type descriptor unpacked from a byte value; quote passes it
+// to rt.GrowSlice when it enlarges the output buffer.
+var typeByte = rt.UnpackEface(byte(0)).Type
+
+// quote appends val to *buf surrounded by double quotes.
+//
+// The bytes between the quotes are produced by native.Quote, which writes
+// straight into the spare capacity of *buf. When native.Quote returns a
+// negative value the buffer is grown to twice its capacity and the call is
+// retried on the input that has not been consumed yet.
+//
+//go:nocheckptr
+func quote(buf *[]byte, val string) {
+    *buf = append(*buf, '"')
+    // an empty string needs no native call: just close the quotes
+    if len(val) == 0 {
+        *buf = append(*buf, '"')
+        return
+    }
+
+    sp := rt.IndexChar(val, 0)
+    nb := len(val)
+    // view *buf through its slice header so Len can be adjusted directly
+    b := (*rt.GoSlice)(unsafe.Pointer(buf))
+
+    // input buffer
+    for nb > 0 {
+        // output buffer: starts right after the bytes already in *buf
+        dp := unsafe.Pointer(uintptr(b.Ptr) + uintptr(b.Len))
+        dn := b.Cap - b.Len
+        // call native.Quote, dn is byte count it outputs
+        ret := native.Quote(sp, nb, dp, &dn, 0)
+        // update *buf length
+        b.Len += dn
+
+        // no need more output
+        if ret >= 0 {
+            break
+        }
+
+        // double buf size
+        *b = rt.GrowSlice(typeByte, *b, b.Cap*2)
+        // ret is the complement of consumed input
+        ret = ^ret
+        // update input buffer
+        nb -= ret
+        sp = unsafe.Pointer(uintptr(sp) + uintptr(ret))
+    }
+
+    // keep the buffer and the input pointer reachable until the loop is done
+    runtime.KeepAlive(buf)
+    runtime.KeepAlive(sp)
+    *buf = append(*buf, '"')
+}
+
+// unquote returns the result of the unquote package's String function for src,
+// together with the parsing error it reports.
+func unquote(src string) (string, types.ParsingError) {
+    return uq.String(src)
+}
+
+// decodeValue runs native.Value on the parser input at the current position,
+// stores the position returned by the native call in self.p and returns the
+// JSON state it filled in.
+//
+// Without a digit buffer the call is made with types.F_USE_NUMBER; when
+// self.dbuf is set the flag is cleared and the buffer (capacity
+// types.MaxDigitNums) is handed to the native code through val.
+func (self *Parser) decodeValue() (val types.JsonState) {
+    // view the input string through its header to pass pointer and length
+    sv := (*rt.GoString)(unsafe.Pointer(&self.s))
+    flag := types.F_USE_NUMBER
+    if self.dbuf != nil {
+        flag = 0
+        val.Dbuf = self.dbuf
+        val.Dcap = types.MaxDigitNums
+    }
+    self.p = native.Value(sv.Ptr, sv.Len, self.p, &val, uint64(flag))
+    return
+}
+
+// skip calls native.SkipOne with a state machine borrowed from the types
+// package and released right after the call.
+//
+// A non-negative result is returned as the start offset with error 0. A
+// negative result is negated and returned as a types.ParsingError together
+// with the current position self.p. The native call receives &self.p, so it
+// presumably advances the position itself -- confirm in internal/native.
+func (self *Parser) skip() (int, types.ParsingError) {
+    fsm := types.NewStateMachine()
+    start := native.SkipOne(&self.s, &self.p, fsm, 0)
+    types.FreeStateMachine(fsm)
+
+    if start < 0 {
+        return self.p, types.ParsingError(-start)
+    }
+    return start, 0
+}
+
+// encodeInterface encodes the value returned by self.packAny() into buf using
+// the sonic encoder with the encoder.NoEncoderNewline option.
+func (self *Node) encodeInterface(buf *[]byte) error {
+    //WARN: NOT compatible with json.Encoder
+    return encoder.EncodeInto(buf, self.packAny(), encoder.NoEncoderNewline)
+}
+
+// skipFast calls native.SkipOneFast on the parser input; unlike skip it uses
+// no state machine.
+//
+// A non-negative result is returned as the start offset with error 0. A
+// negative result is negated and returned as a types.ParsingError together
+// with the current position self.p.
+func (self *Parser) skipFast() (int, types.ParsingError) {
+    start := native.SkipOneFast(&self.s, &self.p)
+    if start < 0 {
+        return self.p, types.ParsingError(-start)
+    }
+    return start, 0
+}
+
+// getByPath resolves path with native.GetByPath.
+//
+// When validate is true a state machine is allocated for the native call and
+// freed afterwards; otherwise nil is passed. A non-negative result is returned
+// as the start offset with error 0. A negative result is negated and returned
+// as a types.ParsingError together with the current position self.p.
+func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
+    var fsm *types.StateMachine
+    if validate {
+        fsm = types.NewStateMachine()
+    }
+    start := native.GetByPath(&self.s, &self.p, &path, fsm)
+    if validate {
+        types.FreeStateMachine(fsm)
+    }
+    // path was passed by address to native code; keep it reachable until here
+    runtime.KeepAlive(path)
+    if start < 0 {
+        return self.p, types.ParsingError(-start)
+    }
+    return start, 0
+}
+
+// validate_utf8 returns the result of sonic's utf8.ValidateString for str.
+func validate_utf8(str string) bool {
+    return utf8.ValidateString(str)
+}
@@ -0,0 +1,114 @@
+// +build !amd64,!arm64 go1.24 !go1.17 arm64,!go1.20
+
+/*
+* Copyright 2022 ByteDance Inc.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package ast
+
+import (
+    `encoding/json`
+    `unicode/utf8`
+
+    `github.com/bytedance/sonic/internal/native/types`
+    `github.com/bytedance/sonic/internal/rt`
+)
+
+// init prints a warning when the package is initialised. This file is only
+// compiled when the build constraints at its top match, i.e. outside the
+// configurations covered by the native implementation.
+func init() {
+    println("WARNING:(ast) sonic only supports go1.17~1.23, but your environment is not suitable")
+}
+
+// quote forwards to quoteString, which is defined elsewhere in the package.
+func quote(buf *[]byte, val string) {
+    quoteString(buf, val)
+}
+
+// unquote unescapes an internal JSON string (src does not include the quotes
+// at the beginning and end).
+//
+// NOTE(review): the byte slice handed to unquoteBytes starts at index -1 of
+// src and is len(src)+2 bytes long, so it covers one byte before and one byte
+// after src. src is therefore assumed to be a sub-string of a larger buffer in
+// which it is surrounded by its quotes -- confirm at the call sites.
+//
+// It returns types.ERR_INVALID_ESCAPE when unquoteBytes reports failure.
+func unquote(src string) (string, types.ParsingError) {
+    sp := rt.IndexChar(src, -1)
+    out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
+    if !ok {
+        return "", types.ERR_INVALID_ESCAPE
+    }
+    return rt.Mem2Str(out), 0
+}
+
+
+// decodeValue decodes the value at the current position with the package-level
+// decodeValue helper and returns the resulting JSON state.
+//
+// The parser position is moved to the reported end offset only when that
+// offset is non-negative; a negative offset leaves the position untouched.
+func (self *Parser) decodeValue() (val types.JsonState) {
+    end, state := decodeValue(self.s, self.p, self.dbuf == nil)
+    if end >= 0 {
+        self.p = end
+    }
+    return state
+}
+
+// skip passes over one value using skipValue.
+//
+// On success the parser position becomes the end offset and the value's start
+// offset is returned with error 0. A negative end offset is negated and
+// returned as a types.ParsingError along with the unchanged position.
+func (self *Parser) skip() (int, types.ParsingError) {
+    end, begin := skipValue(self.s, self.p)
+    if end >= 0 {
+        self.p = end
+        return begin, 0
+    }
+    return self.p, types.ParsingError(-end)
+}
+
+// skipFast passes over one value using skipValueFast.
+//
+// On success the parser position becomes the end offset and the value's start
+// offset is returned with error 0. A negative end offset is negated and
+// returned as a types.ParsingError along with the unchanged position.
+func (self *Parser) skipFast() (int, types.ParsingError) {
+    end, begin := skipValueFast(self.s, self.p)
+    if end >= 0 {
+        self.p = end
+        return begin, 0
+    }
+    return self.p, types.ParsingError(-end)
+}
+
+// encodeInterface marshals the value returned by self.packAny() with
+// encoding/json and appends the bytes to *buf. On a marshalling error nothing
+// is appended and the error is returned.
+func (self *Node) encodeInterface(buf *[]byte) error {
+    encoded, err := json.Marshal(self.packAny())
+    if err == nil {
+        *buf = append(*buf, encoded...)
+    }
+    return err
+}
+
+// getByPath walks path from the current parser position and then skips the
+// value it lands on.
+//
+// Every path element must be a non-negative int (handled by searchIndex) or a
+// string (handled by searchKey); anything else panics. The first search error
+// is returned together with the current position. The final value is skipped
+// with skip when validate is true and with skipFast otherwise; on success the
+// start offset reported by that skip is returned.
+func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
+    for _, step := range path {
+        var err types.ParsingError
+        switch sel := step.(type) {
+        case int:
+            if sel < 0 {
+                panic("path must be either int(>=0) or string")
+            }
+            err = self.searchIndex(sel)
+        case string:
+            err = self.searchKey(sel)
+        default:
+            panic("path must be either int(>=0) or string")
+        }
+        if err != 0 {
+            return self.p, err
+        }
+    }
+
+    var (
+        start int
+        e     types.ParsingError
+    )
+    if !validate {
+        start, e = self.skipFast()
+    } else {
+        start, e = self.skip()
+    }
+    if e != 0 {
+        return self.p, e
+    }
+    return start, 0
+}
+
+// validate_utf8 reports whether str is valid UTF-8, using the standard
+// library's unicode/utf8.ValidString.
+func validate_utf8(str string) bool {
+    return utf8.ValidString(str)
+}
@@ -0,0 +1,470 @@
+/**
+ * Copyright 2023 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ast
+
+import (
+	"sort"
+	"unsafe"
+
+	"github.com/bytedance/sonic/internal/caching"
+)
+
+// nodeChunk is a fixed-size array of _DEFAULT_NODE_CAP nodes.
+type nodeChunk [_DEFAULT_NODE_CAP]Node
+
+// linkedNodes stores nodes in fixed-size chunks: the first chunk is embedded
+// in the struct and further chunks are allocated on demand.
+type linkedNodes struct {
+    // head holds positions [0, _DEFAULT_NODE_CAP)
+    head   nodeChunk
+    // tail[k] holds the chunk starting at position (k+1)*_DEFAULT_NODE_CAP;
+    // Set allocates an entry the first time it is written
+    tail   []*nodeChunk
+    // size is raised by Set to i+1 whenever i >= size and lowered by Pop
+    size   int
+}
+
+// Cap returns the number of positions covered by the head chunk plus every
+// tail slot. A nil receiver has capacity 0.
+func (self *linkedNodes) Cap() int {
+    if self != nil {
+        return _DEFAULT_NODE_CAP * (1 + len(self.tail))
+    }
+    return 0
+}
+
+// Len returns the current size; a nil receiver has length 0.
+func (self *linkedNodes) Len() int {
+    if self != nil {
+        return self.size
+    }
+    return 0
+}
+
+// At returns a pointer to the node at position i, or nil when the receiver is
+// nil, i is outside [0, size), or the tail has no slot for i's chunk.
+func (self *linkedNodes) At(i int) (*Node) {
+    if self == nil || i < 0 || i >= self.size {
+        return nil
+    }
+    if i < _DEFAULT_NODE_CAP {
+        return &self.head[i]
+    }
+    // positions past the head live in tail[chunk] at the given offset
+    chunk, offset := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
+    if chunk >= len(self.tail) {
+        return nil
+    }
+    return &self.tail[chunk][offset]
+}
+
+// MoveOne moves the node at position source to position target and shifts the
+// nodes in between by one position towards source. It does nothing when both
+// positions are equal or when either lies outside [0, size).
+func (self *linkedNodes) MoveOne(source int,  target int) {
+    if source == target {
+        return
+    }
+    if source < 0 || source >= self.size || target < 0 || target >= self.size {
+        return
+    }
+    // remember the node being moved before it gets overwritten
+    moved := *self.At(source)
+    // walk from source towards target, pulling each neighbour one slot closer
+    step := 1
+    if source > target {
+        step = -1
+    }
+    for pos := source; pos != target; pos += step {
+        *self.At(pos) = *self.At(pos+step)
+    }
+    *self.At(target) = moved
+}
+
+// Pop overwrites the last node with the zero Node and decrements size.
+// It is a no-op on a nil or empty receiver.
+func (self *linkedNodes) Pop() {
+    if self != nil && self.size != 0 {
+        last := self.size-1
+        self.Set(last, Node{})
+        self.size--
+    }
+}
+
+// Push stores v at position size; Set then raises size by one.
+func (self *linkedNodes) Push(v Node) {
+    self.Set(self.size, v)
+}
+
+
+func (self *linkedNodes) Set(i int, v Node) {
+    if i < _DEFAULT_NODE_CAP {
+        self.head[i] = v
+        if self.size <= i {
+            self.size = i+1
+        }
+        return
+    }
+    a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
+    if a < 0 {
+        self.head[b] = v
+    } else {
+        self.growTailLength(a+1)
+        var n = &self.tail[a]
+        if *n == nil {
+            *n = new(nodeChunk)
+        }
+        (*n)[b] = v
+    }
+    if self.size <= i {
+        self.size = i+1
+    }
+}
+
+// growTailLength makes sure the tail slice has length at least l. It reslices
+// when the existing capacity suffices; otherwise it allocates a new slice
+// whose capacity is grown step by step (by 1 + c>>_APPEND_GROW_SHIFT) until it
+// reaches l, and copies the existing entries over.
+func (self *linkedNodes) growTailLength(l int) {
+    if l <= len(self.tail) {
+        return
+    }
+    if cap(self.tail) >= l {
+        self.tail = self.tail[:l]
+        return
+    }
+    newCap := cap(self.tail)
+    for newCap < l {
+        newCap += 1 + newCap>>_APPEND_GROW_SHIFT
+    }
+    grown := make([]*nodeChunk, l, newCap)
+    copy(grown, self.tail)
+    self.tail = grown
+}
+
+// ToSlice copies the first size nodes into con in position order. Nothing is
+// copied when con is shorter than size.
+func (self *linkedNodes) ToSlice(con []Node) {
+    if len(con) < self.size {
+        return
+    }
+    last := self.size-1
+    // lastChunk is the tail index of the final chunk (-1 when only the head
+    // is in use); rem is the offset of the last node inside that chunk
+    lastChunk, rem := last/_DEFAULT_NODE_CAP-1, last%_DEFAULT_NODE_CAP
+    if lastChunk < 0 {
+        copy(con, self.head[:rem+1])
+        return
+    }
+
+    // written tracks how many nodes have been copied so far
+    written := copy(con, self.head[:])
+    for k := 0; k < lastChunk; k++ {
+        written += copy(con[written:], self.tail[k][:])
+    }
+    copy(con[written:], self.tail[lastChunk][:rem+1])
+}
+
+// FromSlice sets size to len(con) and copies con into the head chunk and, if
+// needed, into freshly allocated tail chunks. The tail slice is reallocated
+// only when its capacity cannot hold the required number of chunks.
+func (self *linkedNodes) FromSlice(con []Node) {
+    self.size = len(con)
+    last := self.size-1
+    lastChunk, rem := last/_DEFAULT_NODE_CAP-1, last%_DEFAULT_NODE_CAP
+    if lastChunk < 0 {
+        // everything fits into the head chunk
+        copy(self.head[:rem+1], con)
+        return
+    }
+    rest := con[copy(self.head[:], con):]
+
+    chunks := lastChunk+1
+    if cap(self.tail) < chunks {
+        self.tail = make([]*nodeChunk, chunks, chunks + chunks>>_APPEND_GROW_SHIFT)
+    }
+    self.tail = self.tail[:chunks]
+
+    // the final chunk receives the rem+1 nodes that are left over
+    for k := 0; k < chunks; k++ {
+        self.tail[k] = new(nodeChunk)
+        rest = rest[copy(self.tail[k][:], rest):]
+    }
+}
+
+// pairChunk is a fixed-size array of _DEFAULT_NODE_CAP pairs.
+type pairChunk [_DEFAULT_NODE_CAP]Pair
+
+// linkedPairs stores pairs in fixed-size chunks, optionally with a hash index
+// used by Get.
+type linkedPairs struct {
+    // index maps a pair's hash to its position; it stays nil until
+    // BuildIndex creates it, and every other method checks for nil
+    index map[uint64]int
+    // head holds positions [0, _DEFAULT_NODE_CAP)
+    head pairChunk
+    // tail[k] holds the chunk starting at position (k+1)*_DEFAULT_NODE_CAP
+    tail []*pairChunk
+    // size is raised by set to i+1 whenever i >= size and lowered by Pop
+    size int
+}
+
+// BuildIndex creates the hash index if it does not exist yet and records the
+// position of every stored pair under its hash. Entries already present in an
+// existing index are kept unless overwritten.
+func (self *linkedPairs) BuildIndex() {
+    if self.index == nil {
+        self.index = make(map[uint64]int, self.size)
+    }
+    for pos := 0; pos < self.size; pos++ {
+        self.index[self.At(pos).hash] = pos
+    }
+}
+
+// Cap returns the number of positions covered by the head chunk plus every
+// tail slot. A nil receiver has capacity 0.
+func (self *linkedPairs) Cap() int {
+    if self != nil {
+        return _DEFAULT_NODE_CAP * (1 + len(self.tail))
+    }
+    return 0
+}
+
+// Len returns the current size; a nil receiver has length 0.
+func (self *linkedPairs) Len() int {
+    if self != nil {
+        return self.size
+    }
+    return 0
+}
+
+// At returns a pointer to the pair at position i, or nil when the receiver is
+// nil, i is outside [0, size), or the tail has no slot for i's chunk.
+func (self *linkedPairs) At(i int) *Pair {
+    if self == nil || i < 0 || i >= self.size {
+        return nil
+    }
+    if i < _DEFAULT_NODE_CAP {
+        return &self.head[i]
+    }
+    // positions past the head live in tail[chunk] at the given offset
+    chunk, offset := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
+    if chunk >= len(self.tail) {
+        return nil
+    }
+    return &self.tail[chunk][offset]
+}
+
+// Push stores v at position size through Set, which also records it in the
+// index when one exists and raises size by one.
+func (self *linkedPairs) Push(v Pair) {
+    self.Set(self.size, v)
+}
+
+// Pop clears the last pair through Unset and decrements size.
+// It is a no-op on a nil or empty receiver.
+func (self *linkedPairs) Pop() {
+    if self != nil && self.size != 0 {
+        last := self.size-1
+        self.Unset(last)
+        self.size--
+    }
+}
+
+// Unset removes the hash of the pair at position i from the index, when an
+// index exists, and then overwrites that position with the zero Pair.
+func (self *linkedPairs) Unset(i int) {
+    if idx := self.index; idx != nil {
+        delete(idx, self.At(i).hash)
+    }
+    self.set(i, Pair{})
+}
+
+// Set stores v at position i and, when an index exists, records i under
+// v.hash before writing the pair.
+func (self *linkedPairs) Set(i int, v Pair) {
+    if idx := self.index; idx != nil {
+        idx[v.hash] = i
+    }
+    self.set(i, v)
+}
+
+func (self *linkedPairs) set(i int, v Pair) {
+    if i < _DEFAULT_NODE_CAP {
+        self.head[i] = v
+        if self.size <= i {
+            self.size = i+1
+        }
+        return
+    }
+    a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
+    if a < 0 {
+        self.head[b] = v
+    } else {
+        self.growTailLength(a+1)
+        var n = &self.tail[a]
+        if *n == nil {
+            *n = new(pairChunk)
+        }
+        (*n)[b] = v
+    }
+    if self.size <= i {
+        self.size = i+1
+    }
+}
+
+// growTailLength makes sure the tail slice has length at least l. It reslices
+// when the existing capacity suffices; otherwise it allocates a new slice
+// whose capacity is grown step by step (by 1 + c>>_APPEND_GROW_SHIFT) until it
+// reaches l, and copies the existing entries over.
+func (self *linkedPairs) growTailLength(l int) {
+    if l <= len(self.tail) {
+        return
+    }
+    if cap(self.tail) >= l {
+        self.tail = self.tail[:l]
+        return
+    }
+    newCap := cap(self.tail)
+    for newCap < l {
+        newCap += 1 + newCap>>_APPEND_GROW_SHIFT
+    }
+    grown := make([]*pairChunk, l, newCap)
+    copy(grown, self.tail)
+    self.tail = grown
+}
+
+// Get returns the pair stored under key and its position, or (nil, -1) when
+// there is none.
+//
+// With an index, the key's hash is looked up first: a miss ends the search,
+// a hit whose key matches is returned directly, and a hit whose key differs
+// (hash conflict) falls back to the linear scan. Without an index the pairs
+// are scanned linearly from position 0.
+func (self *linkedPairs) Get(key string) (*Pair, int) {
+    if self.index != nil {
+        // fast-path
+        pos, found := self.index[caching.StrHash(key)]
+        if !found {
+            return nil, -1
+        }
+        if hit := self.At(pos); hit.Key == key {
+            return hit, pos
+        }
+        // hash conflicts: continue with the linear scan below
+    }
+    for pos := 0; pos < self.size; pos++ {
+        if candidate := self.At(pos); candidate.Key == key {
+            return candidate, pos
+        }
+    }
+    return nil, -1
+}
+
+// ToSlice copies the first size pairs into con in position order. Nothing is
+// copied when con is shorter than size.
+func (self *linkedPairs) ToSlice(con []Pair) {
+    if len(con) < self.size {
+        return
+    }
+    last := self.size-1
+    // lastChunk is the tail index of the final chunk (-1 when only the head
+    // is in use); rem is the offset of the last pair inside that chunk
+    lastChunk, rem := last/_DEFAULT_NODE_CAP-1, last%_DEFAULT_NODE_CAP
+    if lastChunk < 0 {
+        copy(con, self.head[:rem+1])
+        return
+    }
+
+    // written tracks how many pairs have been copied so far
+    written := copy(con, self.head[:])
+    for k := 0; k < lastChunk; k++ {
+        written += copy(con[written:], self.tail[k][:])
+    }
+    copy(con[written:], self.tail[lastChunk][:rem+1])
+}
+
+// ToMap writes every stored pair into con as con[Key] = Value, in position
+// order, so a later pair with the same key overwrites an earlier one.
+func (self *linkedPairs) ToMap(con map[string]Node) {
+    for pos := 0; pos < self.size; pos++ {
+        pair := self.At(pos)
+        con[pair.Key] = pair.Value
+    }
+}
+
+// copyPairs copies from into to and, when an index exists, records the first
+// l copied pairs in it with positions counted from 0.
+//
+// It is kept with its original signature for existing callers; chunks that
+// do not start at position 0 must go through copyPairsAt.
+func (self *linkedPairs) copyPairs(to []Pair, from []Pair, l int) {
+    self.copyPairsAt(to, from, l, 0)
+}
+
+// copyPairsAt copies from into to, where to[0] is the pair at container
+// position base. When an index exists, the hash of each of the first l copied
+// pairs is computed from its key, stored in the copied pair and recorded in
+// the index under its container position base+i.
+//
+// The hash is written to the destination rather than to from: the stored pair
+// is the one Unset, Swap and BuildIndex read the hash from, and the caller's
+// slice is left unmodified.
+func (self *linkedPairs) copyPairsAt(to []Pair, from []Pair, l int, base int) {
+    copy(to, from)
+    if self.index == nil {
+        return
+    }
+    for i:=0; i<l; i++ {
+        // NOTICE: in case the user did not pass a hash, just calculate it
+        h := caching.StrHash(to[i].Key)
+        to[i].hash = h
+        self.index[h] = base + i
+    }
+}
+
+// FromSlice sets size to len(con) and copies con into the head chunk and, if
+// needed, into freshly allocated tail chunks. The tail slice is reallocated
+// only when its capacity cannot hold the required number of chunks. When an
+// index exists, every copied pair is recorded under its container position.
+func (self *linkedPairs) FromSlice(con []Pair) {
+    self.size = len(con)
+    i := self.size-1
+    a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
+    if a < 0 {
+        // everything fits into the head chunk
+        self.copyPairsAt(self.head[:b+1], con, b+1, 0)
+        return
+    }
+    self.copyPairsAt(self.head[:], con, len(self.head), 0)
+    con = con[_DEFAULT_NODE_CAP:]
+    // base is the container position of the first pair of the next chunk
+    base := _DEFAULT_NODE_CAP
+
+    if cap(self.tail) <= a {
+        c := (a+1) + (a+1)>>_APPEND_GROW_SHIFT
+        self.tail = make([]*pairChunk, a+1, c)
+    }
+    self.tail = self.tail[:a+1]
+
+    for i:=0; i<a; i++ {
+        self.tail[i] = new(pairChunk)
+        self.copyPairsAt(self.tail[i][:], con, len(self.tail[i]), base)
+        con = con[_DEFAULT_NODE_CAP:]
+        base += _DEFAULT_NODE_CAP
+    }
+
+    // the final chunk receives the b+1 pairs that are left over
+    self.tail[a] = new(pairChunk)
+    self.copyPairsAt(self.tail[a][:b+1], con, b+1, base)
+}
+
+// Less reports whether the key at position i orders before the key at
+// position j, comparing byte-wise from the first byte via lessFrom.
+// Together with Len and Swap it lets Sort pass the container to sort.Stable.
+func (self *linkedPairs) Less(i, j int) bool {
+    return lessFrom(self.At(i).Key, self.At(j).Key, 0)
+}
+
+// Swap exchanges the pairs at positions i and j. When an index exists, the
+// positions recorded for both hashes are exchanged first.
+func (self *linkedPairs) Swap(i, j int) {
+    first, second := self.At(i), self.At(j)
+    if idx := self.index; idx != nil {
+        idx[first.hash] = j
+        idx[second.hash] = i
+    }
+    held := *first
+    *first = *second
+    *second = held
+}
+
+// Sort orders the pairs by key with sort.Stable, using the receiver's Len,
+// Less and Swap methods.
+func (self *linkedPairs) Sort() {
+    sort.Stable(self)
+}
+
+// lessFrom reports whether a orders before b when the bytes are compared
+// starting at position d. The first differing byte within the shorter length
+// decides; if there is none, the shorter string orders first.
+func lessFrom(a, b string, d int) bool {
+    limit := len(b)
+    if len(a) < limit {
+        limit = len(a)
+    }
+    for pos := d; pos < limit; pos++ {
+        if x, y := a[pos], b[pos]; x != y {
+            return x < y
+        }
+    }
+    return len(a) < len(b)
+}
+
+// parseObjectStack is the state a lazy object node points to (see
+// newLazyObject).
+type parseObjectStack struct {
+    // parser is the copy of the Parser taken when the lazy node was created
+    parser Parser
+    // v is the pairs container -- presumably the members parsed so far; TODO confirm
+    v      linkedPairs
+}
+
+// parseArrayStack is the state a lazy array node points to (see
+// newLazyArray).
+type parseArrayStack struct {
+    // parser is the copy of the Parser taken when the lazy node was created
+    parser Parser
+    // v is the nodes container -- presumably the elements parsed so far; TODO confirm
+    v      linkedNodes
+}
+
+// newLazyArray returns a Node of type _V_ARRAY_LAZY whose pointer refers to a
+// new parseArrayStack holding a copy of *p.
+func newLazyArray(p *Parser) Node {
+    s := new(parseArrayStack)
+    // copy the parser by value so the node owns its own parsing state
+    s.parser = *p
+    return Node{
+        t: _V_ARRAY_LAZY,
+        p: unsafe.Pointer(s),
+    }
+}
+
+// newLazyObject returns a Node of type _V_OBJECT_LAZY whose pointer refers to
+// a new parseObjectStack holding a copy of *p.
+func newLazyObject(p *Parser) Node {
+    s := new(parseObjectStack)
+    // copy the parser by value so the node owns its own parsing state
+    s.parser = *p
+    return Node{
+        t: _V_OBJECT_LAZY,
+        p: unsafe.Pointer(s),
+    }
+}
+
+// getParserAndArrayStack reinterprets self.p as a *parseArrayStack and returns
+// the parser stored in it together with the stack itself.
+// The cast is unchecked: it assumes self.p was set by newLazyArray -- callers
+// must verify the node type first.
+func (self *Node) getParserAndArrayStack() (*Parser, *parseArrayStack) {
+    stack := (*parseArrayStack)(self.p)
+    return &stack.parser, stack
+}
+
+// getParserAndObjectStack reinterprets self.p as a *parseObjectStack and
+// returns the parser stored in it together with the stack itself.
+// The cast is unchecked: it assumes self.p was set by newLazyObject -- callers
+// must verify the node type first.
+func (self *Node) getParserAndObjectStack() (*Parser, *parseObjectStack) {
+    stack := (*parseObjectStack)(self.p)
+    return &stack.parser, stack
+}
+
--- a/Show More
+++ b/Show More